1 | //===- bolt/Passes/MCF.cpp ------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements functions for solving minimum-cost flow problem. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "bolt/Passes/MCF.h" |
14 | #include "bolt/Core/BinaryFunction.h" |
15 | #include "bolt/Passes/DataflowInfoManager.h" |
16 | #include "bolt/Utils/CommandLineOpts.h" |
17 | #include "llvm/ADT/DenseMap.h" |
18 | #include "llvm/Support/CommandLine.h" |
19 | #include <algorithm> |
20 | #include <vector> |
21 | |
22 | #undef DEBUG_TYPE |
23 | #define DEBUG_TYPE "mcf" |
24 | |
25 | using namespace llvm; |
26 | using namespace bolt; |
27 | |
28 | namespace opts { |
29 | |
30 | extern cl::OptionCategory BoltOptCategory; |
31 | |
32 | extern cl::opt<bool> TimeOpts; |
33 | |
34 | static cl::opt<bool> IterativeGuess( |
35 | "iterative-guess" , |
36 | cl::desc("in non-LBR mode, guess edge counts using iterative technique" ), |
37 | cl::Hidden, cl::cat(BoltOptCategory)); |
38 | |
39 | static cl::opt<bool> UseRArcs( |
40 | "mcf-use-rarcs" , |
41 | cl::desc("in MCF, consider the possibility of cancelling flow to balance " |
42 | "edges" ), |
43 | cl::Hidden, cl::cat(BoltOptCategory)); |
44 | |
45 | } // namespace opts |
46 | |
47 | namespace llvm { |
48 | namespace bolt { |
49 | |
50 | namespace { |
51 | |
52 | // Edge Weight Inference Heuristic |
53 | // |
54 | // We start by maintaining the invariant used in LBR mode where the sum of |
55 | // pred edges count is equal to the block execution count. This loop will set |
56 | // pred edges count by balancing its own execution count in different pred |
57 | // edges. The weight of each edge is guessed by looking at how hot each pred |
58 | // block is (in terms of samples). |
59 | // There are two caveats in this approach. One is for critical edges and the |
60 | // other is for self-referencing blocks (loops of 1 BB). For critical edges, |
61 | // we can't infer the hotness of them based solely on pred BBs execution |
62 | // count. For each critical edge we look at the pred BB, then look at its |
63 | // succs to adjust its weight. |
64 | // |
65 | // [ 60 ] [ 25 ] |
66 | // | \ | |
67 | // [ 10 ] [ 75 ] |
68 | // |
69 | // The illustration above shows a critical edge \. We wish to adjust bb count |
70 | // 60 to 50 to properly determine the weight of the critical edge to be |
71 | // 50 / 75. |
72 | // For self-referencing edges, we attribute its weight by subtracting the |
73 | // current BB execution count by the sum of predecessors count if this result |
74 | // is non-negative. |
75 | using EdgeWeightMap = |
76 | DenseMap<std::pair<const BinaryBasicBlock *, const BinaryBasicBlock *>, |
77 | double>; |
78 | |
79 | template <class NodeT> |
80 | void updateEdgeWeight(EdgeWeightMap &EdgeWeights, const BinaryBasicBlock *A, |
81 | const BinaryBasicBlock *B, double Weight); |
82 | |
83 | template <> |
84 | void updateEdgeWeight<BinaryBasicBlock *>(EdgeWeightMap &EdgeWeights, |
85 | const BinaryBasicBlock *A, |
86 | const BinaryBasicBlock *B, |
87 | double Weight) { |
88 | EdgeWeights[std::make_pair(x&: A, y&: B)] = Weight; |
89 | } |
90 | |
91 | template <> |
92 | void updateEdgeWeight<Inverse<BinaryBasicBlock *>>(EdgeWeightMap &EdgeWeights, |
93 | const BinaryBasicBlock *A, |
94 | const BinaryBasicBlock *B, |
95 | double Weight) { |
96 | EdgeWeights[std::make_pair(x&: B, y&: A)] = Weight; |
97 | } |
98 | |
99 | template <class NodeT> |
100 | void computeEdgeWeights(BinaryBasicBlock *BB, EdgeWeightMap &EdgeWeights) { |
101 | typedef GraphTraits<NodeT> GraphT; |
102 | typedef GraphTraits<Inverse<NodeT>> InvTraits; |
103 | |
104 | double TotalChildrenCount = 0.0; |
105 | SmallVector<double, 4> ChildrenExecCount; |
106 | // First pass computes total children execution count that directly |
107 | // contribute to this BB. |
108 | for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB), |
109 | E = GraphT::child_end(BB); |
110 | CI != E; ++CI) { |
111 | typename GraphT::NodeRef Child = *CI; |
112 | double ChildExecCount = Child->getExecutionCount(); |
113 | // Is self-reference? |
114 | if (Child == BB) { |
115 | ChildExecCount = 0.0; // will fill this in second pass |
116 | } else if (GraphT::child_end(BB) - GraphT::child_begin(BB) > 1 && |
117 | InvTraits::child_end(Child) - InvTraits::child_begin(Child) > |
118 | 1) { |
119 | // Handle critical edges. This will cause a skew towards crit edges, but |
120 | // it is a quick solution. |
121 | double CritWeight = 0.0; |
122 | uint64_t Denominator = 0; |
123 | for (typename InvTraits::ChildIteratorType |
124 | II = InvTraits::child_begin(Child), |
125 | IE = InvTraits::child_end(Child); |
126 | II != IE; ++II) { |
127 | typename GraphT::NodeRef N = *II; |
128 | Denominator += N->getExecutionCount(); |
129 | if (N != BB) |
130 | continue; |
131 | CritWeight = N->getExecutionCount(); |
132 | } |
133 | if (Denominator) |
134 | CritWeight /= static_cast<double>(Denominator); |
135 | ChildExecCount *= CritWeight; |
136 | } |
137 | ChildrenExecCount.push_back(Elt: ChildExecCount); |
138 | TotalChildrenCount += ChildExecCount; |
139 | } |
140 | // Second pass fixes the weight of a possible self-reference edge |
141 | uint32_t ChildIndex = 0; |
142 | for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB), |
143 | E = GraphT::child_end(BB); |
144 | CI != E; ++CI) { |
145 | typename GraphT::NodeRef Child = *CI; |
146 | if (Child != BB) { |
147 | ++ChildIndex; |
148 | continue; |
149 | } |
150 | if (static_cast<double>(BB->getExecutionCount()) > TotalChildrenCount) { |
151 | ChildrenExecCount[ChildIndex] = |
152 | BB->getExecutionCount() - TotalChildrenCount; |
153 | TotalChildrenCount += ChildrenExecCount[ChildIndex]; |
154 | } |
155 | break; |
156 | } |
157 | // Third pass finally assigns weights to edges |
158 | ChildIndex = 0; |
159 | for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB), |
160 | E = GraphT::child_end(BB); |
161 | CI != E; ++CI) { |
162 | typename GraphT::NodeRef Child = *CI; |
163 | double Weight = 1 / (GraphT::child_end(BB) - GraphT::child_begin(BB)); |
164 | if (TotalChildrenCount != 0.0) |
165 | Weight = ChildrenExecCount[ChildIndex] / TotalChildrenCount; |
166 | updateEdgeWeight<NodeT>(EdgeWeights, BB, Child, Weight); |
167 | ++ChildIndex; |
168 | } |
169 | } |
170 | |
171 | template <class NodeT> |
172 | void computeEdgeWeights(BinaryFunction &BF, EdgeWeightMap &EdgeWeights) { |
173 | for (BinaryBasicBlock &BB : BF) |
174 | computeEdgeWeights<NodeT>(&BB, EdgeWeights); |
175 | } |
176 | |
177 | /// Make BB count match the sum of all incoming edges. If AllEdges is true, |
178 | /// make it match max(SumPredEdges, SumSuccEdges). |
179 | void recalculateBBCounts(BinaryFunction &BF, bool AllEdges) { |
180 | for (BinaryBasicBlock &BB : BF) { |
181 | uint64_t TotalPredsEWeight = 0; |
182 | for (BinaryBasicBlock *Pred : BB.predecessors()) |
183 | TotalPredsEWeight += Pred->getBranchInfo(Succ: BB).Count; |
184 | |
185 | if (TotalPredsEWeight > BB.getExecutionCount()) |
186 | BB.setExecutionCount(TotalPredsEWeight); |
187 | |
188 | if (!AllEdges) |
189 | continue; |
190 | |
191 | uint64_t TotalSuccsEWeight = 0; |
192 | for (BinaryBasicBlock::BinaryBranchInfo &BI : BB.branch_info()) |
193 | TotalSuccsEWeight += BI.Count; |
194 | |
195 | if (TotalSuccsEWeight > BB.getExecutionCount()) |
196 | BB.setExecutionCount(TotalSuccsEWeight); |
197 | } |
198 | } |
199 | |
200 | // This is our main edge count guessing heuristic. Look at predecessors and |
201 | // assign a proportionally higher count to pred edges coming from blocks with |
202 | // a higher execution count in comparison with the other predecessor blocks, |
203 | // making SumPredEdges match the current BB count. |
204 | // If "UseSucc" is true, apply the same logic to successor edges as well. Since |
205 | // some successor edges may already have assigned a count, only update it if the |
206 | // new count is higher. |
207 | void guessEdgeByRelHotness(BinaryFunction &BF, bool UseSucc, |
208 | EdgeWeightMap &PredEdgeWeights, |
209 | EdgeWeightMap &SuccEdgeWeights) { |
210 | for (BinaryBasicBlock &BB : BF) { |
211 | for (BinaryBasicBlock *Pred : BB.predecessors()) { |
212 | double RelativeExec = PredEdgeWeights[std::make_pair(x&: Pred, y: &BB)]; |
213 | RelativeExec *= BB.getExecutionCount(); |
214 | BinaryBasicBlock::BinaryBranchInfo &BI = Pred->getBranchInfo(Succ: BB); |
215 | if (static_cast<uint64_t>(RelativeExec) > BI.Count) |
216 | BI.Count = static_cast<uint64_t>(RelativeExec); |
217 | } |
218 | |
219 | if (!UseSucc) |
220 | continue; |
221 | |
222 | auto BI = BB.branch_info_begin(); |
223 | for (BinaryBasicBlock *Succ : BB.successors()) { |
224 | double RelativeExec = SuccEdgeWeights[std::make_pair(x: &BB, y&: Succ)]; |
225 | RelativeExec *= BB.getExecutionCount(); |
226 | if (static_cast<uint64_t>(RelativeExec) > BI->Count) |
227 | BI->Count = static_cast<uint64_t>(RelativeExec); |
228 | ++BI; |
229 | } |
230 | } |
231 | } |
232 | |
233 | using ArcSet = |
234 | DenseSet<std::pair<const BinaryBasicBlock *, const BinaryBasicBlock *>>; |
235 | |
236 | /// Predecessor edges version of guessEdgeByIterativeApproach. GuessedArcs has |
237 | /// all edges we already established their count. Try to guess the count of |
238 | /// the remaining edge, if there is only one to guess, and return true if we |
239 | /// were able to guess. |
240 | bool guessPredEdgeCounts(BinaryBasicBlock *BB, ArcSet &GuessedArcs) { |
241 | if (BB->pred_size() == 0) |
242 | return false; |
243 | |
244 | uint64_t TotalPredCount = 0; |
245 | unsigned NumGuessedEdges = 0; |
246 | for (BinaryBasicBlock *Pred : BB->predecessors()) { |
247 | if (GuessedArcs.count(V: std::make_pair(x&: Pred, y&: BB))) |
248 | ++NumGuessedEdges; |
249 | TotalPredCount += Pred->getBranchInfo(Succ: *BB).Count; |
250 | } |
251 | |
252 | if (NumGuessedEdges != BB->pred_size() - 1) |
253 | return false; |
254 | |
255 | int64_t Guessed = |
256 | static_cast<int64_t>(BB->getExecutionCount()) - TotalPredCount; |
257 | if (Guessed < 0) |
258 | Guessed = 0; |
259 | |
260 | for (BinaryBasicBlock *Pred : BB->predecessors()) { |
261 | if (GuessedArcs.count(V: std::make_pair(x&: Pred, y&: BB))) |
262 | continue; |
263 | |
264 | Pred->getBranchInfo(Succ: *BB).Count = Guessed; |
265 | GuessedArcs.insert(V: std::make_pair(x&: Pred, y&: BB)); |
266 | return true; |
267 | } |
268 | llvm_unreachable("Expected unguessed arc" ); |
269 | } |
270 | |
271 | /// Successor edges version of guessEdgeByIterativeApproach. GuessedArcs has |
272 | /// all edges we already established their count. Try to guess the count of |
273 | /// the remaining edge, if there is only one to guess, and return true if we |
274 | /// were able to guess. |
275 | bool guessSuccEdgeCounts(BinaryBasicBlock *BB, ArcSet &GuessedArcs) { |
276 | if (BB->succ_size() == 0) |
277 | return false; |
278 | |
279 | uint64_t TotalSuccCount = 0; |
280 | unsigned NumGuessedEdges = 0; |
281 | auto BI = BB->branch_info_begin(); |
282 | for (BinaryBasicBlock *Succ : BB->successors()) { |
283 | if (GuessedArcs.count(V: std::make_pair(x&: BB, y&: Succ))) |
284 | ++NumGuessedEdges; |
285 | TotalSuccCount += BI->Count; |
286 | ++BI; |
287 | } |
288 | |
289 | if (NumGuessedEdges != BB->succ_size() - 1) |
290 | return false; |
291 | |
292 | int64_t Guessed = |
293 | static_cast<int64_t>(BB->getExecutionCount()) - TotalSuccCount; |
294 | if (Guessed < 0) |
295 | Guessed = 0; |
296 | |
297 | BI = BB->branch_info_begin(); |
298 | for (BinaryBasicBlock *Succ : BB->successors()) { |
299 | if (GuessedArcs.count(V: std::make_pair(x&: BB, y&: Succ))) { |
300 | ++BI; |
301 | continue; |
302 | } |
303 | |
304 | BI->Count = Guessed; |
305 | GuessedArcs.insert(V: std::make_pair(x&: BB, y&: Succ)); |
306 | return true; |
307 | } |
308 | llvm_unreachable("Expected unguessed arc" ); |
309 | } |
310 | |
311 | /// Guess edge count whenever we have only one edge (pred or succ) left |
312 | /// to guess. Then make its count equal to BB count minus all other edge |
313 | /// counts we already know their count. Repeat this until there is no |
314 | /// change. |
315 | void guessEdgeByIterativeApproach(BinaryFunction &BF) { |
316 | ArcSet KnownArcs; |
317 | bool Changed = false; |
318 | |
319 | do { |
320 | Changed = false; |
321 | for (BinaryBasicBlock &BB : BF) { |
322 | if (guessPredEdgeCounts(BB: &BB, GuessedArcs&: KnownArcs)) |
323 | Changed = true; |
324 | if (guessSuccEdgeCounts(BB: &BB, GuessedArcs&: KnownArcs)) |
325 | Changed = true; |
326 | } |
327 | } while (Changed); |
328 | |
329 | // Guess count for non-inferred edges |
330 | for (BinaryBasicBlock &BB : BF) { |
331 | for (BinaryBasicBlock *Pred : BB.predecessors()) { |
332 | if (KnownArcs.count(V: std::make_pair(x&: Pred, y: &BB))) |
333 | continue; |
334 | BinaryBasicBlock::BinaryBranchInfo &BI = Pred->getBranchInfo(Succ: BB); |
335 | BI.Count = |
336 | std::min(a: Pred->getExecutionCount(), b: BB.getExecutionCount()) / 2; |
337 | KnownArcs.insert(V: std::make_pair(x&: Pred, y: &BB)); |
338 | } |
339 | auto BI = BB.branch_info_begin(); |
340 | for (BinaryBasicBlock *Succ : BB.successors()) { |
341 | if (KnownArcs.count(V: std::make_pair(x: &BB, y&: Succ))) { |
342 | ++BI; |
343 | continue; |
344 | } |
345 | BI->Count = |
346 | std::min(a: BB.getExecutionCount(), b: Succ->getExecutionCount()) / 2; |
347 | KnownArcs.insert(V: std::make_pair(x: &BB, y&: Succ)); |
348 | break; |
349 | } |
350 | } |
351 | } |
352 | |
353 | /// Associate each basic block with the BinaryLoop object corresponding to the |
354 | /// innermost loop containing this block. |
355 | DenseMap<const BinaryBasicBlock *, const BinaryLoop *> |
356 | createLoopNestLevelMap(BinaryFunction &BF) { |
357 | DenseMap<const BinaryBasicBlock *, const BinaryLoop *> LoopNestLevel; |
358 | const BinaryLoopInfo &BLI = BF.getLoopInfo(); |
359 | |
360 | for (BinaryBasicBlock &BB : BF) |
361 | LoopNestLevel[&BB] = BLI[&BB]; |
362 | |
363 | return LoopNestLevel; |
364 | } |
365 | |
366 | } // end anonymous namespace |
367 | |
368 | void equalizeBBCounts(DataflowInfoManager &Info, BinaryFunction &BF) { |
369 | if (BF.begin() == BF.end()) |
370 | return; |
371 | |
372 | DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); |
373 | DominatorAnalysis<true> &PDA = Info.getPostDominatorAnalysis(); |
374 | auto &InsnToBB = Info.getInsnToBBMap(); |
375 | // These analyses work at the instruction granularity, but we really only need |
376 | // basic block granularity here. So we'll use a set of visited edges to avoid |
377 | // revisiting the same BBs again and again. |
378 | DenseMap<const BinaryBasicBlock *, std::set<const BinaryBasicBlock *>> |
379 | Visited; |
380 | // Equivalence classes mapping. Each equivalence class is defined by the set |
381 | // of BBs that obeys the aforementioned properties. |
382 | DenseMap<const BinaryBasicBlock *, signed> BBsToEC; |
383 | std::vector<std::vector<BinaryBasicBlock *>> Classes; |
384 | |
385 | BF.calculateLoopInfo(); |
386 | DenseMap<const BinaryBasicBlock *, const BinaryLoop *> LoopNestLevel = |
387 | createLoopNestLevelMap(BF); |
388 | |
389 | for (BinaryBasicBlock &BB : BF) |
390 | BBsToEC[&BB] = -1; |
391 | |
392 | for (BinaryBasicBlock &BB : BF) { |
393 | auto I = BB.begin(); |
394 | if (I == BB.end()) |
395 | continue; |
396 | |
397 | DA.doForAllDominators(Inst: *I, Task: [&](const MCInst &DomInst) { |
398 | BinaryBasicBlock *DomBB = InsnToBB[&DomInst]; |
399 | if (Visited[DomBB].count(x: &BB)) |
400 | return; |
401 | Visited[DomBB].insert(x: &BB); |
402 | if (!PDA.doesADominateB(A: *I, B: DomInst)) |
403 | return; |
404 | if (LoopNestLevel[&BB] != LoopNestLevel[DomBB]) |
405 | return; |
406 | if (BBsToEC[DomBB] == -1 && BBsToEC[&BB] == -1) { |
407 | BBsToEC[DomBB] = Classes.size(); |
408 | BBsToEC[&BB] = Classes.size(); |
409 | Classes.emplace_back(); |
410 | Classes.back().push_back(x: DomBB); |
411 | Classes.back().push_back(x: &BB); |
412 | return; |
413 | } |
414 | if (BBsToEC[DomBB] == -1) { |
415 | BBsToEC[DomBB] = BBsToEC[&BB]; |
416 | Classes[BBsToEC[&BB]].push_back(x: DomBB); |
417 | return; |
418 | } |
419 | if (BBsToEC[&BB] == -1) { |
420 | BBsToEC[&BB] = BBsToEC[DomBB]; |
421 | Classes[BBsToEC[DomBB]].push_back(x: &BB); |
422 | return; |
423 | } |
424 | signed BBECNum = BBsToEC[&BB]; |
425 | std::vector<BinaryBasicBlock *> DomEC = Classes[BBsToEC[DomBB]]; |
426 | std::vector<BinaryBasicBlock *> BBEC = Classes[BBECNum]; |
427 | for (BinaryBasicBlock *Block : DomEC) { |
428 | BBsToEC[Block] = BBECNum; |
429 | BBEC.push_back(x: Block); |
430 | } |
431 | DomEC.clear(); |
432 | }); |
433 | } |
434 | |
435 | for (std::vector<BinaryBasicBlock *> &Class : Classes) { |
436 | uint64_t Max = 0ULL; |
437 | for (BinaryBasicBlock *BB : Class) |
438 | Max = std::max(a: Max, b: BB->getExecutionCount()); |
439 | for (BinaryBasicBlock *BB : Class) |
440 | BB->setExecutionCount(Max); |
441 | } |
442 | } |
443 | |
444 | void estimateEdgeCounts(BinaryFunction &BF) { |
445 | EdgeWeightMap PredEdgeWeights; |
446 | EdgeWeightMap SuccEdgeWeights; |
447 | if (!opts::IterativeGuess) { |
448 | computeEdgeWeights<Inverse<BinaryBasicBlock *>>(BF, EdgeWeights&: PredEdgeWeights); |
449 | computeEdgeWeights<BinaryBasicBlock *>(BF, EdgeWeights&: SuccEdgeWeights); |
450 | } |
451 | if (opts::EqualizeBBCounts) { |
452 | LLVM_DEBUG(BF.print(dbgs(), "before equalize BB counts" )); |
453 | auto Info = DataflowInfoManager(BF, nullptr, nullptr); |
454 | equalizeBBCounts(Info, BF); |
455 | LLVM_DEBUG(BF.print(dbgs(), "after equalize BB counts" )); |
456 | } |
457 | if (opts::IterativeGuess) |
458 | guessEdgeByIterativeApproach(BF); |
459 | else |
460 | guessEdgeByRelHotness(BF, /*UseSuccs=*/UseSucc: false, PredEdgeWeights, |
461 | SuccEdgeWeights); |
462 | recalculateBBCounts(BF, /*AllEdges=*/false); |
463 | } |
464 | |
465 | void solveMCF(BinaryFunction &BF, MCFCostFunction CostFunction) { |
466 | llvm_unreachable("not implemented" ); |
467 | } |
468 | |
469 | } // namespace bolt |
470 | } // namespace llvm |
471 | |