1 | //===- bolt/Passes/MCF.cpp ------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements functions for solving minimum-cost flow problem. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "bolt/Passes/MCF.h" |
14 | #include "bolt/Core/BinaryFunction.h" |
15 | #include "bolt/Core/ParallelUtilities.h" |
16 | #include "bolt/Passes/DataflowInfoManager.h" |
17 | #include "bolt/Utils/CommandLineOpts.h" |
18 | #include "llvm/ADT/DenseMap.h" |
19 | #include "llvm/ADT/STLExtras.h" |
20 | #include "llvm/Support/CommandLine.h" |
21 | #include <algorithm> |
22 | #include <vector> |
23 | |
24 | #undef DEBUG_TYPE |
25 | #define DEBUG_TYPE "mcf" |
26 | |
27 | using namespace llvm; |
28 | using namespace bolt; |
29 | |
30 | namespace opts { |
31 | |
32 | extern cl::OptionCategory BoltOptCategory; |
33 | |
34 | static cl::opt<bool> IterativeGuess( |
35 | "iterative-guess" , |
36 | cl::desc("in non-LBR mode, guess edge counts using iterative technique" ), |
37 | cl::Hidden, cl::cat(BoltOptCategory)); |
38 | } // namespace opts |
39 | |
40 | namespace llvm { |
41 | namespace bolt { |
42 | |
43 | namespace { |
44 | |
45 | // Edge Weight Inference Heuristic |
46 | // |
47 | // We start by maintaining the invariant used in LBR mode where the sum of |
48 | // pred edges count is equal to the block execution count. This loop will set |
49 | // pred edges count by balancing its own execution count in different pred |
50 | // edges. The weight of each edge is guessed by looking at how hot each pred |
51 | // block is (in terms of samples). |
52 | // There are two caveats in this approach. One is for critical edges and the |
53 | // other is for self-referencing blocks (loops of 1 BB). For critical edges, |
54 | // we can't infer the hotness of them based solely on pred BBs execution |
55 | // count. For each critical edge we look at the pred BB, then look at its |
56 | // succs to adjust its weight. |
57 | // |
58 | // [ 60 ] [ 25 ] |
59 | // | \ | |
60 | // [ 10 ] [ 75 ] |
61 | // |
62 | // The illustration above shows a critical edge \. We wish to adjust bb count |
63 | // 60 to 50 to properly determine the weight of the critical edge to be |
64 | // 50 / 75. |
65 | // For self-referencing edges, we attribute its weight by subtracting the |
66 | // current BB execution count by the sum of predecessors count if this result |
67 | // is non-negative. |
68 | using EdgeWeightMap = |
69 | DenseMap<std::pair<const BinaryBasicBlock *, const BinaryBasicBlock *>, |
70 | double>; |
71 | |
72 | template <class NodeT> |
73 | void updateEdgeWeight(EdgeWeightMap &EdgeWeights, const BinaryBasicBlock *A, |
74 | const BinaryBasicBlock *B, double Weight); |
75 | |
76 | template <> |
77 | void updateEdgeWeight<BinaryBasicBlock *>(EdgeWeightMap &EdgeWeights, |
78 | const BinaryBasicBlock *A, |
79 | const BinaryBasicBlock *B, |
80 | double Weight) { |
81 | EdgeWeights[std::make_pair(x&: A, y&: B)] = Weight; |
82 | } |
83 | |
84 | template <> |
85 | void updateEdgeWeight<Inverse<BinaryBasicBlock *>>(EdgeWeightMap &EdgeWeights, |
86 | const BinaryBasicBlock *A, |
87 | const BinaryBasicBlock *B, |
88 | double Weight) { |
89 | EdgeWeights[std::make_pair(x&: B, y&: A)] = Weight; |
90 | } |
91 | |
92 | template <class NodeT> |
93 | void computeEdgeWeights(BinaryBasicBlock *BB, EdgeWeightMap &EdgeWeights) { |
94 | typedef GraphTraits<NodeT> GraphT; |
95 | typedef GraphTraits<Inverse<NodeT>> InvTraits; |
96 | |
97 | double TotalChildrenCount = 0.0; |
98 | SmallVector<double, 4> ChildrenExecCount; |
99 | // First pass computes total children execution count that directly |
100 | // contribute to this BB. |
101 | for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB), |
102 | E = GraphT::child_end(BB); |
103 | CI != E; ++CI) { |
104 | typename GraphT::NodeRef Child = *CI; |
105 | double ChildExecCount = Child->getExecutionCount(); |
106 | // Is self-reference? |
107 | if (Child == BB) { |
108 | ChildExecCount = 0.0; // will fill this in second pass |
109 | } else if (GraphT::child_end(BB) - GraphT::child_begin(BB) > 1 && |
110 | InvTraits::child_end(Child) - InvTraits::child_begin(Child) > |
111 | 1) { |
112 | // Handle critical edges. This will cause a skew towards crit edges, but |
113 | // it is a quick solution. |
114 | double CritWeight = 0.0; |
115 | uint64_t Denominator = 0; |
116 | for (typename InvTraits::ChildIteratorType |
117 | II = InvTraits::child_begin(Child), |
118 | IE = InvTraits::child_end(Child); |
119 | II != IE; ++II) { |
120 | typename GraphT::NodeRef N = *II; |
121 | Denominator += N->getExecutionCount(); |
122 | if (N != BB) |
123 | continue; |
124 | CritWeight = N->getExecutionCount(); |
125 | } |
126 | if (Denominator) |
127 | CritWeight /= static_cast<double>(Denominator); |
128 | ChildExecCount *= CritWeight; |
129 | } |
130 | ChildrenExecCount.push_back(Elt: ChildExecCount); |
131 | TotalChildrenCount += ChildExecCount; |
132 | } |
133 | // Second pass fixes the weight of a possible self-reference edge |
134 | uint32_t ChildIndex = 0; |
135 | for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB), |
136 | E = GraphT::child_end(BB); |
137 | CI != E; ++CI) { |
138 | typename GraphT::NodeRef Child = *CI; |
139 | if (Child != BB) { |
140 | ++ChildIndex; |
141 | continue; |
142 | } |
143 | if (static_cast<double>(BB->getExecutionCount()) > TotalChildrenCount) { |
144 | ChildrenExecCount[ChildIndex] = |
145 | BB->getExecutionCount() - TotalChildrenCount; |
146 | TotalChildrenCount += ChildrenExecCount[ChildIndex]; |
147 | } |
148 | break; |
149 | } |
150 | // Third pass finally assigns weights to edges |
151 | ChildIndex = 0; |
152 | for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB), |
153 | E = GraphT::child_end(BB); |
154 | CI != E; ++CI) { |
155 | typename GraphT::NodeRef Child = *CI; |
156 | double Weight = 1 / (GraphT::child_end(BB) - GraphT::child_begin(BB)); |
157 | if (TotalChildrenCount != 0.0) |
158 | Weight = ChildrenExecCount[ChildIndex] / TotalChildrenCount; |
159 | updateEdgeWeight<NodeT>(EdgeWeights, BB, Child, Weight); |
160 | ++ChildIndex; |
161 | } |
162 | } |
163 | |
164 | template <class NodeT> |
165 | void computeEdgeWeights(BinaryFunction &BF, EdgeWeightMap &EdgeWeights) { |
166 | for (BinaryBasicBlock &BB : BF) |
167 | computeEdgeWeights<NodeT>(&BB, EdgeWeights); |
168 | } |
169 | |
170 | /// Make BB count match the sum of all incoming edges. If AllEdges is true, |
171 | /// make it match max(SumPredEdges, SumSuccEdges). |
172 | void recalculateBBCounts(BinaryFunction &BF, bool AllEdges) { |
173 | for (BinaryBasicBlock &BB : BF) { |
174 | uint64_t TotalPredsEWeight = 0; |
175 | for (BinaryBasicBlock *Pred : BB.predecessors()) |
176 | TotalPredsEWeight += Pred->getBranchInfo(Succ: BB).Count; |
177 | |
178 | if (TotalPredsEWeight > BB.getExecutionCount()) |
179 | BB.setExecutionCount(TotalPredsEWeight); |
180 | |
181 | if (!AllEdges) |
182 | continue; |
183 | |
184 | uint64_t TotalSuccsEWeight = 0; |
185 | for (BinaryBasicBlock::BinaryBranchInfo &BI : BB.branch_info()) |
186 | TotalSuccsEWeight += BI.Count; |
187 | |
188 | if (TotalSuccsEWeight > BB.getExecutionCount()) |
189 | BB.setExecutionCount(TotalSuccsEWeight); |
190 | } |
191 | } |
192 | |
193 | // This is our main edge count guessing heuristic. Look at predecessors and |
194 | // assign a proportionally higher count to pred edges coming from blocks with |
195 | // a higher execution count in comparison with the other predecessor blocks, |
196 | // making SumPredEdges match the current BB count. |
197 | // If "UseSucc" is true, apply the same logic to successor edges as well. Since |
198 | // some successor edges may already have assigned a count, only update it if the |
199 | // new count is higher. |
200 | void guessEdgeByRelHotness(BinaryFunction &BF, bool UseSucc, |
201 | EdgeWeightMap &PredEdgeWeights, |
202 | EdgeWeightMap &SuccEdgeWeights) { |
203 | for (BinaryBasicBlock &BB : BF) { |
204 | for (BinaryBasicBlock *Pred : BB.predecessors()) { |
205 | double RelativeExec = PredEdgeWeights[std::make_pair(x&: Pred, y: &BB)]; |
206 | RelativeExec *= BB.getExecutionCount(); |
207 | BinaryBasicBlock::BinaryBranchInfo &BI = Pred->getBranchInfo(Succ: BB); |
208 | if (static_cast<uint64_t>(RelativeExec) > BI.Count) |
209 | BI.Count = static_cast<uint64_t>(RelativeExec); |
210 | } |
211 | |
212 | if (!UseSucc) |
213 | continue; |
214 | |
215 | auto BI = BB.branch_info_begin(); |
216 | for (BinaryBasicBlock *Succ : BB.successors()) { |
217 | double RelativeExec = SuccEdgeWeights[std::make_pair(x: &BB, y&: Succ)]; |
218 | RelativeExec *= BB.getExecutionCount(); |
219 | if (static_cast<uint64_t>(RelativeExec) > BI->Count) |
220 | BI->Count = static_cast<uint64_t>(RelativeExec); |
221 | ++BI; |
222 | } |
223 | } |
224 | } |
225 | |
226 | using ArcSet = |
227 | DenseSet<std::pair<const BinaryBasicBlock *, const BinaryBasicBlock *>>; |
228 | |
229 | /// Predecessor edges version of guessEdgeByIterativeApproach. GuessedArcs has |
230 | /// all edges we already established their count. Try to guess the count of |
231 | /// the remaining edge, if there is only one to guess, and return true if we |
232 | /// were able to guess. |
233 | bool guessPredEdgeCounts(BinaryBasicBlock *BB, ArcSet &GuessedArcs) { |
234 | if (BB->pred_size() == 0) |
235 | return false; |
236 | |
237 | uint64_t TotalPredCount = 0; |
238 | unsigned NumGuessedEdges = 0; |
239 | for (BinaryBasicBlock *Pred : BB->predecessors()) { |
240 | if (GuessedArcs.count(V: std::make_pair(x&: Pred, y&: BB))) |
241 | ++NumGuessedEdges; |
242 | TotalPredCount += Pred->getBranchInfo(Succ: *BB).Count; |
243 | } |
244 | |
245 | if (NumGuessedEdges != BB->pred_size() - 1) |
246 | return false; |
247 | |
248 | int64_t Guessed = |
249 | static_cast<int64_t>(BB->getExecutionCount()) - TotalPredCount; |
250 | if (Guessed < 0) |
251 | Guessed = 0; |
252 | |
253 | for (BinaryBasicBlock *Pred : BB->predecessors()) { |
254 | if (GuessedArcs.count(V: std::make_pair(x&: Pred, y&: BB))) |
255 | continue; |
256 | |
257 | Pred->getBranchInfo(Succ: *BB).Count = Guessed; |
258 | GuessedArcs.insert(V: std::make_pair(x&: Pred, y&: BB)); |
259 | return true; |
260 | } |
261 | llvm_unreachable("Expected unguessed arc" ); |
262 | } |
263 | |
264 | /// Successor edges version of guessEdgeByIterativeApproach. GuessedArcs has |
265 | /// all edges we already established their count. Try to guess the count of |
266 | /// the remaining edge, if there is only one to guess, and return true if we |
267 | /// were able to guess. |
268 | bool guessSuccEdgeCounts(BinaryBasicBlock *BB, ArcSet &GuessedArcs) { |
269 | if (BB->succ_size() == 0) |
270 | return false; |
271 | |
272 | uint64_t TotalSuccCount = 0; |
273 | unsigned NumGuessedEdges = 0; |
274 | auto BI = BB->branch_info_begin(); |
275 | for (BinaryBasicBlock *Succ : BB->successors()) { |
276 | if (GuessedArcs.count(V: std::make_pair(x&: BB, y&: Succ))) |
277 | ++NumGuessedEdges; |
278 | TotalSuccCount += BI->Count; |
279 | ++BI; |
280 | } |
281 | |
282 | if (NumGuessedEdges != BB->succ_size() - 1) |
283 | return false; |
284 | |
285 | int64_t Guessed = |
286 | static_cast<int64_t>(BB->getExecutionCount()) - TotalSuccCount; |
287 | if (Guessed < 0) |
288 | Guessed = 0; |
289 | |
290 | BI = BB->branch_info_begin(); |
291 | for (BinaryBasicBlock *Succ : BB->successors()) { |
292 | if (GuessedArcs.count(V: std::make_pair(x&: BB, y&: Succ))) { |
293 | ++BI; |
294 | continue; |
295 | } |
296 | |
297 | BI->Count = Guessed; |
298 | GuessedArcs.insert(V: std::make_pair(x&: BB, y&: Succ)); |
299 | return true; |
300 | } |
301 | llvm_unreachable("Expected unguessed arc" ); |
302 | } |
303 | |
304 | /// Guess edge count whenever we have only one edge (pred or succ) left |
305 | /// to guess. Then make its count equal to BB count minus all other edge |
306 | /// counts we already know their count. Repeat this until there is no |
307 | /// change. |
308 | void guessEdgeByIterativeApproach(BinaryFunction &BF) { |
309 | ArcSet KnownArcs; |
310 | bool Changed = false; |
311 | |
312 | do { |
313 | Changed = false; |
314 | for (BinaryBasicBlock &BB : BF) { |
315 | if (guessPredEdgeCounts(BB: &BB, GuessedArcs&: KnownArcs)) |
316 | Changed = true; |
317 | if (guessSuccEdgeCounts(BB: &BB, GuessedArcs&: KnownArcs)) |
318 | Changed = true; |
319 | } |
320 | } while (Changed); |
321 | |
322 | // Guess count for non-inferred edges |
323 | for (BinaryBasicBlock &BB : BF) { |
324 | for (BinaryBasicBlock *Pred : BB.predecessors()) { |
325 | if (KnownArcs.count(V: std::make_pair(x&: Pred, y: &BB))) |
326 | continue; |
327 | BinaryBasicBlock::BinaryBranchInfo &BI = Pred->getBranchInfo(Succ: BB); |
328 | BI.Count = |
329 | std::min(a: Pred->getExecutionCount(), b: BB.getExecutionCount()) / 2; |
330 | KnownArcs.insert(V: std::make_pair(x&: Pred, y: &BB)); |
331 | } |
332 | auto BI = BB.branch_info_begin(); |
333 | for (BinaryBasicBlock *Succ : BB.successors()) { |
334 | if (KnownArcs.count(V: std::make_pair(x: &BB, y&: Succ))) { |
335 | ++BI; |
336 | continue; |
337 | } |
338 | BI->Count = |
339 | std::min(a: BB.getExecutionCount(), b: Succ->getExecutionCount()) / 2; |
340 | KnownArcs.insert(V: std::make_pair(x: &BB, y&: Succ)); |
341 | break; |
342 | } |
343 | } |
344 | } |
345 | |
346 | /// Associate each basic block with the BinaryLoop object corresponding to the |
347 | /// innermost loop containing this block. |
348 | DenseMap<const BinaryBasicBlock *, const BinaryLoop *> |
349 | createLoopNestLevelMap(BinaryFunction &BF) { |
350 | DenseMap<const BinaryBasicBlock *, const BinaryLoop *> LoopNestLevel; |
351 | const BinaryLoopInfo &BLI = BF.getLoopInfo(); |
352 | |
353 | for (BinaryBasicBlock &BB : BF) |
354 | LoopNestLevel[&BB] = BLI[&BB]; |
355 | |
356 | return LoopNestLevel; |
357 | } |
358 | |
359 | } // end anonymous namespace |
360 | |
361 | void equalizeBBCounts(DataflowInfoManager &Info, BinaryFunction &BF) { |
362 | if (BF.begin() == BF.end()) |
363 | return; |
364 | |
365 | DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); |
366 | DominatorAnalysis<true> &PDA = Info.getPostDominatorAnalysis(); |
367 | auto &InsnToBB = Info.getInsnToBBMap(); |
368 | // These analyses work at the instruction granularity, but we really only need |
369 | // basic block granularity here. So we'll use a set of visited edges to avoid |
370 | // revisiting the same BBs again and again. |
371 | DenseMap<const BinaryBasicBlock *, std::set<const BinaryBasicBlock *>> |
372 | Visited; |
373 | // Equivalence classes mapping. Each equivalence class is defined by the set |
374 | // of BBs that obeys the aforementioned properties. |
375 | DenseMap<const BinaryBasicBlock *, signed> BBsToEC; |
376 | std::vector<std::vector<BinaryBasicBlock *>> Classes; |
377 | |
378 | BF.calculateLoopInfo(); |
379 | DenseMap<const BinaryBasicBlock *, const BinaryLoop *> LoopNestLevel = |
380 | createLoopNestLevelMap(BF); |
381 | |
382 | for (BinaryBasicBlock &BB : BF) |
383 | BBsToEC[&BB] = -1; |
384 | |
385 | for (BinaryBasicBlock &BB : BF) { |
386 | auto I = BB.begin(); |
387 | if (I == BB.end()) |
388 | continue; |
389 | |
390 | DA.doForAllDominators(Inst: *I, Task: [&](const MCInst &DomInst) { |
391 | BinaryBasicBlock *DomBB = InsnToBB[&DomInst]; |
392 | if (Visited[DomBB].count(x: &BB)) |
393 | return; |
394 | Visited[DomBB].insert(x: &BB); |
395 | if (!PDA.doesADominateB(A: *I, B: DomInst)) |
396 | return; |
397 | if (LoopNestLevel[&BB] != LoopNestLevel[DomBB]) |
398 | return; |
399 | if (BBsToEC[DomBB] == -1 && BBsToEC[&BB] == -1) { |
400 | BBsToEC[DomBB] = Classes.size(); |
401 | BBsToEC[&BB] = Classes.size(); |
402 | Classes.emplace_back(); |
403 | Classes.back().push_back(x: DomBB); |
404 | Classes.back().push_back(x: &BB); |
405 | return; |
406 | } |
407 | if (BBsToEC[DomBB] == -1) { |
408 | BBsToEC[DomBB] = BBsToEC[&BB]; |
409 | Classes[BBsToEC[&BB]].push_back(x: DomBB); |
410 | return; |
411 | } |
412 | if (BBsToEC[&BB] == -1) { |
413 | BBsToEC[&BB] = BBsToEC[DomBB]; |
414 | Classes[BBsToEC[DomBB]].push_back(x: &BB); |
415 | return; |
416 | } |
417 | signed BBECNum = BBsToEC[&BB]; |
418 | std::vector<BinaryBasicBlock *> DomEC = Classes[BBsToEC[DomBB]]; |
419 | std::vector<BinaryBasicBlock *> BBEC = Classes[BBECNum]; |
420 | for (BinaryBasicBlock *Block : DomEC) { |
421 | BBsToEC[Block] = BBECNum; |
422 | BBEC.push_back(x: Block); |
423 | } |
424 | DomEC.clear(); |
425 | }); |
426 | } |
427 | |
428 | for (std::vector<BinaryBasicBlock *> &Class : Classes) { |
429 | uint64_t Max = 0ULL; |
430 | for (BinaryBasicBlock *BB : Class) |
431 | Max = std::max(a: Max, b: BB->getExecutionCount()); |
432 | for (BinaryBasicBlock *BB : Class) |
433 | BB->setExecutionCount(Max); |
434 | } |
435 | } |
436 | |
437 | void EstimateEdgeCounts::runOnFunction(BinaryFunction &BF) { |
438 | EdgeWeightMap PredEdgeWeights; |
439 | EdgeWeightMap SuccEdgeWeights; |
440 | if (!opts::IterativeGuess) { |
441 | computeEdgeWeights<Inverse<BinaryBasicBlock *>>(BF, EdgeWeights&: PredEdgeWeights); |
442 | computeEdgeWeights<BinaryBasicBlock *>(BF, EdgeWeights&: SuccEdgeWeights); |
443 | } |
444 | if (opts::EqualizeBBCounts) { |
445 | LLVM_DEBUG(BF.print(dbgs(), "before equalize BB counts" )); |
446 | auto Info = DataflowInfoManager(BF, nullptr, nullptr); |
447 | equalizeBBCounts(Info, BF); |
448 | LLVM_DEBUG(BF.print(dbgs(), "after equalize BB counts" )); |
449 | } |
450 | if (opts::IterativeGuess) |
451 | guessEdgeByIterativeApproach(BF); |
452 | else |
453 | guessEdgeByRelHotness(BF, /*UseSuccs=*/UseSucc: false, PredEdgeWeights, |
454 | SuccEdgeWeights); |
455 | recalculateBBCounts(BF, /*AllEdges=*/false); |
456 | } |
457 | |
458 | Error EstimateEdgeCounts::runOnFunctions(BinaryContext &BC) { |
459 | if (llvm::none_of(Range: llvm::make_second_range(c&: BC.getBinaryFunctions()), |
460 | P: [](const BinaryFunction &BF) { |
461 | return BF.getProfileFlags() == BinaryFunction::PF_BASIC; |
462 | })) |
463 | return Error::success(); |
464 | |
465 | ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { |
466 | runOnFunction(BF); |
467 | }; |
468 | ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) { |
469 | return BF.getProfileFlags() != BinaryFunction::PF_BASIC; |
470 | }; |
471 | |
472 | ParallelUtilities::runOnEachFunction( |
473 | BC, SchedPolicy: ParallelUtilities::SchedulingPolicy::SP_BB_QUADRATIC, WorkFunction: WorkFun, |
474 | SkipPredicate: SkipFunc, LogName: "EstimateEdgeCounts" ); |
475 | return Error::success(); |
476 | } |
477 | |
478 | } // namespace bolt |
479 | } // namespace llvm |
480 | |