1//===- bolt/Passes/MCF.cpp ------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements functions for solving minimum-cost flow problem.
10//
11//===----------------------------------------------------------------------===//
12
13#include "bolt/Passes/MCF.h"
14#include "bolt/Core/BinaryFunction.h"
15#include "bolt/Passes/DataflowInfoManager.h"
16#include "bolt/Utils/CommandLineOpts.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/Support/CommandLine.h"
19#include <algorithm>
20#include <vector>
21
22#undef DEBUG_TYPE
23#define DEBUG_TYPE "mcf"
24
25using namespace llvm;
26using namespace bolt;
27
28namespace opts {
29
30extern cl::OptionCategory BoltOptCategory;
31
32extern cl::opt<bool> TimeOpts;
33
34static cl::opt<bool> IterativeGuess(
35 "iterative-guess",
36 cl::desc("in non-LBR mode, guess edge counts using iterative technique"),
37 cl::Hidden, cl::cat(BoltOptCategory));
38
39static cl::opt<bool> UseRArcs(
40 "mcf-use-rarcs",
41 cl::desc("in MCF, consider the possibility of cancelling flow to balance "
42 "edges"),
43 cl::Hidden, cl::cat(BoltOptCategory));
44
45} // namespace opts
46
47namespace llvm {
48namespace bolt {
49
50namespace {
51
52// Edge Weight Inference Heuristic
53//
54// We start by maintaining the invariant used in LBR mode where the sum of
55// pred edges count is equal to the block execution count. This loop will set
56// pred edges count by balancing its own execution count in different pred
57// edges. The weight of each edge is guessed by looking at how hot each pred
58// block is (in terms of samples).
59// There are two caveats in this approach. One is for critical edges and the
60// other is for self-referencing blocks (loops of 1 BB). For critical edges,
61// we can't infer the hotness of them based solely on pred BBs execution
62// count. For each critical edge we look at the pred BB, then look at its
63// succs to adjust its weight.
64//
65// [ 60 ] [ 25 ]
66// | \ |
67// [ 10 ] [ 75 ]
68//
69// The illustration above shows a critical edge \. We wish to adjust bb count
70// 60 to 50 to properly determine the weight of the critical edge to be
71// 50 / 75.
72// For self-referencing edges, we attribute its weight by subtracting the
73// current BB execution count by the sum of predecessors count if this result
74// is non-negative.
75using EdgeWeightMap =
76 DenseMap<std::pair<const BinaryBasicBlock *, const BinaryBasicBlock *>,
77 double>;
78
79template <class NodeT>
80void updateEdgeWeight(EdgeWeightMap &EdgeWeights, const BinaryBasicBlock *A,
81 const BinaryBasicBlock *B, double Weight);
82
83template <>
84void updateEdgeWeight<BinaryBasicBlock *>(EdgeWeightMap &EdgeWeights,
85 const BinaryBasicBlock *A,
86 const BinaryBasicBlock *B,
87 double Weight) {
88 EdgeWeights[std::make_pair(x&: A, y&: B)] = Weight;
89}
90
91template <>
92void updateEdgeWeight<Inverse<BinaryBasicBlock *>>(EdgeWeightMap &EdgeWeights,
93 const BinaryBasicBlock *A,
94 const BinaryBasicBlock *B,
95 double Weight) {
96 EdgeWeights[std::make_pair(x&: B, y&: A)] = Weight;
97}
98
99template <class NodeT>
100void computeEdgeWeights(BinaryBasicBlock *BB, EdgeWeightMap &EdgeWeights) {
101 typedef GraphTraits<NodeT> GraphT;
102 typedef GraphTraits<Inverse<NodeT>> InvTraits;
103
104 double TotalChildrenCount = 0.0;
105 SmallVector<double, 4> ChildrenExecCount;
106 // First pass computes total children execution count that directly
107 // contribute to this BB.
108 for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB),
109 E = GraphT::child_end(BB);
110 CI != E; ++CI) {
111 typename GraphT::NodeRef Child = *CI;
112 double ChildExecCount = Child->getExecutionCount();
113 // Is self-reference?
114 if (Child == BB) {
115 ChildExecCount = 0.0; // will fill this in second pass
116 } else if (GraphT::child_end(BB) - GraphT::child_begin(BB) > 1 &&
117 InvTraits::child_end(Child) - InvTraits::child_begin(Child) >
118 1) {
119 // Handle critical edges. This will cause a skew towards crit edges, but
120 // it is a quick solution.
121 double CritWeight = 0.0;
122 uint64_t Denominator = 0;
123 for (typename InvTraits::ChildIteratorType
124 II = InvTraits::child_begin(Child),
125 IE = InvTraits::child_end(Child);
126 II != IE; ++II) {
127 typename GraphT::NodeRef N = *II;
128 Denominator += N->getExecutionCount();
129 if (N != BB)
130 continue;
131 CritWeight = N->getExecutionCount();
132 }
133 if (Denominator)
134 CritWeight /= static_cast<double>(Denominator);
135 ChildExecCount *= CritWeight;
136 }
137 ChildrenExecCount.push_back(Elt: ChildExecCount);
138 TotalChildrenCount += ChildExecCount;
139 }
140 // Second pass fixes the weight of a possible self-reference edge
141 uint32_t ChildIndex = 0;
142 for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB),
143 E = GraphT::child_end(BB);
144 CI != E; ++CI) {
145 typename GraphT::NodeRef Child = *CI;
146 if (Child != BB) {
147 ++ChildIndex;
148 continue;
149 }
150 if (static_cast<double>(BB->getExecutionCount()) > TotalChildrenCount) {
151 ChildrenExecCount[ChildIndex] =
152 BB->getExecutionCount() - TotalChildrenCount;
153 TotalChildrenCount += ChildrenExecCount[ChildIndex];
154 }
155 break;
156 }
157 // Third pass finally assigns weights to edges
158 ChildIndex = 0;
159 for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB),
160 E = GraphT::child_end(BB);
161 CI != E; ++CI) {
162 typename GraphT::NodeRef Child = *CI;
163 double Weight = 1 / (GraphT::child_end(BB) - GraphT::child_begin(BB));
164 if (TotalChildrenCount != 0.0)
165 Weight = ChildrenExecCount[ChildIndex] / TotalChildrenCount;
166 updateEdgeWeight<NodeT>(EdgeWeights, BB, Child, Weight);
167 ++ChildIndex;
168 }
169}
170
171template <class NodeT>
172void computeEdgeWeights(BinaryFunction &BF, EdgeWeightMap &EdgeWeights) {
173 for (BinaryBasicBlock &BB : BF)
174 computeEdgeWeights<NodeT>(&BB, EdgeWeights);
175}
176
177/// Make BB count match the sum of all incoming edges. If AllEdges is true,
178/// make it match max(SumPredEdges, SumSuccEdges).
179void recalculateBBCounts(BinaryFunction &BF, bool AllEdges) {
180 for (BinaryBasicBlock &BB : BF) {
181 uint64_t TotalPredsEWeight = 0;
182 for (BinaryBasicBlock *Pred : BB.predecessors())
183 TotalPredsEWeight += Pred->getBranchInfo(Succ: BB).Count;
184
185 if (TotalPredsEWeight > BB.getExecutionCount())
186 BB.setExecutionCount(TotalPredsEWeight);
187
188 if (!AllEdges)
189 continue;
190
191 uint64_t TotalSuccsEWeight = 0;
192 for (BinaryBasicBlock::BinaryBranchInfo &BI : BB.branch_info())
193 TotalSuccsEWeight += BI.Count;
194
195 if (TotalSuccsEWeight > BB.getExecutionCount())
196 BB.setExecutionCount(TotalSuccsEWeight);
197 }
198}
199
200// This is our main edge count guessing heuristic. Look at predecessors and
201// assign a proportionally higher count to pred edges coming from blocks with
202// a higher execution count in comparison with the other predecessor blocks,
203// making SumPredEdges match the current BB count.
204// If "UseSucc" is true, apply the same logic to successor edges as well. Since
205// some successor edges may already have assigned a count, only update it if the
206// new count is higher.
207void guessEdgeByRelHotness(BinaryFunction &BF, bool UseSucc,
208 EdgeWeightMap &PredEdgeWeights,
209 EdgeWeightMap &SuccEdgeWeights) {
210 for (BinaryBasicBlock &BB : BF) {
211 for (BinaryBasicBlock *Pred : BB.predecessors()) {
212 double RelativeExec = PredEdgeWeights[std::make_pair(x&: Pred, y: &BB)];
213 RelativeExec *= BB.getExecutionCount();
214 BinaryBasicBlock::BinaryBranchInfo &BI = Pred->getBranchInfo(Succ: BB);
215 if (static_cast<uint64_t>(RelativeExec) > BI.Count)
216 BI.Count = static_cast<uint64_t>(RelativeExec);
217 }
218
219 if (!UseSucc)
220 continue;
221
222 auto BI = BB.branch_info_begin();
223 for (BinaryBasicBlock *Succ : BB.successors()) {
224 double RelativeExec = SuccEdgeWeights[std::make_pair(x: &BB, y&: Succ)];
225 RelativeExec *= BB.getExecutionCount();
226 if (static_cast<uint64_t>(RelativeExec) > BI->Count)
227 BI->Count = static_cast<uint64_t>(RelativeExec);
228 ++BI;
229 }
230 }
231}
232
233using ArcSet =
234 DenseSet<std::pair<const BinaryBasicBlock *, const BinaryBasicBlock *>>;
235
236/// Predecessor edges version of guessEdgeByIterativeApproach. GuessedArcs has
237/// all edges we already established their count. Try to guess the count of
238/// the remaining edge, if there is only one to guess, and return true if we
239/// were able to guess.
240bool guessPredEdgeCounts(BinaryBasicBlock *BB, ArcSet &GuessedArcs) {
241 if (BB->pred_size() == 0)
242 return false;
243
244 uint64_t TotalPredCount = 0;
245 unsigned NumGuessedEdges = 0;
246 for (BinaryBasicBlock *Pred : BB->predecessors()) {
247 if (GuessedArcs.count(V: std::make_pair(x&: Pred, y&: BB)))
248 ++NumGuessedEdges;
249 TotalPredCount += Pred->getBranchInfo(Succ: *BB).Count;
250 }
251
252 if (NumGuessedEdges != BB->pred_size() - 1)
253 return false;
254
255 int64_t Guessed =
256 static_cast<int64_t>(BB->getExecutionCount()) - TotalPredCount;
257 if (Guessed < 0)
258 Guessed = 0;
259
260 for (BinaryBasicBlock *Pred : BB->predecessors()) {
261 if (GuessedArcs.count(V: std::make_pair(x&: Pred, y&: BB)))
262 continue;
263
264 Pred->getBranchInfo(Succ: *BB).Count = Guessed;
265 GuessedArcs.insert(V: std::make_pair(x&: Pred, y&: BB));
266 return true;
267 }
268 llvm_unreachable("Expected unguessed arc");
269}
270
271/// Successor edges version of guessEdgeByIterativeApproach. GuessedArcs has
272/// all edges we already established their count. Try to guess the count of
273/// the remaining edge, if there is only one to guess, and return true if we
274/// were able to guess.
275bool guessSuccEdgeCounts(BinaryBasicBlock *BB, ArcSet &GuessedArcs) {
276 if (BB->succ_size() == 0)
277 return false;
278
279 uint64_t TotalSuccCount = 0;
280 unsigned NumGuessedEdges = 0;
281 auto BI = BB->branch_info_begin();
282 for (BinaryBasicBlock *Succ : BB->successors()) {
283 if (GuessedArcs.count(V: std::make_pair(x&: BB, y&: Succ)))
284 ++NumGuessedEdges;
285 TotalSuccCount += BI->Count;
286 ++BI;
287 }
288
289 if (NumGuessedEdges != BB->succ_size() - 1)
290 return false;
291
292 int64_t Guessed =
293 static_cast<int64_t>(BB->getExecutionCount()) - TotalSuccCount;
294 if (Guessed < 0)
295 Guessed = 0;
296
297 BI = BB->branch_info_begin();
298 for (BinaryBasicBlock *Succ : BB->successors()) {
299 if (GuessedArcs.count(V: std::make_pair(x&: BB, y&: Succ))) {
300 ++BI;
301 continue;
302 }
303
304 BI->Count = Guessed;
305 GuessedArcs.insert(V: std::make_pair(x&: BB, y&: Succ));
306 return true;
307 }
308 llvm_unreachable("Expected unguessed arc");
309}
310
311/// Guess edge count whenever we have only one edge (pred or succ) left
312/// to guess. Then make its count equal to BB count minus all other edge
313/// counts we already know their count. Repeat this until there is no
314/// change.
315void guessEdgeByIterativeApproach(BinaryFunction &BF) {
316 ArcSet KnownArcs;
317 bool Changed = false;
318
319 do {
320 Changed = false;
321 for (BinaryBasicBlock &BB : BF) {
322 if (guessPredEdgeCounts(BB: &BB, GuessedArcs&: KnownArcs))
323 Changed = true;
324 if (guessSuccEdgeCounts(BB: &BB, GuessedArcs&: KnownArcs))
325 Changed = true;
326 }
327 } while (Changed);
328
329 // Guess count for non-inferred edges
330 for (BinaryBasicBlock &BB : BF) {
331 for (BinaryBasicBlock *Pred : BB.predecessors()) {
332 if (KnownArcs.count(V: std::make_pair(x&: Pred, y: &BB)))
333 continue;
334 BinaryBasicBlock::BinaryBranchInfo &BI = Pred->getBranchInfo(Succ: BB);
335 BI.Count =
336 std::min(a: Pred->getExecutionCount(), b: BB.getExecutionCount()) / 2;
337 KnownArcs.insert(V: std::make_pair(x&: Pred, y: &BB));
338 }
339 auto BI = BB.branch_info_begin();
340 for (BinaryBasicBlock *Succ : BB.successors()) {
341 if (KnownArcs.count(V: std::make_pair(x: &BB, y&: Succ))) {
342 ++BI;
343 continue;
344 }
345 BI->Count =
346 std::min(a: BB.getExecutionCount(), b: Succ->getExecutionCount()) / 2;
347 KnownArcs.insert(V: std::make_pair(x: &BB, y&: Succ));
348 break;
349 }
350 }
351}
352
353/// Associate each basic block with the BinaryLoop object corresponding to the
354/// innermost loop containing this block.
355DenseMap<const BinaryBasicBlock *, const BinaryLoop *>
356createLoopNestLevelMap(BinaryFunction &BF) {
357 DenseMap<const BinaryBasicBlock *, const BinaryLoop *> LoopNestLevel;
358 const BinaryLoopInfo &BLI = BF.getLoopInfo();
359
360 for (BinaryBasicBlock &BB : BF)
361 LoopNestLevel[&BB] = BLI[&BB];
362
363 return LoopNestLevel;
364}
365
366} // end anonymous namespace
367
368void equalizeBBCounts(DataflowInfoManager &Info, BinaryFunction &BF) {
369 if (BF.begin() == BF.end())
370 return;
371
372 DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
373 DominatorAnalysis<true> &PDA = Info.getPostDominatorAnalysis();
374 auto &InsnToBB = Info.getInsnToBBMap();
375 // These analyses work at the instruction granularity, but we really only need
376 // basic block granularity here. So we'll use a set of visited edges to avoid
377 // revisiting the same BBs again and again.
378 DenseMap<const BinaryBasicBlock *, std::set<const BinaryBasicBlock *>>
379 Visited;
380 // Equivalence classes mapping. Each equivalence class is defined by the set
381 // of BBs that obeys the aforementioned properties.
382 DenseMap<const BinaryBasicBlock *, signed> BBsToEC;
383 std::vector<std::vector<BinaryBasicBlock *>> Classes;
384
385 BF.calculateLoopInfo();
386 DenseMap<const BinaryBasicBlock *, const BinaryLoop *> LoopNestLevel =
387 createLoopNestLevelMap(BF);
388
389 for (BinaryBasicBlock &BB : BF)
390 BBsToEC[&BB] = -1;
391
392 for (BinaryBasicBlock &BB : BF) {
393 auto I = BB.begin();
394 if (I == BB.end())
395 continue;
396
397 DA.doForAllDominators(Inst: *I, Task: [&](const MCInst &DomInst) {
398 BinaryBasicBlock *DomBB = InsnToBB[&DomInst];
399 if (Visited[DomBB].count(x: &BB))
400 return;
401 Visited[DomBB].insert(x: &BB);
402 if (!PDA.doesADominateB(A: *I, B: DomInst))
403 return;
404 if (LoopNestLevel[&BB] != LoopNestLevel[DomBB])
405 return;
406 if (BBsToEC[DomBB] == -1 && BBsToEC[&BB] == -1) {
407 BBsToEC[DomBB] = Classes.size();
408 BBsToEC[&BB] = Classes.size();
409 Classes.emplace_back();
410 Classes.back().push_back(x: DomBB);
411 Classes.back().push_back(x: &BB);
412 return;
413 }
414 if (BBsToEC[DomBB] == -1) {
415 BBsToEC[DomBB] = BBsToEC[&BB];
416 Classes[BBsToEC[&BB]].push_back(x: DomBB);
417 return;
418 }
419 if (BBsToEC[&BB] == -1) {
420 BBsToEC[&BB] = BBsToEC[DomBB];
421 Classes[BBsToEC[DomBB]].push_back(x: &BB);
422 return;
423 }
424 signed BBECNum = BBsToEC[&BB];
425 std::vector<BinaryBasicBlock *> DomEC = Classes[BBsToEC[DomBB]];
426 std::vector<BinaryBasicBlock *> BBEC = Classes[BBECNum];
427 for (BinaryBasicBlock *Block : DomEC) {
428 BBsToEC[Block] = BBECNum;
429 BBEC.push_back(x: Block);
430 }
431 DomEC.clear();
432 });
433 }
434
435 for (std::vector<BinaryBasicBlock *> &Class : Classes) {
436 uint64_t Max = 0ULL;
437 for (BinaryBasicBlock *BB : Class)
438 Max = std::max(a: Max, b: BB->getExecutionCount());
439 for (BinaryBasicBlock *BB : Class)
440 BB->setExecutionCount(Max);
441 }
442}
443
444void estimateEdgeCounts(BinaryFunction &BF) {
445 EdgeWeightMap PredEdgeWeights;
446 EdgeWeightMap SuccEdgeWeights;
447 if (!opts::IterativeGuess) {
448 computeEdgeWeights<Inverse<BinaryBasicBlock *>>(BF, EdgeWeights&: PredEdgeWeights);
449 computeEdgeWeights<BinaryBasicBlock *>(BF, EdgeWeights&: SuccEdgeWeights);
450 }
451 if (opts::EqualizeBBCounts) {
452 LLVM_DEBUG(BF.print(dbgs(), "before equalize BB counts"));
453 auto Info = DataflowInfoManager(BF, nullptr, nullptr);
454 equalizeBBCounts(Info, BF);
455 LLVM_DEBUG(BF.print(dbgs(), "after equalize BB counts"));
456 }
457 if (opts::IterativeGuess)
458 guessEdgeByIterativeApproach(BF);
459 else
460 guessEdgeByRelHotness(BF, /*UseSuccs=*/UseSucc: false, PredEdgeWeights,
461 SuccEdgeWeights);
462 recalculateBBCounts(BF, /*AllEdges=*/false);
463}
464
465void solveMCF(BinaryFunction &BF, MCFCostFunction CostFunction) {
466 llvm_unreachable("not implemented");
467}
468
469} // namespace bolt
470} // namespace llvm
471

source code of bolt/lib/Passes/MCF.cpp