1//===- bolt/Passes/MCF.cpp ------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements functions for solving minimum-cost flow problem.
10//
11//===----------------------------------------------------------------------===//
12
13#include "bolt/Passes/MCF.h"
14#include "bolt/Core/BinaryFunction.h"
15#include "bolt/Core/ParallelUtilities.h"
16#include "bolt/Passes/DataflowInfoManager.h"
17#include "bolt/Utils/CommandLineOpts.h"
18#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/Support/CommandLine.h"
21#include <algorithm>
22#include <vector>
23
24#undef DEBUG_TYPE
25#define DEBUG_TYPE "mcf"
26
27using namespace llvm;
28using namespace bolt;
29
30namespace opts {
31
32extern cl::OptionCategory BoltOptCategory;
33
34static cl::opt<bool> IterativeGuess(
35 "iterative-guess",
36 cl::desc("in non-LBR mode, guess edge counts using iterative technique"),
37 cl::Hidden, cl::cat(BoltOptCategory));
38} // namespace opts
39
40namespace llvm {
41namespace bolt {
42
43namespace {
44
45// Edge Weight Inference Heuristic
46//
47// We start by maintaining the invariant used in LBR mode where the sum of
48// pred edges count is equal to the block execution count. This loop will set
49// pred edges count by balancing its own execution count in different pred
50// edges. The weight of each edge is guessed by looking at how hot each pred
51// block is (in terms of samples).
52// There are two caveats in this approach. One is for critical edges and the
53// other is for self-referencing blocks (loops of 1 BB). For critical edges,
54// we can't infer the hotness of them based solely on pred BBs execution
55// count. For each critical edge we look at the pred BB, then look at its
56// succs to adjust its weight.
57//
58// [ 60 ] [ 25 ]
59// | \ |
60// [ 10 ] [ 75 ]
61//
62// The illustration above shows a critical edge \. We wish to adjust bb count
63// 60 to 50 to properly determine the weight of the critical edge to be
64// 50 / 75.
65// For self-referencing edges, we attribute its weight by subtracting the
66// current BB execution count by the sum of predecessors count if this result
67// is non-negative.
68using EdgeWeightMap =
69 DenseMap<std::pair<const BinaryBasicBlock *, const BinaryBasicBlock *>,
70 double>;
71
72template <class NodeT>
73void updateEdgeWeight(EdgeWeightMap &EdgeWeights, const BinaryBasicBlock *A,
74 const BinaryBasicBlock *B, double Weight);
75
76template <>
77void updateEdgeWeight<BinaryBasicBlock *>(EdgeWeightMap &EdgeWeights,
78 const BinaryBasicBlock *A,
79 const BinaryBasicBlock *B,
80 double Weight) {
81 EdgeWeights[std::make_pair(x&: A, y&: B)] = Weight;
82}
83
84template <>
85void updateEdgeWeight<Inverse<BinaryBasicBlock *>>(EdgeWeightMap &EdgeWeights,
86 const BinaryBasicBlock *A,
87 const BinaryBasicBlock *B,
88 double Weight) {
89 EdgeWeights[std::make_pair(x&: B, y&: A)] = Weight;
90}
91
92template <class NodeT>
93void computeEdgeWeights(BinaryBasicBlock *BB, EdgeWeightMap &EdgeWeights) {
94 typedef GraphTraits<NodeT> GraphT;
95 typedef GraphTraits<Inverse<NodeT>> InvTraits;
96
97 double TotalChildrenCount = 0.0;
98 SmallVector<double, 4> ChildrenExecCount;
99 // First pass computes total children execution count that directly
100 // contribute to this BB.
101 for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB),
102 E = GraphT::child_end(BB);
103 CI != E; ++CI) {
104 typename GraphT::NodeRef Child = *CI;
105 double ChildExecCount = Child->getExecutionCount();
106 // Is self-reference?
107 if (Child == BB) {
108 ChildExecCount = 0.0; // will fill this in second pass
109 } else if (GraphT::child_end(BB) - GraphT::child_begin(BB) > 1 &&
110 InvTraits::child_end(Child) - InvTraits::child_begin(Child) >
111 1) {
112 // Handle critical edges. This will cause a skew towards crit edges, but
113 // it is a quick solution.
114 double CritWeight = 0.0;
115 uint64_t Denominator = 0;
116 for (typename InvTraits::ChildIteratorType
117 II = InvTraits::child_begin(Child),
118 IE = InvTraits::child_end(Child);
119 II != IE; ++II) {
120 typename GraphT::NodeRef N = *II;
121 Denominator += N->getExecutionCount();
122 if (N != BB)
123 continue;
124 CritWeight = N->getExecutionCount();
125 }
126 if (Denominator)
127 CritWeight /= static_cast<double>(Denominator);
128 ChildExecCount *= CritWeight;
129 }
130 ChildrenExecCount.push_back(Elt: ChildExecCount);
131 TotalChildrenCount += ChildExecCount;
132 }
133 // Second pass fixes the weight of a possible self-reference edge
134 uint32_t ChildIndex = 0;
135 for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB),
136 E = GraphT::child_end(BB);
137 CI != E; ++CI) {
138 typename GraphT::NodeRef Child = *CI;
139 if (Child != BB) {
140 ++ChildIndex;
141 continue;
142 }
143 if (static_cast<double>(BB->getExecutionCount()) > TotalChildrenCount) {
144 ChildrenExecCount[ChildIndex] =
145 BB->getExecutionCount() - TotalChildrenCount;
146 TotalChildrenCount += ChildrenExecCount[ChildIndex];
147 }
148 break;
149 }
150 // Third pass finally assigns weights to edges
151 ChildIndex = 0;
152 for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB),
153 E = GraphT::child_end(BB);
154 CI != E; ++CI) {
155 typename GraphT::NodeRef Child = *CI;
156 double Weight = 1 / (GraphT::child_end(BB) - GraphT::child_begin(BB));
157 if (TotalChildrenCount != 0.0)
158 Weight = ChildrenExecCount[ChildIndex] / TotalChildrenCount;
159 updateEdgeWeight<NodeT>(EdgeWeights, BB, Child, Weight);
160 ++ChildIndex;
161 }
162}
163
164template <class NodeT>
165void computeEdgeWeights(BinaryFunction &BF, EdgeWeightMap &EdgeWeights) {
166 for (BinaryBasicBlock &BB : BF)
167 computeEdgeWeights<NodeT>(&BB, EdgeWeights);
168}
169
170/// Make BB count match the sum of all incoming edges. If AllEdges is true,
171/// make it match max(SumPredEdges, SumSuccEdges).
172void recalculateBBCounts(BinaryFunction &BF, bool AllEdges) {
173 for (BinaryBasicBlock &BB : BF) {
174 uint64_t TotalPredsEWeight = 0;
175 for (BinaryBasicBlock *Pred : BB.predecessors())
176 TotalPredsEWeight += Pred->getBranchInfo(Succ: BB).Count;
177
178 if (TotalPredsEWeight > BB.getExecutionCount())
179 BB.setExecutionCount(TotalPredsEWeight);
180
181 if (!AllEdges)
182 continue;
183
184 uint64_t TotalSuccsEWeight = 0;
185 for (BinaryBasicBlock::BinaryBranchInfo &BI : BB.branch_info())
186 TotalSuccsEWeight += BI.Count;
187
188 if (TotalSuccsEWeight > BB.getExecutionCount())
189 BB.setExecutionCount(TotalSuccsEWeight);
190 }
191}
192
193// This is our main edge count guessing heuristic. Look at predecessors and
194// assign a proportionally higher count to pred edges coming from blocks with
195// a higher execution count in comparison with the other predecessor blocks,
196// making SumPredEdges match the current BB count.
197// If "UseSucc" is true, apply the same logic to successor edges as well. Since
198// some successor edges may already have assigned a count, only update it if the
199// new count is higher.
200void guessEdgeByRelHotness(BinaryFunction &BF, bool UseSucc,
201 EdgeWeightMap &PredEdgeWeights,
202 EdgeWeightMap &SuccEdgeWeights) {
203 for (BinaryBasicBlock &BB : BF) {
204 for (BinaryBasicBlock *Pred : BB.predecessors()) {
205 double RelativeExec = PredEdgeWeights[std::make_pair(x&: Pred, y: &BB)];
206 RelativeExec *= BB.getExecutionCount();
207 BinaryBasicBlock::BinaryBranchInfo &BI = Pred->getBranchInfo(Succ: BB);
208 if (static_cast<uint64_t>(RelativeExec) > BI.Count)
209 BI.Count = static_cast<uint64_t>(RelativeExec);
210 }
211
212 if (!UseSucc)
213 continue;
214
215 auto BI = BB.branch_info_begin();
216 for (BinaryBasicBlock *Succ : BB.successors()) {
217 double RelativeExec = SuccEdgeWeights[std::make_pair(x: &BB, y&: Succ)];
218 RelativeExec *= BB.getExecutionCount();
219 if (static_cast<uint64_t>(RelativeExec) > BI->Count)
220 BI->Count = static_cast<uint64_t>(RelativeExec);
221 ++BI;
222 }
223 }
224}
225
226using ArcSet =
227 DenseSet<std::pair<const BinaryBasicBlock *, const BinaryBasicBlock *>>;
228
229/// Predecessor edges version of guessEdgeByIterativeApproach. GuessedArcs has
230/// all edges we already established their count. Try to guess the count of
231/// the remaining edge, if there is only one to guess, and return true if we
232/// were able to guess.
233bool guessPredEdgeCounts(BinaryBasicBlock *BB, ArcSet &GuessedArcs) {
234 if (BB->pred_size() == 0)
235 return false;
236
237 uint64_t TotalPredCount = 0;
238 unsigned NumGuessedEdges = 0;
239 for (BinaryBasicBlock *Pred : BB->predecessors()) {
240 if (GuessedArcs.count(V: std::make_pair(x&: Pred, y&: BB)))
241 ++NumGuessedEdges;
242 TotalPredCount += Pred->getBranchInfo(Succ: *BB).Count;
243 }
244
245 if (NumGuessedEdges != BB->pred_size() - 1)
246 return false;
247
248 int64_t Guessed =
249 static_cast<int64_t>(BB->getExecutionCount()) - TotalPredCount;
250 if (Guessed < 0)
251 Guessed = 0;
252
253 for (BinaryBasicBlock *Pred : BB->predecessors()) {
254 if (GuessedArcs.count(V: std::make_pair(x&: Pred, y&: BB)))
255 continue;
256
257 Pred->getBranchInfo(Succ: *BB).Count = Guessed;
258 GuessedArcs.insert(V: std::make_pair(x&: Pred, y&: BB));
259 return true;
260 }
261 llvm_unreachable("Expected unguessed arc");
262}
263
264/// Successor edges version of guessEdgeByIterativeApproach. GuessedArcs has
265/// all edges we already established their count. Try to guess the count of
266/// the remaining edge, if there is only one to guess, and return true if we
267/// were able to guess.
268bool guessSuccEdgeCounts(BinaryBasicBlock *BB, ArcSet &GuessedArcs) {
269 if (BB->succ_size() == 0)
270 return false;
271
272 uint64_t TotalSuccCount = 0;
273 unsigned NumGuessedEdges = 0;
274 auto BI = BB->branch_info_begin();
275 for (BinaryBasicBlock *Succ : BB->successors()) {
276 if (GuessedArcs.count(V: std::make_pair(x&: BB, y&: Succ)))
277 ++NumGuessedEdges;
278 TotalSuccCount += BI->Count;
279 ++BI;
280 }
281
282 if (NumGuessedEdges != BB->succ_size() - 1)
283 return false;
284
285 int64_t Guessed =
286 static_cast<int64_t>(BB->getExecutionCount()) - TotalSuccCount;
287 if (Guessed < 0)
288 Guessed = 0;
289
290 BI = BB->branch_info_begin();
291 for (BinaryBasicBlock *Succ : BB->successors()) {
292 if (GuessedArcs.count(V: std::make_pair(x&: BB, y&: Succ))) {
293 ++BI;
294 continue;
295 }
296
297 BI->Count = Guessed;
298 GuessedArcs.insert(V: std::make_pair(x&: BB, y&: Succ));
299 return true;
300 }
301 llvm_unreachable("Expected unguessed arc");
302}
303
304/// Guess edge count whenever we have only one edge (pred or succ) left
305/// to guess. Then make its count equal to BB count minus all other edge
306/// counts we already know their count. Repeat this until there is no
307/// change.
308void guessEdgeByIterativeApproach(BinaryFunction &BF) {
309 ArcSet KnownArcs;
310 bool Changed = false;
311
312 do {
313 Changed = false;
314 for (BinaryBasicBlock &BB : BF) {
315 if (guessPredEdgeCounts(BB: &BB, GuessedArcs&: KnownArcs))
316 Changed = true;
317 if (guessSuccEdgeCounts(BB: &BB, GuessedArcs&: KnownArcs))
318 Changed = true;
319 }
320 } while (Changed);
321
322 // Guess count for non-inferred edges
323 for (BinaryBasicBlock &BB : BF) {
324 for (BinaryBasicBlock *Pred : BB.predecessors()) {
325 if (KnownArcs.count(V: std::make_pair(x&: Pred, y: &BB)))
326 continue;
327 BinaryBasicBlock::BinaryBranchInfo &BI = Pred->getBranchInfo(Succ: BB);
328 BI.Count =
329 std::min(a: Pred->getExecutionCount(), b: BB.getExecutionCount()) / 2;
330 KnownArcs.insert(V: std::make_pair(x&: Pred, y: &BB));
331 }
332 auto BI = BB.branch_info_begin();
333 for (BinaryBasicBlock *Succ : BB.successors()) {
334 if (KnownArcs.count(V: std::make_pair(x: &BB, y&: Succ))) {
335 ++BI;
336 continue;
337 }
338 BI->Count =
339 std::min(a: BB.getExecutionCount(), b: Succ->getExecutionCount()) / 2;
340 KnownArcs.insert(V: std::make_pair(x: &BB, y&: Succ));
341 break;
342 }
343 }
344}
345
346/// Associate each basic block with the BinaryLoop object corresponding to the
347/// innermost loop containing this block.
348DenseMap<const BinaryBasicBlock *, const BinaryLoop *>
349createLoopNestLevelMap(BinaryFunction &BF) {
350 DenseMap<const BinaryBasicBlock *, const BinaryLoop *> LoopNestLevel;
351 const BinaryLoopInfo &BLI = BF.getLoopInfo();
352
353 for (BinaryBasicBlock &BB : BF)
354 LoopNestLevel[&BB] = BLI[&BB];
355
356 return LoopNestLevel;
357}
358
359} // end anonymous namespace
360
361void equalizeBBCounts(DataflowInfoManager &Info, BinaryFunction &BF) {
362 if (BF.begin() == BF.end())
363 return;
364
365 DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
366 DominatorAnalysis<true> &PDA = Info.getPostDominatorAnalysis();
367 auto &InsnToBB = Info.getInsnToBBMap();
368 // These analyses work at the instruction granularity, but we really only need
369 // basic block granularity here. So we'll use a set of visited edges to avoid
370 // revisiting the same BBs again and again.
371 DenseMap<const BinaryBasicBlock *, std::set<const BinaryBasicBlock *>>
372 Visited;
373 // Equivalence classes mapping. Each equivalence class is defined by the set
374 // of BBs that obeys the aforementioned properties.
375 DenseMap<const BinaryBasicBlock *, signed> BBsToEC;
376 std::vector<std::vector<BinaryBasicBlock *>> Classes;
377
378 BF.calculateLoopInfo();
379 DenseMap<const BinaryBasicBlock *, const BinaryLoop *> LoopNestLevel =
380 createLoopNestLevelMap(BF);
381
382 for (BinaryBasicBlock &BB : BF)
383 BBsToEC[&BB] = -1;
384
385 for (BinaryBasicBlock &BB : BF) {
386 auto I = BB.begin();
387 if (I == BB.end())
388 continue;
389
390 DA.doForAllDominators(Inst: *I, Task: [&](const MCInst &DomInst) {
391 BinaryBasicBlock *DomBB = InsnToBB[&DomInst];
392 if (Visited[DomBB].count(x: &BB))
393 return;
394 Visited[DomBB].insert(x: &BB);
395 if (!PDA.doesADominateB(A: *I, B: DomInst))
396 return;
397 if (LoopNestLevel[&BB] != LoopNestLevel[DomBB])
398 return;
399 if (BBsToEC[DomBB] == -1 && BBsToEC[&BB] == -1) {
400 BBsToEC[DomBB] = Classes.size();
401 BBsToEC[&BB] = Classes.size();
402 Classes.emplace_back();
403 Classes.back().push_back(x: DomBB);
404 Classes.back().push_back(x: &BB);
405 return;
406 }
407 if (BBsToEC[DomBB] == -1) {
408 BBsToEC[DomBB] = BBsToEC[&BB];
409 Classes[BBsToEC[&BB]].push_back(x: DomBB);
410 return;
411 }
412 if (BBsToEC[&BB] == -1) {
413 BBsToEC[&BB] = BBsToEC[DomBB];
414 Classes[BBsToEC[DomBB]].push_back(x: &BB);
415 return;
416 }
417 signed BBECNum = BBsToEC[&BB];
418 std::vector<BinaryBasicBlock *> DomEC = Classes[BBsToEC[DomBB]];
419 std::vector<BinaryBasicBlock *> BBEC = Classes[BBECNum];
420 for (BinaryBasicBlock *Block : DomEC) {
421 BBsToEC[Block] = BBECNum;
422 BBEC.push_back(x: Block);
423 }
424 DomEC.clear();
425 });
426 }
427
428 for (std::vector<BinaryBasicBlock *> &Class : Classes) {
429 uint64_t Max = 0ULL;
430 for (BinaryBasicBlock *BB : Class)
431 Max = std::max(a: Max, b: BB->getExecutionCount());
432 for (BinaryBasicBlock *BB : Class)
433 BB->setExecutionCount(Max);
434 }
435}
436
437void EstimateEdgeCounts::runOnFunction(BinaryFunction &BF) {
438 EdgeWeightMap PredEdgeWeights;
439 EdgeWeightMap SuccEdgeWeights;
440 if (!opts::IterativeGuess) {
441 computeEdgeWeights<Inverse<BinaryBasicBlock *>>(BF, EdgeWeights&: PredEdgeWeights);
442 computeEdgeWeights<BinaryBasicBlock *>(BF, EdgeWeights&: SuccEdgeWeights);
443 }
444 if (opts::EqualizeBBCounts) {
445 LLVM_DEBUG(BF.print(dbgs(), "before equalize BB counts"));
446 auto Info = DataflowInfoManager(BF, nullptr, nullptr);
447 equalizeBBCounts(Info, BF);
448 LLVM_DEBUG(BF.print(dbgs(), "after equalize BB counts"));
449 }
450 if (opts::IterativeGuess)
451 guessEdgeByIterativeApproach(BF);
452 else
453 guessEdgeByRelHotness(BF, /*UseSuccs=*/UseSucc: false, PredEdgeWeights,
454 SuccEdgeWeights);
455 recalculateBBCounts(BF, /*AllEdges=*/false);
456}
457
458Error EstimateEdgeCounts::runOnFunctions(BinaryContext &BC) {
459 if (llvm::none_of(Range: llvm::make_second_range(c&: BC.getBinaryFunctions()),
460 P: [](const BinaryFunction &BF) {
461 return BF.getProfileFlags() == BinaryFunction::PF_BASIC;
462 }))
463 return Error::success();
464
465 ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
466 runOnFunction(BF);
467 };
468 ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
469 return BF.getProfileFlags() != BinaryFunction::PF_BASIC;
470 };
471
472 ParallelUtilities::runOnEachFunction(
473 BC, SchedPolicy: ParallelUtilities::SchedulingPolicy::SP_BB_QUADRATIC, WorkFunction: WorkFun,
474 SkipPredicate: SkipFunc, LogName: "EstimateEdgeCounts");
475 return Error::success();
476}
477
478} // namespace bolt
479} // namespace llvm
480

Provided by KDAB

Privacy Policy
Improve your Profiling and Debugging skills
Find out more

source code of bolt/lib/Passes/MCF.cpp