1//===- bolt/Passes/ProfileQualityStats.cpp ----------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the profile quality stats calculation pass.
10//
11//===----------------------------------------------------------------------===//
12
13#include "bolt/Passes/ProfileQualityStats.h"
14#include "bolt/Core/BinaryBasicBlock.h"
15#include "bolt/Core/BinaryFunction.h"
16#include "bolt/Utils/CommandLineOpts.h"
17#include "llvm/Support/CommandLine.h"
18#include <queue>
19#include <unordered_map>
20#include <unordered_set>
21
22using namespace llvm;
23using namespace bolt;
24
25namespace opts {
26extern cl::opt<unsigned> Verbosity;
27static cl::opt<unsigned> TopFunctionsForProfileQualityCheck(
28 "top-functions-for-profile-quality-check",
29 cl::desc("number of hottest functions to print aggregated "
30 "profile quality stats of."),
31 cl::init(Val: 1000), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
32static cl::opt<unsigned> PercentileForProfileQualityCheck(
33 "percentile-for-profile-quality-check",
34 cl::desc("Percentile of profile quality distributions over hottest "
35 "functions to report."),
36 cl::init(Val: 95), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
37} // namespace opts
38
39namespace {
40using FunctionListType = std::vector<const BinaryFunction *>;
41using function_iterator = FunctionListType::iterator;
42
43// Function number -> vector of flows for BBs in the function
44using TotalFlowMapTy = std::unordered_map<uint64_t, std::vector<uint64_t>>;
45// Function number -> flow count
46using FunctionFlowMapTy = std::unordered_map<uint64_t, uint64_t>;
47struct FlowInfo {
48 TotalFlowMapTy TotalIncomingFlows;
49 TotalFlowMapTy TotalOutgoingFlows;
50 TotalFlowMapTy TotalMaxCountMaps;
51 TotalFlowMapTy TotalMinCountMaps;
52 FunctionFlowMapTy CallGraphIncomingFlows;
53};
54
55// When reporting exception handling stats, we only consider functions with at
56// least MinLPECSum counts in landing pads to avoid false positives due to
57// sampling noise
58const uint16_t MinLPECSum = 50;
59
60// When reporting CFG flow conservation stats, we only consider blocks with
61// execution counts > MinBlockCount when reporting the distribution of worst
62// gaps.
63const uint16_t MinBlockCount = 500;
64
65template <typename T>
66void printDistribution(raw_ostream &OS, std::vector<T> &values,
67 bool Fraction = false) {
68 // Assume values are sorted.
69 if (values.empty())
70 return;
71
72 OS << " Length : " << values.size() << "\n";
73
74 auto printLine = [&](std::string Text, double Percent) {
75 int Rank = int(values.size() * (100 - Percent) / 100);
76 if (Percent == 0)
77 Rank = values.size() - 1;
78 if (Fraction)
79 OS << " " << Text << std::string(11 - Text.length(), ' ') << ": "
80 << formatv("{0:P}", values[Rank]) << "\n";
81 else
82 OS << " " << Text << std::string(11 - Text.length(), ' ') << ": "
83 << values[Rank] << "\n";
84 };
85
86 printLine("MAX", 0);
87 const int percentages[] = {1, 5, 10, 20, 50, 80};
88 for (size_t i = 0; i < sizeof(percentages) / sizeof(percentages[0]); ++i) {
89 printLine("TOP " + std::to_string(val: percentages[i]) + "%", percentages[i]);
90 }
91 printLine("MIN", 100);
92}
93
94void printCFGContinuityStats(raw_ostream &OS,
95 iterator_range<function_iterator> &Functions) {
96 // Given a perfect profile, every positive-execution-count BB should be
97 // connected to an entry of the function through a positive-execution-count
98 // directed path in the control flow graph.
99 std::vector<size_t> NumUnreachables;
100 std::vector<size_t> SumECUnreachables;
101 std::vector<double> FractionECUnreachables;
102
103 for (const BinaryFunction *Function : Functions) {
104 if (Function->size() <= 1) {
105 NumUnreachables.push_back(x: 0);
106 SumECUnreachables.push_back(x: 0);
107 FractionECUnreachables.push_back(x: 0.0);
108 continue;
109 }
110
111 // Compute the sum of all BB execution counts (ECs).
112 size_t NumPosECBBs = 0;
113 size_t SumAllBBEC = 0;
114 for (const BinaryBasicBlock &BB : *Function) {
115 const size_t BBEC = BB.getKnownExecutionCount();
116 NumPosECBBs += !!BBEC;
117 SumAllBBEC += BBEC;
118 }
119
120 // Perform BFS on subgraph of CFG induced by positive weight edges.
121 // Compute the number of BBs reachable from the entry(s) of the function and
122 // the sum of their execution counts (ECs).
123 std::unordered_set<unsigned> Visited;
124 std::queue<unsigned> Queue;
125 size_t SumReachableBBEC = 0;
126
127 Function->forEachEntryPoint(Callback: [&](uint64_t Offset, const MCSymbol *Label) {
128 const BinaryBasicBlock *EntryBB = Function->getBasicBlockAtOffset(Offset);
129 if (!EntryBB || EntryBB->getKnownExecutionCount() == 0)
130 return true;
131 Queue.push(x: EntryBB->getLayoutIndex());
132 Visited.insert(x: EntryBB->getLayoutIndex());
133 SumReachableBBEC += EntryBB->getKnownExecutionCount();
134 return true;
135 });
136
137 const FunctionLayout &Layout = Function->getLayout();
138
139 while (!Queue.empty()) {
140 const unsigned BBIndex = Queue.front();
141 const BinaryBasicBlock *BB = Layout.getBlock(Index: BBIndex);
142 Queue.pop();
143 for (const auto &[Succ, BI] :
144 llvm::zip(t: BB->successors(), u: BB->branch_info())) {
145 const uint64_t Count = BI.Count;
146 if (Count == BinaryBasicBlock::COUNT_NO_PROFILE || Count == 0 ||
147 !Visited.insert(x: Succ->getLayoutIndex()).second)
148 continue;
149 SumReachableBBEC += Succ->getKnownExecutionCount();
150 Queue.push(x: Succ->getLayoutIndex());
151 }
152 }
153
154 const size_t NumReachableBBs = Visited.size();
155
156 const size_t NumPosECBBsUnreachableFromEntry =
157 NumPosECBBs - NumReachableBBs;
158 const size_t SumUnreachableBBEC = SumAllBBEC - SumReachableBBEC;
159
160 double FractionECUnreachable = 0.0;
161 if (SumAllBBEC > 0)
162 FractionECUnreachable = (double)SumUnreachableBBEC / SumAllBBEC;
163
164 if (opts::Verbosity >= 2 && FractionECUnreachable >= 0.05) {
165 OS << "Non-trivial CFG discontinuity observed in function "
166 << Function->getPrintName() << "\n";
167 if (opts::Verbosity >= 3)
168 Function->dump();
169 }
170
171 NumUnreachables.push_back(x: NumPosECBBsUnreachableFromEntry);
172 SumECUnreachables.push_back(x: SumUnreachableBBEC);
173 FractionECUnreachables.push_back(x: FractionECUnreachable);
174 }
175
176 llvm::sort(C&: FractionECUnreachables);
177 const int Rank = int(FractionECUnreachables.size() *
178 opts::PercentileForProfileQualityCheck / 100);
179 OS << formatv(Fmt: "function CFG discontinuity {0:P}; ",
180 Vals&: FractionECUnreachables[Rank]);
181 if (opts::Verbosity >= 1) {
182 OS << "\nabbreviations: EC = execution count, POS BBs = positive EC BBs\n"
183 << "distribution of NUM(unreachable POS BBs) per function\n";
184 llvm::sort(C&: NumUnreachables);
185 printDistribution(OS, values&: NumUnreachables);
186
187 OS << "distribution of SUM_EC(unreachable POS BBs) per function\n";
188 llvm::sort(C&: SumECUnreachables);
189 printDistribution(OS, values&: SumECUnreachables);
190
191 OS << "distribution of [(SUM_EC(unreachable POS BBs) / SUM_EC(all "
192 "POS BBs))] per function\n";
193 printDistribution(OS, values&: FractionECUnreachables, /*Fraction=*/true);
194 }
195}
196
197void printCallGraphFlowConservationStats(
198 raw_ostream &OS, iterator_range<function_iterator> &Functions,
199 FlowInfo &TotalFlowMap) {
200 std::vector<double> CallGraphGaps;
201
202 for (const BinaryFunction *Function : Functions) {
203 if (Function->size() <= 1 || !Function->isSimple()) {
204 CallGraphGaps.push_back(x: 0.0);
205 continue;
206 }
207
208 const uint64_t FunctionNum = Function->getFunctionNumber();
209 std::vector<uint64_t> &IncomingFlows =
210 TotalFlowMap.TotalIncomingFlows[FunctionNum];
211 std::vector<uint64_t> &OutgoingFlows =
212 TotalFlowMap.TotalOutgoingFlows[FunctionNum];
213 FunctionFlowMapTy &CallGraphIncomingFlows =
214 TotalFlowMap.CallGraphIncomingFlows;
215
216 // Only consider functions that are not a program entry.
217 if (CallGraphIncomingFlows.find(x: FunctionNum) ==
218 CallGraphIncomingFlows.end()) {
219 CallGraphGaps.push_back(x: 0.0);
220 continue;
221 }
222
223 uint64_t EntryInflow = 0;
224 uint64_t EntryOutflow = 0;
225 uint32_t NumConsideredEntryBlocks = 0;
226
227 Function->forEachEntryPoint(Callback: [&](uint64_t Offset, const MCSymbol *Label) {
228 const BinaryBasicBlock *EntryBB = Function->getBasicBlockAtOffset(Offset);
229 if (!EntryBB || EntryBB->succ_size() == 0)
230 return true;
231 NumConsideredEntryBlocks++;
232 EntryInflow += IncomingFlows[EntryBB->getLayoutIndex()];
233 EntryOutflow += OutgoingFlows[EntryBB->getLayoutIndex()];
234 return true;
235 });
236
237 uint64_t NetEntryOutflow = 0;
238 if (EntryOutflow < EntryInflow) {
239 if (opts::Verbosity >= 2) {
240 // We expect entry blocks' CFG outflow >= inflow, i.e., it has a
241 // non-negative net outflow. If this is not the case, then raise a
242 // warning if requested.
243 OS << "BOLT WARNING: unexpected entry block CFG outflow < inflow "
244 "in function "
245 << Function->getPrintName() << "\n";
246 if (opts::Verbosity >= 3)
247 Function->dump();
248 }
249 } else {
250 NetEntryOutflow = EntryOutflow - EntryInflow;
251 }
252 if (NumConsideredEntryBlocks > 0) {
253 const uint64_t CallGraphInflow =
254 TotalFlowMap.CallGraphIncomingFlows[Function->getFunctionNumber()];
255 const uint64_t Min = std::min(a: NetEntryOutflow, b: CallGraphInflow);
256 const uint64_t Max = std::max(a: NetEntryOutflow, b: CallGraphInflow);
257 double CallGraphGap = 0.0;
258 if (Max > 0)
259 CallGraphGap = 1 - (double)Min / Max;
260
261 if (opts::Verbosity >= 2 && CallGraphGap >= 0.5) {
262 OS << "Non-trivial call graph gap of size "
263 << formatv(Fmt: "{0:P}", Vals&: CallGraphGap) << " observed in function "
264 << Function->getPrintName() << "\n";
265 if (opts::Verbosity >= 3)
266 Function->dump();
267 }
268
269 CallGraphGaps.push_back(x: CallGraphGap);
270 } else {
271 CallGraphGaps.push_back(x: 0.0);
272 }
273 }
274
275 llvm::sort(C&: CallGraphGaps);
276 const int Rank =
277 int(CallGraphGaps.size() * opts::PercentileForProfileQualityCheck / 100);
278 OS << formatv(Fmt: "call graph flow conservation gap {0:P}; ",
279 Vals&: CallGraphGaps[Rank]);
280 if (opts::Verbosity >= 1) {
281 OS << "\ndistribution of function entry flow conservation gaps\n";
282 printDistribution(OS, values&: CallGraphGaps, /*Fraction=*/true);
283 }
284}
285
286void printCFGFlowConservationStats(const BinaryContext &BC, raw_ostream &OS,
287 iterator_range<function_iterator> &Functions,
288 FlowInfo &TotalFlowMap) {
289 std::vector<double> CFGGapsWeightedAvg;
290 std::vector<double> CFGGapsWorst;
291 std::vector<uint64_t> CFGGapsWorstAbs;
292 for (const BinaryFunction *Function : Functions) {
293 if (Function->size() <= 1 || !Function->isSimple()) {
294 CFGGapsWeightedAvg.push_back(x: 0.0);
295 CFGGapsWorst.push_back(x: 0.0);
296 CFGGapsWorstAbs.push_back(x: 0);
297 continue;
298 }
299
300 const uint64_t FunctionNum = Function->getFunctionNumber();
301 std::vector<uint64_t> &MaxCountMaps =
302 TotalFlowMap.TotalMaxCountMaps[FunctionNum];
303 std::vector<uint64_t> &MinCountMaps =
304 TotalFlowMap.TotalMinCountMaps[FunctionNum];
305 double WeightedGapSum = 0.0;
306 double WeightSum = 0.0;
307 double WorstGap = 0.0;
308 uint64_t WorstGapAbs = 0;
309 BinaryBasicBlock *BBWorstGap = nullptr;
310 BinaryBasicBlock *BBWorstGapAbs = nullptr;
311 for (BinaryBasicBlock &BB : *Function) {
312 // We don't consider function entry or exit blocks for CFG flow
313 // conservation
314 if (BB.isEntryPoint() || BB.succ_size() == 0)
315 continue;
316
317 if (BB.getKnownExecutionCount() == 0 || BB.getNumNonPseudos() == 0)
318 continue;
319
320 // We don't consider blocks that is a landing pad or has a
321 // positive-execution-count landing pad
322 if (BB.isLandingPad())
323 continue;
324
325 if (llvm::any_of(Range: BB.landing_pads(),
326 P: std::mem_fn(pm: &BinaryBasicBlock::getKnownExecutionCount)))
327 continue;
328
329 // We don't consider blocks that end with a recursive call instruction
330 const MCInst *Inst = BB.getLastNonPseudoInstr();
331 if (BC.MIB->isCall(Inst: *Inst)) {
332 const MCSymbol *DstSym = BC.MIB->getTargetSymbol(Inst: *Inst);
333 const BinaryFunction *DstFunc =
334 DstSym ? BC.getFunctionForSymbol(Symbol: DstSym) : nullptr;
335 if (DstFunc == Function)
336 continue;
337 }
338
339 const uint64_t Max = MaxCountMaps[BB.getLayoutIndex()];
340 const uint64_t Min = MinCountMaps[BB.getLayoutIndex()];
341 double Gap = 0.0;
342 if (Max > 0)
343 Gap = 1 - (double)Min / Max;
344 double Weight = BB.getKnownExecutionCount() * BB.getNumNonPseudos();
345 // We use log to prevent the stats from being dominated by extremely hot
346 // blocks
347 Weight = log(x: Weight);
348 WeightedGapSum += Gap * Weight;
349 WeightSum += Weight;
350 if (BB.getKnownExecutionCount() > MinBlockCount && Gap > WorstGap) {
351 WorstGap = Gap;
352 BBWorstGap = &BB;
353 }
354 if (BB.getKnownExecutionCount() > MinBlockCount &&
355 Max - Min > WorstGapAbs) {
356 WorstGapAbs = Max - Min;
357 BBWorstGapAbs = &BB;
358 }
359 }
360 double WeightedGap = WeightedGapSum;
361 if (WeightSum > 0)
362 WeightedGap /= WeightSum;
363 if (opts::Verbosity >= 2 && WorstGap >= 0.9) {
364 OS << "Non-trivial CFG gap observed in function "
365 << Function->getPrintName() << "\n"
366 << "Weighted gap: " << formatv(Fmt: "{0:P}", Vals&: WeightedGap) << "\n";
367 if (BBWorstGap)
368 OS << "Worst gap: " << formatv(Fmt: "{0:P}", Vals&: WorstGap)
369 << " at BB with input offset: 0x"
370 << Twine::utohexstr(Val: BBWorstGap->getInputOffset()) << "\n";
371 if (BBWorstGapAbs)
372 OS << "Worst gap (absolute value): " << WorstGapAbs << " at BB with "
373 << "input offset 0x"
374 << Twine::utohexstr(Val: BBWorstGapAbs->getInputOffset()) << "\n";
375 if (opts::Verbosity >= 3)
376 Function->dump();
377 }
378 CFGGapsWeightedAvg.push_back(x: WeightedGap);
379 CFGGapsWorst.push_back(x: WorstGap);
380 CFGGapsWorstAbs.push_back(x: WorstGapAbs);
381 }
382
383 llvm::sort(C&: CFGGapsWeightedAvg);
384 const int RankWA = int(CFGGapsWeightedAvg.size() *
385 opts::PercentileForProfileQualityCheck / 100);
386 llvm::sort(C&: CFGGapsWorst);
387 const int RankW =
388 int(CFGGapsWorst.size() * opts::PercentileForProfileQualityCheck / 100);
389 OS << formatv(Fmt: "CFG flow conservation gap {0:P} (weighted) {1:P} (worst); ",
390 Vals&: CFGGapsWeightedAvg[RankWA], Vals&: CFGGapsWorst[RankW]);
391 if (opts::Verbosity >= 1) {
392 OS << "distribution of weighted CFG flow conservation gaps\n";
393 printDistribution(OS, values&: CFGGapsWeightedAvg, /*Fraction=*/true);
394 OS << format(Fmt: "Consider only blocks with execution counts > %zu:\n",
395 Vals: MinBlockCount)
396 << "distribution of worst block flow conservation gap per "
397 "function \n";
398 printDistribution(OS, values&: CFGGapsWorst, /*Fraction=*/true);
399 OS << "distribution of worst block flow conservation gap (absolute "
400 "value) per function\n";
401 llvm::sort(C&: CFGGapsWorstAbs);
402 printDistribution(OS, values&: CFGGapsWorstAbs, /*Fraction=*/false);
403 }
404}
405
406void printExceptionHandlingStats(const BinaryContext &BC, raw_ostream &OS,
407 iterator_range<function_iterator> &Functions) {
408 std::vector<double> LPCountFractionsOfTotalBBEC;
409 std::vector<double> LPCountFractionsOfTotalInvokeEC;
410 for (const BinaryFunction *Function : Functions) {
411 size_t LPECSum = 0;
412 size_t BBECSum = 0;
413 size_t InvokeECSum = 0;
414 for (BinaryBasicBlock &BB : *Function) {
415 const size_t BBEC = BB.getKnownExecutionCount();
416 BBECSum += BBEC;
417 if (BB.isLandingPad())
418 LPECSum += BBEC;
419 for (const MCInst &Inst : BB) {
420 if (!BC.MIB->isInvoke(Inst))
421 continue;
422 const std::optional<MCPlus::MCLandingPad> EHInfo =
423 BC.MIB->getEHInfo(Inst);
424 if (EHInfo->first)
425 InvokeECSum += BBEC;
426 }
427 }
428
429 if (LPECSum <= MinLPECSum) {
430 LPCountFractionsOfTotalBBEC.push_back(x: 0.0);
431 LPCountFractionsOfTotalInvokeEC.push_back(x: 0.0);
432 continue;
433 }
434 double FracTotalBBEC = 0.0;
435 if (BBECSum > 0)
436 FracTotalBBEC = (double)LPECSum / BBECSum;
437 double FracTotalInvokeEC = 0.0;
438 if (InvokeECSum > 0)
439 FracTotalInvokeEC = (double)LPECSum / InvokeECSum;
440 LPCountFractionsOfTotalBBEC.push_back(x: FracTotalBBEC);
441 LPCountFractionsOfTotalInvokeEC.push_back(x: FracTotalInvokeEC);
442
443 if (opts::Verbosity >= 2 && FracTotalInvokeEC >= 0.05) {
444 OS << "Non-trivial usage of exception handling observed in function "
445 << Function->getPrintName() << "\n"
446 << formatv(
447 Fmt: "Fraction of total InvokeEC that goes to landing pads: {0:P}\n",
448 Vals&: FracTotalInvokeEC);
449 if (opts::Verbosity >= 3)
450 Function->dump();
451 }
452 }
453
454 llvm::sort(C&: LPCountFractionsOfTotalBBEC);
455 const int RankBBEC = int(LPCountFractionsOfTotalBBEC.size() *
456 opts::PercentileForProfileQualityCheck / 100);
457 llvm::sort(C&: LPCountFractionsOfTotalInvokeEC);
458 const int RankInvoke = int(LPCountFractionsOfTotalInvokeEC.size() *
459 opts::PercentileForProfileQualityCheck / 100);
460 OS << formatv(Fmt: "exception handling usage {0:P} (of total BBEC) {1:P} (of "
461 "total InvokeEC)\n",
462 Vals&: LPCountFractionsOfTotalBBEC[RankBBEC],
463 Vals&: LPCountFractionsOfTotalInvokeEC[RankInvoke]);
464 if (opts::Verbosity >= 1) {
465 OS << "distribution of exception handling usage as a fraction of total "
466 "BBEC of each function\n";
467 printDistribution(OS, values&: LPCountFractionsOfTotalBBEC, /*Fraction=*/true);
468 OS << "distribution of exception handling usage as a fraction of total "
469 "InvokeEC of each function\n";
470 printDistribution(OS, values&: LPCountFractionsOfTotalInvokeEC, /*Fraction=*/true);
471 }
472}
473
474void computeFlowMappings(const BinaryContext &BC, FlowInfo &TotalFlowMap) {
475 // Increment block inflow and outflow with CFG jump counts.
476 TotalFlowMapTy &TotalIncomingFlows = TotalFlowMap.TotalIncomingFlows;
477 TotalFlowMapTy &TotalOutgoingFlows = TotalFlowMap.TotalOutgoingFlows;
478 for (const auto &BFI : BC.getBinaryFunctions()) {
479 const BinaryFunction *Function = &BFI.second;
480 std::vector<uint64_t> &IncomingFlows =
481 TotalIncomingFlows[Function->getFunctionNumber()];
482 std::vector<uint64_t> &OutgoingFlows =
483 TotalOutgoingFlows[Function->getFunctionNumber()];
484 const uint64_t NumBlocks = Function->size();
485 IncomingFlows.resize(new_size: NumBlocks, x: 0);
486 OutgoingFlows.resize(new_size: NumBlocks, x: 0);
487 if (Function->empty() || !Function->hasValidProfile())
488 continue;
489 for (const BinaryBasicBlock &BB : *Function) {
490 uint64_t TotalOutgoing = 0ULL;
491 for (const auto &[Succ, BI] :
492 llvm::zip(t: BB.successors(), u: BB.branch_info())) {
493 const uint64_t Count = BI.Count;
494 if (Count == BinaryBasicBlock::COUNT_NO_PROFILE || Count == 0)
495 continue;
496 TotalOutgoing += Count;
497 IncomingFlows[Succ->getLayoutIndex()] += Count;
498 }
499 OutgoingFlows[BB.getLayoutIndex()] = TotalOutgoing;
500 }
501 }
502 // Initialize TotalMaxCountMaps and TotalMinCountMaps using
503 // TotalIncomingFlows and TotalOutgoingFlows
504 TotalFlowMapTy &TotalMaxCountMaps = TotalFlowMap.TotalMaxCountMaps;
505 TotalFlowMapTy &TotalMinCountMaps = TotalFlowMap.TotalMinCountMaps;
506 for (const auto &BFI : BC.getBinaryFunctions()) {
507 const BinaryFunction *Function = &BFI.second;
508 uint64_t FunctionNum = Function->getFunctionNumber();
509 std::vector<uint64_t> &IncomingFlows = TotalIncomingFlows[FunctionNum];
510 std::vector<uint64_t> &OutgoingFlows = TotalOutgoingFlows[FunctionNum];
511 std::vector<uint64_t> &MaxCountMap = TotalMaxCountMaps[FunctionNum];
512 std::vector<uint64_t> &MinCountMap = TotalMinCountMaps[FunctionNum];
513 const uint64_t NumBlocks = Function->size();
514 MaxCountMap.resize(new_size: NumBlocks, x: 0);
515 MinCountMap.resize(new_size: NumBlocks, x: 0);
516 if (Function->empty() || !Function->hasValidProfile())
517 continue;
518 for (const BinaryBasicBlock &BB : *Function) {
519 uint64_t BBNum = BB.getLayoutIndex();
520 MaxCountMap[BBNum] = std::max(a: IncomingFlows[BBNum], b: OutgoingFlows[BBNum]);
521 MinCountMap[BBNum] = std::min(a: IncomingFlows[BBNum], b: OutgoingFlows[BBNum]);
522 }
523 }
524
525 // Modify TotalMaxCountMaps and TotalMinCountMaps using call counts and
526 // fill out CallGraphIncomingFlows
527 FunctionFlowMapTy &CallGraphIncomingFlows =
528 TotalFlowMap.CallGraphIncomingFlows;
529 for (const auto &BFI : BC.getBinaryFunctions()) {
530 const BinaryFunction *Function = &BFI.second;
531 uint64_t FunctionNum = Function->getFunctionNumber();
532 std::vector<uint64_t> &MaxCountMap = TotalMaxCountMaps[FunctionNum];
533 std::vector<uint64_t> &MinCountMap = TotalMinCountMaps[FunctionNum];
534
535 // Record external entry count into CallGraphIncomingFlows
536 CallGraphIncomingFlows[FunctionNum] += Function->getExternEntryCount();
537
538 // Update MaxCountMap, MinCountMap, and CallGraphIncomingFlows
539 auto recordCall = [&](const BinaryBasicBlock *SourceBB,
540 const MCSymbol *DestSymbol, uint64_t Count,
541 uint64_t TotalCallCount) {
542 if (Count == BinaryBasicBlock::COUNT_NO_PROFILE)
543 Count = 0;
544 const BinaryFunction *DstFunc =
545 DestSymbol ? BC.getFunctionForSymbol(Symbol: DestSymbol) : nullptr;
546 if (DstFunc)
547 CallGraphIncomingFlows[DstFunc->getFunctionNumber()] += Count;
548 if (SourceBB) {
549 unsigned BlockIndex = SourceBB->getLayoutIndex();
550 MaxCountMap[BlockIndex] =
551 std::max(a: MaxCountMap[BlockIndex], b: TotalCallCount);
552 MinCountMap[BlockIndex] =
553 std::min(a: MinCountMap[BlockIndex], b: TotalCallCount);
554 }
555 };
556
557 // Get pairs of (symbol, count) for each target at this callsite.
558 // If the call is to an unknown function the symbol will be nullptr.
559 // If there is no profiling data the count will be COUNT_NO_PROFILE.
560 using TargetDesc = std::pair<const MCSymbol *, uint64_t>;
561 using CallInfoTy = std::vector<TargetDesc>;
562 auto getCallInfo = [&](const BinaryBasicBlock *BB, const MCInst &Inst) {
563 CallInfoTy Counts;
564 const MCSymbol *DstSym = BC.MIB->getTargetSymbol(Inst);
565
566 if (!DstSym && BC.MIB->hasAnnotation(Inst, Name: "CallProfile")) {
567 for (const auto &CSI : BC.MIB->getAnnotationAs<IndirectCallSiteProfile>(
568 Inst, Name: "CallProfile"))
569 if (CSI.Symbol)
570 Counts.emplace_back(args: CSI.Symbol, args: CSI.Count);
571 } else {
572 const uint64_t Count = BB->getExecutionCount();
573 Counts.emplace_back(args&: DstSym, args: Count);
574 }
575
576 return Counts;
577 };
578
579 // If the function has an invalid profile, try to use the perf data
580 // directly. The call EC is only used to update CallGraphIncomingFlows.
581 if (!Function->hasValidProfile() && !Function->getAllCallSites().empty()) {
582 for (const IndirectCallProfile &CSI : Function->getAllCallSites())
583 if (CSI.Symbol)
584 recordCall(nullptr, CSI.Symbol, CSI.Count, CSI.Count);
585 continue;
586 } else {
587 // If the function has a valid profile
588 for (const BinaryBasicBlock &BB : *Function) {
589 for (const MCInst &Inst : BB) {
590 if (!BC.MIB->isCall(Inst))
591 continue;
592 // Find call instructions and extract target symbols from each
593 // one.
594 const CallInfoTy CallInfo = getCallInfo(&BB, Inst);
595 // We need the total call count to update MaxCountMap and
596 // MinCountMap in recordCall for indirect calls
597 uint64_t TotalCallCount = 0;
598 for (const TargetDesc &CI : CallInfo)
599 TotalCallCount += CI.second;
600 for (const TargetDesc &CI : CallInfo)
601 recordCall(&BB, CI.first, CI.second, TotalCallCount);
602 }
603 }
604 }
605 }
606}
607
608void printAll(BinaryContext &BC, FunctionListType &ValidFunctions,
609 size_t NumTopFunctions) {
610 // Sort the list of functions by execution counts (reverse).
611 llvm::sort(C&: ValidFunctions,
612 Comp: [&](const BinaryFunction *A, const BinaryFunction *B) {
613 return A->getKnownExecutionCount() > B->getKnownExecutionCount();
614 });
615
616 const size_t RealNumTopFunctions =
617 std::min(a: NumTopFunctions, b: ValidFunctions.size());
618
619 iterator_range<function_iterator> Functions(
620 ValidFunctions.begin(), ValidFunctions.begin() + RealNumTopFunctions);
621
622 FlowInfo TotalFlowMap;
623 computeFlowMappings(BC, TotalFlowMap);
624
625 BC.outs() << format(Fmt: "BOLT-INFO: profile quality metrics for the hottest %zu "
626 "functions (reporting top %zu%% values): ",
627 Vals: RealNumTopFunctions,
628 Vals: 100 - opts::PercentileForProfileQualityCheck);
629 printCFGContinuityStats(OS&: BC.outs(), Functions);
630 printCallGraphFlowConservationStats(OS&: BC.outs(), Functions, TotalFlowMap);
631 printCFGFlowConservationStats(BC, OS&: BC.outs(), Functions, TotalFlowMap);
632 printExceptionHandlingStats(BC, OS&: BC.outs(), Functions);
633 // Print more detailed bucketed stats if requested.
634 if (opts::Verbosity >= 1 && RealNumTopFunctions >= 5) {
635 const size_t PerBucketSize = RealNumTopFunctions / 5;
636 BC.outs() << format(
637 Fmt: "Detailed stats for 5 buckets, each with %zu functions:\n",
638 Vals: PerBucketSize);
639
640 // For each bucket, print the CFG continuity stats of the functions in
641 // the bucket.
642 for (size_t BucketIndex = 0; BucketIndex < 5; ++BucketIndex) {
643 const size_t StartIndex = BucketIndex * PerBucketSize;
644 const size_t EndIndex = StartIndex + PerBucketSize;
645 iterator_range<function_iterator> Functions(
646 ValidFunctions.begin() + StartIndex,
647 ValidFunctions.begin() + EndIndex);
648 const size_t MaxFunctionExecutionCount =
649 ValidFunctions[StartIndex]->getKnownExecutionCount();
650 const size_t MinFunctionExecutionCount =
651 ValidFunctions[EndIndex - 1]->getKnownExecutionCount();
652 BC.outs() << format(Fmt: "----------------\n| Bucket %zu: "
653 "|\n----------------\n",
654 Vals: BucketIndex + 1)
655 << format(
656 Fmt: "execution counts of the %zu functions in the bucket: "
657 "%zu-%zu\n",
658 Vals: EndIndex - StartIndex, Vals: MinFunctionExecutionCount,
659 Vals: MaxFunctionExecutionCount);
660 printCFGContinuityStats(OS&: BC.outs(), Functions);
661 printCallGraphFlowConservationStats(OS&: BC.outs(), Functions, TotalFlowMap);
662 printCFGFlowConservationStats(BC, OS&: BC.outs(), Functions, TotalFlowMap);
663 printExceptionHandlingStats(BC, OS&: BC.outs(), Functions);
664 }
665 }
666}
667} // namespace
668
669bool PrintProfileQualityStats::shouldOptimize(const BinaryFunction &BF) const {
670 if (BF.empty() || !BF.hasValidProfile())
671 return false;
672
673 return BinaryFunctionPass::shouldOptimize(BF);
674}
675
676Error PrintProfileQualityStats::runOnFunctions(BinaryContext &BC) {
677 // Create a list of functions with valid profiles.
678 FunctionListType ValidFunctions;
679 for (const auto &BFI : BC.getBinaryFunctions()) {
680 const BinaryFunction *Function = &BFI.second;
681 if (PrintProfileQualityStats::shouldOptimize(BF: *Function))
682 ValidFunctions.push_back(x: Function);
683 }
684 if (ValidFunctions.empty() || opts::TopFunctionsForProfileQualityCheck == 0)
685 return Error::success();
686
687 printAll(BC, ValidFunctions, NumTopFunctions: opts::TopFunctionsForProfileQualityCheck);
688 return Error::success();
689}
690

Provided by KDAB

Privacy Policy
Update your C++ knowledge – Modern C++11/14/17 Training
Find out more

source code of bolt/lib/Passes/ProfileQualityStats.cpp