1//===- bolt/Core/DynoStats.cpp - Dynamic execution stats ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the DynoStats class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "bolt/Core/DynoStats.h"
14#include "bolt/Core/BinaryBasicBlock.h"
15#include "bolt/Core/BinaryFunction.h"
16#include "llvm/ADT/StringRef.h"
17#include "llvm/Support/CommandLine.h"
18#include "llvm/Support/Debug.h"
19#include "llvm/Support/raw_ostream.h"
20#include <algorithm>
21#include <string>
22
23#undef DEBUG_TYPE
24#define DEBUG_TYPE "bolt"
25
26using namespace llvm;
27using namespace bolt;
28
29namespace opts {
30
31extern cl::OptionCategory BoltCategory;
32
33static cl::opt<uint32_t>
34DynoStatsScale("dyno-stats-scale",
35 cl::desc("scale to be applied while reporting dyno stats"),
36 cl::Optional,
37 cl::init(Val: 1),
38 cl::Hidden,
39 cl::cat(BoltCategory));
40
41static cl::opt<uint32_t>
42PrintDynoOpcodeStat("print-dyno-opcode-stats",
43 cl::desc("print per instruction opcode dyno stats and the function"
44 "names:BB offsets of the nth highest execution counts"),
45 cl::init(Val: 0),
46 cl::Hidden,
47 cl::cat(BoltCategory));
48
49} // namespace opts
50
51namespace llvm {
52namespace bolt {
53
54constexpr const char *DynoStats::Desc[];
55
56bool DynoStats::operator<(const DynoStats &Other) const {
57 return std::lexicographical_compare(
58 first1: &Stats[FIRST_DYNO_STAT], last1: &Stats[LAST_DYNO_STAT],
59 first2: &Other.Stats[FIRST_DYNO_STAT], last2: &Other.Stats[LAST_DYNO_STAT]);
60}
61
62bool DynoStats::operator==(const DynoStats &Other) const {
63 return std::equal(first1: &Stats[FIRST_DYNO_STAT], last1: &Stats[LAST_DYNO_STAT],
64 first2: &Other.Stats[FIRST_DYNO_STAT]);
65}
66
67bool DynoStats::lessThan(const DynoStats &Other,
68 ArrayRef<Category> Keys) const {
69 return std::lexicographical_compare(
70 first1: Keys.begin(), last1: Keys.end(), first2: Keys.begin(), last2: Keys.end(),
71 comp: [this, &Other](const Category A, const Category) {
72 return Stats[A] < Other.Stats[A];
73 });
74}
75
76void DynoStats::print(raw_ostream &OS, const DynoStats *Other,
77 MCInstPrinter *Printer) const {
78 auto printStatWithDelta = [&](const std::string &Name, uint64_t Stat,
79 uint64_t OtherStat) {
80 OS << format(Fmt: "%'20lld : ", Vals: Stat * opts::DynoStatsScale) << Name;
81 if (Other) {
82 if (Stat != OtherStat) {
83 OtherStat = std::max(a: OtherStat, b: uint64_t(1)); // to prevent divide by 0
84 OS << format(Fmt: " (%+.1f%%)", Vals: ((float)Stat - (float)OtherStat) * 100.0 /
85 (float)(OtherStat));
86 } else {
87 OS << " (=)";
88 }
89 }
90 OS << '\n';
91 };
92
93 for (auto Stat = DynoStats::FIRST_DYNO_STAT + 1;
94 Stat < DynoStats::LAST_DYNO_STAT; ++Stat) {
95
96 if (!PrintAArch64Stats && Stat == DynoStats::VENEER_CALLS_AARCH64)
97 continue;
98
99 printStatWithDelta(Desc[Stat], Stats[Stat], Other ? (*Other)[Stat] : 0);
100 }
101 if (opts::PrintDynoOpcodeStat && Printer) {
102 OS << "\nProgram-wide opcode histogram:\n";
103 OS << " Opcode, Execution Count, Max Exec Count, "
104 "Function Name:Offset ...\n";
105 std::vector<std::pair<uint64_t, unsigned>> SortedHistogram;
106 for (const OpcodeStatTy &Stat : OpcodeHistogram)
107 SortedHistogram.emplace_back(args: Stat.second.first, args: Stat.first);
108
109 // Sort using lexicographic ordering
110 llvm::sort(C&: SortedHistogram);
111
112 // Dump in ascending order: Start with Opcode with Highest execution
113 // count.
114 for (auto &Stat : llvm::reverse(C&: SortedHistogram)) {
115 OS << format(Fmt: "%20s,%'18lld", Vals: Printer->getOpcodeName(Opcode: Stat.second).data(),
116 Vals: Stat.first * opts::DynoStatsScale);
117 auto It = OpcodeHistogram.find(x: Stat.second);
118 assert(It != OpcodeHistogram.end());
119 MaxOpcodeHistogramTy MaxMultiMap = It->second.second;
120 // Start with function name:BB offset with highest execution count.
121 for (auto &Max : llvm::reverse(C&: MaxMultiMap)) {
122 OS << format(Fmt: ", %'18lld, ", Vals: Max.first * opts::DynoStatsScale)
123 << Max.second.first.str() << ':' << Max.second.second;
124 }
125 OS << '\n';
126 }
127 }
128}
129
130void DynoStats::operator+=(const DynoStats &Other) {
131 for (auto Stat = DynoStats::FIRST_DYNO_STAT + 1;
132 Stat < DynoStats::LAST_DYNO_STAT; ++Stat) {
133 Stats[Stat] += Other[Stat];
134 }
135 for (const OpcodeStatTy &Stat : Other.OpcodeHistogram) {
136 auto I = OpcodeHistogram.find(x: Stat.first);
137 if (I == OpcodeHistogram.end()) {
138 OpcodeHistogram.emplace(args: Stat);
139 } else {
140 // Merge other histograms, log only the opts::PrintDynoOpcodeStat'th
141 // maximum counts.
142 I->second.first += Stat.second.first;
143 auto &MMap = I->second.second;
144 auto &OtherMMap = Stat.second.second;
145 auto Size = MMap.size();
146 assert(Size <= opts::PrintDynoOpcodeStat);
147 for (auto OtherMMapPair : llvm::reverse(C: OtherMMap)) {
148 if (Size++ >= opts::PrintDynoOpcodeStat) {
149 auto First = MMap.begin();
150 if (OtherMMapPair.first <= First->first)
151 break;
152 MMap.erase(position: First);
153 }
154 MMap.emplace(args&: OtherMMapPair);
155 }
156 }
157 }
158}
159
160DynoStats getDynoStats(BinaryFunction &BF) {
161 auto &BC = BF.getBinaryContext();
162
163 DynoStats Stats(/*PrintAArch64Stats*/ BC.isAArch64());
164
165 // Return empty-stats about the function we don't completely understand.
166 if (!BF.isSimple() || !BF.hasValidProfile() || !BF.hasCanonicalCFG())
167 return Stats;
168
169 // Update enumeration of basic blocks for correct detection of branch'
170 // direction.
171 BF.getLayout().updateLayoutIndices();
172
173 for (BinaryBasicBlock *const BB : BF.getLayout().blocks()) {
174 // The basic block execution count equals to the sum of incoming branch
175 // frequencies. This may deviate from the sum of outgoing branches of the
176 // basic block especially since the block may contain a function that
177 // does not return or a function that throws an exception.
178 const uint64_t BBExecutionCount = BB->getKnownExecutionCount();
179
180 // Ignore empty blocks and blocks that were not executed.
181 if (BB->getNumNonPseudos() == 0 || BBExecutionCount == 0)
182 continue;
183
184 // Count AArch64 linker-inserted veneers
185 if (BF.isAArch64Veneer())
186 Stats[DynoStats::VENEER_CALLS_AARCH64] += BF.getKnownExecutionCount();
187
188 // Count various instruction types by iterating through all instructions.
189 // When -print-dyno-opcode-stats is on, count per each opcode and record
190 // maximum execution counts.
191 for (const MCInst &Instr : *BB) {
192 if (opts::PrintDynoOpcodeStat) {
193 unsigned Opcode = Instr.getOpcode();
194 auto I = Stats.OpcodeHistogram.find(x: Opcode);
195 if (I == Stats.OpcodeHistogram.end()) {
196 DynoStats::MaxOpcodeHistogramTy MMap;
197 MMap.emplace(args: BBExecutionCount,
198 args: std::make_pair(x: BF.getOneName(), y: BB->getOffset()));
199 Stats.OpcodeHistogram.emplace(args&: Opcode,
200 args: std::make_pair(x: BBExecutionCount, y&: MMap));
201 } else {
202 I->second.first += BBExecutionCount;
203 bool Insert = true;
204 if (I->second.second.size() == opts::PrintDynoOpcodeStat) {
205 auto First = I->second.second.begin();
206 if (First->first < BBExecutionCount)
207 I->second.second.erase(position: First);
208 else
209 Insert = false;
210 }
211 if (Insert) {
212 I->second.second.emplace(
213 args: BBExecutionCount,
214 args: std::make_pair(x: BF.getOneName(), y: BB->getOffset()));
215 }
216 }
217 }
218
219 if (BC.MIB->mayStore(Inst: Instr)) {
220 Stats[DynoStats::STORES] += BBExecutionCount;
221 }
222 if (BC.MIB->mayLoad(Inst: Instr)) {
223 Stats[DynoStats::LOADS] += BBExecutionCount;
224 }
225 if (!BC.MIB->isCall(Inst: Instr))
226 continue;
227
228 uint64_t CallFreq = BBExecutionCount;
229 if (BC.MIB->getConditionalTailCall(Inst: Instr)) {
230 CallFreq =
231 BC.MIB->getAnnotationWithDefault<uint64_t>(Inst: Instr, Name: "CTCTakenCount");
232 }
233 Stats[DynoStats::FUNCTION_CALLS] += CallFreq;
234 if (BC.MIB->isIndirectCall(Inst: Instr)) {
235 Stats[DynoStats::INDIRECT_CALLS] += CallFreq;
236 } else if (const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Inst: Instr)) {
237 const BinaryFunction *BF = BC.getFunctionForSymbol(Symbol: CallSymbol);
238 if (BF && BF->isPLTFunction()) {
239 Stats[DynoStats::PLT_CALLS] += CallFreq;
240
241 // We don't process PLT functions and hence have to adjust relevant
242 // dynostats here for:
243 //
244 // jmp *GOT_ENTRY(%rip)
245 //
246 // NOTE: this is arch-specific.
247 Stats[DynoStats::FUNCTION_CALLS] += CallFreq;
248 Stats[DynoStats::INDIRECT_CALLS] += CallFreq;
249 Stats[DynoStats::LOADS] += CallFreq;
250 Stats[DynoStats::INSTRUCTIONS] += CallFreq;
251 }
252 }
253 }
254
255 Stats[DynoStats::INSTRUCTIONS] += BB->getNumNonPseudos() * BBExecutionCount;
256
257 // Jump tables.
258 const MCInst *LastInstr = BB->getLastNonPseudoInstr();
259 if (BC.MIB->getJumpTable(Inst: *LastInstr)) {
260 Stats[DynoStats::JUMP_TABLE_BRANCHES] += BBExecutionCount;
261 LLVM_DEBUG(
262 static uint64_t MostFrequentJT;
263 if (BBExecutionCount > MostFrequentJT) {
264 MostFrequentJT = BBExecutionCount;
265 dbgs() << "BOLT-INFO: most frequently executed jump table is in "
266 << "function " << BF << " in basic block " << BB->getName()
267 << " executed totally " << BBExecutionCount << " times.\n";
268 }
269 );
270 continue;
271 }
272
273 if (BC.MIB->isIndirectBranch(Inst: *LastInstr) && !BC.MIB->isCall(Inst: *LastInstr)) {
274 Stats[DynoStats::UNKNOWN_INDIRECT_BRANCHES] += BBExecutionCount;
275 continue;
276 }
277
278 // Update stats for branches.
279 const MCSymbol *TBB = nullptr;
280 const MCSymbol *FBB = nullptr;
281 MCInst *CondBranch = nullptr;
282 MCInst *UncondBranch = nullptr;
283 if (!BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch))
284 continue;
285
286 if (!CondBranch && !UncondBranch)
287 continue;
288
289 // Simple unconditional branch.
290 if (!CondBranch) {
291 Stats[DynoStats::UNCOND_BRANCHES] += BBExecutionCount;
292 continue;
293 }
294
295 // CTCs: instruction annotations could be stripped, hence check the number
296 // of successors to identify conditional tail calls.
297 if (BB->succ_size() == 1) {
298 if (BB->branch_info_begin() != BB->branch_info_end())
299 Stats[DynoStats::UNCOND_BRANCHES] += BB->branch_info_begin()->Count;
300 continue;
301 }
302
303 // Conditional branch that could be followed by an unconditional branch.
304 uint64_t TakenCount = BB->getTakenBranchInfo().Count;
305 if (TakenCount == BinaryBasicBlock::COUNT_NO_PROFILE)
306 TakenCount = 0;
307
308 uint64_t NonTakenCount = BB->getFallthroughBranchInfo().Count;
309 if (NonTakenCount == BinaryBasicBlock::COUNT_NO_PROFILE)
310 NonTakenCount = 0;
311
312 if (BF.isForwardBranch(From: BB, To: BB->getConditionalSuccessor(Condition: true))) {
313 Stats[DynoStats::FORWARD_COND_BRANCHES] += BBExecutionCount;
314 Stats[DynoStats::FORWARD_COND_BRANCHES_TAKEN] += TakenCount;
315 } else {
316 Stats[DynoStats::BACKWARD_COND_BRANCHES] += BBExecutionCount;
317 Stats[DynoStats::BACKWARD_COND_BRANCHES_TAKEN] += TakenCount;
318 }
319
320 if (UncondBranch) {
321 Stats[DynoStats::UNCOND_BRANCHES] += NonTakenCount;
322 }
323 }
324
325 return Stats;
326}
327
328} // namespace bolt
329} // namespace llvm
330

Provided by KDAB

Privacy Policy
Improve your Profiling and Debugging skills
Find out more

source code of bolt/lib/Core/DynoStats.cpp