1 | //===- bolt/Core/DynoStats.cpp - Dynamic execution stats ------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the DynoStats class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "bolt/Core/DynoStats.h" |
14 | #include "bolt/Core/BinaryBasicBlock.h" |
15 | #include "bolt/Core/BinaryFunction.h" |
16 | #include "llvm/ADT/StringRef.h" |
17 | #include "llvm/Support/CommandLine.h" |
18 | #include "llvm/Support/Debug.h" |
19 | #include "llvm/Support/raw_ostream.h" |
20 | #include <algorithm> |
21 | #include <string> |
22 | |
23 | #undef DEBUG_TYPE |
24 | #define DEBUG_TYPE "bolt" |
25 | |
26 | using namespace llvm; |
27 | using namespace bolt; |
28 | |
29 | namespace opts { |
30 | |
31 | extern cl::OptionCategory BoltCategory; |
32 | |
33 | static cl::opt<uint32_t> |
34 | DynoStatsScale("dyno-stats-scale" , |
35 | cl::desc("scale to be applied while reporting dyno stats" ), |
36 | cl::Optional, |
37 | cl::init(Val: 1), |
38 | cl::Hidden, |
39 | cl::cat(BoltCategory)); |
40 | |
41 | static cl::opt<uint32_t> |
42 | PrintDynoOpcodeStat("print-dyno-opcode-stats" , |
43 | cl::desc("print per instruction opcode dyno stats and the function" |
44 | "names:BB offsets of the nth highest execution counts" ), |
45 | cl::init(Val: 0), |
46 | cl::Hidden, |
47 | cl::cat(BoltCategory)); |
48 | |
49 | } // namespace opts |
50 | |
51 | namespace llvm { |
52 | namespace bolt { |
53 | |
54 | constexpr const char *DynoStats::Desc[]; |
55 | |
56 | bool DynoStats::operator<(const DynoStats &Other) const { |
57 | return std::lexicographical_compare( |
58 | first1: &Stats[FIRST_DYNO_STAT], last1: &Stats[LAST_DYNO_STAT], |
59 | first2: &Other.Stats[FIRST_DYNO_STAT], last2: &Other.Stats[LAST_DYNO_STAT]); |
60 | } |
61 | |
62 | bool DynoStats::operator==(const DynoStats &Other) const { |
63 | return std::equal(first1: &Stats[FIRST_DYNO_STAT], last1: &Stats[LAST_DYNO_STAT], |
64 | first2: &Other.Stats[FIRST_DYNO_STAT]); |
65 | } |
66 | |
67 | bool DynoStats::lessThan(const DynoStats &Other, |
68 | ArrayRef<Category> Keys) const { |
69 | return std::lexicographical_compare( |
70 | first1: Keys.begin(), last1: Keys.end(), first2: Keys.begin(), last2: Keys.end(), |
71 | comp: [this, &Other](const Category A, const Category) { |
72 | return Stats[A] < Other.Stats[A]; |
73 | }); |
74 | } |
75 | |
76 | void DynoStats::print(raw_ostream &OS, const DynoStats *Other, |
77 | MCInstPrinter *Printer) const { |
78 | auto printStatWithDelta = [&](const std::string &Name, uint64_t Stat, |
79 | uint64_t OtherStat) { |
80 | OS << format(Fmt: "%'20lld : " , Vals: Stat * opts::DynoStatsScale) << Name; |
81 | if (Other) { |
82 | if (Stat != OtherStat) { |
83 | OtherStat = std::max(a: OtherStat, b: uint64_t(1)); // to prevent divide by 0 |
84 | OS << format(Fmt: " (%+.1f%%)" , Vals: ((float)Stat - (float)OtherStat) * 100.0 / |
85 | (float)(OtherStat)); |
86 | } else { |
87 | OS << " (=)" ; |
88 | } |
89 | } |
90 | OS << '\n'; |
91 | }; |
92 | |
93 | for (auto Stat = DynoStats::FIRST_DYNO_STAT + 1; |
94 | Stat < DynoStats::LAST_DYNO_STAT; ++Stat) { |
95 | |
96 | if (!PrintAArch64Stats && Stat == DynoStats::VENEER_CALLS_AARCH64) |
97 | continue; |
98 | |
99 | printStatWithDelta(Desc[Stat], Stats[Stat], Other ? (*Other)[Stat] : 0); |
100 | } |
101 | if (opts::PrintDynoOpcodeStat && Printer) { |
102 | OS << "\nProgram-wide opcode histogram:\n" ; |
103 | OS << " Opcode, Execution Count, Max Exec Count, " |
104 | "Function Name:Offset ...\n" ; |
105 | std::vector<std::pair<uint64_t, unsigned>> SortedHistogram; |
106 | for (const OpcodeStatTy &Stat : OpcodeHistogram) |
107 | SortedHistogram.emplace_back(args: Stat.second.first, args: Stat.first); |
108 | |
109 | // Sort using lexicographic ordering |
110 | llvm::sort(C&: SortedHistogram); |
111 | |
112 | // Dump in ascending order: Start with Opcode with Highest execution |
113 | // count. |
114 | for (auto &Stat : llvm::reverse(C&: SortedHistogram)) { |
115 | OS << format(Fmt: "%20s,%'18lld" , Vals: Printer->getOpcodeName(Opcode: Stat.second).data(), |
116 | Vals: Stat.first * opts::DynoStatsScale); |
117 | |
118 | MaxOpcodeHistogramTy MaxMultiMap = OpcodeHistogram.at(k: Stat.second).second; |
119 | // Start with function name:BB offset with highest execution count. |
120 | for (auto &Max : llvm::reverse(C&: MaxMultiMap)) { |
121 | OS << format(Fmt: ", %'18lld, " , Vals: Max.first * opts::DynoStatsScale) |
122 | << Max.second.first.str() << ':' << Max.second.second; |
123 | } |
124 | OS << '\n'; |
125 | } |
126 | } |
127 | } |
128 | |
129 | void DynoStats::operator+=(const DynoStats &Other) { |
130 | for (auto Stat = DynoStats::FIRST_DYNO_STAT + 1; |
131 | Stat < DynoStats::LAST_DYNO_STAT; ++Stat) { |
132 | Stats[Stat] += Other[Stat]; |
133 | } |
134 | for (const OpcodeStatTy &Stat : Other.OpcodeHistogram) { |
135 | auto I = OpcodeHistogram.find(x: Stat.first); |
136 | if (I == OpcodeHistogram.end()) { |
137 | OpcodeHistogram.emplace(args: Stat); |
138 | } else { |
139 | // Merge other histograms, log only the opts::PrintDynoOpcodeStat'th |
140 | // maximum counts. |
141 | I->second.first += Stat.second.first; |
142 | auto &MMap = I->second.second; |
143 | auto &OtherMMap = Stat.second.second; |
144 | auto Size = MMap.size(); |
145 | assert(Size <= opts::PrintDynoOpcodeStat); |
146 | for (auto OtherMMapPair : llvm::reverse(C: OtherMMap)) { |
147 | if (Size++ >= opts::PrintDynoOpcodeStat) { |
148 | auto First = MMap.begin(); |
149 | if (OtherMMapPair.first <= First->first) |
150 | break; |
151 | MMap.erase(position: First); |
152 | } |
153 | MMap.emplace(args&: OtherMMapPair); |
154 | } |
155 | } |
156 | } |
157 | } |
158 | |
159 | DynoStats getDynoStats(BinaryFunction &BF) { |
160 | auto &BC = BF.getBinaryContext(); |
161 | |
162 | DynoStats Stats(/*PrintAArch64Stats*/ BC.isAArch64()); |
163 | |
164 | // Return empty-stats about the function we don't completely understand. |
165 | if (!BF.isSimple() || !BF.hasValidProfile() || !BF.hasCanonicalCFG()) |
166 | return Stats; |
167 | |
168 | // Update enumeration of basic blocks for correct detection of branch' |
169 | // direction. |
170 | BF.getLayout().updateLayoutIndices(); |
171 | |
172 | for (BinaryBasicBlock *const BB : BF.getLayout().blocks()) { |
173 | // The basic block execution count equals to the sum of incoming branch |
174 | // frequencies. This may deviate from the sum of outgoing branches of the |
175 | // basic block especially since the block may contain a function that |
176 | // does not return or a function that throws an exception. |
177 | const uint64_t BBExecutionCount = BB->getKnownExecutionCount(); |
178 | |
179 | // Ignore empty blocks and blocks that were not executed. |
180 | if (BB->getNumNonPseudos() == 0 || BBExecutionCount == 0) |
181 | continue; |
182 | |
183 | // Count AArch64 linker-inserted veneers |
184 | if (BF.isAArch64Veneer()) |
185 | Stats[DynoStats::VENEER_CALLS_AARCH64] += BF.getKnownExecutionCount(); |
186 | |
187 | // Count various instruction types by iterating through all instructions. |
188 | // When -print-dyno-opcode-stats is on, count per each opcode and record |
189 | // maximum execution counts. |
190 | for (const MCInst &Instr : *BB) { |
191 | if (opts::PrintDynoOpcodeStat) { |
192 | unsigned Opcode = Instr.getOpcode(); |
193 | auto I = Stats.OpcodeHistogram.find(x: Opcode); |
194 | if (I == Stats.OpcodeHistogram.end()) { |
195 | DynoStats::MaxOpcodeHistogramTy MMap; |
196 | MMap.emplace(args: BBExecutionCount, |
197 | args: std::make_pair(x: BF.getOneName(), y: BB->getOffset())); |
198 | Stats.OpcodeHistogram.emplace(args&: Opcode, |
199 | args: std::make_pair(x: BBExecutionCount, y&: MMap)); |
200 | } else { |
201 | I->second.first += BBExecutionCount; |
202 | bool Insert = true; |
203 | if (I->second.second.size() == opts::PrintDynoOpcodeStat) { |
204 | auto First = I->second.second.begin(); |
205 | if (First->first < BBExecutionCount) |
206 | I->second.second.erase(position: First); |
207 | else |
208 | Insert = false; |
209 | } |
210 | if (Insert) { |
211 | I->second.second.emplace( |
212 | args: BBExecutionCount, |
213 | args: std::make_pair(x: BF.getOneName(), y: BB->getOffset())); |
214 | } |
215 | } |
216 | } |
217 | |
218 | if (BC.MIB->mayStore(Inst: Instr)) { |
219 | Stats[DynoStats::STORES] += BBExecutionCount; |
220 | } |
221 | if (BC.MIB->mayLoad(Inst: Instr)) { |
222 | Stats[DynoStats::LOADS] += BBExecutionCount; |
223 | } |
224 | if (!BC.MIB->isCall(Inst: Instr)) |
225 | continue; |
226 | |
227 | uint64_t CallFreq = BBExecutionCount; |
228 | if (BC.MIB->getConditionalTailCall(Inst: Instr)) { |
229 | CallFreq = |
230 | BC.MIB->getAnnotationWithDefault<uint64_t>(Inst: Instr, Name: "CTCTakenCount" ); |
231 | } |
232 | Stats[DynoStats::FUNCTION_CALLS] += CallFreq; |
233 | if (BC.MIB->isIndirectCall(Inst: Instr)) { |
234 | Stats[DynoStats::INDIRECT_CALLS] += CallFreq; |
235 | } else if (const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Inst: Instr)) { |
236 | const BinaryFunction *BF = BC.getFunctionForSymbol(Symbol: CallSymbol); |
237 | if (BF && BF->isPLTFunction()) { |
238 | Stats[DynoStats::PLT_CALLS] += CallFreq; |
239 | |
240 | // We don't process PLT functions and hence have to adjust relevant |
241 | // dynostats here for: |
242 | // |
243 | // jmp *GOT_ENTRY(%rip) |
244 | // |
245 | // NOTE: this is arch-specific. |
246 | Stats[DynoStats::FUNCTION_CALLS] += CallFreq; |
247 | Stats[DynoStats::INDIRECT_CALLS] += CallFreq; |
248 | Stats[DynoStats::LOADS] += CallFreq; |
249 | Stats[DynoStats::INSTRUCTIONS] += CallFreq; |
250 | } |
251 | } |
252 | } |
253 | |
254 | Stats[DynoStats::INSTRUCTIONS] += BB->getNumNonPseudos() * BBExecutionCount; |
255 | |
256 | // Jump tables. |
257 | const MCInst *LastInstr = BB->getLastNonPseudoInstr(); |
258 | if (BC.MIB->getJumpTable(Inst: *LastInstr)) { |
259 | Stats[DynoStats::JUMP_TABLE_BRANCHES] += BBExecutionCount; |
260 | LLVM_DEBUG( |
261 | static uint64_t MostFrequentJT; |
262 | if (BBExecutionCount > MostFrequentJT) { |
263 | MostFrequentJT = BBExecutionCount; |
264 | dbgs() << "BOLT-INFO: most frequently executed jump table is in " |
265 | << "function " << BF << " in basic block " << BB->getName() |
266 | << " executed totally " << BBExecutionCount << " times.\n" ; |
267 | } |
268 | ); |
269 | continue; |
270 | } |
271 | |
272 | if (BC.MIB->isIndirectBranch(Inst: *LastInstr) && !BC.MIB->isCall(Inst: *LastInstr)) { |
273 | Stats[DynoStats::UNKNOWN_INDIRECT_BRANCHES] += BBExecutionCount; |
274 | continue; |
275 | } |
276 | |
277 | // Update stats for branches. |
278 | const MCSymbol *TBB = nullptr; |
279 | const MCSymbol *FBB = nullptr; |
280 | MCInst *CondBranch = nullptr; |
281 | MCInst *UncondBranch = nullptr; |
282 | if (!BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch)) |
283 | continue; |
284 | |
285 | if (!CondBranch && !UncondBranch) |
286 | continue; |
287 | |
288 | // Simple unconditional branch. |
289 | if (!CondBranch) { |
290 | Stats[DynoStats::UNCOND_BRANCHES] += BBExecutionCount; |
291 | continue; |
292 | } |
293 | |
294 | // CTCs: instruction annotations could be stripped, hence check the number |
295 | // of successors to identify conditional tail calls. |
296 | if (BB->succ_size() == 1) { |
297 | if (BB->branch_info_begin() != BB->branch_info_end()) |
298 | Stats[DynoStats::UNCOND_BRANCHES] += BB->branch_info_begin()->Count; |
299 | continue; |
300 | } |
301 | |
302 | // Conditional branch that could be followed by an unconditional branch. |
303 | uint64_t TakenCount = BB->getTakenBranchInfo().Count; |
304 | if (TakenCount == BinaryBasicBlock::COUNT_NO_PROFILE) |
305 | TakenCount = 0; |
306 | |
307 | uint64_t NonTakenCount = BB->getFallthroughBranchInfo().Count; |
308 | if (NonTakenCount == BinaryBasicBlock::COUNT_NO_PROFILE) |
309 | NonTakenCount = 0; |
310 | |
311 | if (BF.isForwardBranch(From: BB, To: BB->getConditionalSuccessor(Condition: true))) { |
312 | Stats[DynoStats::FORWARD_COND_BRANCHES] += BBExecutionCount; |
313 | Stats[DynoStats::FORWARD_COND_BRANCHES_TAKEN] += TakenCount; |
314 | } else { |
315 | Stats[DynoStats::BACKWARD_COND_BRANCHES] += BBExecutionCount; |
316 | Stats[DynoStats::BACKWARD_COND_BRANCHES_TAKEN] += TakenCount; |
317 | } |
318 | |
319 | if (UncondBranch) { |
320 | Stats[DynoStats::UNCOND_BRANCHES] += NonTakenCount; |
321 | } |
322 | } |
323 | |
324 | return Stats; |
325 | } |
326 | |
327 | } // namespace bolt |
328 | } // namespace llvm |
329 | |