1//===- bolt/Core/BinaryFunctionProfile.cpp - Profile processing -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements BinaryFunction member functions related to processing
10// the execution profile.
11//
12//===----------------------------------------------------------------------===//
13
14#include "bolt/Core/BinaryBasicBlock.h"
15#include "bolt/Core/BinaryFunction.h"
16#include "llvm/Support/CommandLine.h"
17#include "llvm/Support/Debug.h"
18#include "llvm/Support/raw_ostream.h"
19
20#undef DEBUG_TYPE
21#define DEBUG_TYPE "bolt-prof"
22
23using namespace llvm;
24using namespace bolt;
25
26namespace opts {
27
28extern cl::OptionCategory BoltOptCategory;
29
30cl::opt<IndirectCallPromotionType> ICP(
31 "indirect-call-promotion", cl::init(Val: ICP_NONE),
32 cl::desc("indirect call promotion"),
33 cl::values(
34 clEnumValN(ICP_NONE, "none", "do not perform indirect call promotion"),
35 clEnumValN(ICP_CALLS, "calls", "perform ICP on indirect calls"),
36 clEnumValN(ICP_JUMP_TABLES, "jump-tables",
37 "perform ICP on jump tables"),
38 clEnumValN(ICP_ALL, "all", "perform ICP on calls and jump tables")),
39 cl::ZeroOrMore, cl::cat(BoltOptCategory));
40
41static cl::alias ICPAlias("icp",
42 cl::desc("Alias for --indirect-call-promotion"),
43 cl::aliasopt(ICP));
44
45extern cl::opt<JumpTableSupportLevel> JumpTables;
46
47static cl::opt<bool> FixFuncCounts(
48 "fix-func-counts",
49 cl::desc("adjust function counts based on basic blocks execution count"),
50 cl::Hidden, cl::cat(BoltOptCategory));
51
52static cl::opt<bool> FixBlockCounts(
53 "fix-block-counts",
54 cl::desc("adjust block counts based on outgoing branch counts"),
55 cl::init(Val: true), cl::Hidden, cl::cat(BoltOptCategory));
56
57static cl::opt<bool>
58 InferFallThroughs("infer-fall-throughs",
59 cl::desc("infer execution count for fall-through blocks"),
60 cl::Hidden, cl::cat(BoltOptCategory));
61
62} // namespace opts
63
64namespace llvm {
65namespace bolt {
66
67void BinaryFunction::postProcessProfile() {
68 if (!hasValidProfile()) {
69 clearProfile();
70 return;
71 }
72
73 if (!(getProfileFlags() & PF_LBR))
74 return;
75
76 // If we have at least some branch data for the function indicate that it
77 // was executed.
78 if (opts::FixFuncCounts && ExecutionCount == 0)
79 ExecutionCount = 1;
80
81 // Compute preliminary execution count for each basic block.
82 for (BinaryBasicBlock *BB : BasicBlocks) {
83 if ((!BB->isEntryPoint() && !BB->isLandingPad()) ||
84 BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE)
85 BB->ExecutionCount = 0;
86 }
87 for (BinaryBasicBlock *BB : BasicBlocks) {
88 auto SuccBIIter = BB->branch_info_begin();
89 for (BinaryBasicBlock *Succ : BB->successors()) {
90 // All incoming edges to the primary entry have been accounted for, thus
91 // we skip the update here.
92 if (SuccBIIter->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
93 Succ != BasicBlocks.front())
94 Succ->setExecutionCount(Succ->getExecutionCount() + SuccBIIter->Count);
95 ++SuccBIIter;
96 }
97 }
98
99 // Fix for old profiles.
100 for (BinaryBasicBlock *BB : BasicBlocks) {
101 if (BB->size() != 1 || BB->succ_size() != 1)
102 continue;
103
104 if (BB->getKnownExecutionCount() == 0)
105 continue;
106
107 MCInst *Instr = BB->getFirstNonPseudoInstr();
108 assert(Instr && "expected non-pseudo instr");
109 if (!BC.MIB->hasAnnotation(Inst: *Instr, Name: "NOP"))
110 continue;
111
112 BinaryBasicBlock *FTSuccessor = BB->getSuccessor();
113 BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(Succ: *FTSuccessor);
114 if (!BI.Count) {
115 BI.Count = BB->getKnownExecutionCount();
116 FTSuccessor->setExecutionCount(FTSuccessor->getKnownExecutionCount() +
117 BI.Count);
118 }
119 }
120
121 if (opts::FixBlockCounts) {
122 for (BinaryBasicBlock *BB : BasicBlocks) {
123 // Make sure that execution count of a block is at least the branch count
124 // of an incoming/outgoing jump.
125 auto SuccBIIter = BB->branch_info_begin();
126 for (BinaryBasicBlock *Succ : BB->successors()) {
127 uint64_t Count = SuccBIIter->Count;
128 if (Count != BinaryBasicBlock::COUNT_NO_PROFILE && Count > 0) {
129 Succ->setExecutionCount(std::max(a: Succ->getExecutionCount(), b: Count));
130 BB->setExecutionCount(std::max(a: BB->getExecutionCount(), b: Count));
131 }
132 ++SuccBIIter;
133 }
134 // Make sure that execution count of a block is at least the number of
135 // function calls from the block.
136 for (MCInst &Inst : *BB) {
137 // Ignore non-call instruction
138 if (!BC.MIB->isCall(Inst))
139 continue;
140
141 auto CountAnnt = BC.MIB->tryGetAnnotationAs<uint64_t>(Inst, Name: "Count");
142 if (CountAnnt)
143 BB->setExecutionCount(std::max(a: BB->getExecutionCount(), b: *CountAnnt));
144 }
145 }
146 }
147
148 if (opts::InferFallThroughs)
149 inferFallThroughCounts();
150
151 // Update profile information for jump tables based on CFG branch data.
152 for (BinaryBasicBlock *BB : BasicBlocks) {
153 const MCInst *LastInstr = BB->getLastNonPseudoInstr();
154 if (!LastInstr)
155 continue;
156 const uint64_t JTAddress = BC.MIB->getJumpTable(Inst: *LastInstr);
157 if (!JTAddress)
158 continue;
159 JumpTable *JT = getJumpTableContainingAddress(Address: JTAddress);
160 if (!JT)
161 continue;
162
163 uint64_t TotalBranchCount = 0;
164 for (const BinaryBasicBlock::BinaryBranchInfo &BranchInfo :
165 BB->branch_info()) {
166 TotalBranchCount += BranchInfo.Count;
167 }
168 JT->Count += TotalBranchCount;
169
170 if (opts::ICP < ICP_JUMP_TABLES && opts::JumpTables < JTS_AGGRESSIVE)
171 continue;
172
173 if (JT->Counts.empty())
174 JT->Counts.resize(new_size: JT->Entries.size());
175 auto EI = JT->Entries.begin();
176 uint64_t Delta = (JTAddress - JT->getAddress()) / JT->EntrySize;
177 EI += Delta;
178 while (EI != JT->Entries.end()) {
179 const BinaryBasicBlock *TargetBB = getBasicBlockForLabel(Label: *EI);
180 if (TargetBB) {
181 const BinaryBasicBlock::BinaryBranchInfo &BranchInfo =
182 BB->getBranchInfo(Succ: *TargetBB);
183 assert(Delta < JT->Counts.size());
184 JT->Counts[Delta].Count += BranchInfo.Count;
185 JT->Counts[Delta].Mispreds += BranchInfo.MispredictedCount;
186 }
187 ++Delta;
188 ++EI;
189 // A label marks the start of another jump table.
190 if (JT->Labels.count(x: Delta * JT->EntrySize))
191 break;
192 }
193 }
194}
195
196void BinaryFunction::mergeProfileDataInto(BinaryFunction &BF) const {
197 // No reason to merge invalid or empty profiles into BF.
198 if (!hasValidProfile())
199 return;
200
201 // Update function execution count.
202 if (getExecutionCount() != BinaryFunction::COUNT_NO_PROFILE)
203 BF.setExecutionCount(BF.getKnownExecutionCount() + getExecutionCount());
204
205 // Since we are merging a valid profile, the new profile should be valid too.
206 // It has either already been valid, or it has been cleaned up.
207 BF.ProfileMatchRatio = 1.0f;
208
209 // Update basic block and edge counts.
210 auto BBMergeI = BF.begin();
211 for (BinaryBasicBlock *BB : BasicBlocks) {
212 BinaryBasicBlock *BBMerge = &*BBMergeI;
213 assert(getIndex(BB) == BF.getIndex(BBMerge));
214
215 // Update basic block count.
216 if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE) {
217 BBMerge->setExecutionCount(BBMerge->getKnownExecutionCount() +
218 BB->getExecutionCount());
219 }
220
221 // Update edge count for successors of this basic block.
222 auto BBMergeSI = BBMerge->succ_begin();
223 auto BIMergeI = BBMerge->branch_info_begin();
224 auto BII = BB->branch_info_begin();
225 for (const BinaryBasicBlock *BBSucc : BB->successors()) {
226 (void)BBSucc;
227 assert(getIndex(BBSucc) == BF.getIndex(*BBMergeSI));
228 (void)BBMergeSI;
229
230 // At this point no branch count should be set to COUNT_NO_PROFILE.
231 assert(BII->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
232 "unexpected unknown branch profile");
233 assert(BIMergeI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
234 "unexpected unknown branch profile");
235
236 BIMergeI->Count += BII->Count;
237
238 // When we merge inferred and real fall-through branch data, the merged
239 // data is considered inferred.
240 if (BII->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED &&
241 BIMergeI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
242 BIMergeI->MispredictedCount += BII->MispredictedCount;
243 } else {
244 BIMergeI->MispredictedCount = BinaryBasicBlock::COUNT_INFERRED;
245 }
246
247 ++BBMergeSI;
248 ++BII;
249 ++BIMergeI;
250 }
251 assert(BBMergeSI == BBMerge->succ_end());
252
253 ++BBMergeI;
254 }
255 assert(BBMergeI == BF.end());
256
257 // Merge jump tables profile info.
258 auto JTMergeI = BF.JumpTables.begin();
259 for (const auto &JTEntry : JumpTables) {
260 if (JTMergeI->second->Counts.empty())
261 JTMergeI->second->Counts.resize(new_size: JTEntry.second->Counts.size());
262 auto CountMergeI = JTMergeI->second->Counts.begin();
263 for (const JumpTable::JumpInfo &JI : JTEntry.second->Counts) {
264 CountMergeI->Count += JI.Count;
265 CountMergeI->Mispreds += JI.Mispreds;
266 ++CountMergeI;
267 }
268 assert(CountMergeI == JTMergeI->second->Counts.end());
269
270 ++JTMergeI;
271 }
272 assert(JTMergeI == BF.JumpTables.end());
273}
274
275void BinaryFunction::inferFallThroughCounts() {
276 // Work on a basic block at a time, propagating frequency information
277 // forwards.
278 // It is important to walk in the layout order.
279 for (BinaryBasicBlock *BB : BasicBlocks) {
280 const uint64_t BBExecCount = BB->getExecutionCount();
281
282 // Propagate this information to successors, filling in fall-through edges
283 // with frequency information
284 if (BB->succ_size() == 0)
285 continue;
286
287 // Calculate frequency of outgoing branches from this node according to
288 // LBR data.
289 uint64_t ReportedBranches = 0;
290 for (const BinaryBasicBlock::BinaryBranchInfo &SuccBI : BB->branch_info())
291 if (SuccBI.Count != BinaryBasicBlock::COUNT_NO_PROFILE)
292 ReportedBranches += SuccBI.Count;
293
294 // Get taken count of conditional tail call if the block ends with one.
295 uint64_t CTCTakenCount = 0;
296 const MCInst *CTCInstr = BB->getLastNonPseudoInstr();
297 if (CTCInstr && BC.MIB->getConditionalTailCall(Inst: *CTCInstr)) {
298 CTCTakenCount = BC.MIB->getAnnotationWithDefault<uint64_t>(
299 Inst: *CTCInstr, Name: "CTCTakenCount");
300 }
301
302 // Calculate frequency of throws from this node according to LBR data
303 // for branching into associated landing pads. Since it is possible
304 // for a landing pad to be associated with more than one basic blocks,
305 // we may overestimate the frequency of throws for such blocks.
306 uint64_t ReportedThrows = 0;
307 for (const BinaryBasicBlock *LP : BB->landing_pads())
308 ReportedThrows += LP->getExecutionCount();
309
310 const uint64_t TotalReportedJumps =
311 ReportedBranches + CTCTakenCount + ReportedThrows;
312
313 // Infer the frequency of the fall-through edge, representing not taking the
314 // branch.
315 uint64_t Inferred = 0;
316 if (BBExecCount > TotalReportedJumps)
317 Inferred = BBExecCount - TotalReportedJumps;
318
319 LLVM_DEBUG(
320 if (BBExecCount < TotalReportedJumps) dbgs()
321 << "Fall-through inference is slightly inconsistent. "
322 "exec frequency is less than the outgoing edges frequency ("
323 << BBExecCount << " < " << ReportedBranches
324 << ") for BB at offset 0x"
325 << Twine::utohexstr(getAddress() + BB->getOffset()) << '\n';);
326
327 if (BB->succ_size() <= 2) {
328 // Skip if the last instruction is an unconditional jump.
329 const MCInst *LastInstr = BB->getLastNonPseudoInstr();
330 if (LastInstr && (BC.MIB->isUnconditionalBranch(Inst: *LastInstr) ||
331 BC.MIB->isIndirectBranch(Inst: *LastInstr)))
332 continue;
333 // If there is an FT it will be the last successor.
334 auto &SuccBI = *BB->branch_info_rbegin();
335 auto &Succ = *BB->succ_rbegin();
336 if (SuccBI.Count == 0) {
337 SuccBI.Count = Inferred;
338 SuccBI.MispredictedCount = BinaryBasicBlock::COUNT_INFERRED;
339 Succ->ExecutionCount += Inferred;
340 }
341 }
342 }
343}
344
345void BinaryFunction::clearProfile() {
346 // Keep function execution profile the same. Only clear basic block and edge
347 // counts.
348 for (BinaryBasicBlock *BB : BasicBlocks) {
349 BB->ExecutionCount = 0;
350 for (BinaryBasicBlock::BinaryBranchInfo &BI : BB->branch_info()) {
351 BI.Count = 0;
352 BI.MispredictedCount = 0;
353 }
354 }
355}
356
357} // namespace bolt
358} // namespace llvm
359

source code of bolt/lib/Core/BinaryFunctionProfile.cpp