1//===- bolt/Passes/BinaryFunctionCallGraph.cpp ----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the BinaryFunctionCallGraph class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "bolt/Passes/BinaryFunctionCallGraph.h"
14#include "bolt/Core/BinaryContext.h"
15#include "bolt/Core/BinaryFunction.h"
16#include "llvm/Support/CommandLine.h"
17#include "llvm/Support/Timer.h"
18#include <stack>
19
20#define DEBUG_TYPE "callgraph"
21
22using namespace llvm;
23
24namespace opts {
25
26extern cl::opt<bool> TimeOpts;
27extern cl::opt<unsigned> Verbosity;
28extern cl::OptionCategory BoltCategory;
29
30static cl::opt<std::string>
31 DumpCGDot("dump-cg", cl::desc("dump callgraph to the given file"),
32 cl::cat(BoltCategory));
33
34} // namespace opts
35
36namespace llvm {
37namespace bolt {
38
39CallGraph::NodeId BinaryFunctionCallGraph::addNode(BinaryFunction *BF,
40 uint32_t Size,
41 uint64_t Samples) {
42 NodeId Id = CallGraph::addNode(Size, Samples);
43 assert(size_t(Id) == Funcs.size());
44 Funcs.push_back(x: BF);
45 FuncToNodeId[BF] = Id;
46 assert(Funcs[Id] == BF);
47 return Id;
48}
49
50std::deque<BinaryFunction *> BinaryFunctionCallGraph::buildTraversalOrder() {
51 NamedRegionTimer T1("buildcgorder", "Build cg traversal order",
52 "CG breakdown", "CG breakdown", opts::TimeOpts);
53 std::deque<BinaryFunction *> TopologicalOrder;
54 enum NodeStatus { NEW, VISITING, VISITED };
55 std::vector<NodeStatus> NodeStatus(Funcs.size());
56 std::stack<NodeId> Worklist;
57
58 for (BinaryFunction *Func : Funcs) {
59 const NodeId Id = FuncToNodeId.at(k: Func);
60 Worklist.push(x: Id);
61 NodeStatus[Id] = NEW;
62 }
63
64 while (!Worklist.empty()) {
65 const NodeId FuncId = Worklist.top();
66 Worklist.pop();
67
68 if (NodeStatus[FuncId] == VISITED)
69 continue;
70
71 if (NodeStatus[FuncId] == VISITING) {
72 TopologicalOrder.push_back(x: Funcs[FuncId]);
73 NodeStatus[FuncId] = VISITED;
74 continue;
75 }
76
77 assert(NodeStatus[FuncId] == NEW);
78 NodeStatus[FuncId] = VISITING;
79 Worklist.push(x: FuncId);
80 for (const NodeId Callee : successors(Id: FuncId)) {
81 if (NodeStatus[Callee] == VISITING || NodeStatus[Callee] == VISITED)
82 continue;
83 Worklist.push(x: Callee);
84 }
85 }
86
87 return TopologicalOrder;
88}
89
90BinaryFunctionCallGraph
91buildCallGraph(BinaryContext &BC, CgFilterFunction Filter, bool CgFromPerfData,
92 bool IncludeSplitCalls, bool UseFunctionHotSize,
93 bool UseSplitHotSize, bool UseEdgeCounts,
94 bool IgnoreRecursiveCalls) {
95 NamedRegionTimer T1("buildcg", "Callgraph construction", "CG breakdown",
96 "CG breakdown", opts::TimeOpts);
97 BinaryFunctionCallGraph Cg;
98 static constexpr uint64_t COUNT_NO_PROFILE =
99 BinaryBasicBlock::COUNT_NO_PROFILE;
100
101 // Compute function size
102 auto functionSize = [&](const BinaryFunction *Function) {
103 return UseFunctionHotSize && Function->isSplit()
104 ? Function->estimateHotSize(UseSplitSize: UseSplitHotSize)
105 : Function->estimateSize();
106 };
107
108 // Add call graph nodes.
109 auto lookupNode = [&](BinaryFunction *Function) {
110 const CallGraph::NodeId Id = Cg.maybeGetNodeId(BF: Function);
111 if (Id == CallGraph::InvalidId) {
112 // It's ok to use the hot size here when the function is split. This is
113 // because emitFunctions will emit the hot part first in the order that is
114 // computed by ReorderFunctions. The cold part will be emitted with the
115 // rest of the cold functions and code.
116 const size_t Size = functionSize(Function);
117 // NOTE: for functions without a profile, we set the number of samples
118 // to zero. This will keep these functions from appearing in the hot
119 // section. This is a little weird because we wouldn't be trying to
120 // create a node for a function unless it was the target of a call from
121 // a hot block. The alternative would be to set the count to one or
122 // accumulate the number of calls from the callsite into the function
123 // samples. Results from perfomance testing seem to favor the zero
124 // count though, so I'm leaving it this way for now.
125 return Cg.addNode(BF: Function, Size, Samples: Function->getKnownExecutionCount());
126 }
127 return Id;
128 };
129
130 // Add call graph edges.
131 uint64_t NotProcessed = 0;
132 uint64_t TotalCallsites = 0;
133 uint64_t NoProfileCallsites = 0;
134 uint64_t NumFallbacks = 0;
135 uint64_t RecursiveCallsites = 0;
136 for (auto &It : BC.getBinaryFunctions()) {
137 BinaryFunction *Function = &It.second;
138
139 if (Filter(*Function))
140 continue;
141
142 const CallGraph::NodeId SrcId = lookupNode(Function);
143 // Offset of the current basic block from the beginning of the function
144 uint64_t Offset = 0;
145
146 auto recordCall = [&](const MCSymbol *DestSymbol, const uint64_t Count) {
147 BinaryFunction *DstFunc =
148 DestSymbol ? BC.getFunctionForSymbol(Symbol: DestSymbol) : nullptr;
149 if (!DstFunc) {
150 LLVM_DEBUG(if (opts::Verbosity > 1) dbgs()
151 << "BOLT-DEBUG: buildCallGraph: no function for symbol\n");
152 return false;
153 }
154
155 if (DstFunc == Function) {
156 LLVM_DEBUG(dbgs() << "BOLT-INFO: recursive call detected in "
157 << *DstFunc << "\n");
158 ++RecursiveCallsites;
159 if (IgnoreRecursiveCalls)
160 return false;
161 }
162 if (Filter(*DstFunc)) {
163 LLVM_DEBUG(if (opts::Verbosity > 1) dbgs()
164 << "BOLT-DEBUG: buildCallGraph: filtered " << *DstFunc
165 << '\n');
166 return false;
167 }
168
169 const CallGraph::NodeId DstId = lookupNode(DstFunc);
170 const bool IsValidCount = Count != COUNT_NO_PROFILE;
171 const uint64_t AdjCount = UseEdgeCounts && IsValidCount ? Count : 1;
172 if (!IsValidCount)
173 ++NoProfileCallsites;
174 Cg.incArcWeight(Src: SrcId, Dst: DstId, W: AdjCount, Offset);
175 LLVM_DEBUG(if (opts::Verbosity > 1) {
176 dbgs() << "BOLT-DEBUG: buildCallGraph: call " << *Function << " -> "
177 << *DstFunc << " @ " << Offset << "\n";
178 });
179 return true;
180 };
181
182 // Pairs of (symbol, count) for each target at this callsite.
183 using TargetDesc = std::pair<const MCSymbol *, uint64_t>;
184 using CallInfoTy = std::vector<TargetDesc>;
185
186 // Get pairs of (symbol, count) for each target at this callsite.
187 // If the call is to an unknown function the symbol will be nullptr.
188 // If there is no profiling data the count will be COUNT_NO_PROFILE.
189 auto getCallInfo = [&](const BinaryBasicBlock *BB, const MCInst &Inst) {
190 CallInfoTy Counts;
191 const MCSymbol *DstSym = BC.MIB->getTargetSymbol(Inst);
192
193 // If this is an indirect call use perf data directly.
194 if (!DstSym && BC.MIB->hasAnnotation(Inst, Name: "CallProfile")) {
195 const auto &ICSP = BC.MIB->getAnnotationAs<IndirectCallSiteProfile>(
196 Inst, Name: "CallProfile");
197 for (const IndirectCallProfile &CSI : ICSP)
198 if (CSI.Symbol)
199 Counts.emplace_back(args: CSI.Symbol, args: CSI.Count);
200 } else {
201 const uint64_t Count = BB->getExecutionCount();
202 Counts.emplace_back(args&: DstSym, args: Count);
203 }
204
205 return Counts;
206 };
207
208 // If the function has an invalid profile, try to use the perf data
209 // directly (if requested). If there is no perf data for this function,
210 // fall back to the CFG walker which attempts to handle missing data.
211 if (!Function->hasValidProfile() && CgFromPerfData &&
212 !Function->getAllCallSites().empty()) {
213 LLVM_DEBUG(
214 dbgs() << "BOLT-DEBUG: buildCallGraph: Falling back to perf data"
215 << " for " << *Function << "\n");
216 ++NumFallbacks;
217 const size_t Size = functionSize(Function);
218 for (const IndirectCallProfile &CSI : Function->getAllCallSites()) {
219 ++TotalCallsites;
220
221 if (!CSI.Symbol)
222 continue;
223
224 // The computed offset may exceed the hot part of the function; hence,
225 // bound it by the size.
226 Offset = CSI.Offset;
227 if (Offset > Size)
228 Offset = Size;
229
230 if (!recordCall(CSI.Symbol, CSI.Count))
231 ++NotProcessed;
232 }
233 } else {
234 for (BinaryBasicBlock *BB : Function->getLayout().blocks()) {
235 // Don't count calls from split blocks unless requested.
236 if (BB->isSplit() && !IncludeSplitCalls)
237 continue;
238
239 // Determine whether the block is included in Function's (hot) size
240 // See BinaryFunction::estimateHotSize
241 bool BBIncludedInFunctionSize = false;
242 if (UseFunctionHotSize && Function->isSplit()) {
243 if (UseSplitHotSize)
244 BBIncludedInFunctionSize = !BB->isSplit();
245 else
246 BBIncludedInFunctionSize = BB->getKnownExecutionCount() != 0;
247 } else {
248 BBIncludedInFunctionSize = true;
249 }
250
251 for (MCInst &Inst : *BB) {
252 // Find call instructions and extract target symbols from each one.
253 if (BC.MIB->isCall(Inst)) {
254 const CallInfoTy CallInfo = getCallInfo(BB, Inst);
255
256 if (!CallInfo.empty()) {
257 for (const TargetDesc &CI : CallInfo) {
258 ++TotalCallsites;
259 if (!recordCall(CI.first, CI.second))
260 ++NotProcessed;
261 }
262 } else {
263 ++TotalCallsites;
264 ++NotProcessed;
265 }
266 }
267 // Increase Offset if needed
268 if (BBIncludedInFunctionSize)
269 Offset += BC.computeCodeSize(Beg: &Inst, End: &Inst + 1);
270 }
271 }
272 }
273 }
274
275#ifndef NDEBUG
276 bool PrintInfo = DebugFlag && isCurrentDebugType(Type: "callgraph");
277#else
278 bool PrintInfo = false;
279#endif
280 if (PrintInfo || opts::Verbosity > 0)
281 BC.outs() << format(Fmt: "BOLT-INFO: buildCallGraph: %u nodes, %u callsites "
282 "(%u recursive), density = %.6lf, %u callsites not "
283 "processed, %u callsites with invalid profile, "
284 "used perf data for %u stale functions.\n",
285 Vals: Cg.numNodes(), Vals: TotalCallsites, Vals: RecursiveCallsites,
286 Vals: Cg.density(), Vals: NotProcessed, Vals: NoProfileCallsites,
287 Vals: NumFallbacks);
288
289 if (opts::DumpCGDot.getNumOccurrences()) {
290 Cg.printDot(FileName: opts::DumpCGDot, GetLabel: [&](CallGraph::NodeId Id) {
291 return Cg.nodeIdToFunc(Id)->getPrintName();
292 });
293 }
294
295 return Cg;
296}
297
298} // namespace bolt
299} // namespace llvm
300

source code of bolt/lib/Passes/BinaryFunctionCallGraph.cpp