1 | //===- bolt/Passes/BinaryFunctionCallGraph.cpp ----------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the BinaryFunctionCallGraph class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "bolt/Passes/BinaryFunctionCallGraph.h" |
14 | #include "bolt/Core/BinaryContext.h" |
15 | #include "bolt/Core/BinaryFunction.h" |
16 | #include "llvm/Support/CommandLine.h" |
17 | #include "llvm/Support/Timer.h" |
18 | #include <stack> |
19 | |
20 | #define DEBUG_TYPE "callgraph" |
21 | |
22 | using namespace llvm; |
23 | |
24 | namespace opts { |
25 | |
26 | extern cl::opt<bool> TimeOpts; |
27 | extern cl::opt<unsigned> Verbosity; |
28 | extern cl::OptionCategory BoltCategory; |
29 | |
30 | static cl::opt<std::string> |
31 | DumpCGDot("dump-cg" , cl::desc("dump callgraph to the given file" ), |
32 | cl::cat(BoltCategory)); |
33 | |
34 | } // namespace opts |
35 | |
36 | namespace llvm { |
37 | namespace bolt { |
38 | |
39 | CallGraph::NodeId BinaryFunctionCallGraph::addNode(BinaryFunction *BF, |
40 | uint32_t Size, |
41 | uint64_t Samples) { |
42 | NodeId Id = CallGraph::addNode(Size, Samples); |
43 | assert(size_t(Id) == Funcs.size()); |
44 | Funcs.push_back(x: BF); |
45 | FuncToNodeId[BF] = Id; |
46 | assert(Funcs[Id] == BF); |
47 | return Id; |
48 | } |
49 | |
50 | std::deque<BinaryFunction *> BinaryFunctionCallGraph::buildTraversalOrder() { |
51 | NamedRegionTimer T1("buildcgorder" , "Build cg traversal order" , |
52 | "CG breakdown" , "CG breakdown" , opts::TimeOpts); |
53 | std::deque<BinaryFunction *> TopologicalOrder; |
54 | enum NodeStatus { NEW, VISITING, VISITED }; |
55 | std::vector<NodeStatus> NodeStatus(Funcs.size()); |
56 | std::stack<NodeId> Worklist; |
57 | |
58 | for (BinaryFunction *Func : Funcs) { |
59 | const NodeId Id = FuncToNodeId.at(k: Func); |
60 | Worklist.push(x: Id); |
61 | NodeStatus[Id] = NEW; |
62 | } |
63 | |
64 | while (!Worklist.empty()) { |
65 | const NodeId FuncId = Worklist.top(); |
66 | Worklist.pop(); |
67 | |
68 | if (NodeStatus[FuncId] == VISITED) |
69 | continue; |
70 | |
71 | if (NodeStatus[FuncId] == VISITING) { |
72 | TopologicalOrder.push_back(x: Funcs[FuncId]); |
73 | NodeStatus[FuncId] = VISITED; |
74 | continue; |
75 | } |
76 | |
77 | assert(NodeStatus[FuncId] == NEW); |
78 | NodeStatus[FuncId] = VISITING; |
79 | Worklist.push(x: FuncId); |
80 | for (const NodeId Callee : successors(Id: FuncId)) { |
81 | if (NodeStatus[Callee] == VISITING || NodeStatus[Callee] == VISITED) |
82 | continue; |
83 | Worklist.push(x: Callee); |
84 | } |
85 | } |
86 | |
87 | return TopologicalOrder; |
88 | } |
89 | |
90 | BinaryFunctionCallGraph |
91 | buildCallGraph(BinaryContext &BC, CgFilterFunction Filter, bool CgFromPerfData, |
92 | bool IncludeSplitCalls, bool UseFunctionHotSize, |
93 | bool UseSplitHotSize, bool UseEdgeCounts, |
94 | bool IgnoreRecursiveCalls) { |
95 | NamedRegionTimer T1("buildcg" , "Callgraph construction" , "CG breakdown" , |
96 | "CG breakdown" , opts::TimeOpts); |
97 | BinaryFunctionCallGraph Cg; |
98 | static constexpr uint64_t COUNT_NO_PROFILE = |
99 | BinaryBasicBlock::COUNT_NO_PROFILE; |
100 | |
101 | // Compute function size |
102 | auto functionSize = [&](const BinaryFunction *Function) { |
103 | return UseFunctionHotSize && Function->isSplit() |
104 | ? Function->estimateHotSize(UseSplitSize: UseSplitHotSize) |
105 | : Function->estimateSize(); |
106 | }; |
107 | |
108 | // Add call graph nodes. |
109 | auto lookupNode = [&](BinaryFunction *Function) { |
110 | const CallGraph::NodeId Id = Cg.maybeGetNodeId(BF: Function); |
111 | if (Id == CallGraph::InvalidId) { |
112 | // It's ok to use the hot size here when the function is split. This is |
113 | // because emitFunctions will emit the hot part first in the order that is |
114 | // computed by ReorderFunctions. The cold part will be emitted with the |
115 | // rest of the cold functions and code. |
116 | const size_t Size = functionSize(Function); |
117 | // NOTE: for functions without a profile, we set the number of samples |
118 | // to zero. This will keep these functions from appearing in the hot |
119 | // section. This is a little weird because we wouldn't be trying to |
120 | // create a node for a function unless it was the target of a call from |
121 | // a hot block. The alternative would be to set the count to one or |
122 | // accumulate the number of calls from the callsite into the function |
123 | // samples. Results from perfomance testing seem to favor the zero |
124 | // count though, so I'm leaving it this way for now. |
125 | return Cg.addNode(BF: Function, Size, Samples: Function->getKnownExecutionCount()); |
126 | } |
127 | return Id; |
128 | }; |
129 | |
130 | // Add call graph edges. |
131 | uint64_t NotProcessed = 0; |
132 | uint64_t TotalCallsites = 0; |
133 | uint64_t NoProfileCallsites = 0; |
134 | uint64_t NumFallbacks = 0; |
135 | uint64_t RecursiveCallsites = 0; |
136 | for (auto &It : BC.getBinaryFunctions()) { |
137 | BinaryFunction *Function = &It.second; |
138 | |
139 | if (Filter(*Function)) |
140 | continue; |
141 | |
142 | const CallGraph::NodeId SrcId = lookupNode(Function); |
143 | // Offset of the current basic block from the beginning of the function |
144 | uint64_t Offset = 0; |
145 | |
146 | auto recordCall = [&](const MCSymbol *DestSymbol, const uint64_t Count) { |
147 | BinaryFunction *DstFunc = |
148 | DestSymbol ? BC.getFunctionForSymbol(Symbol: DestSymbol) : nullptr; |
149 | if (!DstFunc) { |
150 | LLVM_DEBUG(if (opts::Verbosity > 1) dbgs() |
151 | << "BOLT-DEBUG: buildCallGraph: no function for symbol\n" ); |
152 | return false; |
153 | } |
154 | |
155 | if (DstFunc == Function) { |
156 | LLVM_DEBUG(dbgs() << "BOLT-INFO: recursive call detected in " |
157 | << *DstFunc << "\n" ); |
158 | ++RecursiveCallsites; |
159 | if (IgnoreRecursiveCalls) |
160 | return false; |
161 | } |
162 | if (Filter(*DstFunc)) { |
163 | LLVM_DEBUG(if (opts::Verbosity > 1) dbgs() |
164 | << "BOLT-DEBUG: buildCallGraph: filtered " << *DstFunc |
165 | << '\n'); |
166 | return false; |
167 | } |
168 | |
169 | const CallGraph::NodeId DstId = lookupNode(DstFunc); |
170 | const bool IsValidCount = Count != COUNT_NO_PROFILE; |
171 | const uint64_t AdjCount = UseEdgeCounts && IsValidCount ? Count : 1; |
172 | if (!IsValidCount) |
173 | ++NoProfileCallsites; |
174 | Cg.incArcWeight(Src: SrcId, Dst: DstId, W: AdjCount, Offset); |
175 | LLVM_DEBUG(if (opts::Verbosity > 1) { |
176 | dbgs() << "BOLT-DEBUG: buildCallGraph: call " << *Function << " -> " |
177 | << *DstFunc << " @ " << Offset << "\n" ; |
178 | }); |
179 | return true; |
180 | }; |
181 | |
182 | // Pairs of (symbol, count) for each target at this callsite. |
183 | using TargetDesc = std::pair<const MCSymbol *, uint64_t>; |
184 | using CallInfoTy = std::vector<TargetDesc>; |
185 | |
186 | // Get pairs of (symbol, count) for each target at this callsite. |
187 | // If the call is to an unknown function the symbol will be nullptr. |
188 | // If there is no profiling data the count will be COUNT_NO_PROFILE. |
189 | auto getCallInfo = [&](const BinaryBasicBlock *BB, const MCInst &Inst) { |
190 | CallInfoTy Counts; |
191 | const MCSymbol *DstSym = BC.MIB->getTargetSymbol(Inst); |
192 | |
193 | // If this is an indirect call use perf data directly. |
194 | if (!DstSym && BC.MIB->hasAnnotation(Inst, Name: "CallProfile" )) { |
195 | const auto &ICSP = BC.MIB->getAnnotationAs<IndirectCallSiteProfile>( |
196 | Inst, Name: "CallProfile" ); |
197 | for (const IndirectCallProfile &CSI : ICSP) |
198 | if (CSI.Symbol) |
199 | Counts.emplace_back(args: CSI.Symbol, args: CSI.Count); |
200 | } else { |
201 | const uint64_t Count = BB->getExecutionCount(); |
202 | Counts.emplace_back(args&: DstSym, args: Count); |
203 | } |
204 | |
205 | return Counts; |
206 | }; |
207 | |
208 | // If the function has an invalid profile, try to use the perf data |
209 | // directly (if requested). If there is no perf data for this function, |
210 | // fall back to the CFG walker which attempts to handle missing data. |
211 | if (!Function->hasValidProfile() && CgFromPerfData && |
212 | !Function->getAllCallSites().empty()) { |
213 | LLVM_DEBUG( |
214 | dbgs() << "BOLT-DEBUG: buildCallGraph: Falling back to perf data" |
215 | << " for " << *Function << "\n" ); |
216 | ++NumFallbacks; |
217 | const size_t Size = functionSize(Function); |
218 | for (const IndirectCallProfile &CSI : Function->getAllCallSites()) { |
219 | ++TotalCallsites; |
220 | |
221 | if (!CSI.Symbol) |
222 | continue; |
223 | |
224 | // The computed offset may exceed the hot part of the function; hence, |
225 | // bound it by the size. |
226 | Offset = CSI.Offset; |
227 | if (Offset > Size) |
228 | Offset = Size; |
229 | |
230 | if (!recordCall(CSI.Symbol, CSI.Count)) |
231 | ++NotProcessed; |
232 | } |
233 | } else { |
234 | for (BinaryBasicBlock *BB : Function->getLayout().blocks()) { |
235 | // Don't count calls from split blocks unless requested. |
236 | if (BB->isSplit() && !IncludeSplitCalls) |
237 | continue; |
238 | |
239 | // Determine whether the block is included in Function's (hot) size |
240 | // See BinaryFunction::estimateHotSize |
241 | bool BBIncludedInFunctionSize = false; |
242 | if (UseFunctionHotSize && Function->isSplit()) { |
243 | if (UseSplitHotSize) |
244 | BBIncludedInFunctionSize = !BB->isSplit(); |
245 | else |
246 | BBIncludedInFunctionSize = BB->getKnownExecutionCount() != 0; |
247 | } else { |
248 | BBIncludedInFunctionSize = true; |
249 | } |
250 | |
251 | for (MCInst &Inst : *BB) { |
252 | // Find call instructions and extract target symbols from each one. |
253 | if (BC.MIB->isCall(Inst)) { |
254 | const CallInfoTy CallInfo = getCallInfo(BB, Inst); |
255 | |
256 | if (!CallInfo.empty()) { |
257 | for (const TargetDesc &CI : CallInfo) { |
258 | ++TotalCallsites; |
259 | if (!recordCall(CI.first, CI.second)) |
260 | ++NotProcessed; |
261 | } |
262 | } else { |
263 | ++TotalCallsites; |
264 | ++NotProcessed; |
265 | } |
266 | } |
267 | // Increase Offset if needed |
268 | if (BBIncludedInFunctionSize) |
269 | Offset += BC.computeCodeSize(Beg: &Inst, End: &Inst + 1); |
270 | } |
271 | } |
272 | } |
273 | } |
274 | |
275 | #ifndef NDEBUG |
276 | bool PrintInfo = DebugFlag && isCurrentDebugType(Type: "callgraph" ); |
277 | #else |
278 | bool PrintInfo = false; |
279 | #endif |
280 | if (PrintInfo || opts::Verbosity > 0) |
281 | BC.outs() << format(Fmt: "BOLT-INFO: buildCallGraph: %u nodes, %u callsites " |
282 | "(%u recursive), density = %.6lf, %u callsites not " |
283 | "processed, %u callsites with invalid profile, " |
284 | "used perf data for %u stale functions.\n" , |
285 | Vals: Cg.numNodes(), Vals: TotalCallsites, Vals: RecursiveCallsites, |
286 | Vals: Cg.density(), Vals: NotProcessed, Vals: NoProfileCallsites, |
287 | Vals: NumFallbacks); |
288 | |
289 | if (opts::DumpCGDot.getNumOccurrences()) { |
290 | Cg.printDot(FileName: opts::DumpCGDot, GetLabel: [&](CallGraph::NodeId Id) { |
291 | return Cg.nodeIdToFunc(Id)->getPrintName(); |
292 | }); |
293 | } |
294 | |
295 | return Cg; |
296 | } |
297 | |
298 | } // namespace bolt |
299 | } // namespace llvm |
300 | |