| 1 | //===- bolt/Core/BinaryFunctionCallGraph.cpp ----------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements the BinaryFunctionCallGraph class. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "bolt/Core/BinaryFunctionCallGraph.h" |
| 14 | #include "bolt/Core/BinaryContext.h" |
| 15 | #include "bolt/Core/BinaryFunction.h" |
| 16 | #include "llvm/Support/CommandLine.h" |
| 17 | #include "llvm/Support/Timer.h" |
| 18 | #include <stack> |
| 19 | |
| 20 | #define DEBUG_TYPE "callgraph" |
| 21 | |
| 22 | using namespace llvm; |
| 23 | |
namespace opts {

// Options defined in other BOLT translation units and reused here.
extern cl::opt<bool> TimeOpts;
extern cl::opt<unsigned> Verbosity;
extern cl::OptionCategory BoltCategory;

// -dump-cg=<file>: write the constructed call graph in Graphviz (dot)
// format to the given file; consulted at the end of buildCallGraph.
static cl::opt<std::string>
    DumpCGDot("dump-cg" , cl::desc("dump callgraph to the given file" ),
              cl::cat(BoltCategory));

} // namespace opts
| 35 | |
| 36 | namespace llvm { |
| 37 | namespace bolt { |
| 38 | |
| 39 | CallGraph::NodeId BinaryFunctionCallGraph::addNode(BinaryFunction *BF, |
| 40 | uint32_t Size, |
| 41 | uint64_t Samples) { |
| 42 | NodeId Id = CallGraph::addNode(Size, Samples); |
| 43 | assert(size_t(Id) == Funcs.size()); |
| 44 | Funcs.push_back(x: BF); |
| 45 | FuncToNodeId[BF] = Id; |
| 46 | assert(Funcs[Id] == BF); |
| 47 | return Id; |
| 48 | } |
| 49 | |
| 50 | std::deque<BinaryFunction *> BinaryFunctionCallGraph::buildTraversalOrder() { |
| 51 | NamedRegionTimer T1("buildcgorder" , "Build cg traversal order" , |
| 52 | "CG breakdown" , "CG breakdown" , opts::TimeOpts); |
| 53 | std::deque<BinaryFunction *> TopologicalOrder; |
| 54 | enum NodeStatus { NEW, VISITING, VISITED }; |
| 55 | std::vector<NodeStatus> NodeStatus(Funcs.size()); |
| 56 | std::stack<NodeId> Worklist; |
| 57 | |
| 58 | for (BinaryFunction *Func : Funcs) { |
| 59 | auto It = FuncToNodeId.find(x: Func); |
| 60 | assert(It != FuncToNodeId.end()); |
| 61 | const NodeId Id = It->second; |
| 62 | Worklist.push(x: Id); |
| 63 | NodeStatus[Id] = NEW; |
| 64 | } |
| 65 | |
| 66 | while (!Worklist.empty()) { |
| 67 | const NodeId FuncId = Worklist.top(); |
| 68 | Worklist.pop(); |
| 69 | |
| 70 | if (NodeStatus[FuncId] == VISITED) |
| 71 | continue; |
| 72 | |
| 73 | if (NodeStatus[FuncId] == VISITING) { |
| 74 | TopologicalOrder.push_back(x: Funcs[FuncId]); |
| 75 | NodeStatus[FuncId] = VISITED; |
| 76 | continue; |
| 77 | } |
| 78 | |
| 79 | assert(NodeStatus[FuncId] == NEW); |
| 80 | NodeStatus[FuncId] = VISITING; |
| 81 | Worklist.push(x: FuncId); |
| 82 | for (const NodeId Callee : successors(Id: FuncId)) { |
| 83 | if (NodeStatus[Callee] == VISITING || NodeStatus[Callee] == VISITED) |
| 84 | continue; |
| 85 | Worklist.push(x: Callee); |
| 86 | } |
| 87 | } |
| 88 | |
| 89 | return TopologicalOrder; |
| 90 | } |
| 91 | |
/// Build a call graph over the binary functions in \p BC.
///
/// \p Filter excludes functions: any function for which it returns true is
/// neither added as a node nor recorded as a call target.
/// \p CgFromPerfData: for functions without a valid CFG profile, fall back
/// to the function's raw recorded call sites instead of walking the CFG.
/// \p IncludeSplitCalls: also count calls made from split (cold) blocks.
/// \p UseFunctionHotSize: size split functions via estimateHotSize;
/// \p UseSplitHotSize is forwarded to that estimate.
/// \p UseEdgeCounts: use profiled call counts as arc weights; otherwise (or
/// when no profile exists for a call site) each call site contributes 1.
/// \p IgnoreRecursiveCalls: drop self-call arcs from the graph.
BinaryFunctionCallGraph
buildCallGraph(BinaryContext &BC, CgFilterFunction Filter, bool CgFromPerfData,
               bool IncludeSplitCalls, bool UseFunctionHotSize,
               bool UseSplitHotSize, bool UseEdgeCounts,
               bool IgnoreRecursiveCalls) {
  NamedRegionTimer T1("buildcg" , "Callgraph construction" , "CG breakdown" ,
                      "CG breakdown" , opts::TimeOpts);
  BinaryFunctionCallGraph Cg;
  static constexpr uint64_t COUNT_NO_PROFILE =
      BinaryBasicBlock::COUNT_NO_PROFILE;

  // Compute function size
  auto functionSize = [&](const BinaryFunction *Function) {
    return UseFunctionHotSize && Function->isSplit()
               ? Function->estimateHotSize(UseSplitSize: UseSplitHotSize)
               : Function->estimateSize();
  };

  // Add call graph nodes.
  auto lookupNode = [&](BinaryFunction *Function) {
    const CallGraph::NodeId Id = Cg.maybeGetNodeId(BF: Function);
    if (Id == CallGraph::InvalidId) {
      // It's ok to use the hot size here when the function is split. This is
      // because emitFunctions will emit the hot part first in the order that is
      // computed by ReorderFunctions. The cold part will be emitted with the
      // rest of the cold functions and code.
      const size_t Size = functionSize(Function);
      // NOTE: for functions without a profile, we set the number of samples
      // to zero. This will keep these functions from appearing in the hot
      // section. This is a little weird because we wouldn't be trying to
      // create a node for a function unless it was the target of a call from
      // a hot block. The alternative would be to set the count to one or
      // accumulate the number of calls from the callsite into the function
      // samples. Results from perfomance testing seem to favor the zero
      // count though, so I'm leaving it this way for now.
      return Cg.addNode(BF: Function, Size, Samples: Function->getKnownExecutionCount());
    }
    return Id;
  };

  // Add call graph edges.
  uint64_t NotProcessed = 0;       // call sites that produced no arc
  uint64_t TotalCallsites = 0;     // all call sites inspected
  uint64_t NoProfileCallsites = 0; // call sites lacking a profiled count
  uint64_t NumFallbacks = 0;       // functions handled via raw perf data
  uint64_t RecursiveCallsites = 0; // self-call sites encountered
  for (auto &It : BC.getBinaryFunctions()) {
    BinaryFunction *Function = &It.second;

    if (Filter(*Function))
      continue;

    const CallGraph::NodeId SrcId = lookupNode(Function);
    // Offset of the current basic block from the beginning of the function
    uint64_t Offset = 0;

    // Record one arc SrcId -> node(DstFunc) with weight derived from Count.
    // Returns false when the call site cannot contribute an arc: unknown
    // target symbol, filtered target, or an ignored recursive call.
    auto recordCall = [&](const MCSymbol *DestSymbol, const uint64_t Count) {
      BinaryFunction *DstFunc =
          DestSymbol ? BC.getFunctionForSymbol(Symbol: DestSymbol) : nullptr;
      if (!DstFunc) {
        LLVM_DEBUG(if (opts::Verbosity > 1) dbgs()
                   << "BOLT-DEBUG: buildCallGraph: no function for symbol\n" );
        return false;
      }

      if (DstFunc == Function) {
        LLVM_DEBUG(dbgs() << "BOLT-INFO: recursive call detected in "
                          << *DstFunc << "\n" );
        ++RecursiveCallsites;
        if (IgnoreRecursiveCalls)
          return false;
      }
      if (Filter(*DstFunc)) {
        LLVM_DEBUG(if (opts::Verbosity > 1) dbgs()
                   << "BOLT-DEBUG: buildCallGraph: filtered " << *DstFunc
                   << '\n');
        return false;
      }

      const CallGraph::NodeId DstId = lookupNode(DstFunc);
      const bool IsValidCount = Count != COUNT_NO_PROFILE;
      // Weight is the profiled count only when edge counts were requested
      // and the count is valid; otherwise every call site contributes 1.
      const uint64_t AdjCount = UseEdgeCounts && IsValidCount ? Count : 1;
      if (!IsValidCount)
        ++NoProfileCallsites;
      Cg.incArcWeight(Src: SrcId, Dst: DstId, W: AdjCount, Offset);
      LLVM_DEBUG(if (opts::Verbosity > 1) {
        dbgs() << "BOLT-DEBUG: buildCallGraph: call " << *Function << " -> "
               << *DstFunc << " @ " << Offset << "\n" ;
      });
      return true;
    };

    // Pairs of (symbol, count) for each target at this callsite.
    using TargetDesc = std::pair<const MCSymbol *, uint64_t>;
    using CallInfoTy = std::vector<TargetDesc>;

    // Get pairs of (symbol, count) for each target at this callsite.
    // If the call is to an unknown function the symbol will be nullptr.
    // If there is no profiling data the count will be COUNT_NO_PROFILE.
    auto getCallInfo = [&](const BinaryBasicBlock *BB, const MCInst &Inst) {
      CallInfoTy Counts;
      const MCSymbol *DstSym = BC.MIB->getTargetSymbol(Inst);

      // If this is an indirect call use perf data directly.
      if (!DstSym && BC.MIB->hasAnnotation(Inst, Name: "CallProfile" )) {
        const auto &ICSP = BC.MIB->getAnnotationAs<IndirectCallSiteProfile>(
            Inst, Name: "CallProfile" );
        for (const IndirectCallProfile &CSI : ICSP)
          if (CSI.Symbol)
            Counts.emplace_back(args: CSI.Symbol, args: CSI.Count);
      } else {
        // Single (possibly unknown) target; use the block's execution count.
        const uint64_t Count = BB->getExecutionCount();
        Counts.emplace_back(args&: DstSym, args: Count);
      }

      return Counts;
    };

    // If the function has an invalid profile, try to use the perf data
    // directly (if requested). If there is no perf data for this function,
    // fall back to the CFG walker which attempts to handle missing data.
    if (!Function->hasValidProfile() && CgFromPerfData &&
        !Function->getAllCallSites().empty()) {
      LLVM_DEBUG(
          dbgs() << "BOLT-DEBUG: buildCallGraph: Falling back to perf data"
                 << " for " << *Function << "\n" );
      ++NumFallbacks;
      const size_t Size = functionSize(Function);
      for (const IndirectCallProfile &CSI : Function->getAllCallSites()) {
        ++TotalCallsites;

        // A call site without a resolved target symbol cannot form an arc.
        if (!CSI.Symbol)
          continue;

        // The computed offset may exceed the hot part of the function; hence,
        // bound it by the size.
        Offset = CSI.Offset;
        if (Offset > Size)
          Offset = Size;

        if (!recordCall(CSI.Symbol, CSI.Count))
          ++NotProcessed;
      }
    } else {
      // Walk the CFG in layout order, accumulating instruction offsets.
      for (BinaryBasicBlock *BB : Function->getLayout().blocks()) {
        // Don't count calls from split blocks unless requested.
        if (BB->isSplit() && !IncludeSplitCalls)
          continue;

        // Determine whether the block is included in Function's (hot) size
        // See BinaryFunction::estimateHotSize
        bool BBIncludedInFunctionSize = false;
        if (UseFunctionHotSize && Function->isSplit()) {
          if (UseSplitHotSize)
            BBIncludedInFunctionSize = !BB->isSplit();
          else
            BBIncludedInFunctionSize = BB->getKnownExecutionCount() != 0;
        } else {
          BBIncludedInFunctionSize = true;
        }

        for (MCInst &Inst : *BB) {
          // Find call instructions and extract target symbols from each one.
          if (BC.MIB->isCall(Inst)) {
            const CallInfoTy CallInfo = getCallInfo(BB, Inst);

            if (!CallInfo.empty()) {
              for (const TargetDesc &CI : CallInfo) {
                ++TotalCallsites;
                if (!recordCall(CI.first, CI.second))
                  ++NotProcessed;
              }
            } else {
              // A call with no usable targets still counts as a call site.
              ++TotalCallsites;
              ++NotProcessed;
            }
          }
          // Increase Offset if needed
          if (BBIncludedInFunctionSize)
            Offset += BC.computeCodeSize(Beg: &Inst, End: &Inst + 1);
        }
      }
    }
  }

#ifndef NDEBUG
  // In debug builds also print the summary whenever -debug-only=callgraph
  // is active, independent of the verbosity level.
  bool PrintInfo = DebugFlag && isCurrentDebugType(Type: "callgraph" );
#else
  bool PrintInfo = false;
#endif
  if (PrintInfo || opts::Verbosity > 0)
    BC.outs() << format(Fmt: "BOLT-INFO: buildCallGraph: %u nodes, %u callsites "
                        "(%u recursive), density = %.6lf, %u callsites not "
                        "processed, %u callsites with invalid profile, "
                        "used perf data for %u stale functions.\n" ,
                        Vals: Cg.numNodes(), Vals: TotalCallsites, Vals: RecursiveCallsites,
                        Vals: Cg.density(), Vals: NotProcessed, Vals: NoProfileCallsites,
                        Vals: NumFallbacks);

  // Optionally dump the finished graph in Graphviz format (-dump-cg=<file>),
  // labeling each node with its function's print name.
  if (opts::DumpCGDot.getNumOccurrences()) {
    Cg.printDot(FileName: opts::DumpCGDot, GetLabel: [&](CallGraph::NodeId Id) {
      return Cg.nodeIdToFunc(Id)->getPrintName();
    });
  }

  return Cg;
}
| 299 | |
| 300 | } // namespace bolt |
| 301 | } // namespace llvm |
| 302 | |