| 1 | //===- bolt/Passes/Instrumentation.cpp ------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements the Instrumentation class. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "bolt/Passes/Instrumentation.h" |
| 14 | #include "bolt/Core/ParallelUtilities.h" |
| 15 | #include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h" |
| 16 | #include "bolt/Utils/CommandLineOpts.h" |
| 17 | #include "bolt/Utils/Utils.h" |
| 18 | #include "llvm/Support/CommandLine.h" |
| 19 | #include "llvm/Support/RWMutex.h" |
| 20 | #include <queue> |
| 21 | #include <stack> |
| 22 | #include <unordered_set> |
| 23 | |
| 24 | #define DEBUG_TYPE "bolt-instrumentation" |
| 25 | |
| 26 | using namespace llvm; |
| 27 | |
| 28 | namespace opts { |
| 29 | extern cl::OptionCategory BoltInstrCategory; |
| 30 | |
| 31 | cl::opt<std::string> InstrumentationFilename( |
| 32 | "instrumentation-file" , |
| 33 | cl::desc("file name where instrumented profile will be saved (default: " |
| 34 | "/tmp/prof.fdata)" ), |
| 35 | cl::init(Val: "/tmp/prof.fdata" ), cl::Optional, cl::cat(BoltInstrCategory)); |
| 36 | |
| 37 | cl::opt<std::string> InstrumentationBinpath( |
| 38 | "instrumentation-binpath" , |
| 39 | cl::desc("path to instrumented binary in case if /proc/self/map_files " |
| 40 | "is not accessible due to access restriction issues" ), |
| 41 | cl::Optional, cl::cat(BoltInstrCategory)); |
| 42 | |
| 43 | cl::opt<bool> InstrumentationFileAppendPID( |
| 44 | "instrumentation-file-append-pid" , |
| 45 | cl::desc("append PID to saved profile file name (default: false)" ), |
| 46 | cl::init(Val: false), cl::Optional, cl::cat(BoltInstrCategory)); |
| 47 | |
| 48 | cl::opt<bool> ConservativeInstrumentation( |
| 49 | "conservative-instrumentation" , |
| 50 | cl::desc("disable instrumentation optimizations that sacrifice profile " |
| 51 | "accuracy (for debugging, default: false)" ), |
| 52 | cl::init(Val: false), cl::Optional, cl::cat(BoltInstrCategory)); |
| 53 | |
| 54 | cl::opt<uint32_t> InstrumentationSleepTime( |
| 55 | "instrumentation-sleep-time" , |
| 56 | cl::desc("interval between profile writes (default: 0 = write only at " |
| 57 | "program end). This is useful for service workloads when you " |
| 58 | "want to dump profile every X minutes or if you are killing the " |
| 59 | "program and the profile is not being dumped at the end." ), |
| 60 | cl::init(Val: 0), cl::Optional, cl::cat(BoltInstrCategory)); |
| 61 | |
| 62 | cl::opt<bool> InstrumentationNoCountersClear( |
| 63 | "instrumentation-no-counters-clear" , |
| 64 | cl::desc("Don't clear counters across dumps " |
| 65 | "(use with instrumentation-sleep-time option)" ), |
| 66 | cl::init(Val: false), cl::Optional, cl::cat(BoltInstrCategory)); |
| 67 | |
| 68 | cl::opt<bool> InstrumentationWaitForks( |
| 69 | "instrumentation-wait-forks" , |
| 70 | cl::desc("Wait until all forks of instrumented process will finish " |
| 71 | "(use with instrumentation-sleep-time option)" ), |
| 72 | cl::init(Val: false), cl::Optional, cl::cat(BoltInstrCategory)); |
| 73 | |
| 74 | cl::opt<bool> |
| 75 | InstrumentHotOnly("instrument-hot-only" , |
| 76 | cl::desc("only insert instrumentation on hot functions " |
| 77 | "(needs profile, default: false)" ), |
| 78 | cl::init(Val: false), cl::Optional, |
| 79 | cl::cat(BoltInstrCategory)); |
| 80 | |
| 81 | cl::opt<bool> InstrumentCalls("instrument-calls" , |
| 82 | cl::desc("record profile for inter-function " |
| 83 | "control flow activity (default: true)" ), |
| 84 | cl::init(Val: true), cl::Optional, |
| 85 | cl::cat(BoltInstrCategory)); |
| 86 | } // namespace opts |
| 87 | |
| 88 | namespace llvm { |
| 89 | namespace bolt { |
| 90 | |
| 91 | static bool hasAArch64ExclusiveMemop( |
| 92 | BinaryFunction &Function, |
| 93 | std::unordered_set<const BinaryBasicBlock *> &BBToSkip) { |
| 94 | // FIXME ARMv8-a architecture reference manual says that software must avoid |
| 95 | // having any explicit memory accesses between exclusive load and associated |
| 96 | // store instruction. So for now skip instrumentation for basic blocks that |
| 97 | // have these instructions, since it might lead to runtime deadlock. |
| 98 | BinaryContext &BC = Function.getBinaryContext(); |
| 99 | std::queue<std::pair<BinaryBasicBlock *, bool>> BBQueue; // {BB, isLoad} |
| 100 | std::unordered_set<BinaryBasicBlock *> Visited; |
| 101 | |
| 102 | if (Function.getLayout().block_begin() == Function.getLayout().block_end()) |
| 103 | return 0; |
| 104 | |
| 105 | BinaryBasicBlock *BBfirst = *Function.getLayout().block_begin(); |
| 106 | BBQueue.push(x: {BBfirst, false}); |
| 107 | |
| 108 | while (!BBQueue.empty()) { |
| 109 | BinaryBasicBlock *BB = BBQueue.front().first; |
| 110 | bool IsLoad = BBQueue.front().second; |
| 111 | BBQueue.pop(); |
| 112 | if (!Visited.insert(x: BB).second) |
| 113 | continue; |
| 114 | |
| 115 | for (const MCInst &Inst : *BB) { |
| 116 | // Two loads one after another - skip whole function |
| 117 | if (BC.MIB->isAArch64ExclusiveLoad(Inst) && IsLoad) { |
| 118 | if (opts::Verbosity >= 2) { |
| 119 | outs() << "BOLT-INSTRUMENTER: function " << Function.getPrintName() |
| 120 | << " has two exclusive loads. Ignoring the function.\n" ; |
| 121 | } |
| 122 | return true; |
| 123 | } |
| 124 | |
| 125 | if (BC.MIB->isAArch64ExclusiveLoad(Inst)) |
| 126 | IsLoad = true; |
| 127 | |
| 128 | if (IsLoad && BBToSkip.insert(x: BB).second) { |
| 129 | if (opts::Verbosity >= 2) { |
| 130 | outs() << "BOLT-INSTRUMENTER: skip BB " << BB->getName() |
| 131 | << " due to exclusive instruction in function " |
| 132 | << Function.getPrintName() << "\n" ; |
| 133 | } |
| 134 | } |
| 135 | |
| 136 | if (!IsLoad && BC.MIB->isAArch64ExclusiveStore(Inst)) { |
| 137 | if (opts::Verbosity >= 2) { |
| 138 | outs() << "BOLT-INSTRUMENTER: function " << Function.getPrintName() |
| 139 | << " has exclusive store without corresponding load. Ignoring " |
| 140 | "the function.\n" ; |
| 141 | } |
| 142 | return true; |
| 143 | } |
| 144 | |
| 145 | if (IsLoad && (BC.MIB->isAArch64ExclusiveStore(Inst) || |
| 146 | BC.MIB->isAArch64ExclusiveClear(Inst))) |
| 147 | IsLoad = false; |
| 148 | } |
| 149 | |
| 150 | if (IsLoad && BB->succ_size() == 0) { |
| 151 | if (opts::Verbosity >= 2) { |
| 152 | outs() |
| 153 | << "BOLT-INSTRUMENTER: function " << Function.getPrintName() |
| 154 | << " has exclusive load in trailing BB. Ignoring the function.\n" ; |
| 155 | } |
| 156 | return true; |
| 157 | } |
| 158 | |
| 159 | for (BinaryBasicBlock *BBS : BB->successors()) |
| 160 | BBQueue.push(x: {BBS, IsLoad}); |
| 161 | } |
| 162 | |
| 163 | if (BBToSkip.size() == Visited.size()) { |
| 164 | if (opts::Verbosity >= 2) { |
| 165 | outs() << "BOLT-INSTRUMENTER: all BBs are marked with true. Ignoring the " |
| 166 | "function " |
| 167 | << Function.getPrintName() << "\n" ; |
| 168 | } |
| 169 | return true; |
| 170 | } |
| 171 | |
| 172 | return false; |
| 173 | } |
| 174 | |
| 175 | uint32_t Instrumentation::getFunctionNameIndex(const BinaryFunction &Function) { |
| 176 | auto Iter = FuncToStringIdx.find(x: &Function); |
| 177 | if (Iter != FuncToStringIdx.end()) |
| 178 | return Iter->second; |
| 179 | size_t Idx = Summary->StringTable.size(); |
| 180 | FuncToStringIdx.emplace(args: std::make_pair(x: &Function, y&: Idx)); |
| 181 | Summary->StringTable.append(str: getEscapedName(Name: Function.getOneName())); |
| 182 | Summary->StringTable.append(n: 1, c: '\0'); |
| 183 | return Idx; |
| 184 | } |
| 185 | |
| 186 | bool Instrumentation::createCallDescription(FunctionDescription &FuncDesc, |
| 187 | const BinaryFunction &FromFunction, |
| 188 | uint32_t From, uint32_t FromNodeID, |
| 189 | const BinaryFunction &ToFunction, |
| 190 | uint32_t To, bool IsInvoke) { |
| 191 | CallDescription CD; |
| 192 | // Ordinarily, we don't augment direct calls with an explicit counter, except |
| 193 | // when forced to do so or when we know this callee could be throwing |
| 194 | // exceptions, in which case there is no other way to accurately record its |
| 195 | // frequency. |
| 196 | bool ForceInstrumentation = opts::ConservativeInstrumentation || IsInvoke; |
| 197 | CD.FromLoc.FuncString = getFunctionNameIndex(Function: FromFunction); |
| 198 | CD.FromLoc.Offset = From; |
| 199 | CD.FromNode = FromNodeID; |
| 200 | CD.Target = &ToFunction; |
| 201 | CD.ToLoc.FuncString = getFunctionNameIndex(Function: ToFunction); |
| 202 | CD.ToLoc.Offset = To; |
| 203 | CD.Counter = ForceInstrumentation ? Summary->Counters.size() : 0xffffffff; |
| 204 | if (ForceInstrumentation) |
| 205 | ++DirectCallCounters; |
| 206 | FuncDesc.Calls.emplace_back(args&: CD); |
| 207 | return ForceInstrumentation; |
| 208 | } |
| 209 | |
| 210 | void Instrumentation::createIndCallDescription( |
| 211 | const BinaryFunction &FromFunction, uint32_t From) { |
| 212 | IndCallDescription ICD; |
| 213 | ICD.FromLoc.FuncString = getFunctionNameIndex(Function: FromFunction); |
| 214 | ICD.FromLoc.Offset = From; |
| 215 | Summary->IndCallDescriptions.emplace_back(args&: ICD); |
| 216 | } |
| 217 | |
| 218 | void Instrumentation::createIndCallTargetDescription( |
| 219 | const BinaryFunction &ToFunction, uint32_t To) { |
| 220 | IndCallTargetDescription ICD; |
| 221 | ICD.ToLoc.FuncString = getFunctionNameIndex(Function: ToFunction); |
| 222 | ICD.ToLoc.Offset = To; |
| 223 | ICD.Target = &ToFunction; |
| 224 | Summary->IndCallTargetDescriptions.emplace_back(args&: ICD); |
| 225 | } |
| 226 | |
| 227 | bool Instrumentation::createEdgeDescription(FunctionDescription &FuncDesc, |
| 228 | const BinaryFunction &FromFunction, |
| 229 | uint32_t From, uint32_t FromNodeID, |
| 230 | const BinaryFunction &ToFunction, |
| 231 | uint32_t To, uint32_t ToNodeID, |
| 232 | bool Instrumented) { |
| 233 | EdgeDescription ED; |
| 234 | auto Result = FuncDesc.EdgesSet.insert(V: std::make_pair(x&: FromNodeID, y&: ToNodeID)); |
| 235 | // Avoid creating duplicated edge descriptions. This happens in CFGs where a |
| 236 | // block jumps to its fall-through. |
| 237 | if (Result.second == false) |
| 238 | return false; |
| 239 | ED.FromLoc.FuncString = getFunctionNameIndex(Function: FromFunction); |
| 240 | ED.FromLoc.Offset = From; |
| 241 | ED.FromNode = FromNodeID; |
| 242 | ED.ToLoc.FuncString = getFunctionNameIndex(Function: ToFunction); |
| 243 | ED.ToLoc.Offset = To; |
| 244 | ED.ToNode = ToNodeID; |
| 245 | ED.Counter = Instrumented ? Summary->Counters.size() : 0xffffffff; |
| 246 | if (Instrumented) |
| 247 | ++BranchCounters; |
| 248 | FuncDesc.Edges.emplace_back(args&: ED); |
| 249 | return Instrumented; |
| 250 | } |
| 251 | |
| 252 | void Instrumentation::createLeafNodeDescription(FunctionDescription &FuncDesc, |
| 253 | uint32_t Node) { |
| 254 | InstrumentedNode IN; |
| 255 | IN.Node = Node; |
| 256 | IN.Counter = Summary->Counters.size(); |
| 257 | ++LeafNodeCounters; |
| 258 | FuncDesc.LeafNodes.emplace_back(args&: IN); |
| 259 | } |
| 260 | |
| 261 | InstructionListType |
| 262 | Instrumentation::createInstrumentationSnippet(BinaryContext &BC, bool IsLeaf) { |
| 263 | auto L = BC.scopeLock(); |
| 264 | MCSymbol *Label = BC.Ctx->createNamedTempSymbol(Name: "InstrEntry" ); |
| 265 | Summary->Counters.emplace_back(args&: Label); |
| 266 | return BC.MIB->createInstrIncMemory(Target: Label, Ctx: BC.Ctx.get(), IsLeaf, |
| 267 | CodePointerSize: BC.AsmInfo->getCodePointerSize()); |
| 268 | } |
| 269 | |
| 270 | // Helper instruction sequence insertion function |
| 271 | static BinaryBasicBlock::iterator |
| 272 | insertInstructions(InstructionListType &Instrs, BinaryBasicBlock &BB, |
| 273 | BinaryBasicBlock::iterator Iter) { |
| 274 | for (MCInst &NewInst : Instrs) { |
| 275 | Iter = BB.insertInstruction(At: Iter, NewInst); |
| 276 | ++Iter; |
| 277 | } |
| 278 | return Iter; |
| 279 | } |
| 280 | |
| 281 | void Instrumentation::instrumentLeafNode(BinaryBasicBlock &BB, |
| 282 | BinaryBasicBlock::iterator Iter, |
| 283 | bool IsLeaf, |
| 284 | FunctionDescription &FuncDesc, |
| 285 | uint32_t Node) { |
| 286 | createLeafNodeDescription(FuncDesc, Node); |
| 287 | InstructionListType CounterInstrs = createInstrumentationSnippet( |
| 288 | BC&: BB.getFunction()->getBinaryContext(), IsLeaf); |
| 289 | insertInstructions(Instrs&: CounterInstrs, BB, Iter); |
| 290 | } |
| 291 | |
| 292 | void Instrumentation::instrumentIndirectTarget(BinaryBasicBlock &BB, |
| 293 | BinaryBasicBlock::iterator &Iter, |
| 294 | BinaryFunction &FromFunction, |
| 295 | uint32_t From) { |
| 296 | auto L = FromFunction.getBinaryContext().scopeLock(); |
| 297 | const size_t IndCallSiteID = Summary->IndCallDescriptions.size(); |
| 298 | createIndCallDescription(FromFunction, From); |
| 299 | |
| 300 | BinaryContext &BC = FromFunction.getBinaryContext(); |
| 301 | bool IsTailCall = BC.MIB->isTailCall(Inst: *Iter); |
| 302 | InstructionListType CounterInstrs = BC.MIB->createInstrumentedIndirectCall( |
| 303 | CallInst: std::move(*Iter), |
| 304 | HandlerFuncAddr: IsTailCall ? IndTailCallHandlerExitBBFunction->getSymbol() |
| 305 | : IndCallHandlerExitBBFunction->getSymbol(), |
| 306 | CallSiteID: IndCallSiteID, Ctx: &*BC.Ctx); |
| 307 | |
| 308 | Iter = BB.eraseInstruction(II: Iter); |
| 309 | Iter = insertInstructions(Instrs&: CounterInstrs, BB, Iter); |
| 310 | --Iter; |
| 311 | } |
| 312 | |
| 313 | bool Instrumentation::instrumentOneTarget( |
| 314 | SplitWorklistTy &SplitWorklist, SplitInstrsTy &SplitInstrs, |
| 315 | BinaryBasicBlock::iterator &Iter, BinaryFunction &FromFunction, |
| 316 | BinaryBasicBlock &FromBB, uint32_t From, BinaryFunction &ToFunc, |
| 317 | BinaryBasicBlock *TargetBB, uint32_t ToOffset, bool IsLeaf, bool IsInvoke, |
| 318 | FunctionDescription *FuncDesc, uint32_t FromNodeID, uint32_t ToNodeID) { |
| 319 | BinaryContext &BC = FromFunction.getBinaryContext(); |
| 320 | { |
| 321 | auto L = BC.scopeLock(); |
| 322 | bool Created = true; |
| 323 | if (!TargetBB) |
| 324 | Created = createCallDescription(FuncDesc&: *FuncDesc, FromFunction, From, FromNodeID, |
| 325 | ToFunction: ToFunc, To: ToOffset, IsInvoke); |
| 326 | else |
| 327 | Created = createEdgeDescription(FuncDesc&: *FuncDesc, FromFunction, From, FromNodeID, |
| 328 | ToFunction: ToFunc, To: ToOffset, ToNodeID, |
| 329 | /*Instrumented=*/true); |
| 330 | if (!Created) |
| 331 | return false; |
| 332 | } |
| 333 | |
| 334 | InstructionListType CounterInstrs = createInstrumentationSnippet(BC, IsLeaf); |
| 335 | |
| 336 | const MCInst &Inst = *Iter; |
| 337 | if (BC.MIB->isCall(Inst)) { |
| 338 | // This code handles both |
| 339 | // - (regular) inter-function calls (cross-function control transfer), |
| 340 | // - (rare) intra-function calls (function-local control transfer) |
| 341 | Iter = insertInstructions(Instrs&: CounterInstrs, BB&: FromBB, Iter); |
| 342 | return true; |
| 343 | } |
| 344 | |
| 345 | if (!TargetBB || !FuncDesc) |
| 346 | return false; |
| 347 | |
| 348 | // Indirect branch, conditional branches or fall-throughs |
| 349 | // Regular cond branch, put counter at start of target block |
| 350 | // |
| 351 | // N.B.: (FromBB != TargetBBs) checks below handle conditional jumps where |
| 352 | // we can't put the instrumentation counter in this block because not all |
| 353 | // paths that reach it at this point will be taken and going to the target. |
| 354 | if (TargetBB->pred_size() == 1 && &FromBB != TargetBB && |
| 355 | !TargetBB->isEntryPoint()) { |
| 356 | insertInstructions(Instrs&: CounterInstrs, BB&: *TargetBB, Iter: TargetBB->begin()); |
| 357 | return true; |
| 358 | } |
| 359 | if (FromBB.succ_size() == 1 && &FromBB != TargetBB) { |
| 360 | Iter = insertInstructions(Instrs&: CounterInstrs, BB&: FromBB, Iter); |
| 361 | return true; |
| 362 | } |
| 363 | // Critical edge, create BB and put counter there |
| 364 | SplitWorklist.emplace_back(args: &FromBB, args&: TargetBB); |
| 365 | SplitInstrs.emplace_back(args: std::move(CounterInstrs)); |
| 366 | return true; |
| 367 | } |
| 368 | |
| 369 | void Instrumentation::instrumentFunction(BinaryFunction &Function, |
| 370 | MCPlusBuilder::AllocatorIdTy AllocId) { |
| 371 | if (Function.hasUnknownControlFlow()) |
| 372 | return; |
| 373 | |
| 374 | BinaryContext &BC = Function.getBinaryContext(); |
| 375 | if (BC.isMachO() && Function.hasName(FunctionName: "___GLOBAL_init_65535/1" )) |
| 376 | return; |
| 377 | |
| 378 | std::unordered_set<const BinaryBasicBlock *> BBToSkip; |
| 379 | if (BC.isAArch64() && hasAArch64ExclusiveMemop(Function, BBToSkip)) |
| 380 | return; |
| 381 | |
| 382 | SplitWorklistTy SplitWorklist; |
| 383 | SplitInstrsTy SplitInstrs; |
| 384 | |
| 385 | FunctionDescription *FuncDesc = nullptr; |
| 386 | { |
| 387 | std::unique_lock<llvm::sys::RWMutex> L(FDMutex); |
| 388 | Summary->FunctionDescriptions.emplace_back(); |
| 389 | FuncDesc = &Summary->FunctionDescriptions.back(); |
| 390 | } |
| 391 | |
| 392 | FuncDesc->Function = &Function; |
| 393 | Function.disambiguateJumpTables(AllocId); |
| 394 | Function.deleteConservativeEdges(); |
| 395 | |
| 396 | std::unordered_map<const BinaryBasicBlock *, uint32_t> BBToID; |
| 397 | uint32_t Id = 0; |
| 398 | for (auto BBI = Function.begin(); BBI != Function.end(); ++BBI) { |
| 399 | BBToID[&*BBI] = Id++; |
| 400 | } |
| 401 | std::unordered_set<const BinaryBasicBlock *> VisitedSet; |
| 402 | // DFS to establish edges we will use for a spanning tree. Edges in the |
| 403 | // spanning tree can be instrumentation-free since their count can be |
| 404 | // inferred by solving flow equations on a bottom-up traversal of the tree. |
| 405 | // Exit basic blocks are always instrumented so we start the traversal with |
| 406 | // a minimum number of defined variables to make the equation solvable. |
| 407 | std::stack<std::pair<const BinaryBasicBlock *, BinaryBasicBlock *>> Stack; |
| 408 | std::unordered_map<const BinaryBasicBlock *, |
| 409 | std::set<const BinaryBasicBlock *>> |
| 410 | STOutSet; |
| 411 | for (auto BBI = Function.getLayout().block_rbegin(); |
| 412 | BBI != Function.getLayout().block_rend(); ++BBI) { |
| 413 | if ((*BBI)->isEntryPoint() || (*BBI)->isLandingPad()) { |
| 414 | Stack.push(x: std::make_pair(x: nullptr, y&: *BBI)); |
| 415 | if (opts::InstrumentCalls && (*BBI)->isEntryPoint()) { |
| 416 | EntryNode E; |
| 417 | E.Node = BBToID[&**BBI]; |
| 418 | E.Address = (*BBI)->getInputOffset(); |
| 419 | FuncDesc->EntryNodes.emplace_back(args&: E); |
| 420 | createIndCallTargetDescription(ToFunction: Function, To: (*BBI)->getInputOffset()); |
| 421 | } |
| 422 | } |
| 423 | } |
| 424 | |
| 425 | // Modified version of BinaryFunction::dfs() to build a spanning tree |
| 426 | if (!opts::ConservativeInstrumentation) { |
| 427 | while (!Stack.empty()) { |
| 428 | BinaryBasicBlock *BB; |
| 429 | const BinaryBasicBlock *Pred; |
| 430 | std::tie(args&: Pred, args&: BB) = Stack.top(); |
| 431 | Stack.pop(); |
| 432 | if (llvm::is_contained(Range&: VisitedSet, Element: BB)) |
| 433 | continue; |
| 434 | |
| 435 | VisitedSet.insert(x: BB); |
| 436 | if (Pred) |
| 437 | STOutSet[Pred].insert(x: BB); |
| 438 | |
| 439 | for (BinaryBasicBlock *SuccBB : BB->successors()) |
| 440 | Stack.push(x: std::make_pair(x&: BB, y&: SuccBB)); |
| 441 | } |
| 442 | } |
| 443 | |
| 444 | // Determine whether this is a leaf function, which needs special |
| 445 | // instructions to protect the red zone |
| 446 | bool IsLeafFunction = true; |
| 447 | DenseSet<const BinaryBasicBlock *> InvokeBlocks; |
| 448 | for (const BinaryBasicBlock &BB : Function) { |
| 449 | for (const MCInst &Inst : BB) { |
| 450 | if (BC.MIB->isCall(Inst)) { |
| 451 | if (BC.MIB->isInvoke(Inst)) |
| 452 | InvokeBlocks.insert(V: &BB); |
| 453 | if (!BC.MIB->isTailCall(Inst)) |
| 454 | IsLeafFunction = false; |
| 455 | } |
| 456 | } |
| 457 | } |
| 458 | |
| 459 | for (auto BBI = Function.begin(), BBE = Function.end(); BBI != BBE; ++BBI) { |
| 460 | BinaryBasicBlock &BB = *BBI; |
| 461 | |
| 462 | // Skip BBs with exclusive load/stores |
| 463 | if (BBToSkip.find(x: &BB) != BBToSkip.end()) |
| 464 | continue; |
| 465 | |
| 466 | bool HasUnconditionalBranch = false; |
| 467 | bool HasJumpTable = false; |
| 468 | bool IsInvokeBlock = InvokeBlocks.count(V: &BB) > 0; |
| 469 | |
| 470 | for (auto I = BB.begin(); I != BB.end(); ++I) { |
| 471 | const MCInst &Inst = *I; |
| 472 | if (!BC.MIB->getOffset(Inst)) |
| 473 | continue; |
| 474 | |
| 475 | const bool IsJumpTable = Function.getJumpTable(Inst); |
| 476 | if (IsJumpTable) |
| 477 | HasJumpTable = true; |
| 478 | else if (BC.MIB->isUnconditionalBranch(Inst)) |
| 479 | HasUnconditionalBranch = true; |
| 480 | else if (!(BC.MIB->isCall(Inst) || BC.MIB->isConditionalBranch(Inst))) |
| 481 | continue; |
| 482 | |
| 483 | const uint32_t FromOffset = *BC.MIB->getOffset(Inst); |
| 484 | const MCSymbol *Target = BC.MIB->getTargetSymbol(Inst); |
| 485 | BinaryBasicBlock *TargetBB = Function.getBasicBlockForLabel(Label: Target); |
| 486 | uint32_t ToOffset = TargetBB ? TargetBB->getInputOffset() : 0; |
| 487 | BinaryFunction *TargetFunc = |
| 488 | TargetBB ? &Function : BC.getFunctionForSymbol(Symbol: Target); |
| 489 | if (TargetFunc && BC.MIB->isCall(Inst)) { |
| 490 | if (opts::InstrumentCalls) { |
| 491 | const BinaryBasicBlock *ForeignBB = |
| 492 | TargetFunc->getBasicBlockForLabel(Label: Target); |
| 493 | if (ForeignBB) |
| 494 | ToOffset = ForeignBB->getInputOffset(); |
| 495 | instrumentOneTarget(SplitWorklist, SplitInstrs, Iter&: I, FromFunction&: Function, FromBB&: BB, |
| 496 | From: FromOffset, ToFunc&: *TargetFunc, TargetBB, ToOffset, |
| 497 | IsLeaf: IsLeafFunction, IsInvoke: IsInvokeBlock, FuncDesc, |
| 498 | FromNodeID: BBToID[&BB]); |
| 499 | } |
| 500 | continue; |
| 501 | } |
| 502 | if (TargetFunc) { |
| 503 | // Do not instrument edges in the spanning tree |
| 504 | if (llvm::is_contained(Range&: STOutSet[&BB], Element: TargetBB)) { |
| 505 | auto L = BC.scopeLock(); |
| 506 | createEdgeDescription(FuncDesc&: *FuncDesc, FromFunction: Function, From: FromOffset, FromNodeID: BBToID[&BB], |
| 507 | ToFunction: Function, To: ToOffset, ToNodeID: BBToID[TargetBB], |
| 508 | /*Instrumented=*/false); |
| 509 | continue; |
| 510 | } |
| 511 | instrumentOneTarget(SplitWorklist, SplitInstrs, Iter&: I, FromFunction&: Function, FromBB&: BB, |
| 512 | From: FromOffset, ToFunc&: *TargetFunc, TargetBB, ToOffset, |
| 513 | IsLeaf: IsLeafFunction, IsInvoke: IsInvokeBlock, FuncDesc, |
| 514 | FromNodeID: BBToID[&BB], ToNodeID: BBToID[TargetBB]); |
| 515 | continue; |
| 516 | } |
| 517 | |
| 518 | if (IsJumpTable) { |
| 519 | for (BinaryBasicBlock *&Succ : BB.successors()) { |
| 520 | // Do not instrument edges in the spanning tree |
| 521 | if (llvm::is_contained(Range&: STOutSet[&BB], Element: &*Succ)) { |
| 522 | auto L = BC.scopeLock(); |
| 523 | createEdgeDescription(FuncDesc&: *FuncDesc, FromFunction: Function, From: FromOffset, FromNodeID: BBToID[&BB], |
| 524 | ToFunction: Function, To: Succ->getInputOffset(), |
| 525 | ToNodeID: BBToID[&*Succ], /*Instrumented=*/false); |
| 526 | continue; |
| 527 | } |
| 528 | instrumentOneTarget( |
| 529 | SplitWorklist, SplitInstrs, Iter&: I, FromFunction&: Function, FromBB&: BB, From: FromOffset, ToFunc&: Function, |
| 530 | TargetBB: &*Succ, ToOffset: Succ->getInputOffset(), IsLeaf: IsLeafFunction, IsInvoke: IsInvokeBlock, |
| 531 | FuncDesc, FromNodeID: BBToID[&BB], ToNodeID: BBToID[&*Succ]); |
| 532 | } |
| 533 | continue; |
| 534 | } |
| 535 | |
| 536 | // Handle indirect calls -- could be direct calls with unknown targets |
| 537 | // or secondary entry points of known functions, so check it is indirect |
| 538 | // to be sure. |
| 539 | if (opts::InstrumentCalls && BC.MIB->isIndirectCall(Inst: *I)) |
| 540 | instrumentIndirectTarget(BB, Iter&: I, FromFunction&: Function, From: FromOffset); |
| 541 | |
| 542 | } // End of instructions loop |
| 543 | |
| 544 | // Instrument fallthroughs (when the direct jump instruction is missing) |
| 545 | if (!HasUnconditionalBranch && !HasJumpTable && BB.succ_size() > 0 && |
| 546 | BB.size() > 0) { |
| 547 | BinaryBasicBlock *FTBB = BB.getFallthrough(); |
| 548 | assert(FTBB && "expected valid fall-through basic block" ); |
| 549 | auto I = BB.begin(); |
| 550 | auto LastInstr = BB.end(); |
| 551 | --LastInstr; |
| 552 | while (LastInstr != I && BC.MIB->isPseudo(Inst: *LastInstr)) |
| 553 | --LastInstr; |
| 554 | uint32_t FromOffset = 0; |
| 555 | // The last instruction in the BB should have an annotation, except |
| 556 | // if it was branching to the end of the function as a result of |
| 557 | // __builtin_unreachable(), in which case it was deleted by fixBranches. |
| 558 | // Ignore this case. FIXME: force fixBranches() to preserve the offset. |
| 559 | if (!BC.MIB->getOffset(Inst: *LastInstr)) |
| 560 | continue; |
| 561 | FromOffset = *BC.MIB->getOffset(Inst: *LastInstr); |
| 562 | |
| 563 | // Do not instrument edges in the spanning tree |
| 564 | if (llvm::is_contained(Range&: STOutSet[&BB], Element: FTBB)) { |
| 565 | auto L = BC.scopeLock(); |
| 566 | createEdgeDescription(FuncDesc&: *FuncDesc, FromFunction: Function, From: FromOffset, FromNodeID: BBToID[&BB], |
| 567 | ToFunction: Function, To: FTBB->getInputOffset(), ToNodeID: BBToID[FTBB], |
| 568 | /*Instrumented=*/false); |
| 569 | continue; |
| 570 | } |
| 571 | instrumentOneTarget(SplitWorklist, SplitInstrs, Iter&: I, FromFunction&: Function, FromBB&: BB, |
| 572 | From: FromOffset, ToFunc&: Function, TargetBB: FTBB, ToOffset: FTBB->getInputOffset(), |
| 573 | IsLeaf: IsLeafFunction, IsInvoke: IsInvokeBlock, FuncDesc, FromNodeID: BBToID[&BB], |
| 574 | ToNodeID: BBToID[FTBB]); |
| 575 | } |
| 576 | } // End of BBs loop |
| 577 | |
| 578 | // Instrument spanning tree leaves |
| 579 | if (!opts::ConservativeInstrumentation) { |
| 580 | for (auto BBI = Function.begin(), BBE = Function.end(); BBI != BBE; ++BBI) { |
| 581 | BinaryBasicBlock &BB = *BBI; |
| 582 | if (STOutSet[&BB].size() == 0) |
| 583 | instrumentLeafNode(BB, Iter: BB.begin(), IsLeaf: IsLeafFunction, FuncDesc&: *FuncDesc, |
| 584 | Node: BBToID[&BB]); |
| 585 | } |
| 586 | } |
| 587 | |
| 588 | // Consume list of critical edges: split them and add instrumentation to the |
| 589 | // newly created BBs |
| 590 | auto Iter = SplitInstrs.begin(); |
| 591 | for (std::pair<BinaryBasicBlock *, BinaryBasicBlock *> &BBPair : |
| 592 | SplitWorklist) { |
| 593 | BinaryBasicBlock *NewBB = Function.splitEdge(From: BBPair.first, To: BBPair.second); |
| 594 | NewBB->addInstructions(Begin: Iter->begin(), End: Iter->end()); |
| 595 | ++Iter; |
| 596 | } |
| 597 | |
| 598 | // Unused now |
| 599 | FuncDesc->EdgesSet.clear(); |
| 600 | } |
| 601 | |
| 602 | Error Instrumentation::runOnFunctions(BinaryContext &BC) { |
| 603 | const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/false, |
| 604 | /*IsText=*/false, |
| 605 | /*IsAllocatable=*/true); |
| 606 | BC.registerOrUpdateSection(Name: ".bolt.instr.counters" , ELFType: ELF::SHT_PROGBITS, ELFFlags: Flags, |
| 607 | Data: nullptr, Size: 0, Alignment: BC.RegularPageSize); |
| 608 | |
| 609 | BC.registerOrUpdateNoteSection(Name: ".bolt.instr.tables" , Data: nullptr, Size: 0, |
| 610 | /*Alignment=*/1, |
| 611 | /*IsReadOnly=*/true, ELFType: ELF::SHT_NOTE); |
| 612 | |
| 613 | Summary->IndCallCounterFuncPtr = |
| 614 | BC.Ctx->getOrCreateSymbol(Name: "__bolt_ind_call_counter_func_pointer" ); |
| 615 | Summary->IndTailCallCounterFuncPtr = |
| 616 | BC.Ctx->getOrCreateSymbol(Name: "__bolt_ind_tailcall_counter_func_pointer" ); |
| 617 | |
| 618 | createAuxiliaryFunctions(BC); |
| 619 | |
| 620 | ParallelUtilities::PredicateTy SkipPredicate = [&](const BinaryFunction &BF) { |
| 621 | return (!BF.isSimple() || BF.isIgnored() || |
| 622 | (opts::InstrumentHotOnly && !BF.getKnownExecutionCount())); |
| 623 | }; |
| 624 | |
| 625 | ParallelUtilities::WorkFuncWithAllocTy WorkFun = |
| 626 | [&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocatorId) { |
| 627 | instrumentFunction(Function&: BF, AllocId: AllocatorId); |
| 628 | }; |
| 629 | |
| 630 | ParallelUtilities::runOnEachFunctionWithUniqueAllocId( |
| 631 | BC, SchedPolicy: ParallelUtilities::SchedulingPolicy::SP_INST_QUADRATIC, WorkFunction: WorkFun, |
| 632 | SkipPredicate, LogName: "instrumentation" , /* ForceSequential=*/true); |
| 633 | |
| 634 | if (BC.isMachO()) { |
| 635 | if (BC.StartFunctionAddress) { |
| 636 | BinaryFunction *Main = |
| 637 | BC.getBinaryFunctionAtAddress(Address: *BC.StartFunctionAddress); |
| 638 | assert(Main && "Entry point function not found" ); |
| 639 | BinaryBasicBlock &BB = Main->front(); |
| 640 | |
| 641 | ErrorOr<BinarySection &> SetupSection = |
| 642 | BC.getUniqueSectionByName(SectionName: "I__setup" ); |
| 643 | if (!SetupSection) |
| 644 | return createFatalBOLTError(S: "Cannot find I__setup section\n" ); |
| 645 | |
| 646 | MCSymbol *Target = BC.registerNameAtAddress( |
| 647 | Name: "__bolt_instr_setup" , Address: SetupSection->getAddress(), Size: 0, Alignment: 0); |
| 648 | MCInst NewInst; |
| 649 | BC.MIB->createCall(Inst&: NewInst, Target, Ctx: BC.Ctx.get()); |
| 650 | BB.insertInstruction(At: BB.begin(), NewInst: std::move(NewInst)); |
| 651 | } else { |
| 652 | BC.errs() << "BOLT-WARNING: Entry point not found\n" ; |
| 653 | } |
| 654 | |
| 655 | if (BinaryData *BD = BC.getBinaryDataByName(Name: "___GLOBAL_init_65535/1" )) { |
| 656 | BinaryFunction *Ctor = BC.getBinaryFunctionAtAddress(Address: BD->getAddress()); |
| 657 | assert(Ctor && "___GLOBAL_init_65535 function not found" ); |
| 658 | BinaryBasicBlock &BB = Ctor->front(); |
| 659 | ErrorOr<BinarySection &> FiniSection = |
| 660 | BC.getUniqueSectionByName(SectionName: "I__fini" ); |
| 661 | if (!FiniSection) |
| 662 | return createFatalBOLTError(S: "Cannot find I__fini section" ); |
| 663 | |
| 664 | MCSymbol *Target = BC.registerNameAtAddress( |
| 665 | Name: "__bolt_instr_fini" , Address: FiniSection->getAddress(), Size: 0, Alignment: 0); |
| 666 | auto IsLEA = [&BC](const MCInst &Inst) { return BC.MIB->isLEA64r(Inst); }; |
| 667 | const auto LEA = std::find_if( |
| 668 | first: std::next(x: llvm::find_if(Range: reverse(C&: BB), P: IsLEA)), last: BB.rend(), pred: IsLEA); |
| 669 | LEA->getOperand(i: 4).setExpr( |
| 670 | MCSymbolRefExpr::create(Symbol: Target, Kind: MCSymbolRefExpr::VK_None, Ctx&: *BC.Ctx)); |
| 671 | } else { |
| 672 | BC.errs() << "BOLT-WARNING: ___GLOBAL_init_65535 not found\n" ; |
| 673 | } |
| 674 | } |
| 675 | |
| 676 | setupRuntimeLibrary(BC); |
| 677 | return Error::success(); |
| 678 | } |
| 679 | |
| 680 | void Instrumentation::createAuxiliaryFunctions(BinaryContext &BC) { |
| 681 | auto createSimpleFunction = |
| 682 | [&](StringRef Title, InstructionListType Instrs) -> BinaryFunction * { |
| 683 | BinaryFunction *Func = BC.createInjectedBinaryFunction(Name: std::string(Title)); |
| 684 | |
| 685 | std::vector<std::unique_ptr<BinaryBasicBlock>> BBs; |
| 686 | BBs.emplace_back(args: Func->createBasicBlock()); |
| 687 | BBs.back()->addInstructions(Begin: Instrs.begin(), End: Instrs.end()); |
| 688 | BBs.back()->setCFIState(0); |
| 689 | Func->insertBasicBlocks(Start: nullptr, NewBBs: std::move(BBs), |
| 690 | /*UpdateLayout=*/true, |
| 691 | /*UpdateCFIState=*/false); |
| 692 | Func->updateState(State: BinaryFunction::State::CFG_Finalized); |
| 693 | return Func; |
| 694 | }; |
| 695 | |
| 696 | // Here we are creating a set of functions to handle BB entry/exit. |
| 697 | // IndCallHandlerExitBB contains instructions to finish handling traffic to an |
| 698 | // indirect call. We pass it to createInstrumentedIndCallHandlerEntryBB(), |
| 699 | // which will check if a pointer to runtime library traffic accounting |
| 700 | // function was initialized (it is done during initialization of runtime |
| 701 | // library). If it is so - calls it. Then this routine returns to normal |
| 702 | // execution by jumping to exit BB. |
| 703 | BinaryFunction *IndCallHandlerExitBB = |
| 704 | createSimpleFunction("__bolt_instr_ind_call_handler" , |
| 705 | BC.MIB->createInstrumentedIndCallHandlerExitBB()); |
| 706 | |
| 707 | IndCallHandlerExitBBFunction = |
| 708 | createSimpleFunction("__bolt_instr_ind_call_handler_func" , |
| 709 | BC.MIB->createInstrumentedIndCallHandlerEntryBB( |
| 710 | InstrTrampoline: Summary->IndCallCounterFuncPtr, |
| 711 | IndCallHandler: IndCallHandlerExitBB->getSymbol(), Ctx: &*BC.Ctx)); |
| 712 | |
| 713 | BinaryFunction *IndTailCallHandlerExitBB = createSimpleFunction( |
| 714 | "__bolt_instr_ind_tail_call_handler" , |
| 715 | BC.MIB->createInstrumentedIndTailCallHandlerExitBB()); |
| 716 | |
| 717 | IndTailCallHandlerExitBBFunction = createSimpleFunction( |
| 718 | "__bolt_instr_ind_tailcall_handler_func" , |
| 719 | BC.MIB->createInstrumentedIndCallHandlerEntryBB( |
| 720 | InstrTrampoline: Summary->IndTailCallCounterFuncPtr, |
| 721 | IndCallHandler: IndTailCallHandlerExitBB->getSymbol(), Ctx: &*BC.Ctx)); |
| 722 | |
| 723 | createSimpleFunction("__bolt_num_counters_getter" , |
| 724 | BC.MIB->createNumCountersGetter(Ctx: BC.Ctx.get())); |
| 725 | createSimpleFunction("__bolt_instr_locations_getter" , |
| 726 | BC.MIB->createInstrLocationsGetter(Ctx: BC.Ctx.get())); |
| 727 | createSimpleFunction("__bolt_instr_tables_getter" , |
| 728 | BC.MIB->createInstrTablesGetter(Ctx: BC.Ctx.get())); |
| 729 | createSimpleFunction("__bolt_instr_num_funcs_getter" , |
| 730 | BC.MIB->createInstrNumFuncsGetter(Ctx: BC.Ctx.get())); |
| 731 | |
| 732 | if (BC.isELF()) { |
| 733 | if (BC.StartFunctionAddress) { |
| 734 | BinaryFunction *Start = |
| 735 | BC.getBinaryFunctionAtAddress(Address: *BC.StartFunctionAddress); |
| 736 | assert(Start && "Entry point function not found" ); |
| 737 | const MCSymbol *StartSym = Start->getSymbol(); |
| 738 | createSimpleFunction( |
| 739 | "__bolt_start_trampoline" , |
| 740 | BC.MIB->createSymbolTrampoline(TgtSym: StartSym, Ctx: BC.Ctx.get())); |
| 741 | } |
| 742 | if (BC.FiniFunctionAddress) { |
| 743 | BinaryFunction *Fini = |
| 744 | BC.getBinaryFunctionAtAddress(Address: *BC.FiniFunctionAddress); |
| 745 | assert(Fini && "Finalization function not found" ); |
| 746 | const MCSymbol *FiniSym = Fini->getSymbol(); |
| 747 | createSimpleFunction( |
| 748 | "__bolt_fini_trampoline" , |
| 749 | BC.MIB->createSymbolTrampoline(TgtSym: FiniSym, Ctx: BC.Ctx.get())); |
| 750 | } else { |
| 751 | // Create dummy return function for trampoline to avoid issues |
| 752 | // with unknown symbol in runtime library. E.g. for static PIE |
| 753 | // executable |
| 754 | createSimpleFunction("__bolt_fini_trampoline" , |
| 755 | BC.MIB->createReturnInstructionList(Ctx: BC.Ctx.get())); |
| 756 | } |
| 757 | } |
| 758 | } |
| 759 | |
| 760 | void Instrumentation::setupRuntimeLibrary(BinaryContext &BC) { |
| 761 | uint32_t FuncDescSize = Summary->getFDSize(); |
| 762 | |
| 763 | BC.outs() << "BOLT-INSTRUMENTER: Number of indirect call site descriptors: " |
| 764 | << Summary->IndCallDescriptions.size() << "\n" ; |
| 765 | BC.outs() << "BOLT-INSTRUMENTER: Number of indirect call target descriptors: " |
| 766 | << Summary->IndCallTargetDescriptions.size() << "\n" ; |
| 767 | BC.outs() << "BOLT-INSTRUMENTER: Number of function descriptors: " |
| 768 | << Summary->FunctionDescriptions.size() << "\n" ; |
| 769 | BC.outs() << "BOLT-INSTRUMENTER: Number of branch counters: " |
| 770 | << BranchCounters << "\n" ; |
| 771 | BC.outs() << "BOLT-INSTRUMENTER: Number of ST leaf node counters: " |
| 772 | << LeafNodeCounters << "\n" ; |
| 773 | BC.outs() << "BOLT-INSTRUMENTER: Number of direct call counters: " |
| 774 | << DirectCallCounters << "\n" ; |
| 775 | BC.outs() << "BOLT-INSTRUMENTER: Total number of counters: " |
| 776 | << Summary->Counters.size() << "\n" ; |
| 777 | BC.outs() << "BOLT-INSTRUMENTER: Total size of counters: " |
| 778 | << (Summary->Counters.size() * 8) |
| 779 | << " bytes (static alloc memory)\n" ; |
| 780 | BC.outs() << "BOLT-INSTRUMENTER: Total size of string table emitted: " |
| 781 | << Summary->StringTable.size() << " bytes in file\n" ; |
| 782 | BC.outs() << "BOLT-INSTRUMENTER: Total size of descriptors: " |
| 783 | << (FuncDescSize + |
| 784 | Summary->IndCallDescriptions.size() * |
| 785 | sizeof(IndCallDescription) + |
| 786 | Summary->IndCallTargetDescriptions.size() * |
| 787 | sizeof(IndCallTargetDescription)) |
| 788 | << " bytes in file\n" ; |
| 789 | BC.outs() << "BOLT-INSTRUMENTER: Profile will be saved to file " |
| 790 | << opts::InstrumentationFilename << "\n" ; |
| 791 | |
| 792 | InstrumentationRuntimeLibrary *RtLibrary = |
| 793 | static_cast<InstrumentationRuntimeLibrary *>(BC.getRuntimeLibrary()); |
| 794 | assert(RtLibrary && "instrumentation runtime library object must be set" ); |
| 795 | RtLibrary->setSummary(std::move(Summary)); |
| 796 | } |
| 797 | } // namespace bolt |
| 798 | } // namespace llvm |
| 799 | |