1//===- bolt/Passes/Instrumentation.cpp ------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the Instrumentation class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "bolt/Passes/Instrumentation.h"
14#include "bolt/Core/ParallelUtilities.h"
15#include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h"
16#include "bolt/Utils/CommandLineOpts.h"
17#include "bolt/Utils/Utils.h"
18#include "llvm/Support/CommandLine.h"
19#include "llvm/Support/RWMutex.h"
20#include <queue>
21#include <stack>
22#include <unordered_set>
23
24#define DEBUG_TYPE "bolt-instrumentation"
25
26using namespace llvm;
27
28namespace opts {
29extern cl::OptionCategory BoltInstrCategory;
30
31cl::opt<std::string> InstrumentationFilename(
32 "instrumentation-file",
33 cl::desc("file name where instrumented profile will be saved (default: "
34 "/tmp/prof.fdata)"),
35 cl::init(Val: "/tmp/prof.fdata"), cl::Optional, cl::cat(BoltInstrCategory));
36
37cl::opt<std::string> InstrumentationBinpath(
38 "instrumentation-binpath",
39 cl::desc("path to instrumented binary in case if /proc/self/map_files "
40 "is not accessible due to access restriction issues"),
41 cl::Optional, cl::cat(BoltInstrCategory));
42
43cl::opt<bool> InstrumentationFileAppendPID(
44 "instrumentation-file-append-pid",
45 cl::desc("append PID to saved profile file name (default: false)"),
46 cl::init(Val: false), cl::Optional, cl::cat(BoltInstrCategory));
47
48cl::opt<bool> ConservativeInstrumentation(
49 "conservative-instrumentation",
50 cl::desc("disable instrumentation optimizations that sacrifice profile "
51 "accuracy (for debugging, default: false)"),
52 cl::init(Val: false), cl::Optional, cl::cat(BoltInstrCategory));
53
54cl::opt<uint32_t> InstrumentationSleepTime(
55 "instrumentation-sleep-time",
56 cl::desc("interval between profile writes (default: 0 = write only at "
57 "program end). This is useful for service workloads when you "
58 "want to dump profile every X minutes or if you are killing the "
59 "program and the profile is not being dumped at the end."),
60 cl::init(Val: 0), cl::Optional, cl::cat(BoltInstrCategory));
61
62cl::opt<bool> InstrumentationNoCountersClear(
63 "instrumentation-no-counters-clear",
64 cl::desc("Don't clear counters across dumps "
65 "(use with instrumentation-sleep-time option)"),
66 cl::init(Val: false), cl::Optional, cl::cat(BoltInstrCategory));
67
68cl::opt<bool> InstrumentationWaitForks(
69 "instrumentation-wait-forks",
70 cl::desc("Wait until all forks of instrumented process will finish "
71 "(use with instrumentation-sleep-time option)"),
72 cl::init(Val: false), cl::Optional, cl::cat(BoltInstrCategory));
73
74cl::opt<bool>
75 InstrumentHotOnly("instrument-hot-only",
76 cl::desc("only insert instrumentation on hot functions "
77 "(needs profile, default: false)"),
78 cl::init(Val: false), cl::Optional,
79 cl::cat(BoltInstrCategory));
80
81cl::opt<bool> InstrumentCalls("instrument-calls",
82 cl::desc("record profile for inter-function "
83 "control flow activity (default: true)"),
84 cl::init(Val: true), cl::Optional,
85 cl::cat(BoltInstrCategory));
86} // namespace opts
87
88namespace llvm {
89namespace bolt {
90
91static bool hasAArch64ExclusiveMemop(
92 BinaryFunction &Function,
93 std::unordered_set<const BinaryBasicBlock *> &BBToSkip) {
94 // FIXME ARMv8-a architecture reference manual says that software must avoid
95 // having any explicit memory accesses between exclusive load and associated
96 // store instruction. So for now skip instrumentation for basic blocks that
97 // have these instructions, since it might lead to runtime deadlock.
98 BinaryContext &BC = Function.getBinaryContext();
99 std::queue<std::pair<BinaryBasicBlock *, bool>> BBQueue; // {BB, isLoad}
100 std::unordered_set<BinaryBasicBlock *> Visited;
101
102 if (Function.getLayout().block_begin() == Function.getLayout().block_end())
103 return 0;
104
105 BinaryBasicBlock *BBfirst = *Function.getLayout().block_begin();
106 BBQueue.push(x: {BBfirst, false});
107
108 while (!BBQueue.empty()) {
109 BinaryBasicBlock *BB = BBQueue.front().first;
110 bool IsLoad = BBQueue.front().second;
111 BBQueue.pop();
112 if (Visited.find(x: BB) != Visited.end())
113 continue;
114 Visited.insert(x: BB);
115
116 for (const MCInst &Inst : *BB) {
117 // Two loads one after another - skip whole function
118 if (BC.MIB->isAArch64ExclusiveLoad(Inst) && IsLoad) {
119 if (opts::Verbosity >= 2) {
120 outs() << "BOLT-INSTRUMENTER: function " << Function.getPrintName()
121 << " has two exclusive loads. Ignoring the function.\n";
122 }
123 return true;
124 }
125
126 if (BC.MIB->isAArch64ExclusiveLoad(Inst))
127 IsLoad = true;
128
129 if (IsLoad && BBToSkip.find(x: BB) == BBToSkip.end()) {
130 BBToSkip.insert(x: BB);
131 if (opts::Verbosity >= 2) {
132 outs() << "BOLT-INSTRUMENTER: skip BB " << BB->getName()
133 << " due to exclusive instruction in function "
134 << Function.getPrintName() << "\n";
135 }
136 }
137
138 if (!IsLoad && BC.MIB->isAArch64ExclusiveStore(Inst)) {
139 if (opts::Verbosity >= 2) {
140 outs() << "BOLT-INSTRUMENTER: function " << Function.getPrintName()
141 << " has exclusive store without corresponding load. Ignoring "
142 "the function.\n";
143 }
144 return true;
145 }
146
147 if (IsLoad && (BC.MIB->isAArch64ExclusiveStore(Inst) ||
148 BC.MIB->isAArch64ExclusiveClear(Inst)))
149 IsLoad = false;
150 }
151
152 if (IsLoad && BB->succ_size() == 0) {
153 if (opts::Verbosity >= 2) {
154 outs()
155 << "BOLT-INSTRUMENTER: function " << Function.getPrintName()
156 << " has exclusive load in trailing BB. Ignoring the function.\n";
157 }
158 return true;
159 }
160
161 for (BinaryBasicBlock *BBS : BB->successors())
162 BBQueue.push(x: {BBS, IsLoad});
163 }
164
165 if (BBToSkip.size() == Visited.size()) {
166 if (opts::Verbosity >= 2) {
167 outs() << "BOLT-INSTRUMENTER: all BBs are marked with true. Ignoring the "
168 "function "
169 << Function.getPrintName() << "\n";
170 }
171 return true;
172 }
173
174 return false;
175}
176
177uint32_t Instrumentation::getFunctionNameIndex(const BinaryFunction &Function) {
178 auto Iter = FuncToStringIdx.find(x: &Function);
179 if (Iter != FuncToStringIdx.end())
180 return Iter->second;
181 size_t Idx = Summary->StringTable.size();
182 FuncToStringIdx.emplace(args: std::make_pair(x: &Function, y&: Idx));
183 Summary->StringTable.append(str: getEscapedName(Name: Function.getOneName()));
184 Summary->StringTable.append(n: 1, c: '\0');
185 return Idx;
186}
187
188bool Instrumentation::createCallDescription(FunctionDescription &FuncDesc,
189 const BinaryFunction &FromFunction,
190 uint32_t From, uint32_t FromNodeID,
191 const BinaryFunction &ToFunction,
192 uint32_t To, bool IsInvoke) {
193 CallDescription CD;
194 // Ordinarily, we don't augment direct calls with an explicit counter, except
195 // when forced to do so or when we know this callee could be throwing
196 // exceptions, in which case there is no other way to accurately record its
197 // frequency.
198 bool ForceInstrumentation = opts::ConservativeInstrumentation || IsInvoke;
199 CD.FromLoc.FuncString = getFunctionNameIndex(Function: FromFunction);
200 CD.FromLoc.Offset = From;
201 CD.FromNode = FromNodeID;
202 CD.Target = &ToFunction;
203 CD.ToLoc.FuncString = getFunctionNameIndex(Function: ToFunction);
204 CD.ToLoc.Offset = To;
205 CD.Counter = ForceInstrumentation ? Summary->Counters.size() : 0xffffffff;
206 if (ForceInstrumentation)
207 ++DirectCallCounters;
208 FuncDesc.Calls.emplace_back(args&: CD);
209 return ForceInstrumentation;
210}
211
212void Instrumentation::createIndCallDescription(
213 const BinaryFunction &FromFunction, uint32_t From) {
214 IndCallDescription ICD;
215 ICD.FromLoc.FuncString = getFunctionNameIndex(Function: FromFunction);
216 ICD.FromLoc.Offset = From;
217 Summary->IndCallDescriptions.emplace_back(args&: ICD);
218}
219
220void Instrumentation::createIndCallTargetDescription(
221 const BinaryFunction &ToFunction, uint32_t To) {
222 IndCallTargetDescription ICD;
223 ICD.ToLoc.FuncString = getFunctionNameIndex(Function: ToFunction);
224 ICD.ToLoc.Offset = To;
225 ICD.Target = &ToFunction;
226 Summary->IndCallTargetDescriptions.emplace_back(args&: ICD);
227}
228
229bool Instrumentation::createEdgeDescription(FunctionDescription &FuncDesc,
230 const BinaryFunction &FromFunction,
231 uint32_t From, uint32_t FromNodeID,
232 const BinaryFunction &ToFunction,
233 uint32_t To, uint32_t ToNodeID,
234 bool Instrumented) {
235 EdgeDescription ED;
236 auto Result = FuncDesc.EdgesSet.insert(V: std::make_pair(x&: FromNodeID, y&: ToNodeID));
237 // Avoid creating duplicated edge descriptions. This happens in CFGs where a
238 // block jumps to its fall-through.
239 if (Result.second == false)
240 return false;
241 ED.FromLoc.FuncString = getFunctionNameIndex(Function: FromFunction);
242 ED.FromLoc.Offset = From;
243 ED.FromNode = FromNodeID;
244 ED.ToLoc.FuncString = getFunctionNameIndex(Function: ToFunction);
245 ED.ToLoc.Offset = To;
246 ED.ToNode = ToNodeID;
247 ED.Counter = Instrumented ? Summary->Counters.size() : 0xffffffff;
248 if (Instrumented)
249 ++BranchCounters;
250 FuncDesc.Edges.emplace_back(args&: ED);
251 return Instrumented;
252}
253
254void Instrumentation::createLeafNodeDescription(FunctionDescription &FuncDesc,
255 uint32_t Node) {
256 InstrumentedNode IN;
257 IN.Node = Node;
258 IN.Counter = Summary->Counters.size();
259 ++LeafNodeCounters;
260 FuncDesc.LeafNodes.emplace_back(args&: IN);
261}
262
263InstructionListType
264Instrumentation::createInstrumentationSnippet(BinaryContext &BC, bool IsLeaf) {
265 auto L = BC.scopeLock();
266 MCSymbol *Label = BC.Ctx->createNamedTempSymbol(Name: "InstrEntry");
267 Summary->Counters.emplace_back(args&: Label);
268 return BC.MIB->createInstrIncMemory(Target: Label, Ctx: BC.Ctx.get(), IsLeaf,
269 CodePointerSize: BC.AsmInfo->getCodePointerSize());
270}
271
272// Helper instruction sequence insertion function
273static BinaryBasicBlock::iterator
274insertInstructions(InstructionListType &Instrs, BinaryBasicBlock &BB,
275 BinaryBasicBlock::iterator Iter) {
276 for (MCInst &NewInst : Instrs) {
277 Iter = BB.insertInstruction(At: Iter, NewInst);
278 ++Iter;
279 }
280 return Iter;
281}
282
283void Instrumentation::instrumentLeafNode(BinaryBasicBlock &BB,
284 BinaryBasicBlock::iterator Iter,
285 bool IsLeaf,
286 FunctionDescription &FuncDesc,
287 uint32_t Node) {
288 createLeafNodeDescription(FuncDesc, Node);
289 InstructionListType CounterInstrs = createInstrumentationSnippet(
290 BC&: BB.getFunction()->getBinaryContext(), IsLeaf);
291 insertInstructions(Instrs&: CounterInstrs, BB, Iter);
292}
293
294void Instrumentation::instrumentIndirectTarget(BinaryBasicBlock &BB,
295 BinaryBasicBlock::iterator &Iter,
296 BinaryFunction &FromFunction,
297 uint32_t From) {
298 auto L = FromFunction.getBinaryContext().scopeLock();
299 const size_t IndCallSiteID = Summary->IndCallDescriptions.size();
300 createIndCallDescription(FromFunction, From);
301
302 BinaryContext &BC = FromFunction.getBinaryContext();
303 bool IsTailCall = BC.MIB->isTailCall(Inst: *Iter);
304 InstructionListType CounterInstrs = BC.MIB->createInstrumentedIndirectCall(
305 CallInst: std::move(*Iter),
306 HandlerFuncAddr: IsTailCall ? IndTailCallHandlerExitBBFunction->getSymbol()
307 : IndCallHandlerExitBBFunction->getSymbol(),
308 CallSiteID: IndCallSiteID, Ctx: &*BC.Ctx);
309
310 Iter = BB.eraseInstruction(II: Iter);
311 Iter = insertInstructions(Instrs&: CounterInstrs, BB, Iter);
312 --Iter;
313}
314
315bool Instrumentation::instrumentOneTarget(
316 SplitWorklistTy &SplitWorklist, SplitInstrsTy &SplitInstrs,
317 BinaryBasicBlock::iterator &Iter, BinaryFunction &FromFunction,
318 BinaryBasicBlock &FromBB, uint32_t From, BinaryFunction &ToFunc,
319 BinaryBasicBlock *TargetBB, uint32_t ToOffset, bool IsLeaf, bool IsInvoke,
320 FunctionDescription *FuncDesc, uint32_t FromNodeID, uint32_t ToNodeID) {
321 BinaryContext &BC = FromFunction.getBinaryContext();
322 {
323 auto L = BC.scopeLock();
324 bool Created = true;
325 if (!TargetBB)
326 Created = createCallDescription(FuncDesc&: *FuncDesc, FromFunction, From, FromNodeID,
327 ToFunction: ToFunc, To: ToOffset, IsInvoke);
328 else
329 Created = createEdgeDescription(FuncDesc&: *FuncDesc, FromFunction, From, FromNodeID,
330 ToFunction: ToFunc, To: ToOffset, ToNodeID,
331 /*Instrumented=*/true);
332 if (!Created)
333 return false;
334 }
335
336 InstructionListType CounterInstrs = createInstrumentationSnippet(BC, IsLeaf);
337
338 const MCInst &Inst = *Iter;
339 if (BC.MIB->isCall(Inst)) {
340 // This code handles both
341 // - (regular) inter-function calls (cross-function control transfer),
342 // - (rare) intra-function calls (function-local control transfer)
343 Iter = insertInstructions(Instrs&: CounterInstrs, BB&: FromBB, Iter);
344 return true;
345 }
346
347 if (!TargetBB || !FuncDesc)
348 return false;
349
350 // Indirect branch, conditional branches or fall-throughs
351 // Regular cond branch, put counter at start of target block
352 //
353 // N.B.: (FromBB != TargetBBs) checks below handle conditional jumps where
354 // we can't put the instrumentation counter in this block because not all
355 // paths that reach it at this point will be taken and going to the target.
356 if (TargetBB->pred_size() == 1 && &FromBB != TargetBB &&
357 !TargetBB->isEntryPoint()) {
358 insertInstructions(Instrs&: CounterInstrs, BB&: *TargetBB, Iter: TargetBB->begin());
359 return true;
360 }
361 if (FromBB.succ_size() == 1 && &FromBB != TargetBB) {
362 Iter = insertInstructions(Instrs&: CounterInstrs, BB&: FromBB, Iter);
363 return true;
364 }
365 // Critical edge, create BB and put counter there
366 SplitWorklist.emplace_back(args: &FromBB, args&: TargetBB);
367 SplitInstrs.emplace_back(args: std::move(CounterInstrs));
368 return true;
369}
370
371void Instrumentation::instrumentFunction(BinaryFunction &Function,
372 MCPlusBuilder::AllocatorIdTy AllocId) {
373 if (Function.hasUnknownControlFlow())
374 return;
375
376 BinaryContext &BC = Function.getBinaryContext();
377 if (BC.isMachO() && Function.hasName(FunctionName: "___GLOBAL_init_65535/1"))
378 return;
379
380 std::unordered_set<const BinaryBasicBlock *> BBToSkip;
381 if (BC.isAArch64() && hasAArch64ExclusiveMemop(Function, BBToSkip))
382 return;
383
384 SplitWorklistTy SplitWorklist;
385 SplitInstrsTy SplitInstrs;
386
387 FunctionDescription *FuncDesc = nullptr;
388 {
389 std::unique_lock<llvm::sys::RWMutex> L(FDMutex);
390 Summary->FunctionDescriptions.emplace_back();
391 FuncDesc = &Summary->FunctionDescriptions.back();
392 }
393
394 FuncDesc->Function = &Function;
395 Function.disambiguateJumpTables(AllocId);
396 Function.deleteConservativeEdges();
397
398 std::unordered_map<const BinaryBasicBlock *, uint32_t> BBToID;
399 uint32_t Id = 0;
400 for (auto BBI = Function.begin(); BBI != Function.end(); ++BBI) {
401 BBToID[&*BBI] = Id++;
402 }
403 std::unordered_set<const BinaryBasicBlock *> VisitedSet;
404 // DFS to establish edges we will use for a spanning tree. Edges in the
405 // spanning tree can be instrumentation-free since their count can be
406 // inferred by solving flow equations on a bottom-up traversal of the tree.
407 // Exit basic blocks are always instrumented so we start the traversal with
408 // a minimum number of defined variables to make the equation solvable.
409 std::stack<std::pair<const BinaryBasicBlock *, BinaryBasicBlock *>> Stack;
410 std::unordered_map<const BinaryBasicBlock *,
411 std::set<const BinaryBasicBlock *>>
412 STOutSet;
413 for (auto BBI = Function.getLayout().block_rbegin();
414 BBI != Function.getLayout().block_rend(); ++BBI) {
415 if ((*BBI)->isEntryPoint() || (*BBI)->isLandingPad()) {
416 Stack.push(x: std::make_pair(x: nullptr, y&: *BBI));
417 if (opts::InstrumentCalls && (*BBI)->isEntryPoint()) {
418 EntryNode E;
419 E.Node = BBToID[&**BBI];
420 E.Address = (*BBI)->getInputOffset();
421 FuncDesc->EntryNodes.emplace_back(args&: E);
422 createIndCallTargetDescription(ToFunction: Function, To: (*BBI)->getInputOffset());
423 }
424 }
425 }
426
427 // Modified version of BinaryFunction::dfs() to build a spanning tree
428 if (!opts::ConservativeInstrumentation) {
429 while (!Stack.empty()) {
430 BinaryBasicBlock *BB;
431 const BinaryBasicBlock *Pred;
432 std::tie(args&: Pred, args&: BB) = Stack.top();
433 Stack.pop();
434 if (llvm::is_contained(Range&: VisitedSet, Element: BB))
435 continue;
436
437 VisitedSet.insert(x: BB);
438 if (Pred)
439 STOutSet[Pred].insert(x: BB);
440
441 for (BinaryBasicBlock *SuccBB : BB->successors())
442 Stack.push(x: std::make_pair(x&: BB, y&: SuccBB));
443 }
444 }
445
446 // Determine whether this is a leaf function, which needs special
447 // instructions to protect the red zone
448 bool IsLeafFunction = true;
449 DenseSet<const BinaryBasicBlock *> InvokeBlocks;
450 for (const BinaryBasicBlock &BB : Function) {
451 for (const MCInst &Inst : BB) {
452 if (BC.MIB->isCall(Inst)) {
453 if (BC.MIB->isInvoke(Inst))
454 InvokeBlocks.insert(V: &BB);
455 if (!BC.MIB->isTailCall(Inst))
456 IsLeafFunction = false;
457 }
458 }
459 }
460
461 for (auto BBI = Function.begin(), BBE = Function.end(); BBI != BBE; ++BBI) {
462 BinaryBasicBlock &BB = *BBI;
463
464 // Skip BBs with exclusive load/stores
465 if (BBToSkip.find(x: &BB) != BBToSkip.end())
466 continue;
467
468 bool HasUnconditionalBranch = false;
469 bool HasJumpTable = false;
470 bool IsInvokeBlock = InvokeBlocks.count(V: &BB) > 0;
471
472 for (auto I = BB.begin(); I != BB.end(); ++I) {
473 const MCInst &Inst = *I;
474 if (!BC.MIB->getOffset(Inst))
475 continue;
476
477 const bool IsJumpTable = Function.getJumpTable(Inst);
478 if (IsJumpTable)
479 HasJumpTable = true;
480 else if (BC.MIB->isUnconditionalBranch(Inst))
481 HasUnconditionalBranch = true;
482 else if ((!BC.MIB->isCall(Inst) && !BC.MIB->isConditionalBranch(Inst)) ||
483 BC.MIB->isUnsupportedBranch(Inst))
484 continue;
485
486 const uint32_t FromOffset = *BC.MIB->getOffset(Inst);
487 const MCSymbol *Target = BC.MIB->getTargetSymbol(Inst);
488 BinaryBasicBlock *TargetBB = Function.getBasicBlockForLabel(Label: Target);
489 uint32_t ToOffset = TargetBB ? TargetBB->getInputOffset() : 0;
490 BinaryFunction *TargetFunc =
491 TargetBB ? &Function : BC.getFunctionForSymbol(Symbol: Target);
492 if (TargetFunc && BC.MIB->isCall(Inst)) {
493 if (opts::InstrumentCalls) {
494 const BinaryBasicBlock *ForeignBB =
495 TargetFunc->getBasicBlockForLabel(Label: Target);
496 if (ForeignBB)
497 ToOffset = ForeignBB->getInputOffset();
498 instrumentOneTarget(SplitWorklist, SplitInstrs, Iter&: I, FromFunction&: Function, FromBB&: BB,
499 From: FromOffset, ToFunc&: *TargetFunc, TargetBB, ToOffset,
500 IsLeaf: IsLeafFunction, IsInvoke: IsInvokeBlock, FuncDesc,
501 FromNodeID: BBToID[&BB]);
502 }
503 continue;
504 }
505 if (TargetFunc) {
506 // Do not instrument edges in the spanning tree
507 if (llvm::is_contained(Range&: STOutSet[&BB], Element: TargetBB)) {
508 auto L = BC.scopeLock();
509 createEdgeDescription(FuncDesc&: *FuncDesc, FromFunction: Function, From: FromOffset, FromNodeID: BBToID[&BB],
510 ToFunction: Function, To: ToOffset, ToNodeID: BBToID[TargetBB],
511 /*Instrumented=*/false);
512 continue;
513 }
514 instrumentOneTarget(SplitWorklist, SplitInstrs, Iter&: I, FromFunction&: Function, FromBB&: BB,
515 From: FromOffset, ToFunc&: *TargetFunc, TargetBB, ToOffset,
516 IsLeaf: IsLeafFunction, IsInvoke: IsInvokeBlock, FuncDesc,
517 FromNodeID: BBToID[&BB], ToNodeID: BBToID[TargetBB]);
518 continue;
519 }
520
521 if (IsJumpTable) {
522 for (BinaryBasicBlock *&Succ : BB.successors()) {
523 // Do not instrument edges in the spanning tree
524 if (llvm::is_contained(Range&: STOutSet[&BB], Element: &*Succ)) {
525 auto L = BC.scopeLock();
526 createEdgeDescription(FuncDesc&: *FuncDesc, FromFunction: Function, From: FromOffset, FromNodeID: BBToID[&BB],
527 ToFunction: Function, To: Succ->getInputOffset(),
528 ToNodeID: BBToID[&*Succ], /*Instrumented=*/false);
529 continue;
530 }
531 instrumentOneTarget(
532 SplitWorklist, SplitInstrs, Iter&: I, FromFunction&: Function, FromBB&: BB, From: FromOffset, ToFunc&: Function,
533 TargetBB: &*Succ, ToOffset: Succ->getInputOffset(), IsLeaf: IsLeafFunction, IsInvoke: IsInvokeBlock,
534 FuncDesc, FromNodeID: BBToID[&BB], ToNodeID: BBToID[&*Succ]);
535 }
536 continue;
537 }
538
539 // Handle indirect calls -- could be direct calls with unknown targets
540 // or secondary entry points of known functions, so check it is indirect
541 // to be sure.
542 if (opts::InstrumentCalls && BC.MIB->isIndirectCall(Inst: *I))
543 instrumentIndirectTarget(BB, Iter&: I, FromFunction&: Function, From: FromOffset);
544
545 } // End of instructions loop
546
547 // Instrument fallthroughs (when the direct jump instruction is missing)
548 if (!HasUnconditionalBranch && !HasJumpTable && BB.succ_size() > 0 &&
549 BB.size() > 0) {
550 BinaryBasicBlock *FTBB = BB.getFallthrough();
551 assert(FTBB && "expected valid fall-through basic block");
552 auto I = BB.begin();
553 auto LastInstr = BB.end();
554 --LastInstr;
555 while (LastInstr != I && BC.MIB->isPseudo(Inst: *LastInstr))
556 --LastInstr;
557 uint32_t FromOffset = 0;
558 // The last instruction in the BB should have an annotation, except
559 // if it was branching to the end of the function as a result of
560 // __builtin_unreachable(), in which case it was deleted by fixBranches.
561 // Ignore this case. FIXME: force fixBranches() to preserve the offset.
562 if (!BC.MIB->getOffset(Inst: *LastInstr))
563 continue;
564 FromOffset = *BC.MIB->getOffset(Inst: *LastInstr);
565
566 // Do not instrument edges in the spanning tree
567 if (llvm::is_contained(Range&: STOutSet[&BB], Element: FTBB)) {
568 auto L = BC.scopeLock();
569 createEdgeDescription(FuncDesc&: *FuncDesc, FromFunction: Function, From: FromOffset, FromNodeID: BBToID[&BB],
570 ToFunction: Function, To: FTBB->getInputOffset(), ToNodeID: BBToID[FTBB],
571 /*Instrumented=*/false);
572 continue;
573 }
574 instrumentOneTarget(SplitWorklist, SplitInstrs, Iter&: I, FromFunction&: Function, FromBB&: BB,
575 From: FromOffset, ToFunc&: Function, TargetBB: FTBB, ToOffset: FTBB->getInputOffset(),
576 IsLeaf: IsLeafFunction, IsInvoke: IsInvokeBlock, FuncDesc, FromNodeID: BBToID[&BB],
577 ToNodeID: BBToID[FTBB]);
578 }
579 } // End of BBs loop
580
581 // Instrument spanning tree leaves
582 if (!opts::ConservativeInstrumentation) {
583 for (auto BBI = Function.begin(), BBE = Function.end(); BBI != BBE; ++BBI) {
584 BinaryBasicBlock &BB = *BBI;
585 if (STOutSet[&BB].size() == 0)
586 instrumentLeafNode(BB, Iter: BB.begin(), IsLeaf: IsLeafFunction, FuncDesc&: *FuncDesc,
587 Node: BBToID[&BB]);
588 }
589 }
590
591 // Consume list of critical edges: split them and add instrumentation to the
592 // newly created BBs
593 auto Iter = SplitInstrs.begin();
594 for (std::pair<BinaryBasicBlock *, BinaryBasicBlock *> &BBPair :
595 SplitWorklist) {
596 BinaryBasicBlock *NewBB = Function.splitEdge(From: BBPair.first, To: BBPair.second);
597 NewBB->addInstructions(Begin: Iter->begin(), End: Iter->end());
598 ++Iter;
599 }
600
601 // Unused now
602 FuncDesc->EdgesSet.clear();
603}
604
605Error Instrumentation::runOnFunctions(BinaryContext &BC) {
606 const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/false,
607 /*IsText=*/false,
608 /*IsAllocatable=*/true);
609 BC.registerOrUpdateSection(Name: ".bolt.instr.counters", ELFType: ELF::SHT_PROGBITS, ELFFlags: Flags,
610 Data: nullptr, Size: 0, Alignment: 1);
611
612 BC.registerOrUpdateNoteSection(Name: ".bolt.instr.tables", Data: nullptr, Size: 0,
613 /*Alignment=*/1,
614 /*IsReadOnly=*/true, ELFType: ELF::SHT_NOTE);
615
616 Summary->IndCallCounterFuncPtr =
617 BC.Ctx->getOrCreateSymbol(Name: "__bolt_ind_call_counter_func_pointer");
618 Summary->IndTailCallCounterFuncPtr =
619 BC.Ctx->getOrCreateSymbol(Name: "__bolt_ind_tailcall_counter_func_pointer");
620
621 createAuxiliaryFunctions(BC);
622
623 ParallelUtilities::PredicateTy SkipPredicate = [&](const BinaryFunction &BF) {
624 return (!BF.isSimple() || BF.isIgnored() ||
625 (opts::InstrumentHotOnly && !BF.getKnownExecutionCount()));
626 };
627
628 ParallelUtilities::WorkFuncWithAllocTy WorkFun =
629 [&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocatorId) {
630 instrumentFunction(Function&: BF, AllocId: AllocatorId);
631 };
632
633 ParallelUtilities::runOnEachFunctionWithUniqueAllocId(
634 BC, SchedPolicy: ParallelUtilities::SchedulingPolicy::SP_INST_QUADRATIC, WorkFunction: WorkFun,
635 SkipPredicate, LogName: "instrumentation", /* ForceSequential=*/true);
636
637 if (BC.isMachO()) {
638 if (BC.StartFunctionAddress) {
639 BinaryFunction *Main =
640 BC.getBinaryFunctionAtAddress(Address: *BC.StartFunctionAddress);
641 assert(Main && "Entry point function not found");
642 BinaryBasicBlock &BB = Main->front();
643
644 ErrorOr<BinarySection &> SetupSection =
645 BC.getUniqueSectionByName(SectionName: "I__setup");
646 if (!SetupSection)
647 return createFatalBOLTError(S: "Cannot find I__setup section\n");
648
649 MCSymbol *Target = BC.registerNameAtAddress(
650 Name: "__bolt_instr_setup", Address: SetupSection->getAddress(), Size: 0, Alignment: 0);
651 MCInst NewInst;
652 BC.MIB->createCall(Inst&: NewInst, Target, Ctx: BC.Ctx.get());
653 BB.insertInstruction(At: BB.begin(), NewInst: std::move(NewInst));
654 } else {
655 BC.errs() << "BOLT-WARNING: Entry point not found\n";
656 }
657
658 if (BinaryData *BD = BC.getBinaryDataByName(Name: "___GLOBAL_init_65535/1")) {
659 BinaryFunction *Ctor = BC.getBinaryFunctionAtAddress(Address: BD->getAddress());
660 assert(Ctor && "___GLOBAL_init_65535 function not found");
661 BinaryBasicBlock &BB = Ctor->front();
662 ErrorOr<BinarySection &> FiniSection =
663 BC.getUniqueSectionByName(SectionName: "I__fini");
664 if (!FiniSection)
665 return createFatalBOLTError(S: "Cannot find I__fini section");
666
667 MCSymbol *Target = BC.registerNameAtAddress(
668 Name: "__bolt_instr_fini", Address: FiniSection->getAddress(), Size: 0, Alignment: 0);
669 auto IsLEA = [&BC](const MCInst &Inst) { return BC.MIB->isLEA64r(Inst); };
670 const auto LEA = std::find_if(
671 first: std::next(x: llvm::find_if(Range: reverse(C&: BB), P: IsLEA)), last: BB.rend(), pred: IsLEA);
672 LEA->getOperand(i: 4).setExpr(
673 MCSymbolRefExpr::create(Symbol: Target, Kind: MCSymbolRefExpr::VK_None, Ctx&: *BC.Ctx));
674 } else {
675 BC.errs() << "BOLT-WARNING: ___GLOBAL_init_65535 not found\n";
676 }
677 }
678
679 setupRuntimeLibrary(BC);
680 return Error::success();
681}
682
683void Instrumentation::createAuxiliaryFunctions(BinaryContext &BC) {
684 auto createSimpleFunction =
685 [&](StringRef Title, InstructionListType Instrs) -> BinaryFunction * {
686 BinaryFunction *Func = BC.createInjectedBinaryFunction(Name: std::string(Title));
687
688 std::vector<std::unique_ptr<BinaryBasicBlock>> BBs;
689 BBs.emplace_back(args: Func->createBasicBlock());
690 BBs.back()->addInstructions(Begin: Instrs.begin(), End: Instrs.end());
691 BBs.back()->setCFIState(0);
692 Func->insertBasicBlocks(Start: nullptr, NewBBs: std::move(BBs),
693 /*UpdateLayout=*/true,
694 /*UpdateCFIState=*/false);
695 Func->updateState(State: BinaryFunction::State::CFG_Finalized);
696 return Func;
697 };
698
699 // Here we are creating a set of functions to handle BB entry/exit.
700 // IndCallHandlerExitBB contains instructions to finish handling traffic to an
701 // indirect call. We pass it to createInstrumentedIndCallHandlerEntryBB(),
702 // which will check if a pointer to runtime library traffic accounting
703 // function was initialized (it is done during initialization of runtime
704 // library). If it is so - calls it. Then this routine returns to normal
705 // execution by jumping to exit BB.
706 BinaryFunction *IndCallHandlerExitBB =
707 createSimpleFunction("__bolt_instr_ind_call_handler",
708 BC.MIB->createInstrumentedIndCallHandlerExitBB());
709
710 IndCallHandlerExitBBFunction =
711 createSimpleFunction("__bolt_instr_ind_call_handler_func",
712 BC.MIB->createInstrumentedIndCallHandlerEntryBB(
713 InstrTrampoline: Summary->IndCallCounterFuncPtr,
714 IndCallHandler: IndCallHandlerExitBB->getSymbol(), Ctx: &*BC.Ctx));
715
716 BinaryFunction *IndTailCallHandlerExitBB = createSimpleFunction(
717 "__bolt_instr_ind_tail_call_handler",
718 BC.MIB->createInstrumentedIndTailCallHandlerExitBB());
719
720 IndTailCallHandlerExitBBFunction = createSimpleFunction(
721 "__bolt_instr_ind_tailcall_handler_func",
722 BC.MIB->createInstrumentedIndCallHandlerEntryBB(
723 InstrTrampoline: Summary->IndTailCallCounterFuncPtr,
724 IndCallHandler: IndTailCallHandlerExitBB->getSymbol(), Ctx: &*BC.Ctx));
725
726 createSimpleFunction("__bolt_num_counters_getter",
727 BC.MIB->createNumCountersGetter(Ctx: BC.Ctx.get()));
728 createSimpleFunction("__bolt_instr_locations_getter",
729 BC.MIB->createInstrLocationsGetter(Ctx: BC.Ctx.get()));
730 createSimpleFunction("__bolt_instr_tables_getter",
731 BC.MIB->createInstrTablesGetter(Ctx: BC.Ctx.get()));
732 createSimpleFunction("__bolt_instr_num_funcs_getter",
733 BC.MIB->createInstrNumFuncsGetter(Ctx: BC.Ctx.get()));
734
735 if (BC.isELF()) {
736 if (BC.StartFunctionAddress) {
737 BinaryFunction *Start =
738 BC.getBinaryFunctionAtAddress(Address: *BC.StartFunctionAddress);
739 assert(Start && "Entry point function not found");
740 const MCSymbol *StartSym = Start->getSymbol();
741 createSimpleFunction(
742 "__bolt_start_trampoline",
743 BC.MIB->createSymbolTrampoline(TgtSym: StartSym, Ctx: BC.Ctx.get()));
744 }
745 if (BC.FiniFunctionAddress) {
746 BinaryFunction *Fini =
747 BC.getBinaryFunctionAtAddress(Address: *BC.FiniFunctionAddress);
748 assert(Fini && "Finalization function not found");
749 const MCSymbol *FiniSym = Fini->getSymbol();
750 createSimpleFunction(
751 "__bolt_fini_trampoline",
752 BC.MIB->createSymbolTrampoline(TgtSym: FiniSym, Ctx: BC.Ctx.get()));
753 } else {
754 // Create dummy return function for trampoline to avoid issues
755 // with unknown symbol in runtime library. E.g. for static PIE
756 // executable
757 createSimpleFunction("__bolt_fini_trampoline",
758 BC.MIB->createDummyReturnFunction(Ctx: BC.Ctx.get()));
759 }
760 }
761}
762
763void Instrumentation::setupRuntimeLibrary(BinaryContext &BC) {
764 uint32_t FuncDescSize = Summary->getFDSize();
765
766 BC.outs() << "BOLT-INSTRUMENTER: Number of indirect call site descriptors: "
767 << Summary->IndCallDescriptions.size() << "\n";
768 BC.outs() << "BOLT-INSTRUMENTER: Number of indirect call target descriptors: "
769 << Summary->IndCallTargetDescriptions.size() << "\n";
770 BC.outs() << "BOLT-INSTRUMENTER: Number of function descriptors: "
771 << Summary->FunctionDescriptions.size() << "\n";
772 BC.outs() << "BOLT-INSTRUMENTER: Number of branch counters: "
773 << BranchCounters << "\n";
774 BC.outs() << "BOLT-INSTRUMENTER: Number of ST leaf node counters: "
775 << LeafNodeCounters << "\n";
776 BC.outs() << "BOLT-INSTRUMENTER: Number of direct call counters: "
777 << DirectCallCounters << "\n";
778 BC.outs() << "BOLT-INSTRUMENTER: Total number of counters: "
779 << Summary->Counters.size() << "\n";
780 BC.outs() << "BOLT-INSTRUMENTER: Total size of counters: "
781 << (Summary->Counters.size() * 8)
782 << " bytes (static alloc memory)\n";
783 BC.outs() << "BOLT-INSTRUMENTER: Total size of string table emitted: "
784 << Summary->StringTable.size() << " bytes in file\n";
785 BC.outs() << "BOLT-INSTRUMENTER: Total size of descriptors: "
786 << (FuncDescSize +
787 Summary->IndCallDescriptions.size() *
788 sizeof(IndCallDescription) +
789 Summary->IndCallTargetDescriptions.size() *
790 sizeof(IndCallTargetDescription))
791 << " bytes in file\n";
792 BC.outs() << "BOLT-INSTRUMENTER: Profile will be saved to file "
793 << opts::InstrumentationFilename << "\n";
794
795 InstrumentationRuntimeLibrary *RtLibrary =
796 static_cast<InstrumentationRuntimeLibrary *>(BC.getRuntimeLibrary());
797 assert(RtLibrary && "instrumentation runtime library object must be set");
798 RtLibrary->setSummary(std::move(Summary));
799}
800} // namespace bolt
801} // namespace llvm
802

source code of bolt/lib/Passes/Instrumentation.cpp