1 | //===- bolt/Passes/Instrumentation.cpp ------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the Instrumentation class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "bolt/Passes/Instrumentation.h" |
14 | #include "bolt/Core/ParallelUtilities.h" |
15 | #include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h" |
16 | #include "bolt/Utils/CommandLineOpts.h" |
17 | #include "bolt/Utils/Utils.h" |
18 | #include "llvm/Support/CommandLine.h" |
19 | #include "llvm/Support/RWMutex.h" |
20 | #include <queue> |
21 | #include <stack> |
22 | #include <unordered_set> |
23 | |
24 | #define DEBUG_TYPE "bolt-instrumentation" |
25 | |
26 | using namespace llvm; |
27 | |
28 | namespace opts { |
29 | extern cl::OptionCategory BoltInstrCategory; |
30 | |
31 | cl::opt<std::string> InstrumentationFilename( |
32 | "instrumentation-file" , |
33 | cl::desc("file name where instrumented profile will be saved (default: " |
34 | "/tmp/prof.fdata)" ), |
35 | cl::init(Val: "/tmp/prof.fdata" ), cl::Optional, cl::cat(BoltInstrCategory)); |
36 | |
37 | cl::opt<std::string> InstrumentationBinpath( |
38 | "instrumentation-binpath" , |
39 | cl::desc("path to instrumented binary in case if /proc/self/map_files " |
40 | "is not accessible due to access restriction issues" ), |
41 | cl::Optional, cl::cat(BoltInstrCategory)); |
42 | |
43 | cl::opt<bool> InstrumentationFileAppendPID( |
44 | "instrumentation-file-append-pid" , |
45 | cl::desc("append PID to saved profile file name (default: false)" ), |
46 | cl::init(Val: false), cl::Optional, cl::cat(BoltInstrCategory)); |
47 | |
48 | cl::opt<bool> ConservativeInstrumentation( |
49 | "conservative-instrumentation" , |
50 | cl::desc("disable instrumentation optimizations that sacrifice profile " |
51 | "accuracy (for debugging, default: false)" ), |
52 | cl::init(Val: false), cl::Optional, cl::cat(BoltInstrCategory)); |
53 | |
54 | cl::opt<uint32_t> InstrumentationSleepTime( |
55 | "instrumentation-sleep-time" , |
56 | cl::desc("interval between profile writes (default: 0 = write only at " |
57 | "program end). This is useful for service workloads when you " |
58 | "want to dump profile every X minutes or if you are killing the " |
59 | "program and the profile is not being dumped at the end." ), |
60 | cl::init(Val: 0), cl::Optional, cl::cat(BoltInstrCategory)); |
61 | |
62 | cl::opt<bool> InstrumentationNoCountersClear( |
63 | "instrumentation-no-counters-clear" , |
64 | cl::desc("Don't clear counters across dumps " |
65 | "(use with instrumentation-sleep-time option)" ), |
66 | cl::init(Val: false), cl::Optional, cl::cat(BoltInstrCategory)); |
67 | |
68 | cl::opt<bool> InstrumentationWaitForks( |
69 | "instrumentation-wait-forks" , |
70 | cl::desc("Wait until all forks of instrumented process will finish " |
71 | "(use with instrumentation-sleep-time option)" ), |
72 | cl::init(Val: false), cl::Optional, cl::cat(BoltInstrCategory)); |
73 | |
74 | cl::opt<bool> |
75 | InstrumentHotOnly("instrument-hot-only" , |
76 | cl::desc("only insert instrumentation on hot functions " |
77 | "(needs profile, default: false)" ), |
78 | cl::init(Val: false), cl::Optional, |
79 | cl::cat(BoltInstrCategory)); |
80 | |
81 | cl::opt<bool> InstrumentCalls("instrument-calls" , |
82 | cl::desc("record profile for inter-function " |
83 | "control flow activity (default: true)" ), |
84 | cl::init(Val: true), cl::Optional, |
85 | cl::cat(BoltInstrCategory)); |
86 | } // namespace opts |
87 | |
88 | namespace llvm { |
89 | namespace bolt { |
90 | |
91 | static bool hasAArch64ExclusiveMemop( |
92 | BinaryFunction &Function, |
93 | std::unordered_set<const BinaryBasicBlock *> &BBToSkip) { |
94 | // FIXME ARMv8-a architecture reference manual says that software must avoid |
95 | // having any explicit memory accesses between exclusive load and associated |
96 | // store instruction. So for now skip instrumentation for basic blocks that |
97 | // have these instructions, since it might lead to runtime deadlock. |
98 | BinaryContext &BC = Function.getBinaryContext(); |
99 | std::queue<std::pair<BinaryBasicBlock *, bool>> BBQueue; // {BB, isLoad} |
100 | std::unordered_set<BinaryBasicBlock *> Visited; |
101 | |
102 | if (Function.getLayout().block_begin() == Function.getLayout().block_end()) |
103 | return 0; |
104 | |
105 | BinaryBasicBlock *BBfirst = *Function.getLayout().block_begin(); |
106 | BBQueue.push(x: {BBfirst, false}); |
107 | |
108 | while (!BBQueue.empty()) { |
109 | BinaryBasicBlock *BB = BBQueue.front().first; |
110 | bool IsLoad = BBQueue.front().second; |
111 | BBQueue.pop(); |
112 | if (Visited.find(x: BB) != Visited.end()) |
113 | continue; |
114 | Visited.insert(x: BB); |
115 | |
116 | for (const MCInst &Inst : *BB) { |
117 | // Two loads one after another - skip whole function |
118 | if (BC.MIB->isAArch64ExclusiveLoad(Inst) && IsLoad) { |
119 | if (opts::Verbosity >= 2) { |
120 | outs() << "BOLT-INSTRUMENTER: function " << Function.getPrintName() |
121 | << " has two exclusive loads. Ignoring the function.\n" ; |
122 | } |
123 | return true; |
124 | } |
125 | |
126 | if (BC.MIB->isAArch64ExclusiveLoad(Inst)) |
127 | IsLoad = true; |
128 | |
129 | if (IsLoad && BBToSkip.find(x: BB) == BBToSkip.end()) { |
130 | BBToSkip.insert(x: BB); |
131 | if (opts::Verbosity >= 2) { |
132 | outs() << "BOLT-INSTRUMENTER: skip BB " << BB->getName() |
133 | << " due to exclusive instruction in function " |
134 | << Function.getPrintName() << "\n" ; |
135 | } |
136 | } |
137 | |
138 | if (!IsLoad && BC.MIB->isAArch64ExclusiveStore(Inst)) { |
139 | if (opts::Verbosity >= 2) { |
140 | outs() << "BOLT-INSTRUMENTER: function " << Function.getPrintName() |
141 | << " has exclusive store without corresponding load. Ignoring " |
142 | "the function.\n" ; |
143 | } |
144 | return true; |
145 | } |
146 | |
147 | if (IsLoad && (BC.MIB->isAArch64ExclusiveStore(Inst) || |
148 | BC.MIB->isAArch64ExclusiveClear(Inst))) |
149 | IsLoad = false; |
150 | } |
151 | |
152 | if (IsLoad && BB->succ_size() == 0) { |
153 | if (opts::Verbosity >= 2) { |
154 | outs() |
155 | << "BOLT-INSTRUMENTER: function " << Function.getPrintName() |
156 | << " has exclusive load in trailing BB. Ignoring the function.\n" ; |
157 | } |
158 | return true; |
159 | } |
160 | |
161 | for (BinaryBasicBlock *BBS : BB->successors()) |
162 | BBQueue.push(x: {BBS, IsLoad}); |
163 | } |
164 | |
165 | if (BBToSkip.size() == Visited.size()) { |
166 | if (opts::Verbosity >= 2) { |
167 | outs() << "BOLT-INSTRUMENTER: all BBs are marked with true. Ignoring the " |
168 | "function " |
169 | << Function.getPrintName() << "\n" ; |
170 | } |
171 | return true; |
172 | } |
173 | |
174 | return false; |
175 | } |
176 | |
177 | uint32_t Instrumentation::getFunctionNameIndex(const BinaryFunction &Function) { |
178 | auto Iter = FuncToStringIdx.find(x: &Function); |
179 | if (Iter != FuncToStringIdx.end()) |
180 | return Iter->second; |
181 | size_t Idx = Summary->StringTable.size(); |
182 | FuncToStringIdx.emplace(args: std::make_pair(x: &Function, y&: Idx)); |
183 | Summary->StringTable.append(str: getEscapedName(Name: Function.getOneName())); |
184 | Summary->StringTable.append(n: 1, c: '\0'); |
185 | return Idx; |
186 | } |
187 | |
188 | bool Instrumentation::createCallDescription(FunctionDescription &FuncDesc, |
189 | const BinaryFunction &FromFunction, |
190 | uint32_t From, uint32_t FromNodeID, |
191 | const BinaryFunction &ToFunction, |
192 | uint32_t To, bool IsInvoke) { |
193 | CallDescription CD; |
194 | // Ordinarily, we don't augment direct calls with an explicit counter, except |
195 | // when forced to do so or when we know this callee could be throwing |
196 | // exceptions, in which case there is no other way to accurately record its |
197 | // frequency. |
198 | bool ForceInstrumentation = opts::ConservativeInstrumentation || IsInvoke; |
199 | CD.FromLoc.FuncString = getFunctionNameIndex(Function: FromFunction); |
200 | CD.FromLoc.Offset = From; |
201 | CD.FromNode = FromNodeID; |
202 | CD.Target = &ToFunction; |
203 | CD.ToLoc.FuncString = getFunctionNameIndex(Function: ToFunction); |
204 | CD.ToLoc.Offset = To; |
205 | CD.Counter = ForceInstrumentation ? Summary->Counters.size() : 0xffffffff; |
206 | if (ForceInstrumentation) |
207 | ++DirectCallCounters; |
208 | FuncDesc.Calls.emplace_back(args&: CD); |
209 | return ForceInstrumentation; |
210 | } |
211 | |
212 | void Instrumentation::createIndCallDescription( |
213 | const BinaryFunction &FromFunction, uint32_t From) { |
214 | IndCallDescription ICD; |
215 | ICD.FromLoc.FuncString = getFunctionNameIndex(Function: FromFunction); |
216 | ICD.FromLoc.Offset = From; |
217 | Summary->IndCallDescriptions.emplace_back(args&: ICD); |
218 | } |
219 | |
220 | void Instrumentation::createIndCallTargetDescription( |
221 | const BinaryFunction &ToFunction, uint32_t To) { |
222 | IndCallTargetDescription ICD; |
223 | ICD.ToLoc.FuncString = getFunctionNameIndex(Function: ToFunction); |
224 | ICD.ToLoc.Offset = To; |
225 | ICD.Target = &ToFunction; |
226 | Summary->IndCallTargetDescriptions.emplace_back(args&: ICD); |
227 | } |
228 | |
229 | bool Instrumentation::createEdgeDescription(FunctionDescription &FuncDesc, |
230 | const BinaryFunction &FromFunction, |
231 | uint32_t From, uint32_t FromNodeID, |
232 | const BinaryFunction &ToFunction, |
233 | uint32_t To, uint32_t ToNodeID, |
234 | bool Instrumented) { |
235 | EdgeDescription ED; |
236 | auto Result = FuncDesc.EdgesSet.insert(V: std::make_pair(x&: FromNodeID, y&: ToNodeID)); |
237 | // Avoid creating duplicated edge descriptions. This happens in CFGs where a |
238 | // block jumps to its fall-through. |
239 | if (Result.second == false) |
240 | return false; |
241 | ED.FromLoc.FuncString = getFunctionNameIndex(Function: FromFunction); |
242 | ED.FromLoc.Offset = From; |
243 | ED.FromNode = FromNodeID; |
244 | ED.ToLoc.FuncString = getFunctionNameIndex(Function: ToFunction); |
245 | ED.ToLoc.Offset = To; |
246 | ED.ToNode = ToNodeID; |
247 | ED.Counter = Instrumented ? Summary->Counters.size() : 0xffffffff; |
248 | if (Instrumented) |
249 | ++BranchCounters; |
250 | FuncDesc.Edges.emplace_back(args&: ED); |
251 | return Instrumented; |
252 | } |
253 | |
254 | void Instrumentation::createLeafNodeDescription(FunctionDescription &FuncDesc, |
255 | uint32_t Node) { |
256 | InstrumentedNode IN; |
257 | IN.Node = Node; |
258 | IN.Counter = Summary->Counters.size(); |
259 | ++LeafNodeCounters; |
260 | FuncDesc.LeafNodes.emplace_back(args&: IN); |
261 | } |
262 | |
263 | InstructionListType |
264 | Instrumentation::createInstrumentationSnippet(BinaryContext &BC, bool IsLeaf) { |
265 | auto L = BC.scopeLock(); |
266 | MCSymbol *Label = BC.Ctx->createNamedTempSymbol(Name: "InstrEntry" ); |
267 | Summary->Counters.emplace_back(args&: Label); |
268 | return BC.MIB->createInstrIncMemory(Target: Label, Ctx: BC.Ctx.get(), IsLeaf, |
269 | CodePointerSize: BC.AsmInfo->getCodePointerSize()); |
270 | } |
271 | |
272 | // Helper instruction sequence insertion function |
273 | static BinaryBasicBlock::iterator |
274 | insertInstructions(InstructionListType &Instrs, BinaryBasicBlock &BB, |
275 | BinaryBasicBlock::iterator Iter) { |
276 | for (MCInst &NewInst : Instrs) { |
277 | Iter = BB.insertInstruction(At: Iter, NewInst); |
278 | ++Iter; |
279 | } |
280 | return Iter; |
281 | } |
282 | |
283 | void Instrumentation::instrumentLeafNode(BinaryBasicBlock &BB, |
284 | BinaryBasicBlock::iterator Iter, |
285 | bool IsLeaf, |
286 | FunctionDescription &FuncDesc, |
287 | uint32_t Node) { |
288 | createLeafNodeDescription(FuncDesc, Node); |
289 | InstructionListType CounterInstrs = createInstrumentationSnippet( |
290 | BC&: BB.getFunction()->getBinaryContext(), IsLeaf); |
291 | insertInstructions(Instrs&: CounterInstrs, BB, Iter); |
292 | } |
293 | |
294 | void Instrumentation::instrumentIndirectTarget(BinaryBasicBlock &BB, |
295 | BinaryBasicBlock::iterator &Iter, |
296 | BinaryFunction &FromFunction, |
297 | uint32_t From) { |
298 | auto L = FromFunction.getBinaryContext().scopeLock(); |
299 | const size_t IndCallSiteID = Summary->IndCallDescriptions.size(); |
300 | createIndCallDescription(FromFunction, From); |
301 | |
302 | BinaryContext &BC = FromFunction.getBinaryContext(); |
303 | bool IsTailCall = BC.MIB->isTailCall(Inst: *Iter); |
304 | InstructionListType CounterInstrs = BC.MIB->createInstrumentedIndirectCall( |
305 | CallInst: std::move(*Iter), |
306 | HandlerFuncAddr: IsTailCall ? IndTailCallHandlerExitBBFunction->getSymbol() |
307 | : IndCallHandlerExitBBFunction->getSymbol(), |
308 | CallSiteID: IndCallSiteID, Ctx: &*BC.Ctx); |
309 | |
310 | Iter = BB.eraseInstruction(II: Iter); |
311 | Iter = insertInstructions(Instrs&: CounterInstrs, BB, Iter); |
312 | --Iter; |
313 | } |
314 | |
315 | bool Instrumentation::instrumentOneTarget( |
316 | SplitWorklistTy &SplitWorklist, SplitInstrsTy &SplitInstrs, |
317 | BinaryBasicBlock::iterator &Iter, BinaryFunction &FromFunction, |
318 | BinaryBasicBlock &FromBB, uint32_t From, BinaryFunction &ToFunc, |
319 | BinaryBasicBlock *TargetBB, uint32_t ToOffset, bool IsLeaf, bool IsInvoke, |
320 | FunctionDescription *FuncDesc, uint32_t FromNodeID, uint32_t ToNodeID) { |
321 | BinaryContext &BC = FromFunction.getBinaryContext(); |
322 | { |
323 | auto L = BC.scopeLock(); |
324 | bool Created = true; |
325 | if (!TargetBB) |
326 | Created = createCallDescription(FuncDesc&: *FuncDesc, FromFunction, From, FromNodeID, |
327 | ToFunction: ToFunc, To: ToOffset, IsInvoke); |
328 | else |
329 | Created = createEdgeDescription(FuncDesc&: *FuncDesc, FromFunction, From, FromNodeID, |
330 | ToFunction: ToFunc, To: ToOffset, ToNodeID, |
331 | /*Instrumented=*/true); |
332 | if (!Created) |
333 | return false; |
334 | } |
335 | |
336 | InstructionListType CounterInstrs = createInstrumentationSnippet(BC, IsLeaf); |
337 | |
338 | const MCInst &Inst = *Iter; |
339 | if (BC.MIB->isCall(Inst)) { |
340 | // This code handles both |
341 | // - (regular) inter-function calls (cross-function control transfer), |
342 | // - (rare) intra-function calls (function-local control transfer) |
343 | Iter = insertInstructions(Instrs&: CounterInstrs, BB&: FromBB, Iter); |
344 | return true; |
345 | } |
346 | |
347 | if (!TargetBB || !FuncDesc) |
348 | return false; |
349 | |
350 | // Indirect branch, conditional branches or fall-throughs |
351 | // Regular cond branch, put counter at start of target block |
352 | // |
353 | // N.B.: (FromBB != TargetBBs) checks below handle conditional jumps where |
354 | // we can't put the instrumentation counter in this block because not all |
355 | // paths that reach it at this point will be taken and going to the target. |
356 | if (TargetBB->pred_size() == 1 && &FromBB != TargetBB && |
357 | !TargetBB->isEntryPoint()) { |
358 | insertInstructions(Instrs&: CounterInstrs, BB&: *TargetBB, Iter: TargetBB->begin()); |
359 | return true; |
360 | } |
361 | if (FromBB.succ_size() == 1 && &FromBB != TargetBB) { |
362 | Iter = insertInstructions(Instrs&: CounterInstrs, BB&: FromBB, Iter); |
363 | return true; |
364 | } |
365 | // Critical edge, create BB and put counter there |
366 | SplitWorklist.emplace_back(args: &FromBB, args&: TargetBB); |
367 | SplitInstrs.emplace_back(args: std::move(CounterInstrs)); |
368 | return true; |
369 | } |
370 | |
371 | void Instrumentation::instrumentFunction(BinaryFunction &Function, |
372 | MCPlusBuilder::AllocatorIdTy AllocId) { |
373 | if (Function.hasUnknownControlFlow()) |
374 | return; |
375 | |
376 | BinaryContext &BC = Function.getBinaryContext(); |
377 | if (BC.isMachO() && Function.hasName(FunctionName: "___GLOBAL_init_65535/1" )) |
378 | return; |
379 | |
380 | std::unordered_set<const BinaryBasicBlock *> BBToSkip; |
381 | if (BC.isAArch64() && hasAArch64ExclusiveMemop(Function, BBToSkip)) |
382 | return; |
383 | |
384 | SplitWorklistTy SplitWorklist; |
385 | SplitInstrsTy SplitInstrs; |
386 | |
387 | FunctionDescription *FuncDesc = nullptr; |
388 | { |
389 | std::unique_lock<llvm::sys::RWMutex> L(FDMutex); |
390 | Summary->FunctionDescriptions.emplace_back(); |
391 | FuncDesc = &Summary->FunctionDescriptions.back(); |
392 | } |
393 | |
394 | FuncDesc->Function = &Function; |
395 | Function.disambiguateJumpTables(AllocId); |
396 | Function.deleteConservativeEdges(); |
397 | |
398 | std::unordered_map<const BinaryBasicBlock *, uint32_t> BBToID; |
399 | uint32_t Id = 0; |
400 | for (auto BBI = Function.begin(); BBI != Function.end(); ++BBI) { |
401 | BBToID[&*BBI] = Id++; |
402 | } |
403 | std::unordered_set<const BinaryBasicBlock *> VisitedSet; |
404 | // DFS to establish edges we will use for a spanning tree. Edges in the |
405 | // spanning tree can be instrumentation-free since their count can be |
406 | // inferred by solving flow equations on a bottom-up traversal of the tree. |
407 | // Exit basic blocks are always instrumented so we start the traversal with |
408 | // a minimum number of defined variables to make the equation solvable. |
409 | std::stack<std::pair<const BinaryBasicBlock *, BinaryBasicBlock *>> Stack; |
410 | std::unordered_map<const BinaryBasicBlock *, |
411 | std::set<const BinaryBasicBlock *>> |
412 | STOutSet; |
413 | for (auto BBI = Function.getLayout().block_rbegin(); |
414 | BBI != Function.getLayout().block_rend(); ++BBI) { |
415 | if ((*BBI)->isEntryPoint() || (*BBI)->isLandingPad()) { |
416 | Stack.push(x: std::make_pair(x: nullptr, y&: *BBI)); |
417 | if (opts::InstrumentCalls && (*BBI)->isEntryPoint()) { |
418 | EntryNode E; |
419 | E.Node = BBToID[&**BBI]; |
420 | E.Address = (*BBI)->getInputOffset(); |
421 | FuncDesc->EntryNodes.emplace_back(args&: E); |
422 | createIndCallTargetDescription(ToFunction: Function, To: (*BBI)->getInputOffset()); |
423 | } |
424 | } |
425 | } |
426 | |
427 | // Modified version of BinaryFunction::dfs() to build a spanning tree |
428 | if (!opts::ConservativeInstrumentation) { |
429 | while (!Stack.empty()) { |
430 | BinaryBasicBlock *BB; |
431 | const BinaryBasicBlock *Pred; |
432 | std::tie(args&: Pred, args&: BB) = Stack.top(); |
433 | Stack.pop(); |
434 | if (llvm::is_contained(Range&: VisitedSet, Element: BB)) |
435 | continue; |
436 | |
437 | VisitedSet.insert(x: BB); |
438 | if (Pred) |
439 | STOutSet[Pred].insert(x: BB); |
440 | |
441 | for (BinaryBasicBlock *SuccBB : BB->successors()) |
442 | Stack.push(x: std::make_pair(x&: BB, y&: SuccBB)); |
443 | } |
444 | } |
445 | |
446 | // Determine whether this is a leaf function, which needs special |
447 | // instructions to protect the red zone |
448 | bool IsLeafFunction = true; |
449 | DenseSet<const BinaryBasicBlock *> InvokeBlocks; |
450 | for (const BinaryBasicBlock &BB : Function) { |
451 | for (const MCInst &Inst : BB) { |
452 | if (BC.MIB->isCall(Inst)) { |
453 | if (BC.MIB->isInvoke(Inst)) |
454 | InvokeBlocks.insert(V: &BB); |
455 | if (!BC.MIB->isTailCall(Inst)) |
456 | IsLeafFunction = false; |
457 | } |
458 | } |
459 | } |
460 | |
461 | for (auto BBI = Function.begin(), BBE = Function.end(); BBI != BBE; ++BBI) { |
462 | BinaryBasicBlock &BB = *BBI; |
463 | |
464 | // Skip BBs with exclusive load/stores |
465 | if (BBToSkip.find(x: &BB) != BBToSkip.end()) |
466 | continue; |
467 | |
468 | bool HasUnconditionalBranch = false; |
469 | bool HasJumpTable = false; |
470 | bool IsInvokeBlock = InvokeBlocks.count(V: &BB) > 0; |
471 | |
472 | for (auto I = BB.begin(); I != BB.end(); ++I) { |
473 | const MCInst &Inst = *I; |
474 | if (!BC.MIB->getOffset(Inst)) |
475 | continue; |
476 | |
477 | const bool IsJumpTable = Function.getJumpTable(Inst); |
478 | if (IsJumpTable) |
479 | HasJumpTable = true; |
480 | else if (BC.MIB->isUnconditionalBranch(Inst)) |
481 | HasUnconditionalBranch = true; |
482 | else if ((!BC.MIB->isCall(Inst) && !BC.MIB->isConditionalBranch(Inst)) || |
483 | BC.MIB->isUnsupportedBranch(Inst)) |
484 | continue; |
485 | |
486 | const uint32_t FromOffset = *BC.MIB->getOffset(Inst); |
487 | const MCSymbol *Target = BC.MIB->getTargetSymbol(Inst); |
488 | BinaryBasicBlock *TargetBB = Function.getBasicBlockForLabel(Label: Target); |
489 | uint32_t ToOffset = TargetBB ? TargetBB->getInputOffset() : 0; |
490 | BinaryFunction *TargetFunc = |
491 | TargetBB ? &Function : BC.getFunctionForSymbol(Symbol: Target); |
492 | if (TargetFunc && BC.MIB->isCall(Inst)) { |
493 | if (opts::InstrumentCalls) { |
494 | const BinaryBasicBlock *ForeignBB = |
495 | TargetFunc->getBasicBlockForLabel(Label: Target); |
496 | if (ForeignBB) |
497 | ToOffset = ForeignBB->getInputOffset(); |
498 | instrumentOneTarget(SplitWorklist, SplitInstrs, Iter&: I, FromFunction&: Function, FromBB&: BB, |
499 | From: FromOffset, ToFunc&: *TargetFunc, TargetBB, ToOffset, |
500 | IsLeaf: IsLeafFunction, IsInvoke: IsInvokeBlock, FuncDesc, |
501 | FromNodeID: BBToID[&BB]); |
502 | } |
503 | continue; |
504 | } |
505 | if (TargetFunc) { |
506 | // Do not instrument edges in the spanning tree |
507 | if (llvm::is_contained(Range&: STOutSet[&BB], Element: TargetBB)) { |
508 | auto L = BC.scopeLock(); |
509 | createEdgeDescription(FuncDesc&: *FuncDesc, FromFunction: Function, From: FromOffset, FromNodeID: BBToID[&BB], |
510 | ToFunction: Function, To: ToOffset, ToNodeID: BBToID[TargetBB], |
511 | /*Instrumented=*/false); |
512 | continue; |
513 | } |
514 | instrumentOneTarget(SplitWorklist, SplitInstrs, Iter&: I, FromFunction&: Function, FromBB&: BB, |
515 | From: FromOffset, ToFunc&: *TargetFunc, TargetBB, ToOffset, |
516 | IsLeaf: IsLeafFunction, IsInvoke: IsInvokeBlock, FuncDesc, |
517 | FromNodeID: BBToID[&BB], ToNodeID: BBToID[TargetBB]); |
518 | continue; |
519 | } |
520 | |
521 | if (IsJumpTable) { |
522 | for (BinaryBasicBlock *&Succ : BB.successors()) { |
523 | // Do not instrument edges in the spanning tree |
524 | if (llvm::is_contained(Range&: STOutSet[&BB], Element: &*Succ)) { |
525 | auto L = BC.scopeLock(); |
526 | createEdgeDescription(FuncDesc&: *FuncDesc, FromFunction: Function, From: FromOffset, FromNodeID: BBToID[&BB], |
527 | ToFunction: Function, To: Succ->getInputOffset(), |
528 | ToNodeID: BBToID[&*Succ], /*Instrumented=*/false); |
529 | continue; |
530 | } |
531 | instrumentOneTarget( |
532 | SplitWorklist, SplitInstrs, Iter&: I, FromFunction&: Function, FromBB&: BB, From: FromOffset, ToFunc&: Function, |
533 | TargetBB: &*Succ, ToOffset: Succ->getInputOffset(), IsLeaf: IsLeafFunction, IsInvoke: IsInvokeBlock, |
534 | FuncDesc, FromNodeID: BBToID[&BB], ToNodeID: BBToID[&*Succ]); |
535 | } |
536 | continue; |
537 | } |
538 | |
539 | // Handle indirect calls -- could be direct calls with unknown targets |
540 | // or secondary entry points of known functions, so check it is indirect |
541 | // to be sure. |
542 | if (opts::InstrumentCalls && BC.MIB->isIndirectCall(Inst: *I)) |
543 | instrumentIndirectTarget(BB, Iter&: I, FromFunction&: Function, From: FromOffset); |
544 | |
545 | } // End of instructions loop |
546 | |
547 | // Instrument fallthroughs (when the direct jump instruction is missing) |
548 | if (!HasUnconditionalBranch && !HasJumpTable && BB.succ_size() > 0 && |
549 | BB.size() > 0) { |
550 | BinaryBasicBlock *FTBB = BB.getFallthrough(); |
551 | assert(FTBB && "expected valid fall-through basic block" ); |
552 | auto I = BB.begin(); |
553 | auto LastInstr = BB.end(); |
554 | --LastInstr; |
555 | while (LastInstr != I && BC.MIB->isPseudo(Inst: *LastInstr)) |
556 | --LastInstr; |
557 | uint32_t FromOffset = 0; |
558 | // The last instruction in the BB should have an annotation, except |
559 | // if it was branching to the end of the function as a result of |
560 | // __builtin_unreachable(), in which case it was deleted by fixBranches. |
561 | // Ignore this case. FIXME: force fixBranches() to preserve the offset. |
562 | if (!BC.MIB->getOffset(Inst: *LastInstr)) |
563 | continue; |
564 | FromOffset = *BC.MIB->getOffset(Inst: *LastInstr); |
565 | |
566 | // Do not instrument edges in the spanning tree |
567 | if (llvm::is_contained(Range&: STOutSet[&BB], Element: FTBB)) { |
568 | auto L = BC.scopeLock(); |
569 | createEdgeDescription(FuncDesc&: *FuncDesc, FromFunction: Function, From: FromOffset, FromNodeID: BBToID[&BB], |
570 | ToFunction: Function, To: FTBB->getInputOffset(), ToNodeID: BBToID[FTBB], |
571 | /*Instrumented=*/false); |
572 | continue; |
573 | } |
574 | instrumentOneTarget(SplitWorklist, SplitInstrs, Iter&: I, FromFunction&: Function, FromBB&: BB, |
575 | From: FromOffset, ToFunc&: Function, TargetBB: FTBB, ToOffset: FTBB->getInputOffset(), |
576 | IsLeaf: IsLeafFunction, IsInvoke: IsInvokeBlock, FuncDesc, FromNodeID: BBToID[&BB], |
577 | ToNodeID: BBToID[FTBB]); |
578 | } |
579 | } // End of BBs loop |
580 | |
581 | // Instrument spanning tree leaves |
582 | if (!opts::ConservativeInstrumentation) { |
583 | for (auto BBI = Function.begin(), BBE = Function.end(); BBI != BBE; ++BBI) { |
584 | BinaryBasicBlock &BB = *BBI; |
585 | if (STOutSet[&BB].size() == 0) |
586 | instrumentLeafNode(BB, Iter: BB.begin(), IsLeaf: IsLeafFunction, FuncDesc&: *FuncDesc, |
587 | Node: BBToID[&BB]); |
588 | } |
589 | } |
590 | |
591 | // Consume list of critical edges: split them and add instrumentation to the |
592 | // newly created BBs |
593 | auto Iter = SplitInstrs.begin(); |
594 | for (std::pair<BinaryBasicBlock *, BinaryBasicBlock *> &BBPair : |
595 | SplitWorklist) { |
596 | BinaryBasicBlock *NewBB = Function.splitEdge(From: BBPair.first, To: BBPair.second); |
597 | NewBB->addInstructions(Begin: Iter->begin(), End: Iter->end()); |
598 | ++Iter; |
599 | } |
600 | |
601 | // Unused now |
602 | FuncDesc->EdgesSet.clear(); |
603 | } |
604 | |
605 | Error Instrumentation::runOnFunctions(BinaryContext &BC) { |
606 | const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/false, |
607 | /*IsText=*/false, |
608 | /*IsAllocatable=*/true); |
609 | BC.registerOrUpdateSection(Name: ".bolt.instr.counters" , ELFType: ELF::SHT_PROGBITS, ELFFlags: Flags, |
610 | Data: nullptr, Size: 0, Alignment: 1); |
611 | |
612 | BC.registerOrUpdateNoteSection(Name: ".bolt.instr.tables" , Data: nullptr, Size: 0, |
613 | /*Alignment=*/1, |
614 | /*IsReadOnly=*/true, ELFType: ELF::SHT_NOTE); |
615 | |
616 | Summary->IndCallCounterFuncPtr = |
617 | BC.Ctx->getOrCreateSymbol(Name: "__bolt_ind_call_counter_func_pointer" ); |
618 | Summary->IndTailCallCounterFuncPtr = |
619 | BC.Ctx->getOrCreateSymbol(Name: "__bolt_ind_tailcall_counter_func_pointer" ); |
620 | |
621 | createAuxiliaryFunctions(BC); |
622 | |
623 | ParallelUtilities::PredicateTy SkipPredicate = [&](const BinaryFunction &BF) { |
624 | return (!BF.isSimple() || BF.isIgnored() || |
625 | (opts::InstrumentHotOnly && !BF.getKnownExecutionCount())); |
626 | }; |
627 | |
628 | ParallelUtilities::WorkFuncWithAllocTy WorkFun = |
629 | [&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocatorId) { |
630 | instrumentFunction(Function&: BF, AllocId: AllocatorId); |
631 | }; |
632 | |
633 | ParallelUtilities::runOnEachFunctionWithUniqueAllocId( |
634 | BC, SchedPolicy: ParallelUtilities::SchedulingPolicy::SP_INST_QUADRATIC, WorkFunction: WorkFun, |
635 | SkipPredicate, LogName: "instrumentation" , /* ForceSequential=*/true); |
636 | |
637 | if (BC.isMachO()) { |
638 | if (BC.StartFunctionAddress) { |
639 | BinaryFunction *Main = |
640 | BC.getBinaryFunctionAtAddress(Address: *BC.StartFunctionAddress); |
641 | assert(Main && "Entry point function not found" ); |
642 | BinaryBasicBlock &BB = Main->front(); |
643 | |
644 | ErrorOr<BinarySection &> SetupSection = |
645 | BC.getUniqueSectionByName(SectionName: "I__setup" ); |
646 | if (!SetupSection) |
647 | return createFatalBOLTError(S: "Cannot find I__setup section\n" ); |
648 | |
649 | MCSymbol *Target = BC.registerNameAtAddress( |
650 | Name: "__bolt_instr_setup" , Address: SetupSection->getAddress(), Size: 0, Alignment: 0); |
651 | MCInst NewInst; |
652 | BC.MIB->createCall(Inst&: NewInst, Target, Ctx: BC.Ctx.get()); |
653 | BB.insertInstruction(At: BB.begin(), NewInst: std::move(NewInst)); |
654 | } else { |
655 | BC.errs() << "BOLT-WARNING: Entry point not found\n" ; |
656 | } |
657 | |
658 | if (BinaryData *BD = BC.getBinaryDataByName(Name: "___GLOBAL_init_65535/1" )) { |
659 | BinaryFunction *Ctor = BC.getBinaryFunctionAtAddress(Address: BD->getAddress()); |
660 | assert(Ctor && "___GLOBAL_init_65535 function not found" ); |
661 | BinaryBasicBlock &BB = Ctor->front(); |
662 | ErrorOr<BinarySection &> FiniSection = |
663 | BC.getUniqueSectionByName(SectionName: "I__fini" ); |
664 | if (!FiniSection) |
665 | return createFatalBOLTError(S: "Cannot find I__fini section" ); |
666 | |
667 | MCSymbol *Target = BC.registerNameAtAddress( |
668 | Name: "__bolt_instr_fini" , Address: FiniSection->getAddress(), Size: 0, Alignment: 0); |
669 | auto IsLEA = [&BC](const MCInst &Inst) { return BC.MIB->isLEA64r(Inst); }; |
670 | const auto LEA = std::find_if( |
671 | first: std::next(x: llvm::find_if(Range: reverse(C&: BB), P: IsLEA)), last: BB.rend(), pred: IsLEA); |
672 | LEA->getOperand(i: 4).setExpr( |
673 | MCSymbolRefExpr::create(Symbol: Target, Kind: MCSymbolRefExpr::VK_None, Ctx&: *BC.Ctx)); |
674 | } else { |
675 | BC.errs() << "BOLT-WARNING: ___GLOBAL_init_65535 not found\n" ; |
676 | } |
677 | } |
678 | |
679 | setupRuntimeLibrary(BC); |
680 | return Error::success(); |
681 | } |
682 | |
683 | void Instrumentation::createAuxiliaryFunctions(BinaryContext &BC) { |
684 | auto createSimpleFunction = |
685 | [&](StringRef Title, InstructionListType Instrs) -> BinaryFunction * { |
686 | BinaryFunction *Func = BC.createInjectedBinaryFunction(Name: std::string(Title)); |
687 | |
688 | std::vector<std::unique_ptr<BinaryBasicBlock>> BBs; |
689 | BBs.emplace_back(args: Func->createBasicBlock()); |
690 | BBs.back()->addInstructions(Begin: Instrs.begin(), End: Instrs.end()); |
691 | BBs.back()->setCFIState(0); |
692 | Func->insertBasicBlocks(Start: nullptr, NewBBs: std::move(BBs), |
693 | /*UpdateLayout=*/true, |
694 | /*UpdateCFIState=*/false); |
695 | Func->updateState(State: BinaryFunction::State::CFG_Finalized); |
696 | return Func; |
697 | }; |
698 | |
699 | // Here we are creating a set of functions to handle BB entry/exit. |
700 | // IndCallHandlerExitBB contains instructions to finish handling traffic to an |
701 | // indirect call. We pass it to createInstrumentedIndCallHandlerEntryBB(), |
702 | // which will check if a pointer to runtime library traffic accounting |
703 | // function was initialized (it is done during initialization of runtime |
704 | // library). If it is so - calls it. Then this routine returns to normal |
705 | // execution by jumping to exit BB. |
706 | BinaryFunction *IndCallHandlerExitBB = |
707 | createSimpleFunction("__bolt_instr_ind_call_handler" , |
708 | BC.MIB->createInstrumentedIndCallHandlerExitBB()); |
709 | |
710 | IndCallHandlerExitBBFunction = |
711 | createSimpleFunction("__bolt_instr_ind_call_handler_func" , |
712 | BC.MIB->createInstrumentedIndCallHandlerEntryBB( |
713 | InstrTrampoline: Summary->IndCallCounterFuncPtr, |
714 | IndCallHandler: IndCallHandlerExitBB->getSymbol(), Ctx: &*BC.Ctx)); |
715 | |
716 | BinaryFunction *IndTailCallHandlerExitBB = createSimpleFunction( |
717 | "__bolt_instr_ind_tail_call_handler" , |
718 | BC.MIB->createInstrumentedIndTailCallHandlerExitBB()); |
719 | |
720 | IndTailCallHandlerExitBBFunction = createSimpleFunction( |
721 | "__bolt_instr_ind_tailcall_handler_func" , |
722 | BC.MIB->createInstrumentedIndCallHandlerEntryBB( |
723 | InstrTrampoline: Summary->IndTailCallCounterFuncPtr, |
724 | IndCallHandler: IndTailCallHandlerExitBB->getSymbol(), Ctx: &*BC.Ctx)); |
725 | |
726 | createSimpleFunction("__bolt_num_counters_getter" , |
727 | BC.MIB->createNumCountersGetter(Ctx: BC.Ctx.get())); |
728 | createSimpleFunction("__bolt_instr_locations_getter" , |
729 | BC.MIB->createInstrLocationsGetter(Ctx: BC.Ctx.get())); |
730 | createSimpleFunction("__bolt_instr_tables_getter" , |
731 | BC.MIB->createInstrTablesGetter(Ctx: BC.Ctx.get())); |
732 | createSimpleFunction("__bolt_instr_num_funcs_getter" , |
733 | BC.MIB->createInstrNumFuncsGetter(Ctx: BC.Ctx.get())); |
734 | |
735 | if (BC.isELF()) { |
736 | if (BC.StartFunctionAddress) { |
737 | BinaryFunction *Start = |
738 | BC.getBinaryFunctionAtAddress(Address: *BC.StartFunctionAddress); |
739 | assert(Start && "Entry point function not found" ); |
740 | const MCSymbol *StartSym = Start->getSymbol(); |
741 | createSimpleFunction( |
742 | "__bolt_start_trampoline" , |
743 | BC.MIB->createSymbolTrampoline(TgtSym: StartSym, Ctx: BC.Ctx.get())); |
744 | } |
745 | if (BC.FiniFunctionAddress) { |
746 | BinaryFunction *Fini = |
747 | BC.getBinaryFunctionAtAddress(Address: *BC.FiniFunctionAddress); |
748 | assert(Fini && "Finalization function not found" ); |
749 | const MCSymbol *FiniSym = Fini->getSymbol(); |
750 | createSimpleFunction( |
751 | "__bolt_fini_trampoline" , |
752 | BC.MIB->createSymbolTrampoline(TgtSym: FiniSym, Ctx: BC.Ctx.get())); |
753 | } else { |
754 | // Create dummy return function for trampoline to avoid issues |
755 | // with unknown symbol in runtime library. E.g. for static PIE |
756 | // executable |
757 | createSimpleFunction("__bolt_fini_trampoline" , |
758 | BC.MIB->createDummyReturnFunction(Ctx: BC.Ctx.get())); |
759 | } |
760 | } |
761 | } |
762 | |
763 | void Instrumentation::setupRuntimeLibrary(BinaryContext &BC) { |
764 | uint32_t FuncDescSize = Summary->getFDSize(); |
765 | |
766 | BC.outs() << "BOLT-INSTRUMENTER: Number of indirect call site descriptors: " |
767 | << Summary->IndCallDescriptions.size() << "\n" ; |
768 | BC.outs() << "BOLT-INSTRUMENTER: Number of indirect call target descriptors: " |
769 | << Summary->IndCallTargetDescriptions.size() << "\n" ; |
770 | BC.outs() << "BOLT-INSTRUMENTER: Number of function descriptors: " |
771 | << Summary->FunctionDescriptions.size() << "\n" ; |
772 | BC.outs() << "BOLT-INSTRUMENTER: Number of branch counters: " |
773 | << BranchCounters << "\n" ; |
774 | BC.outs() << "BOLT-INSTRUMENTER: Number of ST leaf node counters: " |
775 | << LeafNodeCounters << "\n" ; |
776 | BC.outs() << "BOLT-INSTRUMENTER: Number of direct call counters: " |
777 | << DirectCallCounters << "\n" ; |
778 | BC.outs() << "BOLT-INSTRUMENTER: Total number of counters: " |
779 | << Summary->Counters.size() << "\n" ; |
780 | BC.outs() << "BOLT-INSTRUMENTER: Total size of counters: " |
781 | << (Summary->Counters.size() * 8) |
782 | << " bytes (static alloc memory)\n" ; |
783 | BC.outs() << "BOLT-INSTRUMENTER: Total size of string table emitted: " |
784 | << Summary->StringTable.size() << " bytes in file\n" ; |
785 | BC.outs() << "BOLT-INSTRUMENTER: Total size of descriptors: " |
786 | << (FuncDescSize + |
787 | Summary->IndCallDescriptions.size() * |
788 | sizeof(IndCallDescription) + |
789 | Summary->IndCallTargetDescriptions.size() * |
790 | sizeof(IndCallTargetDescription)) |
791 | << " bytes in file\n" ; |
792 | BC.outs() << "BOLT-INSTRUMENTER: Profile will be saved to file " |
793 | << opts::InstrumentationFilename << "\n" ; |
794 | |
795 | InstrumentationRuntimeLibrary *RtLibrary = |
796 | static_cast<InstrumentationRuntimeLibrary *>(BC.getRuntimeLibrary()); |
797 | assert(RtLibrary && "instrumentation runtime library object must be set" ); |
798 | RtLibrary->setSummary(std::move(Summary)); |
799 | } |
800 | } // namespace bolt |
801 | } // namespace llvm |
802 | |