1 | //===- bolt/RuntimeLibs/InstrumentationRuntimeLibrary.cpp -----------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the InstrumentationRuntimeLibrary class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h" |
14 | #include "bolt/Core/BinaryFunction.h" |
15 | #include "bolt/Core/JumpTable.h" |
16 | #include "bolt/Core/Linker.h" |
17 | #include "bolt/Utils/CommandLineOpts.h" |
18 | #include "llvm/MC/MCStreamer.h" |
19 | #include "llvm/Support/Alignment.h" |
20 | #include "llvm/Support/CommandLine.h" |
21 | |
22 | using namespace llvm; |
23 | using namespace bolt; |
24 | |
25 | namespace opts { |
26 | |
27 | cl::opt<std::string> RuntimeInstrumentationLib( |
28 | "runtime-instrumentation-lib" , |
29 | cl::desc("specify file name of the runtime instrumentation library" ), |
30 | cl::init(Val: "libbolt_rt_instr.a" ), cl::cat(BoltOptCategory)); |
31 | |
32 | extern cl::opt<bool> InstrumentationFileAppendPID; |
33 | extern cl::opt<bool> ConservativeInstrumentation; |
34 | extern cl::opt<std::string> InstrumentationFilename; |
35 | extern cl::opt<std::string> InstrumentationBinpath; |
36 | extern cl::opt<uint32_t> InstrumentationSleepTime; |
37 | extern cl::opt<bool> InstrumentationNoCountersClear; |
38 | extern cl::opt<bool> InstrumentationWaitForks; |
39 | extern cl::opt<JumpTableSupportLevel> JumpTables; |
40 | |
41 | } // namespace opts |
42 | |
43 | void InstrumentationRuntimeLibrary::adjustCommandLineOptions( |
44 | const BinaryContext &BC) const { |
45 | if (!BC.HasRelocations) { |
46 | errs() << "BOLT-ERROR: instrumentation runtime libraries require " |
47 | "relocations\n" ; |
48 | exit(status: 1); |
49 | } |
50 | if (opts::JumpTables != JTS_MOVE) { |
51 | opts::JumpTables = JTS_MOVE; |
52 | outs() << "BOLT-INFO: forcing -jump-tables=move for instrumentation\n" ; |
53 | } |
54 | if (!BC.StartFunctionAddress) { |
55 | errs() << "BOLT-ERROR: instrumentation runtime libraries require a known " |
56 | "entry point of " |
57 | "the input binary\n" ; |
58 | exit(status: 1); |
59 | } |
60 | |
61 | if (BC.IsStaticExecutable && !opts::InstrumentationSleepTime) { |
62 | errs() << "BOLT-ERROR: instrumentation of static binary currently does not " |
63 | "support profile output on binary finalization, so it " |
64 | "requires -instrumentation-sleep-time=N (N>0) usage\n" ; |
65 | exit(status: 1); |
66 | } |
67 | |
68 | if ((opts::InstrumentationWaitForks || opts::InstrumentationSleepTime) && |
69 | opts::InstrumentationFileAppendPID) { |
70 | errs() |
71 | << "BOLT-ERROR: instrumentation-file-append-pid is not compatible with " |
72 | "instrumentation-sleep-time and instrumentation-wait-forks. If you " |
73 | "want a separate profile for each fork, it can only be dumped in " |
74 | "the end of process when instrumentation-file-append-pid is used.\n" ; |
75 | exit(status: 1); |
76 | } |
77 | } |
78 | |
79 | void InstrumentationRuntimeLibrary::emitBinary(BinaryContext &BC, |
80 | MCStreamer &Streamer) { |
81 | MCSection *Section = BC.isELF() |
82 | ? static_cast<MCSection *>(BC.Ctx->getELFSection( |
83 | Section: ".bolt.instr.counters" , Type: ELF::SHT_PROGBITS, |
84 | Flags: BinarySection::getFlags(/*IsReadOnly=*/false, |
85 | /*IsText=*/false, |
86 | /*IsAllocatable=*/true) |
87 | |
88 | )) |
89 | : static_cast<MCSection *>(BC.Ctx->getMachOSection( |
90 | Segment: "__BOLT" , Section: "__counters" , TypeAndAttributes: MachO::S_REGULAR, |
91 | K: SectionKind::getData())); |
92 | |
93 | Section->setAlignment(llvm::Align(BC.RegularPageSize)); |
94 | Streamer.switchSection(Section); |
95 | |
96 | // EmitOffset is used to determine padding size for data alignment |
97 | uint64_t EmitOffset = 0; |
98 | |
99 | auto emitLabel = [&Streamer](MCSymbol *Symbol, bool IsGlobal = true) { |
100 | Streamer.emitLabel(Symbol); |
101 | if (IsGlobal) |
102 | Streamer.emitSymbolAttribute(Symbol, Attribute: MCSymbolAttr::MCSA_Global); |
103 | }; |
104 | |
105 | auto emitLabelByName = [&BC, emitLabel](StringRef Name, |
106 | bool IsGlobal = true) { |
107 | MCSymbol *Symbol = BC.Ctx->getOrCreateSymbol(Name); |
108 | emitLabel(Symbol, IsGlobal); |
109 | }; |
110 | |
111 | auto emitPadding = [&Streamer, &EmitOffset](unsigned Size) { |
112 | const uint64_t Padding = alignTo(Value: EmitOffset, Align: Size) - EmitOffset; |
113 | if (Padding) { |
114 | Streamer.emitFill(NumBytes: Padding, FillValue: 0); |
115 | EmitOffset += Padding; |
116 | } |
117 | }; |
118 | |
119 | auto emitDataSize = [&EmitOffset](unsigned Size) { EmitOffset += Size; }; |
120 | |
121 | auto emitDataPadding = [emitPadding, emitDataSize](unsigned Size) { |
122 | emitPadding(Size); |
123 | emitDataSize(Size); |
124 | }; |
125 | |
126 | auto emitFill = [&Streamer, emitDataSize, |
127 | emitLabel](unsigned Size, MCSymbol *Symbol = nullptr, |
128 | uint8_t Byte = 0) { |
129 | emitDataSize(Size); |
130 | if (Symbol) |
131 | emitLabel(Symbol, /*IsGlobal*/ false); |
132 | Streamer.emitFill(NumBytes: Size, FillValue: Byte); |
133 | }; |
134 | |
135 | auto emitValue = [&BC, &Streamer, emitDataPadding, |
136 | emitLabel](MCSymbol *Symbol, const MCExpr *Value) { |
137 | const unsigned Psize = BC.AsmInfo->getCodePointerSize(); |
138 | emitDataPadding(Psize); |
139 | emitLabel(Symbol); |
140 | if (Value) |
141 | Streamer.emitValue(Value, Size: Psize); |
142 | else |
143 | Streamer.emitFill(NumBytes: Psize, FillValue: 0); |
144 | }; |
145 | |
146 | auto emitIntValue = [&Streamer, emitDataPadding, emitLabelByName]( |
147 | StringRef Name, uint64_t Value, unsigned Size = 4) { |
148 | emitDataPadding(Size); |
149 | emitLabelByName(Name); |
150 | Streamer.emitIntValue(Value, Size); |
151 | }; |
152 | |
153 | auto emitString = [&Streamer, emitDataSize, emitLabelByName, |
154 | emitFill](StringRef Name, StringRef Contents) { |
155 | emitDataSize(Contents.size()); |
156 | emitLabelByName(Name); |
157 | Streamer.emitBytes(Data: Contents); |
158 | emitFill(1); |
159 | }; |
160 | |
161 | // All of the following symbols will be exported as globals to be used by the |
162 | // instrumentation runtime library to dump the instrumentation data to disk. |
163 | // Label marking start of the memory region containing instrumentation |
164 | // counters, total vector size is Counters.size() 8-byte counters |
165 | emitLabelByName("__bolt_instr_locations" ); |
166 | for (MCSymbol *const &Label : Summary->Counters) |
167 | emitFill(sizeof(uint64_t), Label); |
168 | |
169 | emitPadding(BC.RegularPageSize); |
170 | emitIntValue("__bolt_instr_sleep_time" , opts::InstrumentationSleepTime); |
171 | emitIntValue("__bolt_instr_no_counters_clear" , |
172 | !!opts::InstrumentationNoCountersClear, 1); |
173 | emitIntValue("__bolt_instr_conservative" , !!opts::ConservativeInstrumentation, |
174 | 1); |
175 | emitIntValue("__bolt_instr_wait_forks" , !!opts::InstrumentationWaitForks, 1); |
176 | emitIntValue("__bolt_num_counters" , Summary->Counters.size()); |
177 | emitValue(Summary->IndCallCounterFuncPtr, nullptr); |
178 | emitValue(Summary->IndTailCallCounterFuncPtr, nullptr); |
179 | emitIntValue("__bolt_instr_num_ind_calls" , |
180 | Summary->IndCallDescriptions.size()); |
181 | emitIntValue("__bolt_instr_num_ind_targets" , |
182 | Summary->IndCallTargetDescriptions.size()); |
183 | emitIntValue("__bolt_instr_num_funcs" , Summary->FunctionDescriptions.size()); |
184 | emitString("__bolt_instr_filename" , opts::InstrumentationFilename); |
185 | emitString("__bolt_instr_binpath" , opts::InstrumentationBinpath); |
186 | emitIntValue("__bolt_instr_use_pid" , !!opts::InstrumentationFileAppendPID, 1); |
187 | |
188 | if (BC.isMachO()) { |
189 | MCSection *TablesSection = BC.Ctx->getMachOSection( |
190 | Segment: "__BOLT" , Section: "__tables" , TypeAndAttributes: MachO::S_REGULAR, K: SectionKind::getData()); |
191 | TablesSection->setAlignment(llvm::Align(BC.RegularPageSize)); |
192 | Streamer.switchSection(Section: TablesSection); |
193 | emitString("__bolt_instr_tables" , buildTables(BC)); |
194 | } |
195 | } |
196 | |
197 | void InstrumentationRuntimeLibrary::link( |
198 | BinaryContext &BC, StringRef ToolPath, BOLTLinker &Linker, |
199 | BOLTLinker::SectionsMapper MapSections) { |
200 | std::string LibPath = getLibPath(ToolPath, LibFileName: opts::RuntimeInstrumentationLib); |
201 | loadLibrary(LibPath, Linker, MapSections); |
202 | |
203 | if (BC.isMachO()) |
204 | return; |
205 | |
206 | RuntimeFiniAddress = Linker.lookupSymbol(Name: "__bolt_instr_fini" ).value_or(u: 0); |
207 | if (!RuntimeFiniAddress) { |
208 | errs() << "BOLT-ERROR: instrumentation library does not define " |
209 | "__bolt_instr_fini: " |
210 | << LibPath << "\n" ; |
211 | exit(status: 1); |
212 | } |
213 | RuntimeStartAddress = Linker.lookupSymbol(Name: "__bolt_instr_start" ).value_or(u: 0); |
214 | if (!RuntimeStartAddress) { |
215 | errs() << "BOLT-ERROR: instrumentation library does not define " |
216 | "__bolt_instr_start: " |
217 | << LibPath << "\n" ; |
218 | exit(status: 1); |
219 | } |
220 | outs() << "BOLT-INFO: output linked against instrumentation runtime " |
221 | "library, lib entry point is 0x" |
222 | << Twine::utohexstr(Val: RuntimeFiniAddress) << "\n" ; |
223 | outs() << "BOLT-INFO: clear procedure is 0x" |
224 | << Twine::utohexstr( |
225 | Val: Linker.lookupSymbol(Name: "__bolt_instr_clear_counters" ).value_or(u: 0)) |
226 | << "\n" ; |
227 | |
228 | emitTablesAsELFNote(BC); |
229 | } |
230 | |
231 | std::string InstrumentationRuntimeLibrary::buildTables(BinaryContext &BC) { |
232 | std::string TablesStr; |
233 | raw_string_ostream OS(TablesStr); |
234 | |
235 | // This is sync'ed with runtime/instr.cpp:readDescriptions() |
236 | auto getOutputAddress = [](const BinaryFunction &Func, |
237 | uint64_t Offset) -> uint64_t { |
238 | return Offset == 0 |
239 | ? Func.getOutputAddress() |
240 | : Func.translateInputToOutputAddress(Address: Func.getAddress() + Offset); |
241 | }; |
242 | |
243 | // Indirect targets need to be sorted for fast lookup during runtime |
244 | llvm::sort(C&: Summary->IndCallTargetDescriptions, |
245 | Comp: [&](const IndCallTargetDescription &A, |
246 | const IndCallTargetDescription &B) { |
247 | return getOutputAddress(*A.Target, A.ToLoc.Offset) < |
248 | getOutputAddress(*B.Target, B.ToLoc.Offset); |
249 | }); |
250 | |
251 | // Start of the vector with descriptions (one CounterDescription for each |
252 | // counter), vector size is Counters.size() CounterDescription-sized elmts |
253 | const size_t IDSize = |
254 | Summary->IndCallDescriptions.size() * sizeof(IndCallDescription); |
255 | OS.write(Ptr: reinterpret_cast<const char *>(&IDSize), Size: 4); |
256 | for (const IndCallDescription &Desc : Summary->IndCallDescriptions) { |
257 | OS.write(Ptr: reinterpret_cast<const char *>(&Desc.FromLoc.FuncString), Size: 4); |
258 | OS.write(Ptr: reinterpret_cast<const char *>(&Desc.FromLoc.Offset), Size: 4); |
259 | } |
260 | |
261 | const size_t ITDSize = Summary->IndCallTargetDescriptions.size() * |
262 | sizeof(IndCallTargetDescription); |
263 | OS.write(Ptr: reinterpret_cast<const char *>(&ITDSize), Size: 4); |
264 | for (const IndCallTargetDescription &Desc : |
265 | Summary->IndCallTargetDescriptions) { |
266 | OS.write(Ptr: reinterpret_cast<const char *>(&Desc.ToLoc.FuncString), Size: 4); |
267 | OS.write(Ptr: reinterpret_cast<const char *>(&Desc.ToLoc.Offset), Size: 4); |
268 | uint64_t TargetFuncAddress = |
269 | getOutputAddress(*Desc.Target, Desc.ToLoc.Offset); |
270 | OS.write(Ptr: reinterpret_cast<const char *>(&TargetFuncAddress), Size: 8); |
271 | } |
272 | |
273 | uint32_t FuncDescSize = Summary->getFDSize(); |
274 | OS.write(Ptr: reinterpret_cast<const char *>(&FuncDescSize), Size: 4); |
275 | for (const FunctionDescription &Desc : Summary->FunctionDescriptions) { |
276 | const size_t LeafNum = Desc.LeafNodes.size(); |
277 | OS.write(Ptr: reinterpret_cast<const char *>(&LeafNum), Size: 4); |
278 | for (const InstrumentedNode &LeafNode : Desc.LeafNodes) { |
279 | OS.write(Ptr: reinterpret_cast<const char *>(&LeafNode.Node), Size: 4); |
280 | OS.write(Ptr: reinterpret_cast<const char *>(&LeafNode.Counter), Size: 4); |
281 | } |
282 | const size_t EdgesNum = Desc.Edges.size(); |
283 | OS.write(Ptr: reinterpret_cast<const char *>(&EdgesNum), Size: 4); |
284 | for (const EdgeDescription &Edge : Desc.Edges) { |
285 | OS.write(Ptr: reinterpret_cast<const char *>(&Edge.FromLoc.FuncString), Size: 4); |
286 | OS.write(Ptr: reinterpret_cast<const char *>(&Edge.FromLoc.Offset), Size: 4); |
287 | OS.write(Ptr: reinterpret_cast<const char *>(&Edge.FromNode), Size: 4); |
288 | OS.write(Ptr: reinterpret_cast<const char *>(&Edge.ToLoc.FuncString), Size: 4); |
289 | OS.write(Ptr: reinterpret_cast<const char *>(&Edge.ToLoc.Offset), Size: 4); |
290 | OS.write(Ptr: reinterpret_cast<const char *>(&Edge.ToNode), Size: 4); |
291 | OS.write(Ptr: reinterpret_cast<const char *>(&Edge.Counter), Size: 4); |
292 | } |
293 | const size_t CallsNum = Desc.Calls.size(); |
294 | OS.write(Ptr: reinterpret_cast<const char *>(&CallsNum), Size: 4); |
295 | for (const CallDescription &Call : Desc.Calls) { |
296 | OS.write(Ptr: reinterpret_cast<const char *>(&Call.FromLoc.FuncString), Size: 4); |
297 | OS.write(Ptr: reinterpret_cast<const char *>(&Call.FromLoc.Offset), Size: 4); |
298 | OS.write(Ptr: reinterpret_cast<const char *>(&Call.FromNode), Size: 4); |
299 | OS.write(Ptr: reinterpret_cast<const char *>(&Call.ToLoc.FuncString), Size: 4); |
300 | OS.write(Ptr: reinterpret_cast<const char *>(&Call.ToLoc.Offset), Size: 4); |
301 | OS.write(Ptr: reinterpret_cast<const char *>(&Call.Counter), Size: 4); |
302 | uint64_t TargetFuncAddress = |
303 | getOutputAddress(*Call.Target, Call.ToLoc.Offset); |
304 | OS.write(Ptr: reinterpret_cast<const char *>(&TargetFuncAddress), Size: 8); |
305 | } |
306 | const size_t EntryNum = Desc.EntryNodes.size(); |
307 | OS.write(Ptr: reinterpret_cast<const char *>(&EntryNum), Size: 4); |
308 | for (const EntryNode &EntryNode : Desc.EntryNodes) { |
309 | OS.write(Ptr: reinterpret_cast<const char *>(&EntryNode.Node), Size: 8); |
310 | uint64_t TargetFuncAddress = |
311 | getOutputAddress(*Desc.Function, EntryNode.Address); |
312 | OS.write(Ptr: reinterpret_cast<const char *>(&TargetFuncAddress), Size: 8); |
313 | } |
314 | } |
315 | // Our string table lives immediately after descriptions vector |
316 | OS << Summary->StringTable; |
317 | OS.flush(); |
318 | |
319 | return TablesStr; |
320 | } |
321 | |
322 | void InstrumentationRuntimeLibrary::emitTablesAsELFNote(BinaryContext &BC) { |
323 | std::string TablesStr = buildTables(BC); |
324 | const std::string BoltInfo = BinarySection::encodeELFNote( |
325 | NameStr: "BOLT" , DescStr: TablesStr, Type: BinarySection::NT_BOLT_INSTRUMENTATION_TABLES); |
326 | BC.registerOrUpdateNoteSection(Name: ".bolt.instr.tables" , Data: copyByteArray(Buffer: BoltInfo), |
327 | Size: BoltInfo.size(), |
328 | /*Alignment=*/1, |
329 | /*IsReadOnly=*/true, ELFType: ELF::SHT_NOTE); |
330 | } |
331 | |