| 1 | //===- bolt/RuntimeLibs/InstrumentationRuntimeLibrary.cpp -----------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements the InstrumentationRuntimeLibrary class. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h" |
| 14 | #include "bolt/Core/BinaryFunction.h" |
| 15 | #include "bolt/Core/JumpTable.h" |
| 16 | #include "bolt/Core/Linker.h" |
| 17 | #include "bolt/Utils/CommandLineOpts.h" |
| 18 | #include "llvm/MC/MCStreamer.h" |
| 19 | #include "llvm/Support/Alignment.h" |
| 20 | #include "llvm/Support/CommandLine.h" |
| 21 | |
| 22 | using namespace llvm; |
| 23 | using namespace bolt; |
| 24 | |
| 25 | namespace opts { |
| 26 | |
| 27 | cl::opt<std::string> RuntimeInstrumentationLib( |
| 28 | "runtime-instrumentation-lib" , |
| 29 | cl::desc("specify path of the runtime instrumentation library" ), |
| 30 | cl::init(Val: "libbolt_rt_instr.a" ), cl::cat(BoltOptCategory)); |
| 31 | |
| 32 | extern cl::opt<bool> InstrumentationFileAppendPID; |
| 33 | extern cl::opt<bool> ConservativeInstrumentation; |
| 34 | extern cl::opt<std::string> InstrumentationFilename; |
| 35 | extern cl::opt<std::string> InstrumentationBinpath; |
| 36 | extern cl::opt<uint32_t> InstrumentationSleepTime; |
| 37 | extern cl::opt<bool> InstrumentationNoCountersClear; |
| 38 | extern cl::opt<bool> InstrumentationWaitForks; |
| 39 | extern cl::opt<JumpTableSupportLevel> JumpTables; |
| 40 | |
| 41 | } // namespace opts |
| 42 | |
| 43 | void InstrumentationRuntimeLibrary::adjustCommandLineOptions( |
| 44 | const BinaryContext &BC) const { |
| 45 | if (!BC.HasRelocations) { |
| 46 | errs() << "BOLT-ERROR: instrumentation runtime libraries require " |
| 47 | "relocations\n" ; |
| 48 | exit(status: 1); |
| 49 | } |
| 50 | if (opts::JumpTables != JTS_MOVE) { |
| 51 | opts::JumpTables = JTS_MOVE; |
| 52 | outs() << "BOLT-INFO: forcing -jump-tables=move for instrumentation\n" ; |
| 53 | } |
| 54 | if (!BC.StartFunctionAddress) { |
| 55 | errs() << "BOLT-ERROR: instrumentation runtime libraries require a known " |
| 56 | "entry point of " |
| 57 | "the input binary\n" ; |
| 58 | exit(status: 1); |
| 59 | } |
| 60 | |
| 61 | if (BC.IsStaticExecutable && !opts::InstrumentationSleepTime) { |
| 62 | errs() << "BOLT-ERROR: instrumentation of static binary currently does not " |
| 63 | "support profile output on binary finalization, so it " |
| 64 | "requires -instrumentation-sleep-time=N (N>0) usage\n" ; |
| 65 | exit(status: 1); |
| 66 | } |
| 67 | |
| 68 | if ((opts::InstrumentationWaitForks || opts::InstrumentationSleepTime) && |
| 69 | opts::InstrumentationFileAppendPID) { |
| 70 | errs() |
| 71 | << "BOLT-ERROR: instrumentation-file-append-pid is not compatible with " |
| 72 | "instrumentation-sleep-time and instrumentation-wait-forks. If you " |
| 73 | "want a separate profile for each fork, it can only be dumped in " |
| 74 | "the end of process when instrumentation-file-append-pid is used.\n" ; |
| 75 | exit(status: 1); |
| 76 | } |
| 77 | } |
| 78 | |
| 79 | void InstrumentationRuntimeLibrary::emitBinary(BinaryContext &BC, |
| 80 | MCStreamer &Streamer) { |
| 81 | MCSection *Section = BC.isELF() |
| 82 | ? static_cast<MCSection *>(BC.Ctx->getELFSection( |
| 83 | Section: ".bolt.instr.counters" , Type: ELF::SHT_PROGBITS, |
| 84 | Flags: BinarySection::getFlags(/*IsReadOnly=*/false, |
| 85 | /*IsText=*/false, |
| 86 | /*IsAllocatable=*/true) |
| 87 | |
| 88 | )) |
| 89 | : static_cast<MCSection *>(BC.Ctx->getMachOSection( |
| 90 | Segment: "__BOLT" , Section: "__counters" , TypeAndAttributes: MachO::S_REGULAR, |
| 91 | K: SectionKind::getData())); |
| 92 | |
| 93 | Section->setAlignment(llvm::Align(BC.RegularPageSize)); |
| 94 | Streamer.switchSection(Section); |
| 95 | |
| 96 | // EmitOffset is used to determine padding size for data alignment |
| 97 | uint64_t EmitOffset = 0; |
| 98 | |
| 99 | auto emitLabel = [&Streamer](MCSymbol *Symbol, bool IsGlobal = true) { |
| 100 | Streamer.emitLabel(Symbol); |
| 101 | if (IsGlobal) |
| 102 | Streamer.emitSymbolAttribute(Symbol, Attribute: MCSymbolAttr::MCSA_Global); |
| 103 | }; |
| 104 | |
| 105 | auto emitLabelByName = [&BC, emitLabel](StringRef Name, |
| 106 | bool IsGlobal = true) { |
| 107 | MCSymbol *Symbol = BC.Ctx->getOrCreateSymbol(Name); |
| 108 | emitLabel(Symbol, IsGlobal); |
| 109 | }; |
| 110 | |
| 111 | auto emitPadding = [&Streamer, &EmitOffset](unsigned Size) { |
| 112 | const uint64_t Padding = alignTo(Value: EmitOffset, Align: Size) - EmitOffset; |
| 113 | if (Padding) { |
| 114 | Streamer.emitFill(NumBytes: Padding, FillValue: 0); |
| 115 | EmitOffset += Padding; |
| 116 | } |
| 117 | }; |
| 118 | |
| 119 | auto emitDataSize = [&EmitOffset](unsigned Size) { EmitOffset += Size; }; |
| 120 | |
| 121 | auto emitDataPadding = [emitPadding, emitDataSize](unsigned Size) { |
| 122 | emitPadding(Size); |
| 123 | emitDataSize(Size); |
| 124 | }; |
| 125 | |
| 126 | auto emitFill = [&Streamer, emitDataSize, |
| 127 | emitLabel](unsigned Size, MCSymbol *Symbol = nullptr, |
| 128 | uint8_t Byte = 0) { |
| 129 | emitDataSize(Size); |
| 130 | if (Symbol) |
| 131 | emitLabel(Symbol, /*IsGlobal*/ false); |
| 132 | Streamer.emitFill(NumBytes: Size, FillValue: Byte); |
| 133 | }; |
| 134 | |
| 135 | auto emitValue = [&BC, &Streamer, emitDataPadding, |
| 136 | emitLabel](MCSymbol *Symbol, const MCExpr *Value) { |
| 137 | const unsigned Psize = BC.AsmInfo->getCodePointerSize(); |
| 138 | emitDataPadding(Psize); |
| 139 | emitLabel(Symbol); |
| 140 | if (Value) |
| 141 | Streamer.emitValue(Value, Size: Psize); |
| 142 | else |
| 143 | Streamer.emitFill(NumBytes: Psize, FillValue: 0); |
| 144 | }; |
| 145 | |
| 146 | auto emitIntValue = [&Streamer, emitDataPadding, emitLabelByName]( |
| 147 | StringRef Name, uint64_t Value, unsigned Size = 4) { |
| 148 | emitDataPadding(Size); |
| 149 | emitLabelByName(Name); |
| 150 | Streamer.emitIntValue(Value, Size); |
| 151 | }; |
| 152 | |
| 153 | auto emitString = [&Streamer, emitDataSize, emitLabelByName, |
| 154 | emitFill](StringRef Name, StringRef Contents) { |
| 155 | emitDataSize(Contents.size()); |
| 156 | emitLabelByName(Name); |
| 157 | Streamer.emitBytes(Data: Contents); |
| 158 | emitFill(1); |
| 159 | }; |
| 160 | |
| 161 | // All of the following symbols will be exported as globals to be used by the |
| 162 | // instrumentation runtime library to dump the instrumentation data to disk. |
| 163 | // Label marking start of the memory region containing instrumentation |
| 164 | // counters, total vector size is Counters.size() 8-byte counters |
| 165 | emitLabelByName("__bolt_instr_locations" ); |
| 166 | for (MCSymbol *const &Label : Summary->Counters) |
| 167 | emitFill(sizeof(uint64_t), Label); |
| 168 | |
| 169 | emitPadding(BC.RegularPageSize); |
| 170 | emitIntValue("__bolt_instr_sleep_time" , opts::InstrumentationSleepTime); |
| 171 | emitIntValue("__bolt_instr_no_counters_clear" , |
| 172 | !!opts::InstrumentationNoCountersClear, 1); |
| 173 | emitIntValue("__bolt_instr_conservative" , !!opts::ConservativeInstrumentation, |
| 174 | 1); |
| 175 | emitIntValue("__bolt_instr_wait_forks" , !!opts::InstrumentationWaitForks, 1); |
| 176 | emitIntValue("__bolt_num_counters" , Summary->Counters.size()); |
| 177 | emitValue(Summary->IndCallCounterFuncPtr, nullptr); |
| 178 | emitValue(Summary->IndTailCallCounterFuncPtr, nullptr); |
| 179 | emitIntValue("__bolt_instr_num_ind_calls" , |
| 180 | Summary->IndCallDescriptions.size()); |
| 181 | emitIntValue("__bolt_instr_num_ind_targets" , |
| 182 | Summary->IndCallTargetDescriptions.size()); |
| 183 | emitIntValue("__bolt_instr_num_funcs" , Summary->FunctionDescriptions.size()); |
| 184 | emitString("__bolt_instr_filename" , opts::InstrumentationFilename); |
| 185 | emitString("__bolt_instr_binpath" , opts::InstrumentationBinpath); |
| 186 | emitIntValue("__bolt_instr_use_pid" , !!opts::InstrumentationFileAppendPID, 1); |
| 187 | |
| 188 | if (BC.isMachO()) { |
| 189 | MCSection *TablesSection = BC.Ctx->getMachOSection( |
| 190 | Segment: "__BOLT" , Section: "__tables" , TypeAndAttributes: MachO::S_REGULAR, K: SectionKind::getData()); |
| 191 | TablesSection->setAlignment(llvm::Align(BC.RegularPageSize)); |
| 192 | Streamer.switchSection(Section: TablesSection); |
| 193 | emitString("__bolt_instr_tables" , buildTables(BC)); |
| 194 | } |
| 195 | } |
| 196 | |
| 197 | void InstrumentationRuntimeLibrary::link( |
| 198 | BinaryContext &BC, StringRef ToolPath, BOLTLinker &Linker, |
| 199 | BOLTLinker::SectionsMapper MapSections) { |
| 200 | std::string LibPath = getLibPath(ToolPath, LibFileName: opts::RuntimeInstrumentationLib); |
| 201 | loadLibrary(LibPath, Linker, MapSections); |
| 202 | |
| 203 | if (BC.isMachO()) |
| 204 | return; |
| 205 | |
| 206 | std::optional<BOLTLinker::SymbolInfo> FiniSymInfo = |
| 207 | Linker.lookupSymbolInfo(Name: "__bolt_instr_fini" ); |
| 208 | if (!FiniSymInfo) { |
| 209 | errs() << "BOLT-ERROR: instrumentation library does not define " |
| 210 | "__bolt_instr_fini: " |
| 211 | << LibPath << "\n" ; |
| 212 | exit(status: 1); |
| 213 | } |
| 214 | RuntimeFiniAddress = FiniSymInfo->Address; |
| 215 | |
| 216 | std::optional<BOLTLinker::SymbolInfo> StartSymInfo = |
| 217 | Linker.lookupSymbolInfo(Name: "__bolt_instr_start" ); |
| 218 | if (!StartSymInfo) { |
| 219 | errs() << "BOLT-ERROR: instrumentation library does not define " |
| 220 | "__bolt_instr_start: " |
| 221 | << LibPath << "\n" ; |
| 222 | exit(status: 1); |
| 223 | } |
| 224 | RuntimeStartAddress = StartSymInfo->Address; |
| 225 | |
| 226 | outs() << "BOLT-INFO: output linked against instrumentation runtime " |
| 227 | "library, lib entry point is 0x" |
| 228 | << Twine::utohexstr(Val: RuntimeStartAddress) << "\n" ; |
| 229 | |
| 230 | std::optional<BOLTLinker::SymbolInfo> ClearSymInfo = |
| 231 | Linker.lookupSymbolInfo(Name: "__bolt_instr_clear_counters" ); |
| 232 | const uint64_t ClearSymAddress = ClearSymInfo ? ClearSymInfo->Address : 0; |
| 233 | outs() << "BOLT-INFO: clear procedure is 0x" |
| 234 | << Twine::utohexstr(Val: ClearSymAddress) << "\n" ; |
| 235 | |
| 236 | emitTablesAsELFNote(BC); |
| 237 | } |
| 238 | |
| 239 | std::string InstrumentationRuntimeLibrary::buildTables(BinaryContext &BC) { |
| 240 | std::string TablesStr; |
| 241 | raw_string_ostream OS(TablesStr); |
| 242 | |
| 243 | // This is sync'ed with runtime/instr.cpp:readDescriptions() |
| 244 | auto getOutputAddress = [](const BinaryFunction &Func, |
| 245 | uint64_t Offset) -> uint64_t { |
| 246 | return Offset == 0 |
| 247 | ? Func.getOutputAddress() |
| 248 | : Func.translateInputToOutputAddress(Address: Func.getAddress() + Offset); |
| 249 | }; |
| 250 | |
| 251 | // Indirect targets need to be sorted for fast lookup during runtime |
| 252 | llvm::sort(C&: Summary->IndCallTargetDescriptions, |
| 253 | Comp: [&](const IndCallTargetDescription &A, |
| 254 | const IndCallTargetDescription &B) { |
| 255 | return getOutputAddress(*A.Target, A.ToLoc.Offset) < |
| 256 | getOutputAddress(*B.Target, B.ToLoc.Offset); |
| 257 | }); |
| 258 | |
| 259 | // Start of the vector with descriptions (one CounterDescription for each |
| 260 | // counter), vector size is Counters.size() CounterDescription-sized elmts |
| 261 | const size_t IDSize = |
| 262 | Summary->IndCallDescriptions.size() * sizeof(IndCallDescription); |
| 263 | OS.write(Ptr: reinterpret_cast<const char *>(&IDSize), Size: 4); |
| 264 | for (const IndCallDescription &Desc : Summary->IndCallDescriptions) { |
| 265 | OS.write(Ptr: reinterpret_cast<const char *>(&Desc.FromLoc.FuncString), Size: 4); |
| 266 | OS.write(Ptr: reinterpret_cast<const char *>(&Desc.FromLoc.Offset), Size: 4); |
| 267 | } |
| 268 | |
| 269 | const size_t ITDSize = Summary->IndCallTargetDescriptions.size() * |
| 270 | sizeof(IndCallTargetDescription); |
| 271 | OS.write(Ptr: reinterpret_cast<const char *>(&ITDSize), Size: 4); |
| 272 | for (const IndCallTargetDescription &Desc : |
| 273 | Summary->IndCallTargetDescriptions) { |
| 274 | OS.write(Ptr: reinterpret_cast<const char *>(&Desc.ToLoc.FuncString), Size: 4); |
| 275 | OS.write(Ptr: reinterpret_cast<const char *>(&Desc.ToLoc.Offset), Size: 4); |
| 276 | uint64_t TargetFuncAddress = |
| 277 | getOutputAddress(*Desc.Target, Desc.ToLoc.Offset); |
| 278 | OS.write(Ptr: reinterpret_cast<const char *>(&TargetFuncAddress), Size: 8); |
| 279 | } |
| 280 | |
| 281 | uint32_t FuncDescSize = Summary->getFDSize(); |
| 282 | OS.write(Ptr: reinterpret_cast<const char *>(&FuncDescSize), Size: 4); |
| 283 | for (const FunctionDescription &Desc : Summary->FunctionDescriptions) { |
| 284 | const size_t LeafNum = Desc.LeafNodes.size(); |
| 285 | OS.write(Ptr: reinterpret_cast<const char *>(&LeafNum), Size: 4); |
| 286 | for (const InstrumentedNode &LeafNode : Desc.LeafNodes) { |
| 287 | OS.write(Ptr: reinterpret_cast<const char *>(&LeafNode.Node), Size: 4); |
| 288 | OS.write(Ptr: reinterpret_cast<const char *>(&LeafNode.Counter), Size: 4); |
| 289 | } |
| 290 | const size_t EdgesNum = Desc.Edges.size(); |
| 291 | OS.write(Ptr: reinterpret_cast<const char *>(&EdgesNum), Size: 4); |
| 292 | for (const EdgeDescription &Edge : Desc.Edges) { |
| 293 | OS.write(Ptr: reinterpret_cast<const char *>(&Edge.FromLoc.FuncString), Size: 4); |
| 294 | OS.write(Ptr: reinterpret_cast<const char *>(&Edge.FromLoc.Offset), Size: 4); |
| 295 | OS.write(Ptr: reinterpret_cast<const char *>(&Edge.FromNode), Size: 4); |
| 296 | OS.write(Ptr: reinterpret_cast<const char *>(&Edge.ToLoc.FuncString), Size: 4); |
| 297 | OS.write(Ptr: reinterpret_cast<const char *>(&Edge.ToLoc.Offset), Size: 4); |
| 298 | OS.write(Ptr: reinterpret_cast<const char *>(&Edge.ToNode), Size: 4); |
| 299 | OS.write(Ptr: reinterpret_cast<const char *>(&Edge.Counter), Size: 4); |
| 300 | } |
| 301 | const size_t CallsNum = Desc.Calls.size(); |
| 302 | OS.write(Ptr: reinterpret_cast<const char *>(&CallsNum), Size: 4); |
| 303 | for (const CallDescription &Call : Desc.Calls) { |
| 304 | OS.write(Ptr: reinterpret_cast<const char *>(&Call.FromLoc.FuncString), Size: 4); |
| 305 | OS.write(Ptr: reinterpret_cast<const char *>(&Call.FromLoc.Offset), Size: 4); |
| 306 | OS.write(Ptr: reinterpret_cast<const char *>(&Call.FromNode), Size: 4); |
| 307 | OS.write(Ptr: reinterpret_cast<const char *>(&Call.ToLoc.FuncString), Size: 4); |
| 308 | OS.write(Ptr: reinterpret_cast<const char *>(&Call.ToLoc.Offset), Size: 4); |
| 309 | OS.write(Ptr: reinterpret_cast<const char *>(&Call.Counter), Size: 4); |
| 310 | uint64_t TargetFuncAddress = |
| 311 | getOutputAddress(*Call.Target, Call.ToLoc.Offset); |
| 312 | OS.write(Ptr: reinterpret_cast<const char *>(&TargetFuncAddress), Size: 8); |
| 313 | } |
| 314 | const size_t EntryNum = Desc.EntryNodes.size(); |
| 315 | OS.write(Ptr: reinterpret_cast<const char *>(&EntryNum), Size: 4); |
| 316 | for (const EntryNode &EntryNode : Desc.EntryNodes) { |
| 317 | OS.write(Ptr: reinterpret_cast<const char *>(&EntryNode.Node), Size: 8); |
| 318 | uint64_t TargetFuncAddress = |
| 319 | getOutputAddress(*Desc.Function, EntryNode.Address); |
| 320 | OS.write(Ptr: reinterpret_cast<const char *>(&TargetFuncAddress), Size: 8); |
| 321 | } |
| 322 | } |
| 323 | // Our string table lives immediately after descriptions vector |
| 324 | OS << Summary->StringTable; |
| 325 | |
| 326 | return TablesStr; |
| 327 | } |
| 328 | |
| 329 | void InstrumentationRuntimeLibrary::emitTablesAsELFNote(BinaryContext &BC) { |
| 330 | std::string TablesStr = buildTables(BC); |
| 331 | const std::string BoltInfo = BinarySection::encodeELFNote( |
| 332 | NameStr: "BOLT" , DescStr: TablesStr, Type: BinarySection::NT_BOLT_INSTRUMENTATION_TABLES); |
| 333 | BC.registerOrUpdateNoteSection(Name: ".bolt.instr.tables" , Data: copyByteArray(Buffer: BoltInfo), |
| 334 | Size: BoltInfo.size(), |
| 335 | /*Alignment=*/1, |
| 336 | /*IsReadOnly=*/true, ELFType: ELF::SHT_NOTE); |
| 337 | } |
| 338 | |