| 1 | //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements the BinaryContext class. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "bolt/Core/BinaryContext.h" |
| 14 | #include "bolt/Core/BinaryEmitter.h" |
| 15 | #include "bolt/Core/BinaryFunction.h" |
| 16 | #include "bolt/Utils/CommandLineOpts.h" |
| 17 | #include "bolt/Utils/Utils.h" |
| 18 | #include "llvm/ADT/STLExtras.h" |
| 19 | #include "llvm/ADT/Twine.h" |
| 20 | #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" |
| 21 | #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" |
| 22 | #include "llvm/DebugInfo/DWARF/DWARFUnit.h" |
| 23 | #include "llvm/MC/MCAssembler.h" |
| 24 | #include "llvm/MC/MCContext.h" |
| 25 | #include "llvm/MC/MCDisassembler/MCDisassembler.h" |
| 26 | #include "llvm/MC/MCInstPrinter.h" |
| 27 | #include "llvm/MC/MCObjectStreamer.h" |
| 28 | #include "llvm/MC/MCObjectWriter.h" |
| 29 | #include "llvm/MC/MCRegisterInfo.h" |
| 30 | #include "llvm/MC/MCSectionELF.h" |
| 31 | #include "llvm/MC/MCStreamer.h" |
| 32 | #include "llvm/MC/MCSubtargetInfo.h" |
| 33 | #include "llvm/MC/MCSymbol.h" |
| 34 | #include "llvm/Support/CommandLine.h" |
| 35 | #include "llvm/Support/Error.h" |
| 36 | #include "llvm/Support/Regex.h" |
| 37 | #include <algorithm> |
| 38 | #include <functional> |
| 39 | #include <iterator> |
| 40 | #include <unordered_set> |
| 41 | |
| 42 | using namespace llvm; |
| 43 | |
| 44 | #undef DEBUG_TYPE |
| 45 | #define DEBUG_TYPE "bolt" |
| 46 | |
| 47 | namespace opts { |
| 48 | |
| 49 | static cl::opt<bool> |
| 50 | NoHugePages("no-huge-pages" , |
| 51 | cl::desc("use regular size pages for code alignment" ), |
| 52 | cl::Hidden, cl::cat(BoltCategory)); |
| 53 | |
| 54 | static cl::opt<bool> |
| 55 | PrintDebugInfo("print-debug-info" , |
| 56 | cl::desc("print debug info when printing functions" ), |
| 57 | cl::Hidden, |
| 58 | cl::ZeroOrMore, |
| 59 | cl::cat(BoltCategory)); |
| 60 | |
| 61 | cl::opt<bool> PrintRelocations( |
| 62 | "print-relocations" , |
| 63 | cl::desc("print relocations when printing functions/objects" ), cl::Hidden, |
| 64 | cl::cat(BoltCategory)); |
| 65 | |
| 66 | static cl::opt<bool> |
| 67 | PrintMemData("print-mem-data" , |
| 68 | cl::desc("print memory data annotations when printing functions" ), |
| 69 | cl::Hidden, |
| 70 | cl::ZeroOrMore, |
| 71 | cl::cat(BoltCategory)); |
| 72 | |
| 73 | cl::opt<std::string> CompDirOverride( |
| 74 | "comp-dir-override" , |
| 75 | cl::desc("overrides DW_AT_comp_dir, and provides an alternative base " |
| 76 | "location, which is used with DW_AT_dwo_name to construct a path " |
| 77 | "to *.dwo files." ), |
| 78 | cl::Hidden, cl::init(Val: "" ), cl::cat(BoltCategory)); |
| 79 | } // namespace opts |
| 80 | |
| 81 | namespace llvm { |
| 82 | namespace bolt { |
| 83 | |
| 84 | char BOLTError::ID = 0; |
| 85 | |
| 86 | BOLTError::BOLTError(bool IsFatal, const Twine &S) |
| 87 | : IsFatal(IsFatal), Msg(S.str()) {} |
| 88 | |
| 89 | void BOLTError::log(raw_ostream &OS) const { |
| 90 | if (IsFatal) |
| 91 | OS << "FATAL " ; |
| 92 | StringRef ErrMsg = StringRef(Msg); |
| 93 | // Prepend our error prefix if it is missing |
| 94 | if (ErrMsg.empty()) { |
| 95 | OS << "BOLT-ERROR\n" ; |
| 96 | } else { |
| 97 | if (!ErrMsg.starts_with(Prefix: "BOLT-ERROR" )) |
| 98 | OS << "BOLT-ERROR: " ; |
| 99 | OS << ErrMsg << "\n" ; |
| 100 | } |
| 101 | } |
| 102 | |
| 103 | std::error_code BOLTError::convertToErrorCode() const { |
| 104 | return inconvertibleErrorCode(); |
| 105 | } |
| 106 | |
| 107 | Error createNonFatalBOLTError(const Twine &S) { |
| 108 | return make_error<BOLTError>(/*IsFatal*/ Args: false, Args: S); |
| 109 | } |
| 110 | |
| 111 | Error createFatalBOLTError(const Twine &S) { |
| 112 | return make_error<BOLTError>(/*IsFatal*/ Args: true, Args: S); |
| 113 | } |
| 114 | |
| 115 | void BinaryContext::logBOLTErrorsAndQuitOnFatal(Error E) { |
| 116 | handleAllErrors(E: Error(std::move(E)), Handlers: [&](const BOLTError &E) { |
| 117 | if (!E.getMessage().empty()) |
| 118 | E.log(OS&: this->errs()); |
| 119 | if (E.isFatal()) |
| 120 | exit(status: 1); |
| 121 | }); |
| 122 | } |
| 123 | |
| 124 | BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx, |
| 125 | std::unique_ptr<DWARFContext> DwCtx, |
| 126 | std::unique_ptr<Triple> TheTriple, |
| 127 | std::shared_ptr<orc::SymbolStringPool> SSP, |
| 128 | const Target *TheTarget, std::string TripleName, |
| 129 | std::unique_ptr<MCCodeEmitter> MCE, |
| 130 | std::unique_ptr<MCObjectFileInfo> MOFI, |
| 131 | std::unique_ptr<const MCAsmInfo> AsmInfo, |
| 132 | std::unique_ptr<const MCInstrInfo> MII, |
| 133 | std::unique_ptr<const MCSubtargetInfo> STI, |
| 134 | std::unique_ptr<MCInstPrinter> InstPrinter, |
| 135 | std::unique_ptr<const MCInstrAnalysis> MIA, |
| 136 | std::unique_ptr<MCPlusBuilder> MIB, |
| 137 | std::unique_ptr<const MCRegisterInfo> MRI, |
| 138 | std::unique_ptr<MCDisassembler> DisAsm, |
| 139 | JournalingStreams Logger) |
| 140 | : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)), |
| 141 | TheTriple(std::move(TheTriple)), SSP(std::move(SSP)), |
| 142 | TheTarget(TheTarget), TripleName(TripleName), MCE(std::move(MCE)), |
| 143 | MOFI(std::move(MOFI)), AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), |
| 144 | STI(std::move(STI)), InstPrinter(std::move(InstPrinter)), |
| 145 | MIA(std::move(MIA)), MIB(std::move(MIB)), MRI(std::move(MRI)), |
| 146 | DisAsm(std::move(DisAsm)), Logger(Logger), InitialDynoStats(isAArch64()) { |
| 147 | RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86; |
| 148 | PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize; |
| 149 | } |
| 150 | |
| 151 | BinaryContext::~BinaryContext() { |
| 152 | for (BinarySection *Section : Sections) |
| 153 | delete Section; |
| 154 | for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions) |
| 155 | delete InjectedFunction; |
| 156 | for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables) |
| 157 | delete JTI.second; |
| 158 | clearBinaryData(); |
| 159 | } |
| 160 | |
| 161 | /// Create BinaryContext for a given architecture \p ArchName and |
| 162 | /// triple \p TripleName. |
| 163 | Expected<std::unique_ptr<BinaryContext>> BinaryContext::createBinaryContext( |
| 164 | Triple TheTriple, std::shared_ptr<orc::SymbolStringPool> SSP, |
| 165 | StringRef InputFileName, SubtargetFeatures *Features, bool IsPIC, |
| 166 | std::unique_ptr<DWARFContext> DwCtx, JournalingStreams Logger) { |
| 167 | StringRef ArchName = "" ; |
| 168 | std::string FeaturesStr = "" ; |
| 169 | switch (TheTriple.getArch()) { |
| 170 | case llvm::Triple::x86_64: |
| 171 | if (Features) |
| 172 | return createFatalBOLTError( |
| 173 | S: "x86_64 target does not use SubtargetFeatures" ); |
| 174 | ArchName = "x86-64" ; |
| 175 | FeaturesStr = "+nopl" ; |
| 176 | break; |
| 177 | case llvm::Triple::aarch64: |
| 178 | if (Features) |
| 179 | return createFatalBOLTError( |
| 180 | S: "AArch64 target does not use SubtargetFeatures" ); |
| 181 | ArchName = "aarch64" ; |
| 182 | FeaturesStr = "+all" ; |
| 183 | break; |
| 184 | case llvm::Triple::riscv64: { |
| 185 | ArchName = "riscv64" ; |
| 186 | if (!Features) |
| 187 | return createFatalBOLTError(S: "RISCV target needs SubtargetFeatures" ); |
| 188 | // We rely on relaxation for some transformations (e.g., promoting all calls |
| 189 | // to PseudoCALL and then making JITLink relax them). Since the relax |
| 190 | // feature is not stored in the object file, we manually enable it. |
| 191 | Features->AddFeature(String: "relax" ); |
| 192 | FeaturesStr = Features->getString(); |
| 193 | break; |
| 194 | } |
| 195 | default: |
| 196 | return createStringError(EC: std::errc::not_supported, |
| 197 | Fmt: "BOLT-ERROR: Unrecognized machine in ELF file" ); |
| 198 | } |
| 199 | |
| 200 | const std::string TripleName = TheTriple.str(); |
| 201 | |
| 202 | std::string Error; |
| 203 | const Target *TheTarget = |
| 204 | TargetRegistry::lookupTarget(ArchName, TheTriple, Error); |
| 205 | if (!TheTarget) |
| 206 | return createStringError(EC: make_error_code(e: std::errc::not_supported), |
| 207 | S: Twine("BOLT-ERROR: " , Error)); |
| 208 | |
| 209 | std::unique_ptr<const MCRegisterInfo> MRI( |
| 210 | TheTarget->createMCRegInfo(TT: TripleName)); |
| 211 | if (!MRI) |
| 212 | return createStringError( |
| 213 | EC: make_error_code(e: std::errc::not_supported), |
| 214 | S: Twine("BOLT-ERROR: no register info for target " , TripleName)); |
| 215 | |
| 216 | // Set up disassembler. |
| 217 | std::unique_ptr<MCAsmInfo> AsmInfo( |
| 218 | TheTarget->createMCAsmInfo(MRI: *MRI, TheTriple: TripleName, Options: MCTargetOptions())); |
| 219 | if (!AsmInfo) |
| 220 | return createStringError( |
| 221 | EC: make_error_code(e: std::errc::not_supported), |
| 222 | S: Twine("BOLT-ERROR: no assembly info for target " , TripleName)); |
| 223 | // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump |
| 224 | // we want to emit such names as using @PLT without double quotes to convey |
| 225 | // variant kind to the assembler. BOLT doesn't rely on the linker so we can |
| 226 | // override the default AsmInfo behavior to emit names the way we want. |
| 227 | AsmInfo->setAllowAtInName(true); |
| 228 | |
| 229 | std::unique_ptr<const MCSubtargetInfo> STI( |
| 230 | TheTarget->createMCSubtargetInfo(TheTriple: TripleName, CPU: "" , Features: FeaturesStr)); |
| 231 | if (!STI) |
| 232 | return createStringError( |
| 233 | EC: make_error_code(e: std::errc::not_supported), |
| 234 | S: Twine("BOLT-ERROR: no subtarget info for target " , TripleName)); |
| 235 | |
| 236 | std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo()); |
| 237 | if (!MII) |
| 238 | return createStringError( |
| 239 | EC: make_error_code(e: std::errc::not_supported), |
| 240 | S: Twine("BOLT-ERROR: no instruction info for target " , TripleName)); |
| 241 | |
| 242 | std::unique_ptr<MCContext> Ctx( |
| 243 | new MCContext(TheTriple, AsmInfo.get(), MRI.get(), STI.get())); |
| 244 | std::unique_ptr<MCObjectFileInfo> MOFI( |
| 245 | TheTarget->createMCObjectFileInfo(Ctx&: *Ctx, PIC: IsPIC)); |
| 246 | Ctx->setObjectFileInfo(MOFI.get()); |
| 247 | // We do not support X86 Large code model. Change this in the future. |
| 248 | bool Large = false; |
| 249 | if (TheTriple.getArch() == llvm::Triple::aarch64) |
| 250 | Large = true; |
| 251 | unsigned LSDAEncoding = |
| 252 | Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; |
| 253 | if (IsPIC) { |
| 254 | LSDAEncoding = dwarf::DW_EH_PE_pcrel | |
| 255 | (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); |
| 256 | } |
| 257 | |
| 258 | std::unique_ptr<MCDisassembler> DisAsm( |
| 259 | TheTarget->createMCDisassembler(STI: *STI, Ctx&: *Ctx)); |
| 260 | |
| 261 | if (!DisAsm) |
| 262 | return createStringError( |
| 263 | EC: make_error_code(e: std::errc::not_supported), |
| 264 | S: Twine("BOLT-ERROR: no disassembler info for target " , TripleName)); |
| 265 | |
| 266 | std::unique_ptr<const MCInstrAnalysis> MIA( |
| 267 | TheTarget->createMCInstrAnalysis(Info: MII.get())); |
| 268 | if (!MIA) |
| 269 | return createStringError( |
| 270 | EC: make_error_code(e: std::errc::not_supported), |
| 271 | S: Twine("BOLT-ERROR: failed to create instruction analysis for target " , |
| 272 | TripleName)); |
| 273 | |
| 274 | int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); |
| 275 | std::unique_ptr<MCInstPrinter> InstructionPrinter( |
| 276 | TheTarget->createMCInstPrinter(T: TheTriple, SyntaxVariant: AsmPrinterVariant, MAI: *AsmInfo, |
| 277 | MII: *MII, MRI: *MRI)); |
| 278 | if (!InstructionPrinter) |
| 279 | return createStringError( |
| 280 | EC: make_error_code(e: std::errc::not_supported), |
| 281 | S: Twine("BOLT-ERROR: no instruction printer for target " , TripleName)); |
| 282 | InstructionPrinter->setPrintImmHex(true); |
| 283 | |
| 284 | std::unique_ptr<MCCodeEmitter> MCE( |
| 285 | TheTarget->createMCCodeEmitter(II: *MII, Ctx&: *Ctx)); |
| 286 | |
| 287 | auto BC = std::make_unique<BinaryContext>( |
| 288 | args: std::move(Ctx), args: std::move(DwCtx), args: std::make_unique<Triple>(args&: TheTriple), |
| 289 | args: std::move(SSP), args&: TheTarget, args: std::string(TripleName), args: std::move(MCE), |
| 290 | args: std::move(MOFI), args: std::move(AsmInfo), args: std::move(MII), args: std::move(STI), |
| 291 | args: std::move(InstructionPrinter), args: std::move(MIA), args: nullptr, args: std::move(MRI), |
| 292 | args: std::move(DisAsm), args&: Logger); |
| 293 | |
| 294 | BC->LSDAEncoding = LSDAEncoding; |
| 295 | |
| 296 | BC->MAB = std::unique_ptr<MCAsmBackend>( |
| 297 | BC->TheTarget->createMCAsmBackend(STI: *BC->STI, MRI: *BC->MRI, Options: MCTargetOptions())); |
| 298 | |
| 299 | BC->setFilename(InputFileName); |
| 300 | |
| 301 | BC->HasFixedLoadAddress = !IsPIC; |
| 302 | |
| 303 | BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>( |
| 304 | BC->TheTarget->createMCDisassembler(STI: *BC->STI, Ctx&: *BC->Ctx)); |
| 305 | |
| 306 | if (!BC->SymbolicDisAsm) |
| 307 | return createStringError( |
| 308 | EC: make_error_code(e: std::errc::not_supported), |
| 309 | S: Twine("BOLT-ERROR: no disassembler info for target " , TripleName)); |
| 310 | |
| 311 | return std::move(BC); |
| 312 | } |
| 313 | |
| 314 | bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const { |
| 315 | if (opts::HotText && |
| 316 | (SymbolName == "__hot_start" || SymbolName == "__hot_end" )) |
| 317 | return true; |
| 318 | |
| 319 | if (opts::HotData && |
| 320 | (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end" )) |
| 321 | return true; |
| 322 | |
| 323 | if (SymbolName == "_end" ) |
| 324 | return true; |
| 325 | |
| 326 | return false; |
| 327 | } |
| 328 | |
| 329 | std::unique_ptr<MCObjectWriter> |
| 330 | BinaryContext::createObjectWriter(raw_pwrite_stream &OS) { |
| 331 | return MAB->createObjectWriter(OS); |
| 332 | } |
| 333 | |
| 334 | bool BinaryContext::validateObjectNesting() const { |
| 335 | auto Itr = BinaryDataMap.begin(); |
| 336 | auto End = BinaryDataMap.end(); |
| 337 | bool Valid = true; |
| 338 | while (Itr != End) { |
| 339 | auto Next = std::next(x: Itr); |
| 340 | while (Next != End && |
| 341 | Itr->second->getSection() == Next->second->getSection() && |
| 342 | Itr->second->containsRange(Address: Next->second->getAddress(), |
| 343 | Size: Next->second->getSize())) { |
| 344 | if (Next->second->Parent != Itr->second) { |
| 345 | this->errs() << "BOLT-WARNING: object nesting incorrect for:\n" |
| 346 | << "BOLT-WARNING: " << *Itr->second << "\n" |
| 347 | << "BOLT-WARNING: " << *Next->second << "\n" ; |
| 348 | Valid = false; |
| 349 | } |
| 350 | ++Next; |
| 351 | } |
| 352 | Itr = Next; |
| 353 | } |
| 354 | return Valid; |
| 355 | } |
| 356 | |
| 357 | bool BinaryContext::validateHoles() const { |
| 358 | bool Valid = true; |
| 359 | for (BinarySection &Section : sections()) { |
| 360 | for (const Relocation &Rel : Section.relocations()) { |
| 361 | uint64_t RelAddr = Rel.Offset + Section.getAddress(); |
| 362 | const BinaryData *BD = getBinaryDataContainingAddress(Address: RelAddr); |
| 363 | if (!BD) { |
| 364 | this->errs() |
| 365 | << "BOLT-WARNING: no BinaryData found for relocation at address" |
| 366 | << " 0x" << Twine::utohexstr(Val: RelAddr) << " in " << Section.getName() |
| 367 | << "\n" ; |
| 368 | Valid = false; |
| 369 | } else if (!BD->getAtomicRoot()) { |
| 370 | this->errs() |
| 371 | << "BOLT-WARNING: no atomic BinaryData found for relocation at " |
| 372 | << "address 0x" << Twine::utohexstr(Val: RelAddr) << " in " |
| 373 | << Section.getName() << "\n" ; |
| 374 | Valid = false; |
| 375 | } |
| 376 | } |
| 377 | } |
| 378 | return Valid; |
| 379 | } |
| 380 | |
| 381 | void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) { |
| 382 | const uint64_t Address = GAI->second->getAddress(); |
| 383 | const uint64_t Size = GAI->second->getSize(); |
| 384 | |
| 385 | auto fixParents = [&](BinaryDataMapType::iterator Itr, |
| 386 | BinaryData *NewParent) { |
| 387 | BinaryData *OldParent = Itr->second->Parent; |
| 388 | Itr->second->Parent = NewParent; |
| 389 | ++Itr; |
| 390 | while (Itr != BinaryDataMap.end() && OldParent && |
| 391 | Itr->second->Parent == OldParent) { |
| 392 | Itr->second->Parent = NewParent; |
| 393 | ++Itr; |
| 394 | } |
| 395 | }; |
| 396 | |
| 397 | // Check if the previous symbol contains the newly added symbol. |
| 398 | if (GAI != BinaryDataMap.begin()) { |
| 399 | BinaryData *Prev = std::prev(x: GAI)->second; |
| 400 | while (Prev) { |
| 401 | if (Prev->getSection() == GAI->second->getSection() && |
| 402 | Prev->containsRange(Address, Size)) { |
| 403 | fixParents(GAI, Prev); |
| 404 | } else { |
| 405 | fixParents(GAI, nullptr); |
| 406 | } |
| 407 | Prev = Prev->Parent; |
| 408 | } |
| 409 | } |
| 410 | |
| 411 | // Check if the newly added symbol contains any subsequent symbols. |
| 412 | if (Size != 0) { |
| 413 | BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second; |
| 414 | auto Itr = std::next(x: GAI); |
| 415 | while ( |
| 416 | Itr != BinaryDataMap.end() && |
| 417 | BD->containsRange(Address: Itr->second->getAddress(), Size: Itr->second->getSize())) { |
| 418 | Itr->second->Parent = BD; |
| 419 | ++Itr; |
| 420 | } |
| 421 | } |
| 422 | } |
| 423 | |
| 424 | iterator_range<BinaryContext::binary_data_iterator> |
| 425 | BinaryContext::getSubBinaryData(BinaryData *BD) { |
| 426 | auto Start = std::next(x: BinaryDataMap.find(x: BD->getAddress())); |
| 427 | auto End = Start; |
| 428 | while (End != BinaryDataMap.end() && BD->isAncestorOf(BD: End->second)) |
| 429 | ++End; |
| 430 | return make_range(x: Start, y: End); |
| 431 | } |
| 432 | |
| 433 | std::pair<const MCSymbol *, uint64_t> |
| 434 | BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF, |
| 435 | bool IsPCRel) { |
| 436 | if (isAArch64()) { |
| 437 | // Check if this is an access to a constant island and create bookkeeping |
| 438 | // to keep track of it and emit it later as part of this function. |
| 439 | if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address)) |
| 440 | return std::make_pair(x&: IslandSym, y: 0); |
| 441 | |
| 442 | // Detect custom code written in assembly that refers to arbitrary |
| 443 | // constant islands from other functions. Write this reference so we |
| 444 | // can pull this constant island and emit it as part of this function |
| 445 | // too. |
| 446 | auto IslandIter = AddressToConstantIslandMap.lower_bound(x: Address); |
| 447 | |
| 448 | if (IslandIter != AddressToConstantIslandMap.begin() && |
| 449 | (IslandIter == AddressToConstantIslandMap.end() || |
| 450 | IslandIter->first > Address)) |
| 451 | --IslandIter; |
| 452 | |
| 453 | if (IslandIter != AddressToConstantIslandMap.end()) { |
| 454 | // Fall-back to referencing the original constant island in the presence |
| 455 | // of dynamic relocs, as we currently do not support cloning them. |
| 456 | // Notice: we might fail to link because of this, if the original constant |
| 457 | // island we are referring would be emitted too far away. |
| 458 | if (IslandIter->second->hasDynamicRelocationAtIsland()) { |
| 459 | MCSymbol *IslandSym = |
| 460 | IslandIter->second->getOrCreateIslandAccess(Address); |
| 461 | if (IslandSym) |
| 462 | return std::make_pair(x&: IslandSym, y: 0); |
| 463 | } else if (MCSymbol *IslandSym = |
| 464 | IslandIter->second->getOrCreateProxyIslandAccess(Address, |
| 465 | Referrer&: BF)) { |
| 466 | BF.createIslandDependency(Island: IslandSym, BF: IslandIter->second); |
| 467 | return std::make_pair(x&: IslandSym, y: 0); |
| 468 | } |
| 469 | } |
| 470 | } |
| 471 | |
| 472 | // Note that the address does not necessarily have to reside inside |
| 473 | // a section, it could be an absolute address too. |
| 474 | ErrorOr<BinarySection &> Section = getSectionForAddress(Address); |
| 475 | if (Section && Section->isText()) { |
| 476 | if (BF.containsAddress(PC: Address, /*UseMaxSize=*/isAArch64())) { |
| 477 | if (Address != BF.getAddress()) { |
| 478 | // The address could potentially escape. Mark it as another entry |
| 479 | // point into the function. |
| 480 | if (opts::Verbosity >= 1) { |
| 481 | this->outs() << "BOLT-INFO: potentially escaped address 0x" |
| 482 | << Twine::utohexstr(Val: Address) << " in function " << BF |
| 483 | << '\n'; |
| 484 | } |
| 485 | BF.HasInternalLabelReference = true; |
| 486 | return std::make_pair( |
| 487 | x: BF.addEntryPointAtOffset(Offset: Address - BF.getAddress()), y: 0); |
| 488 | } |
| 489 | } else { |
| 490 | addInterproceduralReference(Function: &BF, Address); |
| 491 | } |
| 492 | } |
| 493 | |
| 494 | // With relocations, catch jump table references outside of the basic block |
| 495 | // containing the indirect jump. |
| 496 | if (HasRelocations) { |
| 497 | const MemoryContentsType MemType = analyzeMemoryAt(Address, BF); |
| 498 | if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) { |
| 499 | const MCSymbol *Symbol = |
| 500 | getOrCreateJumpTable(Function&: BF, Address, Type: JumpTable::JTT_PIC); |
| 501 | |
| 502 | return std::make_pair(x&: Symbol, y: 0); |
| 503 | } |
| 504 | } |
| 505 | |
| 506 | if (BinaryData *BD = getBinaryDataContainingAddress(Address)) |
| 507 | return std::make_pair(x: BD->getSymbol(), y: Address - BD->getAddress()); |
| 508 | |
| 509 | // TODO: use DWARF info to get size/alignment here? |
| 510 | MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, Prefix: "DATAat" ); |
| 511 | LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n'); |
| 512 | return std::make_pair(x&: TargetSymbol, y: 0); |
| 513 | } |
| 514 | |
| 515 | MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address, |
| 516 | BinaryFunction &BF) { |
| 517 | if (!isX86()) |
| 518 | return MemoryContentsType::UNKNOWN; |
| 519 | |
| 520 | ErrorOr<BinarySection &> Section = getSectionForAddress(Address); |
| 521 | if (!Section) { |
| 522 | // No section - possibly an absolute address. Since we don't allow |
| 523 | // internal function addresses to escape the function scope - we |
| 524 | // consider it a tail call. |
| 525 | if (opts::Verbosity > 1) { |
| 526 | this->errs() << "BOLT-WARNING: no section for address 0x" |
| 527 | << Twine::utohexstr(Val: Address) << " referenced from function " |
| 528 | << BF << '\n'; |
| 529 | } |
| 530 | return MemoryContentsType::UNKNOWN; |
| 531 | } |
| 532 | |
| 533 | if (Section->isVirtual()) { |
| 534 | // The contents are filled at runtime. |
| 535 | return MemoryContentsType::UNKNOWN; |
| 536 | } |
| 537 | |
| 538 | // No support for jump tables in code yet. |
| 539 | if (Section->isText()) |
| 540 | return MemoryContentsType::UNKNOWN; |
| 541 | |
| 542 | // Start with checking for PIC jump table. We expect non-PIC jump tables |
| 543 | // to have high 32 bits set to 0. |
| 544 | if (analyzeJumpTable(Address, Type: JumpTable::JTT_PIC, BF)) |
| 545 | return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE; |
| 546 | |
| 547 | if (analyzeJumpTable(Address, Type: JumpTable::JTT_NORMAL, BF)) |
| 548 | return MemoryContentsType::POSSIBLE_JUMP_TABLE; |
| 549 | |
| 550 | return MemoryContentsType::UNKNOWN; |
| 551 | } |
| 552 | |
| 553 | bool BinaryContext::analyzeJumpTable(const uint64_t Address, |
| 554 | const JumpTable::JumpTableType Type, |
| 555 | const BinaryFunction &BF, |
| 556 | const uint64_t NextJTAddress, |
| 557 | JumpTable::AddressesType *EntriesAsAddress, |
| 558 | bool *HasEntryInFragment) const { |
| 559 | // Target address of __builtin_unreachable. |
| 560 | const uint64_t UnreachableAddress = BF.getAddress() + BF.getSize(); |
| 561 | |
| 562 | // Is one of the targets __builtin_unreachable? |
| 563 | bool HasUnreachable = false; |
| 564 | |
| 565 | // Does one of the entries match function start address? |
| 566 | bool HasStartAsEntry = false; |
| 567 | |
| 568 | // Number of targets other than __builtin_unreachable. |
| 569 | uint64_t NumRealEntries = 0; |
| 570 | |
| 571 | // Size of the jump table without trailing __builtin_unreachable entries. |
| 572 | size_t TrimmedSize = 0; |
| 573 | |
| 574 | auto addEntryAddress = [&](uint64_t EntryAddress, bool Unreachable = false) { |
| 575 | if (!EntriesAsAddress) |
| 576 | return; |
| 577 | EntriesAsAddress->emplace_back(args&: EntryAddress); |
| 578 | if (!Unreachable) |
| 579 | TrimmedSize = EntriesAsAddress->size(); |
| 580 | }; |
| 581 | |
| 582 | auto printEntryDiagnostics = [&](raw_ostream &OS, |
| 583 | const BinaryFunction *TargetBF) { |
| 584 | OS << "FAIL: function doesn't contain this address\n" ; |
| 585 | if (!TargetBF) |
| 586 | return; |
| 587 | OS << " ! function containing this address: " << *TargetBF << '\n'; |
| 588 | if (!TargetBF->isFragment()) |
| 589 | return; |
| 590 | OS << " ! is a fragment with parents: " ; |
| 591 | ListSeparator LS; |
| 592 | for (BinaryFunction *Parent : TargetBF->ParentFragments) |
| 593 | OS << LS << *Parent; |
| 594 | OS << '\n'; |
| 595 | }; |
| 596 | |
| 597 | ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); |
| 598 | if (!Section) |
| 599 | return false; |
| 600 | |
| 601 | // The upper bound is defined by containing object, section limits, and |
| 602 | // the next jump table in memory. |
| 603 | uint64_t UpperBound = Section->getEndAddress(); |
| 604 | const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address); |
| 605 | if (JumpTableBD && JumpTableBD->getSize()) { |
| 606 | assert(JumpTableBD->getEndAddress() <= UpperBound && |
| 607 | "data object cannot cross a section boundary" ); |
| 608 | UpperBound = JumpTableBD->getEndAddress(); |
| 609 | } |
| 610 | if (NextJTAddress) |
| 611 | UpperBound = std::min(a: NextJTAddress, b: UpperBound); |
| 612 | |
| 613 | LLVM_DEBUG({ |
| 614 | using JTT = JumpTable::JumpTableType; |
| 615 | dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n" , |
| 616 | Address, BF.getPrintName(), |
| 617 | Type == JTT::JTT_PIC ? "PIC" : "Normal" ); |
| 618 | }); |
| 619 | const uint64_t EntrySize = getJumpTableEntrySize(Type); |
| 620 | for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize; |
| 621 | EntryAddress += EntrySize) { |
| 622 | LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress) |
| 623 | << " -> " ); |
| 624 | // Check if there's a proper relocation against the jump table entry. |
| 625 | if (HasRelocations) { |
| 626 | if (Type == JumpTable::JTT_PIC && |
| 627 | !DataPCRelocations.count(x: EntryAddress)) { |
| 628 | LLVM_DEBUG( |
| 629 | dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n" ); |
| 630 | break; |
| 631 | } |
| 632 | if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(Address: EntryAddress)) { |
| 633 | LLVM_DEBUG( |
| 634 | dbgs() |
| 635 | << "FAIL: JTT_NORMAL table, no relocation for this address\n" ); |
| 636 | break; |
| 637 | } |
| 638 | } |
| 639 | |
| 640 | const uint64_t Value = |
| 641 | (Type == JumpTable::JTT_PIC) |
| 642 | ? Address + *getSignedValueAtAddress(Address: EntryAddress, Size: EntrySize) |
| 643 | : *getPointerAtAddress(Address: EntryAddress); |
| 644 | |
| 645 | // __builtin_unreachable() case. |
| 646 | if (Value == UnreachableAddress) { |
| 647 | addEntryAddress(Value, /*Unreachable*/ true); |
| 648 | HasUnreachable = true; |
| 649 | LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n" , Value)); |
| 650 | continue; |
| 651 | } |
| 652 | |
| 653 | // Function start is another special case. It is allowed in the jump table, |
| 654 | // but we need at least one another regular entry to distinguish the table |
| 655 | // from, e.g. a function pointer array. |
| 656 | if (Value == BF.getAddress()) { |
| 657 | HasStartAsEntry = true; |
| 658 | addEntryAddress(Value); |
| 659 | continue; |
| 660 | } |
| 661 | |
| 662 | // Function or one of its fragments. |
| 663 | const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Address: Value); |
| 664 | if (!TargetBF || !areRelatedFragments(LHS: TargetBF, RHS: &BF)) { |
| 665 | LLVM_DEBUG(printEntryDiagnostics(dbgs(), TargetBF)); |
| 666 | (void)printEntryDiagnostics; |
| 667 | break; |
| 668 | } |
| 669 | |
| 670 | // Check there's an instruction at this offset. |
| 671 | if (TargetBF->getState() == BinaryFunction::State::Disassembled && |
| 672 | !TargetBF->getInstructionAtOffset(Offset: Value - TargetBF->getAddress())) { |
| 673 | LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n" , Value)); |
| 674 | break; |
| 675 | } |
| 676 | |
| 677 | ++NumRealEntries; |
| 678 | LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n" , Value)); |
| 679 | |
| 680 | if (TargetBF != &BF && HasEntryInFragment) |
| 681 | *HasEntryInFragment = true; |
| 682 | addEntryAddress(Value); |
| 683 | } |
| 684 | |
| 685 | // Trim direct/normal jump table to exclude trailing unreachable entries that |
| 686 | // can collide with a function address. |
| 687 | if (Type == JumpTable::JTT_NORMAL && EntriesAsAddress && |
| 688 | TrimmedSize != EntriesAsAddress->size() && |
| 689 | getBinaryFunctionAtAddress(Address: UnreachableAddress)) |
| 690 | EntriesAsAddress->resize(new_size: TrimmedSize); |
| 691 | |
| 692 | // It's a jump table if the number of real entries is more than 1, or there's |
| 693 | // one real entry and one or more special targets. If there are only multiple |
| 694 | // special targets, then it's not a jump table. |
| 695 | return NumRealEntries + (HasUnreachable || HasStartAsEntry) >= 2; |
| 696 | } |
| 697 | |
| 698 | void BinaryContext::populateJumpTables() { |
| 699 | LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size() |
| 700 | << '\n'); |
| 701 | for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE; |
| 702 | ++JTI) { |
| 703 | JumpTable *JT = JTI->second; |
| 704 | |
| 705 | if (!llvm::all_of(Range&: JT->Parents, P: std::mem_fn(pm: &BinaryFunction::isSimple))) |
| 706 | continue; |
| 707 | |
| 708 | uint64_t NextJTAddress = 0; |
| 709 | auto NextJTI = std::next(x: JTI); |
| 710 | if (NextJTI != JTE) |
| 711 | NextJTAddress = NextJTI->second->getAddress(); |
| 712 | |
| 713 | const bool Success = |
| 714 | analyzeJumpTable(Address: JT->getAddress(), Type: JT->Type, BF: *(JT->Parents[0]), |
| 715 | NextJTAddress, EntriesAsAddress: &JT->EntriesAsAddress, HasEntryInFragment: &JT->IsSplit); |
| 716 | if (!Success) { |
| 717 | LLVM_DEBUG({ |
| 718 | dbgs() << "failed to analyze " ; |
| 719 | JT->print(dbgs()); |
| 720 | if (NextJTI != JTE) { |
| 721 | dbgs() << "next " ; |
| 722 | NextJTI->second->print(dbgs()); |
| 723 | } |
| 724 | }); |
| 725 | llvm_unreachable("jump table heuristic failure" ); |
| 726 | } |
| 727 | for (BinaryFunction *Frag : JT->Parents) { |
| 728 | if (JT->IsSplit) |
| 729 | Frag->setHasIndirectTargetToSplitFragment(true); |
| 730 | for (uint64_t EntryAddress : JT->EntriesAsAddress) |
| 731 | // if target is builtin_unreachable |
| 732 | if (EntryAddress == Frag->getAddress() + Frag->getSize()) { |
| 733 | Frag->IgnoredBranches.emplace_back(Args: EntryAddress - Frag->getAddress(), |
| 734 | Args: Frag->getSize()); |
| 735 | } else if (EntryAddress >= Frag->getAddress() && |
| 736 | EntryAddress < Frag->getAddress() + Frag->getSize()) { |
| 737 | Frag->registerReferencedOffset(Offset: EntryAddress - Frag->getAddress()); |
| 738 | } |
| 739 | } |
| 740 | |
| 741 | // In strict mode, erase PC-relative relocation record. Later we check that |
| 742 | // all such records are erased and thus have been accounted for. |
| 743 | if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) { |
| 744 | for (uint64_t Address = JT->getAddress(); |
| 745 | Address < JT->getAddress() + JT->getSize(); |
| 746 | Address += JT->EntrySize) { |
| 747 | DataPCRelocations.erase(position: DataPCRelocations.find(x: Address)); |
| 748 | } |
| 749 | } |
| 750 | |
| 751 | // Mark to skip the function and all its fragments. |
| 752 | for (BinaryFunction *Frag : JT->Parents) |
| 753 | if (Frag->hasIndirectTargetToSplitFragment()) |
| 754 | addFragmentsToSkip(Function: Frag); |
| 755 | } |
| 756 | |
| 757 | if (opts::StrictMode && DataPCRelocations.size()) { |
| 758 | LLVM_DEBUG({ |
| 759 | dbgs() << DataPCRelocations.size() |
| 760 | << " unclaimed PC-relative relocations left in data:\n" ; |
| 761 | for (uint64_t Reloc : DataPCRelocations) |
| 762 | dbgs() << Twine::utohexstr(Reloc) << '\n'; |
| 763 | }); |
| 764 | assert(0 && "unclaimed PC-relative relocations left in data\n" ); |
| 765 | } |
| 766 | clearList(List&: DataPCRelocations); |
| 767 | } |
| 768 | |
| 769 | void BinaryContext::skipMarkedFragments() { |
| 770 | std::vector<BinaryFunction *> FragmentQueue; |
| 771 | // Copy the functions to FragmentQueue. |
| 772 | FragmentQueue.assign(first: FragmentsToSkip.begin(), last: FragmentsToSkip.end()); |
| 773 | auto addToWorklist = [&](BinaryFunction *Function) -> void { |
| 774 | if (FragmentsToSkip.count(x: Function)) |
| 775 | return; |
| 776 | FragmentQueue.push_back(x: Function); |
| 777 | addFragmentsToSkip(Function); |
| 778 | }; |
| 779 | // Functions containing split jump tables need to be skipped with all |
| 780 | // fragments (transitively). |
| 781 | for (size_t I = 0; I != FragmentQueue.size(); I++) { |
| 782 | BinaryFunction *BF = FragmentQueue[I]; |
| 783 | assert(FragmentsToSkip.count(BF) && |
| 784 | "internal error in traversing function fragments" ); |
| 785 | if (opts::Verbosity >= 1) |
| 786 | this->errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n'; |
| 787 | BF->setSimple(false); |
| 788 | BF->setHasIndirectTargetToSplitFragment(true); |
| 789 | |
| 790 | llvm::for_each(Range&: BF->Fragments, F: addToWorklist); |
| 791 | llvm::for_each(Range&: BF->ParentFragments, F: addToWorklist); |
| 792 | } |
| 793 | if (!FragmentsToSkip.empty()) |
| 794 | this->errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size() |
| 795 | << " function" << (FragmentsToSkip.size() == 1 ? "" : "s" ) |
| 796 | << " due to cold fragments\n" ; |
| 797 | } |
| 798 | |
| 799 | MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix, |
| 800 | uint64_t Size, |
| 801 | uint16_t Alignment, |
| 802 | unsigned Flags) { |
| 803 | auto Itr = BinaryDataMap.find(x: Address); |
| 804 | if (Itr != BinaryDataMap.end()) { |
| 805 | assert(Itr->second->getSize() == Size || !Size); |
| 806 | return Itr->second->getSymbol(); |
| 807 | } |
| 808 | |
| 809 | std::string Name = (Prefix + "0x" + Twine::utohexstr(Val: Address)).str(); |
| 810 | assert(!GlobalSymbols.count(Name) && "created name is not unique" ); |
| 811 | return registerNameAtAddress(Name, Address, Size, Alignment, Flags); |
| 812 | } |
| 813 | |
| 814 | MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) { |
| 815 | return Ctx->getOrCreateSymbol(Name); |
| 816 | } |
| 817 | |
| 818 | BinaryFunction *BinaryContext::createBinaryFunction( |
| 819 | const std::string &Name, BinarySection &Section, uint64_t Address, |
| 820 | uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) { |
| 821 | auto Result = BinaryFunctions.emplace( |
| 822 | args&: Address, args: BinaryFunction(Name, Section, Address, Size, *this)); |
| 823 | assert(Result.second == true && "unexpected duplicate function" ); |
| 824 | BinaryFunction *BF = &Result.first->second; |
| 825 | registerNameAtAddress(Name, Address, Size: SymbolSize ? SymbolSize : Size, |
| 826 | Alignment); |
| 827 | setSymbolToFunctionMap(Sym: BF->getSymbol(), BF); |
| 828 | return BF; |
| 829 | } |
| 830 | |
| 831 | const MCSymbol * |
| 832 | BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address, |
| 833 | JumpTable::JumpTableType Type) { |
| 834 | // Two fragments of same function access same jump table |
| 835 | if (JumpTable *JT = getJumpTableContainingAddress(Address)) { |
| 836 | assert(JT->Type == Type && "jump table types have to match" ); |
| 837 | assert(Address == JT->getAddress() && "unexpected non-empty jump table" ); |
| 838 | |
| 839 | if (llvm::is_contained(Range&: JT->Parents, Element: &Function)) |
| 840 | return JT->getFirstLabel(); |
| 841 | |
| 842 | // Prevent associating a jump table to a specific fragment twice. |
| 843 | auto isSibling = std::bind(f: &BinaryContext::areRelatedFragments, args: this, |
| 844 | args: &Function, args: std::placeholders::_1); |
| 845 | assert(llvm::all_of(JT->Parents, isSibling) && |
| 846 | "cannot re-use jump table of a different function" ); |
| 847 | (void)isSibling; |
| 848 | if (opts::Verbosity > 2) { |
| 849 | this->outs() << "BOLT-INFO: multiple fragments access the same jump table" |
| 850 | << ": " << *JT->Parents[0] << "; " << Function << '\n'; |
| 851 | JT->print(OS&: this->outs()); |
| 852 | } |
| 853 | if (JT->Parents.size() == 1) |
| 854 | JT->Parents.front()->setHasIndirectTargetToSplitFragment(true); |
| 855 | Function.setHasIndirectTargetToSplitFragment(true); |
| 856 | // Duplicate the entry for the parent function for easy access |
| 857 | JT->Parents.push_back(Elt: &Function); |
| 858 | Function.JumpTables.emplace(args&: Address, args&: JT); |
| 859 | return JT->getFirstLabel(); |
| 860 | } |
| 861 | |
| 862 | // Re-use the existing symbol if possible. |
| 863 | MCSymbol *JTLabel = nullptr; |
| 864 | if (BinaryData *Object = getBinaryDataAtAddress(Address)) { |
| 865 | if (!isInternalSymbolName(Name: Object->getSymbol()->getName())) |
| 866 | JTLabel = Object->getSymbol(); |
| 867 | } |
| 868 | |
| 869 | const uint64_t EntrySize = getJumpTableEntrySize(Type); |
| 870 | if (!JTLabel) { |
| 871 | const std::string JumpTableName = generateJumpTableName(BF: Function, Address); |
| 872 | JTLabel = registerNameAtAddress(Name: JumpTableName, Address, Size: 0, Alignment: EntrySize); |
| 873 | } |
| 874 | |
| 875 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName() |
| 876 | << " in function " << Function << '\n'); |
| 877 | |
| 878 | JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type, |
| 879 | JumpTable::LabelMapType{{0, JTLabel}}, |
| 880 | *getSectionForAddress(Address)); |
| 881 | JT->Parents.push_back(Elt: &Function); |
| 882 | if (opts::Verbosity > 2) |
| 883 | JT->print(OS&: this->outs()); |
| 884 | JumpTables.emplace(args&: Address, args&: JT); |
| 885 | |
| 886 | // Duplicate the entry for the parent function for easy access. |
| 887 | Function.JumpTables.emplace(args&: Address, args&: JT); |
| 888 | return JTLabel; |
| 889 | } |
| 890 | |
| 891 | std::pair<uint64_t, const MCSymbol *> |
| 892 | BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT, |
| 893 | const MCSymbol *OldLabel) { |
| 894 | auto L = scopeLock(); |
| 895 | unsigned Offset = 0; |
| 896 | bool Found = false; |
| 897 | for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) { |
| 898 | if (Elmt.second != OldLabel) |
| 899 | continue; |
| 900 | Offset = Elmt.first; |
| 901 | Found = true; |
| 902 | break; |
| 903 | } |
| 904 | assert(Found && "Label not found" ); |
| 905 | (void)Found; |
| 906 | MCSymbol *NewLabel = Ctx->createNamedTempSymbol(Name: "duplicatedJT" ); |
| 907 | JumpTable *NewJT = |
| 908 | new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type, |
| 909 | JumpTable::LabelMapType{{Offset, NewLabel}}, |
| 910 | *getSectionForAddress(Address: JT->getAddress())); |
| 911 | NewJT->Parents = JT->Parents; |
| 912 | NewJT->Entries = JT->Entries; |
| 913 | NewJT->Counts = JT->Counts; |
| 914 | uint64_t JumpTableID = ++DuplicatedJumpTables; |
| 915 | // Invert it to differentiate from regular jump tables whose IDs are their |
| 916 | // addresses in the input binary memory space |
| 917 | JumpTableID = ~JumpTableID; |
| 918 | JumpTables.emplace(args&: JumpTableID, args&: NewJT); |
| 919 | Function.JumpTables.emplace(args&: JumpTableID, args&: NewJT); |
| 920 | return std::make_pair(x&: JumpTableID, y&: NewLabel); |
| 921 | } |
| 922 | |
| 923 | std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF, |
| 924 | uint64_t Address) { |
| 925 | size_t Id; |
| 926 | uint64_t Offset = 0; |
| 927 | if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) { |
| 928 | Offset = Address - JT->getAddress(); |
| 929 | auto JTLabelsIt = JT->Labels.find(x: Offset); |
| 930 | if (JTLabelsIt != JT->Labels.end()) |
| 931 | return std::string(JTLabelsIt->second->getName()); |
| 932 | |
| 933 | auto JTIdsIt = JumpTableIds.find(x: JT->getAddress()); |
| 934 | assert(JTIdsIt != JumpTableIds.end()); |
| 935 | Id = JTIdsIt->second; |
| 936 | } else { |
| 937 | Id = JumpTableIds[Address] = BF.JumpTables.size(); |
| 938 | } |
| 939 | return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(val: Id) + |
| 940 | (Offset ? ("." + std::to_string(val: Offset)) : "" )); |
| 941 | } |
| 942 | |
| 943 | bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) { |
| 944 | // FIXME: aarch64 support is missing. |
| 945 | if (!isX86()) |
| 946 | return true; |
| 947 | |
| 948 | if (BF.getSize() == BF.getMaxSize()) |
| 949 | return true; |
| 950 | |
| 951 | ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData(); |
| 952 | assert(FunctionData && "cannot get function as data" ); |
| 953 | |
| 954 | uint64_t Offset = BF.getSize(); |
| 955 | MCInst Instr; |
| 956 | uint64_t InstrSize = 0; |
| 957 | uint64_t InstrAddress = BF.getAddress() + Offset; |
| 958 | using std::placeholders::_1; |
| 959 | |
| 960 | // Skip instructions that satisfy the predicate condition. |
| 961 | auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) { |
| 962 | const uint64_t StartOffset = Offset; |
| 963 | for (; Offset < BF.getMaxSize(); |
| 964 | Offset += InstrSize, InstrAddress += InstrSize) { |
| 965 | if (!DisAsm->getInstruction(Instr, Size&: InstrSize, Bytes: FunctionData->slice(N: Offset), |
| 966 | Address: InstrAddress, CStream&: nulls())) |
| 967 | break; |
| 968 | if (!Predicate(Instr)) |
| 969 | break; |
| 970 | } |
| 971 | |
| 972 | return Offset - StartOffset; |
| 973 | }; |
| 974 | |
| 975 | // Skip a sequence of zero bytes. |
| 976 | auto skipZeros = [&]() { |
| 977 | const uint64_t StartOffset = Offset; |
| 978 | for (; Offset < BF.getMaxSize(); ++Offset) |
| 979 | if ((*FunctionData)[Offset] != 0) |
| 980 | break; |
| 981 | |
| 982 | return Offset - StartOffset; |
| 983 | }; |
| 984 | |
| 985 | // Accept the whole padding area filled with breakpoints. |
| 986 | auto isBreakpoint = std::bind(f: &MCPlusBuilder::isBreakpoint, args: MIB.get(), args: _1); |
| 987 | if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize()) |
| 988 | return true; |
| 989 | |
| 990 | auto isNoop = std::bind(f: &MCPlusBuilder::isNoop, args: MIB.get(), args: _1); |
| 991 | |
| 992 | // Some functions have a jump to the next function or to the padding area |
| 993 | // inserted after the body. |
| 994 | auto isSkipJump = [&](const MCInst &Instr) { |
| 995 | uint64_t TargetAddress = 0; |
| 996 | if (MIB->isUnconditionalBranch(Inst: Instr) && |
| 997 | MIB->evaluateBranch(Inst: Instr, Addr: InstrAddress, Size: InstrSize, Target&: TargetAddress)) { |
| 998 | if (TargetAddress >= InstrAddress + InstrSize && |
| 999 | TargetAddress <= BF.getAddress() + BF.getMaxSize()) { |
| 1000 | return true; |
| 1001 | } |
| 1002 | } |
| 1003 | return false; |
| 1004 | }; |
| 1005 | |
| 1006 | // Skip over nops, jumps, and zero padding. Allow interleaving (this happens). |
| 1007 | while (skipInstructions(isNoop) || skipInstructions(isSkipJump) || |
| 1008 | skipZeros()) |
| 1009 | ; |
| 1010 | |
| 1011 | if (Offset == BF.getMaxSize()) |
| 1012 | return true; |
| 1013 | |
| 1014 | if (opts::Verbosity >= 1) { |
| 1015 | this->errs() << "BOLT-WARNING: bad padding at address 0x" |
| 1016 | << Twine::utohexstr(Val: BF.getAddress() + BF.getSize()) |
| 1017 | << " starting at offset " << (Offset - BF.getSize()) |
| 1018 | << " in function " << BF << '\n' |
| 1019 | << FunctionData->slice(N: BF.getSize(), |
| 1020 | M: BF.getMaxSize() - BF.getSize()) |
| 1021 | << '\n'; |
| 1022 | } |
| 1023 | |
| 1024 | return false; |
| 1025 | } |
| 1026 | |
| 1027 | void BinaryContext::adjustCodePadding() { |
| 1028 | for (auto &BFI : BinaryFunctions) { |
| 1029 | BinaryFunction &BF = BFI.second; |
| 1030 | if (!shouldEmit(Function: BF)) |
| 1031 | continue; |
| 1032 | |
| 1033 | if (!hasValidCodePadding(BF)) { |
| 1034 | if (HasRelocations) { |
| 1035 | this->errs() << "BOLT-WARNING: function " << BF |
| 1036 | << " has invalid padding. Ignoring the function\n" ; |
| 1037 | BF.setIgnored(); |
| 1038 | } else { |
| 1039 | BF.setMaxSize(BF.getSize()); |
| 1040 | } |
| 1041 | } |
| 1042 | } |
| 1043 | } |
| 1044 | |
| 1045 | MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address, |
| 1046 | uint64_t Size, |
| 1047 | uint16_t Alignment, |
| 1048 | unsigned Flags) { |
| 1049 | // Register the name with MCContext. |
| 1050 | MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name); |
| 1051 | |
| 1052 | auto GAI = BinaryDataMap.find(x: Address); |
| 1053 | BinaryData *BD; |
| 1054 | if (GAI == BinaryDataMap.end()) { |
| 1055 | ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address); |
| 1056 | BinarySection &Section = |
| 1057 | SectionOrErr ? SectionOrErr.get() : absoluteSection(); |
| 1058 | BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1, |
| 1059 | Section, Flags); |
| 1060 | GAI = BinaryDataMap.emplace(args&: Address, args&: BD).first; |
| 1061 | GlobalSymbols[Name] = BD; |
| 1062 | updateObjectNesting(GAI); |
| 1063 | } else { |
| 1064 | BD = GAI->second; |
| 1065 | if (!BD->hasName(Name)) { |
| 1066 | GlobalSymbols[Name] = BD; |
| 1067 | BD->updateSize(N: Size); |
| 1068 | BD->Symbols.push_back(x: Symbol); |
| 1069 | } |
| 1070 | } |
| 1071 | |
| 1072 | return Symbol; |
| 1073 | } |
| 1074 | |
| 1075 | const BinaryData * |
| 1076 | BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const { |
| 1077 | auto NI = BinaryDataMap.lower_bound(x: Address); |
| 1078 | auto End = BinaryDataMap.end(); |
| 1079 | if ((NI != End && Address == NI->first) || |
| 1080 | ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) { |
| 1081 | if (NI->second->containsAddress(Address)) |
| 1082 | return NI->second; |
| 1083 | |
| 1084 | // If this is a sub-symbol, see if a parent data contains the address. |
| 1085 | const BinaryData *BD = NI->second->getParent(); |
| 1086 | while (BD) { |
| 1087 | if (BD->containsAddress(Address)) |
| 1088 | return BD; |
| 1089 | BD = BD->getParent(); |
| 1090 | } |
| 1091 | } |
| 1092 | return nullptr; |
| 1093 | } |
| 1094 | |
| 1095 | BinaryData *BinaryContext::getGOTSymbol() { |
| 1096 | // First tries to find a global symbol with that name |
| 1097 | BinaryData *GOTSymBD = getBinaryDataByName(Name: "_GLOBAL_OFFSET_TABLE_" ); |
| 1098 | if (GOTSymBD) |
| 1099 | return GOTSymBD; |
| 1100 | |
| 1101 | // This symbol might be hidden from run-time link, so fetch the local |
| 1102 | // definition if available. |
| 1103 | GOTSymBD = getBinaryDataByName(Name: "_GLOBAL_OFFSET_TABLE_/1" ); |
| 1104 | if (!GOTSymBD) |
| 1105 | return nullptr; |
| 1106 | |
| 1107 | // If the local symbol is not unique, fail |
| 1108 | unsigned Index = 2; |
| 1109 | SmallString<30> Storage; |
| 1110 | while (const BinaryData *BD = |
| 1111 | getBinaryDataByName(Name: Twine("_GLOBAL_OFFSET_TABLE_/" ) |
| 1112 | .concat(Suffix: Twine(Index++)) |
| 1113 | .toStringRef(Out&: Storage))) |
| 1114 | if (BD->getAddress() != GOTSymBD->getAddress()) |
| 1115 | return nullptr; |
| 1116 | |
| 1117 | return GOTSymBD; |
| 1118 | } |
| 1119 | |
| 1120 | bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) { |
| 1121 | auto NI = BinaryDataMap.find(x: Address); |
| 1122 | assert(NI != BinaryDataMap.end()); |
| 1123 | if (NI == BinaryDataMap.end()) |
| 1124 | return false; |
| 1125 | // TODO: it's possible that a jump table starts at the same address |
| 1126 | // as a larger blob of private data. When we set the size of the |
| 1127 | // jump table, it might be smaller than the total blob size. In this |
| 1128 | // case we just leave the original size since (currently) it won't really |
| 1129 | // affect anything. |
| 1130 | assert((!NI->second->Size || NI->second->Size == Size || |
| 1131 | (NI->second->isJumpTable() && NI->second->Size > Size)) && |
| 1132 | "can't change the size of a symbol that has already had its " |
| 1133 | "size set" ); |
| 1134 | if (!NI->second->Size) { |
| 1135 | NI->second->Size = Size; |
| 1136 | updateObjectNesting(GAI: NI); |
| 1137 | return true; |
| 1138 | } |
| 1139 | return false; |
| 1140 | } |
| 1141 | |
| 1142 | void BinaryContext::generateSymbolHashes() { |
| 1143 | auto isPadding = [](const BinaryData &BD) { |
| 1144 | StringRef Contents = BD.getSection().getContents(); |
| 1145 | StringRef SymData = Contents.substr(Start: BD.getOffset(), N: BD.getSize()); |
| 1146 | return (BD.getName().starts_with(Prefix: "HOLEat" ) || |
| 1147 | SymData.find_first_not_of(C: 0) == StringRef::npos); |
| 1148 | }; |
| 1149 | |
| 1150 | uint64_t NumCollisions = 0; |
| 1151 | for (auto &Entry : BinaryDataMap) { |
| 1152 | BinaryData &BD = *Entry.second; |
| 1153 | StringRef Name = BD.getName(); |
| 1154 | |
| 1155 | if (!isInternalSymbolName(Name)) |
| 1156 | continue; |
| 1157 | |
| 1158 | // First check if a non-anonymous alias exists and move it to the front. |
| 1159 | if (BD.getSymbols().size() > 1) { |
| 1160 | auto Itr = llvm::find_if(Range&: BD.getSymbols(), P: [&](const MCSymbol *Symbol) { |
| 1161 | return !isInternalSymbolName(Name: Symbol->getName()); |
| 1162 | }); |
| 1163 | if (Itr != BD.getSymbols().end()) { |
| 1164 | size_t Idx = std::distance(first: BD.getSymbols().begin(), last: Itr); |
| 1165 | std::swap(a&: BD.getSymbols()[0], b&: BD.getSymbols()[Idx]); |
| 1166 | continue; |
| 1167 | } |
| 1168 | } |
| 1169 | |
| 1170 | // We have to skip 0 size symbols since they will all collide. |
| 1171 | if (BD.getSize() == 0) { |
| 1172 | continue; |
| 1173 | } |
| 1174 | |
| 1175 | const uint64_t Hash = BD.getSection().hash(BD); |
| 1176 | const size_t Idx = Name.find(Str: "0x" ); |
| 1177 | std::string NewName = |
| 1178 | (Twine(Name.substr(Start: 0, N: Idx)) + "_" + Twine::utohexstr(Val: Hash)).str(); |
| 1179 | if (getBinaryDataByName(Name: NewName)) { |
| 1180 | // Ignore collisions for symbols that appear to be padding |
| 1181 | // (i.e. all zeros or a "hole") |
| 1182 | if (!isPadding(BD)) { |
| 1183 | if (opts::Verbosity) { |
| 1184 | this->errs() << "BOLT-WARNING: collision detected when hashing " << BD |
| 1185 | << " with new name (" << NewName << "), skipping.\n" ; |
| 1186 | } |
| 1187 | ++NumCollisions; |
| 1188 | } |
| 1189 | continue; |
| 1190 | } |
| 1191 | BD.Symbols.insert(position: BD.Symbols.begin(), x: Ctx->getOrCreateSymbol(Name: NewName)); |
| 1192 | GlobalSymbols[NewName] = &BD; |
| 1193 | } |
| 1194 | if (NumCollisions) { |
| 1195 | this->errs() << "BOLT-WARNING: " << NumCollisions |
| 1196 | << " collisions detected while hashing binary objects" ; |
| 1197 | if (!opts::Verbosity) |
| 1198 | this->errs() << ". Use -v=1 to see the list." ; |
| 1199 | this->errs() << '\n'; |
| 1200 | } |
| 1201 | } |
| 1202 | |
| 1203 | bool BinaryContext::registerFragment(BinaryFunction &TargetFunction, |
| 1204 | BinaryFunction &Function) { |
| 1205 | assert(TargetFunction.isFragment() && "TargetFunction must be a fragment" ); |
| 1206 | if (TargetFunction.isChildOf(Other: Function)) |
| 1207 | return true; |
| 1208 | TargetFunction.addParentFragment(BF&: Function); |
| 1209 | Function.addFragment(BF&: TargetFunction); |
| 1210 | FragmentClasses.unionSets(V1: &TargetFunction, V2: &Function); |
| 1211 | if (!HasRelocations) { |
| 1212 | TargetFunction.setSimple(false); |
| 1213 | Function.setSimple(false); |
| 1214 | } |
| 1215 | if (opts::Verbosity >= 1) { |
| 1216 | this->outs() << "BOLT-INFO: marking " << TargetFunction |
| 1217 | << " as a fragment of " << Function << '\n'; |
| 1218 | } |
| 1219 | return true; |
| 1220 | } |
| 1221 | |
| 1222 | void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF, |
| 1223 | MCInst &LoadLowBits, |
| 1224 | MCInst &LoadHiBits, |
| 1225 | uint64_t Target) { |
| 1226 | const MCSymbol *TargetSymbol; |
| 1227 | uint64_t Addend = 0; |
| 1228 | std::tie(args&: TargetSymbol, args&: Addend) = handleAddressRef(Address: Target, BF, |
| 1229 | /*IsPCRel*/ true); |
| 1230 | int64_t Val; |
| 1231 | MIB->replaceImmWithSymbolRef(Inst&: LoadHiBits, Symbol: TargetSymbol, Addend, Ctx: Ctx.get(), Value&: Val, |
| 1232 | RelType: ELF::R_AARCH64_ADR_PREL_PG_HI21); |
| 1233 | MIB->replaceImmWithSymbolRef(Inst&: LoadLowBits, Symbol: TargetSymbol, Addend, Ctx: Ctx.get(), |
| 1234 | Value&: Val, RelType: ELF::R_AARCH64_ADD_ABS_LO12_NC); |
| 1235 | } |
| 1236 | |
| 1237 | bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) { |
| 1238 | BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address); |
| 1239 | if (TargetFunction) |
| 1240 | return false; |
| 1241 | |
| 1242 | ErrorOr<BinarySection &> Section = getSectionForAddress(Address); |
| 1243 | assert(Section && "cannot get section for referenced address" ); |
| 1244 | if (!Section->isText()) |
| 1245 | return false; |
| 1246 | |
| 1247 | bool Ret = false; |
| 1248 | StringRef SectionContents = Section->getContents(); |
| 1249 | uint64_t Offset = Address - Section->getAddress(); |
| 1250 | const uint64_t MaxSize = SectionContents.size() - Offset; |
| 1251 | const uint8_t *Bytes = |
| 1252 | reinterpret_cast<const uint8_t *>(SectionContents.data()); |
| 1253 | ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize); |
| 1254 | |
| 1255 | auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions, |
| 1256 | MCInst &Instruction, uint64_t Offset, |
| 1257 | uint64_t AbsoluteInstrAddr, |
| 1258 | uint64_t TotalSize) -> bool { |
| 1259 | MCInst *TargetHiBits, *TargetLowBits; |
| 1260 | uint64_t TargetAddress, Count; |
| 1261 | Count = MIB->matchLinkerVeneer(Begin: Instructions.begin(), End: Instructions.end(), |
| 1262 | Address: AbsoluteInstrAddr, CurInst: Instruction, TargetHiBits, |
| 1263 | TargetLowBits, Target&: TargetAddress); |
| 1264 | if (!Count) |
| 1265 | return false; |
| 1266 | |
| 1267 | if (MatchOnly) |
| 1268 | return true; |
| 1269 | |
| 1270 | // NOTE The target symbol was created during disassemble's |
| 1271 | // handleExternalReference |
| 1272 | const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, Prefix: "FUNCat" ); |
| 1273 | BinaryFunction *Veneer = createBinaryFunction(Name: VeneerSymbol->getName().str(), |
| 1274 | Section&: *Section, Address, Size: TotalSize); |
| 1275 | addAdrpAddRelocAArch64(BF&: *Veneer, LoadLowBits&: *TargetLowBits, LoadHiBits&: *TargetHiBits, |
| 1276 | Target: TargetAddress); |
| 1277 | MIB->addAnnotation(Inst&: Instruction, Name: "AArch64Veneer" , Val: true); |
| 1278 | Veneer->addInstruction(Offset, Instruction: std::move(Instruction)); |
| 1279 | --Count; |
| 1280 | for (auto It = Instructions.rbegin(); Count != 0; ++It, --Count) { |
| 1281 | MIB->addAnnotation(Inst&: It->second, Name: "AArch64Veneer" , Val: true); |
| 1282 | Veneer->addInstruction(Offset: It->first, Instruction: std::move(It->second)); |
| 1283 | } |
| 1284 | |
| 1285 | Veneer->getOrCreateLocalLabel(Address); |
| 1286 | Veneer->setMaxSize(TotalSize); |
| 1287 | Veneer->updateState(State: BinaryFunction::State::Disassembled); |
| 1288 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" |
| 1289 | << Twine::utohexstr(Address) << "\n" ); |
| 1290 | return true; |
| 1291 | }; |
| 1292 | |
| 1293 | uint64_t Size = 0, TotalSize = 0; |
| 1294 | BinaryFunction::InstrMapType VeneerInstructions; |
| 1295 | for (Offset = 0; Offset < MaxSize; Offset += Size) { |
| 1296 | MCInst Instruction; |
| 1297 | const uint64_t AbsoluteInstrAddr = Address + Offset; |
| 1298 | if (!SymbolicDisAsm->getInstruction(Instr&: Instruction, Size, Bytes: Data.slice(N: Offset), |
| 1299 | Address: AbsoluteInstrAddr, CStream&: nulls())) |
| 1300 | break; |
| 1301 | |
| 1302 | TotalSize += Size; |
| 1303 | if (MIB->isBranch(Inst: Instruction)) { |
| 1304 | Ret = matchVeneer(VeneerInstructions, Instruction, Offset, |
| 1305 | AbsoluteInstrAddr, TotalSize); |
| 1306 | break; |
| 1307 | } |
| 1308 | |
| 1309 | VeneerInstructions.emplace(args&: Offset, args: std::move(Instruction)); |
| 1310 | } |
| 1311 | |
| 1312 | return Ret; |
| 1313 | } |
| 1314 | |
| 1315 | void BinaryContext::processInterproceduralReferences() { |
| 1316 | for (const std::pair<BinaryFunction *, uint64_t> &It : |
| 1317 | InterproceduralReferences) { |
| 1318 | BinaryFunction &Function = *It.first; |
| 1319 | uint64_t Address = It.second; |
| 1320 | // Process interprocedural references from ignored functions in BAT mode |
| 1321 | // (non-simple in non-relocation mode) to properly register entry points |
| 1322 | if (!Address || (Function.isIgnored() && !HasBATSection)) |
| 1323 | continue; |
| 1324 | |
| 1325 | BinaryFunction *TargetFunction = |
| 1326 | getBinaryFunctionContainingAddress(Address); |
| 1327 | if (&Function == TargetFunction) |
| 1328 | continue; |
| 1329 | |
| 1330 | if (TargetFunction) { |
| 1331 | if (TargetFunction->isFragment() && |
| 1332 | !areRelatedFragments(LHS: TargetFunction, RHS: &Function)) { |
| 1333 | this->errs() |
| 1334 | << "BOLT-WARNING: interprocedural reference between unrelated " |
| 1335 | "fragments: " |
| 1336 | << Function.getPrintName() << " and " |
| 1337 | << TargetFunction->getPrintName() << '\n'; |
| 1338 | } |
| 1339 | if (uint64_t Offset = Address - TargetFunction->getAddress()) |
| 1340 | TargetFunction->addEntryPointAtOffset(Offset); |
| 1341 | |
| 1342 | continue; |
| 1343 | } |
| 1344 | |
| 1345 | // Check if address falls in function padding space - this could be |
| 1346 | // unmarked data in code. In this case adjust the padding space size. |
| 1347 | ErrorOr<BinarySection &> Section = getSectionForAddress(Address); |
| 1348 | assert(Section && "cannot get section for referenced address" ); |
| 1349 | |
| 1350 | if (!Section->isText()) |
| 1351 | continue; |
| 1352 | |
| 1353 | // PLT requires special handling and could be ignored in this context. |
| 1354 | StringRef SectionName = Section->getName(); |
| 1355 | if (SectionName == ".plt" || SectionName == ".plt.got" ) |
| 1356 | continue; |
| 1357 | |
| 1358 | // Check if it is aarch64 veneer written at Address |
| 1359 | if (isAArch64() && handleAArch64Veneer(Address)) |
| 1360 | continue; |
| 1361 | |
| 1362 | if (opts::processAllFunctions()) { |
| 1363 | this->errs() << "BOLT-ERROR: cannot process binaries with unmarked " |
| 1364 | << "object in code at address 0x" |
| 1365 | << Twine::utohexstr(Val: Address) << " belonging to section " |
| 1366 | << SectionName << " in current mode\n" ; |
| 1367 | exit(status: 1); |
| 1368 | } |
| 1369 | |
| 1370 | TargetFunction = getBinaryFunctionContainingAddress(Address, |
| 1371 | /*CheckPastEnd=*/false, |
| 1372 | /*UseMaxSize=*/true); |
| 1373 | // We are not going to overwrite non-simple functions, but for simple |
| 1374 | // ones - adjust the padding size. |
| 1375 | if (TargetFunction && TargetFunction->isSimple()) { |
| 1376 | this->errs() |
| 1377 | << "BOLT-WARNING: function " << *TargetFunction |
| 1378 | << " has an object detected in a padding region at address 0x" |
| 1379 | << Twine::utohexstr(Val: Address) << '\n'; |
| 1380 | TargetFunction->setMaxSize(TargetFunction->getSize()); |
| 1381 | } |
| 1382 | } |
| 1383 | |
| 1384 | InterproceduralReferences.clear(); |
| 1385 | } |
| 1386 | |
| 1387 | void BinaryContext::postProcessSymbolTable() { |
| 1388 | fixBinaryDataHoles(); |
| 1389 | bool Valid = true; |
| 1390 | for (auto &Entry : BinaryDataMap) { |
| 1391 | BinaryData *BD = Entry.second; |
| 1392 | if ((BD->getName().starts_with(Prefix: "SYMBOLat" ) || |
| 1393 | BD->getName().starts_with(Prefix: "DATAat" )) && |
| 1394 | !BD->getParent() && !BD->getSize() && !BD->isAbsolute() && |
| 1395 | BD->getSection()) { |
| 1396 | this->errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD |
| 1397 | << "\n" ; |
| 1398 | Valid = false; |
| 1399 | } |
| 1400 | } |
| 1401 | assert(Valid); |
| 1402 | (void)Valid; |
| 1403 | generateSymbolHashes(); |
| 1404 | } |
| 1405 | |
| 1406 | void BinaryContext::foldFunction(BinaryFunction &ChildBF, |
| 1407 | BinaryFunction &ParentBF) { |
| 1408 | assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() && |
| 1409 | "cannot merge functions with multiple entry points" ); |
| 1410 | |
| 1411 | std::unique_lock<llvm::sys::RWMutex> WriteCtxLock(CtxMutex, std::defer_lock); |
| 1412 | std::unique_lock<llvm::sys::RWMutex> WriteSymbolMapLock( |
| 1413 | SymbolToFunctionMapMutex, std::defer_lock); |
| 1414 | |
| 1415 | const StringRef ChildName = ChildBF.getOneName(); |
| 1416 | |
| 1417 | // Move symbols over and update bookkeeping info. |
| 1418 | for (MCSymbol *Symbol : ChildBF.getSymbols()) { |
| 1419 | ParentBF.getSymbols().push_back(Elt: Symbol); |
| 1420 | WriteSymbolMapLock.lock(); |
| 1421 | SymbolToFunctionMap[Symbol] = &ParentBF; |
| 1422 | WriteSymbolMapLock.unlock(); |
| 1423 | // NB: there's no need to update BinaryDataMap and GlobalSymbols. |
| 1424 | } |
| 1425 | ChildBF.getSymbols().clear(); |
| 1426 | |
| 1427 | // Move other names the child function is known under. |
| 1428 | llvm::move(Range&: ChildBF.Aliases, Out: std::back_inserter(x&: ParentBF.Aliases)); |
| 1429 | ChildBF.Aliases.clear(); |
| 1430 | |
| 1431 | if (HasRelocations) { |
| 1432 | // Merge execution counts of ChildBF into those of ParentBF. |
| 1433 | // Without relocations, we cannot reliably merge profiles as both functions |
| 1434 | // continue to exist and either one can be executed. |
| 1435 | ChildBF.mergeProfileDataInto(BF&: ParentBF); |
| 1436 | |
| 1437 | std::shared_lock<llvm::sys::RWMutex> ReadBfsLock(BinaryFunctionsMutex, |
| 1438 | std::defer_lock); |
| 1439 | std::unique_lock<llvm::sys::RWMutex> WriteBfsLock(BinaryFunctionsMutex, |
| 1440 | std::defer_lock); |
| 1441 | // Remove ChildBF from the global set of functions in relocs mode. |
| 1442 | ReadBfsLock.lock(); |
| 1443 | auto FI = BinaryFunctions.find(x: ChildBF.getAddress()); |
| 1444 | ReadBfsLock.unlock(); |
| 1445 | |
| 1446 | assert(FI != BinaryFunctions.end() && "function not found" ); |
| 1447 | assert(&ChildBF == &FI->second && "function mismatch" ); |
| 1448 | |
| 1449 | WriteBfsLock.lock(); |
| 1450 | ChildBF.clearDisasmState(); |
| 1451 | FI = BinaryFunctions.erase(position: FI); |
| 1452 | WriteBfsLock.unlock(); |
| 1453 | |
| 1454 | } else { |
| 1455 | // In non-relocation mode we keep the function, but rename it. |
| 1456 | std::string NewName = "__ICF_" + ChildName.str(); |
| 1457 | |
| 1458 | WriteCtxLock.lock(); |
| 1459 | ChildBF.getSymbols().push_back(Elt: Ctx->getOrCreateSymbol(Name: NewName)); |
| 1460 | WriteCtxLock.unlock(); |
| 1461 | |
| 1462 | ChildBF.setFolded(&ParentBF); |
| 1463 | } |
| 1464 | |
| 1465 | ParentBF.setHasFunctionsFoldedInto(); |
| 1466 | } |
| 1467 | |
| 1468 | void BinaryContext::fixBinaryDataHoles() { |
| 1469 | assert(validateObjectNesting() && "object nesting inconsistency detected" ); |
| 1470 | |
| 1471 | for (BinarySection &Section : allocatableSections()) { |
| 1472 | std::vector<std::pair<uint64_t, uint64_t>> Holes; |
| 1473 | |
| 1474 | auto isNotHole = [&Section](const binary_data_iterator &Itr) { |
| 1475 | BinaryData *BD = Itr->second; |
| 1476 | bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() && |
| 1477 | (BD->getName().starts_with(Prefix: "SYMBOLat0x" ) || |
| 1478 | BD->getName().starts_with(Prefix: "DATAat0x" ) || |
| 1479 | BD->getName().starts_with(Prefix: "ANONYMOUS" ))); |
| 1480 | return !isHole && BD->getSection() == Section && !BD->getParent(); |
| 1481 | }; |
| 1482 | |
| 1483 | auto BDStart = BinaryDataMap.begin(); |
| 1484 | auto BDEnd = BinaryDataMap.end(); |
| 1485 | auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd); |
| 1486 | auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd); |
| 1487 | |
| 1488 | uint64_t EndAddress = Section.getAddress(); |
| 1489 | |
| 1490 | while (Itr != End) { |
| 1491 | if (Itr->second->getAddress() > EndAddress) { |
| 1492 | uint64_t Gap = Itr->second->getAddress() - EndAddress; |
| 1493 | Holes.emplace_back(args&: EndAddress, args&: Gap); |
| 1494 | } |
| 1495 | EndAddress = Itr->second->getEndAddress(); |
| 1496 | ++Itr; |
| 1497 | } |
| 1498 | |
| 1499 | if (EndAddress < Section.getEndAddress()) |
| 1500 | Holes.emplace_back(args&: EndAddress, args: Section.getEndAddress() - EndAddress); |
| 1501 | |
| 1502 | // If there is already a symbol at the start of the hole, grow that symbol |
| 1503 | // to cover the rest. Otherwise, create a new symbol to cover the hole. |
| 1504 | for (std::pair<uint64_t, uint64_t> &Hole : Holes) { |
| 1505 | BinaryData *BD = getBinaryDataAtAddress(Address: Hole.first); |
| 1506 | if (BD) { |
| 1507 | // BD->getSection() can be != Section if there are sections that |
| 1508 | // overlap. In this case it is probably safe to just skip the holes |
| 1509 | // since the overlapping section will not(?) have any symbols in it. |
| 1510 | if (BD->getSection() == Section) |
| 1511 | setBinaryDataSize(Address: Hole.first, Size: Hole.second); |
| 1512 | } else { |
| 1513 | getOrCreateGlobalSymbol(Address: Hole.first, Prefix: "HOLEat" , Size: Hole.second, Alignment: 1); |
| 1514 | } |
| 1515 | } |
| 1516 | } |
| 1517 | |
| 1518 | assert(validateObjectNesting() && "object nesting inconsistency detected" ); |
| 1519 | assert(validateHoles() && "top level hole detected in object map" ); |
| 1520 | } |
| 1521 | |
| 1522 | void BinaryContext::printGlobalSymbols(raw_ostream &OS) const { |
| 1523 | const BinarySection *CurrentSection = nullptr; |
| 1524 | bool FirstSection = true; |
| 1525 | |
| 1526 | for (auto &Entry : BinaryDataMap) { |
| 1527 | const BinaryData *BD = Entry.second; |
| 1528 | const BinarySection &Section = BD->getSection(); |
| 1529 | if (FirstSection || Section != *CurrentSection) { |
| 1530 | uint64_t Address, Size; |
| 1531 | StringRef Name = Section.getName(); |
| 1532 | if (Section) { |
| 1533 | Address = Section.getAddress(); |
| 1534 | Size = Section.getSize(); |
| 1535 | } else { |
| 1536 | Address = BD->getAddress(); |
| 1537 | Size = BD->getSize(); |
| 1538 | } |
| 1539 | OS << "BOLT-INFO: Section " << Name << ", " |
| 1540 | << "0x" + Twine::utohexstr(Val: Address) << ":" |
| 1541 | << "0x" + Twine::utohexstr(Val: Address + Size) << "/" << Size << "\n" ; |
| 1542 | CurrentSection = &Section; |
| 1543 | FirstSection = false; |
| 1544 | } |
| 1545 | |
| 1546 | OS << "BOLT-INFO: " ; |
| 1547 | const BinaryData *P = BD->getParent(); |
| 1548 | while (P) { |
| 1549 | OS << " " ; |
| 1550 | P = P->getParent(); |
| 1551 | } |
| 1552 | OS << *BD << "\n" ; |
| 1553 | } |
| 1554 | } |
| 1555 | |
| 1556 | Expected<unsigned> BinaryContext::getDwarfFile( |
| 1557 | StringRef Directory, StringRef FileName, unsigned FileNumber, |
| 1558 | std::optional<MD5::MD5Result> Checksum, std::optional<StringRef> Source, |
| 1559 | unsigned CUID, unsigned DWARFVersion) { |
| 1560 | DwarfLineTable &Table = DwarfLineTablesCUMap[CUID]; |
| 1561 | return Table.tryGetFile(Directory, FileName, Checksum, Source, DwarfVersion: DWARFVersion, |
| 1562 | FileNumber); |
| 1563 | } |
| 1564 | |
| 1565 | unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID, |
| 1566 | const uint32_t SrcCUID, |
| 1567 | unsigned FileIndex) { |
| 1568 | DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(Offset: SrcCUID); |
| 1569 | const DWARFDebugLine::LineTable *LineTable = |
| 1570 | DwCtx->getLineTableForUnit(U: SrcUnit); |
| 1571 | const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = |
| 1572 | LineTable->Prologue.FileNames; |
| 1573 | // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 |
| 1574 | // means empty dir. |
| 1575 | assert(FileIndex > 0 && FileIndex <= FileNames.size() && |
| 1576 | "FileIndex out of range for the compilation unit." ); |
| 1577 | StringRef Dir = "" ; |
| 1578 | if (FileNames[FileIndex - 1].DirIdx != 0) { |
| 1579 | if (std::optional<const char *> DirName = dwarf::toString( |
| 1580 | V: LineTable->Prologue |
| 1581 | .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) { |
| 1582 | Dir = *DirName; |
| 1583 | } |
| 1584 | } |
| 1585 | StringRef FileName = "" ; |
| 1586 | if (std::optional<const char *> FName = |
| 1587 | dwarf::toString(V: FileNames[FileIndex - 1].Name)) |
| 1588 | FileName = *FName; |
| 1589 | assert(FileName != "" ); |
| 1590 | DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(Offset: DestCUID); |
| 1591 | return cantFail(ValOrErr: getDwarfFile(Directory: Dir, FileName, FileNumber: 0, Checksum: std::nullopt, Source: std::nullopt, |
| 1592 | CUID: DestCUID, DWARFVersion: DstUnit->getVersion())); |
| 1593 | } |
| 1594 | |
| 1595 | std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() { |
| 1596 | std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size()); |
| 1597 | llvm::transform(Range: llvm::make_second_range(c&: BinaryFunctions), |
| 1598 | d_first: SortedFunctions.begin(), |
| 1599 | F: [](BinaryFunction &BF) { return &BF; }); |
| 1600 | |
| 1601 | llvm::stable_sort(Range&: SortedFunctions, C: compareBinaryFunctionByIndex); |
| 1602 | return SortedFunctions; |
| 1603 | } |
| 1604 | |
| 1605 | std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() { |
| 1606 | std::vector<BinaryFunction *> AllFunctions; |
| 1607 | AllFunctions.reserve(n: BinaryFunctions.size() + InjectedBinaryFunctions.size()); |
| 1608 | llvm::transform(Range: llvm::make_second_range(c&: BinaryFunctions), |
| 1609 | d_first: std::back_inserter(x&: AllFunctions), |
| 1610 | F: [](BinaryFunction &BF) { return &BF; }); |
| 1611 | llvm::copy(Range&: InjectedBinaryFunctions, Out: std::back_inserter(x&: AllFunctions)); |
| 1612 | |
| 1613 | return AllFunctions; |
| 1614 | } |
| 1615 | |
| 1616 | std::optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) { |
| 1617 | auto Iter = DWOCUs.find(x: DWOId); |
| 1618 | if (Iter == DWOCUs.end()) |
| 1619 | return std::nullopt; |
| 1620 | |
| 1621 | return Iter->second; |
| 1622 | } |
| 1623 | |
| 1624 | DWARFContext *BinaryContext::getDWOContext() const { |
| 1625 | if (DWOCUs.empty()) |
| 1626 | return nullptr; |
| 1627 | return &DWOCUs.begin()->second->getContext(); |
| 1628 | } |
| 1629 | |
| 1630 | /// Handles DWO sections that can either be in .o, .dwo or .dwp files. |
| 1631 | void BinaryContext::preprocessDWODebugInfo() { |
| 1632 | for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { |
| 1633 | DWARFUnit *const DwarfUnit = CU.get(); |
| 1634 | if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) { |
| 1635 | std::string DWOName = dwarf::toString( |
| 1636 | V: DwarfUnit->getUnitDIE().find( |
| 1637 | Attrs: {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), |
| 1638 | Default: "" ); |
| 1639 | SmallString<16> AbsolutePath; |
| 1640 | if (!opts::CompDirOverride.empty()) { |
| 1641 | sys::path::append(path&: AbsolutePath, a: opts::CompDirOverride); |
| 1642 | sys::path::append(path&: AbsolutePath, a: DWOName); |
| 1643 | } |
| 1644 | DWARFUnit *DWOCU = |
| 1645 | DwarfUnit->getNonSkeletonUnitDIE(ExtractUnitDIEOnly: false, DWOAlternativeLocation: AbsolutePath).getDwarfUnit(); |
| 1646 | if (!DWOCU->isDWOUnit()) { |
| 1647 | this->outs() |
| 1648 | << "BOLT-WARNING: Debug Fission: DWO debug information for " |
| 1649 | << DWOName |
| 1650 | << " was not retrieved and won't be updated. Please check " |
| 1651 | "relative path.\n" ; |
| 1652 | continue; |
| 1653 | } |
| 1654 | DWOCUs[*DWOId] = DWOCU; |
| 1655 | } |
| 1656 | } |
| 1657 | if (!DWOCUs.empty()) |
| 1658 | this->outs() << "BOLT-INFO: processing split DWARF\n" ; |
| 1659 | } |
| 1660 | |
| 1661 | void BinaryContext::preprocessDebugInfo() { |
| 1662 | struct CURange { |
| 1663 | uint64_t LowPC; |
| 1664 | uint64_t HighPC; |
| 1665 | DWARFUnit *Unit; |
| 1666 | |
| 1667 | bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; } |
| 1668 | }; |
| 1669 | |
| 1670 | // Building a map of address ranges to CUs similar to .debug_aranges and use |
| 1671 | // it to assign CU to functions. |
| 1672 | std::vector<CURange> AllRanges; |
| 1673 | AllRanges.reserve(n: DwCtx->getNumCompileUnits()); |
| 1674 | for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { |
| 1675 | Expected<DWARFAddressRangesVector> RangesOrError = |
| 1676 | CU->getUnitDIE().getAddressRanges(); |
| 1677 | if (!RangesOrError) { |
| 1678 | consumeError(Err: RangesOrError.takeError()); |
| 1679 | continue; |
| 1680 | } |
| 1681 | for (DWARFAddressRange &Range : *RangesOrError) { |
| 1682 | // Parts of the debug info could be invalidated due to corresponding code |
| 1683 | // being removed from the binary by the linker. Hence we check if the |
| 1684 | // address is a valid one. |
| 1685 | if (containsAddress(Address: Range.LowPC)) |
| 1686 | AllRanges.emplace_back(args: CURange{.LowPC: Range.LowPC, .HighPC: Range.HighPC, .Unit: CU.get()}); |
| 1687 | } |
| 1688 | |
| 1689 | ContainsDwarf5 |= CU->getVersion() >= 5; |
| 1690 | ContainsDwarfLegacy |= CU->getVersion() < 5; |
| 1691 | } |
| 1692 | |
| 1693 | llvm::sort(C&: AllRanges); |
| 1694 | for (auto &KV : BinaryFunctions) { |
| 1695 | const uint64_t FunctionAddress = KV.first; |
| 1696 | BinaryFunction &Function = KV.second; |
| 1697 | |
| 1698 | auto It = llvm::partition_point( |
| 1699 | Range&: AllRanges, P: [=](CURange R) { return R.HighPC <= FunctionAddress; }); |
| 1700 | if (It != AllRanges.end() && It->LowPC <= FunctionAddress) |
| 1701 | Function.setDWARFUnit(It->Unit); |
| 1702 | } |
| 1703 | |
| 1704 | // Discover units with debug info that needs to be updated. |
| 1705 | for (const auto &KV : BinaryFunctions) { |
| 1706 | const BinaryFunction &BF = KV.second; |
| 1707 | if (shouldEmit(Function: BF) && BF.getDWARFUnit()) |
| 1708 | ProcessedCUs.insert(x: BF.getDWARFUnit()); |
| 1709 | } |
| 1710 | |
| 1711 | // Clear debug info for functions from units that we are not going to process. |
| 1712 | for (auto &KV : BinaryFunctions) { |
| 1713 | BinaryFunction &BF = KV.second; |
| 1714 | if (BF.getDWARFUnit() && !ProcessedCUs.count(x: BF.getDWARFUnit())) |
| 1715 | BF.setDWARFUnit(nullptr); |
| 1716 | } |
| 1717 | |
| 1718 | if (opts::Verbosity >= 1) { |
| 1719 | this->outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of " |
| 1720 | << DwCtx->getNumCompileUnits() << " CUs will be updated\n" ; |
| 1721 | } |
| 1722 | |
| 1723 | preprocessDWODebugInfo(); |
| 1724 | |
| 1725 | // Populate MCContext with DWARF files from all units. |
| 1726 | StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix(); |
| 1727 | for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { |
| 1728 | const uint64_t CUID = CU->getOffset(); |
| 1729 | DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID); |
| 1730 | BinaryLineTable.setLabel(Ctx->getOrCreateSymbol( |
| 1731 | Name: GlobalPrefix + "line_table_start" + Twine(CUID))); |
| 1732 | |
| 1733 | if (!ProcessedCUs.count(x: CU.get())) |
| 1734 | continue; |
| 1735 | |
| 1736 | const DWARFDebugLine::LineTable *LineTable = |
| 1737 | DwCtx->getLineTableForUnit(U: CU.get()); |
| 1738 | const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = |
| 1739 | LineTable->Prologue.FileNames; |
| 1740 | |
| 1741 | uint16_t DwarfVersion = LineTable->Prologue.getVersion(); |
| 1742 | if (DwarfVersion >= 5) { |
| 1743 | std::optional<MD5::MD5Result> Checksum; |
| 1744 | if (LineTable->Prologue.ContentTypes.HasMD5) |
| 1745 | Checksum = LineTable->Prologue.FileNames[0].Checksum; |
| 1746 | std::optional<const char *> Name = |
| 1747 | dwarf::toString(V: CU->getUnitDIE().find(Attr: dwarf::DW_AT_name), Default: nullptr); |
| 1748 | if (std::optional<uint64_t> DWOID = CU->getDWOId()) { |
| 1749 | auto Iter = DWOCUs.find(x: *DWOID); |
| 1750 | if (Iter == DWOCUs.end()) { |
| 1751 | this->errs() << "BOLT-ERROR: DWO CU was not found for " << Name |
| 1752 | << '\n'; |
| 1753 | exit(status: 1); |
| 1754 | } |
| 1755 | Name = dwarf::toString( |
| 1756 | V: Iter->second->getUnitDIE().find(Attr: dwarf::DW_AT_name), Default: nullptr); |
| 1757 | } |
| 1758 | BinaryLineTable.setRootFile(Directory: CU->getCompilationDir(), FileName: *Name, Checksum, |
| 1759 | Source: std::nullopt); |
| 1760 | } |
| 1761 | |
| 1762 | BinaryLineTable.setDwarfVersion(DwarfVersion); |
| 1763 | |
| 1764 | // Assign a unique label to every line table, one per CU. |
| 1765 | // Make sure empty debug line tables are registered too. |
| 1766 | if (FileNames.empty()) { |
| 1767 | cantFail(ValOrErr: getDwarfFile(Directory: "" , FileName: "<unknown>" , FileNumber: 0, Checksum: std::nullopt, Source: std::nullopt, |
| 1768 | CUID, DWARFVersion: DwarfVersion)); |
| 1769 | continue; |
| 1770 | } |
| 1771 | const uint32_t Offset = DwarfVersion < 5 ? 1 : 0; |
| 1772 | for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) { |
| 1773 | // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 |
| 1774 | // means empty dir. |
| 1775 | StringRef Dir = "" ; |
| 1776 | if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5) |
| 1777 | if (std::optional<const char *> DirName = dwarf::toString( |
| 1778 | V: LineTable->Prologue |
| 1779 | .IncludeDirectories[FileNames[I].DirIdx - Offset])) |
| 1780 | Dir = *DirName; |
| 1781 | StringRef FileName = "" ; |
| 1782 | if (std::optional<const char *> FName = |
| 1783 | dwarf::toString(V: FileNames[I].Name)) |
| 1784 | FileName = *FName; |
| 1785 | assert(FileName != "" ); |
| 1786 | std::optional<MD5::MD5Result> Checksum; |
| 1787 | if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5) |
| 1788 | Checksum = LineTable->Prologue.FileNames[I].Checksum; |
| 1789 | cantFail(ValOrErr: getDwarfFile(Directory: Dir, FileName, FileNumber: 0, Checksum, Source: std::nullopt, CUID, |
| 1790 | DWARFVersion: DwarfVersion)); |
| 1791 | } |
| 1792 | } |
| 1793 | } |
| 1794 | |
| 1795 | bool BinaryContext::shouldEmit(const BinaryFunction &Function) const { |
| 1796 | if (Function.isPseudo()) |
| 1797 | return false; |
| 1798 | |
| 1799 | if (opts::processAllFunctions()) |
| 1800 | return true; |
| 1801 | |
| 1802 | if (Function.isIgnored()) |
| 1803 | return false; |
| 1804 | |
| 1805 | // In relocation mode we will emit non-simple functions with CFG. |
| 1806 | // If the function does not have a CFG it should be marked as ignored. |
| 1807 | return HasRelocations || Function.isSimple(); |
| 1808 | } |
| 1809 | |
| 1810 | void BinaryContext::dump(const MCInst &Inst) const { |
| 1811 | if (LLVM_UNLIKELY(!InstPrinter)) { |
| 1812 | dbgs() << "Cannot dump for InstPrinter is not initialized.\n" ; |
| 1813 | return; |
| 1814 | } |
| 1815 | InstPrinter->printInst(MI: &Inst, Address: 0, Annot: "" , STI: *STI, OS&: dbgs()); |
| 1816 | dbgs() << "\n" ; |
| 1817 | } |
| 1818 | |
| 1819 | void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) { |
| 1820 | uint32_t Operation = Inst.getOperation(); |
| 1821 | switch (Operation) { |
| 1822 | case MCCFIInstruction::OpSameValue: |
| 1823 | OS << "OpSameValue Reg" << Inst.getRegister(); |
| 1824 | break; |
| 1825 | case MCCFIInstruction::OpRememberState: |
| 1826 | OS << "OpRememberState" ; |
| 1827 | break; |
| 1828 | case MCCFIInstruction::OpRestoreState: |
| 1829 | OS << "OpRestoreState" ; |
| 1830 | break; |
| 1831 | case MCCFIInstruction::OpOffset: |
| 1832 | OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); |
| 1833 | break; |
| 1834 | case MCCFIInstruction::OpDefCfaRegister: |
| 1835 | OS << "OpDefCfaRegister Reg" << Inst.getRegister(); |
| 1836 | break; |
| 1837 | case MCCFIInstruction::OpDefCfaOffset: |
| 1838 | OS << "OpDefCfaOffset " << Inst.getOffset(); |
| 1839 | break; |
| 1840 | case MCCFIInstruction::OpDefCfa: |
| 1841 | OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset(); |
| 1842 | break; |
| 1843 | case MCCFIInstruction::OpRelOffset: |
| 1844 | OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); |
| 1845 | break; |
| 1846 | case MCCFIInstruction::OpAdjustCfaOffset: |
| 1847 | OS << "OfAdjustCfaOffset " << Inst.getOffset(); |
| 1848 | break; |
| 1849 | case MCCFIInstruction::OpEscape: |
| 1850 | OS << "OpEscape" ; |
| 1851 | break; |
| 1852 | case MCCFIInstruction::OpRestore: |
| 1853 | OS << "OpRestore Reg" << Inst.getRegister(); |
| 1854 | break; |
| 1855 | case MCCFIInstruction::OpUndefined: |
| 1856 | OS << "OpUndefined Reg" << Inst.getRegister(); |
| 1857 | break; |
| 1858 | case MCCFIInstruction::OpRegister: |
| 1859 | OS << "OpRegister Reg" << Inst.getRegister() << " Reg" |
| 1860 | << Inst.getRegister2(); |
| 1861 | break; |
| 1862 | case MCCFIInstruction::OpWindowSave: |
| 1863 | OS << "OpWindowSave" ; |
| 1864 | break; |
| 1865 | case MCCFIInstruction::OpGnuArgsSize: |
| 1866 | OS << "OpGnuArgsSize" ; |
| 1867 | break; |
| 1868 | default: |
| 1869 | OS << "Op#" << Operation; |
| 1870 | break; |
| 1871 | } |
| 1872 | } |
| 1873 | |
| 1874 | MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const { |
| 1875 | // For aarch64 and riscv, the ABI defines mapping symbols so we identify data |
| 1876 | // in the code section (see IHI0056B). $x identifies a symbol starting code or |
| 1877 | // the end of a data chunk inside code, $d identifies start of data. |
| 1878 | if (isX86() || ELFSymbolRef(Symbol).getSize()) |
| 1879 | return MarkerSymType::NONE; |
| 1880 | |
| 1881 | Expected<StringRef> NameOrError = Symbol.getName(); |
| 1882 | Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType(); |
| 1883 | |
| 1884 | if (!TypeOrError || !NameOrError) |
| 1885 | return MarkerSymType::NONE; |
| 1886 | |
| 1887 | if (*TypeOrError != SymbolRef::ST_Unknown) |
| 1888 | return MarkerSymType::NONE; |
| 1889 | |
| 1890 | if (*NameOrError == "$x" || NameOrError->starts_with(Prefix: "$x." )) |
| 1891 | return MarkerSymType::CODE; |
| 1892 | |
| 1893 | // $x<ISA> |
| 1894 | if (isRISCV() && NameOrError->starts_with(Prefix: "$x" )) |
| 1895 | return MarkerSymType::CODE; |
| 1896 | |
| 1897 | if (*NameOrError == "$d" || NameOrError->starts_with(Prefix: "$d." )) |
| 1898 | return MarkerSymType::DATA; |
| 1899 | |
| 1900 | return MarkerSymType::NONE; |
| 1901 | } |
| 1902 | |
| 1903 | bool BinaryContext::isMarker(const SymbolRef &Symbol) const { |
| 1904 | return getMarkerType(Symbol) != MarkerSymType::NONE; |
| 1905 | } |
| 1906 | |
| 1907 | static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction, |
| 1908 | const BinaryFunction *Function, |
| 1909 | DWARFContext *DwCtx) { |
| 1910 | DebugLineTableRowRef RowRef = |
| 1911 | DebugLineTableRowRef::fromSMLoc(Loc: Instruction.getLoc()); |
| 1912 | if (RowRef == DebugLineTableRowRef::NULL_ROW) |
| 1913 | return; |
| 1914 | |
| 1915 | const DWARFDebugLine::LineTable *LineTable; |
| 1916 | if (Function && Function->getDWARFUnit() && |
| 1917 | Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) { |
| 1918 | LineTable = Function->getDWARFLineTable(); |
| 1919 | } else { |
| 1920 | LineTable = DwCtx->getLineTableForUnit( |
| 1921 | U: DwCtx->getCompileUnitForOffset(Offset: RowRef.DwCompileUnitIndex)); |
| 1922 | } |
| 1923 | assert(LineTable && "line table expected for instruction with debug info" ); |
| 1924 | |
| 1925 | const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; |
| 1926 | StringRef FileName = "" ; |
| 1927 | if (std::optional<const char *> FName = |
| 1928 | dwarf::toString(V: LineTable->Prologue.FileNames[Row.File - 1].Name)) |
| 1929 | FileName = *FName; |
| 1930 | OS << " # debug line " << FileName << ":" << Row.Line; |
| 1931 | if (Row.Column) |
| 1932 | OS << ":" << Row.Column; |
| 1933 | if (Row.Discriminator) |
| 1934 | OS << " discriminator:" << Row.Discriminator; |
| 1935 | } |
| 1936 | |
| 1937 | ArrayRef<uint8_t> BinaryContext::(uint64_t Address, |
| 1938 | uint64_t Size) const { |
| 1939 | ArrayRef<uint8_t> Res; |
| 1940 | |
| 1941 | const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); |
| 1942 | if (!Section || Section->isVirtual()) |
| 1943 | return Res; |
| 1944 | |
| 1945 | if (!Section->containsRange(Address, Size)) |
| 1946 | return Res; |
| 1947 | |
| 1948 | auto *Bytes = |
| 1949 | reinterpret_cast<const uint8_t *>(Section->getContents().data()); |
| 1950 | return ArrayRef<uint8_t>(Bytes + Address - Section->getAddress(), Size); |
| 1951 | } |
| 1952 | |
| 1953 | void BinaryContext::printData(raw_ostream &OS, ArrayRef<uint8_t> Data, |
| 1954 | uint64_t Offset) const { |
| 1955 | DataExtractor DE(Data, AsmInfo->isLittleEndian(), |
| 1956 | AsmInfo->getCodePointerSize()); |
| 1957 | uint64_t DataOffset = 0; |
| 1958 | while (DataOffset + 4 <= Data.size()) { |
| 1959 | OS << format(Fmt: " %08" PRIx64 ": \t.word\t0x" , Vals: Offset + DataOffset); |
| 1960 | const auto Word = DE.getUnsigned(offset_ptr: &DataOffset, byte_size: 4); |
| 1961 | OS << Twine::utohexstr(Val: Word) << '\n'; |
| 1962 | } |
| 1963 | if (DataOffset + 2 <= Data.size()) { |
| 1964 | OS << format(Fmt: " %08" PRIx64 ": \t.short\t0x" , Vals: Offset + DataOffset); |
| 1965 | const auto Short = DE.getUnsigned(offset_ptr: &DataOffset, byte_size: 2); |
| 1966 | OS << Twine::utohexstr(Val: Short) << '\n'; |
| 1967 | } |
| 1968 | if (DataOffset + 1 == Data.size()) { |
| 1969 | OS << format(Fmt: " %08" PRIx64 ": \t.byte\t0x%x\n" , Vals: Offset + DataOffset, |
| 1970 | Vals: Data[DataOffset]); |
| 1971 | } |
| 1972 | } |
| 1973 | |
| 1974 | void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, |
| 1975 | uint64_t Offset, |
| 1976 | const BinaryFunction *Function, |
| 1977 | bool PrintMCInst, bool PrintMemData, |
| 1978 | bool PrintRelocations, |
| 1979 | StringRef Endl) const { |
| 1980 | OS << format(Fmt: " %08" PRIx64 ": " , Vals: Offset); |
| 1981 | if (MIB->isCFI(Inst: Instruction)) { |
| 1982 | uint32_t Offset = Instruction.getOperand(i: 0).getImm(); |
| 1983 | OS << "\t!CFI\t$" << Offset << "\t; " ; |
| 1984 | if (Function) |
| 1985 | printCFI(OS, Inst: *Function->getCFIFor(Instr: Instruction)); |
| 1986 | OS << Endl; |
| 1987 | return; |
| 1988 | } |
| 1989 | if (std::optional<uint32_t> DynamicID = |
| 1990 | MIB->getDynamicBranchID(Inst: Instruction)) { |
| 1991 | OS << "\tjit\t" << MIB->getTargetSymbol(Inst: Instruction)->getName() |
| 1992 | << " # ID: " << DynamicID; |
| 1993 | } else { |
| 1994 | // If there are annotations on the instruction, the MCInstPrinter will fail |
| 1995 | // to print the preferred alias as it only does so when the number of |
| 1996 | // operands is as expected. See |
| 1997 | // https://github.com/llvm/llvm-project/blob/782f1a0d895646c364a53f9dcdd6d4ec1f3e5ea0/llvm/lib/MC/MCInstPrinter.cpp#L142 |
| 1998 | // Therefore, create a temporary copy of the Inst from which the annotations |
| 1999 | // are removed, and print that Inst. |
| 2000 | MCInst InstNoAnnot = Instruction; |
| 2001 | MIB->stripAnnotations(Inst&: InstNoAnnot); |
| 2002 | InstPrinter->printInst(MI: &InstNoAnnot, Address: 0, Annot: "" , STI: *STI, OS); |
| 2003 | } |
| 2004 | if (MIB->isCall(Inst: Instruction)) { |
| 2005 | if (MIB->isTailCall(Inst: Instruction)) |
| 2006 | OS << " # TAILCALL " ; |
| 2007 | if (MIB->isInvoke(Inst: Instruction)) { |
| 2008 | const std::optional<MCPlus::MCLandingPad> EHInfo = |
| 2009 | MIB->getEHInfo(Inst: Instruction); |
| 2010 | OS << " # handler: " ; |
| 2011 | if (EHInfo->first) |
| 2012 | OS << *EHInfo->first; |
| 2013 | else |
| 2014 | OS << '0'; |
| 2015 | OS << "; action: " << EHInfo->second; |
| 2016 | const int64_t GnuArgsSize = MIB->getGnuArgsSize(Inst: Instruction); |
| 2017 | if (GnuArgsSize >= 0) |
| 2018 | OS << "; GNU_args_size = " << GnuArgsSize; |
| 2019 | } |
| 2020 | } else if (MIB->isIndirectBranch(Inst: Instruction)) { |
| 2021 | if (uint64_t JTAddress = MIB->getJumpTable(Inst: Instruction)) { |
| 2022 | OS << " # JUMPTABLE @0x" << Twine::utohexstr(Val: JTAddress); |
| 2023 | } else { |
| 2024 | OS << " # UNKNOWN CONTROL FLOW" ; |
| 2025 | } |
| 2026 | } |
| 2027 | if (std::optional<uint32_t> Offset = MIB->getOffset(Inst: Instruction)) |
| 2028 | OS << " # Offset: " << *Offset; |
| 2029 | if (std::optional<uint32_t> Size = MIB->getSize(Inst: Instruction)) |
| 2030 | OS << " # Size: " << *Size; |
| 2031 | if (MCSymbol *Label = MIB->getInstLabel(Inst: Instruction)) |
| 2032 | OS << " # Label: " << *Label; |
| 2033 | |
| 2034 | MIB->printAnnotations(Inst: Instruction, OS); |
| 2035 | |
| 2036 | if (opts::PrintDebugInfo) |
| 2037 | printDebugInfo(OS, Instruction, Function, DwCtx: DwCtx.get()); |
| 2038 | |
| 2039 | if ((opts::PrintRelocations || PrintRelocations) && Function) { |
| 2040 | const uint64_t Size = computeCodeSize(Beg: &Instruction, End: &Instruction + 1); |
| 2041 | Function->printRelocations(OS, Offset, Size); |
| 2042 | } |
| 2043 | |
| 2044 | OS << Endl; |
| 2045 | |
| 2046 | if (PrintMCInst) { |
| 2047 | Instruction.dump_pretty(OS, Printer: InstPrinter.get()); |
| 2048 | OS << Endl; |
| 2049 | } |
| 2050 | } |
| 2051 | |
| 2052 | std::optional<uint64_t> |
| 2053 | BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress, |
| 2054 | uint64_t FileOffset) const { |
| 2055 | // Find a segment with a matching file offset. |
| 2056 | for (auto &KV : SegmentMapInfo) { |
| 2057 | const SegmentInfo &SegInfo = KV.second; |
| 2058 | // Only consider executable segments. |
| 2059 | if (!SegInfo.IsExecutable) |
| 2060 | continue; |
| 2061 | // FileOffset is got from perf event, |
| 2062 | // and it is equal to alignDown(SegInfo.FileOffset, pagesize). |
| 2063 | // If the pagesize is not equal to SegInfo.Alignment. |
| 2064 | // FileOffset and SegInfo.FileOffset should be aligned first, |
| 2065 | // and then judge whether they are equal. |
| 2066 | if (alignDown(Value: SegInfo.FileOffset, Align: SegInfo.Alignment) == |
| 2067 | alignDown(Value: FileOffset, Align: SegInfo.Alignment)) { |
| 2068 | // The function's offset from base address in VAS is aligned by pagesize |
| 2069 | // instead of SegInfo.Alignment. Pagesize can't be got from perf events. |
| 2070 | // However, The ELF document says that SegInfo.FileOffset should equal |
| 2071 | // to SegInfo.Address, modulo the pagesize. |
| 2072 | // Reference: https://refspecs.linuxfoundation.org/elf/elf.pdf |
| 2073 | |
| 2074 | // So alignDown(SegInfo.Address, pagesize) can be calculated by: |
| 2075 | // alignDown(SegInfo.Address, pagesize) |
| 2076 | // = SegInfo.Address - (SegInfo.Address % pagesize) |
| 2077 | // = SegInfo.Address - (SegInfo.FileOffset % pagesize) |
| 2078 | // = SegInfo.Address - SegInfo.FileOffset + |
| 2079 | // alignDown(SegInfo.FileOffset, pagesize) |
| 2080 | // = SegInfo.Address - SegInfo.FileOffset + FileOffset |
| 2081 | return MMapAddress - (SegInfo.Address - SegInfo.FileOffset + FileOffset); |
| 2082 | } |
| 2083 | } |
| 2084 | |
| 2085 | return std::nullopt; |
| 2086 | } |
| 2087 | |
| 2088 | ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) { |
| 2089 | auto SI = AddressToSection.upper_bound(x: Address); |
| 2090 | if (SI != AddressToSection.begin()) { |
| 2091 | --SI; |
| 2092 | uint64_t UpperBound = SI->first + SI->second->getSize(); |
| 2093 | if (!SI->second->getSize()) |
| 2094 | UpperBound += 1; |
| 2095 | if (UpperBound > Address) |
| 2096 | return *SI->second; |
| 2097 | } |
| 2098 | return std::make_error_code(e: std::errc::bad_address); |
| 2099 | } |
| 2100 | |
| 2101 | ErrorOr<StringRef> |
| 2102 | BinaryContext::getSectionNameForAddress(uint64_t Address) const { |
| 2103 | if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address)) |
| 2104 | return Section->getName(); |
| 2105 | return std::make_error_code(e: std::errc::bad_address); |
| 2106 | } |
| 2107 | |
| 2108 | BinarySection &BinaryContext::registerSection(BinarySection *Section) { |
| 2109 | auto Res = Sections.insert(x: Section); |
| 2110 | (void)Res; |
| 2111 | assert(Res.second && "can't register the same section twice." ); |
| 2112 | |
| 2113 | // Only register allocatable sections in the AddressToSection map. |
| 2114 | if (Section->isAllocatable() && Section->getAddress()) |
| 2115 | AddressToSection.insert(x: std::make_pair(x: Section->getAddress(), y&: Section)); |
| 2116 | NameToSection.insert( |
| 2117 | x: std::make_pair(x: std::string(Section->getName()), y&: Section)); |
| 2118 | if (Section->hasSectionRef()) |
| 2119 | SectionRefToBinarySection.insert( |
| 2120 | KV: std::make_pair(x: Section->getSectionRef(), y&: Section)); |
| 2121 | |
| 2122 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n" ); |
| 2123 | return *Section; |
| 2124 | } |
| 2125 | |
| 2126 | BinarySection &BinaryContext::registerSection(SectionRef Section) { |
| 2127 | return registerSection(Section: new BinarySection(*this, Section)); |
| 2128 | } |
| 2129 | |
| 2130 | BinarySection & |
| 2131 | BinaryContext::registerSection(const Twine &SectionName, |
| 2132 | const BinarySection &OriginalSection) { |
| 2133 | return registerSection( |
| 2134 | Section: new BinarySection(*this, SectionName, OriginalSection)); |
| 2135 | } |
| 2136 | |
| 2137 | BinarySection & |
| 2138 | BinaryContext::registerOrUpdateSection(const Twine &Name, unsigned ELFType, |
| 2139 | unsigned ELFFlags, uint8_t *Data, |
| 2140 | uint64_t Size, unsigned Alignment) { |
| 2141 | auto NamedSections = getSectionByName(Name); |
| 2142 | if (NamedSections.begin() != NamedSections.end()) { |
| 2143 | assert(std::next(NamedSections.begin()) == NamedSections.end() && |
| 2144 | "can only update unique sections" ); |
| 2145 | BinarySection *Section = NamedSections.begin()->second; |
| 2146 | |
| 2147 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> " ); |
| 2148 | const bool Flag = Section->isAllocatable(); |
| 2149 | (void)Flag; |
| 2150 | Section->update(NewData: Data, NewSize: Size, NewAlignment: Alignment, NewELFType: ELFType, NewELFFlags: ELFFlags); |
| 2151 | LLVM_DEBUG(dbgs() << *Section << "\n" ); |
| 2152 | // FIXME: Fix section flags/attributes for MachO. |
| 2153 | if (isELF()) |
| 2154 | assert(Flag == Section->isAllocatable() && |
| 2155 | "can't change section allocation status" ); |
| 2156 | return *Section; |
| 2157 | } |
| 2158 | |
| 2159 | return registerSection( |
| 2160 | Section: new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags)); |
| 2161 | } |
| 2162 | |
| 2163 | void BinaryContext::deregisterSectionName(const BinarySection &Section) { |
| 2164 | auto NameRange = NameToSection.equal_range(x: Section.getName().str()); |
| 2165 | while (NameRange.first != NameRange.second) { |
| 2166 | if (NameRange.first->second == &Section) { |
| 2167 | NameToSection.erase(position: NameRange.first); |
| 2168 | break; |
| 2169 | } |
| 2170 | ++NameRange.first; |
| 2171 | } |
| 2172 | } |
| 2173 | |
| 2174 | void BinaryContext::deregisterUnusedSections() { |
| 2175 | ErrorOr<BinarySection &> AbsSection = getUniqueSectionByName(SectionName: "<absolute>" ); |
| 2176 | for (auto SI = Sections.begin(); SI != Sections.end();) { |
| 2177 | BinarySection *Section = *SI; |
| 2178 | // We check getOutputData() instead of getOutputSize() because sometimes |
| 2179 | // zero-sized .text.cold sections are allocated. |
| 2180 | if (Section->hasSectionRef() || Section->getOutputData() || |
| 2181 | (AbsSection && Section == &AbsSection.get())) { |
| 2182 | ++SI; |
| 2183 | continue; |
| 2184 | } |
| 2185 | |
| 2186 | LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section->getName() |
| 2187 | << '\n';); |
| 2188 | deregisterSectionName(Section: *Section); |
| 2189 | SI = Sections.erase(position: SI); |
| 2190 | delete Section; |
| 2191 | } |
| 2192 | } |
| 2193 | |
| 2194 | bool BinaryContext::deregisterSection(BinarySection &Section) { |
| 2195 | BinarySection *SectionPtr = &Section; |
| 2196 | auto Itr = Sections.find(x: SectionPtr); |
| 2197 | if (Itr != Sections.end()) { |
| 2198 | auto Range = AddressToSection.equal_range(x: SectionPtr->getAddress()); |
| 2199 | while (Range.first != Range.second) { |
| 2200 | if (Range.first->second == SectionPtr) { |
| 2201 | AddressToSection.erase(position: Range.first); |
| 2202 | break; |
| 2203 | } |
| 2204 | ++Range.first; |
| 2205 | } |
| 2206 | |
| 2207 | deregisterSectionName(Section: *SectionPtr); |
| 2208 | Sections.erase(position: Itr); |
| 2209 | delete SectionPtr; |
| 2210 | return true; |
| 2211 | } |
| 2212 | return false; |
| 2213 | } |
| 2214 | |
| 2215 | void BinaryContext::renameSection(BinarySection &Section, |
| 2216 | const Twine &NewName) { |
| 2217 | auto Itr = Sections.find(x: &Section); |
| 2218 | assert(Itr != Sections.end() && "Section must exist to be renamed." ); |
| 2219 | Sections.erase(position: Itr); |
| 2220 | |
| 2221 | deregisterSectionName(Section); |
| 2222 | |
| 2223 | Section.Name = NewName.str(); |
| 2224 | Section.setOutputName(Section.Name); |
| 2225 | |
| 2226 | NameToSection.insert(x: std::make_pair(x&: Section.Name, y: &Section)); |
| 2227 | |
| 2228 | // Reinsert with the new name. |
| 2229 | Sections.insert(x: &Section); |
| 2230 | } |
| 2231 | |
| 2232 | void BinaryContext::printSections(raw_ostream &OS) const { |
| 2233 | for (BinarySection *const &Section : Sections) |
| 2234 | OS << "BOLT-INFO: " << *Section << "\n" ; |
| 2235 | } |
| 2236 | |
| 2237 | BinarySection &BinaryContext::absoluteSection() { |
| 2238 | if (ErrorOr<BinarySection &> Section = getUniqueSectionByName(SectionName: "<absolute>" )) |
| 2239 | return *Section; |
| 2240 | return registerOrUpdateSection(Name: "<absolute>" , ELFType: ELF::SHT_NULL, ELFFlags: 0u); |
| 2241 | } |
| 2242 | |
| 2243 | ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address, |
| 2244 | size_t Size) const { |
| 2245 | const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); |
| 2246 | if (!Section) |
| 2247 | return std::make_error_code(e: std::errc::bad_address); |
| 2248 | |
| 2249 | if (Section->isVirtual()) |
| 2250 | return 0; |
| 2251 | |
| 2252 | DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), |
| 2253 | AsmInfo->getCodePointerSize()); |
| 2254 | auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); |
| 2255 | return DE.getUnsigned(offset_ptr: &ValueOffset, byte_size: Size); |
| 2256 | } |
| 2257 | |
| 2258 | ErrorOr<int64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address, |
| 2259 | size_t Size) const { |
| 2260 | const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); |
| 2261 | if (!Section) |
| 2262 | return std::make_error_code(e: std::errc::bad_address); |
| 2263 | |
| 2264 | if (Section->isVirtual()) |
| 2265 | return 0; |
| 2266 | |
| 2267 | DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), |
| 2268 | AsmInfo->getCodePointerSize()); |
| 2269 | auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); |
| 2270 | return DE.getSigned(offset_ptr: &ValueOffset, size: Size); |
| 2271 | } |
| 2272 | |
| 2273 | void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol, |
| 2274 | uint32_t Type, uint64_t Addend, |
| 2275 | uint64_t Value) { |
| 2276 | ErrorOr<BinarySection &> Section = getSectionForAddress(Address); |
| 2277 | assert(Section && "cannot find section for address" ); |
| 2278 | Section->addRelocation(Offset: Address - Section->getAddress(), Symbol, Type, Addend, |
| 2279 | Value); |
| 2280 | } |
| 2281 | |
| 2282 | void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol, |
| 2283 | uint32_t Type, uint64_t Addend, |
| 2284 | uint64_t Value) { |
| 2285 | ErrorOr<BinarySection &> Section = getSectionForAddress(Address); |
| 2286 | assert(Section && "cannot find section for address" ); |
| 2287 | Section->addDynamicRelocation(Offset: Address - Section->getAddress(), Symbol, Type, |
| 2288 | Addend, Value); |
| 2289 | } |
| 2290 | |
| 2291 | bool BinaryContext::removeRelocationAt(uint64_t Address) { |
| 2292 | ErrorOr<BinarySection &> Section = getSectionForAddress(Address); |
| 2293 | assert(Section && "cannot find section for address" ); |
| 2294 | return Section->removeRelocationAt(Offset: Address - Section->getAddress()); |
| 2295 | } |
| 2296 | |
| 2297 | const Relocation *BinaryContext::getRelocationAt(uint64_t Address) const { |
| 2298 | ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); |
| 2299 | if (!Section) |
| 2300 | return nullptr; |
| 2301 | |
| 2302 | return Section->getRelocationAt(Offset: Address - Section->getAddress()); |
| 2303 | } |
| 2304 | |
| 2305 | const Relocation * |
| 2306 | BinaryContext::getDynamicRelocationAt(uint64_t Address) const { |
| 2307 | ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); |
| 2308 | if (!Section) |
| 2309 | return nullptr; |
| 2310 | |
| 2311 | return Section->getDynamicRelocationAt(Offset: Address - Section->getAddress()); |
| 2312 | } |
| 2313 | |
| 2314 | void BinaryContext::markAmbiguousRelocations(BinaryData &BD, |
| 2315 | const uint64_t Address) { |
| 2316 | auto setImmovable = [&](BinaryData &BD) { |
| 2317 | BinaryData *Root = BD.getAtomicRoot(); |
| 2318 | LLVM_DEBUG(if (Root->isMoveable()) { |
| 2319 | dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable " |
| 2320 | << "due to ambiguous relocation referencing 0x" |
| 2321 | << Twine::utohexstr(Address) << '\n'; |
| 2322 | }); |
| 2323 | Root->setIsMoveable(false); |
| 2324 | }; |
| 2325 | |
| 2326 | if (Address == BD.getAddress()) { |
| 2327 | setImmovable(BD); |
| 2328 | |
| 2329 | // Set previous symbol as immovable |
| 2330 | BinaryData *Prev = getBinaryDataContainingAddress(Address: Address - 1); |
| 2331 | if (Prev && Prev->getEndAddress() == BD.getAddress()) |
| 2332 | setImmovable(*Prev); |
| 2333 | } |
| 2334 | |
| 2335 | if (Address == BD.getEndAddress()) { |
| 2336 | setImmovable(BD); |
| 2337 | |
| 2338 | // Set next symbol as immovable |
| 2339 | BinaryData *Next = getBinaryDataContainingAddress(Address: BD.getEndAddress()); |
| 2340 | if (Next && Next->getAddress() == BD.getEndAddress()) |
| 2341 | setImmovable(*Next); |
| 2342 | } |
| 2343 | } |
| 2344 | |
| 2345 | BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol, |
| 2346 | uint64_t *EntryDesc) { |
| 2347 | std::shared_lock<llvm::sys::RWMutex> Lock(SymbolToFunctionMapMutex); |
| 2348 | auto BFI = SymbolToFunctionMap.find(x: Symbol); |
| 2349 | if (BFI == SymbolToFunctionMap.end()) |
| 2350 | return nullptr; |
| 2351 | |
| 2352 | BinaryFunction *BF = BFI->second; |
| 2353 | if (EntryDesc) |
| 2354 | *EntryDesc = BF->getEntryIDForSymbol(EntrySymbol: Symbol); |
| 2355 | |
| 2356 | return BF; |
| 2357 | } |
| 2358 | |
| 2359 | std::string |
| 2360 | BinaryContext::generateBugReportMessage(StringRef Message, |
| 2361 | const BinaryFunction &Function) const { |
| 2362 | std::string Msg; |
| 2363 | raw_string_ostream SS(Msg); |
| 2364 | SS << "=======================================\n" ; |
| 2365 | SS << "BOLT is unable to proceed because it couldn't properly understand " |
| 2366 | "this function.\n" ; |
| 2367 | SS << "If you are running the most recent version of BOLT, you may " |
| 2368 | "want to " |
| 2369 | "report this and paste this dump.\nPlease check that there is no " |
| 2370 | "sensitive contents being shared in this dump.\n" ; |
| 2371 | SS << "\nOffending function: " << Function.getPrintName() << "\n\n" ; |
| 2372 | ScopedPrinter SP(SS); |
| 2373 | SP.printBinaryBlock(Label: "Function contents" , Value: *Function.getData()); |
| 2374 | SS << "\n" ; |
| 2375 | const_cast<BinaryFunction &>(Function).print(OS&: SS, Annotation: "" ); |
| 2376 | SS << "ERROR: " << Message; |
| 2377 | SS << "\n=======================================\n" ; |
| 2378 | return Msg; |
| 2379 | } |
| 2380 | |
| 2381 | BinaryFunction * |
| 2382 | BinaryContext::createInjectedBinaryFunction(const std::string &Name, |
| 2383 | bool IsSimple) { |
| 2384 | InjectedBinaryFunctions.push_back(x: new BinaryFunction(Name, *this, IsSimple)); |
| 2385 | BinaryFunction *BF = InjectedBinaryFunctions.back(); |
| 2386 | setSymbolToFunctionMap(Sym: BF->getSymbol(), BF); |
| 2387 | BF->CurrentState = BinaryFunction::State::CFG; |
| 2388 | return BF; |
| 2389 | } |
| 2390 | |
| 2391 | BinaryFunction * |
| 2392 | BinaryContext::createInstructionPatch(uint64_t Address, |
| 2393 | const InstructionListType &Instructions, |
| 2394 | const Twine &Name) { |
| 2395 | ErrorOr<BinarySection &> Section = getSectionForAddress(Address); |
| 2396 | assert(Section && "cannot get section for patching" ); |
| 2397 | assert(Section->hasSectionRef() && Section->isText() && |
| 2398 | "can only patch input file code sections" ); |
| 2399 | |
| 2400 | const uint64_t FileOffset = |
| 2401 | Section->getInputFileOffset() + Address - Section->getAddress(); |
| 2402 | |
| 2403 | std::string PatchName = Name.str(); |
| 2404 | if (PatchName.empty()) { |
| 2405 | // Assign unique name to the patch. |
| 2406 | static uint64_t N = 0; |
| 2407 | PatchName = "__BP_" + std::to_string(val: N++); |
| 2408 | } |
| 2409 | |
| 2410 | BinaryFunction *PBF = createInjectedBinaryFunction(Name: PatchName); |
| 2411 | PBF->setOutputAddress(Address); |
| 2412 | PBF->setFileOffset(FileOffset); |
| 2413 | PBF->setOriginSection(&Section.get()); |
| 2414 | PBF->addBasicBlock()->addInstructions(R: Instructions); |
| 2415 | PBF->setIsPatch(true); |
| 2416 | |
| 2417 | // Don't create symbol table entry if the name wasn't specified. |
| 2418 | if (Name.str().empty()) |
| 2419 | PBF->setAnonymous(true); |
| 2420 | |
| 2421 | return PBF; |
| 2422 | } |
| 2423 | |
| 2424 | std::pair<size_t, size_t> |
| 2425 | BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) { |
| 2426 | // Use the original size for non-simple functions. |
| 2427 | if (!BF.isSimple() || BF.isIgnored()) |
| 2428 | return std::make_pair(x: BF.getSize(), y: 0); |
| 2429 | |
| 2430 | // Adjust branch instruction to match the current layout. |
| 2431 | if (FixBranches) |
| 2432 | BF.fixBranches(); |
| 2433 | |
| 2434 | // Create local MC context to isolate the effect of ephemeral code emission. |
| 2435 | IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter(); |
| 2436 | MCContext *LocalCtx = MCEInstance.LocalCtx.get(); |
| 2437 | MCAsmBackend *MAB = |
| 2438 | TheTarget->createMCAsmBackend(STI: *STI, MRI: *MRI, Options: MCTargetOptions()); |
| 2439 | |
| 2440 | SmallString<256> Code; |
| 2441 | raw_svector_ostream VecOS(Code); |
| 2442 | |
| 2443 | std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(OS&: VecOS); |
| 2444 | std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer( |
| 2445 | T: *TheTriple, Ctx&: *LocalCtx, TAB: std::unique_ptr<MCAsmBackend>(MAB), OW: std::move(OW), |
| 2446 | Emitter: std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), STI: *STI)); |
| 2447 | |
| 2448 | Streamer->initSections(NoExecStack: false, STI: *STI); |
| 2449 | |
| 2450 | MCSection *Section = MCEInstance.LocalMOFI->getTextSection(); |
| 2451 | Section->setHasInstructions(true); |
| 2452 | |
| 2453 | // Create symbols in the LocalCtx so that they get destroyed with it. |
| 2454 | MCSymbol *StartLabel = LocalCtx->createTempSymbol(); |
| 2455 | MCSymbol *EndLabel = LocalCtx->createTempSymbol(); |
| 2456 | |
| 2457 | Streamer->switchSection(Section); |
| 2458 | Streamer->emitLabel(Symbol: StartLabel); |
| 2459 | emitFunctionBody(Streamer&: *Streamer, BF, FF&: BF.getLayout().getMainFragment(), |
| 2460 | /*EmitCodeOnly=*/true); |
| 2461 | Streamer->emitLabel(Symbol: EndLabel); |
| 2462 | |
| 2463 | using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>; |
| 2464 | SmallVector<LabelRange> SplitLabels; |
| 2465 | for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) { |
| 2466 | MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol(); |
| 2467 | MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol(); |
| 2468 | SplitLabels.emplace_back(Args: SplitStartLabel, Args: SplitEndLabel); |
| 2469 | |
| 2470 | MCSectionELF *const SplitSection = LocalCtx->getELFSection( |
| 2471 | Section: BF.getCodeSectionName(Fragment: FF.getFragmentNum()), Type: ELF::SHT_PROGBITS, |
| 2472 | Flags: ELF::SHF_EXECINSTR | ELF::SHF_ALLOC); |
| 2473 | SplitSection->setHasInstructions(true); |
| 2474 | Streamer->switchSection(Section: SplitSection); |
| 2475 | |
| 2476 | Streamer->emitLabel(Symbol: SplitStartLabel); |
| 2477 | emitFunctionBody(Streamer&: *Streamer, BF, FF, /*EmitCodeOnly=*/true); |
| 2478 | Streamer->emitLabel(Symbol: SplitEndLabel); |
| 2479 | } |
| 2480 | |
| 2481 | MCAssembler &Assembler = |
| 2482 | static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler(); |
| 2483 | Assembler.layout(); |
| 2484 | |
| 2485 | // Obtain fragment sizes. |
| 2486 | std::vector<uint64_t> FragmentSizes; |
| 2487 | // Main fragment size. |
| 2488 | const uint64_t HotSize = Assembler.getSymbolOffset(S: *EndLabel) - |
| 2489 | Assembler.getSymbolOffset(S: *StartLabel); |
| 2490 | FragmentSizes.push_back(x: HotSize); |
| 2491 | // Split fragment sizes. |
| 2492 | uint64_t ColdSize = 0; |
| 2493 | for (const auto &Labels : SplitLabels) { |
| 2494 | uint64_t Size = Assembler.getSymbolOffset(S: *Labels.second) - |
| 2495 | Assembler.getSymbolOffset(S: *Labels.first); |
| 2496 | FragmentSizes.push_back(x: Size); |
| 2497 | ColdSize += Size; |
| 2498 | } |
| 2499 | |
| 2500 | // Populate new start and end offsets of each basic block. |
| 2501 | uint64_t FragmentIndex = 0; |
| 2502 | for (FunctionFragment &FF : BF.getLayout().fragments()) { |
| 2503 | BinaryBasicBlock *PrevBB = nullptr; |
| 2504 | for (BinaryBasicBlock *BB : FF) { |
| 2505 | const uint64_t BBStartOffset = |
| 2506 | Assembler.getSymbolOffset(S: *(BB->getLabel())); |
| 2507 | BB->setOutputStartAddress(BBStartOffset); |
| 2508 | if (PrevBB) |
| 2509 | PrevBB->setOutputEndAddress(BBStartOffset); |
| 2510 | PrevBB = BB; |
| 2511 | } |
| 2512 | if (PrevBB) |
| 2513 | PrevBB->setOutputEndAddress(FragmentSizes[FragmentIndex]); |
| 2514 | FragmentIndex++; |
| 2515 | } |
| 2516 | |
| 2517 | // Clean-up the effect of the code emission. |
| 2518 | for (const MCSymbol &Symbol : Assembler.symbols()) { |
| 2519 | MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol); |
| 2520 | MutableSymbol->setUndefined(); |
| 2521 | MutableSymbol->setIsRegistered(false); |
| 2522 | } |
| 2523 | |
| 2524 | return std::make_pair(x: HotSize, y&: ColdSize); |
| 2525 | } |
| 2526 | |
| 2527 | bool BinaryContext::validateInstructionEncoding( |
| 2528 | ArrayRef<uint8_t> InputSequence) const { |
| 2529 | MCInst Inst; |
| 2530 | uint64_t InstSize; |
| 2531 | DisAsm->getInstruction(Instr&: Inst, Size&: InstSize, Bytes: InputSequence, Address: 0, CStream&: nulls()); |
| 2532 | assert(InstSize == InputSequence.size() && |
| 2533 | "Disassembled instruction size does not match the sequence." ); |
| 2534 | |
| 2535 | SmallString<256> Code; |
| 2536 | SmallVector<MCFixup, 4> Fixups; |
| 2537 | |
| 2538 | MCE->encodeInstruction(Inst, CB&: Code, Fixups, STI: *STI); |
| 2539 | auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size()); |
| 2540 | if (InputSequence != OutputSequence) { |
| 2541 | if (opts::Verbosity > 1) { |
| 2542 | this->errs() << "BOLT-WARNING: mismatched encoding detected\n" |
| 2543 | << " input: " << InputSequence << '\n' |
| 2544 | << " output: " << OutputSequence << '\n'; |
| 2545 | } |
| 2546 | return false; |
| 2547 | } |
| 2548 | |
| 2549 | return true; |
| 2550 | } |
| 2551 | |
| 2552 | uint64_t BinaryContext::getHotThreshold() const { |
| 2553 | static uint64_t Threshold = 0; |
| 2554 | if (Threshold == 0) { |
| 2555 | Threshold = std::max( |
| 2556 | a: (uint64_t)opts::ExecutionCountThreshold, |
| 2557 | b: NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1); |
| 2558 | } |
| 2559 | return Threshold; |
| 2560 | } |
| 2561 | |
| 2562 | BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress( |
| 2563 | uint64_t Address, bool CheckPastEnd, bool UseMaxSize) { |
| 2564 | auto FI = BinaryFunctions.upper_bound(x: Address); |
| 2565 | if (FI == BinaryFunctions.begin()) |
| 2566 | return nullptr; |
| 2567 | --FI; |
| 2568 | |
| 2569 | const uint64_t UsedSize = |
| 2570 | UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize(); |
| 2571 | |
| 2572 | if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0)) |
| 2573 | return nullptr; |
| 2574 | |
| 2575 | return &FI->second; |
| 2576 | } |
| 2577 | |
| 2578 | BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) { |
| 2579 | // First, try to find a function starting at the given address. If the |
| 2580 | // function was folded, this will get us the original folded function if it |
| 2581 | // wasn't removed from the list, e.g. in non-relocation mode. |
| 2582 | auto BFI = BinaryFunctions.find(x: Address); |
| 2583 | if (BFI != BinaryFunctions.end()) |
| 2584 | return &BFI->second; |
| 2585 | |
| 2586 | // We might have folded the function matching the object at the given |
| 2587 | // address. In such case, we look for a function matching the symbol |
| 2588 | // registered at the original address. The new function (the one that the |
| 2589 | // original was folded into) will hold the symbol. |
| 2590 | if (const BinaryData *BD = getBinaryDataAtAddress(Address)) { |
| 2591 | uint64_t EntryID = 0; |
| 2592 | BinaryFunction *BF = getFunctionForSymbol(Symbol: BD->getSymbol(), EntryDesc: &EntryID); |
| 2593 | if (BF && EntryID == 0) |
| 2594 | return BF; |
| 2595 | } |
| 2596 | return nullptr; |
| 2597 | } |
| 2598 | |
| 2599 | /// Deregister JumpTable registered at a given \p Address and delete it. |
| 2600 | void BinaryContext::deleteJumpTable(uint64_t Address) { |
| 2601 | assert(JumpTables.count(Address) && "Must have a jump table at address" ); |
| 2602 | JumpTable *JT = JumpTables.at(k: Address); |
| 2603 | for (BinaryFunction *Parent : JT->Parents) |
| 2604 | Parent->JumpTables.erase(x: Address); |
| 2605 | JumpTables.erase(x: Address); |
| 2606 | delete JT; |
| 2607 | } |
| 2608 | |
| 2609 | DebugAddressRangesVector BinaryContext::translateModuleAddressRanges( |
| 2610 | const DWARFAddressRangesVector &InputRanges) const { |
| 2611 | DebugAddressRangesVector OutputRanges; |
| 2612 | |
| 2613 | for (const DWARFAddressRange Range : InputRanges) { |
| 2614 | auto BFI = BinaryFunctions.lower_bound(x: Range.LowPC); |
| 2615 | while (BFI != BinaryFunctions.end()) { |
| 2616 | const BinaryFunction &Function = BFI->second; |
| 2617 | if (Function.getAddress() >= Range.HighPC) |
| 2618 | break; |
| 2619 | const DebugAddressRangesVector FunctionRanges = |
| 2620 | Function.getOutputAddressRanges(); |
| 2621 | llvm::move(Range: FunctionRanges, Out: std::back_inserter(x&: OutputRanges)); |
| 2622 | std::advance(i&: BFI, n: 1); |
| 2623 | } |
| 2624 | } |
| 2625 | |
| 2626 | return OutputRanges; |
| 2627 | } |
| 2628 | |
| 2629 | } // namespace bolt |
| 2630 | } // namespace llvm |
| 2631 | |