1 | //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the BinaryContext class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "bolt/Core/BinaryContext.h" |
14 | #include "bolt/Core/BinaryEmitter.h" |
15 | #include "bolt/Core/BinaryFunction.h" |
16 | #include "bolt/Utils/CommandLineOpts.h" |
17 | #include "bolt/Utils/Utils.h" |
18 | #include "llvm/ADT/STLExtras.h" |
19 | #include "llvm/ADT/Twine.h" |
20 | #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" |
21 | #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" |
22 | #include "llvm/DebugInfo/DWARF/DWARFUnit.h" |
23 | #include "llvm/MC/MCAssembler.h" |
24 | #include "llvm/MC/MCContext.h" |
25 | #include "llvm/MC/MCDisassembler/MCDisassembler.h" |
26 | #include "llvm/MC/MCInstPrinter.h" |
27 | #include "llvm/MC/MCObjectStreamer.h" |
28 | #include "llvm/MC/MCObjectWriter.h" |
29 | #include "llvm/MC/MCRegisterInfo.h" |
30 | #include "llvm/MC/MCSectionELF.h" |
31 | #include "llvm/MC/MCStreamer.h" |
32 | #include "llvm/MC/MCSubtargetInfo.h" |
33 | #include "llvm/MC/MCSymbol.h" |
34 | #include "llvm/Support/CommandLine.h" |
35 | #include "llvm/Support/Error.h" |
36 | #include "llvm/Support/Regex.h" |
37 | #include <algorithm> |
38 | #include <functional> |
39 | #include <iterator> |
40 | #include <unordered_set> |
41 | |
42 | using namespace llvm; |
43 | |
44 | #undef DEBUG_TYPE |
45 | #define DEBUG_TYPE "bolt" |
46 | |
47 | namespace opts { |
48 | |
49 | static cl::opt<bool> |
50 | NoHugePages("no-huge-pages", |
51 | cl::desc("use regular size pages for code alignment"), |
52 | cl::Hidden, cl::cat(BoltCategory)); |
53 | |
54 | static cl::opt<bool> |
55 | PrintDebugInfo("print-debug-info", |
56 | cl::desc("print debug info when printing functions"), |
57 | cl::Hidden, |
58 | cl::ZeroOrMore, |
59 | cl::cat(BoltCategory)); |
60 | |
61 | cl::opt<bool> PrintRelocations( |
62 | "print-relocations", |
63 | cl::desc("print relocations when printing functions/objects"), cl::Hidden, |
64 | cl::cat(BoltCategory)); |
65 | |
66 | static cl::opt<bool> |
67 | PrintMemData("print-mem-data", |
68 | cl::desc("print memory data annotations when printing functions"), |
69 | cl::Hidden, |
70 | cl::ZeroOrMore, |
71 | cl::cat(BoltCategory)); |
72 | |
73 | cl::opt<std::string> CompDirOverride( |
74 | "comp-dir-override", |
75 | cl::desc("overrides DW_AT_comp_dir, and provides an alternative base " |
76 | "location, which is used with DW_AT_dwo_name to construct a path " |
77 | "to *.dwo files."), |
78 | cl::Hidden, cl::init(Val: ""), cl::cat(BoltCategory)); |
79 | } // namespace opts |
80 | |
81 | namespace llvm { |
82 | namespace bolt { |
83 | |
84 | char BOLTError::ID = 0; |
85 | |
86 | BOLTError::BOLTError(bool IsFatal, const Twine &S) |
87 | : IsFatal(IsFatal), Msg(S.str()) {} |
88 | |
89 | void BOLTError::log(raw_ostream &OS) const { |
90 | if (IsFatal) |
91 | OS << "FATAL "; |
92 | StringRef ErrMsg = StringRef(Msg); |
93 | // Prepend our error prefix if it is missing |
94 | if (ErrMsg.empty()) { |
95 | OS << "BOLT-ERROR\n"; |
96 | } else { |
97 | if (!ErrMsg.starts_with(Prefix: "BOLT-ERROR")) |
98 | OS << "BOLT-ERROR: "; |
99 | OS << ErrMsg << "\n"; |
100 | } |
101 | } |
102 | |
103 | std::error_code BOLTError::convertToErrorCode() const { |
104 | return inconvertibleErrorCode(); |
105 | } |
106 | |
107 | Error createNonFatalBOLTError(const Twine &S) { |
108 | return make_error<BOLTError>(/*IsFatal*/ Args: false, Args: S); |
109 | } |
110 | |
111 | Error createFatalBOLTError(const Twine &S) { |
112 | return make_error<BOLTError>(/*IsFatal*/ Args: true, Args: S); |
113 | } |
114 | |
115 | void BinaryContext::logBOLTErrorsAndQuitOnFatal(Error E) { |
116 | handleAllErrors(E: Error(std::move(E)), Handlers: [&](const BOLTError &E) { |
117 | if (!E.getMessage().empty()) |
118 | E.log(OS&: this->errs()); |
119 | if (E.isFatal()) |
120 | exit(status: 1); |
121 | }); |
122 | } |
123 | |
124 | BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx, |
125 | std::unique_ptr<DWARFContext> DwCtx, |
126 | std::unique_ptr<Triple> TheTriple, |
127 | std::shared_ptr<orc::SymbolStringPool> SSP, |
128 | const Target *TheTarget, std::string TripleName, |
129 | std::unique_ptr<MCCodeEmitter> MCE, |
130 | std::unique_ptr<MCObjectFileInfo> MOFI, |
131 | std::unique_ptr<const MCAsmInfo> AsmInfo, |
132 | std::unique_ptr<const MCInstrInfo> MII, |
133 | std::unique_ptr<const MCSubtargetInfo> STI, |
134 | std::unique_ptr<MCInstPrinter> InstPrinter, |
135 | std::unique_ptr<const MCInstrAnalysis> MIA, |
136 | std::unique_ptr<MCPlusBuilder> MIB, |
137 | std::unique_ptr<const MCRegisterInfo> MRI, |
138 | std::unique_ptr<MCDisassembler> DisAsm, |
139 | JournalingStreams Logger) |
140 | : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)), |
141 | TheTriple(std::move(TheTriple)), SSP(std::move(SSP)), |
142 | TheTarget(TheTarget), TripleName(TripleName), MCE(std::move(MCE)), |
143 | MOFI(std::move(MOFI)), AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), |
144 | STI(std::move(STI)), InstPrinter(std::move(InstPrinter)), |
145 | MIA(std::move(MIA)), MIB(std::move(MIB)), MRI(std::move(MRI)), |
146 | DisAsm(std::move(DisAsm)), Logger(Logger), InitialDynoStats(isAArch64()) { |
147 | RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86; |
148 | PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize; |
149 | } |
150 | |
151 | BinaryContext::~BinaryContext() { |
152 | for (BinarySection *Section : Sections) |
153 | delete Section; |
154 | for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions) |
155 | delete InjectedFunction; |
156 | for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables) |
157 | delete JTI.second; |
158 | clearBinaryData(); |
159 | } |
160 | |
161 | /// Create BinaryContext for a given architecture \p ArchName and |
162 | /// triple \p TripleName. |
163 | Expected<std::unique_ptr<BinaryContext>> BinaryContext::createBinaryContext( |
164 | Triple TheTriple, std::shared_ptr<orc::SymbolStringPool> SSP, |
165 | StringRef InputFileName, SubtargetFeatures *Features, bool IsPIC, |
166 | std::unique_ptr<DWARFContext> DwCtx, JournalingStreams Logger) { |
167 | StringRef ArchName = ""; |
168 | std::string FeaturesStr = ""; |
169 | switch (TheTriple.getArch()) { |
170 | case llvm::Triple::x86_64: |
171 | if (Features) |
172 | return createFatalBOLTError( |
173 | S: "x86_64 target does not use SubtargetFeatures"); |
174 | ArchName = "x86-64"; |
175 | FeaturesStr = "+nopl"; |
176 | break; |
177 | case llvm::Triple::aarch64: |
178 | if (Features) |
179 | return createFatalBOLTError( |
180 | S: "AArch64 target does not use SubtargetFeatures"); |
181 | ArchName = "aarch64"; |
182 | FeaturesStr = "+all"; |
183 | break; |
184 | case llvm::Triple::riscv64: { |
185 | ArchName = "riscv64"; |
186 | if (!Features) |
187 | return createFatalBOLTError(S: "RISCV target needs SubtargetFeatures"); |
188 | // We rely on relaxation for some transformations (e.g., promoting all calls |
189 | // to PseudoCALL and then making JITLink relax them). Since the relax |
190 | // feature is not stored in the object file, we manually enable it. |
191 | Features->AddFeature(String: "relax"); |
192 | FeaturesStr = Features->getString(); |
193 | break; |
194 | } |
195 | default: |
196 | return createStringError(EC: std::errc::not_supported, |
197 | Fmt: "BOLT-ERROR: Unrecognized machine in ELF file"); |
198 | } |
199 | |
200 | const std::string TripleName = TheTriple.str(); |
201 | |
202 | std::string Error; |
203 | const Target *TheTarget = |
204 | TargetRegistry::lookupTarget(ArchName, TheTriple, Error); |
205 | if (!TheTarget) |
206 | return createStringError(EC: make_error_code(e: std::errc::not_supported), |
207 | S: Twine("BOLT-ERROR: ", Error)); |
208 | |
209 | std::unique_ptr<const MCRegisterInfo> MRI( |
210 | TheTarget->createMCRegInfo(TT: TripleName)); |
211 | if (!MRI) |
212 | return createStringError( |
213 | EC: make_error_code(e: std::errc::not_supported), |
214 | S: Twine("BOLT-ERROR: no register info for target ", TripleName)); |
215 | |
216 | // Set up disassembler. |
217 | std::unique_ptr<MCAsmInfo> AsmInfo( |
218 | TheTarget->createMCAsmInfo(MRI: *MRI, TheTriple: TripleName, Options: MCTargetOptions())); |
219 | if (!AsmInfo) |
220 | return createStringError( |
221 | EC: make_error_code(e: std::errc::not_supported), |
222 | S: Twine("BOLT-ERROR: no assembly info for target ", TripleName)); |
223 | // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump |
224 | // we want to emit such names as using @PLT without double quotes to convey |
225 | // variant kind to the assembler. BOLT doesn't rely on the linker so we can |
226 | // override the default AsmInfo behavior to emit names the way we want. |
227 | AsmInfo->setAllowAtInName(true); |
228 | |
229 | std::unique_ptr<const MCSubtargetInfo> STI( |
230 | TheTarget->createMCSubtargetInfo(TheTriple: TripleName, CPU: "", Features: FeaturesStr)); |
231 | if (!STI) |
232 | return createStringError( |
233 | EC: make_error_code(e: std::errc::not_supported), |
234 | S: Twine("BOLT-ERROR: no subtarget info for target ", TripleName)); |
235 | |
236 | std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo()); |
237 | if (!MII) |
238 | return createStringError( |
239 | EC: make_error_code(e: std::errc::not_supported), |
240 | S: Twine("BOLT-ERROR: no instruction info for target ", TripleName)); |
241 | |
242 | std::unique_ptr<MCContext> Ctx( |
243 | new MCContext(TheTriple, AsmInfo.get(), MRI.get(), STI.get())); |
244 | std::unique_ptr<MCObjectFileInfo> MOFI( |
245 | TheTarget->createMCObjectFileInfo(Ctx&: *Ctx, PIC: IsPIC)); |
246 | Ctx->setObjectFileInfo(MOFI.get()); |
247 | // We do not support X86 Large code model. Change this in the future. |
248 | bool Large = false; |
249 | if (TheTriple.getArch() == llvm::Triple::aarch64) |
250 | Large = true; |
251 | unsigned LSDAEncoding = |
252 | Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; |
253 | if (IsPIC) { |
254 | LSDAEncoding = dwarf::DW_EH_PE_pcrel | |
255 | (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); |
256 | } |
257 | |
258 | std::unique_ptr<MCDisassembler> DisAsm( |
259 | TheTarget->createMCDisassembler(STI: *STI, Ctx&: *Ctx)); |
260 | |
261 | if (!DisAsm) |
262 | return createStringError( |
263 | EC: make_error_code(e: std::errc::not_supported), |
264 | S: Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); |
265 | |
266 | std::unique_ptr<const MCInstrAnalysis> MIA( |
267 | TheTarget->createMCInstrAnalysis(Info: MII.get())); |
268 | if (!MIA) |
269 | return createStringError( |
270 | EC: make_error_code(e: std::errc::not_supported), |
271 | S: Twine("BOLT-ERROR: failed to create instruction analysis for target ", |
272 | TripleName)); |
273 | |
274 | int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); |
275 | std::unique_ptr<MCInstPrinter> InstructionPrinter( |
276 | TheTarget->createMCInstPrinter(T: TheTriple, SyntaxVariant: AsmPrinterVariant, MAI: *AsmInfo, |
277 | MII: *MII, MRI: *MRI)); |
278 | if (!InstructionPrinter) |
279 | return createStringError( |
280 | EC: make_error_code(e: std::errc::not_supported), |
281 | S: Twine("BOLT-ERROR: no instruction printer for target ", TripleName)); |
282 | InstructionPrinter->setPrintImmHex(true); |
283 | |
284 | std::unique_ptr<MCCodeEmitter> MCE( |
285 | TheTarget->createMCCodeEmitter(II: *MII, Ctx&: *Ctx)); |
286 | |
287 | auto BC = std::make_unique<BinaryContext>( |
288 | args: std::move(Ctx), args: std::move(DwCtx), args: std::make_unique<Triple>(args&: TheTriple), |
289 | args: std::move(SSP), args&: TheTarget, args: std::string(TripleName), args: std::move(MCE), |
290 | args: std::move(MOFI), args: std::move(AsmInfo), args: std::move(MII), args: std::move(STI), |
291 | args: std::move(InstructionPrinter), args: std::move(MIA), args: nullptr, args: std::move(MRI), |
292 | args: std::move(DisAsm), args&: Logger); |
293 | |
294 | BC->LSDAEncoding = LSDAEncoding; |
295 | |
296 | BC->MAB = std::unique_ptr<MCAsmBackend>( |
297 | BC->TheTarget->createMCAsmBackend(STI: *BC->STI, MRI: *BC->MRI, Options: MCTargetOptions())); |
298 | |
299 | BC->setFilename(InputFileName); |
300 | |
301 | BC->HasFixedLoadAddress = !IsPIC; |
302 | |
303 | BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>( |
304 | BC->TheTarget->createMCDisassembler(STI: *BC->STI, Ctx&: *BC->Ctx)); |
305 | |
306 | if (!BC->SymbolicDisAsm) |
307 | return createStringError( |
308 | EC: make_error_code(e: std::errc::not_supported), |
309 | S: Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); |
310 | |
311 | return std::move(BC); |
312 | } |
313 | |
314 | bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const { |
315 | if (opts::HotText && |
316 | (SymbolName == "__hot_start"|| SymbolName == "__hot_end")) |
317 | return true; |
318 | |
319 | if (opts::HotData && |
320 | (SymbolName == "__hot_data_start"|| SymbolName == "__hot_data_end")) |
321 | return true; |
322 | |
323 | if (SymbolName == "_end") |
324 | return true; |
325 | |
326 | return false; |
327 | } |
328 | |
329 | std::unique_ptr<MCObjectWriter> |
330 | BinaryContext::createObjectWriter(raw_pwrite_stream &OS) { |
331 | return MAB->createObjectWriter(OS); |
332 | } |
333 | |
334 | bool BinaryContext::validateObjectNesting() const { |
335 | auto Itr = BinaryDataMap.begin(); |
336 | auto End = BinaryDataMap.end(); |
337 | bool Valid = true; |
338 | while (Itr != End) { |
339 | auto Next = std::next(x: Itr); |
340 | while (Next != End && |
341 | Itr->second->getSection() == Next->second->getSection() && |
342 | Itr->second->containsRange(Address: Next->second->getAddress(), |
343 | Size: Next->second->getSize())) { |
344 | if (Next->second->Parent != Itr->second) { |
345 | this->errs() << "BOLT-WARNING: object nesting incorrect for:\n" |
346 | << "BOLT-WARNING: "<< *Itr->second << "\n" |
347 | << "BOLT-WARNING: "<< *Next->second << "\n"; |
348 | Valid = false; |
349 | } |
350 | ++Next; |
351 | } |
352 | Itr = Next; |
353 | } |
354 | return Valid; |
355 | } |
356 | |
357 | bool BinaryContext::validateHoles() const { |
358 | bool Valid = true; |
359 | for (BinarySection &Section : sections()) { |
360 | for (const Relocation &Rel : Section.relocations()) { |
361 | uint64_t RelAddr = Rel.Offset + Section.getAddress(); |
362 | const BinaryData *BD = getBinaryDataContainingAddress(Address: RelAddr); |
363 | if (!BD) { |
364 | this->errs() |
365 | << "BOLT-WARNING: no BinaryData found for relocation at address" |
366 | << " 0x"<< Twine::utohexstr(Val: RelAddr) << " in "<< Section.getName() |
367 | << "\n"; |
368 | Valid = false; |
369 | } else if (!BD->getAtomicRoot()) { |
370 | this->errs() |
371 | << "BOLT-WARNING: no atomic BinaryData found for relocation at " |
372 | << "address 0x"<< Twine::utohexstr(Val: RelAddr) << " in " |
373 | << Section.getName() << "\n"; |
374 | Valid = false; |
375 | } |
376 | } |
377 | } |
378 | return Valid; |
379 | } |
380 | |
381 | void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) { |
382 | const uint64_t Address = GAI->second->getAddress(); |
383 | const uint64_t Size = GAI->second->getSize(); |
384 | |
385 | auto fixParents = [&](BinaryDataMapType::iterator Itr, |
386 | BinaryData *NewParent) { |
387 | BinaryData *OldParent = Itr->second->Parent; |
388 | Itr->second->Parent = NewParent; |
389 | ++Itr; |
390 | while (Itr != BinaryDataMap.end() && OldParent && |
391 | Itr->second->Parent == OldParent) { |
392 | Itr->second->Parent = NewParent; |
393 | ++Itr; |
394 | } |
395 | }; |
396 | |
397 | // Check if the previous symbol contains the newly added symbol. |
398 | if (GAI != BinaryDataMap.begin()) { |
399 | BinaryData *Prev = std::prev(x: GAI)->second; |
400 | while (Prev) { |
401 | if (Prev->getSection() == GAI->second->getSection() && |
402 | Prev->containsRange(Address, Size)) { |
403 | fixParents(GAI, Prev); |
404 | } else { |
405 | fixParents(GAI, nullptr); |
406 | } |
407 | Prev = Prev->Parent; |
408 | } |
409 | } |
410 | |
411 | // Check if the newly added symbol contains any subsequent symbols. |
412 | if (Size != 0) { |
413 | BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second; |
414 | auto Itr = std::next(x: GAI); |
415 | while ( |
416 | Itr != BinaryDataMap.end() && |
417 | BD->containsRange(Address: Itr->second->getAddress(), Size: Itr->second->getSize())) { |
418 | Itr->second->Parent = BD; |
419 | ++Itr; |
420 | } |
421 | } |
422 | } |
423 | |
424 | iterator_range<BinaryContext::binary_data_iterator> |
425 | BinaryContext::getSubBinaryData(BinaryData *BD) { |
426 | auto Start = std::next(x: BinaryDataMap.find(x: BD->getAddress())); |
427 | auto End = Start; |
428 | while (End != BinaryDataMap.end() && BD->isAncestorOf(BD: End->second)) |
429 | ++End; |
430 | return make_range(x: Start, y: End); |
431 | } |
432 | |
433 | std::pair<const MCSymbol *, uint64_t> |
434 | BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF, |
435 | bool IsPCRel) { |
436 | if (isAArch64()) { |
437 | // Check if this is an access to a constant island and create bookkeeping |
438 | // to keep track of it and emit it later as part of this function. |
439 | if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address)) |
440 | return std::make_pair(x&: IslandSym, y: 0); |
441 | |
442 | // Detect custom code written in assembly that refers to arbitrary |
443 | // constant islands from other functions. Write this reference so we |
444 | // can pull this constant island and emit it as part of this function |
445 | // too. |
446 | auto IslandIter = AddressToConstantIslandMap.lower_bound(x: Address); |
447 | |
448 | if (IslandIter != AddressToConstantIslandMap.begin() && |
449 | (IslandIter == AddressToConstantIslandMap.end() || |
450 | IslandIter->first > Address)) |
451 | --IslandIter; |
452 | |
453 | if (IslandIter != AddressToConstantIslandMap.end()) { |
454 | // Fall-back to referencing the original constant island in the presence |
455 | // of dynamic relocs, as we currently do not support cloning them. |
456 | // Notice: we might fail to link because of this, if the original constant |
457 | // island we are referring would be emitted too far away. |
458 | if (IslandIter->second->hasDynamicRelocationAtIsland()) { |
459 | MCSymbol *IslandSym = |
460 | IslandIter->second->getOrCreateIslandAccess(Address); |
461 | if (IslandSym) |
462 | return std::make_pair(x&: IslandSym, y: 0); |
463 | } else if (MCSymbol *IslandSym = |
464 | IslandIter->second->getOrCreateProxyIslandAccess(Address, |
465 | Referrer&: BF)) { |
466 | BF.createIslandDependency(Island: IslandSym, BF: IslandIter->second); |
467 | return std::make_pair(x&: IslandSym, y: 0); |
468 | } |
469 | } |
470 | } |
471 | |
472 | // Note that the address does not necessarily have to reside inside |
473 | // a section, it could be an absolute address too. |
474 | ErrorOr<BinarySection &> Section = getSectionForAddress(Address); |
475 | if (Section && Section->isText()) { |
476 | if (BF.containsAddress(PC: Address, /*UseMaxSize=*/isAArch64())) { |
477 | if (Address != BF.getAddress()) { |
478 | // The address could potentially escape. Mark it as another entry |
479 | // point into the function. |
480 | if (opts::Verbosity >= 1) { |
481 | this->outs() << "BOLT-INFO: potentially escaped address 0x" |
482 | << Twine::utohexstr(Val: Address) << " in function "<< BF |
483 | << '\n'; |
484 | } |
485 | BF.HasInternalLabelReference = true; |
486 | return std::make_pair( |
487 | x: BF.addEntryPointAtOffset(Offset: Address - BF.getAddress()), y: 0); |
488 | } |
489 | } else { |
490 | addInterproceduralReference(Function: &BF, Address); |
491 | } |
492 | } |
493 | |
494 | // With relocations, catch jump table references outside of the basic block |
495 | // containing the indirect jump. |
496 | if (HasRelocations) { |
497 | const MemoryContentsType MemType = analyzeMemoryAt(Address, BF); |
498 | if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) { |
499 | const MCSymbol *Symbol = |
500 | getOrCreateJumpTable(Function&: BF, Address, Type: JumpTable::JTT_PIC); |
501 | |
502 | return std::make_pair(x&: Symbol, y: 0); |
503 | } |
504 | } |
505 | |
506 | if (BinaryData *BD = getBinaryDataContainingAddress(Address)) |
507 | return std::make_pair(x: BD->getSymbol(), y: Address - BD->getAddress()); |
508 | |
509 | // TODO: use DWARF info to get size/alignment here? |
510 | MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, Prefix: "DATAat"); |
511 | LLVM_DEBUG(dbgs() << "Created symbol "<< TargetSymbol->getName() << '\n'); |
512 | return std::make_pair(x&: TargetSymbol, y: 0); |
513 | } |
514 | |
515 | MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address, |
516 | BinaryFunction &BF) { |
517 | if (!isX86()) |
518 | return MemoryContentsType::UNKNOWN; |
519 | |
520 | ErrorOr<BinarySection &> Section = getSectionForAddress(Address); |
521 | if (!Section) { |
522 | // No section - possibly an absolute address. Since we don't allow |
523 | // internal function addresses to escape the function scope - we |
524 | // consider it a tail call. |
525 | if (opts::Verbosity > 1) { |
526 | this->errs() << "BOLT-WARNING: no section for address 0x" |
527 | << Twine::utohexstr(Val: Address) << " referenced from function " |
528 | << BF << '\n'; |
529 | } |
530 | return MemoryContentsType::UNKNOWN; |
531 | } |
532 | |
533 | if (Section->isVirtual()) { |
534 | // The contents are filled at runtime. |
535 | return MemoryContentsType::UNKNOWN; |
536 | } |
537 | |
538 | // No support for jump tables in code yet. |
539 | if (Section->isText()) |
540 | return MemoryContentsType::UNKNOWN; |
541 | |
542 | // Start with checking for PIC jump table. We expect non-PIC jump tables |
543 | // to have high 32 bits set to 0. |
544 | if (analyzeJumpTable(Address, Type: JumpTable::JTT_PIC, BF)) |
545 | return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE; |
546 | |
547 | if (analyzeJumpTable(Address, Type: JumpTable::JTT_NORMAL, BF)) |
548 | return MemoryContentsType::POSSIBLE_JUMP_TABLE; |
549 | |
550 | return MemoryContentsType::UNKNOWN; |
551 | } |
552 | |
553 | bool BinaryContext::analyzeJumpTable(const uint64_t Address, |
554 | const JumpTable::JumpTableType Type, |
555 | const BinaryFunction &BF, |
556 | const uint64_t NextJTAddress, |
557 | JumpTable::AddressesType *EntriesAsAddress, |
558 | bool *HasEntryInFragment) const { |
559 | // Target address of __builtin_unreachable. |
560 | const uint64_t UnreachableAddress = BF.getAddress() + BF.getSize(); |
561 | |
562 | // Is one of the targets __builtin_unreachable? |
563 | bool HasUnreachable = false; |
564 | |
565 | // Does one of the entries match function start address? |
566 | bool HasStartAsEntry = false; |
567 | |
568 | // Number of targets other than __builtin_unreachable. |
569 | uint64_t NumRealEntries = 0; |
570 | |
571 | // Size of the jump table without trailing __builtin_unreachable entries. |
572 | size_t TrimmedSize = 0; |
573 | |
574 | auto addEntryAddress = [&](uint64_t EntryAddress, bool Unreachable = false) { |
575 | if (!EntriesAsAddress) |
576 | return; |
577 | EntriesAsAddress->emplace_back(args&: EntryAddress); |
578 | if (!Unreachable) |
579 | TrimmedSize = EntriesAsAddress->size(); |
580 | }; |
581 | |
582 | auto printEntryDiagnostics = [&](raw_ostream &OS, |
583 | const BinaryFunction *TargetBF) { |
584 | OS << "FAIL: function doesn't contain this address\n"; |
585 | if (!TargetBF) |
586 | return; |
587 | OS << " ! function containing this address: "<< *TargetBF << '\n'; |
588 | if (!TargetBF->isFragment()) |
589 | return; |
590 | OS << " ! is a fragment with parents: "; |
591 | ListSeparator LS; |
592 | for (BinaryFunction *Parent : TargetBF->ParentFragments) |
593 | OS << LS << *Parent; |
594 | OS << '\n'; |
595 | }; |
596 | |
597 | ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); |
598 | if (!Section) |
599 | return false; |
600 | |
601 | // The upper bound is defined by containing object, section limits, and |
602 | // the next jump table in memory. |
603 | uint64_t UpperBound = Section->getEndAddress(); |
604 | const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address); |
605 | if (JumpTableBD && JumpTableBD->getSize()) { |
606 | assert(JumpTableBD->getEndAddress() <= UpperBound && |
607 | "data object cannot cross a section boundary"); |
608 | UpperBound = JumpTableBD->getEndAddress(); |
609 | } |
610 | if (NextJTAddress) |
611 | UpperBound = std::min(a: NextJTAddress, b: UpperBound); |
612 | |
613 | LLVM_DEBUG({ |
614 | using JTT = JumpTable::JumpTableType; |
615 | dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n", |
616 | Address, BF.getPrintName(), |
617 | Type == JTT::JTT_PIC ? "PIC": "Normal"); |
618 | }); |
619 | const uint64_t EntrySize = getJumpTableEntrySize(Type); |
620 | for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize; |
621 | EntryAddress += EntrySize) { |
622 | LLVM_DEBUG(dbgs() << " * Checking 0x"<< Twine::utohexstr(EntryAddress) |
623 | << " -> "); |
624 | // Check if there's a proper relocation against the jump table entry. |
625 | if (HasRelocations) { |
626 | if (Type == JumpTable::JTT_PIC && |
627 | !DataPCRelocations.count(x: EntryAddress)) { |
628 | LLVM_DEBUG( |
629 | dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n"); |
630 | break; |
631 | } |
632 | if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(Address: EntryAddress)) { |
633 | LLVM_DEBUG( |
634 | dbgs() |
635 | << "FAIL: JTT_NORMAL table, no relocation for this address\n"); |
636 | break; |
637 | } |
638 | } |
639 | |
640 | const uint64_t Value = |
641 | (Type == JumpTable::JTT_PIC) |
642 | ? Address + *getSignedValueAtAddress(Address: EntryAddress, Size: EntrySize) |
643 | : *getPointerAtAddress(Address: EntryAddress); |
644 | |
645 | // __builtin_unreachable() case. |
646 | if (Value == UnreachableAddress) { |
647 | addEntryAddress(Value, /*Unreachable*/ true); |
648 | HasUnreachable = true; |
649 | LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value)); |
650 | continue; |
651 | } |
652 | |
653 | // Function start is another special case. It is allowed in the jump table, |
654 | // but we need at least one another regular entry to distinguish the table |
655 | // from, e.g. a function pointer array. |
656 | if (Value == BF.getAddress()) { |
657 | HasStartAsEntry = true; |
658 | addEntryAddress(Value); |
659 | continue; |
660 | } |
661 | |
662 | // Function or one of its fragments. |
663 | const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Address: Value); |
664 | if (!TargetBF || !areRelatedFragments(LHS: TargetBF, RHS: &BF)) { |
665 | LLVM_DEBUG(printEntryDiagnostics(dbgs(), TargetBF)); |
666 | (void)printEntryDiagnostics; |
667 | break; |
668 | } |
669 | |
670 | // Check there's an instruction at this offset. |
671 | if (TargetBF->getState() == BinaryFunction::State::Disassembled && |
672 | !TargetBF->getInstructionAtOffset(Offset: Value - TargetBF->getAddress())) { |
673 | LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value)); |
674 | break; |
675 | } |
676 | |
677 | ++NumRealEntries; |
678 | LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value)); |
679 | |
680 | if (TargetBF != &BF && HasEntryInFragment) |
681 | *HasEntryInFragment = true; |
682 | addEntryAddress(Value); |
683 | } |
684 | |
685 | // Trim direct/normal jump table to exclude trailing unreachable entries that |
686 | // can collide with a function address. |
687 | if (Type == JumpTable::JTT_NORMAL && EntriesAsAddress && |
688 | TrimmedSize != EntriesAsAddress->size() && |
689 | getBinaryFunctionAtAddress(Address: UnreachableAddress)) |
690 | EntriesAsAddress->resize(new_size: TrimmedSize); |
691 | |
692 | // It's a jump table if the number of real entries is more than 1, or there's |
693 | // one real entry and one or more special targets. If there are only multiple |
694 | // special targets, then it's not a jump table. |
695 | return NumRealEntries + (HasUnreachable || HasStartAsEntry) >= 2; |
696 | } |
697 | |
698 | void BinaryContext::populateJumpTables() { |
699 | LLVM_DEBUG(dbgs() << "DataPCRelocations: "<< DataPCRelocations.size() |
700 | << '\n'); |
701 | for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE; |
702 | ++JTI) { |
703 | JumpTable *JT = JTI->second; |
704 | |
705 | if (!llvm::all_of(Range&: JT->Parents, P: std::mem_fn(pm: &BinaryFunction::isSimple))) |
706 | continue; |
707 | |
708 | uint64_t NextJTAddress = 0; |
709 | auto NextJTI = std::next(x: JTI); |
710 | if (NextJTI != JTE) |
711 | NextJTAddress = NextJTI->second->getAddress(); |
712 | |
713 | const bool Success = |
714 | analyzeJumpTable(Address: JT->getAddress(), Type: JT->Type, BF: *(JT->Parents[0]), |
715 | NextJTAddress, EntriesAsAddress: &JT->EntriesAsAddress, HasEntryInFragment: &JT->IsSplit); |
716 | if (!Success) { |
717 | LLVM_DEBUG({ |
718 | dbgs() << "failed to analyze "; |
719 | JT->print(dbgs()); |
720 | if (NextJTI != JTE) { |
721 | dbgs() << "next "; |
722 | NextJTI->second->print(dbgs()); |
723 | } |
724 | }); |
725 | llvm_unreachable("jump table heuristic failure"); |
726 | } |
727 | for (BinaryFunction *Frag : JT->Parents) { |
728 | if (JT->IsSplit) |
729 | Frag->setHasIndirectTargetToSplitFragment(true); |
730 | for (uint64_t EntryAddress : JT->EntriesAsAddress) |
731 | // if target is builtin_unreachable |
732 | if (EntryAddress == Frag->getAddress() + Frag->getSize()) { |
733 | Frag->IgnoredBranches.emplace_back(Args: EntryAddress - Frag->getAddress(), |
734 | Args: Frag->getSize()); |
735 | } else if (EntryAddress >= Frag->getAddress() && |
736 | EntryAddress < Frag->getAddress() + Frag->getSize()) { |
737 | Frag->registerReferencedOffset(Offset: EntryAddress - Frag->getAddress()); |
738 | } |
739 | } |
740 | |
741 | // In strict mode, erase PC-relative relocation record. Later we check that |
742 | // all such records are erased and thus have been accounted for. |
743 | if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) { |
744 | for (uint64_t Address = JT->getAddress(); |
745 | Address < JT->getAddress() + JT->getSize(); |
746 | Address += JT->EntrySize) { |
747 | DataPCRelocations.erase(position: DataPCRelocations.find(x: Address)); |
748 | } |
749 | } |
750 | |
751 | // Mark to skip the function and all its fragments. |
752 | for (BinaryFunction *Frag : JT->Parents) |
753 | if (Frag->hasIndirectTargetToSplitFragment()) |
754 | addFragmentsToSkip(Function: Frag); |
755 | } |
756 | |
757 | if (opts::StrictMode && DataPCRelocations.size()) { |
758 | LLVM_DEBUG({ |
759 | dbgs() << DataPCRelocations.size() |
760 | << " unclaimed PC-relative relocations left in data:\n"; |
761 | for (uint64_t Reloc : DataPCRelocations) |
762 | dbgs() << Twine::utohexstr(Reloc) << '\n'; |
763 | }); |
764 | assert(0 && "unclaimed PC-relative relocations left in data\n"); |
765 | } |
766 | clearList(List&: DataPCRelocations); |
767 | } |
768 | |
769 | void BinaryContext::skipMarkedFragments() { |
770 | std::vector<BinaryFunction *> FragmentQueue; |
771 | // Copy the functions to FragmentQueue. |
772 | FragmentQueue.assign(first: FragmentsToSkip.begin(), last: FragmentsToSkip.end()); |
773 | auto addToWorklist = [&](BinaryFunction *Function) -> void { |
774 | if (FragmentsToSkip.count(x: Function)) |
775 | return; |
776 | FragmentQueue.push_back(x: Function); |
777 | addFragmentsToSkip(Function); |
778 | }; |
779 | // Functions containing split jump tables need to be skipped with all |
780 | // fragments (transitively). |
781 | for (size_t I = 0; I != FragmentQueue.size(); I++) { |
782 | BinaryFunction *BF = FragmentQueue[I]; |
783 | assert(FragmentsToSkip.count(BF) && |
784 | "internal error in traversing function fragments"); |
785 | if (opts::Verbosity >= 1) |
786 | this->errs() << "BOLT-WARNING: Ignoring "<< BF->getPrintName() << '\n'; |
787 | BF->setSimple(false); |
788 | BF->setHasIndirectTargetToSplitFragment(true); |
789 | |
790 | llvm::for_each(Range&: BF->Fragments, F: addToWorklist); |
791 | llvm::for_each(Range&: BF->ParentFragments, F: addToWorklist); |
792 | } |
793 | if (!FragmentsToSkip.empty()) |
794 | this->errs() << "BOLT-WARNING: skipped "<< FragmentsToSkip.size() |
795 | << " function"<< (FragmentsToSkip.size() == 1 ? "": "s") |
796 | << " due to cold fragments\n"; |
797 | } |
798 | |
799 | MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix, |
800 | uint64_t Size, |
801 | uint16_t Alignment, |
802 | unsigned Flags) { |
803 | auto Itr = BinaryDataMap.find(x: Address); |
804 | if (Itr != BinaryDataMap.end()) { |
805 | assert(Itr->second->getSize() == Size || !Size); |
806 | return Itr->second->getSymbol(); |
807 | } |
808 | |
809 | std::string Name = (Prefix + "0x"+ Twine::utohexstr(Val: Address)).str(); |
810 | assert(!GlobalSymbols.count(Name) && "created name is not unique"); |
811 | return registerNameAtAddress(Name, Address, Size, Alignment, Flags); |
812 | } |
813 | |
814 | MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) { |
815 | return Ctx->getOrCreateSymbol(Name); |
816 | } |
817 | |
818 | BinaryFunction *BinaryContext::createBinaryFunction( |
819 | const std::string &Name, BinarySection &Section, uint64_t Address, |
820 | uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) { |
821 | auto Result = BinaryFunctions.emplace( |
822 | args&: Address, args: BinaryFunction(Name, Section, Address, Size, *this)); |
823 | assert(Result.second == true && "unexpected duplicate function"); |
824 | BinaryFunction *BF = &Result.first->second; |
825 | registerNameAtAddress(Name, Address, Size: SymbolSize ? SymbolSize : Size, |
826 | Alignment); |
827 | setSymbolToFunctionMap(Sym: BF->getSymbol(), BF); |
828 | return BF; |
829 | } |
830 | |
831 | const MCSymbol * |
832 | BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address, |
833 | JumpTable::JumpTableType Type) { |
834 | // Two fragments of same function access same jump table |
835 | if (JumpTable *JT = getJumpTableContainingAddress(Address)) { |
836 | assert(JT->Type == Type && "jump table types have to match"); |
837 | assert(Address == JT->getAddress() && "unexpected non-empty jump table"); |
838 | |
839 | if (llvm::is_contained(Range&: JT->Parents, Element: &Function)) |
840 | return JT->getFirstLabel(); |
841 | |
842 | // Prevent associating a jump table to a specific fragment twice. |
843 | auto isSibling = std::bind(f: &BinaryContext::areRelatedFragments, args: this, |
844 | args: &Function, args: std::placeholders::_1); |
845 | assert(llvm::all_of(JT->Parents, isSibling) && |
846 | "cannot re-use jump table of a different function"); |
847 | (void)isSibling; |
848 | if (opts::Verbosity > 2) { |
849 | this->outs() << "BOLT-INFO: multiple fragments access the same jump table" |
850 | << ": "<< *JT->Parents[0] << "; "<< Function << '\n'; |
851 | JT->print(OS&: this->outs()); |
852 | } |
853 | if (JT->Parents.size() == 1) |
854 | JT->Parents.front()->setHasIndirectTargetToSplitFragment(true); |
855 | Function.setHasIndirectTargetToSplitFragment(true); |
856 | // Duplicate the entry for the parent function for easy access |
857 | JT->Parents.push_back(Elt: &Function); |
858 | Function.JumpTables.emplace(args&: Address, args&: JT); |
859 | return JT->getFirstLabel(); |
860 | } |
861 | |
862 | // Re-use the existing symbol if possible. |
863 | MCSymbol *JTLabel = nullptr; |
864 | if (BinaryData *Object = getBinaryDataAtAddress(Address)) { |
865 | if (!isInternalSymbolName(Name: Object->getSymbol()->getName())) |
866 | JTLabel = Object->getSymbol(); |
867 | } |
868 | |
869 | const uint64_t EntrySize = getJumpTableEntrySize(Type); |
870 | if (!JTLabel) { |
871 | const std::string JumpTableName = generateJumpTableName(BF: Function, Address); |
872 | JTLabel = registerNameAtAddress(Name: JumpTableName, Address, Size: 0, Alignment: EntrySize); |
873 | } |
874 | |
875 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table "<< JTLabel->getName() |
876 | << " in function "<< Function << '\n'); |
877 | |
878 | JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type, |
879 | JumpTable::LabelMapType{{0, JTLabel}}, |
880 | *getSectionForAddress(Address)); |
881 | JT->Parents.push_back(Elt: &Function); |
882 | if (opts::Verbosity > 2) |
883 | JT->print(OS&: this->outs()); |
884 | JumpTables.emplace(args&: Address, args&: JT); |
885 | |
886 | // Duplicate the entry for the parent function for easy access. |
887 | Function.JumpTables.emplace(args&: Address, args&: JT); |
888 | return JTLabel; |
889 | } |
890 | |
891 | std::pair<uint64_t, const MCSymbol *> |
892 | BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT, |
893 | const MCSymbol *OldLabel) { |
894 | auto L = scopeLock(); |
895 | unsigned Offset = 0; |
896 | bool Found = false; |
897 | for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) { |
898 | if (Elmt.second != OldLabel) |
899 | continue; |
900 | Offset = Elmt.first; |
901 | Found = true; |
902 | break; |
903 | } |
904 | assert(Found && "Label not found"); |
905 | (void)Found; |
906 | MCSymbol *NewLabel = Ctx->createNamedTempSymbol(Name: "duplicatedJT"); |
907 | JumpTable *NewJT = |
908 | new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type, |
909 | JumpTable::LabelMapType{{Offset, NewLabel}}, |
910 | *getSectionForAddress(Address: JT->getAddress())); |
911 | NewJT->Parents = JT->Parents; |
912 | NewJT->Entries = JT->Entries; |
913 | NewJT->Counts = JT->Counts; |
914 | uint64_t JumpTableID = ++DuplicatedJumpTables; |
915 | // Invert it to differentiate from regular jump tables whose IDs are their |
916 | // addresses in the input binary memory space |
917 | JumpTableID = ~JumpTableID; |
918 | JumpTables.emplace(args&: JumpTableID, args&: NewJT); |
919 | Function.JumpTables.emplace(args&: JumpTableID, args&: NewJT); |
920 | return std::make_pair(x&: JumpTableID, y&: NewLabel); |
921 | } |
922 | |
923 | std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF, |
924 | uint64_t Address) { |
925 | size_t Id; |
926 | uint64_t Offset = 0; |
927 | if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) { |
928 | Offset = Address - JT->getAddress(); |
929 | auto JTLabelsIt = JT->Labels.find(x: Offset); |
930 | if (JTLabelsIt != JT->Labels.end()) |
931 | return std::string(JTLabelsIt->second->getName()); |
932 | |
933 | auto JTIdsIt = JumpTableIds.find(x: JT->getAddress()); |
934 | assert(JTIdsIt != JumpTableIds.end()); |
935 | Id = JTIdsIt->second; |
936 | } else { |
937 | Id = JumpTableIds[Address] = BF.JumpTables.size(); |
938 | } |
939 | return ("JUMP_TABLE/"+ BF.getOneName().str() + "."+ std::to_string(val: Id) + |
940 | (Offset ? ("."+ std::to_string(val: Offset)) : "")); |
941 | } |
942 | |
943 | bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) { |
944 | // FIXME: aarch64 support is missing. |
945 | if (!isX86()) |
946 | return true; |
947 | |
948 | if (BF.getSize() == BF.getMaxSize()) |
949 | return true; |
950 | |
951 | ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData(); |
952 | assert(FunctionData && "cannot get function as data"); |
953 | |
954 | uint64_t Offset = BF.getSize(); |
955 | MCInst Instr; |
956 | uint64_t InstrSize = 0; |
957 | uint64_t InstrAddress = BF.getAddress() + Offset; |
958 | using std::placeholders::_1; |
959 | |
960 | // Skip instructions that satisfy the predicate condition. |
961 | auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) { |
962 | const uint64_t StartOffset = Offset; |
963 | for (; Offset < BF.getMaxSize(); |
964 | Offset += InstrSize, InstrAddress += InstrSize) { |
965 | if (!DisAsm->getInstruction(Instr, Size&: InstrSize, Bytes: FunctionData->slice(N: Offset), |
966 | Address: InstrAddress, CStream&: nulls())) |
967 | break; |
968 | if (!Predicate(Instr)) |
969 | break; |
970 | } |
971 | |
972 | return Offset - StartOffset; |
973 | }; |
974 | |
975 | // Skip a sequence of zero bytes. |
976 | auto skipZeros = [&]() { |
977 | const uint64_t StartOffset = Offset; |
978 | for (; Offset < BF.getMaxSize(); ++Offset) |
979 | if ((*FunctionData)[Offset] != 0) |
980 | break; |
981 | |
982 | return Offset - StartOffset; |
983 | }; |
984 | |
985 | // Accept the whole padding area filled with breakpoints. |
986 | auto isBreakpoint = std::bind(f: &MCPlusBuilder::isBreakpoint, args: MIB.get(), args: _1); |
987 | if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize()) |
988 | return true; |
989 | |
990 | auto isNoop = std::bind(f: &MCPlusBuilder::isNoop, args: MIB.get(), args: _1); |
991 | |
992 | // Some functions have a jump to the next function or to the padding area |
993 | // inserted after the body. |
994 | auto isSkipJump = [&](const MCInst &Instr) { |
995 | uint64_t TargetAddress = 0; |
996 | if (MIB->isUnconditionalBranch(Inst: Instr) && |
997 | MIB->evaluateBranch(Inst: Instr, Addr: InstrAddress, Size: InstrSize, Target&: TargetAddress)) { |
998 | if (TargetAddress >= InstrAddress + InstrSize && |
999 | TargetAddress <= BF.getAddress() + BF.getMaxSize()) { |
1000 | return true; |
1001 | } |
1002 | } |
1003 | return false; |
1004 | }; |
1005 | |
1006 | // Skip over nops, jumps, and zero padding. Allow interleaving (this happens). |
1007 | while (skipInstructions(isNoop) || skipInstructions(isSkipJump) || |
1008 | skipZeros()) |
1009 | ; |
1010 | |
1011 | if (Offset == BF.getMaxSize()) |
1012 | return true; |
1013 | |
1014 | if (opts::Verbosity >= 1) { |
1015 | this->errs() << "BOLT-WARNING: bad padding at address 0x" |
1016 | << Twine::utohexstr(Val: BF.getAddress() + BF.getSize()) |
1017 | << " starting at offset "<< (Offset - BF.getSize()) |
1018 | << " in function "<< BF << '\n' |
1019 | << FunctionData->slice(N: BF.getSize(), |
1020 | M: BF.getMaxSize() - BF.getSize()) |
1021 | << '\n'; |
1022 | } |
1023 | |
1024 | return false; |
1025 | } |
1026 | |
1027 | void BinaryContext::adjustCodePadding() { |
1028 | for (auto &BFI : BinaryFunctions) { |
1029 | BinaryFunction &BF = BFI.second; |
1030 | if (!shouldEmit(Function: BF)) |
1031 | continue; |
1032 | |
1033 | if (!hasValidCodePadding(BF)) { |
1034 | if (HasRelocations) { |
1035 | this->errs() << "BOLT-WARNING: function "<< BF |
1036 | << " has invalid padding. Ignoring the function\n"; |
1037 | BF.setIgnored(); |
1038 | } else { |
1039 | BF.setMaxSize(BF.getSize()); |
1040 | } |
1041 | } |
1042 | } |
1043 | } |
1044 | |
1045 | MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address, |
1046 | uint64_t Size, |
1047 | uint16_t Alignment, |
1048 | unsigned Flags) { |
1049 | // Register the name with MCContext. |
1050 | MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name); |
1051 | |
1052 | auto GAI = BinaryDataMap.find(x: Address); |
1053 | BinaryData *BD; |
1054 | if (GAI == BinaryDataMap.end()) { |
1055 | ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address); |
1056 | BinarySection &Section = |
1057 | SectionOrErr ? SectionOrErr.get() : absoluteSection(); |
1058 | BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1, |
1059 | Section, Flags); |
1060 | GAI = BinaryDataMap.emplace(args&: Address, args&: BD).first; |
1061 | GlobalSymbols[Name] = BD; |
1062 | updateObjectNesting(GAI); |
1063 | } else { |
1064 | BD = GAI->second; |
1065 | if (!BD->hasName(Name)) { |
1066 | GlobalSymbols[Name] = BD; |
1067 | BD->updateSize(N: Size); |
1068 | BD->Symbols.push_back(x: Symbol); |
1069 | } |
1070 | } |
1071 | |
1072 | return Symbol; |
1073 | } |
1074 | |
1075 | const BinaryData * |
1076 | BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const { |
1077 | auto NI = BinaryDataMap.lower_bound(x: Address); |
1078 | auto End = BinaryDataMap.end(); |
1079 | if ((NI != End && Address == NI->first) || |
1080 | ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) { |
1081 | if (NI->second->containsAddress(Address)) |
1082 | return NI->second; |
1083 | |
1084 | // If this is a sub-symbol, see if a parent data contains the address. |
1085 | const BinaryData *BD = NI->second->getParent(); |
1086 | while (BD) { |
1087 | if (BD->containsAddress(Address)) |
1088 | return BD; |
1089 | BD = BD->getParent(); |
1090 | } |
1091 | } |
1092 | return nullptr; |
1093 | } |
1094 | |
1095 | BinaryData *BinaryContext::getGOTSymbol() { |
1096 | // First tries to find a global symbol with that name |
1097 | BinaryData *GOTSymBD = getBinaryDataByName(Name: "_GLOBAL_OFFSET_TABLE_"); |
1098 | if (GOTSymBD) |
1099 | return GOTSymBD; |
1100 | |
1101 | // This symbol might be hidden from run-time link, so fetch the local |
1102 | // definition if available. |
1103 | GOTSymBD = getBinaryDataByName(Name: "_GLOBAL_OFFSET_TABLE_/1"); |
1104 | if (!GOTSymBD) |
1105 | return nullptr; |
1106 | |
1107 | // If the local symbol is not unique, fail |
1108 | unsigned Index = 2; |
1109 | SmallString<30> Storage; |
1110 | while (const BinaryData *BD = |
1111 | getBinaryDataByName(Name: Twine("_GLOBAL_OFFSET_TABLE_/") |
1112 | .concat(Suffix: Twine(Index++)) |
1113 | .toStringRef(Out&: Storage))) |
1114 | if (BD->getAddress() != GOTSymBD->getAddress()) |
1115 | return nullptr; |
1116 | |
1117 | return GOTSymBD; |
1118 | } |
1119 | |
1120 | bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) { |
1121 | auto NI = BinaryDataMap.find(x: Address); |
1122 | assert(NI != BinaryDataMap.end()); |
1123 | if (NI == BinaryDataMap.end()) |
1124 | return false; |
1125 | // TODO: it's possible that a jump table starts at the same address |
1126 | // as a larger blob of private data. When we set the size of the |
1127 | // jump table, it might be smaller than the total blob size. In this |
1128 | // case we just leave the original size since (currently) it won't really |
1129 | // affect anything. |
1130 | assert((!NI->second->Size || NI->second->Size == Size || |
1131 | (NI->second->isJumpTable() && NI->second->Size > Size)) && |
1132 | "can't change the size of a symbol that has already had its " |
1133 | "size set"); |
1134 | if (!NI->second->Size) { |
1135 | NI->second->Size = Size; |
1136 | updateObjectNesting(GAI: NI); |
1137 | return true; |
1138 | } |
1139 | return false; |
1140 | } |
1141 | |
1142 | void BinaryContext::generateSymbolHashes() { |
1143 | auto isPadding = [](const BinaryData &BD) { |
1144 | StringRef Contents = BD.getSection().getContents(); |
1145 | StringRef SymData = Contents.substr(Start: BD.getOffset(), N: BD.getSize()); |
1146 | return (BD.getName().starts_with(Prefix: "HOLEat") || |
1147 | SymData.find_first_not_of(C: 0) == StringRef::npos); |
1148 | }; |
1149 | |
1150 | uint64_t NumCollisions = 0; |
1151 | for (auto &Entry : BinaryDataMap) { |
1152 | BinaryData &BD = *Entry.second; |
1153 | StringRef Name = BD.getName(); |
1154 | |
1155 | if (!isInternalSymbolName(Name)) |
1156 | continue; |
1157 | |
1158 | // First check if a non-anonymous alias exists and move it to the front. |
1159 | if (BD.getSymbols().size() > 1) { |
1160 | auto Itr = llvm::find_if(Range&: BD.getSymbols(), P: [&](const MCSymbol *Symbol) { |
1161 | return !isInternalSymbolName(Name: Symbol->getName()); |
1162 | }); |
1163 | if (Itr != BD.getSymbols().end()) { |
1164 | size_t Idx = std::distance(first: BD.getSymbols().begin(), last: Itr); |
1165 | std::swap(a&: BD.getSymbols()[0], b&: BD.getSymbols()[Idx]); |
1166 | continue; |
1167 | } |
1168 | } |
1169 | |
1170 | // We have to skip 0 size symbols since they will all collide. |
1171 | if (BD.getSize() == 0) { |
1172 | continue; |
1173 | } |
1174 | |
1175 | const uint64_t Hash = BD.getSection().hash(BD); |
1176 | const size_t Idx = Name.find(Str: "0x"); |
1177 | std::string NewName = |
1178 | (Twine(Name.substr(Start: 0, N: Idx)) + "_"+ Twine::utohexstr(Val: Hash)).str(); |
1179 | if (getBinaryDataByName(Name: NewName)) { |
1180 | // Ignore collisions for symbols that appear to be padding |
1181 | // (i.e. all zeros or a "hole") |
1182 | if (!isPadding(BD)) { |
1183 | if (opts::Verbosity) { |
1184 | this->errs() << "BOLT-WARNING: collision detected when hashing "<< BD |
1185 | << " with new name ("<< NewName << "), skipping.\n"; |
1186 | } |
1187 | ++NumCollisions; |
1188 | } |
1189 | continue; |
1190 | } |
1191 | BD.Symbols.insert(position: BD.Symbols.begin(), x: Ctx->getOrCreateSymbol(Name: NewName)); |
1192 | GlobalSymbols[NewName] = &BD; |
1193 | } |
1194 | if (NumCollisions) { |
1195 | this->errs() << "BOLT-WARNING: "<< NumCollisions |
1196 | << " collisions detected while hashing binary objects"; |
1197 | if (!opts::Verbosity) |
1198 | this->errs() << ". Use -v=1 to see the list."; |
1199 | this->errs() << '\n'; |
1200 | } |
1201 | } |
1202 | |
1203 | bool BinaryContext::registerFragment(BinaryFunction &TargetFunction, |
1204 | BinaryFunction &Function) { |
1205 | assert(TargetFunction.isFragment() && "TargetFunction must be a fragment"); |
1206 | if (TargetFunction.isChildOf(Other: Function)) |
1207 | return true; |
1208 | TargetFunction.addParentFragment(BF&: Function); |
1209 | Function.addFragment(BF&: TargetFunction); |
1210 | FragmentClasses.unionSets(V1: &TargetFunction, V2: &Function); |
1211 | if (!HasRelocations) { |
1212 | TargetFunction.setSimple(false); |
1213 | Function.setSimple(false); |
1214 | } |
1215 | if (opts::Verbosity >= 1) { |
1216 | this->outs() << "BOLT-INFO: marking "<< TargetFunction |
1217 | << " as a fragment of "<< Function << '\n'; |
1218 | } |
1219 | return true; |
1220 | } |
1221 | |
1222 | void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF, |
1223 | MCInst &LoadLowBits, |
1224 | MCInst &LoadHiBits, |
1225 | uint64_t Target) { |
1226 | const MCSymbol *TargetSymbol; |
1227 | uint64_t Addend = 0; |
1228 | std::tie(args&: TargetSymbol, args&: Addend) = handleAddressRef(Address: Target, BF, |
1229 | /*IsPCRel*/ true); |
1230 | int64_t Val; |
1231 | MIB->replaceImmWithSymbolRef(Inst&: LoadHiBits, Symbol: TargetSymbol, Addend, Ctx: Ctx.get(), Value&: Val, |
1232 | RelType: ELF::R_AARCH64_ADR_PREL_PG_HI21); |
1233 | MIB->replaceImmWithSymbolRef(Inst&: LoadLowBits, Symbol: TargetSymbol, Addend, Ctx: Ctx.get(), |
1234 | Value&: Val, RelType: ELF::R_AARCH64_ADD_ABS_LO12_NC); |
1235 | } |
1236 | |
1237 | bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) { |
1238 | BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address); |
1239 | if (TargetFunction) |
1240 | return false; |
1241 | |
1242 | ErrorOr<BinarySection &> Section = getSectionForAddress(Address); |
1243 | assert(Section && "cannot get section for referenced address"); |
1244 | if (!Section->isText()) |
1245 | return false; |
1246 | |
1247 | bool Ret = false; |
1248 | StringRef SectionContents = Section->getContents(); |
1249 | uint64_t Offset = Address - Section->getAddress(); |
1250 | const uint64_t MaxSize = SectionContents.size() - Offset; |
1251 | const uint8_t *Bytes = |
1252 | reinterpret_cast<const uint8_t *>(SectionContents.data()); |
1253 | ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize); |
1254 | |
1255 | auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions, |
1256 | MCInst &Instruction, uint64_t Offset, |
1257 | uint64_t AbsoluteInstrAddr, |
1258 | uint64_t TotalSize) -> bool { |
1259 | MCInst *TargetHiBits, *TargetLowBits; |
1260 | uint64_t TargetAddress, Count; |
1261 | Count = MIB->matchLinkerVeneer(Begin: Instructions.begin(), End: Instructions.end(), |
1262 | Address: AbsoluteInstrAddr, CurInst: Instruction, TargetHiBits, |
1263 | TargetLowBits, Target&: TargetAddress); |
1264 | if (!Count) |
1265 | return false; |
1266 | |
1267 | if (MatchOnly) |
1268 | return true; |
1269 | |
1270 | // NOTE The target symbol was created during disassemble's |
1271 | // handleExternalReference |
1272 | const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, Prefix: "FUNCat"); |
1273 | BinaryFunction *Veneer = createBinaryFunction(Name: VeneerSymbol->getName().str(), |
1274 | Section&: *Section, Address, Size: TotalSize); |
1275 | addAdrpAddRelocAArch64(BF&: *Veneer, LoadLowBits&: *TargetLowBits, LoadHiBits&: *TargetHiBits, |
1276 | Target: TargetAddress); |
1277 | MIB->addAnnotation(Inst&: Instruction, Name: "AArch64Veneer", Val: true); |
1278 | Veneer->addInstruction(Offset, Instruction: std::move(Instruction)); |
1279 | --Count; |
1280 | for (auto It = Instructions.rbegin(); Count != 0; ++It, --Count) { |
1281 | MIB->addAnnotation(Inst&: It->second, Name: "AArch64Veneer", Val: true); |
1282 | Veneer->addInstruction(Offset: It->first, Instruction: std::move(It->second)); |
1283 | } |
1284 | |
1285 | Veneer->getOrCreateLocalLabel(Address); |
1286 | Veneer->setMaxSize(TotalSize); |
1287 | Veneer->updateState(State: BinaryFunction::State::Disassembled); |
1288 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" |
1289 | << Twine::utohexstr(Address) << "\n"); |
1290 | return true; |
1291 | }; |
1292 | |
1293 | uint64_t Size = 0, TotalSize = 0; |
1294 | BinaryFunction::InstrMapType VeneerInstructions; |
1295 | for (Offset = 0; Offset < MaxSize; Offset += Size) { |
1296 | MCInst Instruction; |
1297 | const uint64_t AbsoluteInstrAddr = Address + Offset; |
1298 | if (!SymbolicDisAsm->getInstruction(Instr&: Instruction, Size, Bytes: Data.slice(N: Offset), |
1299 | Address: AbsoluteInstrAddr, CStream&: nulls())) |
1300 | break; |
1301 | |
1302 | TotalSize += Size; |
1303 | if (MIB->isBranch(Inst: Instruction)) { |
1304 | Ret = matchVeneer(VeneerInstructions, Instruction, Offset, |
1305 | AbsoluteInstrAddr, TotalSize); |
1306 | break; |
1307 | } |
1308 | |
1309 | VeneerInstructions.emplace(args&: Offset, args: std::move(Instruction)); |
1310 | } |
1311 | |
1312 | return Ret; |
1313 | } |
1314 | |
1315 | void BinaryContext::processInterproceduralReferences() { |
1316 | for (const std::pair<BinaryFunction *, uint64_t> &It : |
1317 | InterproceduralReferences) { |
1318 | BinaryFunction &Function = *It.first; |
1319 | uint64_t Address = It.second; |
1320 | // Process interprocedural references from ignored functions in BAT mode |
1321 | // (non-simple in non-relocation mode) to properly register entry points |
1322 | if (!Address || (Function.isIgnored() && !HasBATSection)) |
1323 | continue; |
1324 | |
1325 | BinaryFunction *TargetFunction = |
1326 | getBinaryFunctionContainingAddress(Address); |
1327 | if (&Function == TargetFunction) |
1328 | continue; |
1329 | |
1330 | if (TargetFunction) { |
1331 | if (TargetFunction->isFragment() && |
1332 | !areRelatedFragments(LHS: TargetFunction, RHS: &Function)) { |
1333 | this->errs() |
1334 | << "BOLT-WARNING: interprocedural reference between unrelated " |
1335 | "fragments: " |
1336 | << Function.getPrintName() << " and " |
1337 | << TargetFunction->getPrintName() << '\n'; |
1338 | } |
1339 | if (uint64_t Offset = Address - TargetFunction->getAddress()) |
1340 | TargetFunction->addEntryPointAtOffset(Offset); |
1341 | |
1342 | continue; |
1343 | } |
1344 | |
1345 | // Check if address falls in function padding space - this could be |
1346 | // unmarked data in code. In this case adjust the padding space size. |
1347 | ErrorOr<BinarySection &> Section = getSectionForAddress(Address); |
1348 | assert(Section && "cannot get section for referenced address"); |
1349 | |
1350 | if (!Section->isText()) |
1351 | continue; |
1352 | |
1353 | // PLT requires special handling and could be ignored in this context. |
1354 | StringRef SectionName = Section->getName(); |
1355 | if (SectionName == ".plt"|| SectionName == ".plt.got") |
1356 | continue; |
1357 | |
1358 | // Check if it is aarch64 veneer written at Address |
1359 | if (isAArch64() && handleAArch64Veneer(Address)) |
1360 | continue; |
1361 | |
1362 | if (opts::processAllFunctions()) { |
1363 | this->errs() << "BOLT-ERROR: cannot process binaries with unmarked " |
1364 | << "object in code at address 0x" |
1365 | << Twine::utohexstr(Val: Address) << " belonging to section " |
1366 | << SectionName << " in current mode\n"; |
1367 | exit(status: 1); |
1368 | } |
1369 | |
1370 | TargetFunction = getBinaryFunctionContainingAddress(Address, |
1371 | /*CheckPastEnd=*/false, |
1372 | /*UseMaxSize=*/true); |
1373 | // We are not going to overwrite non-simple functions, but for simple |
1374 | // ones - adjust the padding size. |
1375 | if (TargetFunction && TargetFunction->isSimple()) { |
1376 | this->errs() |
1377 | << "BOLT-WARNING: function "<< *TargetFunction |
1378 | << " has an object detected in a padding region at address 0x" |
1379 | << Twine::utohexstr(Val: Address) << '\n'; |
1380 | TargetFunction->setMaxSize(TargetFunction->getSize()); |
1381 | } |
1382 | } |
1383 | |
1384 | InterproceduralReferences.clear(); |
1385 | } |
1386 | |
1387 | void BinaryContext::postProcessSymbolTable() { |
1388 | fixBinaryDataHoles(); |
1389 | bool Valid = true; |
1390 | for (auto &Entry : BinaryDataMap) { |
1391 | BinaryData *BD = Entry.second; |
1392 | if ((BD->getName().starts_with(Prefix: "SYMBOLat") || |
1393 | BD->getName().starts_with(Prefix: "DATAat")) && |
1394 | !BD->getParent() && !BD->getSize() && !BD->isAbsolute() && |
1395 | BD->getSection()) { |
1396 | this->errs() << "BOLT-WARNING: zero-sized top level symbol: "<< *BD |
1397 | << "\n"; |
1398 | Valid = false; |
1399 | } |
1400 | } |
1401 | assert(Valid); |
1402 | (void)Valid; |
1403 | generateSymbolHashes(); |
1404 | } |
1405 | |
1406 | void BinaryContext::foldFunction(BinaryFunction &ChildBF, |
1407 | BinaryFunction &ParentBF) { |
1408 | assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() && |
1409 | "cannot merge functions with multiple entry points"); |
1410 | |
1411 | std::unique_lock<llvm::sys::RWMutex> WriteCtxLock(CtxMutex, std::defer_lock); |
1412 | std::unique_lock<llvm::sys::RWMutex> WriteSymbolMapLock( |
1413 | SymbolToFunctionMapMutex, std::defer_lock); |
1414 | |
1415 | const StringRef ChildName = ChildBF.getOneName(); |
1416 | |
1417 | // Move symbols over and update bookkeeping info. |
1418 | for (MCSymbol *Symbol : ChildBF.getSymbols()) { |
1419 | ParentBF.getSymbols().push_back(Elt: Symbol); |
1420 | WriteSymbolMapLock.lock(); |
1421 | SymbolToFunctionMap[Symbol] = &ParentBF; |
1422 | WriteSymbolMapLock.unlock(); |
1423 | // NB: there's no need to update BinaryDataMap and GlobalSymbols. |
1424 | } |
1425 | ChildBF.getSymbols().clear(); |
1426 | |
1427 | // Move other names the child function is known under. |
1428 | llvm::move(Range&: ChildBF.Aliases, Out: std::back_inserter(x&: ParentBF.Aliases)); |
1429 | ChildBF.Aliases.clear(); |
1430 | |
1431 | if (HasRelocations) { |
1432 | // Merge execution counts of ChildBF into those of ParentBF. |
1433 | // Without relocations, we cannot reliably merge profiles as both functions |
1434 | // continue to exist and either one can be executed. |
1435 | ChildBF.mergeProfileDataInto(BF&: ParentBF); |
1436 | |
1437 | std::shared_lock<llvm::sys::RWMutex> ReadBfsLock(BinaryFunctionsMutex, |
1438 | std::defer_lock); |
1439 | std::unique_lock<llvm::sys::RWMutex> WriteBfsLock(BinaryFunctionsMutex, |
1440 | std::defer_lock); |
1441 | // Remove ChildBF from the global set of functions in relocs mode. |
1442 | ReadBfsLock.lock(); |
1443 | auto FI = BinaryFunctions.find(x: ChildBF.getAddress()); |
1444 | ReadBfsLock.unlock(); |
1445 | |
1446 | assert(FI != BinaryFunctions.end() && "function not found"); |
1447 | assert(&ChildBF == &FI->second && "function mismatch"); |
1448 | |
1449 | WriteBfsLock.lock(); |
1450 | ChildBF.clearDisasmState(); |
1451 | FI = BinaryFunctions.erase(position: FI); |
1452 | WriteBfsLock.unlock(); |
1453 | |
1454 | } else { |
1455 | // In non-relocation mode we keep the function, but rename it. |
1456 | std::string NewName = "__ICF_"+ ChildName.str(); |
1457 | |
1458 | WriteCtxLock.lock(); |
1459 | ChildBF.getSymbols().push_back(Elt: Ctx->getOrCreateSymbol(Name: NewName)); |
1460 | WriteCtxLock.unlock(); |
1461 | |
1462 | ChildBF.setFolded(&ParentBF); |
1463 | } |
1464 | |
1465 | ParentBF.setHasFunctionsFoldedInto(); |
1466 | } |
1467 | |
1468 | void BinaryContext::fixBinaryDataHoles() { |
1469 | assert(validateObjectNesting() && "object nesting inconsistency detected"); |
1470 | |
1471 | for (BinarySection &Section : allocatableSections()) { |
1472 | std::vector<std::pair<uint64_t, uint64_t>> Holes; |
1473 | |
1474 | auto isNotHole = [&Section](const binary_data_iterator &Itr) { |
1475 | BinaryData *BD = Itr->second; |
1476 | bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() && |
1477 | (BD->getName().starts_with(Prefix: "SYMBOLat0x") || |
1478 | BD->getName().starts_with(Prefix: "DATAat0x") || |
1479 | BD->getName().starts_with(Prefix: "ANONYMOUS"))); |
1480 | return !isHole && BD->getSection() == Section && !BD->getParent(); |
1481 | }; |
1482 | |
1483 | auto BDStart = BinaryDataMap.begin(); |
1484 | auto BDEnd = BinaryDataMap.end(); |
1485 | auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd); |
1486 | auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd); |
1487 | |
1488 | uint64_t EndAddress = Section.getAddress(); |
1489 | |
1490 | while (Itr != End) { |
1491 | if (Itr->second->getAddress() > EndAddress) { |
1492 | uint64_t Gap = Itr->second->getAddress() - EndAddress; |
1493 | Holes.emplace_back(args&: EndAddress, args&: Gap); |
1494 | } |
1495 | EndAddress = Itr->second->getEndAddress(); |
1496 | ++Itr; |
1497 | } |
1498 | |
1499 | if (EndAddress < Section.getEndAddress()) |
1500 | Holes.emplace_back(args&: EndAddress, args: Section.getEndAddress() - EndAddress); |
1501 | |
1502 | // If there is already a symbol at the start of the hole, grow that symbol |
1503 | // to cover the rest. Otherwise, create a new symbol to cover the hole. |
1504 | for (std::pair<uint64_t, uint64_t> &Hole : Holes) { |
1505 | BinaryData *BD = getBinaryDataAtAddress(Address: Hole.first); |
1506 | if (BD) { |
1507 | // BD->getSection() can be != Section if there are sections that |
1508 | // overlap. In this case it is probably safe to just skip the holes |
1509 | // since the overlapping section will not(?) have any symbols in it. |
1510 | if (BD->getSection() == Section) |
1511 | setBinaryDataSize(Address: Hole.first, Size: Hole.second); |
1512 | } else { |
1513 | getOrCreateGlobalSymbol(Address: Hole.first, Prefix: "HOLEat", Size: Hole.second, Alignment: 1); |
1514 | } |
1515 | } |
1516 | } |
1517 | |
1518 | assert(validateObjectNesting() && "object nesting inconsistency detected"); |
1519 | assert(validateHoles() && "top level hole detected in object map"); |
1520 | } |
1521 | |
1522 | void BinaryContext::printGlobalSymbols(raw_ostream &OS) const { |
1523 | const BinarySection *CurrentSection = nullptr; |
1524 | bool FirstSection = true; |
1525 | |
1526 | for (auto &Entry : BinaryDataMap) { |
1527 | const BinaryData *BD = Entry.second; |
1528 | const BinarySection &Section = BD->getSection(); |
1529 | if (FirstSection || Section != *CurrentSection) { |
1530 | uint64_t Address, Size; |
1531 | StringRef Name = Section.getName(); |
1532 | if (Section) { |
1533 | Address = Section.getAddress(); |
1534 | Size = Section.getSize(); |
1535 | } else { |
1536 | Address = BD->getAddress(); |
1537 | Size = BD->getSize(); |
1538 | } |
1539 | OS << "BOLT-INFO: Section "<< Name << ", " |
1540 | << "0x"+ Twine::utohexstr(Val: Address) << ":" |
1541 | << "0x"+ Twine::utohexstr(Val: Address + Size) << "/"<< Size << "\n"; |
1542 | CurrentSection = &Section; |
1543 | FirstSection = false; |
1544 | } |
1545 | |
1546 | OS << "BOLT-INFO: "; |
1547 | const BinaryData *P = BD->getParent(); |
1548 | while (P) { |
1549 | OS << " "; |
1550 | P = P->getParent(); |
1551 | } |
1552 | OS << *BD << "\n"; |
1553 | } |
1554 | } |
1555 | |
1556 | Expected<unsigned> BinaryContext::getDwarfFile( |
1557 | StringRef Directory, StringRef FileName, unsigned FileNumber, |
1558 | std::optional<MD5::MD5Result> Checksum, std::optional<StringRef> Source, |
1559 | unsigned CUID, unsigned DWARFVersion) { |
1560 | DwarfLineTable &Table = DwarfLineTablesCUMap[CUID]; |
1561 | return Table.tryGetFile(Directory, FileName, Checksum, Source, DwarfVersion: DWARFVersion, |
1562 | FileNumber); |
1563 | } |
1564 | |
1565 | unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID, |
1566 | const uint32_t SrcCUID, |
1567 | unsigned FileIndex) { |
1568 | DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(Offset: SrcCUID); |
1569 | const DWARFDebugLine::LineTable *LineTable = |
1570 | DwCtx->getLineTableForUnit(U: SrcUnit); |
1571 | const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = |
1572 | LineTable->Prologue.FileNames; |
1573 | // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 |
1574 | // means empty dir. |
1575 | assert(FileIndex > 0 && FileIndex <= FileNames.size() && |
1576 | "FileIndex out of range for the compilation unit."); |
1577 | StringRef Dir = ""; |
1578 | if (FileNames[FileIndex - 1].DirIdx != 0) { |
1579 | if (std::optional<const char *> DirName = dwarf::toString( |
1580 | V: LineTable->Prologue |
1581 | .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) { |
1582 | Dir = *DirName; |
1583 | } |
1584 | } |
1585 | StringRef FileName = ""; |
1586 | if (std::optional<const char *> FName = |
1587 | dwarf::toString(V: FileNames[FileIndex - 1].Name)) |
1588 | FileName = *FName; |
1589 | assert(FileName != ""); |
1590 | DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(Offset: DestCUID); |
1591 | return cantFail(ValOrErr: getDwarfFile(Directory: Dir, FileName, FileNumber: 0, Checksum: std::nullopt, Source: std::nullopt, |
1592 | CUID: DestCUID, DWARFVersion: DstUnit->getVersion())); |
1593 | } |
1594 | |
1595 | std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() { |
1596 | std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size()); |
1597 | llvm::transform(Range: llvm::make_second_range(c&: BinaryFunctions), |
1598 | d_first: SortedFunctions.begin(), |
1599 | F: [](BinaryFunction &BF) { return &BF; }); |
1600 | |
1601 | llvm::stable_sort(Range&: SortedFunctions, C: compareBinaryFunctionByIndex); |
1602 | return SortedFunctions; |
1603 | } |
1604 | |
1605 | std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() { |
1606 | std::vector<BinaryFunction *> AllFunctions; |
1607 | AllFunctions.reserve(n: BinaryFunctions.size() + InjectedBinaryFunctions.size()); |
1608 | llvm::transform(Range: llvm::make_second_range(c&: BinaryFunctions), |
1609 | d_first: std::back_inserter(x&: AllFunctions), |
1610 | F: [](BinaryFunction &BF) { return &BF; }); |
1611 | llvm::copy(Range&: InjectedBinaryFunctions, Out: std::back_inserter(x&: AllFunctions)); |
1612 | |
1613 | return AllFunctions; |
1614 | } |
1615 | |
1616 | std::optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) { |
1617 | auto Iter = DWOCUs.find(x: DWOId); |
1618 | if (Iter == DWOCUs.end()) |
1619 | return std::nullopt; |
1620 | |
1621 | return Iter->second; |
1622 | } |
1623 | |
1624 | DWARFContext *BinaryContext::getDWOContext() const { |
1625 | if (DWOCUs.empty()) |
1626 | return nullptr; |
1627 | return &DWOCUs.begin()->second->getContext(); |
1628 | } |
1629 | |
1630 | /// Handles DWO sections that can either be in .o, .dwo or .dwp files. |
1631 | void BinaryContext::preprocessDWODebugInfo() { |
1632 | for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { |
1633 | DWARFUnit *const DwarfUnit = CU.get(); |
1634 | if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) { |
1635 | std::string DWOName = dwarf::toString( |
1636 | V: DwarfUnit->getUnitDIE().find( |
1637 | Attrs: {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), |
1638 | Default: ""); |
1639 | SmallString<16> AbsolutePath; |
1640 | if (!opts::CompDirOverride.empty()) { |
1641 | sys::path::append(path&: AbsolutePath, a: opts::CompDirOverride); |
1642 | sys::path::append(path&: AbsolutePath, a: DWOName); |
1643 | } |
1644 | DWARFUnit *DWOCU = |
1645 | DwarfUnit->getNonSkeletonUnitDIE(ExtractUnitDIEOnly: false, DWOAlternativeLocation: AbsolutePath).getDwarfUnit(); |
1646 | if (!DWOCU->isDWOUnit()) { |
1647 | this->outs() |
1648 | << "BOLT-WARNING: Debug Fission: DWO debug information for " |
1649 | << DWOName |
1650 | << " was not retrieved and won't be updated. Please check " |
1651 | "relative path.\n"; |
1652 | continue; |
1653 | } |
1654 | DWOCUs[*DWOId] = DWOCU; |
1655 | } |
1656 | } |
1657 | if (!DWOCUs.empty()) |
1658 | this->outs() << "BOLT-INFO: processing split DWARF\n"; |
1659 | } |
1660 | |
1661 | void BinaryContext::preprocessDebugInfo() { |
1662 | struct CURange { |
1663 | uint64_t LowPC; |
1664 | uint64_t HighPC; |
1665 | DWARFUnit *Unit; |
1666 | |
1667 | bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; } |
1668 | }; |
1669 | |
1670 | // Building a map of address ranges to CUs similar to .debug_aranges and use |
1671 | // it to assign CU to functions. |
1672 | std::vector<CURange> AllRanges; |
1673 | AllRanges.reserve(n: DwCtx->getNumCompileUnits()); |
1674 | for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { |
1675 | Expected<DWARFAddressRangesVector> RangesOrError = |
1676 | CU->getUnitDIE().getAddressRanges(); |
1677 | if (!RangesOrError) { |
1678 | consumeError(Err: RangesOrError.takeError()); |
1679 | continue; |
1680 | } |
1681 | for (DWARFAddressRange &Range : *RangesOrError) { |
1682 | // Parts of the debug info could be invalidated due to corresponding code |
1683 | // being removed from the binary by the linker. Hence we check if the |
1684 | // address is a valid one. |
1685 | if (containsAddress(Address: Range.LowPC)) |
1686 | AllRanges.emplace_back(args: CURange{.LowPC: Range.LowPC, .HighPC: Range.HighPC, .Unit: CU.get()}); |
1687 | } |
1688 | |
1689 | ContainsDwarf5 |= CU->getVersion() >= 5; |
1690 | ContainsDwarfLegacy |= CU->getVersion() < 5; |
1691 | } |
1692 | |
1693 | llvm::sort(C&: AllRanges); |
1694 | for (auto &KV : BinaryFunctions) { |
1695 | const uint64_t FunctionAddress = KV.first; |
1696 | BinaryFunction &Function = KV.second; |
1697 | |
1698 | auto It = llvm::partition_point( |
1699 | Range&: AllRanges, P: [=](CURange R) { return R.HighPC <= FunctionAddress; }); |
1700 | if (It != AllRanges.end() && It->LowPC <= FunctionAddress) |
1701 | Function.setDWARFUnit(It->Unit); |
1702 | } |
1703 | |
1704 | // Discover units with debug info that needs to be updated. |
1705 | for (const auto &KV : BinaryFunctions) { |
1706 | const BinaryFunction &BF = KV.second; |
1707 | if (shouldEmit(Function: BF) && BF.getDWARFUnit()) |
1708 | ProcessedCUs.insert(x: BF.getDWARFUnit()); |
1709 | } |
1710 | |
1711 | // Clear debug info for functions from units that we are not going to process. |
1712 | for (auto &KV : BinaryFunctions) { |
1713 | BinaryFunction &BF = KV.second; |
1714 | if (BF.getDWARFUnit() && !ProcessedCUs.count(x: BF.getDWARFUnit())) |
1715 | BF.setDWARFUnit(nullptr); |
1716 | } |
1717 | |
1718 | if (opts::Verbosity >= 1) { |
1719 | this->outs() << "BOLT-INFO: "<< ProcessedCUs.size() << " out of " |
1720 | << DwCtx->getNumCompileUnits() << " CUs will be updated\n"; |
1721 | } |
1722 | |
1723 | preprocessDWODebugInfo(); |
1724 | |
1725 | // Populate MCContext with DWARF files from all units. |
1726 | StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix(); |
1727 | for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { |
1728 | const uint64_t CUID = CU->getOffset(); |
1729 | DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID); |
1730 | BinaryLineTable.setLabel(Ctx->getOrCreateSymbol( |
1731 | Name: GlobalPrefix + "line_table_start"+ Twine(CUID))); |
1732 | |
1733 | if (!ProcessedCUs.count(x: CU.get())) |
1734 | continue; |
1735 | |
1736 | const DWARFDebugLine::LineTable *LineTable = |
1737 | DwCtx->getLineTableForUnit(U: CU.get()); |
1738 | const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = |
1739 | LineTable->Prologue.FileNames; |
1740 | |
1741 | uint16_t DwarfVersion = LineTable->Prologue.getVersion(); |
1742 | if (DwarfVersion >= 5) { |
1743 | std::optional<MD5::MD5Result> Checksum; |
1744 | if (LineTable->Prologue.ContentTypes.HasMD5) |
1745 | Checksum = LineTable->Prologue.FileNames[0].Checksum; |
1746 | std::optional<const char *> Name = |
1747 | dwarf::toString(V: CU->getUnitDIE().find(Attr: dwarf::DW_AT_name), Default: nullptr); |
1748 | if (std::optional<uint64_t> DWOID = CU->getDWOId()) { |
1749 | auto Iter = DWOCUs.find(x: *DWOID); |
1750 | if (Iter == DWOCUs.end()) { |
1751 | this->errs() << "BOLT-ERROR: DWO CU was not found for "<< Name |
1752 | << '\n'; |
1753 | exit(status: 1); |
1754 | } |
1755 | Name = dwarf::toString( |
1756 | V: Iter->second->getUnitDIE().find(Attr: dwarf::DW_AT_name), Default: nullptr); |
1757 | } |
1758 | BinaryLineTable.setRootFile(Directory: CU->getCompilationDir(), FileName: *Name, Checksum, |
1759 | Source: std::nullopt); |
1760 | } |
1761 | |
1762 | BinaryLineTable.setDwarfVersion(DwarfVersion); |
1763 | |
1764 | // Assign a unique label to every line table, one per CU. |
1765 | // Make sure empty debug line tables are registered too. |
1766 | if (FileNames.empty()) { |
1767 | cantFail(ValOrErr: getDwarfFile(Directory: "", FileName: "<unknown>", FileNumber: 0, Checksum: std::nullopt, Source: std::nullopt, |
1768 | CUID, DWARFVersion: DwarfVersion)); |
1769 | continue; |
1770 | } |
1771 | const uint32_t Offset = DwarfVersion < 5 ? 1 : 0; |
1772 | for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) { |
1773 | // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 |
1774 | // means empty dir. |
1775 | StringRef Dir = ""; |
1776 | if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5) |
1777 | if (std::optional<const char *> DirName = dwarf::toString( |
1778 | V: LineTable->Prologue |
1779 | .IncludeDirectories[FileNames[I].DirIdx - Offset])) |
1780 | Dir = *DirName; |
1781 | StringRef FileName = ""; |
1782 | if (std::optional<const char *> FName = |
1783 | dwarf::toString(V: FileNames[I].Name)) |
1784 | FileName = *FName; |
1785 | assert(FileName != ""); |
1786 | std::optional<MD5::MD5Result> Checksum; |
1787 | if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5) |
1788 | Checksum = LineTable->Prologue.FileNames[I].Checksum; |
1789 | cantFail(ValOrErr: getDwarfFile(Directory: Dir, FileName, FileNumber: 0, Checksum, Source: std::nullopt, CUID, |
1790 | DWARFVersion: DwarfVersion)); |
1791 | } |
1792 | } |
1793 | } |
1794 | |
1795 | bool BinaryContext::shouldEmit(const BinaryFunction &Function) const { |
1796 | if (Function.isPseudo()) |
1797 | return false; |
1798 | |
1799 | if (opts::processAllFunctions()) |
1800 | return true; |
1801 | |
1802 | if (Function.isIgnored()) |
1803 | return false; |
1804 | |
1805 | // In relocation mode we will emit non-simple functions with CFG. |
1806 | // If the function does not have a CFG it should be marked as ignored. |
1807 | return HasRelocations || Function.isSimple(); |
1808 | } |
1809 | |
1810 | void BinaryContext::dump(const MCInst &Inst) const { |
1811 | if (LLVM_UNLIKELY(!InstPrinter)) { |
1812 | dbgs() << "Cannot dump for InstPrinter is not initialized.\n"; |
1813 | return; |
1814 | } |
1815 | InstPrinter->printInst(MI: &Inst, Address: 0, Annot: "", STI: *STI, OS&: dbgs()); |
1816 | dbgs() << "\n"; |
1817 | } |
1818 | |
1819 | void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) { |
1820 | uint32_t Operation = Inst.getOperation(); |
1821 | switch (Operation) { |
1822 | case MCCFIInstruction::OpSameValue: |
1823 | OS << "OpSameValue Reg"<< Inst.getRegister(); |
1824 | break; |
1825 | case MCCFIInstruction::OpRememberState: |
1826 | OS << "OpRememberState"; |
1827 | break; |
1828 | case MCCFIInstruction::OpRestoreState: |
1829 | OS << "OpRestoreState"; |
1830 | break; |
1831 | case MCCFIInstruction::OpOffset: |
1832 | OS << "OpOffset Reg"<< Inst.getRegister() << " "<< Inst.getOffset(); |
1833 | break; |
1834 | case MCCFIInstruction::OpDefCfaRegister: |
1835 | OS << "OpDefCfaRegister Reg"<< Inst.getRegister(); |
1836 | break; |
1837 | case MCCFIInstruction::OpDefCfaOffset: |
1838 | OS << "OpDefCfaOffset "<< Inst.getOffset(); |
1839 | break; |
1840 | case MCCFIInstruction::OpDefCfa: |
1841 | OS << "OpDefCfa Reg"<< Inst.getRegister() << " "<< Inst.getOffset(); |
1842 | break; |
1843 | case MCCFIInstruction::OpRelOffset: |
1844 | OS << "OpRelOffset Reg"<< Inst.getRegister() << " "<< Inst.getOffset(); |
1845 | break; |
1846 | case MCCFIInstruction::OpAdjustCfaOffset: |
1847 | OS << "OfAdjustCfaOffset "<< Inst.getOffset(); |
1848 | break; |
1849 | case MCCFIInstruction::OpEscape: |
1850 | OS << "OpEscape"; |
1851 | break; |
1852 | case MCCFIInstruction::OpRestore: |
1853 | OS << "OpRestore Reg"<< Inst.getRegister(); |
1854 | break; |
1855 | case MCCFIInstruction::OpUndefined: |
1856 | OS << "OpUndefined Reg"<< Inst.getRegister(); |
1857 | break; |
1858 | case MCCFIInstruction::OpRegister: |
1859 | OS << "OpRegister Reg"<< Inst.getRegister() << " Reg" |
1860 | << Inst.getRegister2(); |
1861 | break; |
1862 | case MCCFIInstruction::OpWindowSave: |
1863 | OS << "OpWindowSave"; |
1864 | break; |
1865 | case MCCFIInstruction::OpGnuArgsSize: |
1866 | OS << "OpGnuArgsSize"; |
1867 | break; |
1868 | default: |
1869 | OS << "Op#"<< Operation; |
1870 | break; |
1871 | } |
1872 | } |
1873 | |
1874 | MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const { |
1875 | // For aarch64 and riscv, the ABI defines mapping symbols so we identify data |
1876 | // in the code section (see IHI0056B). $x identifies a symbol starting code or |
1877 | // the end of a data chunk inside code, $d identifies start of data. |
1878 | if (isX86() || ELFSymbolRef(Symbol).getSize()) |
1879 | return MarkerSymType::NONE; |
1880 | |
1881 | Expected<StringRef> NameOrError = Symbol.getName(); |
1882 | Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType(); |
1883 | |
1884 | if (!TypeOrError || !NameOrError) |
1885 | return MarkerSymType::NONE; |
1886 | |
1887 | if (*TypeOrError != SymbolRef::ST_Unknown) |
1888 | return MarkerSymType::NONE; |
1889 | |
1890 | if (*NameOrError == "$x"|| NameOrError->starts_with(Prefix: "$x.")) |
1891 | return MarkerSymType::CODE; |
1892 | |
1893 | // $x<ISA> |
1894 | if (isRISCV() && NameOrError->starts_with(Prefix: "$x")) |
1895 | return MarkerSymType::CODE; |
1896 | |
1897 | if (*NameOrError == "$d"|| NameOrError->starts_with(Prefix: "$d.")) |
1898 | return MarkerSymType::DATA; |
1899 | |
1900 | return MarkerSymType::NONE; |
1901 | } |
1902 | |
1903 | bool BinaryContext::isMarker(const SymbolRef &Symbol) const { |
1904 | return getMarkerType(Symbol) != MarkerSymType::NONE; |
1905 | } |
1906 | |
1907 | static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction, |
1908 | const BinaryFunction *Function, |
1909 | DWARFContext *DwCtx) { |
1910 | DebugLineTableRowRef RowRef = |
1911 | DebugLineTableRowRef::fromSMLoc(Loc: Instruction.getLoc()); |
1912 | if (RowRef == DebugLineTableRowRef::NULL_ROW) |
1913 | return; |
1914 | |
1915 | const DWARFDebugLine::LineTable *LineTable; |
1916 | if (Function && Function->getDWARFUnit() && |
1917 | Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) { |
1918 | LineTable = Function->getDWARFLineTable(); |
1919 | } else { |
1920 | LineTable = DwCtx->getLineTableForUnit( |
1921 | U: DwCtx->getCompileUnitForOffset(Offset: RowRef.DwCompileUnitIndex)); |
1922 | } |
1923 | assert(LineTable && "line table expected for instruction with debug info"); |
1924 | |
1925 | const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; |
1926 | StringRef FileName = ""; |
1927 | if (std::optional<const char *> FName = |
1928 | dwarf::toString(V: LineTable->Prologue.FileNames[Row.File - 1].Name)) |
1929 | FileName = *FName; |
1930 | OS << " # debug line "<< FileName << ":"<< Row.Line; |
1931 | if (Row.Column) |
1932 | OS << ":"<< Row.Column; |
1933 | if (Row.Discriminator) |
1934 | OS << " discriminator:"<< Row.Discriminator; |
1935 | } |
1936 | |
1937 | ArrayRef<uint8_t> BinaryContext::extractData(uint64_t Address, |
1938 | uint64_t Size) const { |
1939 | ArrayRef<uint8_t> Res; |
1940 | |
1941 | const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); |
1942 | if (!Section || Section->isVirtual()) |
1943 | return Res; |
1944 | |
1945 | if (!Section->containsRange(Address, Size)) |
1946 | return Res; |
1947 | |
1948 | auto *Bytes = |
1949 | reinterpret_cast<const uint8_t *>(Section->getContents().data()); |
1950 | return ArrayRef<uint8_t>(Bytes + Address - Section->getAddress(), Size); |
1951 | } |
1952 | |
1953 | void BinaryContext::printData(raw_ostream &OS, ArrayRef<uint8_t> Data, |
1954 | uint64_t Offset) const { |
1955 | DataExtractor DE(Data, AsmInfo->isLittleEndian(), |
1956 | AsmInfo->getCodePointerSize()); |
1957 | uint64_t DataOffset = 0; |
1958 | while (DataOffset + 4 <= Data.size()) { |
1959 | OS << format(Fmt: " %08"PRIx64 ": \t.word\t0x", Vals: Offset + DataOffset); |
1960 | const auto Word = DE.getUnsigned(offset_ptr: &DataOffset, byte_size: 4); |
1961 | OS << Twine::utohexstr(Val: Word) << '\n'; |
1962 | } |
1963 | if (DataOffset + 2 <= Data.size()) { |
1964 | OS << format(Fmt: " %08"PRIx64 ": \t.short\t0x", Vals: Offset + DataOffset); |
1965 | const auto Short = DE.getUnsigned(offset_ptr: &DataOffset, byte_size: 2); |
1966 | OS << Twine::utohexstr(Val: Short) << '\n'; |
1967 | } |
1968 | if (DataOffset + 1 == Data.size()) { |
1969 | OS << format(Fmt: " %08"PRIx64 ": \t.byte\t0x%x\n", Vals: Offset + DataOffset, |
1970 | Vals: Data[DataOffset]); |
1971 | } |
1972 | } |
1973 | |
1974 | void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, |
1975 | uint64_t Offset, |
1976 | const BinaryFunction *Function, |
1977 | bool PrintMCInst, bool PrintMemData, |
1978 | bool PrintRelocations, |
1979 | StringRef Endl) const { |
1980 | OS << format(Fmt: " %08"PRIx64 ": ", Vals: Offset); |
1981 | if (MIB->isCFI(Inst: Instruction)) { |
1982 | uint32_t Offset = Instruction.getOperand(i: 0).getImm(); |
1983 | OS << "\t!CFI\t$"<< Offset << "\t; "; |
1984 | if (Function) |
1985 | printCFI(OS, Inst: *Function->getCFIFor(Instr: Instruction)); |
1986 | OS << Endl; |
1987 | return; |
1988 | } |
1989 | if (std::optional<uint32_t> DynamicID = |
1990 | MIB->getDynamicBranchID(Inst: Instruction)) { |
1991 | OS << "\tjit\t"<< MIB->getTargetSymbol(Inst: Instruction)->getName() |
1992 | << " # ID: "<< DynamicID; |
1993 | } else { |
1994 | // If there are annotations on the instruction, the MCInstPrinter will fail |
1995 | // to print the preferred alias as it only does so when the number of |
1996 | // operands is as expected. See |
1997 | // https://github.com/llvm/llvm-project/blob/782f1a0d895646c364a53f9dcdd6d4ec1f3e5ea0/llvm/lib/MC/MCInstPrinter.cpp#L142 |
1998 | // Therefore, create a temporary copy of the Inst from which the annotations |
1999 | // are removed, and print that Inst. |
2000 | MCInst InstNoAnnot = Instruction; |
2001 | MIB->stripAnnotations(Inst&: InstNoAnnot); |
2002 | InstPrinter->printInst(MI: &InstNoAnnot, Address: 0, Annot: "", STI: *STI, OS); |
2003 | } |
2004 | if (MIB->isCall(Inst: Instruction)) { |
2005 | if (MIB->isTailCall(Inst: Instruction)) |
2006 | OS << " # TAILCALL "; |
2007 | if (MIB->isInvoke(Inst: Instruction)) { |
2008 | const std::optional<MCPlus::MCLandingPad> EHInfo = |
2009 | MIB->getEHInfo(Inst: Instruction); |
2010 | OS << " # handler: "; |
2011 | if (EHInfo->first) |
2012 | OS << *EHInfo->first; |
2013 | else |
2014 | OS << '0'; |
2015 | OS << "; action: "<< EHInfo->second; |
2016 | const int64_t GnuArgsSize = MIB->getGnuArgsSize(Inst: Instruction); |
2017 | if (GnuArgsSize >= 0) |
2018 | OS << "; GNU_args_size = "<< GnuArgsSize; |
2019 | } |
2020 | } else if (MIB->isIndirectBranch(Inst: Instruction)) { |
2021 | if (uint64_t JTAddress = MIB->getJumpTable(Inst: Instruction)) { |
2022 | OS << " # JUMPTABLE @0x"<< Twine::utohexstr(Val: JTAddress); |
2023 | } else { |
2024 | OS << " # UNKNOWN CONTROL FLOW"; |
2025 | } |
2026 | } |
2027 | if (std::optional<uint32_t> Offset = MIB->getOffset(Inst: Instruction)) |
2028 | OS << " # Offset: "<< *Offset; |
2029 | if (std::optional<uint32_t> Size = MIB->getSize(Inst: Instruction)) |
2030 | OS << " # Size: "<< *Size; |
2031 | if (MCSymbol *Label = MIB->getInstLabel(Inst: Instruction)) |
2032 | OS << " # Label: "<< *Label; |
2033 | |
2034 | MIB->printAnnotations(Inst: Instruction, OS); |
2035 | |
2036 | if (opts::PrintDebugInfo) |
2037 | printDebugInfo(OS, Instruction, Function, DwCtx: DwCtx.get()); |
2038 | |
2039 | if ((opts::PrintRelocations || PrintRelocations) && Function) { |
2040 | const uint64_t Size = computeCodeSize(Beg: &Instruction, End: &Instruction + 1); |
2041 | Function->printRelocations(OS, Offset, Size); |
2042 | } |
2043 | |
2044 | OS << Endl; |
2045 | |
2046 | if (PrintMCInst) { |
2047 | Instruction.dump_pretty(OS, Printer: InstPrinter.get()); |
2048 | OS << Endl; |
2049 | } |
2050 | } |
2051 | |
2052 | std::optional<uint64_t> |
2053 | BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress, |
2054 | uint64_t FileOffset) const { |
2055 | // Find a segment with a matching file offset. |
2056 | for (auto &KV : SegmentMapInfo) { |
2057 | const SegmentInfo &SegInfo = KV.second; |
2058 | // Only consider executable segments. |
2059 | if (!SegInfo.IsExecutable) |
2060 | continue; |
2061 | // FileOffset is got from perf event, |
2062 | // and it is equal to alignDown(SegInfo.FileOffset, pagesize). |
2063 | // If the pagesize is not equal to SegInfo.Alignment. |
2064 | // FileOffset and SegInfo.FileOffset should be aligned first, |
2065 | // and then judge whether they are equal. |
2066 | if (alignDown(Value: SegInfo.FileOffset, Align: SegInfo.Alignment) == |
2067 | alignDown(Value: FileOffset, Align: SegInfo.Alignment)) { |
2068 | // The function's offset from base address in VAS is aligned by pagesize |
2069 | // instead of SegInfo.Alignment. Pagesize can't be got from perf events. |
2070 | // However, The ELF document says that SegInfo.FileOffset should equal |
2071 | // to SegInfo.Address, modulo the pagesize. |
2072 | // Reference: https://refspecs.linuxfoundation.org/elf/elf.pdf |
2073 | |
2074 | // So alignDown(SegInfo.Address, pagesize) can be calculated by: |
2075 | // alignDown(SegInfo.Address, pagesize) |
2076 | // = SegInfo.Address - (SegInfo.Address % pagesize) |
2077 | // = SegInfo.Address - (SegInfo.FileOffset % pagesize) |
2078 | // = SegInfo.Address - SegInfo.FileOffset + |
2079 | // alignDown(SegInfo.FileOffset, pagesize) |
2080 | // = SegInfo.Address - SegInfo.FileOffset + FileOffset |
2081 | return MMapAddress - (SegInfo.Address - SegInfo.FileOffset + FileOffset); |
2082 | } |
2083 | } |
2084 | |
2085 | return std::nullopt; |
2086 | } |
2087 | |
2088 | ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) { |
2089 | auto SI = AddressToSection.upper_bound(x: Address); |
2090 | if (SI != AddressToSection.begin()) { |
2091 | --SI; |
2092 | uint64_t UpperBound = SI->first + SI->second->getSize(); |
2093 | if (!SI->second->getSize()) |
2094 | UpperBound += 1; |
2095 | if (UpperBound > Address) |
2096 | return *SI->second; |
2097 | } |
2098 | return std::make_error_code(e: std::errc::bad_address); |
2099 | } |
2100 | |
2101 | ErrorOr<StringRef> |
2102 | BinaryContext::getSectionNameForAddress(uint64_t Address) const { |
2103 | if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address)) |
2104 | return Section->getName(); |
2105 | return std::make_error_code(e: std::errc::bad_address); |
2106 | } |
2107 | |
2108 | BinarySection &BinaryContext::registerSection(BinarySection *Section) { |
2109 | auto Res = Sections.insert(x: Section); |
2110 | (void)Res; |
2111 | assert(Res.second && "can't register the same section twice."); |
2112 | |
2113 | // Only register allocatable sections in the AddressToSection map. |
2114 | if (Section->isAllocatable() && Section->getAddress()) |
2115 | AddressToSection.insert(x: std::make_pair(x: Section->getAddress(), y&: Section)); |
2116 | NameToSection.insert( |
2117 | x: std::make_pair(x: std::string(Section->getName()), y&: Section)); |
2118 | if (Section->hasSectionRef()) |
2119 | SectionRefToBinarySection.insert( |
2120 | KV: std::make_pair(x: Section->getSectionRef(), y&: Section)); |
2121 | |
2122 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering "<< *Section << "\n"); |
2123 | return *Section; |
2124 | } |
2125 | |
2126 | BinarySection &BinaryContext::registerSection(SectionRef Section) { |
2127 | return registerSection(Section: new BinarySection(*this, Section)); |
2128 | } |
2129 | |
2130 | BinarySection & |
2131 | BinaryContext::registerSection(const Twine &SectionName, |
2132 | const BinarySection &OriginalSection) { |
2133 | return registerSection( |
2134 | Section: new BinarySection(*this, SectionName, OriginalSection)); |
2135 | } |
2136 | |
2137 | BinarySection & |
2138 | BinaryContext::registerOrUpdateSection(const Twine &Name, unsigned ELFType, |
2139 | unsigned ELFFlags, uint8_t *Data, |
2140 | uint64_t Size, unsigned Alignment) { |
2141 | auto NamedSections = getSectionByName(Name); |
2142 | if (NamedSections.begin() != NamedSections.end()) { |
2143 | assert(std::next(NamedSections.begin()) == NamedSections.end() && |
2144 | "can only update unique sections"); |
2145 | BinarySection *Section = NamedSections.begin()->second; |
2146 | |
2147 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating "<< *Section << " -> "); |
2148 | const bool Flag = Section->isAllocatable(); |
2149 | (void)Flag; |
2150 | Section->update(NewData: Data, NewSize: Size, NewAlignment: Alignment, NewELFType: ELFType, NewELFFlags: ELFFlags); |
2151 | LLVM_DEBUG(dbgs() << *Section << "\n"); |
2152 | // FIXME: Fix section flags/attributes for MachO. |
2153 | if (isELF()) |
2154 | assert(Flag == Section->isAllocatable() && |
2155 | "can't change section allocation status"); |
2156 | return *Section; |
2157 | } |
2158 | |
2159 | return registerSection( |
2160 | Section: new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags)); |
2161 | } |
2162 | |
2163 | void BinaryContext::deregisterSectionName(const BinarySection &Section) { |
2164 | auto NameRange = NameToSection.equal_range(x: Section.getName().str()); |
2165 | while (NameRange.first != NameRange.second) { |
2166 | if (NameRange.first->second == &Section) { |
2167 | NameToSection.erase(position: NameRange.first); |
2168 | break; |
2169 | } |
2170 | ++NameRange.first; |
2171 | } |
2172 | } |
2173 | |
2174 | void BinaryContext::deregisterUnusedSections() { |
2175 | ErrorOr<BinarySection &> AbsSection = getUniqueSectionByName(SectionName: "<absolute>"); |
2176 | for (auto SI = Sections.begin(); SI != Sections.end();) { |
2177 | BinarySection *Section = *SI; |
2178 | // We check getOutputData() instead of getOutputSize() because sometimes |
2179 | // zero-sized .text.cold sections are allocated. |
2180 | if (Section->hasSectionRef() || Section->getOutputData() || |
2181 | (AbsSection && Section == &AbsSection.get())) { |
2182 | ++SI; |
2183 | continue; |
2184 | } |
2185 | |
2186 | LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering "<< Section->getName() |
2187 | << '\n';); |
2188 | deregisterSectionName(Section: *Section); |
2189 | SI = Sections.erase(position: SI); |
2190 | delete Section; |
2191 | } |
2192 | } |
2193 | |
2194 | bool BinaryContext::deregisterSection(BinarySection &Section) { |
2195 | BinarySection *SectionPtr = &Section; |
2196 | auto Itr = Sections.find(x: SectionPtr); |
2197 | if (Itr != Sections.end()) { |
2198 | auto Range = AddressToSection.equal_range(x: SectionPtr->getAddress()); |
2199 | while (Range.first != Range.second) { |
2200 | if (Range.first->second == SectionPtr) { |
2201 | AddressToSection.erase(position: Range.first); |
2202 | break; |
2203 | } |
2204 | ++Range.first; |
2205 | } |
2206 | |
2207 | deregisterSectionName(Section: *SectionPtr); |
2208 | Sections.erase(position: Itr); |
2209 | delete SectionPtr; |
2210 | return true; |
2211 | } |
2212 | return false; |
2213 | } |
2214 | |
2215 | void BinaryContext::renameSection(BinarySection &Section, |
2216 | const Twine &NewName) { |
2217 | auto Itr = Sections.find(x: &Section); |
2218 | assert(Itr != Sections.end() && "Section must exist to be renamed."); |
2219 | Sections.erase(position: Itr); |
2220 | |
2221 | deregisterSectionName(Section); |
2222 | |
2223 | Section.Name = NewName.str(); |
2224 | Section.setOutputName(Section.Name); |
2225 | |
2226 | NameToSection.insert(x: std::make_pair(x&: Section.Name, y: &Section)); |
2227 | |
2228 | // Reinsert with the new name. |
2229 | Sections.insert(x: &Section); |
2230 | } |
2231 | |
2232 | void BinaryContext::printSections(raw_ostream &OS) const { |
2233 | for (BinarySection *const &Section : Sections) |
2234 | OS << "BOLT-INFO: "<< *Section << "\n"; |
2235 | } |
2236 | |
2237 | BinarySection &BinaryContext::absoluteSection() { |
2238 | if (ErrorOr<BinarySection &> Section = getUniqueSectionByName(SectionName: "<absolute>")) |
2239 | return *Section; |
2240 | return registerOrUpdateSection(Name: "<absolute>", ELFType: ELF::SHT_NULL, ELFFlags: 0u); |
2241 | } |
2242 | |
2243 | ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address, |
2244 | size_t Size) const { |
2245 | const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); |
2246 | if (!Section) |
2247 | return std::make_error_code(e: std::errc::bad_address); |
2248 | |
2249 | if (Section->isVirtual()) |
2250 | return 0; |
2251 | |
2252 | DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), |
2253 | AsmInfo->getCodePointerSize()); |
2254 | auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); |
2255 | return DE.getUnsigned(offset_ptr: &ValueOffset, byte_size: Size); |
2256 | } |
2257 | |
2258 | ErrorOr<int64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address, |
2259 | size_t Size) const { |
2260 | const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); |
2261 | if (!Section) |
2262 | return std::make_error_code(e: std::errc::bad_address); |
2263 | |
2264 | if (Section->isVirtual()) |
2265 | return 0; |
2266 | |
2267 | DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), |
2268 | AsmInfo->getCodePointerSize()); |
2269 | auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); |
2270 | return DE.getSigned(offset_ptr: &ValueOffset, size: Size); |
2271 | } |
2272 | |
2273 | void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol, |
2274 | uint32_t Type, uint64_t Addend, |
2275 | uint64_t Value) { |
2276 | ErrorOr<BinarySection &> Section = getSectionForAddress(Address); |
2277 | assert(Section && "cannot find section for address"); |
2278 | Section->addRelocation(Offset: Address - Section->getAddress(), Symbol, Type, Addend, |
2279 | Value); |
2280 | } |
2281 | |
2282 | void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol, |
2283 | uint32_t Type, uint64_t Addend, |
2284 | uint64_t Value) { |
2285 | ErrorOr<BinarySection &> Section = getSectionForAddress(Address); |
2286 | assert(Section && "cannot find section for address"); |
2287 | Section->addDynamicRelocation(Offset: Address - Section->getAddress(), Symbol, Type, |
2288 | Addend, Value); |
2289 | } |
2290 | |
2291 | bool BinaryContext::removeRelocationAt(uint64_t Address) { |
2292 | ErrorOr<BinarySection &> Section = getSectionForAddress(Address); |
2293 | assert(Section && "cannot find section for address"); |
2294 | return Section->removeRelocationAt(Offset: Address - Section->getAddress()); |
2295 | } |
2296 | |
2297 | const Relocation *BinaryContext::getRelocationAt(uint64_t Address) const { |
2298 | ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); |
2299 | if (!Section) |
2300 | return nullptr; |
2301 | |
2302 | return Section->getRelocationAt(Offset: Address - Section->getAddress()); |
2303 | } |
2304 | |
2305 | const Relocation * |
2306 | BinaryContext::getDynamicRelocationAt(uint64_t Address) const { |
2307 | ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); |
2308 | if (!Section) |
2309 | return nullptr; |
2310 | |
2311 | return Section->getDynamicRelocationAt(Offset: Address - Section->getAddress()); |
2312 | } |
2313 | |
2314 | void BinaryContext::markAmbiguousRelocations(BinaryData &BD, |
2315 | const uint64_t Address) { |
2316 | auto setImmovable = [&](BinaryData &BD) { |
2317 | BinaryData *Root = BD.getAtomicRoot(); |
2318 | LLVM_DEBUG(if (Root->isMoveable()) { |
2319 | dbgs() << "BOLT-DEBUG: setting "<< *Root << " as immovable " |
2320 | << "due to ambiguous relocation referencing 0x" |
2321 | << Twine::utohexstr(Address) << '\n'; |
2322 | }); |
2323 | Root->setIsMoveable(false); |
2324 | }; |
2325 | |
2326 | if (Address == BD.getAddress()) { |
2327 | setImmovable(BD); |
2328 | |
2329 | // Set previous symbol as immovable |
2330 | BinaryData *Prev = getBinaryDataContainingAddress(Address: Address - 1); |
2331 | if (Prev && Prev->getEndAddress() == BD.getAddress()) |
2332 | setImmovable(*Prev); |
2333 | } |
2334 | |
2335 | if (Address == BD.getEndAddress()) { |
2336 | setImmovable(BD); |
2337 | |
2338 | // Set next symbol as immovable |
2339 | BinaryData *Next = getBinaryDataContainingAddress(Address: BD.getEndAddress()); |
2340 | if (Next && Next->getAddress() == BD.getEndAddress()) |
2341 | setImmovable(*Next); |
2342 | } |
2343 | } |
2344 | |
2345 | BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol, |
2346 | uint64_t *EntryDesc) { |
2347 | std::shared_lock<llvm::sys::RWMutex> Lock(SymbolToFunctionMapMutex); |
2348 | auto BFI = SymbolToFunctionMap.find(x: Symbol); |
2349 | if (BFI == SymbolToFunctionMap.end()) |
2350 | return nullptr; |
2351 | |
2352 | BinaryFunction *BF = BFI->second; |
2353 | if (EntryDesc) |
2354 | *EntryDesc = BF->getEntryIDForSymbol(EntrySymbol: Symbol); |
2355 | |
2356 | return BF; |
2357 | } |
2358 | |
2359 | std::string |
2360 | BinaryContext::generateBugReportMessage(StringRef Message, |
2361 | const BinaryFunction &Function) const { |
2362 | std::string Msg; |
2363 | raw_string_ostream SS(Msg); |
2364 | SS << "=======================================\n"; |
2365 | SS << "BOLT is unable to proceed because it couldn't properly understand " |
2366 | "this function.\n"; |
2367 | SS << "If you are running the most recent version of BOLT, you may " |
2368 | "want to " |
2369 | "report this and paste this dump.\nPlease check that there is no " |
2370 | "sensitive contents being shared in this dump.\n"; |
2371 | SS << "\nOffending function: "<< Function.getPrintName() << "\n\n"; |
2372 | ScopedPrinter SP(SS); |
2373 | SP.printBinaryBlock(Label: "Function contents", Value: *Function.getData()); |
2374 | SS << "\n"; |
2375 | const_cast<BinaryFunction &>(Function).print(OS&: SS, Annotation: ""); |
2376 | SS << "ERROR: "<< Message; |
2377 | SS << "\n=======================================\n"; |
2378 | return Msg; |
2379 | } |
2380 | |
2381 | BinaryFunction * |
2382 | BinaryContext::createInjectedBinaryFunction(const std::string &Name, |
2383 | bool IsSimple) { |
2384 | InjectedBinaryFunctions.push_back(x: new BinaryFunction(Name, *this, IsSimple)); |
2385 | BinaryFunction *BF = InjectedBinaryFunctions.back(); |
2386 | setSymbolToFunctionMap(Sym: BF->getSymbol(), BF); |
2387 | BF->CurrentState = BinaryFunction::State::CFG; |
2388 | return BF; |
2389 | } |
2390 | |
2391 | BinaryFunction * |
2392 | BinaryContext::createInstructionPatch(uint64_t Address, |
2393 | const InstructionListType &Instructions, |
2394 | const Twine &Name) { |
2395 | ErrorOr<BinarySection &> Section = getSectionForAddress(Address); |
2396 | assert(Section && "cannot get section for patching"); |
2397 | assert(Section->hasSectionRef() && Section->isText() && |
2398 | "can only patch input file code sections"); |
2399 | |
2400 | const uint64_t FileOffset = |
2401 | Section->getInputFileOffset() + Address - Section->getAddress(); |
2402 | |
2403 | std::string PatchName = Name.str(); |
2404 | if (PatchName.empty()) { |
2405 | // Assign unique name to the patch. |
2406 | static uint64_t N = 0; |
2407 | PatchName = "__BP_"+ std::to_string(val: N++); |
2408 | } |
2409 | |
2410 | BinaryFunction *PBF = createInjectedBinaryFunction(Name: PatchName); |
2411 | PBF->setOutputAddress(Address); |
2412 | PBF->setFileOffset(FileOffset); |
2413 | PBF->setOriginSection(&Section.get()); |
2414 | PBF->addBasicBlock()->addInstructions(R: Instructions); |
2415 | PBF->setIsPatch(true); |
2416 | |
2417 | // Don't create symbol table entry if the name wasn't specified. |
2418 | if (Name.str().empty()) |
2419 | PBF->setAnonymous(true); |
2420 | |
2421 | return PBF; |
2422 | } |
2423 | |
2424 | std::pair<size_t, size_t> |
2425 | BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) { |
2426 | // Use the original size for non-simple functions. |
2427 | if (!BF.isSimple() || BF.isIgnored()) |
2428 | return std::make_pair(x: BF.getSize(), y: 0); |
2429 | |
2430 | // Adjust branch instruction to match the current layout. |
2431 | if (FixBranches) |
2432 | BF.fixBranches(); |
2433 | |
2434 | // Create local MC context to isolate the effect of ephemeral code emission. |
2435 | IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter(); |
2436 | MCContext *LocalCtx = MCEInstance.LocalCtx.get(); |
2437 | MCAsmBackend *MAB = |
2438 | TheTarget->createMCAsmBackend(STI: *STI, MRI: *MRI, Options: MCTargetOptions()); |
2439 | |
2440 | SmallString<256> Code; |
2441 | raw_svector_ostream VecOS(Code); |
2442 | |
2443 | std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(OS&: VecOS); |
2444 | std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer( |
2445 | T: *TheTriple, Ctx&: *LocalCtx, TAB: std::unique_ptr<MCAsmBackend>(MAB), OW: std::move(OW), |
2446 | Emitter: std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), STI: *STI)); |
2447 | |
2448 | Streamer->initSections(NoExecStack: false, STI: *STI); |
2449 | |
2450 | MCSection *Section = MCEInstance.LocalMOFI->getTextSection(); |
2451 | Section->setHasInstructions(true); |
2452 | |
2453 | // Create symbols in the LocalCtx so that they get destroyed with it. |
2454 | MCSymbol *StartLabel = LocalCtx->createTempSymbol(); |
2455 | MCSymbol *EndLabel = LocalCtx->createTempSymbol(); |
2456 | |
2457 | Streamer->switchSection(Section); |
2458 | Streamer->emitLabel(Symbol: StartLabel); |
2459 | emitFunctionBody(Streamer&: *Streamer, BF, FF&: BF.getLayout().getMainFragment(), |
2460 | /*EmitCodeOnly=*/true); |
2461 | Streamer->emitLabel(Symbol: EndLabel); |
2462 | |
2463 | using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>; |
2464 | SmallVector<LabelRange> SplitLabels; |
2465 | for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) { |
2466 | MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol(); |
2467 | MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol(); |
2468 | SplitLabels.emplace_back(Args: SplitStartLabel, Args: SplitEndLabel); |
2469 | |
2470 | MCSectionELF *const SplitSection = LocalCtx->getELFSection( |
2471 | Section: BF.getCodeSectionName(Fragment: FF.getFragmentNum()), Type: ELF::SHT_PROGBITS, |
2472 | Flags: ELF::SHF_EXECINSTR | ELF::SHF_ALLOC); |
2473 | SplitSection->setHasInstructions(true); |
2474 | Streamer->switchSection(Section: SplitSection); |
2475 | |
2476 | Streamer->emitLabel(Symbol: SplitStartLabel); |
2477 | emitFunctionBody(Streamer&: *Streamer, BF, FF, /*EmitCodeOnly=*/true); |
2478 | Streamer->emitLabel(Symbol: SplitEndLabel); |
2479 | } |
2480 | |
2481 | MCAssembler &Assembler = |
2482 | static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler(); |
2483 | Assembler.layout(); |
2484 | |
2485 | // Obtain fragment sizes. |
2486 | std::vector<uint64_t> FragmentSizes; |
2487 | // Main fragment size. |
2488 | const uint64_t HotSize = Assembler.getSymbolOffset(S: *EndLabel) - |
2489 | Assembler.getSymbolOffset(S: *StartLabel); |
2490 | FragmentSizes.push_back(x: HotSize); |
2491 | // Split fragment sizes. |
2492 | uint64_t ColdSize = 0; |
2493 | for (const auto &Labels : SplitLabels) { |
2494 | uint64_t Size = Assembler.getSymbolOffset(S: *Labels.second) - |
2495 | Assembler.getSymbolOffset(S: *Labels.first); |
2496 | FragmentSizes.push_back(x: Size); |
2497 | ColdSize += Size; |
2498 | } |
2499 | |
2500 | // Populate new start and end offsets of each basic block. |
2501 | uint64_t FragmentIndex = 0; |
2502 | for (FunctionFragment &FF : BF.getLayout().fragments()) { |
2503 | BinaryBasicBlock *PrevBB = nullptr; |
2504 | for (BinaryBasicBlock *BB : FF) { |
2505 | const uint64_t BBStartOffset = |
2506 | Assembler.getSymbolOffset(S: *(BB->getLabel())); |
2507 | BB->setOutputStartAddress(BBStartOffset); |
2508 | if (PrevBB) |
2509 | PrevBB->setOutputEndAddress(BBStartOffset); |
2510 | PrevBB = BB; |
2511 | } |
2512 | if (PrevBB) |
2513 | PrevBB->setOutputEndAddress(FragmentSizes[FragmentIndex]); |
2514 | FragmentIndex++; |
2515 | } |
2516 | |
2517 | // Clean-up the effect of the code emission. |
2518 | for (const MCSymbol &Symbol : Assembler.symbols()) { |
2519 | MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol); |
2520 | MutableSymbol->setUndefined(); |
2521 | MutableSymbol->setIsRegistered(false); |
2522 | } |
2523 | |
2524 | return std::make_pair(x: HotSize, y&: ColdSize); |
2525 | } |
2526 | |
2527 | bool BinaryContext::validateInstructionEncoding( |
2528 | ArrayRef<uint8_t> InputSequence) const { |
2529 | MCInst Inst; |
2530 | uint64_t InstSize; |
2531 | DisAsm->getInstruction(Instr&: Inst, Size&: InstSize, Bytes: InputSequence, Address: 0, CStream&: nulls()); |
2532 | assert(InstSize == InputSequence.size() && |
2533 | "Disassembled instruction size does not match the sequence."); |
2534 | |
2535 | SmallString<256> Code; |
2536 | SmallVector<MCFixup, 4> Fixups; |
2537 | |
2538 | MCE->encodeInstruction(Inst, CB&: Code, Fixups, STI: *STI); |
2539 | auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size()); |
2540 | if (InputSequence != OutputSequence) { |
2541 | if (opts::Verbosity > 1) { |
2542 | this->errs() << "BOLT-WARNING: mismatched encoding detected\n" |
2543 | << " input: "<< InputSequence << '\n' |
2544 | << " output: "<< OutputSequence << '\n'; |
2545 | } |
2546 | return false; |
2547 | } |
2548 | |
2549 | return true; |
2550 | } |
2551 | |
2552 | uint64_t BinaryContext::getHotThreshold() const { |
2553 | static uint64_t Threshold = 0; |
2554 | if (Threshold == 0) { |
2555 | Threshold = std::max( |
2556 | a: (uint64_t)opts::ExecutionCountThreshold, |
2557 | b: NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1); |
2558 | } |
2559 | return Threshold; |
2560 | } |
2561 | |
2562 | BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress( |
2563 | uint64_t Address, bool CheckPastEnd, bool UseMaxSize) { |
2564 | auto FI = BinaryFunctions.upper_bound(x: Address); |
2565 | if (FI == BinaryFunctions.begin()) |
2566 | return nullptr; |
2567 | --FI; |
2568 | |
2569 | const uint64_t UsedSize = |
2570 | UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize(); |
2571 | |
2572 | if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0)) |
2573 | return nullptr; |
2574 | |
2575 | return &FI->second; |
2576 | } |
2577 | |
2578 | BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) { |
2579 | // First, try to find a function starting at the given address. If the |
2580 | // function was folded, this will get us the original folded function if it |
2581 | // wasn't removed from the list, e.g. in non-relocation mode. |
2582 | auto BFI = BinaryFunctions.find(x: Address); |
2583 | if (BFI != BinaryFunctions.end()) |
2584 | return &BFI->second; |
2585 | |
2586 | // We might have folded the function matching the object at the given |
2587 | // address. In such case, we look for a function matching the symbol |
2588 | // registered at the original address. The new function (the one that the |
2589 | // original was folded into) will hold the symbol. |
2590 | if (const BinaryData *BD = getBinaryDataAtAddress(Address)) { |
2591 | uint64_t EntryID = 0; |
2592 | BinaryFunction *BF = getFunctionForSymbol(Symbol: BD->getSymbol(), EntryDesc: &EntryID); |
2593 | if (BF && EntryID == 0) |
2594 | return BF; |
2595 | } |
2596 | return nullptr; |
2597 | } |
2598 | |
2599 | /// Deregister JumpTable registered at a given \p Address and delete it. |
2600 | void BinaryContext::deleteJumpTable(uint64_t Address) { |
2601 | assert(JumpTables.count(Address) && "Must have a jump table at address"); |
2602 | JumpTable *JT = JumpTables.at(k: Address); |
2603 | for (BinaryFunction *Parent : JT->Parents) |
2604 | Parent->JumpTables.erase(x: Address); |
2605 | JumpTables.erase(x: Address); |
2606 | delete JT; |
2607 | } |
2608 | |
2609 | DebugAddressRangesVector BinaryContext::translateModuleAddressRanges( |
2610 | const DWARFAddressRangesVector &InputRanges) const { |
2611 | DebugAddressRangesVector OutputRanges; |
2612 | |
2613 | for (const DWARFAddressRange Range : InputRanges) { |
2614 | auto BFI = BinaryFunctions.lower_bound(x: Range.LowPC); |
2615 | while (BFI != BinaryFunctions.end()) { |
2616 | const BinaryFunction &Function = BFI->second; |
2617 | if (Function.getAddress() >= Range.HighPC) |
2618 | break; |
2619 | const DebugAddressRangesVector FunctionRanges = |
2620 | Function.getOutputAddressRanges(); |
2621 | llvm::move(Range: FunctionRanges, Out: std::back_inserter(x&: OutputRanges)); |
2622 | std::advance(i&: BFI, n: 1); |
2623 | } |
2624 | } |
2625 | |
2626 | return OutputRanges; |
2627 | } |
2628 | |
2629 | } // namespace bolt |
2630 | } // namespace llvm |
2631 |
Definitions
- NoHugePages
- PrintDebugInfo
- PrintRelocations
- PrintMemData
- CompDirOverride
- ID
- BOLTError
- log
- convertToErrorCode
- createNonFatalBOLTError
- createFatalBOLTError
- logBOLTErrorsAndQuitOnFatal
- BinaryContext
- ~BinaryContext
- createBinaryContext
- forceSymbolRelocations
- createObjectWriter
- validateObjectNesting
- validateHoles
- updateObjectNesting
- getSubBinaryData
- handleAddressRef
- analyzeMemoryAt
- analyzeJumpTable
- populateJumpTables
- skipMarkedFragments
- getOrCreateGlobalSymbol
- getOrCreateUndefinedGlobalSymbol
- createBinaryFunction
- getOrCreateJumpTable
- duplicateJumpTable
- generateJumpTableName
- hasValidCodePadding
- adjustCodePadding
- registerNameAtAddress
- getBinaryDataContainingAddressImpl
- getGOTSymbol
- setBinaryDataSize
- generateSymbolHashes
- registerFragment
- addAdrpAddRelocAArch64
- handleAArch64Veneer
- processInterproceduralReferences
- postProcessSymbolTable
- foldFunction
- fixBinaryDataHoles
- printGlobalSymbols
- getDwarfFile
- addDebugFilenameToUnit
- getSortedFunctions
- getAllBinaryFunctions
- getDWOCU
- getDWOContext
- preprocessDWODebugInfo
- preprocessDebugInfo
- shouldEmit
- dump
- printCFI
- getMarkerType
- isMarker
- printDebugInfo
- extractData
- printData
- printInstruction
- getBaseAddressForMapping
- getSectionForAddress
- getSectionNameForAddress
- registerSection
- registerSection
- registerSection
- registerOrUpdateSection
- deregisterSectionName
- deregisterUnusedSections
- deregisterSection
- renameSection
- printSections
- absoluteSection
- getUnsignedValueAtAddress
- getSignedValueAtAddress
- addRelocation
- addDynamicRelocation
- removeRelocationAt
- getRelocationAt
- getDynamicRelocationAt
- markAmbiguousRelocations
- getFunctionForSymbol
- generateBugReportMessage
- createInjectedBinaryFunction
- createInstructionPatch
- calculateEmittedSize
- validateInstructionEncoding
- getHotThreshold
- getBinaryFunctionContainingAddress
- getBinaryFunctionAtAddress
- deleteJumpTable
Improve your Profiling and Debugging skills
Find out more