1 | //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the BinaryContext class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "bolt/Core/BinaryContext.h" |
14 | #include "bolt/Core/BinaryEmitter.h" |
15 | #include "bolt/Core/BinaryFunction.h" |
16 | #include "bolt/Utils/CommandLineOpts.h" |
17 | #include "bolt/Utils/Utils.h" |
18 | #include "llvm/ADT/STLExtras.h" |
19 | #include "llvm/ADT/Twine.h" |
20 | #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" |
21 | #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" |
22 | #include "llvm/DebugInfo/DWARF/DWARFUnit.h" |
23 | #include "llvm/MC/MCAsmLayout.h" |
24 | #include "llvm/MC/MCAssembler.h" |
25 | #include "llvm/MC/MCContext.h" |
26 | #include "llvm/MC/MCDisassembler/MCDisassembler.h" |
27 | #include "llvm/MC/MCInstPrinter.h" |
28 | #include "llvm/MC/MCObjectStreamer.h" |
29 | #include "llvm/MC/MCObjectWriter.h" |
30 | #include "llvm/MC/MCRegisterInfo.h" |
31 | #include "llvm/MC/MCSectionELF.h" |
32 | #include "llvm/MC/MCStreamer.h" |
33 | #include "llvm/MC/MCSubtargetInfo.h" |
34 | #include "llvm/MC/MCSymbol.h" |
35 | #include "llvm/Support/CommandLine.h" |
36 | #include "llvm/Support/Error.h" |
37 | #include "llvm/Support/Regex.h" |
38 | #include <algorithm> |
39 | #include <functional> |
40 | #include <iterator> |
41 | #include <unordered_set> |
42 | |
43 | using namespace llvm; |
44 | |
45 | #undef DEBUG_TYPE |
46 | #define DEBUG_TYPE "bolt" |
47 | |
48 | namespace opts { |
49 | |
50 | cl::opt<bool> NoHugePages("no-huge-pages" , |
51 | cl::desc("use regular size pages for code alignment" ), |
52 | cl::Hidden, cl::cat(BoltCategory)); |
53 | |
54 | static cl::opt<bool> |
55 | PrintDebugInfo("print-debug-info" , |
56 | cl::desc("print debug info when printing functions" ), |
57 | cl::Hidden, |
58 | cl::ZeroOrMore, |
59 | cl::cat(BoltCategory)); |
60 | |
61 | cl::opt<bool> PrintRelocations( |
62 | "print-relocations" , |
63 | cl::desc("print relocations when printing functions/objects" ), cl::Hidden, |
64 | cl::cat(BoltCategory)); |
65 | |
66 | static cl::opt<bool> |
67 | PrintMemData("print-mem-data" , |
68 | cl::desc("print memory data annotations when printing functions" ), |
69 | cl::Hidden, |
70 | cl::ZeroOrMore, |
71 | cl::cat(BoltCategory)); |
72 | |
73 | cl::opt<std::string> CompDirOverride( |
74 | "comp-dir-override" , |
75 | cl::desc("overrides DW_AT_comp_dir, and provides an alterantive base " |
76 | "location, which is used with DW_AT_dwo_name to construct a path " |
77 | "to *.dwo files." ), |
78 | cl::Hidden, cl::init(Val: "" ), cl::cat(BoltCategory)); |
79 | } // namespace opts |
80 | |
81 | namespace llvm { |
82 | namespace bolt { |
83 | |
84 | char BOLTError::ID = 0; |
85 | |
86 | BOLTError::BOLTError(bool IsFatal, const Twine &S) |
87 | : IsFatal(IsFatal), Msg(S.str()) {} |
88 | |
89 | void BOLTError::log(raw_ostream &OS) const { |
90 | if (IsFatal) |
91 | OS << "FATAL " ; |
92 | StringRef ErrMsg = StringRef(Msg); |
93 | // Prepend our error prefix if it is missing |
94 | if (ErrMsg.empty()) { |
95 | OS << "BOLT-ERROR\n" ; |
96 | } else { |
97 | if (!ErrMsg.starts_with(Prefix: "BOLT-ERROR" )) |
98 | OS << "BOLT-ERROR: " ; |
99 | OS << ErrMsg << "\n" ; |
100 | } |
101 | } |
102 | |
103 | std::error_code BOLTError::convertToErrorCode() const { |
104 | return inconvertibleErrorCode(); |
105 | } |
106 | |
107 | Error createNonFatalBOLTError(const Twine &S) { |
108 | return make_error<BOLTError>(/*IsFatal*/ Args: false, Args: S); |
109 | } |
110 | |
111 | Error createFatalBOLTError(const Twine &S) { |
112 | return make_error<BOLTError>(/*IsFatal*/ Args: true, Args: S); |
113 | } |
114 | |
115 | void BinaryContext::logBOLTErrorsAndQuitOnFatal(Error E) { |
116 | handleAllErrors(E: Error(std::move(E)), Handlers: [&](const BOLTError &E) { |
117 | if (!E.getMessage().empty()) |
118 | E.log(OS&: this->errs()); |
119 | if (E.isFatal()) |
120 | exit(status: 1); |
121 | }); |
122 | } |
123 | |
124 | BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx, |
125 | std::unique_ptr<DWARFContext> DwCtx, |
126 | std::unique_ptr<Triple> TheTriple, |
127 | const Target *TheTarget, std::string TripleName, |
128 | std::unique_ptr<MCCodeEmitter> MCE, |
129 | std::unique_ptr<MCObjectFileInfo> MOFI, |
130 | std::unique_ptr<const MCAsmInfo> AsmInfo, |
131 | std::unique_ptr<const MCInstrInfo> MII, |
132 | std::unique_ptr<const MCSubtargetInfo> STI, |
133 | std::unique_ptr<MCInstPrinter> InstPrinter, |
134 | std::unique_ptr<const MCInstrAnalysis> MIA, |
135 | std::unique_ptr<MCPlusBuilder> MIB, |
136 | std::unique_ptr<const MCRegisterInfo> MRI, |
137 | std::unique_ptr<MCDisassembler> DisAsm, |
138 | JournalingStreams Logger) |
139 | : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)), |
140 | TheTriple(std::move(TheTriple)), TheTarget(TheTarget), |
141 | TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)), |
142 | AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)), |
143 | InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)), |
144 | MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)), |
145 | Logger(Logger) { |
146 | Relocation::Arch = this->TheTriple->getArch(); |
147 | RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86; |
148 | PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize; |
149 | } |
150 | |
151 | BinaryContext::~BinaryContext() { |
152 | for (BinarySection *Section : Sections) |
153 | delete Section; |
154 | for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions) |
155 | delete InjectedFunction; |
156 | for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables) |
157 | delete JTI.second; |
158 | clearBinaryData(); |
159 | } |
160 | |
161 | /// Create BinaryContext for a given architecture \p ArchName and |
162 | /// triple \p TripleName. |
163 | Expected<std::unique_ptr<BinaryContext>> BinaryContext::createBinaryContext( |
164 | Triple TheTriple, StringRef InputFileName, SubtargetFeatures *Features, |
165 | bool IsPIC, std::unique_ptr<DWARFContext> DwCtx, JournalingStreams Logger) { |
166 | StringRef ArchName = "" ; |
167 | std::string FeaturesStr = "" ; |
168 | switch (TheTriple.getArch()) { |
169 | case llvm::Triple::x86_64: |
170 | if (Features) |
171 | return createFatalBOLTError( |
172 | S: "x86_64 target does not use SubtargetFeatures" ); |
173 | ArchName = "x86-64" ; |
174 | FeaturesStr = "+nopl" ; |
175 | break; |
176 | case llvm::Triple::aarch64: |
177 | if (Features) |
178 | return createFatalBOLTError( |
179 | S: "AArch64 target does not use SubtargetFeatures" ); |
180 | ArchName = "aarch64" ; |
181 | FeaturesStr = "+all" ; |
182 | break; |
183 | case llvm::Triple::riscv64: { |
184 | ArchName = "riscv64" ; |
185 | if (!Features) |
186 | return createFatalBOLTError(S: "RISCV target needs SubtargetFeatures" ); |
187 | // We rely on relaxation for some transformations (e.g., promoting all calls |
188 | // to PseudoCALL and then making JITLink relax them). Since the relax |
189 | // feature is not stored in the object file, we manually enable it. |
190 | Features->AddFeature(String: "relax" ); |
191 | FeaturesStr = Features->getString(); |
192 | break; |
193 | } |
194 | default: |
195 | return createStringError(EC: std::errc::not_supported, |
196 | Fmt: "BOLT-ERROR: Unrecognized machine in ELF file" ); |
197 | } |
198 | |
199 | const std::string TripleName = TheTriple.str(); |
200 | |
201 | std::string Error; |
202 | const Target *TheTarget = |
203 | TargetRegistry::lookupTarget(ArchName: std::string(ArchName), TheTriple, Error); |
204 | if (!TheTarget) |
205 | return createStringError(EC: make_error_code(e: std::errc::not_supported), |
206 | S: Twine("BOLT-ERROR: " , Error)); |
207 | |
208 | std::unique_ptr<const MCRegisterInfo> MRI( |
209 | TheTarget->createMCRegInfo(TT: TripleName)); |
210 | if (!MRI) |
211 | return createStringError( |
212 | EC: make_error_code(e: std::errc::not_supported), |
213 | S: Twine("BOLT-ERROR: no register info for target " , TripleName)); |
214 | |
215 | // Set up disassembler. |
216 | std::unique_ptr<MCAsmInfo> AsmInfo( |
217 | TheTarget->createMCAsmInfo(MRI: *MRI, TheTriple: TripleName, Options: MCTargetOptions())); |
218 | if (!AsmInfo) |
219 | return createStringError( |
220 | EC: make_error_code(e: std::errc::not_supported), |
221 | S: Twine("BOLT-ERROR: no assembly info for target " , TripleName)); |
222 | // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump |
223 | // we want to emit such names as using @PLT without double quotes to convey |
224 | // variant kind to the assembler. BOLT doesn't rely on the linker so we can |
225 | // override the default AsmInfo behavior to emit names the way we want. |
226 | AsmInfo->setAllowAtInName(true); |
227 | |
228 | std::unique_ptr<const MCSubtargetInfo> STI( |
229 | TheTarget->createMCSubtargetInfo(TheTriple: TripleName, CPU: "" , Features: FeaturesStr)); |
230 | if (!STI) |
231 | return createStringError( |
232 | EC: make_error_code(e: std::errc::not_supported), |
233 | S: Twine("BOLT-ERROR: no subtarget info for target " , TripleName)); |
234 | |
235 | std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo()); |
236 | if (!MII) |
237 | return createStringError( |
238 | EC: make_error_code(e: std::errc::not_supported), |
239 | S: Twine("BOLT-ERROR: no instruction info for target " , TripleName)); |
240 | |
241 | std::unique_ptr<MCContext> Ctx( |
242 | new MCContext(TheTriple, AsmInfo.get(), MRI.get(), STI.get())); |
243 | std::unique_ptr<MCObjectFileInfo> MOFI( |
244 | TheTarget->createMCObjectFileInfo(Ctx&: *Ctx, PIC: IsPIC)); |
245 | Ctx->setObjectFileInfo(MOFI.get()); |
246 | // We do not support X86 Large code model. Change this in the future. |
247 | bool Large = false; |
248 | if (TheTriple.getArch() == llvm::Triple::aarch64) |
249 | Large = true; |
250 | unsigned LSDAEncoding = |
251 | Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; |
252 | if (IsPIC) { |
253 | LSDAEncoding = dwarf::DW_EH_PE_pcrel | |
254 | (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); |
255 | } |
256 | |
257 | std::unique_ptr<MCDisassembler> DisAsm( |
258 | TheTarget->createMCDisassembler(STI: *STI, Ctx&: *Ctx)); |
259 | |
260 | if (!DisAsm) |
261 | return createStringError( |
262 | EC: make_error_code(e: std::errc::not_supported), |
263 | S: Twine("BOLT-ERROR: no disassembler info for target " , TripleName)); |
264 | |
265 | std::unique_ptr<const MCInstrAnalysis> MIA( |
266 | TheTarget->createMCInstrAnalysis(Info: MII.get())); |
267 | if (!MIA) |
268 | return createStringError( |
269 | EC: make_error_code(e: std::errc::not_supported), |
270 | S: Twine("BOLT-ERROR: failed to create instruction analysis for target " , |
271 | TripleName)); |
272 | |
273 | int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); |
274 | std::unique_ptr<MCInstPrinter> InstructionPrinter( |
275 | TheTarget->createMCInstPrinter(T: TheTriple, SyntaxVariant: AsmPrinterVariant, MAI: *AsmInfo, |
276 | MII: *MII, MRI: *MRI)); |
277 | if (!InstructionPrinter) |
278 | return createStringError( |
279 | EC: make_error_code(e: std::errc::not_supported), |
280 | S: Twine("BOLT-ERROR: no instruction printer for target " , TripleName)); |
281 | InstructionPrinter->setPrintImmHex(true); |
282 | |
283 | std::unique_ptr<MCCodeEmitter> MCE( |
284 | TheTarget->createMCCodeEmitter(II: *MII, Ctx&: *Ctx)); |
285 | |
286 | auto BC = std::make_unique<BinaryContext>( |
287 | args: std::move(Ctx), args: std::move(DwCtx), args: std::make_unique<Triple>(args&: TheTriple), |
288 | args&: TheTarget, args: std::string(TripleName), args: std::move(MCE), args: std::move(MOFI), |
289 | args: std::move(AsmInfo), args: std::move(MII), args: std::move(STI), |
290 | args: std::move(InstructionPrinter), args: std::move(MIA), args: nullptr, args: std::move(MRI), |
291 | args: std::move(DisAsm), args&: Logger); |
292 | |
293 | BC->LSDAEncoding = LSDAEncoding; |
294 | |
295 | BC->MAB = std::unique_ptr<MCAsmBackend>( |
296 | BC->TheTarget->createMCAsmBackend(STI: *BC->STI, MRI: *BC->MRI, Options: MCTargetOptions())); |
297 | |
298 | BC->setFilename(InputFileName); |
299 | |
300 | BC->HasFixedLoadAddress = !IsPIC; |
301 | |
302 | BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>( |
303 | BC->TheTarget->createMCDisassembler(STI: *BC->STI, Ctx&: *BC->Ctx)); |
304 | |
305 | if (!BC->SymbolicDisAsm) |
306 | return createStringError( |
307 | EC: make_error_code(e: std::errc::not_supported), |
308 | S: Twine("BOLT-ERROR: no disassembler info for target " , TripleName)); |
309 | |
310 | return std::move(BC); |
311 | } |
312 | |
313 | bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const { |
314 | if (opts::HotText && |
315 | (SymbolName == "__hot_start" || SymbolName == "__hot_end" )) |
316 | return true; |
317 | |
318 | if (opts::HotData && |
319 | (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end" )) |
320 | return true; |
321 | |
322 | if (SymbolName == "_end" ) |
323 | return true; |
324 | |
325 | return false; |
326 | } |
327 | |
328 | std::unique_ptr<MCObjectWriter> |
329 | BinaryContext::createObjectWriter(raw_pwrite_stream &OS) { |
330 | return MAB->createObjectWriter(OS); |
331 | } |
332 | |
333 | bool BinaryContext::validateObjectNesting() const { |
334 | auto Itr = BinaryDataMap.begin(); |
335 | auto End = BinaryDataMap.end(); |
336 | bool Valid = true; |
337 | while (Itr != End) { |
338 | auto Next = std::next(x: Itr); |
339 | while (Next != End && |
340 | Itr->second->getSection() == Next->second->getSection() && |
341 | Itr->second->containsRange(Address: Next->second->getAddress(), |
342 | Size: Next->second->getSize())) { |
343 | if (Next->second->Parent != Itr->second) { |
344 | this->errs() << "BOLT-WARNING: object nesting incorrect for:\n" |
345 | << "BOLT-WARNING: " << *Itr->second << "\n" |
346 | << "BOLT-WARNING: " << *Next->second << "\n" ; |
347 | Valid = false; |
348 | } |
349 | ++Next; |
350 | } |
351 | Itr = Next; |
352 | } |
353 | return Valid; |
354 | } |
355 | |
356 | bool BinaryContext::validateHoles() const { |
357 | bool Valid = true; |
358 | for (BinarySection &Section : sections()) { |
359 | for (const Relocation &Rel : Section.relocations()) { |
360 | uint64_t RelAddr = Rel.Offset + Section.getAddress(); |
361 | const BinaryData *BD = getBinaryDataContainingAddress(Address: RelAddr); |
362 | if (!BD) { |
363 | this->errs() |
364 | << "BOLT-WARNING: no BinaryData found for relocation at address" |
365 | << " 0x" << Twine::utohexstr(Val: RelAddr) << " in " << Section.getName() |
366 | << "\n" ; |
367 | Valid = false; |
368 | } else if (!BD->getAtomicRoot()) { |
369 | this->errs() |
370 | << "BOLT-WARNING: no atomic BinaryData found for relocation at " |
371 | << "address 0x" << Twine::utohexstr(Val: RelAddr) << " in " |
372 | << Section.getName() << "\n" ; |
373 | Valid = false; |
374 | } |
375 | } |
376 | } |
377 | return Valid; |
378 | } |
379 | |
380 | void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) { |
381 | const uint64_t Address = GAI->second->getAddress(); |
382 | const uint64_t Size = GAI->second->getSize(); |
383 | |
384 | auto fixParents = [&](BinaryDataMapType::iterator Itr, |
385 | BinaryData *NewParent) { |
386 | BinaryData *OldParent = Itr->second->Parent; |
387 | Itr->second->Parent = NewParent; |
388 | ++Itr; |
389 | while (Itr != BinaryDataMap.end() && OldParent && |
390 | Itr->second->Parent == OldParent) { |
391 | Itr->second->Parent = NewParent; |
392 | ++Itr; |
393 | } |
394 | }; |
395 | |
396 | // Check if the previous symbol contains the newly added symbol. |
397 | if (GAI != BinaryDataMap.begin()) { |
398 | BinaryData *Prev = std::prev(x: GAI)->second; |
399 | while (Prev) { |
400 | if (Prev->getSection() == GAI->second->getSection() && |
401 | Prev->containsRange(Address, Size)) { |
402 | fixParents(GAI, Prev); |
403 | } else { |
404 | fixParents(GAI, nullptr); |
405 | } |
406 | Prev = Prev->Parent; |
407 | } |
408 | } |
409 | |
410 | // Check if the newly added symbol contains any subsequent symbols. |
411 | if (Size != 0) { |
412 | BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second; |
413 | auto Itr = std::next(x: GAI); |
414 | while ( |
415 | Itr != BinaryDataMap.end() && |
416 | BD->containsRange(Address: Itr->second->getAddress(), Size: Itr->second->getSize())) { |
417 | Itr->second->Parent = BD; |
418 | ++Itr; |
419 | } |
420 | } |
421 | } |
422 | |
423 | iterator_range<BinaryContext::binary_data_iterator> |
424 | BinaryContext::getSubBinaryData(BinaryData *BD) { |
425 | auto Start = std::next(x: BinaryDataMap.find(x: BD->getAddress())); |
426 | auto End = Start; |
427 | while (End != BinaryDataMap.end() && BD->isAncestorOf(BD: End->second)) |
428 | ++End; |
429 | return make_range(x: Start, y: End); |
430 | } |
431 | |
432 | std::pair<const MCSymbol *, uint64_t> |
433 | BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF, |
434 | bool IsPCRel) { |
435 | if (isAArch64()) { |
436 | // Check if this is an access to a constant island and create bookkeeping |
437 | // to keep track of it and emit it later as part of this function. |
438 | if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address)) |
439 | return std::make_pair(x&: IslandSym, y: 0); |
440 | |
441 | // Detect custom code written in assembly that refers to arbitrary |
442 | // constant islands from other functions. Write this reference so we |
443 | // can pull this constant island and emit it as part of this function |
444 | // too. |
445 | auto IslandIter = AddressToConstantIslandMap.lower_bound(x: Address); |
446 | |
447 | if (IslandIter != AddressToConstantIslandMap.begin() && |
448 | (IslandIter == AddressToConstantIslandMap.end() || |
449 | IslandIter->first > Address)) |
450 | --IslandIter; |
451 | |
452 | if (IslandIter != AddressToConstantIslandMap.end()) { |
453 | // Fall-back to referencing the original constant island in the presence |
454 | // of dynamic relocs, as we currently do not support cloning them. |
455 | // Notice: we might fail to link because of this, if the original constant |
456 | // island we are referring would be emitted too far away. |
457 | if (IslandIter->second->hasDynamicRelocationAtIsland()) { |
458 | MCSymbol *IslandSym = |
459 | IslandIter->second->getOrCreateIslandAccess(Address); |
460 | if (IslandSym) |
461 | return std::make_pair(x&: IslandSym, y: 0); |
462 | } else if (MCSymbol *IslandSym = |
463 | IslandIter->second->getOrCreateProxyIslandAccess(Address, |
464 | Referrer&: BF)) { |
465 | BF.createIslandDependency(Island: IslandSym, BF: IslandIter->second); |
466 | return std::make_pair(x&: IslandSym, y: 0); |
467 | } |
468 | } |
469 | } |
470 | |
471 | // Note that the address does not necessarily have to reside inside |
472 | // a section, it could be an absolute address too. |
473 | ErrorOr<BinarySection &> Section = getSectionForAddress(Address); |
474 | if (Section && Section->isText()) { |
475 | if (BF.containsAddress(PC: Address, /*UseMaxSize=*/isAArch64())) { |
476 | if (Address != BF.getAddress()) { |
477 | // The address could potentially escape. Mark it as another entry |
478 | // point into the function. |
479 | if (opts::Verbosity >= 1) { |
480 | this->outs() << "BOLT-INFO: potentially escaped address 0x" |
481 | << Twine::utohexstr(Val: Address) << " in function " << BF |
482 | << '\n'; |
483 | } |
484 | BF.HasInternalLabelReference = true; |
485 | return std::make_pair( |
486 | x: BF.addEntryPointAtOffset(Offset: Address - BF.getAddress()), y: 0); |
487 | } |
488 | } else { |
489 | addInterproceduralReference(Function: &BF, Address); |
490 | } |
491 | } |
492 | |
493 | // With relocations, catch jump table references outside of the basic block |
494 | // containing the indirect jump. |
495 | if (HasRelocations) { |
496 | const MemoryContentsType MemType = analyzeMemoryAt(Address, BF); |
497 | if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) { |
498 | const MCSymbol *Symbol = |
499 | getOrCreateJumpTable(Function&: BF, Address, Type: JumpTable::JTT_PIC); |
500 | |
501 | return std::make_pair(x&: Symbol, y: 0); |
502 | } |
503 | } |
504 | |
505 | if (BinaryData *BD = getBinaryDataContainingAddress(Address)) |
506 | return std::make_pair(x: BD->getSymbol(), y: Address - BD->getAddress()); |
507 | |
508 | // TODO: use DWARF info to get size/alignment here? |
509 | MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, Prefix: "DATAat" ); |
510 | LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n'); |
511 | return std::make_pair(x&: TargetSymbol, y: 0); |
512 | } |
513 | |
514 | MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address, |
515 | BinaryFunction &BF) { |
516 | if (!isX86()) |
517 | return MemoryContentsType::UNKNOWN; |
518 | |
519 | ErrorOr<BinarySection &> Section = getSectionForAddress(Address); |
520 | if (!Section) { |
521 | // No section - possibly an absolute address. Since we don't allow |
522 | // internal function addresses to escape the function scope - we |
523 | // consider it a tail call. |
524 | if (opts::Verbosity > 1) { |
525 | this->errs() << "BOLT-WARNING: no section for address 0x" |
526 | << Twine::utohexstr(Val: Address) << " referenced from function " |
527 | << BF << '\n'; |
528 | } |
529 | return MemoryContentsType::UNKNOWN; |
530 | } |
531 | |
532 | if (Section->isVirtual()) { |
533 | // The contents are filled at runtime. |
534 | return MemoryContentsType::UNKNOWN; |
535 | } |
536 | |
537 | // No support for jump tables in code yet. |
538 | if (Section->isText()) |
539 | return MemoryContentsType::UNKNOWN; |
540 | |
541 | // Start with checking for PIC jump table. We expect non-PIC jump tables |
542 | // to have high 32 bits set to 0. |
543 | if (analyzeJumpTable(Address, Type: JumpTable::JTT_PIC, BF)) |
544 | return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE; |
545 | |
546 | if (analyzeJumpTable(Address, Type: JumpTable::JTT_NORMAL, BF)) |
547 | return MemoryContentsType::POSSIBLE_JUMP_TABLE; |
548 | |
549 | return MemoryContentsType::UNKNOWN; |
550 | } |
551 | |
552 | bool BinaryContext::analyzeJumpTable(const uint64_t Address, |
553 | const JumpTable::JumpTableType Type, |
554 | const BinaryFunction &BF, |
555 | const uint64_t NextJTAddress, |
556 | JumpTable::AddressesType *EntriesAsAddress, |
557 | bool *HasEntryInFragment) const { |
558 | // Target address of __builtin_unreachable. |
559 | const uint64_t UnreachableAddress = BF.getAddress() + BF.getSize(); |
560 | |
561 | // Is one of the targets __builtin_unreachable? |
562 | bool HasUnreachable = false; |
563 | |
564 | // Does one of the entries match function start address? |
565 | bool HasStartAsEntry = false; |
566 | |
567 | // Number of targets other than __builtin_unreachable. |
568 | uint64_t NumRealEntries = 0; |
569 | |
570 | // Size of the jump table without trailing __builtin_unreachable entries. |
571 | size_t TrimmedSize = 0; |
572 | |
573 | auto addEntryAddress = [&](uint64_t EntryAddress, bool Unreachable = false) { |
574 | if (!EntriesAsAddress) |
575 | return; |
576 | EntriesAsAddress->emplace_back(args&: EntryAddress); |
577 | if (!Unreachable) |
578 | TrimmedSize = EntriesAsAddress->size(); |
579 | }; |
580 | |
581 | ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); |
582 | if (!Section) |
583 | return false; |
584 | |
585 | // The upper bound is defined by containing object, section limits, and |
586 | // the next jump table in memory. |
587 | uint64_t UpperBound = Section->getEndAddress(); |
588 | const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address); |
589 | if (JumpTableBD && JumpTableBD->getSize()) { |
590 | assert(JumpTableBD->getEndAddress() <= UpperBound && |
591 | "data object cannot cross a section boundary" ); |
592 | UpperBound = JumpTableBD->getEndAddress(); |
593 | } |
594 | if (NextJTAddress) |
595 | UpperBound = std::min(a: NextJTAddress, b: UpperBound); |
596 | |
597 | LLVM_DEBUG({ |
598 | using JTT = JumpTable::JumpTableType; |
599 | dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n" , |
600 | Address, BF.getPrintName(), |
601 | Type == JTT::JTT_PIC ? "PIC" : "Normal" ); |
602 | }); |
603 | const uint64_t EntrySize = getJumpTableEntrySize(Type); |
604 | for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize; |
605 | EntryAddress += EntrySize) { |
606 | LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress) |
607 | << " -> " ); |
608 | // Check if there's a proper relocation against the jump table entry. |
609 | if (HasRelocations) { |
610 | if (Type == JumpTable::JTT_PIC && |
611 | !DataPCRelocations.count(x: EntryAddress)) { |
612 | LLVM_DEBUG( |
613 | dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n" ); |
614 | break; |
615 | } |
616 | if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(Address: EntryAddress)) { |
617 | LLVM_DEBUG( |
618 | dbgs() |
619 | << "FAIL: JTT_NORMAL table, no relocation for this address\n" ); |
620 | break; |
621 | } |
622 | } |
623 | |
624 | const uint64_t Value = |
625 | (Type == JumpTable::JTT_PIC) |
626 | ? Address + *getSignedValueAtAddress(Address: EntryAddress, Size: EntrySize) |
627 | : *getPointerAtAddress(Address: EntryAddress); |
628 | |
629 | // __builtin_unreachable() case. |
630 | if (Value == UnreachableAddress) { |
631 | addEntryAddress(Value, /*Unreachable*/ true); |
632 | HasUnreachable = true; |
633 | LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n" , Value)); |
634 | continue; |
635 | } |
636 | |
637 | // Function start is another special case. It is allowed in the jump table, |
638 | // but we need at least one another regular entry to distinguish the table |
639 | // from, e.g. a function pointer array. |
640 | if (Value == BF.getAddress()) { |
641 | HasStartAsEntry = true; |
642 | addEntryAddress(Value); |
643 | continue; |
644 | } |
645 | |
646 | // Function or one of its fragments. |
647 | const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Address: Value); |
648 | const bool DoesBelongToFunction = |
649 | BF.containsAddress(PC: Value) || |
650 | (TargetBF && TargetBF->isParentOrChildOf(Other: BF)); |
651 | if (!DoesBelongToFunction) { |
652 | LLVM_DEBUG({ |
653 | if (!BF.containsAddress(Value)) { |
654 | dbgs() << "FAIL: function doesn't contain this address\n" ; |
655 | if (TargetBF) { |
656 | dbgs() << " ! function containing this address: " |
657 | << TargetBF->getPrintName() << '\n'; |
658 | if (TargetBF->isFragment()) { |
659 | dbgs() << " ! is a fragment" ; |
660 | for (BinaryFunction *Parent : TargetBF->ParentFragments) |
661 | dbgs() << ", parent: " << Parent->getPrintName(); |
662 | dbgs() << '\n'; |
663 | } |
664 | } |
665 | } |
666 | }); |
667 | break; |
668 | } |
669 | |
670 | // Check there's an instruction at this offset. |
671 | if (TargetBF->getState() == BinaryFunction::State::Disassembled && |
672 | !TargetBF->getInstructionAtOffset(Offset: Value - TargetBF->getAddress())) { |
673 | LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n" , Value)); |
674 | break; |
675 | } |
676 | |
677 | ++NumRealEntries; |
678 | LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n" , Value)); |
679 | |
680 | if (TargetBF != &BF && HasEntryInFragment) |
681 | *HasEntryInFragment = true; |
682 | addEntryAddress(Value); |
683 | } |
684 | |
685 | // Trim direct/normal jump table to exclude trailing unreachable entries that |
686 | // can collide with a function address. |
687 | if (Type == JumpTable::JTT_NORMAL && EntriesAsAddress && |
688 | TrimmedSize != EntriesAsAddress->size() && |
689 | getBinaryFunctionAtAddress(Address: UnreachableAddress)) |
690 | EntriesAsAddress->resize(new_size: TrimmedSize); |
691 | |
692 | // It's a jump table if the number of real entries is more than 1, or there's |
693 | // one real entry and one or more special targets. If there are only multiple |
694 | // special targets, then it's not a jump table. |
695 | return NumRealEntries + (HasUnreachable || HasStartAsEntry) >= 2; |
696 | } |
697 | |
698 | void BinaryContext::populateJumpTables() { |
699 | LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size() |
700 | << '\n'); |
701 | for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE; |
702 | ++JTI) { |
703 | JumpTable *JT = JTI->second; |
704 | |
705 | bool NonSimpleParent = false; |
706 | for (BinaryFunction *BF : JT->Parents) |
707 | NonSimpleParent |= !BF->isSimple(); |
708 | if (NonSimpleParent) |
709 | continue; |
710 | |
711 | uint64_t NextJTAddress = 0; |
712 | auto NextJTI = std::next(x: JTI); |
713 | if (NextJTI != JTE) |
714 | NextJTAddress = NextJTI->second->getAddress(); |
715 | |
716 | const bool Success = |
717 | analyzeJumpTable(Address: JT->getAddress(), Type: JT->Type, BF: *(JT->Parents[0]), |
718 | NextJTAddress, EntriesAsAddress: &JT->EntriesAsAddress, HasEntryInFragment: &JT->IsSplit); |
719 | if (!Success) { |
720 | LLVM_DEBUG({ |
721 | dbgs() << "failed to analyze " ; |
722 | JT->print(dbgs()); |
723 | if (NextJTI != JTE) { |
724 | dbgs() << "next " ; |
725 | NextJTI->second->print(dbgs()); |
726 | } |
727 | }); |
728 | llvm_unreachable("jump table heuristic failure" ); |
729 | } |
730 | for (BinaryFunction *Frag : JT->Parents) { |
731 | if (JT->IsSplit) |
732 | Frag->setHasIndirectTargetToSplitFragment(true); |
733 | for (uint64_t EntryAddress : JT->EntriesAsAddress) |
734 | // if target is builtin_unreachable |
735 | if (EntryAddress == Frag->getAddress() + Frag->getSize()) { |
736 | Frag->IgnoredBranches.emplace_back(Args: EntryAddress - Frag->getAddress(), |
737 | Args: Frag->getSize()); |
738 | } else if (EntryAddress >= Frag->getAddress() && |
739 | EntryAddress < Frag->getAddress() + Frag->getSize()) { |
740 | Frag->registerReferencedOffset(Offset: EntryAddress - Frag->getAddress()); |
741 | } |
742 | } |
743 | |
744 | // In strict mode, erase PC-relative relocation record. Later we check that |
745 | // all such records are erased and thus have been accounted for. |
746 | if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) { |
747 | for (uint64_t Address = JT->getAddress(); |
748 | Address < JT->getAddress() + JT->getSize(); |
749 | Address += JT->EntrySize) { |
750 | DataPCRelocations.erase(position: DataPCRelocations.find(x: Address)); |
751 | } |
752 | } |
753 | |
754 | // Mark to skip the function and all its fragments. |
755 | for (BinaryFunction *Frag : JT->Parents) |
756 | if (Frag->hasIndirectTargetToSplitFragment()) |
757 | addFragmentsToSkip(Function: Frag); |
758 | } |
759 | |
760 | if (opts::StrictMode && DataPCRelocations.size()) { |
761 | LLVM_DEBUG({ |
762 | dbgs() << DataPCRelocations.size() |
763 | << " unclaimed PC-relative relocations left in data:\n" ; |
764 | for (uint64_t Reloc : DataPCRelocations) |
765 | dbgs() << Twine::utohexstr(Reloc) << '\n'; |
766 | }); |
767 | assert(0 && "unclaimed PC-relative relocations left in data\n" ); |
768 | } |
769 | clearList(List&: DataPCRelocations); |
770 | } |
771 | |
772 | void BinaryContext::skipMarkedFragments() { |
773 | std::vector<BinaryFunction *> FragmentQueue; |
774 | // Copy the functions to FragmentQueue. |
775 | FragmentQueue.assign(first: FragmentsToSkip.begin(), last: FragmentsToSkip.end()); |
776 | auto addToWorklist = [&](BinaryFunction *Function) -> void { |
777 | if (FragmentsToSkip.count(x: Function)) |
778 | return; |
779 | FragmentQueue.push_back(x: Function); |
780 | addFragmentsToSkip(Function); |
781 | }; |
782 | // Functions containing split jump tables need to be skipped with all |
783 | // fragments (transitively). |
784 | for (size_t I = 0; I != FragmentQueue.size(); I++) { |
785 | BinaryFunction *BF = FragmentQueue[I]; |
786 | assert(FragmentsToSkip.count(BF) && |
787 | "internal error in traversing function fragments" ); |
788 | if (opts::Verbosity >= 1) |
789 | this->errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n'; |
790 | BF->setSimple(false); |
791 | BF->setHasIndirectTargetToSplitFragment(true); |
792 | |
793 | llvm::for_each(Range&: BF->Fragments, F: addToWorklist); |
794 | llvm::for_each(Range&: BF->ParentFragments, F: addToWorklist); |
795 | } |
796 | if (!FragmentsToSkip.empty()) |
797 | this->errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size() |
798 | << " function" << (FragmentsToSkip.size() == 1 ? "" : "s" ) |
799 | << " due to cold fragments\n" ; |
800 | } |
801 | |
802 | MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix, |
803 | uint64_t Size, |
804 | uint16_t Alignment, |
805 | unsigned Flags) { |
806 | auto Itr = BinaryDataMap.find(x: Address); |
807 | if (Itr != BinaryDataMap.end()) { |
808 | assert(Itr->second->getSize() == Size || !Size); |
809 | return Itr->second->getSymbol(); |
810 | } |
811 | |
812 | std::string Name = (Prefix + "0x" + Twine::utohexstr(Val: Address)).str(); |
813 | assert(!GlobalSymbols.count(Name) && "created name is not unique" ); |
814 | return registerNameAtAddress(Name, Address, Size, Alignment, Flags); |
815 | } |
816 | |
817 | MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) { |
818 | return Ctx->getOrCreateSymbol(Name); |
819 | } |
820 | |
821 | BinaryFunction *BinaryContext::createBinaryFunction( |
822 | const std::string &Name, BinarySection &Section, uint64_t Address, |
823 | uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) { |
824 | auto Result = BinaryFunctions.emplace( |
825 | args&: Address, args: BinaryFunction(Name, Section, Address, Size, *this)); |
826 | assert(Result.second == true && "unexpected duplicate function" ); |
827 | BinaryFunction *BF = &Result.first->second; |
828 | registerNameAtAddress(Name, Address, Size: SymbolSize ? SymbolSize : Size, |
829 | Alignment); |
830 | setSymbolToFunctionMap(Sym: BF->getSymbol(), BF); |
831 | return BF; |
832 | } |
833 | |
834 | const MCSymbol * |
835 | BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address, |
836 | JumpTable::JumpTableType Type) { |
837 | // Two fragments of same function access same jump table |
838 | if (JumpTable *JT = getJumpTableContainingAddress(Address)) { |
839 | assert(JT->Type == Type && "jump table types have to match" ); |
840 | assert(Address == JT->getAddress() && "unexpected non-empty jump table" ); |
841 | |
842 | // Prevent associating a jump table to a specific fragment twice. |
843 | // This simple check arises from the assumption: no more than 2 fragments. |
844 | if (JT->Parents.size() == 1 && JT->Parents[0] != &Function) { |
845 | assert(JT->Parents[0]->isParentOrChildOf(Function) && |
846 | "cannot re-use jump table of a different function" ); |
847 | // Duplicate the entry for the parent function for easy access |
848 | JT->Parents.push_back(Elt: &Function); |
849 | if (opts::Verbosity > 2) { |
850 | this->outs() << "BOLT-INFO: Multiple fragments access same jump table: " |
851 | << JT->Parents[0]->getPrintName() << "; " |
852 | << Function.getPrintName() << "\n" ; |
853 | JT->print(OS&: this->outs()); |
854 | } |
855 | Function.JumpTables.emplace(args&: Address, args&: JT); |
856 | JT->Parents[0]->setHasIndirectTargetToSplitFragment(true); |
857 | JT->Parents[1]->setHasIndirectTargetToSplitFragment(true); |
858 | } |
859 | |
860 | bool IsJumpTableParent = false; |
861 | (void)IsJumpTableParent; |
862 | for (BinaryFunction *Frag : JT->Parents) |
863 | if (Frag == &Function) |
864 | IsJumpTableParent = true; |
865 | assert(IsJumpTableParent && |
866 | "cannot re-use jump table of a different function" ); |
867 | return JT->getFirstLabel(); |
868 | } |
869 | |
870 | // Re-use the existing symbol if possible. |
871 | MCSymbol *JTLabel = nullptr; |
872 | if (BinaryData *Object = getBinaryDataAtAddress(Address)) { |
873 | if (!isInternalSymbolName(Name: Object->getSymbol()->getName())) |
874 | JTLabel = Object->getSymbol(); |
875 | } |
876 | |
877 | const uint64_t EntrySize = getJumpTableEntrySize(Type); |
878 | if (!JTLabel) { |
879 | const std::string JumpTableName = generateJumpTableName(BF: Function, Address); |
880 | JTLabel = registerNameAtAddress(Name: JumpTableName, Address, Size: 0, Alignment: EntrySize); |
881 | } |
882 | |
883 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName() |
884 | << " in function " << Function << '\n'); |
885 | |
886 | JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type, |
887 | JumpTable::LabelMapType{{0, JTLabel}}, |
888 | *getSectionForAddress(Address)); |
889 | JT->Parents.push_back(Elt: &Function); |
890 | if (opts::Verbosity > 2) |
891 | JT->print(OS&: this->outs()); |
892 | JumpTables.emplace(args&: Address, args&: JT); |
893 | |
894 | // Duplicate the entry for the parent function for easy access. |
895 | Function.JumpTables.emplace(args&: Address, args&: JT); |
896 | return JTLabel; |
897 | } |
898 | |
899 | std::pair<uint64_t, const MCSymbol *> |
900 | BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT, |
901 | const MCSymbol *OldLabel) { |
902 | auto L = scopeLock(); |
903 | unsigned Offset = 0; |
904 | bool Found = false; |
905 | for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) { |
906 | if (Elmt.second != OldLabel) |
907 | continue; |
908 | Offset = Elmt.first; |
909 | Found = true; |
910 | break; |
911 | } |
912 | assert(Found && "Label not found" ); |
913 | (void)Found; |
914 | MCSymbol *NewLabel = Ctx->createNamedTempSymbol(Name: "duplicatedJT" ); |
915 | JumpTable *NewJT = |
916 | new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type, |
917 | JumpTable::LabelMapType{{Offset, NewLabel}}, |
918 | *getSectionForAddress(Address: JT->getAddress())); |
919 | NewJT->Parents = JT->Parents; |
920 | NewJT->Entries = JT->Entries; |
921 | NewJT->Counts = JT->Counts; |
922 | uint64_t JumpTableID = ++DuplicatedJumpTables; |
923 | // Invert it to differentiate from regular jump tables whose IDs are their |
924 | // addresses in the input binary memory space |
925 | JumpTableID = ~JumpTableID; |
926 | JumpTables.emplace(args&: JumpTableID, args&: NewJT); |
927 | Function.JumpTables.emplace(args&: JumpTableID, args&: NewJT); |
928 | return std::make_pair(x&: JumpTableID, y&: NewLabel); |
929 | } |
930 | |
931 | std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF, |
932 | uint64_t Address) { |
933 | size_t Id; |
934 | uint64_t Offset = 0; |
935 | if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) { |
936 | Offset = Address - JT->getAddress(); |
937 | auto Itr = JT->Labels.find(x: Offset); |
938 | if (Itr != JT->Labels.end()) |
939 | return std::string(Itr->second->getName()); |
940 | Id = JumpTableIds.at(k: JT->getAddress()); |
941 | } else { |
942 | Id = JumpTableIds[Address] = BF.JumpTables.size(); |
943 | } |
944 | return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(val: Id) + |
945 | (Offset ? ("." + std::to_string(val: Offset)) : "" )); |
946 | } |
947 | |
948 | bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) { |
949 | // FIXME: aarch64 support is missing. |
950 | if (!isX86()) |
951 | return true; |
952 | |
953 | if (BF.getSize() == BF.getMaxSize()) |
954 | return true; |
955 | |
956 | ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData(); |
957 | assert(FunctionData && "cannot get function as data" ); |
958 | |
959 | uint64_t Offset = BF.getSize(); |
960 | MCInst Instr; |
961 | uint64_t InstrSize = 0; |
962 | uint64_t InstrAddress = BF.getAddress() + Offset; |
963 | using std::placeholders::_1; |
964 | |
965 | // Skip instructions that satisfy the predicate condition. |
966 | auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) { |
967 | const uint64_t StartOffset = Offset; |
968 | for (; Offset < BF.getMaxSize(); |
969 | Offset += InstrSize, InstrAddress += InstrSize) { |
970 | if (!DisAsm->getInstruction(Instr, Size&: InstrSize, Bytes: FunctionData->slice(N: Offset), |
971 | Address: InstrAddress, CStream&: nulls())) |
972 | break; |
973 | if (!Predicate(Instr)) |
974 | break; |
975 | } |
976 | |
977 | return Offset - StartOffset; |
978 | }; |
979 | |
980 | // Skip a sequence of zero bytes. |
981 | auto skipZeros = [&]() { |
982 | const uint64_t StartOffset = Offset; |
983 | for (; Offset < BF.getMaxSize(); ++Offset) |
984 | if ((*FunctionData)[Offset] != 0) |
985 | break; |
986 | |
987 | return Offset - StartOffset; |
988 | }; |
989 | |
990 | // Accept the whole padding area filled with breakpoints. |
991 | auto isBreakpoint = std::bind(f: &MCPlusBuilder::isBreakpoint, args: MIB.get(), args: _1); |
992 | if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize()) |
993 | return true; |
994 | |
995 | auto isNoop = std::bind(f: &MCPlusBuilder::isNoop, args: MIB.get(), args: _1); |
996 | |
997 | // Some functions have a jump to the next function or to the padding area |
998 | // inserted after the body. |
999 | auto isSkipJump = [&](const MCInst &Instr) { |
1000 | uint64_t TargetAddress = 0; |
1001 | if (MIB->isUnconditionalBranch(Inst: Instr) && |
1002 | MIB->evaluateBranch(Inst: Instr, Addr: InstrAddress, Size: InstrSize, Target&: TargetAddress)) { |
1003 | if (TargetAddress >= InstrAddress + InstrSize && |
1004 | TargetAddress <= BF.getAddress() + BF.getMaxSize()) { |
1005 | return true; |
1006 | } |
1007 | } |
1008 | return false; |
1009 | }; |
1010 | |
1011 | // Skip over nops, jumps, and zero padding. Allow interleaving (this happens). |
1012 | while (skipInstructions(isNoop) || skipInstructions(isSkipJump) || |
1013 | skipZeros()) |
1014 | ; |
1015 | |
1016 | if (Offset == BF.getMaxSize()) |
1017 | return true; |
1018 | |
1019 | if (opts::Verbosity >= 1) { |
1020 | this->errs() << "BOLT-WARNING: bad padding at address 0x" |
1021 | << Twine::utohexstr(Val: BF.getAddress() + BF.getSize()) |
1022 | << " starting at offset " << (Offset - BF.getSize()) |
1023 | << " in function " << BF << '\n' |
1024 | << FunctionData->slice(N: BF.getSize(), |
1025 | M: BF.getMaxSize() - BF.getSize()) |
1026 | << '\n'; |
1027 | } |
1028 | |
1029 | return false; |
1030 | } |
1031 | |
1032 | void BinaryContext::adjustCodePadding() { |
1033 | for (auto &BFI : BinaryFunctions) { |
1034 | BinaryFunction &BF = BFI.second; |
1035 | if (!shouldEmit(Function: BF)) |
1036 | continue; |
1037 | |
1038 | if (!hasValidCodePadding(BF)) { |
1039 | if (HasRelocations) { |
1040 | if (opts::Verbosity >= 1) { |
1041 | this->outs() << "BOLT-INFO: function " << BF |
1042 | << " has invalid padding. Ignoring the function.\n" ; |
1043 | } |
1044 | BF.setIgnored(); |
1045 | } else { |
1046 | BF.setMaxSize(BF.getSize()); |
1047 | } |
1048 | } |
1049 | } |
1050 | } |
1051 | |
1052 | MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address, |
1053 | uint64_t Size, |
1054 | uint16_t Alignment, |
1055 | unsigned Flags) { |
1056 | // Register the name with MCContext. |
1057 | MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name); |
1058 | |
1059 | auto GAI = BinaryDataMap.find(x: Address); |
1060 | BinaryData *BD; |
1061 | if (GAI == BinaryDataMap.end()) { |
1062 | ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address); |
1063 | BinarySection &Section = |
1064 | SectionOrErr ? SectionOrErr.get() : absoluteSection(); |
1065 | BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1, |
1066 | Section, Flags); |
1067 | GAI = BinaryDataMap.emplace(args&: Address, args&: BD).first; |
1068 | GlobalSymbols[Name] = BD; |
1069 | updateObjectNesting(GAI); |
1070 | } else { |
1071 | BD = GAI->second; |
1072 | if (!BD->hasName(Name)) { |
1073 | GlobalSymbols[Name] = BD; |
1074 | BD->Symbols.push_back(x: Symbol); |
1075 | } |
1076 | } |
1077 | |
1078 | return Symbol; |
1079 | } |
1080 | |
1081 | const BinaryData * |
1082 | BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const { |
1083 | auto NI = BinaryDataMap.lower_bound(x: Address); |
1084 | auto End = BinaryDataMap.end(); |
1085 | if ((NI != End && Address == NI->first) || |
1086 | ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) { |
1087 | if (NI->second->containsAddress(Address)) |
1088 | return NI->second; |
1089 | |
1090 | // If this is a sub-symbol, see if a parent data contains the address. |
1091 | const BinaryData *BD = NI->second->getParent(); |
1092 | while (BD) { |
1093 | if (BD->containsAddress(Address)) |
1094 | return BD; |
1095 | BD = BD->getParent(); |
1096 | } |
1097 | } |
1098 | return nullptr; |
1099 | } |
1100 | |
1101 | BinaryData *BinaryContext::getGOTSymbol() { |
1102 | // First tries to find a global symbol with that name |
1103 | BinaryData *GOTSymBD = getBinaryDataByName(Name: "_GLOBAL_OFFSET_TABLE_" ); |
1104 | if (GOTSymBD) |
1105 | return GOTSymBD; |
1106 | |
1107 | // This symbol might be hidden from run-time link, so fetch the local |
1108 | // definition if available. |
1109 | GOTSymBD = getBinaryDataByName(Name: "_GLOBAL_OFFSET_TABLE_/1" ); |
1110 | if (!GOTSymBD) |
1111 | return nullptr; |
1112 | |
1113 | // If the local symbol is not unique, fail |
1114 | unsigned Index = 2; |
1115 | SmallString<30> Storage; |
1116 | while (const BinaryData *BD = |
1117 | getBinaryDataByName(Name: Twine("_GLOBAL_OFFSET_TABLE_/" ) |
1118 | .concat(Suffix: Twine(Index++)) |
1119 | .toStringRef(Out&: Storage))) |
1120 | if (BD->getAddress() != GOTSymBD->getAddress()) |
1121 | return nullptr; |
1122 | |
1123 | return GOTSymBD; |
1124 | } |
1125 | |
1126 | bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) { |
1127 | auto NI = BinaryDataMap.find(x: Address); |
1128 | assert(NI != BinaryDataMap.end()); |
1129 | if (NI == BinaryDataMap.end()) |
1130 | return false; |
1131 | // TODO: it's possible that a jump table starts at the same address |
1132 | // as a larger blob of private data. When we set the size of the |
1133 | // jump table, it might be smaller than the total blob size. In this |
1134 | // case we just leave the original size since (currently) it won't really |
1135 | // affect anything. |
1136 | assert((!NI->second->Size || NI->second->Size == Size || |
1137 | (NI->second->isJumpTable() && NI->second->Size > Size)) && |
1138 | "can't change the size of a symbol that has already had its " |
1139 | "size set" ); |
1140 | if (!NI->second->Size) { |
1141 | NI->second->Size = Size; |
1142 | updateObjectNesting(GAI: NI); |
1143 | return true; |
1144 | } |
1145 | return false; |
1146 | } |
1147 | |
1148 | void BinaryContext::generateSymbolHashes() { |
1149 | auto isPadding = [](const BinaryData &BD) { |
1150 | StringRef Contents = BD.getSection().getContents(); |
1151 | StringRef SymData = Contents.substr(Start: BD.getOffset(), N: BD.getSize()); |
1152 | return (BD.getName().starts_with(Prefix: "HOLEat" ) || |
1153 | SymData.find_first_not_of(C: 0) == StringRef::npos); |
1154 | }; |
1155 | |
1156 | uint64_t NumCollisions = 0; |
1157 | for (auto &Entry : BinaryDataMap) { |
1158 | BinaryData &BD = *Entry.second; |
1159 | StringRef Name = BD.getName(); |
1160 | |
1161 | if (!isInternalSymbolName(Name)) |
1162 | continue; |
1163 | |
1164 | // First check if a non-anonymous alias exists and move it to the front. |
1165 | if (BD.getSymbols().size() > 1) { |
1166 | auto Itr = llvm::find_if(Range&: BD.getSymbols(), P: [&](const MCSymbol *Symbol) { |
1167 | return !isInternalSymbolName(Name: Symbol->getName()); |
1168 | }); |
1169 | if (Itr != BD.getSymbols().end()) { |
1170 | size_t Idx = std::distance(first: BD.getSymbols().begin(), last: Itr); |
1171 | std::swap(a&: BD.getSymbols()[0], b&: BD.getSymbols()[Idx]); |
1172 | continue; |
1173 | } |
1174 | } |
1175 | |
1176 | // We have to skip 0 size symbols since they will all collide. |
1177 | if (BD.getSize() == 0) { |
1178 | continue; |
1179 | } |
1180 | |
1181 | const uint64_t Hash = BD.getSection().hash(BD); |
1182 | const size_t Idx = Name.find(Str: "0x" ); |
1183 | std::string NewName = |
1184 | (Twine(Name.substr(Start: 0, N: Idx)) + "_" + Twine::utohexstr(Val: Hash)).str(); |
1185 | if (getBinaryDataByName(Name: NewName)) { |
1186 | // Ignore collisions for symbols that appear to be padding |
1187 | // (i.e. all zeros or a "hole") |
1188 | if (!isPadding(BD)) { |
1189 | if (opts::Verbosity) { |
1190 | this->errs() << "BOLT-WARNING: collision detected when hashing " << BD |
1191 | << " with new name (" << NewName << "), skipping.\n" ; |
1192 | } |
1193 | ++NumCollisions; |
1194 | } |
1195 | continue; |
1196 | } |
1197 | BD.Symbols.insert(position: BD.Symbols.begin(), x: Ctx->getOrCreateSymbol(Name: NewName)); |
1198 | GlobalSymbols[NewName] = &BD; |
1199 | } |
1200 | if (NumCollisions) { |
1201 | this->errs() << "BOLT-WARNING: " << NumCollisions |
1202 | << " collisions detected while hashing binary objects" ; |
1203 | if (!opts::Verbosity) |
1204 | this->errs() << ". Use -v=1 to see the list." ; |
1205 | this->errs() << '\n'; |
1206 | } |
1207 | } |
1208 | |
1209 | bool BinaryContext::registerFragment(BinaryFunction &TargetFunction, |
1210 | BinaryFunction &Function) const { |
1211 | assert(TargetFunction.isFragment() && "TargetFunction must be a fragment" ); |
1212 | if (TargetFunction.isChildOf(Other: Function)) |
1213 | return true; |
1214 | TargetFunction.addParentFragment(BF&: Function); |
1215 | Function.addFragment(BF&: TargetFunction); |
1216 | if (!HasRelocations) { |
1217 | TargetFunction.setSimple(false); |
1218 | Function.setSimple(false); |
1219 | } |
1220 | if (opts::Verbosity >= 1) { |
1221 | this->outs() << "BOLT-INFO: marking " << TargetFunction |
1222 | << " as a fragment of " << Function << '\n'; |
1223 | } |
1224 | return true; |
1225 | } |
1226 | |
1227 | void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF, |
1228 | MCInst &LoadLowBits, |
1229 | MCInst &LoadHiBits, |
1230 | uint64_t Target) { |
1231 | const MCSymbol *TargetSymbol; |
1232 | uint64_t Addend = 0; |
1233 | std::tie(args&: TargetSymbol, args&: Addend) = handleAddressRef(Address: Target, BF, |
1234 | /*IsPCRel*/ true); |
1235 | int64_t Val; |
1236 | MIB->replaceImmWithSymbolRef(Inst&: LoadHiBits, Symbol: TargetSymbol, Addend, Ctx: Ctx.get(), Value&: Val, |
1237 | RelType: ELF::R_AARCH64_ADR_PREL_PG_HI21); |
1238 | MIB->replaceImmWithSymbolRef(Inst&: LoadLowBits, Symbol: TargetSymbol, Addend, Ctx: Ctx.get(), |
1239 | Value&: Val, RelType: ELF::R_AARCH64_ADD_ABS_LO12_NC); |
1240 | } |
1241 | |
1242 | bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) { |
1243 | BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address); |
1244 | if (TargetFunction) |
1245 | return false; |
1246 | |
1247 | ErrorOr<BinarySection &> Section = getSectionForAddress(Address); |
1248 | assert(Section && "cannot get section for referenced address" ); |
1249 | if (!Section->isText()) |
1250 | return false; |
1251 | |
1252 | bool Ret = false; |
1253 | StringRef SectionContents = Section->getContents(); |
1254 | uint64_t Offset = Address - Section->getAddress(); |
1255 | const uint64_t MaxSize = SectionContents.size() - Offset; |
1256 | const uint8_t *Bytes = |
1257 | reinterpret_cast<const uint8_t *>(SectionContents.data()); |
1258 | ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize); |
1259 | |
1260 | auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions, |
1261 | MCInst &Instruction, uint64_t Offset, |
1262 | uint64_t AbsoluteInstrAddr, |
1263 | uint64_t TotalSize) -> bool { |
1264 | MCInst *TargetHiBits, *TargetLowBits; |
1265 | uint64_t TargetAddress, Count; |
1266 | Count = MIB->matchLinkerVeneer(Begin: Instructions.begin(), End: Instructions.end(), |
1267 | Address: AbsoluteInstrAddr, CurInst: Instruction, TargetHiBits, |
1268 | TargetLowBits, Target&: TargetAddress); |
1269 | if (!Count) |
1270 | return false; |
1271 | |
1272 | if (MatchOnly) |
1273 | return true; |
1274 | |
1275 | // NOTE The target symbol was created during disassemble's |
1276 | // handleExternalReference |
1277 | const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, Prefix: "FUNCat" ); |
1278 | BinaryFunction *Veneer = createBinaryFunction(Name: VeneerSymbol->getName().str(), |
1279 | Section&: *Section, Address, Size: TotalSize); |
1280 | addAdrpAddRelocAArch64(BF&: *Veneer, LoadLowBits&: *TargetLowBits, LoadHiBits&: *TargetHiBits, |
1281 | Target: TargetAddress); |
1282 | MIB->addAnnotation(Inst&: Instruction, Name: "AArch64Veneer" , Val: true); |
1283 | Veneer->addInstruction(Offset, Instruction: std::move(Instruction)); |
1284 | --Count; |
1285 | for (auto It = Instructions.rbegin(); Count != 0; ++It, --Count) { |
1286 | MIB->addAnnotation(Inst&: It->second, Name: "AArch64Veneer" , Val: true); |
1287 | Veneer->addInstruction(Offset: It->first, Instruction: std::move(It->second)); |
1288 | } |
1289 | |
1290 | Veneer->getOrCreateLocalLabel(Address); |
1291 | Veneer->setMaxSize(TotalSize); |
1292 | Veneer->updateState(State: BinaryFunction::State::Disassembled); |
1293 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address |
1294 | << "\n" ); |
1295 | return true; |
1296 | }; |
1297 | |
1298 | uint64_t Size = 0, TotalSize = 0; |
1299 | BinaryFunction::InstrMapType VeneerInstructions; |
1300 | for (Offset = 0; Offset < MaxSize; Offset += Size) { |
1301 | MCInst Instruction; |
1302 | const uint64_t AbsoluteInstrAddr = Address + Offset; |
1303 | if (!SymbolicDisAsm->getInstruction(Instr&: Instruction, Size, Bytes: Data.slice(N: Offset), |
1304 | Address: AbsoluteInstrAddr, CStream&: nulls())) |
1305 | break; |
1306 | |
1307 | TotalSize += Size; |
1308 | if (MIB->isBranch(Inst: Instruction)) { |
1309 | Ret = matchVeneer(VeneerInstructions, Instruction, Offset, |
1310 | AbsoluteInstrAddr, TotalSize); |
1311 | break; |
1312 | } |
1313 | |
1314 | VeneerInstructions.emplace(args&: Offset, args: std::move(Instruction)); |
1315 | } |
1316 | |
1317 | return Ret; |
1318 | } |
1319 | |
1320 | void BinaryContext::processInterproceduralReferences() { |
1321 | for (const std::pair<BinaryFunction *, uint64_t> &It : |
1322 | InterproceduralReferences) { |
1323 | BinaryFunction &Function = *It.first; |
1324 | uint64_t Address = It.second; |
1325 | if (!Address || Function.isIgnored()) |
1326 | continue; |
1327 | |
1328 | BinaryFunction *TargetFunction = |
1329 | getBinaryFunctionContainingAddress(Address); |
1330 | if (&Function == TargetFunction) |
1331 | continue; |
1332 | |
1333 | if (TargetFunction) { |
1334 | if (TargetFunction->isFragment() && |
1335 | !TargetFunction->isChildOf(Other: Function)) { |
1336 | this->errs() |
1337 | << "BOLT-WARNING: interprocedural reference between unrelated " |
1338 | "fragments: " |
1339 | << Function.getPrintName() << " and " |
1340 | << TargetFunction->getPrintName() << '\n'; |
1341 | } |
1342 | if (uint64_t Offset = Address - TargetFunction->getAddress()) |
1343 | TargetFunction->addEntryPointAtOffset(Offset); |
1344 | |
1345 | continue; |
1346 | } |
1347 | |
1348 | // Check if address falls in function padding space - this could be |
1349 | // unmarked data in code. In this case adjust the padding space size. |
1350 | ErrorOr<BinarySection &> Section = getSectionForAddress(Address); |
1351 | assert(Section && "cannot get section for referenced address" ); |
1352 | |
1353 | if (!Section->isText()) |
1354 | continue; |
1355 | |
1356 | // PLT requires special handling and could be ignored in this context. |
1357 | StringRef SectionName = Section->getName(); |
1358 | if (SectionName == ".plt" || SectionName == ".plt.got" ) |
1359 | continue; |
1360 | |
1361 | // Check if it is aarch64 veneer written at Address |
1362 | if (isAArch64() && handleAArch64Veneer(Address)) |
1363 | continue; |
1364 | |
1365 | if (opts::processAllFunctions()) { |
1366 | this->errs() << "BOLT-ERROR: cannot process binaries with unmarked " |
1367 | << "object in code at address 0x" |
1368 | << Twine::utohexstr(Val: Address) << " belonging to section " |
1369 | << SectionName << " in current mode\n" ; |
1370 | exit(status: 1); |
1371 | } |
1372 | |
1373 | TargetFunction = getBinaryFunctionContainingAddress(Address, |
1374 | /*CheckPastEnd=*/false, |
1375 | /*UseMaxSize=*/true); |
1376 | // We are not going to overwrite non-simple functions, but for simple |
1377 | // ones - adjust the padding size. |
1378 | if (TargetFunction && TargetFunction->isSimple()) { |
1379 | this->errs() |
1380 | << "BOLT-WARNING: function " << *TargetFunction |
1381 | << " has an object detected in a padding region at address 0x" |
1382 | << Twine::utohexstr(Val: Address) << '\n'; |
1383 | TargetFunction->setMaxSize(TargetFunction->getSize()); |
1384 | } |
1385 | } |
1386 | |
1387 | InterproceduralReferences.clear(); |
1388 | } |
1389 | |
1390 | void BinaryContext::postProcessSymbolTable() { |
1391 | fixBinaryDataHoles(); |
1392 | bool Valid = true; |
1393 | for (auto &Entry : BinaryDataMap) { |
1394 | BinaryData *BD = Entry.second; |
1395 | if ((BD->getName().starts_with(Prefix: "SYMBOLat" ) || |
1396 | BD->getName().starts_with(Prefix: "DATAat" )) && |
1397 | !BD->getParent() && !BD->getSize() && !BD->isAbsolute() && |
1398 | BD->getSection()) { |
1399 | this->errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD |
1400 | << "\n" ; |
1401 | Valid = false; |
1402 | } |
1403 | } |
1404 | assert(Valid); |
1405 | (void)Valid; |
1406 | generateSymbolHashes(); |
1407 | } |
1408 | |
1409 | void BinaryContext::foldFunction(BinaryFunction &ChildBF, |
1410 | BinaryFunction &ParentBF) { |
1411 | assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() && |
1412 | "cannot merge functions with multiple entry points" ); |
1413 | |
1414 | std::unique_lock<llvm::sys::RWMutex> WriteCtxLock(CtxMutex, std::defer_lock); |
1415 | std::unique_lock<llvm::sys::RWMutex> WriteSymbolMapLock( |
1416 | SymbolToFunctionMapMutex, std::defer_lock); |
1417 | |
1418 | const StringRef ChildName = ChildBF.getOneName(); |
1419 | |
1420 | // Move symbols over and update bookkeeping info. |
1421 | for (MCSymbol *Symbol : ChildBF.getSymbols()) { |
1422 | ParentBF.getSymbols().push_back(Elt: Symbol); |
1423 | WriteSymbolMapLock.lock(); |
1424 | SymbolToFunctionMap[Symbol] = &ParentBF; |
1425 | WriteSymbolMapLock.unlock(); |
1426 | // NB: there's no need to update BinaryDataMap and GlobalSymbols. |
1427 | } |
1428 | ChildBF.getSymbols().clear(); |
1429 | |
1430 | // Move other names the child function is known under. |
1431 | llvm::move(Range&: ChildBF.Aliases, Out: std::back_inserter(x&: ParentBF.Aliases)); |
1432 | ChildBF.Aliases.clear(); |
1433 | |
1434 | if (HasRelocations) { |
1435 | // Merge execution counts of ChildBF into those of ParentBF. |
1436 | // Without relocations, we cannot reliably merge profiles as both functions |
1437 | // continue to exist and either one can be executed. |
1438 | ChildBF.mergeProfileDataInto(BF&: ParentBF); |
1439 | |
1440 | std::shared_lock<llvm::sys::RWMutex> ReadBfsLock(BinaryFunctionsMutex, |
1441 | std::defer_lock); |
1442 | std::unique_lock<llvm::sys::RWMutex> WriteBfsLock(BinaryFunctionsMutex, |
1443 | std::defer_lock); |
1444 | // Remove ChildBF from the global set of functions in relocs mode. |
1445 | ReadBfsLock.lock(); |
1446 | auto FI = BinaryFunctions.find(x: ChildBF.getAddress()); |
1447 | ReadBfsLock.unlock(); |
1448 | |
1449 | assert(FI != BinaryFunctions.end() && "function not found" ); |
1450 | assert(&ChildBF == &FI->second && "function mismatch" ); |
1451 | |
1452 | WriteBfsLock.lock(); |
1453 | ChildBF.clearDisasmState(); |
1454 | FI = BinaryFunctions.erase(position: FI); |
1455 | WriteBfsLock.unlock(); |
1456 | |
1457 | } else { |
1458 | // In non-relocation mode we keep the function, but rename it. |
1459 | std::string NewName = "__ICF_" + ChildName.str(); |
1460 | |
1461 | WriteCtxLock.lock(); |
1462 | ChildBF.getSymbols().push_back(Elt: Ctx->getOrCreateSymbol(Name: NewName)); |
1463 | WriteCtxLock.unlock(); |
1464 | |
1465 | ChildBF.setFolded(&ParentBF); |
1466 | } |
1467 | |
1468 | ParentBF.setHasFunctionsFoldedInto(); |
1469 | } |
1470 | |
1471 | void BinaryContext::fixBinaryDataHoles() { |
1472 | assert(validateObjectNesting() && "object nesting inconsistency detected" ); |
1473 | |
1474 | for (BinarySection &Section : allocatableSections()) { |
1475 | std::vector<std::pair<uint64_t, uint64_t>> Holes; |
1476 | |
1477 | auto isNotHole = [&Section](const binary_data_iterator &Itr) { |
1478 | BinaryData *BD = Itr->second; |
1479 | bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() && |
1480 | (BD->getName().starts_with(Prefix: "SYMBOLat0x" ) || |
1481 | BD->getName().starts_with(Prefix: "DATAat0x" ) || |
1482 | BD->getName().starts_with(Prefix: "ANONYMOUS" ))); |
1483 | return !isHole && BD->getSection() == Section && !BD->getParent(); |
1484 | }; |
1485 | |
1486 | auto BDStart = BinaryDataMap.begin(); |
1487 | auto BDEnd = BinaryDataMap.end(); |
1488 | auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd); |
1489 | auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd); |
1490 | |
1491 | uint64_t EndAddress = Section.getAddress(); |
1492 | |
1493 | while (Itr != End) { |
1494 | if (Itr->second->getAddress() > EndAddress) { |
1495 | uint64_t Gap = Itr->second->getAddress() - EndAddress; |
1496 | Holes.emplace_back(args&: EndAddress, args&: Gap); |
1497 | } |
1498 | EndAddress = Itr->second->getEndAddress(); |
1499 | ++Itr; |
1500 | } |
1501 | |
1502 | if (EndAddress < Section.getEndAddress()) |
1503 | Holes.emplace_back(args&: EndAddress, args: Section.getEndAddress() - EndAddress); |
1504 | |
1505 | // If there is already a symbol at the start of the hole, grow that symbol |
1506 | // to cover the rest. Otherwise, create a new symbol to cover the hole. |
1507 | for (std::pair<uint64_t, uint64_t> &Hole : Holes) { |
1508 | BinaryData *BD = getBinaryDataAtAddress(Address: Hole.first); |
1509 | if (BD) { |
1510 | // BD->getSection() can be != Section if there are sections that |
1511 | // overlap. In this case it is probably safe to just skip the holes |
1512 | // since the overlapping section will not(?) have any symbols in it. |
1513 | if (BD->getSection() == Section) |
1514 | setBinaryDataSize(Address: Hole.first, Size: Hole.second); |
1515 | } else { |
1516 | getOrCreateGlobalSymbol(Address: Hole.first, Prefix: "HOLEat" , Size: Hole.second, Alignment: 1); |
1517 | } |
1518 | } |
1519 | } |
1520 | |
1521 | assert(validateObjectNesting() && "object nesting inconsistency detected" ); |
1522 | assert(validateHoles() && "top level hole detected in object map" ); |
1523 | } |
1524 | |
1525 | void BinaryContext::printGlobalSymbols(raw_ostream &OS) const { |
1526 | const BinarySection *CurrentSection = nullptr; |
1527 | bool FirstSection = true; |
1528 | |
1529 | for (auto &Entry : BinaryDataMap) { |
1530 | const BinaryData *BD = Entry.second; |
1531 | const BinarySection &Section = BD->getSection(); |
1532 | if (FirstSection || Section != *CurrentSection) { |
1533 | uint64_t Address, Size; |
1534 | StringRef Name = Section.getName(); |
1535 | if (Section) { |
1536 | Address = Section.getAddress(); |
1537 | Size = Section.getSize(); |
1538 | } else { |
1539 | Address = BD->getAddress(); |
1540 | Size = BD->getSize(); |
1541 | } |
1542 | OS << "BOLT-INFO: Section " << Name << ", " |
1543 | << "0x" + Twine::utohexstr(Val: Address) << ":" |
1544 | << "0x" + Twine::utohexstr(Val: Address + Size) << "/" << Size << "\n" ; |
1545 | CurrentSection = &Section; |
1546 | FirstSection = false; |
1547 | } |
1548 | |
1549 | OS << "BOLT-INFO: " ; |
1550 | const BinaryData *P = BD->getParent(); |
1551 | while (P) { |
1552 | OS << " " ; |
1553 | P = P->getParent(); |
1554 | } |
1555 | OS << *BD << "\n" ; |
1556 | } |
1557 | } |
1558 | |
1559 | Expected<unsigned> BinaryContext::getDwarfFile( |
1560 | StringRef Directory, StringRef FileName, unsigned FileNumber, |
1561 | std::optional<MD5::MD5Result> Checksum, std::optional<StringRef> Source, |
1562 | unsigned CUID, unsigned DWARFVersion) { |
1563 | DwarfLineTable &Table = DwarfLineTablesCUMap[CUID]; |
1564 | return Table.tryGetFile(Directory, FileName, Checksum, Source, DwarfVersion: DWARFVersion, |
1565 | FileNumber); |
1566 | } |
1567 | |
1568 | unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID, |
1569 | const uint32_t SrcCUID, |
1570 | unsigned FileIndex) { |
1571 | DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(Offset: SrcCUID); |
1572 | const DWARFDebugLine::LineTable *LineTable = |
1573 | DwCtx->getLineTableForUnit(U: SrcUnit); |
1574 | const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = |
1575 | LineTable->Prologue.FileNames; |
1576 | // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 |
1577 | // means empty dir. |
1578 | assert(FileIndex > 0 && FileIndex <= FileNames.size() && |
1579 | "FileIndex out of range for the compilation unit." ); |
1580 | StringRef Dir = "" ; |
1581 | if (FileNames[FileIndex - 1].DirIdx != 0) { |
1582 | if (std::optional<const char *> DirName = dwarf::toString( |
1583 | V: LineTable->Prologue |
1584 | .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) { |
1585 | Dir = *DirName; |
1586 | } |
1587 | } |
1588 | StringRef FileName = "" ; |
1589 | if (std::optional<const char *> FName = |
1590 | dwarf::toString(V: FileNames[FileIndex - 1].Name)) |
1591 | FileName = *FName; |
1592 | assert(FileName != "" ); |
1593 | DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(Offset: DestCUID); |
1594 | return cantFail(ValOrErr: getDwarfFile(Directory: Dir, FileName, FileNumber: 0, Checksum: std::nullopt, Source: std::nullopt, |
1595 | CUID: DestCUID, DWARFVersion: DstUnit->getVersion())); |
1596 | } |
1597 | |
1598 | std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() { |
1599 | std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size()); |
1600 | llvm::transform(Range: llvm::make_second_range(c&: BinaryFunctions), |
1601 | d_first: SortedFunctions.begin(), |
1602 | F: [](BinaryFunction &BF) { return &BF; }); |
1603 | |
1604 | llvm::stable_sort(Range&: SortedFunctions, |
1605 | C: [](const BinaryFunction *A, const BinaryFunction *B) { |
1606 | if (A->hasValidIndex() && B->hasValidIndex()) { |
1607 | return A->getIndex() < B->getIndex(); |
1608 | } |
1609 | return A->hasValidIndex(); |
1610 | }); |
1611 | return SortedFunctions; |
1612 | } |
1613 | |
1614 | std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() { |
1615 | std::vector<BinaryFunction *> AllFunctions; |
1616 | AllFunctions.reserve(n: BinaryFunctions.size() + InjectedBinaryFunctions.size()); |
1617 | llvm::transform(Range: llvm::make_second_range(c&: BinaryFunctions), |
1618 | d_first: std::back_inserter(x&: AllFunctions), |
1619 | F: [](BinaryFunction &BF) { return &BF; }); |
1620 | llvm::copy(Range&: InjectedBinaryFunctions, Out: std::back_inserter(x&: AllFunctions)); |
1621 | |
1622 | return AllFunctions; |
1623 | } |
1624 | |
1625 | std::optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) { |
1626 | auto Iter = DWOCUs.find(x: DWOId); |
1627 | if (Iter == DWOCUs.end()) |
1628 | return std::nullopt; |
1629 | |
1630 | return Iter->second; |
1631 | } |
1632 | |
1633 | DWARFContext *BinaryContext::getDWOContext() const { |
1634 | if (DWOCUs.empty()) |
1635 | return nullptr; |
1636 | return &DWOCUs.begin()->second->getContext(); |
1637 | } |
1638 | |
1639 | /// Handles DWO sections that can either be in .o, .dwo or .dwp files. |
1640 | void BinaryContext::preprocessDWODebugInfo() { |
1641 | for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { |
1642 | DWARFUnit *const DwarfUnit = CU.get(); |
1643 | if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) { |
1644 | std::string DWOName = dwarf::toString( |
1645 | V: DwarfUnit->getUnitDIE().find( |
1646 | Attrs: {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), |
1647 | Default: "" ); |
1648 | SmallString<16> AbsolutePath; |
1649 | if (!opts::CompDirOverride.empty()) { |
1650 | sys::path::append(path&: AbsolutePath, a: opts::CompDirOverride); |
1651 | sys::path::append(path&: AbsolutePath, a: DWOName); |
1652 | } |
1653 | DWARFUnit *DWOCU = |
1654 | DwarfUnit->getNonSkeletonUnitDIE(ExtractUnitDIEOnly: false, DWOAlternativeLocation: AbsolutePath).getDwarfUnit(); |
1655 | if (!DWOCU->isDWOUnit()) { |
1656 | this->outs() |
1657 | << "BOLT-WARNING: Debug Fission: DWO debug information for " |
1658 | << DWOName |
1659 | << " was not retrieved and won't be updated. Please check " |
1660 | "relative path.\n" ; |
1661 | continue; |
1662 | } |
1663 | DWOCUs[*DWOId] = DWOCU; |
1664 | } |
1665 | } |
1666 | if (!DWOCUs.empty()) |
1667 | this->outs() << "BOLT-INFO: processing split DWARF\n" ; |
1668 | } |
1669 | |
1670 | void BinaryContext::preprocessDebugInfo() { |
1671 | struct CURange { |
1672 | uint64_t LowPC; |
1673 | uint64_t HighPC; |
1674 | DWARFUnit *Unit; |
1675 | |
1676 | bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; } |
1677 | }; |
1678 | |
1679 | // Building a map of address ranges to CUs similar to .debug_aranges and use |
1680 | // it to assign CU to functions. |
1681 | std::vector<CURange> AllRanges; |
1682 | AllRanges.reserve(n: DwCtx->getNumCompileUnits()); |
1683 | for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { |
1684 | Expected<DWARFAddressRangesVector> RangesOrError = |
1685 | CU->getUnitDIE().getAddressRanges(); |
1686 | if (!RangesOrError) { |
1687 | consumeError(Err: RangesOrError.takeError()); |
1688 | continue; |
1689 | } |
1690 | for (DWARFAddressRange &Range : *RangesOrError) { |
1691 | // Parts of the debug info could be invalidated due to corresponding code |
1692 | // being removed from the binary by the linker. Hence we check if the |
1693 | // address is a valid one. |
1694 | if (containsAddress(Address: Range.LowPC)) |
1695 | AllRanges.emplace_back(args: CURange{.LowPC: Range.LowPC, .HighPC: Range.HighPC, .Unit: CU.get()}); |
1696 | } |
1697 | |
1698 | ContainsDwarf5 |= CU->getVersion() >= 5; |
1699 | ContainsDwarfLegacy |= CU->getVersion() < 5; |
1700 | } |
1701 | |
1702 | llvm::sort(C&: AllRanges); |
1703 | for (auto &KV : BinaryFunctions) { |
1704 | const uint64_t FunctionAddress = KV.first; |
1705 | BinaryFunction &Function = KV.second; |
1706 | |
1707 | auto It = llvm::partition_point( |
1708 | Range&: AllRanges, P: [=](CURange R) { return R.HighPC <= FunctionAddress; }); |
1709 | if (It != AllRanges.end() && It->LowPC <= FunctionAddress) |
1710 | Function.setDWARFUnit(It->Unit); |
1711 | } |
1712 | |
1713 | // Discover units with debug info that needs to be updated. |
1714 | for (const auto &KV : BinaryFunctions) { |
1715 | const BinaryFunction &BF = KV.second; |
1716 | if (shouldEmit(Function: BF) && BF.getDWARFUnit()) |
1717 | ProcessedCUs.insert(x: BF.getDWARFUnit()); |
1718 | } |
1719 | |
1720 | // Clear debug info for functions from units that we are not going to process. |
1721 | for (auto &KV : BinaryFunctions) { |
1722 | BinaryFunction &BF = KV.second; |
1723 | if (BF.getDWARFUnit() && !ProcessedCUs.count(x: BF.getDWARFUnit())) |
1724 | BF.setDWARFUnit(nullptr); |
1725 | } |
1726 | |
1727 | if (opts::Verbosity >= 1) { |
1728 | this->outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of " |
1729 | << DwCtx->getNumCompileUnits() << " CUs will be updated\n" ; |
1730 | } |
1731 | |
1732 | preprocessDWODebugInfo(); |
1733 | |
1734 | // Populate MCContext with DWARF files from all units. |
1735 | StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix(); |
1736 | for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { |
1737 | const uint64_t CUID = CU->getOffset(); |
1738 | DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID); |
1739 | BinaryLineTable.setLabel(Ctx->getOrCreateSymbol( |
1740 | Name: GlobalPrefix + "line_table_start" + Twine(CUID))); |
1741 | |
1742 | if (!ProcessedCUs.count(x: CU.get())) |
1743 | continue; |
1744 | |
1745 | const DWARFDebugLine::LineTable *LineTable = |
1746 | DwCtx->getLineTableForUnit(U: CU.get()); |
1747 | const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = |
1748 | LineTable->Prologue.FileNames; |
1749 | |
1750 | uint16_t DwarfVersion = LineTable->Prologue.getVersion(); |
1751 | if (DwarfVersion >= 5) { |
1752 | std::optional<MD5::MD5Result> Checksum; |
1753 | if (LineTable->Prologue.ContentTypes.HasMD5) |
1754 | Checksum = LineTable->Prologue.FileNames[0].Checksum; |
1755 | std::optional<const char *> Name = |
1756 | dwarf::toString(V: CU->getUnitDIE().find(Attr: dwarf::DW_AT_name), Default: nullptr); |
1757 | if (std::optional<uint64_t> DWOID = CU->getDWOId()) { |
1758 | auto Iter = DWOCUs.find(x: *DWOID); |
1759 | assert(Iter != DWOCUs.end() && "DWO CU was not found." ); |
1760 | Name = dwarf::toString( |
1761 | V: Iter->second->getUnitDIE().find(Attr: dwarf::DW_AT_name), Default: nullptr); |
1762 | } |
1763 | BinaryLineTable.setRootFile(Directory: CU->getCompilationDir(), FileName: *Name, Checksum, |
1764 | Source: std::nullopt); |
1765 | } |
1766 | |
1767 | BinaryLineTable.setDwarfVersion(DwarfVersion); |
1768 | |
1769 | // Assign a unique label to every line table, one per CU. |
1770 | // Make sure empty debug line tables are registered too. |
1771 | if (FileNames.empty()) { |
1772 | cantFail(ValOrErr: getDwarfFile(Directory: "" , FileName: "<unknown>" , FileNumber: 0, Checksum: std::nullopt, Source: std::nullopt, |
1773 | CUID, DWARFVersion: DwarfVersion)); |
1774 | continue; |
1775 | } |
1776 | const uint32_t Offset = DwarfVersion < 5 ? 1 : 0; |
1777 | for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) { |
1778 | // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 |
1779 | // means empty dir. |
1780 | StringRef Dir = "" ; |
1781 | if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5) |
1782 | if (std::optional<const char *> DirName = dwarf::toString( |
1783 | V: LineTable->Prologue |
1784 | .IncludeDirectories[FileNames[I].DirIdx - Offset])) |
1785 | Dir = *DirName; |
1786 | StringRef FileName = "" ; |
1787 | if (std::optional<const char *> FName = |
1788 | dwarf::toString(V: FileNames[I].Name)) |
1789 | FileName = *FName; |
1790 | assert(FileName != "" ); |
1791 | std::optional<MD5::MD5Result> Checksum; |
1792 | if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5) |
1793 | Checksum = LineTable->Prologue.FileNames[I].Checksum; |
1794 | cantFail(ValOrErr: getDwarfFile(Directory: Dir, FileName, FileNumber: 0, Checksum, Source: std::nullopt, CUID, |
1795 | DWARFVersion: DwarfVersion)); |
1796 | } |
1797 | } |
1798 | } |
1799 | |
1800 | bool BinaryContext::shouldEmit(const BinaryFunction &Function) const { |
1801 | if (Function.isPseudo()) |
1802 | return false; |
1803 | |
1804 | if (opts::processAllFunctions()) |
1805 | return true; |
1806 | |
1807 | if (Function.isIgnored()) |
1808 | return false; |
1809 | |
1810 | // In relocation mode we will emit non-simple functions with CFG. |
1811 | // If the function does not have a CFG it should be marked as ignored. |
1812 | return HasRelocations || Function.isSimple(); |
1813 | } |
1814 | |
1815 | void BinaryContext::dump(const MCInst &Inst) const { |
1816 | if (LLVM_UNLIKELY(!InstPrinter)) { |
1817 | dbgs() << "Cannot dump for InstPrinter is not initialized.\n" ; |
1818 | return; |
1819 | } |
1820 | InstPrinter->printInst(MI: &Inst, Address: 0, Annot: "" , STI: *STI, OS&: dbgs()); |
1821 | dbgs() << "\n" ; |
1822 | } |
1823 | |
1824 | void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) { |
1825 | uint32_t Operation = Inst.getOperation(); |
1826 | switch (Operation) { |
1827 | case MCCFIInstruction::OpSameValue: |
1828 | OS << "OpSameValue Reg" << Inst.getRegister(); |
1829 | break; |
1830 | case MCCFIInstruction::OpRememberState: |
1831 | OS << "OpRememberState" ; |
1832 | break; |
1833 | case MCCFIInstruction::OpRestoreState: |
1834 | OS << "OpRestoreState" ; |
1835 | break; |
1836 | case MCCFIInstruction::OpOffset: |
1837 | OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); |
1838 | break; |
1839 | case MCCFIInstruction::OpDefCfaRegister: |
1840 | OS << "OpDefCfaRegister Reg" << Inst.getRegister(); |
1841 | break; |
1842 | case MCCFIInstruction::OpDefCfaOffset: |
1843 | OS << "OpDefCfaOffset " << Inst.getOffset(); |
1844 | break; |
1845 | case MCCFIInstruction::OpDefCfa: |
1846 | OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset(); |
1847 | break; |
1848 | case MCCFIInstruction::OpRelOffset: |
1849 | OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); |
1850 | break; |
1851 | case MCCFIInstruction::OpAdjustCfaOffset: |
1852 | OS << "OfAdjustCfaOffset " << Inst.getOffset(); |
1853 | break; |
1854 | case MCCFIInstruction::OpEscape: |
1855 | OS << "OpEscape" ; |
1856 | break; |
1857 | case MCCFIInstruction::OpRestore: |
1858 | OS << "OpRestore Reg" << Inst.getRegister(); |
1859 | break; |
1860 | case MCCFIInstruction::OpUndefined: |
1861 | OS << "OpUndefined Reg" << Inst.getRegister(); |
1862 | break; |
1863 | case MCCFIInstruction::OpRegister: |
1864 | OS << "OpRegister Reg" << Inst.getRegister() << " Reg" |
1865 | << Inst.getRegister2(); |
1866 | break; |
1867 | case MCCFIInstruction::OpWindowSave: |
1868 | OS << "OpWindowSave" ; |
1869 | break; |
1870 | case MCCFIInstruction::OpGnuArgsSize: |
1871 | OS << "OpGnuArgsSize" ; |
1872 | break; |
1873 | default: |
1874 | OS << "Op#" << Operation; |
1875 | break; |
1876 | } |
1877 | } |
1878 | |
1879 | MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const { |
1880 | // For aarch64 and riscv, the ABI defines mapping symbols so we identify data |
1881 | // in the code section (see IHI0056B). $x identifies a symbol starting code or |
1882 | // the end of a data chunk inside code, $d identifies start of data. |
1883 | if (isX86() || ELFSymbolRef(Symbol).getSize()) |
1884 | return MarkerSymType::NONE; |
1885 | |
1886 | Expected<StringRef> NameOrError = Symbol.getName(); |
1887 | Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType(); |
1888 | |
1889 | if (!TypeOrError || !NameOrError) |
1890 | return MarkerSymType::NONE; |
1891 | |
1892 | if (*TypeOrError != SymbolRef::ST_Unknown) |
1893 | return MarkerSymType::NONE; |
1894 | |
1895 | if (*NameOrError == "$x" || NameOrError->starts_with(Prefix: "$x." )) |
1896 | return MarkerSymType::CODE; |
1897 | |
1898 | // $x<ISA> |
1899 | if (isRISCV() && NameOrError->starts_with(Prefix: "$x" )) |
1900 | return MarkerSymType::CODE; |
1901 | |
1902 | if (*NameOrError == "$d" || NameOrError->starts_with(Prefix: "$d." )) |
1903 | return MarkerSymType::DATA; |
1904 | |
1905 | return MarkerSymType::NONE; |
1906 | } |
1907 | |
1908 | bool BinaryContext::isMarker(const SymbolRef &Symbol) const { |
1909 | return getMarkerType(Symbol) != MarkerSymType::NONE; |
1910 | } |
1911 | |
1912 | static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction, |
1913 | const BinaryFunction *Function, |
1914 | DWARFContext *DwCtx) { |
1915 | DebugLineTableRowRef RowRef = |
1916 | DebugLineTableRowRef::fromSMLoc(Loc: Instruction.getLoc()); |
1917 | if (RowRef == DebugLineTableRowRef::NULL_ROW) |
1918 | return; |
1919 | |
1920 | const DWARFDebugLine::LineTable *LineTable; |
1921 | if (Function && Function->getDWARFUnit() && |
1922 | Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) { |
1923 | LineTable = Function->getDWARFLineTable(); |
1924 | } else { |
1925 | LineTable = DwCtx->getLineTableForUnit( |
1926 | U: DwCtx->getCompileUnitForOffset(Offset: RowRef.DwCompileUnitIndex)); |
1927 | } |
1928 | assert(LineTable && "line table expected for instruction with debug info" ); |
1929 | |
1930 | const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; |
1931 | StringRef FileName = "" ; |
1932 | if (std::optional<const char *> FName = |
1933 | dwarf::toString(V: LineTable->Prologue.FileNames[Row.File - 1].Name)) |
1934 | FileName = *FName; |
1935 | OS << " # debug line " << FileName << ":" << Row.Line; |
1936 | if (Row.Column) |
1937 | OS << ":" << Row.Column; |
1938 | if (Row.Discriminator) |
1939 | OS << " discriminator:" << Row.Discriminator; |
1940 | } |
1941 | |
1942 | void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, |
1943 | uint64_t Offset, |
1944 | const BinaryFunction *Function, |
1945 | bool PrintMCInst, bool PrintMemData, |
1946 | bool PrintRelocations, |
1947 | StringRef Endl) const { |
1948 | OS << format(Fmt: " %08" PRIx64 ": " , Vals: Offset); |
1949 | if (MIB->isCFI(Inst: Instruction)) { |
1950 | uint32_t Offset = Instruction.getOperand(i: 0).getImm(); |
1951 | OS << "\t!CFI\t$" << Offset << "\t; " ; |
1952 | if (Function) |
1953 | printCFI(OS, Inst: *Function->getCFIFor(Instr: Instruction)); |
1954 | OS << Endl; |
1955 | return; |
1956 | } |
1957 | if (std::optional<uint32_t> DynamicID = |
1958 | MIB->getDynamicBranchID(Inst: Instruction)) { |
1959 | OS << "\tjit\t" << MIB->getTargetSymbol(Inst: Instruction)->getName() |
1960 | << " # ID: " << DynamicID; |
1961 | } else { |
1962 | InstPrinter->printInst(MI: &Instruction, Address: 0, Annot: "" , STI: *STI, OS); |
1963 | } |
1964 | if (MIB->isCall(Inst: Instruction)) { |
1965 | if (MIB->isTailCall(Inst: Instruction)) |
1966 | OS << " # TAILCALL " ; |
1967 | if (MIB->isInvoke(Inst: Instruction)) { |
1968 | const std::optional<MCPlus::MCLandingPad> EHInfo = |
1969 | MIB->getEHInfo(Inst: Instruction); |
1970 | OS << " # handler: " ; |
1971 | if (EHInfo->first) |
1972 | OS << *EHInfo->first; |
1973 | else |
1974 | OS << '0'; |
1975 | OS << "; action: " << EHInfo->second; |
1976 | const int64_t GnuArgsSize = MIB->getGnuArgsSize(Inst: Instruction); |
1977 | if (GnuArgsSize >= 0) |
1978 | OS << "; GNU_args_size = " << GnuArgsSize; |
1979 | } |
1980 | } else if (MIB->isIndirectBranch(Inst: Instruction)) { |
1981 | if (uint64_t JTAddress = MIB->getJumpTable(Inst: Instruction)) { |
1982 | OS << " # JUMPTABLE @0x" << Twine::utohexstr(Val: JTAddress); |
1983 | } else { |
1984 | OS << " # UNKNOWN CONTROL FLOW" ; |
1985 | } |
1986 | } |
1987 | if (std::optional<uint32_t> Offset = MIB->getOffset(Inst: Instruction)) |
1988 | OS << " # Offset: " << *Offset; |
1989 | if (std::optional<uint32_t> Size = MIB->getSize(Inst: Instruction)) |
1990 | OS << " # Size: " << *Size; |
1991 | if (MCSymbol *Label = MIB->getInstLabel(Inst: Instruction)) |
1992 | OS << " # Label: " << *Label; |
1993 | |
1994 | MIB->printAnnotations(Inst: Instruction, OS); |
1995 | |
1996 | if (opts::PrintDebugInfo) |
1997 | printDebugInfo(OS, Instruction, Function, DwCtx: DwCtx.get()); |
1998 | |
1999 | if ((opts::PrintRelocations || PrintRelocations) && Function) { |
2000 | const uint64_t Size = computeCodeSize(Beg: &Instruction, End: &Instruction + 1); |
2001 | Function->printRelocations(OS, Offset, Size); |
2002 | } |
2003 | |
2004 | OS << Endl; |
2005 | |
2006 | if (PrintMCInst) { |
2007 | Instruction.dump_pretty(OS, Printer: InstPrinter.get()); |
2008 | OS << Endl; |
2009 | } |
2010 | } |
2011 | |
2012 | std::optional<uint64_t> |
2013 | BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress, |
2014 | uint64_t FileOffset) const { |
2015 | // Find a segment with a matching file offset. |
2016 | for (auto &KV : SegmentMapInfo) { |
2017 | const SegmentInfo &SegInfo = KV.second; |
2018 | // FileOffset is got from perf event, |
2019 | // and it is equal to alignDown(SegInfo.FileOffset, pagesize). |
2020 | // If the pagesize is not equal to SegInfo.Alignment. |
2021 | // FileOffset and SegInfo.FileOffset should be aligned first, |
2022 | // and then judge whether they are equal. |
2023 | if (alignDown(Value: SegInfo.FileOffset, Align: SegInfo.Alignment) == |
2024 | alignDown(Value: FileOffset, Align: SegInfo.Alignment)) { |
2025 | // The function's offset from base address in VAS is aligned by pagesize |
2026 | // instead of SegInfo.Alignment. Pagesize can't be got from perf events. |
2027 | // However, The ELF document says that SegInfo.FileOffset should equal |
2028 | // to SegInfo.Address, modulo the pagesize. |
2029 | // Reference: https://refspecs.linuxfoundation.org/elf/elf.pdf |
2030 | |
2031 | // So alignDown(SegInfo.Address, pagesize) can be calculated by: |
2032 | // alignDown(SegInfo.Address, pagesize) |
2033 | // = SegInfo.Address - (SegInfo.Address % pagesize) |
2034 | // = SegInfo.Address - (SegInfo.FileOffset % pagesize) |
2035 | // = SegInfo.Address - SegInfo.FileOffset + |
2036 | // alignDown(SegInfo.FileOffset, pagesize) |
2037 | // = SegInfo.Address - SegInfo.FileOffset + FileOffset |
2038 | return MMapAddress - (SegInfo.Address - SegInfo.FileOffset + FileOffset); |
2039 | } |
2040 | } |
2041 | |
2042 | return std::nullopt; |
2043 | } |
2044 | |
2045 | ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) { |
2046 | auto SI = AddressToSection.upper_bound(x: Address); |
2047 | if (SI != AddressToSection.begin()) { |
2048 | --SI; |
2049 | uint64_t UpperBound = SI->first + SI->second->getSize(); |
2050 | if (!SI->second->getSize()) |
2051 | UpperBound += 1; |
2052 | if (UpperBound > Address) |
2053 | return *SI->second; |
2054 | } |
2055 | return std::make_error_code(e: std::errc::bad_address); |
2056 | } |
2057 | |
2058 | ErrorOr<StringRef> |
2059 | BinaryContext::getSectionNameForAddress(uint64_t Address) const { |
2060 | if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address)) |
2061 | return Section->getName(); |
2062 | return std::make_error_code(e: std::errc::bad_address); |
2063 | } |
2064 | |
2065 | BinarySection &BinaryContext::registerSection(BinarySection *Section) { |
2066 | auto Res = Sections.insert(x: Section); |
2067 | (void)Res; |
2068 | assert(Res.second && "can't register the same section twice." ); |
2069 | |
2070 | // Only register allocatable sections in the AddressToSection map. |
2071 | if (Section->isAllocatable() && Section->getAddress()) |
2072 | AddressToSection.insert(x: std::make_pair(x: Section->getAddress(), y&: Section)); |
2073 | NameToSection.insert( |
2074 | x: std::make_pair(x: std::string(Section->getName()), y&: Section)); |
2075 | if (Section->hasSectionRef()) |
2076 | SectionRefToBinarySection.insert( |
2077 | KV: std::make_pair(x: Section->getSectionRef(), y&: Section)); |
2078 | |
2079 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n" ); |
2080 | return *Section; |
2081 | } |
2082 | |
2083 | BinarySection &BinaryContext::registerSection(SectionRef Section) { |
2084 | return registerSection(Section: new BinarySection(*this, Section)); |
2085 | } |
2086 | |
2087 | BinarySection & |
2088 | BinaryContext::registerSection(const Twine &SectionName, |
2089 | const BinarySection &OriginalSection) { |
2090 | return registerSection( |
2091 | Section: new BinarySection(*this, SectionName, OriginalSection)); |
2092 | } |
2093 | |
2094 | BinarySection & |
2095 | BinaryContext::registerOrUpdateSection(const Twine &Name, unsigned ELFType, |
2096 | unsigned ELFFlags, uint8_t *Data, |
2097 | uint64_t Size, unsigned Alignment) { |
2098 | auto NamedSections = getSectionByName(Name); |
2099 | if (NamedSections.begin() != NamedSections.end()) { |
2100 | assert(std::next(NamedSections.begin()) == NamedSections.end() && |
2101 | "can only update unique sections" ); |
2102 | BinarySection *Section = NamedSections.begin()->second; |
2103 | |
2104 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> " ); |
2105 | const bool Flag = Section->isAllocatable(); |
2106 | (void)Flag; |
2107 | Section->update(NewData: Data, NewSize: Size, NewAlignment: Alignment, NewELFType: ELFType, NewELFFlags: ELFFlags); |
2108 | LLVM_DEBUG(dbgs() << *Section << "\n" ); |
2109 | // FIXME: Fix section flags/attributes for MachO. |
2110 | if (isELF()) |
2111 | assert(Flag == Section->isAllocatable() && |
2112 | "can't change section allocation status" ); |
2113 | return *Section; |
2114 | } |
2115 | |
2116 | return registerSection( |
2117 | Section: new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags)); |
2118 | } |
2119 | |
2120 | void BinaryContext::deregisterSectionName(const BinarySection &Section) { |
2121 | auto NameRange = NameToSection.equal_range(x: Section.getName().str()); |
2122 | while (NameRange.first != NameRange.second) { |
2123 | if (NameRange.first->second == &Section) { |
2124 | NameToSection.erase(position: NameRange.first); |
2125 | break; |
2126 | } |
2127 | ++NameRange.first; |
2128 | } |
2129 | } |
2130 | |
2131 | void BinaryContext::deregisterUnusedSections() { |
2132 | ErrorOr<BinarySection &> AbsSection = getUniqueSectionByName(SectionName: "<absolute>" ); |
2133 | for (auto SI = Sections.begin(); SI != Sections.end();) { |
2134 | BinarySection *Section = *SI; |
2135 | // We check getOutputData() instead of getOutputSize() because sometimes |
2136 | // zero-sized .text.cold sections are allocated. |
2137 | if (Section->hasSectionRef() || Section->getOutputData() || |
2138 | (AbsSection && Section == &AbsSection.get())) { |
2139 | ++SI; |
2140 | continue; |
2141 | } |
2142 | |
2143 | LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section->getName() |
2144 | << '\n';); |
2145 | deregisterSectionName(Section: *Section); |
2146 | SI = Sections.erase(position: SI); |
2147 | delete Section; |
2148 | } |
2149 | } |
2150 | |
2151 | bool BinaryContext::deregisterSection(BinarySection &Section) { |
2152 | BinarySection *SectionPtr = &Section; |
2153 | auto Itr = Sections.find(x: SectionPtr); |
2154 | if (Itr != Sections.end()) { |
2155 | auto Range = AddressToSection.equal_range(x: SectionPtr->getAddress()); |
2156 | while (Range.first != Range.second) { |
2157 | if (Range.first->second == SectionPtr) { |
2158 | AddressToSection.erase(position: Range.first); |
2159 | break; |
2160 | } |
2161 | ++Range.first; |
2162 | } |
2163 | |
2164 | deregisterSectionName(Section: *SectionPtr); |
2165 | Sections.erase(position: Itr); |
2166 | delete SectionPtr; |
2167 | return true; |
2168 | } |
2169 | return false; |
2170 | } |
2171 | |
2172 | void BinaryContext::renameSection(BinarySection &Section, |
2173 | const Twine &NewName) { |
2174 | auto Itr = Sections.find(x: &Section); |
2175 | assert(Itr != Sections.end() && "Section must exist to be renamed." ); |
2176 | Sections.erase(position: Itr); |
2177 | |
2178 | deregisterSectionName(Section); |
2179 | |
2180 | Section.Name = NewName.str(); |
2181 | Section.setOutputName(Section.Name); |
2182 | |
2183 | NameToSection.insert(x: std::make_pair(x&: Section.Name, y: &Section)); |
2184 | |
2185 | // Reinsert with the new name. |
2186 | Sections.insert(x: &Section); |
2187 | } |
2188 | |
2189 | void BinaryContext::printSections(raw_ostream &OS) const { |
2190 | for (BinarySection *const &Section : Sections) |
2191 | OS << "BOLT-INFO: " << *Section << "\n" ; |
2192 | } |
2193 | |
2194 | BinarySection &BinaryContext::absoluteSection() { |
2195 | if (ErrorOr<BinarySection &> Section = getUniqueSectionByName(SectionName: "<absolute>" )) |
2196 | return *Section; |
2197 | return registerOrUpdateSection(Name: "<absolute>" , ELFType: ELF::SHT_NULL, ELFFlags: 0u); |
2198 | } |
2199 | |
2200 | ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address, |
2201 | size_t Size) const { |
2202 | const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); |
2203 | if (!Section) |
2204 | return std::make_error_code(e: std::errc::bad_address); |
2205 | |
2206 | if (Section->isVirtual()) |
2207 | return 0; |
2208 | |
2209 | DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), |
2210 | AsmInfo->getCodePointerSize()); |
2211 | auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); |
2212 | return DE.getUnsigned(offset_ptr: &ValueOffset, byte_size: Size); |
2213 | } |
2214 | |
2215 | ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address, |
2216 | size_t Size) const { |
2217 | const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); |
2218 | if (!Section) |
2219 | return std::make_error_code(e: std::errc::bad_address); |
2220 | |
2221 | if (Section->isVirtual()) |
2222 | return 0; |
2223 | |
2224 | DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), |
2225 | AsmInfo->getCodePointerSize()); |
2226 | auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); |
2227 | return DE.getSigned(offset_ptr: &ValueOffset, size: Size); |
2228 | } |
2229 | |
2230 | void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol, |
2231 | uint64_t Type, uint64_t Addend, |
2232 | uint64_t Value) { |
2233 | ErrorOr<BinarySection &> Section = getSectionForAddress(Address); |
2234 | assert(Section && "cannot find section for address" ); |
2235 | Section->addRelocation(Offset: Address - Section->getAddress(), Symbol, Type, Addend, |
2236 | Value); |
2237 | } |
2238 | |
2239 | void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol, |
2240 | uint64_t Type, uint64_t Addend, |
2241 | uint64_t Value) { |
2242 | ErrorOr<BinarySection &> Section = getSectionForAddress(Address); |
2243 | assert(Section && "cannot find section for address" ); |
2244 | Section->addDynamicRelocation(Offset: Address - Section->getAddress(), Symbol, Type, |
2245 | Addend, Value); |
2246 | } |
2247 | |
2248 | bool BinaryContext::removeRelocationAt(uint64_t Address) { |
2249 | ErrorOr<BinarySection &> Section = getSectionForAddress(Address); |
2250 | assert(Section && "cannot find section for address" ); |
2251 | return Section->removeRelocationAt(Offset: Address - Section->getAddress()); |
2252 | } |
2253 | |
2254 | const Relocation *BinaryContext::getRelocationAt(uint64_t Address) const { |
2255 | ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); |
2256 | if (!Section) |
2257 | return nullptr; |
2258 | |
2259 | return Section->getRelocationAt(Offset: Address - Section->getAddress()); |
2260 | } |
2261 | |
2262 | const Relocation * |
2263 | BinaryContext::getDynamicRelocationAt(uint64_t Address) const { |
2264 | ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); |
2265 | if (!Section) |
2266 | return nullptr; |
2267 | |
2268 | return Section->getDynamicRelocationAt(Offset: Address - Section->getAddress()); |
2269 | } |
2270 | |
2271 | void BinaryContext::markAmbiguousRelocations(BinaryData &BD, |
2272 | const uint64_t Address) { |
2273 | auto setImmovable = [&](BinaryData &BD) { |
2274 | BinaryData *Root = BD.getAtomicRoot(); |
2275 | LLVM_DEBUG(if (Root->isMoveable()) { |
2276 | dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable " |
2277 | << "due to ambiguous relocation referencing 0x" |
2278 | << Twine::utohexstr(Address) << '\n'; |
2279 | }); |
2280 | Root->setIsMoveable(false); |
2281 | }; |
2282 | |
2283 | if (Address == BD.getAddress()) { |
2284 | setImmovable(BD); |
2285 | |
2286 | // Set previous symbol as immovable |
2287 | BinaryData *Prev = getBinaryDataContainingAddress(Address: Address - 1); |
2288 | if (Prev && Prev->getEndAddress() == BD.getAddress()) |
2289 | setImmovable(*Prev); |
2290 | } |
2291 | |
2292 | if (Address == BD.getEndAddress()) { |
2293 | setImmovable(BD); |
2294 | |
2295 | // Set next symbol as immovable |
2296 | BinaryData *Next = getBinaryDataContainingAddress(Address: BD.getEndAddress()); |
2297 | if (Next && Next->getAddress() == BD.getEndAddress()) |
2298 | setImmovable(*Next); |
2299 | } |
2300 | } |
2301 | |
2302 | BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol, |
2303 | uint64_t *EntryDesc) { |
2304 | std::shared_lock<llvm::sys::RWMutex> Lock(SymbolToFunctionMapMutex); |
2305 | auto BFI = SymbolToFunctionMap.find(x: Symbol); |
2306 | if (BFI == SymbolToFunctionMap.end()) |
2307 | return nullptr; |
2308 | |
2309 | BinaryFunction *BF = BFI->second; |
2310 | if (EntryDesc) |
2311 | *EntryDesc = BF->getEntryIDForSymbol(EntrySymbol: Symbol); |
2312 | |
2313 | return BF; |
2314 | } |
2315 | |
2316 | std::string |
2317 | BinaryContext::generateBugReportMessage(StringRef Message, |
2318 | const BinaryFunction &Function) const { |
2319 | std::string Msg; |
2320 | raw_string_ostream SS(Msg); |
2321 | SS << "=======================================\n" ; |
2322 | SS << "BOLT is unable to proceed because it couldn't properly understand " |
2323 | "this function.\n" ; |
2324 | SS << "If you are running the most recent version of BOLT, you may " |
2325 | "want to " |
2326 | "report this and paste this dump.\nPlease check that there is no " |
2327 | "sensitive contents being shared in this dump.\n" ; |
2328 | SS << "\nOffending function: " << Function.getPrintName() << "\n\n" ; |
2329 | ScopedPrinter SP(SS); |
2330 | SP.printBinaryBlock(Label: "Function contents" , Value: *Function.getData()); |
2331 | SS << "\n" ; |
2332 | const_cast<BinaryFunction &>(Function).print(OS&: SS, Annotation: "" ); |
2333 | SS << "ERROR: " << Message; |
2334 | SS << "\n=======================================\n" ; |
2335 | return Msg; |
2336 | } |
2337 | |
2338 | BinaryFunction * |
2339 | BinaryContext::createInjectedBinaryFunction(const std::string &Name, |
2340 | bool IsSimple) { |
2341 | InjectedBinaryFunctions.push_back(x: new BinaryFunction(Name, *this, IsSimple)); |
2342 | BinaryFunction *BF = InjectedBinaryFunctions.back(); |
2343 | setSymbolToFunctionMap(Sym: BF->getSymbol(), BF); |
2344 | BF->CurrentState = BinaryFunction::State::CFG; |
2345 | return BF; |
2346 | } |
2347 | |
2348 | std::pair<size_t, size_t> |
2349 | BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) { |
2350 | // Adjust branch instruction to match the current layout. |
2351 | if (FixBranches) |
2352 | BF.fixBranches(); |
2353 | |
2354 | // Create local MC context to isolate the effect of ephemeral code emission. |
2355 | IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter(); |
2356 | MCContext *LocalCtx = MCEInstance.LocalCtx.get(); |
2357 | MCAsmBackend *MAB = |
2358 | TheTarget->createMCAsmBackend(STI: *STI, MRI: *MRI, Options: MCTargetOptions()); |
2359 | |
2360 | SmallString<256> Code; |
2361 | raw_svector_ostream VecOS(Code); |
2362 | |
2363 | std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(OS&: VecOS); |
2364 | std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer( |
2365 | T: *TheTriple, Ctx&: *LocalCtx, TAB: std::unique_ptr<MCAsmBackend>(MAB), OW: std::move(OW), |
2366 | Emitter: std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), STI: *STI, |
2367 | /*RelaxAll=*/false, |
2368 | /*IncrementalLinkerCompatible=*/false, |
2369 | /*DWARFMustBeAtTheEnd=*/false)); |
2370 | |
2371 | Streamer->initSections(NoExecStack: false, STI: *STI); |
2372 | |
2373 | MCSection *Section = MCEInstance.LocalMOFI->getTextSection(); |
2374 | Section->setHasInstructions(true); |
2375 | |
2376 | // Create symbols in the LocalCtx so that they get destroyed with it. |
2377 | MCSymbol *StartLabel = LocalCtx->createTempSymbol(); |
2378 | MCSymbol *EndLabel = LocalCtx->createTempSymbol(); |
2379 | |
2380 | Streamer->switchSection(Section); |
2381 | Streamer->emitLabel(Symbol: StartLabel); |
2382 | emitFunctionBody(Streamer&: *Streamer, BF, FF&: BF.getLayout().getMainFragment(), |
2383 | /*EmitCodeOnly=*/true); |
2384 | Streamer->emitLabel(Symbol: EndLabel); |
2385 | |
2386 | using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>; |
2387 | SmallVector<LabelRange> SplitLabels; |
2388 | for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) { |
2389 | MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol(); |
2390 | MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol(); |
2391 | SplitLabels.emplace_back(Args: SplitStartLabel, Args: SplitEndLabel); |
2392 | |
2393 | MCSectionELF *const SplitSection = LocalCtx->getELFSection( |
2394 | Section: BF.getCodeSectionName(Fragment: FF.getFragmentNum()), Type: ELF::SHT_PROGBITS, |
2395 | Flags: ELF::SHF_EXECINSTR | ELF::SHF_ALLOC); |
2396 | SplitSection->setHasInstructions(true); |
2397 | Streamer->switchSection(Section: SplitSection); |
2398 | |
2399 | Streamer->emitLabel(Symbol: SplitStartLabel); |
2400 | emitFunctionBody(Streamer&: *Streamer, BF, FF, /*EmitCodeOnly=*/true); |
2401 | Streamer->emitLabel(Symbol: SplitEndLabel); |
2402 | // To avoid calling MCObjectStreamer::flushPendingLabels() which is |
2403 | // private |
2404 | Streamer->emitBytes(Data: StringRef("" )); |
2405 | Streamer->switchSection(Section); |
2406 | } |
2407 | |
2408 | // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or |
2409 | // MCStreamer::Finish(), which does more than we want |
2410 | Streamer->emitBytes(Data: StringRef("" )); |
2411 | |
2412 | MCAssembler &Assembler = |
2413 | static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler(); |
2414 | MCAsmLayout Layout(Assembler); |
2415 | Assembler.layout(Layout); |
2416 | |
2417 | // Obtain fragment sizes. |
2418 | std::vector<uint64_t> FragmentSizes; |
2419 | // Main fragment size. |
2420 | const uint64_t HotSize = |
2421 | Layout.getSymbolOffset(S: *EndLabel) - Layout.getSymbolOffset(S: *StartLabel); |
2422 | FragmentSizes.push_back(x: HotSize); |
2423 | // Split fragment sizes. |
2424 | uint64_t ColdSize = 0; |
2425 | for (const auto &Labels : SplitLabels) { |
2426 | uint64_t Size = Layout.getSymbolOffset(S: *Labels.second) - |
2427 | Layout.getSymbolOffset(S: *Labels.first); |
2428 | FragmentSizes.push_back(x: Size); |
2429 | ColdSize += Size; |
2430 | } |
2431 | |
2432 | // Populate new start and end offsets of each basic block. |
2433 | uint64_t FragmentIndex = 0; |
2434 | for (FunctionFragment &FF : BF.getLayout().fragments()) { |
2435 | BinaryBasicBlock *PrevBB = nullptr; |
2436 | for (BinaryBasicBlock *BB : FF) { |
2437 | const uint64_t BBStartOffset = Layout.getSymbolOffset(S: *(BB->getLabel())); |
2438 | BB->setOutputStartAddress(BBStartOffset); |
2439 | if (PrevBB) |
2440 | PrevBB->setOutputEndAddress(BBStartOffset); |
2441 | PrevBB = BB; |
2442 | } |
2443 | if (PrevBB) |
2444 | PrevBB->setOutputEndAddress(FragmentSizes[FragmentIndex]); |
2445 | FragmentIndex++; |
2446 | } |
2447 | |
2448 | // Clean-up the effect of the code emission. |
2449 | for (const MCSymbol &Symbol : Assembler.symbols()) { |
2450 | MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol); |
2451 | MutableSymbol->setUndefined(); |
2452 | MutableSymbol->setIsRegistered(false); |
2453 | } |
2454 | |
2455 | return std::make_pair(x: HotSize, y&: ColdSize); |
2456 | } |
2457 | |
2458 | bool BinaryContext::validateInstructionEncoding( |
2459 | ArrayRef<uint8_t> InputSequence) const { |
2460 | MCInst Inst; |
2461 | uint64_t InstSize; |
2462 | DisAsm->getInstruction(Instr&: Inst, Size&: InstSize, Bytes: InputSequence, Address: 0, CStream&: nulls()); |
2463 | assert(InstSize == InputSequence.size() && |
2464 | "Disassembled instruction size does not match the sequence." ); |
2465 | |
2466 | SmallString<256> Code; |
2467 | SmallVector<MCFixup, 4> Fixups; |
2468 | |
2469 | MCE->encodeInstruction(Inst, CB&: Code, Fixups, STI: *STI); |
2470 | auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size()); |
2471 | if (InputSequence != OutputSequence) { |
2472 | if (opts::Verbosity > 1) { |
2473 | this->errs() << "BOLT-WARNING: mismatched encoding detected\n" |
2474 | << " input: " << InputSequence << '\n' |
2475 | << " output: " << OutputSequence << '\n'; |
2476 | } |
2477 | return false; |
2478 | } |
2479 | |
2480 | return true; |
2481 | } |
2482 | |
2483 | uint64_t BinaryContext::getHotThreshold() const { |
2484 | static uint64_t Threshold = 0; |
2485 | if (Threshold == 0) { |
2486 | Threshold = std::max( |
2487 | a: (uint64_t)opts::ExecutionCountThreshold, |
2488 | b: NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1); |
2489 | } |
2490 | return Threshold; |
2491 | } |
2492 | |
2493 | BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress( |
2494 | uint64_t Address, bool CheckPastEnd, bool UseMaxSize) { |
2495 | auto FI = BinaryFunctions.upper_bound(x: Address); |
2496 | if (FI == BinaryFunctions.begin()) |
2497 | return nullptr; |
2498 | --FI; |
2499 | |
2500 | const uint64_t UsedSize = |
2501 | UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize(); |
2502 | |
2503 | if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0)) |
2504 | return nullptr; |
2505 | |
2506 | return &FI->second; |
2507 | } |
2508 | |
2509 | BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) { |
2510 | // First, try to find a function starting at the given address. If the |
2511 | // function was folded, this will get us the original folded function if it |
2512 | // wasn't removed from the list, e.g. in non-relocation mode. |
2513 | auto BFI = BinaryFunctions.find(x: Address); |
2514 | if (BFI != BinaryFunctions.end()) |
2515 | return &BFI->second; |
2516 | |
2517 | // We might have folded the function matching the object at the given |
2518 | // address. In such case, we look for a function matching the symbol |
2519 | // registered at the original address. The new function (the one that the |
2520 | // original was folded into) will hold the symbol. |
2521 | if (const BinaryData *BD = getBinaryDataAtAddress(Address)) { |
2522 | uint64_t EntryID = 0; |
2523 | BinaryFunction *BF = getFunctionForSymbol(Symbol: BD->getSymbol(), EntryDesc: &EntryID); |
2524 | if (BF && EntryID == 0) |
2525 | return BF; |
2526 | } |
2527 | return nullptr; |
2528 | } |
2529 | |
2530 | DebugAddressRangesVector BinaryContext::translateModuleAddressRanges( |
2531 | const DWARFAddressRangesVector &InputRanges) const { |
2532 | DebugAddressRangesVector OutputRanges; |
2533 | |
2534 | for (const DWARFAddressRange Range : InputRanges) { |
2535 | auto BFI = BinaryFunctions.lower_bound(x: Range.LowPC); |
2536 | while (BFI != BinaryFunctions.end()) { |
2537 | const BinaryFunction &Function = BFI->second; |
2538 | if (Function.getAddress() >= Range.HighPC) |
2539 | break; |
2540 | const DebugAddressRangesVector FunctionRanges = |
2541 | Function.getOutputAddressRanges(); |
2542 | llvm::move(Range: FunctionRanges, Out: std::back_inserter(x&: OutputRanges)); |
2543 | std::advance(i&: BFI, n: 1); |
2544 | } |
2545 | } |
2546 | |
2547 | return OutputRanges; |
2548 | } |
2549 | |
2550 | } // namespace bolt |
2551 | } // namespace llvm |
2552 | |