1//===- bolt/Rewrite/LinuxKernelRewriter.cpp -------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Support for updating Linux Kernel metadata.
10//
11//===----------------------------------------------------------------------===//
12
13#include "bolt/Core/BinaryFunction.h"
14#include "bolt/Rewrite/MetadataRewriter.h"
15#include "bolt/Rewrite/MetadataRewriters.h"
16#include "bolt/Utils/CommandLineOpts.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/DenseSet.h"
19#include "llvm/MC/MCDisassembler/MCDisassembler.h"
20#include "llvm/Support/BinaryStreamWriter.h"
21#include "llvm/Support/CommandLine.h"
22#include "llvm/Support/Debug.h"
23#include "llvm/Support/Errc.h"
24#include "llvm/Support/ErrorOr.h"
25#include <regex>
26
27#define DEBUG_TYPE "bolt-linux"
28
29using namespace llvm;
30using namespace bolt;
31
32namespace opts {
33
34static cl::opt<bool>
35 AltInstHasPadLen("alt-inst-has-padlen",
36 cl::desc("specify that .altinstructions has padlen field"),
37 cl::init(Val: false), cl::Hidden, cl::cat(BoltCategory));
38
39static cl::opt<uint32_t>
40 AltInstFeatureSize("alt-inst-feature-size",
41 cl::desc("size of feature field in .altinstructions"),
42 cl::init(Val: 2), cl::Hidden, cl::cat(BoltCategory));
43
44static cl::opt<bool>
45 DumpAltInstructions("dump-alt-instructions",
46 cl::desc("dump Linux alternative instructions info"),
47 cl::init(Val: false), cl::Hidden, cl::cat(BoltCategory));
48
49static cl::opt<bool>
50 DumpExceptions("dump-linux-exceptions",
51 cl::desc("dump Linux kernel exception table"),
52 cl::init(Val: false), cl::Hidden, cl::cat(BoltCategory));
53
54static cl::opt<bool>
55 DumpORC("dump-orc", cl::desc("dump raw ORC unwind information (sorted)"),
56 cl::init(Val: false), cl::Hidden, cl::cat(BoltCategory));
57
58static cl::opt<bool> DumpParavirtualPatchSites(
59 "dump-para-sites", cl::desc("dump Linux kernel paravitual patch sites"),
60 cl::init(Val: false), cl::Hidden, cl::cat(BoltCategory));
61
62static cl::opt<bool>
63 DumpPCIFixups("dump-pci-fixups",
64 cl::desc("dump Linux kernel PCI fixup table"),
65 cl::init(Val: false), cl::Hidden, cl::cat(BoltCategory));
66
67static cl::opt<bool> DumpSMPLocks("dump-smp-locks",
68 cl::desc("dump Linux kernel SMP locks"),
69 cl::init(Val: false), cl::Hidden,
70 cl::cat(BoltCategory));
71
72static cl::opt<bool> DumpStaticCalls("dump-static-calls",
73 cl::desc("dump Linux kernel static calls"),
74 cl::init(Val: false), cl::Hidden,
75 cl::cat(BoltCategory));
76
77static cl::opt<bool>
78 DumpStaticKeys("dump-static-keys",
79 cl::desc("dump Linux kernel static keys jump table"),
80 cl::init(Val: false), cl::Hidden, cl::cat(BoltCategory));
81
82static cl::opt<bool> LongJumpLabels(
83 "long-jump-labels",
84 cl::desc("always use long jumps/nops for Linux kernel static keys"),
85 cl::init(Val: false), cl::Hidden, cl::cat(BoltCategory));
86
87static cl::opt<bool>
88 PrintORC("print-orc",
89 cl::desc("print ORC unwind information for instructions"),
90 cl::init(Val: true), cl::Hidden, cl::cat(BoltCategory));
91
92} // namespace opts
93
94/// Linux kernel version
95struct LKVersion {
96 LKVersion() {}
97 LKVersion(unsigned Major, unsigned Minor, unsigned Rev)
98 : Major(Major), Minor(Minor), Rev(Rev) {}
99
100 bool operator<(const LKVersion &Other) const {
101 return std::make_tuple(args: Major, args: Minor, args: Rev) <
102 std::make_tuple(args: Other.Major, args: Other.Minor, args: Other.Rev);
103 }
104
105 bool operator>(const LKVersion &Other) const { return Other < *this; }
106
107 bool operator<=(const LKVersion &Other) const { return !(*this > Other); }
108
109 bool operator>=(const LKVersion &Other) const { return !(*this < Other); }
110
111 bool operator==(const LKVersion &Other) const {
112 return Major == Other.Major && Minor == Other.Minor && Rev == Other.Rev;
113 }
114
115 bool operator!=(const LKVersion &Other) const { return !(*this == Other); }
116
117 unsigned Major{0};
118 unsigned Minor{0};
119 unsigned Rev{0};
120};
121
/// The Linux kernel supports stack unwinding using ORC (Oops Rewind
/// Capability). The ORC state at every IP can be described by the following
/// data structure.
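///
/// For reference, the kernel's own entry type lives in
/// arch/x86/include/asm/orc_types.h and looks roughly like the sketch below
/// (field names and bit-field widths vary between kernel versions, so this is
/// illustrative rather than exact):
///
///   struct orc_entry {
///     s16 sp_offset;
///     s16 bp_offset;
///     unsigned sp_reg:4, bp_reg:4, type:3, signal:1;
///   } __packed;
///
/// BOLT keeps the bit-fields that follow bp_offset packed into the 16-bit
/// Info member, giving the 6-byte ORC_UNWIND_ENTRY_SIZE used below.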
124struct ORCState {
125 int16_t SPOffset;
126 int16_t BPOffset;
127 int16_t Info;
128
129 bool operator==(const ORCState &Other) const {
130 return SPOffset == Other.SPOffset && BPOffset == Other.BPOffset &&
131 Info == Other.Info;
132 }
133
134 bool operator!=(const ORCState &Other) const { return !(*this == Other); }
135};
136
137/// Section terminator ORC entry.
138static ORCState NullORC = {.SPOffset: 0, .BPOffset: 0, .Info: 0};
139
140/// Basic printer for ORC entry. It does not provide the same level of
141/// information as objtool (for now).
142inline raw_ostream &operator<<(raw_ostream &OS, const ORCState &E) {
143 if (!opts::PrintORC)
144 return OS;
145 if (E != NullORC)
146 OS << format(Fmt: "{sp: %d, bp: %d, info: 0x%x}", Vals: E.SPOffset, Vals: E.BPOffset,
147 Vals: E.Info);
148 else
149 OS << "{terminator}";
150
151 return OS;
152}
153
154namespace {
155
156/// Extension to DataExtractor that supports reading addresses stored in
157/// PC-relative format.
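///
/// A 32-bit slot at address A holding value V encodes the target address
/// A + (int32_t)V. As a minimal usage sketch (AE and Offset are hypothetical
/// names, not part of this file's logic):
///
///   AddressExtractor::Cursor C(Offset);
///   uint64_t Target = AE.getPCRelAddress32(C);
///   // Target == DataAddress + Offset + (int32_t)value-at-Offset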
158class AddressExtractor : public DataExtractor {
159 uint64_t DataAddress;
160
161public:
162 AddressExtractor(StringRef Data, uint64_t DataAddress, bool IsLittleEndian,
163 uint8_t AddressSize)
164 : DataExtractor(Data, IsLittleEndian, AddressSize),
165 DataAddress(DataAddress) {}
166
167 /// Extract 32-bit PC-relative address/pointer.
168 uint64_t getPCRelAddress32(Cursor &C) {
169 const uint64_t Base = DataAddress + C.tell();
170 return Base + (int32_t)getU32(C);
171 }
172
173 /// Extract 64-bit PC-relative address/pointer.
174 uint64_t getPCRelAddress64(Cursor &C) {
175 const uint64_t Base = DataAddress + C.tell();
176 return Base + (int64_t)getU64(C);
177 }
178};
179
180class LinuxKernelRewriter final : public MetadataRewriter {
181 LKVersion LinuxKernelVersion;
182
183 /// Information required for updating metadata referencing an instruction.
184 struct InstructionFixup {
185 BinarySection &Section; // Section referencing the instruction.
186 uint64_t Offset; // Offset in the section above.
187 BinaryFunction &BF; // Function containing the instruction.
188 MCSymbol &Label; // Label marking the instruction.
189 bool IsPCRelative; // If the reference type is relative.
190 };
191 std::vector<InstructionFixup> Fixups;
192
193 /// Size of an entry in .smp_locks section.
194 static constexpr size_t SMP_LOCKS_ENTRY_SIZE = 4;
195
196 /// Linux ORC sections.
197 ErrorOr<BinarySection &> ORCUnwindSection = std::errc::bad_address;
198 ErrorOr<BinarySection &> ORCUnwindIPSection = std::errc::bad_address;
199
200 /// Size of entries in ORC sections.
201 static constexpr size_t ORC_UNWIND_ENTRY_SIZE = 6;
202 static constexpr size_t ORC_UNWIND_IP_ENTRY_SIZE = 4;
203
204 struct ORCListEntry {
205 uint64_t IP; /// Instruction address.
206 BinaryFunction *BF; /// Binary function corresponding to the entry.
207 ORCState ORC; /// Stack unwind info in ORC format.
208
209 /// ORC entries are sorted by their IPs. Terminator entries (NullORC)
210 /// should precede other entries with the same address.
211 bool operator<(const ORCListEntry &Other) const {
      if (IP < Other.IP)
        return true;
      if (IP > Other.IP)
        return false;
216 return ORC == NullORC && Other.ORC != NullORC;
217 }
218 };
219
220 using ORCListType = std::vector<ORCListEntry>;
221 ORCListType ORCEntries;
222
223 /// Number of entries in the input file ORC sections.
224 uint64_t NumORCEntries = 0;
225
226 /// Section containing static keys jump table.
227 ErrorOr<BinarySection &> StaticKeysJumpSection = std::errc::bad_address;
228 uint64_t StaticKeysJumpTableAddress = 0;
229 static constexpr size_t STATIC_KEYS_JUMP_ENTRY_SIZE = 8;
230
231 struct JumpInfoEntry {
232 bool Likely;
233 bool InitValue;
234 };
235 SmallVector<JumpInfoEntry, 16> JumpInfo;
236
237 /// Static key entries that need nop conversion.
238 DenseSet<uint32_t> NopIDs;
239
240 /// Section containing static call table.
241 ErrorOr<BinarySection &> StaticCallSection = std::errc::bad_address;
242 uint64_t StaticCallTableAddress = 0;
243 static constexpr size_t STATIC_CALL_ENTRY_SIZE = 8;
244
245 struct StaticCallInfo {
246 uint32_t ID; /// Identifier of the entry in the table.
247 BinaryFunction *Function; /// Function containing associated call.
248 MCSymbol *Label; /// Label attached to the call.
249 };
250 using StaticCallListType = std::vector<StaticCallInfo>;
251 StaticCallListType StaticCallEntries;
252
253 /// Section containing the Linux exception table.
254 ErrorOr<BinarySection &> ExceptionsSection = std::errc::bad_address;
255 static constexpr size_t EXCEPTION_TABLE_ENTRY_SIZE = 12;
256
257 /// Functions with exception handling code.
258 DenseSet<BinaryFunction *> FunctionsWithExceptions;
259
260 /// Section with paravirtual patch sites.
261 ErrorOr<BinarySection &> ParavirtualPatchSection = std::errc::bad_address;
262
263 /// Alignment of paravirtual patch structures.
264 static constexpr size_t PARA_PATCH_ALIGN = 8;
265
266 /// .altinstructions section.
267 ErrorOr<BinarySection &> AltInstrSection = std::errc::bad_address;
268
269 /// Section containing Linux bug table.
270 ErrorOr<BinarySection &> BugTableSection = std::errc::bad_address;
271
272 /// Size of bug_entry struct.
273 static constexpr size_t BUG_TABLE_ENTRY_SIZE = 12;
274
275 /// List of bug entries per function.
276 using FunctionBugListType =
277 DenseMap<BinaryFunction *, SmallVector<uint32_t, 2>>;
278 FunctionBugListType FunctionBugList;
279
280 /// .pci_fixup section.
281 ErrorOr<BinarySection &> PCIFixupSection = std::errc::bad_address;
282 static constexpr size_t PCI_FIXUP_ENTRY_SIZE = 16;
283
284 Error detectLinuxKernelVersion();
285
286 /// Process linux kernel special sections and their relocations.
287 void processLKSections();
288
289 /// Process __ksymtab and __ksymtab_gpl.
290 void processLKKSymtab(bool IsGPL = false);
291
292 // Create relocations in sections requiring fixups.
293 //
294 // Make sure functions that will not be emitted are marked as such before this
295 // function is executed.
296 void processInstructionFixups();
297
298 /// Process .smp_locks section.
299 Error processSMPLocks();
300
301 /// Read ORC unwind information and annotate instructions.
302 Error readORCTables();
303
304 /// Update ORC for functions once CFG is constructed.
305 Error processORCPostCFG();
306
307 /// Update ORC data in the binary.
308 Error rewriteORCTables();
309
310 /// Validate written ORC tables after binary emission.
311 Error validateORCTables();
312
313 /// Static call table handling.
314 Error readStaticCalls();
315 Error rewriteStaticCalls();
316
317 Error readExceptionTable();
318 Error rewriteExceptionTable();
319
320 /// Paravirtual instruction patch sites.
321 Error readParaInstructions();
322 Error rewriteParaInstructions();
323
324 /// __bug_table section handling.
325 Error readBugTable();
326 Error rewriteBugTable();
327
  /// Do not process functions containing instructions annotated with
  /// \p Annotation.
330 void skipFunctionsWithAnnotation(StringRef Annotation) const;
331
332 /// Handle alternative instruction info from .altinstructions.
333 Error readAltInstructions();
334 void processAltInstructionsPostCFG();
335 Error tryReadAltInstructions(uint32_t AltInstFeatureSize,
336 bool AltInstHasPadLen, bool ParseOnly);
337
338 /// Read .pci_fixup
339 Error readPCIFixupTable();
340
341 /// Handle static keys jump table.
342 Error readStaticKeysJumpTable();
343 Error rewriteStaticKeysJumpTable();
344 Error updateStaticKeysJumpTablePostEmit();
345
346public:
347 LinuxKernelRewriter(BinaryContext &BC)
348 : MetadataRewriter("linux-kernel-rewriter", BC) {}
349
350 Error preCFGInitializer() override {
351 if (Error E = detectLinuxKernelVersion())
352 return E;
353
354 processLKSections();
355
356 if (Error E = processSMPLocks())
357 return E;
358
359 if (Error E = readStaticCalls())
360 return E;
361
362 if (Error E = readExceptionTable())
363 return E;
364
365 if (Error E = readParaInstructions())
366 return E;
367
368 if (Error E = readBugTable())
369 return E;
370
371 if (Error E = readAltInstructions())
372 return E;
373
374 // Some ORC entries could be linked to alternative instruction
375 // sequences. Hence, we read ORC after .altinstructions.
376 if (Error E = readORCTables())
377 return E;
378
379 if (Error E = readPCIFixupTable())
380 return E;
381
382 if (Error E = readStaticKeysJumpTable())
383 return E;
384
385 return Error::success();
386 }
387
388 Error postCFGInitializer() override {
389 if (Error E = processORCPostCFG())
390 return E;
391
392 processAltInstructionsPostCFG();
393
394 return Error::success();
395 }
396
397 Error preEmitFinalizer() override {
398 // Since rewriteExceptionTable() can mark functions as non-simple, run it
399 // before other rewriters that depend on simple/emit status.
400 if (Error E = rewriteExceptionTable())
401 return E;
402
403 if (Error E = rewriteParaInstructions())
404 return E;
405
406 if (Error E = rewriteORCTables())
407 return E;
408
409 if (Error E = rewriteStaticCalls())
410 return E;
411
412 if (Error E = rewriteStaticKeysJumpTable())
413 return E;
414
415 if (Error E = rewriteBugTable())
416 return E;
417
418 processInstructionFixups();
419
420 return Error::success();
421 }
422
423 Error postEmitFinalizer() override {
424 if (Error E = updateStaticKeysJumpTablePostEmit())
425 return E;
426
427 if (Error E = validateORCTables())
428 return E;
429
430 return Error::success();
431 }
432};
433
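/// Detect the kernel version from the linux_banner string embedded in the
/// binary. The banner conventionally starts with
/// "Linux version <major>.<minor>[.<rev>]", e.g.
/// "Linux version 6.5.0 (builder@host) ..." (a made-up example), which is what
/// the regular expression below matches; only the version triplet is used.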
434Error LinuxKernelRewriter::detectLinuxKernelVersion() {
435 if (BinaryData *BD = BC.getBinaryDataByName(Name: "linux_banner")) {
436 const BinarySection &Section = BD->getSection();
437 const std::string S =
438 Section.getContents().substr(Start: BD->getOffset(), N: BD->getSize()).str();
439
440 const std::regex Re(R"---(Linux version ((\d+)\.(\d+)(\.(\d+))?))---");
441 std::smatch Match;
442 if (std::regex_search(s: S, m&: Match, e: Re)) {
443 const unsigned Major = std::stoi(str: Match[2].str());
444 const unsigned Minor = std::stoi(str: Match[3].str());
445 const unsigned Rev = Match[5].matched ? std::stoi(str: Match[5].str()) : 0;
446 LinuxKernelVersion = LKVersion(Major, Minor, Rev);
447 BC.outs() << "BOLT-INFO: Linux kernel version is " << Match[1].str()
448 << "\n";
449 return Error::success();
450 }
451 }
452 return createStringError(EC: errc::executable_format_error,
453 S: "Linux kernel version is unknown");
454}
455
456void LinuxKernelRewriter::processLKSections() {
457 processLKKSymtab();
458 processLKKSymtab(IsGPL: true);
459}
460
461/// Process __ksymtab[_gpl] sections of Linux Kernel.
462/// This section lists all the vmlinux symbols that kernel modules can access.
463///
/// All entries are 4 bytes each, so we can read them one by one and ignore
/// those that do not point into the .text section. All pointers are
/// PC-relative offsets and always point to the beginning of a function.
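///
/// For reference, with CONFIG_HAVE_ARCH_PREL32_RELOCATIONS the kernel defines
/// an entry roughly as follows (a sketch; exact fields depend on the kernel
/// version and configuration):
///
///   struct kernel_symbol {
///     int value_offset;     /* PC-relative pointer to the symbol */
///     int name_offset;      /* PC-relative pointer to the symbol name */
///     int namespace_offset; /* PC-relative pointer to the namespace string */
///   };
///
/// We treat the section as a plain array of 4-byte slots and only create
/// relocations for slots that resolve to function addresses.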
467void LinuxKernelRewriter::processLKKSymtab(bool IsGPL) {
468 StringRef SectionName = "__ksymtab";
469 if (IsGPL)
470 SectionName = "__ksymtab_gpl";
471 ErrorOr<BinarySection &> SectionOrError =
472 BC.getUniqueSectionByName(SectionName);
473 assert(SectionOrError &&
474 "__ksymtab[_gpl] section not found in Linux Kernel binary");
475 const uint64_t SectionSize = SectionOrError->getSize();
476 const uint64_t SectionAddress = SectionOrError->getAddress();
477 assert((SectionSize % 4) == 0 &&
478 "The size of the __ksymtab[_gpl] section should be a multiple of 4");
479
480 for (uint64_t I = 0; I < SectionSize; I += 4) {
481 const uint64_t EntryAddress = SectionAddress + I;
482 ErrorOr<int64_t> Offset = BC.getSignedValueAtAddress(Address: EntryAddress, Size: 4);
483 assert(Offset && "Reading valid PC-relative offset for a ksymtab entry");
484 const int32_t SignedOffset = *Offset;
485 const uint64_t RefAddress = EntryAddress + SignedOffset;
486 BinaryFunction *BF = BC.getBinaryFunctionAtAddress(Address: RefAddress);
487 if (!BF)
488 continue;
489
490 BC.addRelocation(Address: EntryAddress, Symbol: BF->getSymbol(), Type: Relocation::getPC32(), Addend: 0,
491 Value: *Offset);
492 }
493}
494
/// The .smp_locks section contains PC-relative references to instructions with
/// a LOCK prefix. The prefix can be converted to a NOP at boot time on non-SMP
/// systems.
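///
/// Each entry is emitted by the kernel's LOCK_PREFIX machinery, roughly (an
/// illustrative sketch based on arch/x86/include/asm/alternative.h):
///
///   .pushsection .smp_locks, "a"
///   .balign 4
///   .long 671f - .        # 32-bit PC-relative pointer to the prefix byte
///   .popsection
///   671: lock
///
/// so each SMP_LOCKS_ENTRY_SIZE-byte slot resolves to the 0xf0 prefix byte of
/// the annotated instruction.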
497Error LinuxKernelRewriter::processSMPLocks() {
498 ErrorOr<BinarySection &> SMPLocksSection =
499 BC.getUniqueSectionByName(SectionName: ".smp_locks");
500 if (!SMPLocksSection)
501 return Error::success();
502
503 const uint64_t SectionSize = SMPLocksSection->getSize();
504 const uint64_t SectionAddress = SMPLocksSection->getAddress();
505 if (SectionSize % SMP_LOCKS_ENTRY_SIZE)
506 return createStringError(EC: errc::executable_format_error,
507 S: "bad size of .smp_locks section");
508
509 AddressExtractor AE(SMPLocksSection->getContents(), SectionAddress,
510 BC.AsmInfo->isLittleEndian(),
511 BC.AsmInfo->getCodePointerSize());
512 AddressExtractor::Cursor Cursor(0);
513 while (Cursor && Cursor.tell() < SectionSize) {
514 const uint64_t Offset = Cursor.tell();
515 const uint64_t IP = AE.getPCRelAddress32(C&: Cursor);
516
517 // Consume the status of the cursor.
518 if (!Cursor)
519 return createStringError(EC: errc::executable_format_error,
520 Fmt: "error while reading .smp_locks: %s",
521 Vals: toString(E: Cursor.takeError()).c_str());
522
523 if (opts::DumpSMPLocks)
524 BC.outs() << "SMP lock at 0x: " << Twine::utohexstr(Val: IP) << '\n';
525
526 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(Address: IP);
527 if (!BF || !BC.shouldEmit(Function: *BF))
528 continue;
529
530 MCInst *Inst = BF->getInstructionAtOffset(Offset: IP - BF->getAddress());
531 if (!Inst)
532 return createStringError(EC: errc::executable_format_error,
533 Fmt: "no instruction matches lock at 0x%" PRIx64, Vals: IP);
534
535 // Check for duplicate entries.
536 if (BC.MIB->hasAnnotation(Inst: *Inst, Name: "SMPLock"))
537 return createStringError(EC: errc::executable_format_error,
538 Fmt: "duplicate SMP lock at 0x%" PRIx64, Vals: IP);
539
540 BC.MIB->addAnnotation(Inst&: *Inst, Name: "SMPLock", Val: true);
541 MCSymbol *Label =
542 BC.MIB->getOrCreateInstLabel(Inst&: *Inst, Name: "__SMPLock_", Ctx: BC.Ctx.get());
543
544 Fixups.push_back(x: {.Section: *SMPLocksSection, .Offset: Offset, .BF: *BF, .Label: *Label,
545 /*IsPCRelative*/ true});
546 }
547
548 const uint64_t NumEntries = SectionSize / SMP_LOCKS_ENTRY_SIZE;
549 BC.outs() << "BOLT-INFO: parsed " << NumEntries << " SMP lock entries\n";
550
551 return Error::success();
552}
553
554void LinuxKernelRewriter::processInstructionFixups() {
555 for (InstructionFixup &Fixup : Fixups) {
556 if (!BC.shouldEmit(Function: Fixup.BF))
557 continue;
558
559 Fixup.Section.addRelocation(Offset: Fixup.Offset, Symbol: &Fixup.Label,
560 Type: Fixup.IsPCRelative ? ELF::R_X86_64_PC32
561 : ELF::R_X86_64_64,
562 /*Addend*/ 0);
563 }
564}
565
566Error LinuxKernelRewriter::readORCTables() {
  // NOTE: we should ignore relocations for ORC tables as the tables are sorted
  // at post-link time and relocations are not updated.
569 ORCUnwindSection = BC.getUniqueSectionByName(SectionName: ".orc_unwind");
570 ORCUnwindIPSection = BC.getUniqueSectionByName(SectionName: ".orc_unwind_ip");
571
572 if (!ORCUnwindSection && !ORCUnwindIPSection)
573 return Error::success();
574
575 if (!ORCUnwindSection || !ORCUnwindIPSection)
576 return createStringError(EC: errc::executable_format_error,
577 S: "missing ORC section");
578
579 NumORCEntries = ORCUnwindIPSection->getSize() / ORC_UNWIND_IP_ENTRY_SIZE;
580 if (ORCUnwindSection->getSize() != NumORCEntries * ORC_UNWIND_ENTRY_SIZE ||
581 ORCUnwindIPSection->getSize() != NumORCEntries * ORC_UNWIND_IP_ENTRY_SIZE)
582 return createStringError(EC: errc::executable_format_error,
583 S: "ORC entries number mismatch detected");
584
585 DataExtractor OrcDE(ORCUnwindSection->getContents(),
586 BC.AsmInfo->isLittleEndian(),
587 BC.AsmInfo->getCodePointerSize());
588 AddressExtractor IPAE(
589 ORCUnwindIPSection->getContents(), ORCUnwindIPSection->getAddress(),
590 BC.AsmInfo->isLittleEndian(), BC.AsmInfo->getCodePointerSize());
591 DataExtractor::Cursor ORCCursor(0);
592 DataExtractor::Cursor IPCursor(0);
593 uint64_t PrevIP = 0;
594 for (uint32_t Index = 0; Index < NumORCEntries; ++Index) {
595 const uint64_t IP = IPAE.getPCRelAddress32(C&: IPCursor);
596 // Consume the status of the cursor.
597 if (!IPCursor)
598 return createStringError(EC: errc::executable_format_error,
599 Fmt: "out of bounds while reading ORC IP table: %s",
600 Vals: toString(E: IPCursor.takeError()).c_str());
601
602 if (IP < PrevIP && opts::Verbosity)
603 BC.errs() << "BOLT-WARNING: out of order IP 0x" << Twine::utohexstr(Val: IP)
604 << " detected while reading ORC\n";
605
606 PrevIP = IP;
607
    // Store all entries, including those we are not going to update, as the
    // tables need to be sorted globally before being written out.
610 ORCEntries.push_back(x: ORCListEntry());
611 ORCListEntry &Entry = ORCEntries.back();
612
613 Entry.IP = IP;
614 Entry.ORC.SPOffset = (int16_t)OrcDE.getU16(C&: ORCCursor);
615 Entry.ORC.BPOffset = (int16_t)OrcDE.getU16(C&: ORCCursor);
616 Entry.ORC.Info = (int16_t)OrcDE.getU16(C&: ORCCursor);
617 Entry.BF = nullptr;
618
619 // Consume the status of the cursor.
620 if (!ORCCursor)
621 return createStringError(EC: errc::executable_format_error,
622 Fmt: "out of bounds while reading ORC: %s",
623 Vals: toString(E: ORCCursor.takeError()).c_str());
624
625 if (Entry.ORC == NullORC)
626 continue;
627
628 BinaryFunction *&BF = Entry.BF;
629 BF = BC.getBinaryFunctionContainingAddress(Address: IP, /*CheckPastEnd*/ true);
630
631 // If the entry immediately pointing past the end of the function is not
632 // the terminator entry, then it does not belong to this function.
633 if (BF && BF->getAddress() + BF->getSize() == IP)
      BF = nullptr;
635
636 if (!BF) {
637 if (opts::Verbosity)
638 BC.errs() << "BOLT-WARNING: no binary function found matching ORC 0x"
639 << Twine::utohexstr(Val: IP) << ": " << Entry.ORC << '\n';
640 continue;
641 }
642
643 BF->setHasORC(true);
644
645 if (!BF->hasInstructions())
646 continue;
647
648 const uint64_t Offset = IP - BF->getAddress();
649 MCInst *Inst = BF->getInstructionAtOffset(Offset);
650 if (!Inst) {
      // Check if there are alternative instruction(s) at this IP. Multiple
      // alternative instructions can take the place of a single original
      // instruction, and each alternative can have a separate ORC entry.
      // Since the ORC table is shared between all alternative sequences, only
      // one (out of many) sequences may contain an instruction from the ORC
      // table to avoid ambiguities/conflicts.
657 //
658 // For now, we have limited support for alternatives. I.e. we still print
659 // functions with them, but will not change the code in the output binary.
660 // As such, we can ignore alternative ORC entries. They will be preserved
661 // in the binary, but will not get printed in the instruction stream.
      Inst = BF->getInstructionContainingOffset(Offset);
      if (Inst && BC.MIB->hasAnnotation(*Inst, "AltInst"))
        continue;
665
666 return createStringError(
667 EC: errc::executable_format_error,
668 Fmt: "no instruction at address 0x%" PRIx64 " in .orc_unwind_ip", Vals: IP);
669 }
670
671 // Some addresses will have two entries associated with them. The first
672 // one being a "weak" section terminator. Since we ignore the terminator,
673 // we should only assign one entry per instruction.
674 if (BC.MIB->hasAnnotation(Inst: *Inst, Name: "ORC"))
675 return createStringError(
676 EC: errc::executable_format_error,
677 Fmt: "duplicate non-terminal ORC IP 0x%" PRIx64 " in .orc_unwind_ip", Vals: IP);
678
679 BC.MIB->addAnnotation(Inst&: *Inst, Name: "ORC", Val: Entry.ORC);
680 }
681
682 BC.outs() << "BOLT-INFO: parsed " << NumORCEntries << " ORC entries\n";
683
684 if (opts::DumpORC) {
685 BC.outs() << "BOLT-INFO: ORC unwind information:\n";
686 for (const ORCListEntry &E : ORCEntries) {
687 BC.outs() << "0x" << Twine::utohexstr(Val: E.IP) << ": " << E.ORC;
688 if (E.BF)
689 BC.outs() << ": " << *E.BF;
690 BC.outs() << '\n';
691 }
692 }
693
694 // Add entries for functions that don't have explicit ORC info at the start.
695 // We'll have the correct info for them even if ORC for the preceding function
696 // changes.
697 ORCListType NewEntries;
698 for (BinaryFunction &BF : llvm::make_second_range(c&: BC.getBinaryFunctions())) {
699 auto It = llvm::partition_point(Range&: ORCEntries, P: [&](const ORCListEntry &E) {
700 return E.IP <= BF.getAddress();
701 });
702 if (It != ORCEntries.begin())
703 --It;
704
705 if (It->BF == &BF)
706 continue;
707
708 if (It->ORC == NullORC && It->IP == BF.getAddress()) {
709 assert(!It->BF);
710 It->BF = &BF;
711 continue;
712 }
713
714 NewEntries.push_back(x: {.IP: BF.getAddress(), .BF: &BF, .ORC: It->ORC});
715 if (It->ORC != NullORC)
716 BF.setHasORC(true);
717 }
718
719 llvm::copy(Range&: NewEntries, Out: std::back_inserter(x&: ORCEntries));
720 llvm::sort(C&: ORCEntries);
721
722 if (opts::DumpORC) {
723 BC.outs() << "BOLT-INFO: amended ORC unwind information:\n";
724 for (const ORCListEntry &E : ORCEntries) {
725 BC.outs() << "0x" << Twine::utohexstr(Val: E.IP) << ": " << E.ORC;
726 if (E.BF)
727 BC.outs() << ": " << *E.BF;
728 BC.outs() << '\n';
729 }
730 }
731
732 return Error::success();
733}
734
735Error LinuxKernelRewriter::processORCPostCFG() {
736 if (!NumORCEntries)
737 return Error::success();
738
739 // Propagate ORC to the rest of the function. We can annotate every
740 // instruction in every function, but to minimize the overhead, we annotate
741 // the first instruction in every basic block to reflect the state at the
742 // entry. This way, the ORC state can be calculated based on annotations
743 // regardless of the basic block layout. Note that if we insert/delete
744 // instructions, we must take care to attach ORC info to the new/deleted ones.
745 for (BinaryFunction &BF : llvm::make_second_range(c&: BC.getBinaryFunctions())) {
746
747 std::optional<ORCState> CurrentState;
748 for (BinaryBasicBlock &BB : BF) {
749 for (MCInst &Inst : BB) {
750 ErrorOr<ORCState> State =
751 BC.MIB->tryGetAnnotationAs<ORCState>(Inst, Name: "ORC");
752
753 if (State) {
754 CurrentState = *State;
755 continue;
756 }
757
758 // Get state for the start of the function.
759 if (!CurrentState) {
760 // A terminator entry (NullORC) can match the function address. If
761 // there's also a non-terminator entry, it will be placed after the
762 // terminator. Hence, we are looking for the last ORC entry that
763 // matches the address.
764 auto It =
765 llvm::partition_point(Range&: ORCEntries, P: [&](const ORCListEntry &E) {
766 return E.IP <= BF.getAddress();
767 });
768 if (It != ORCEntries.begin())
769 --It;
770
771 assert(It->IP == BF.getAddress() && (!It->BF || It->BF == &BF) &&
772 "ORC info at function entry expected.");
773
774 if (It->ORC == NullORC && BF.hasORC()) {
775 BC.errs() << "BOLT-WARNING: ORC unwind info excludes prologue for "
776 << BF << '\n';
777 }
778
779 It->BF = &BF;
780
781 CurrentState = It->ORC;
782 if (It->ORC != NullORC)
783 BF.setHasORC(true);
784 }
785
786 // While printing ORC, attach info to every instruction for convenience.
787 if (opts::PrintORC || &Inst == &BB.front())
788 BC.MIB->addAnnotation(Inst, Name: "ORC", Val: *CurrentState);
789 }
790 }
791 }
792
793 return Error::success();
794}
795
796Error LinuxKernelRewriter::rewriteORCTables() {
797 if (!NumORCEntries)
798 return Error::success();
799
800 // Update ORC sections in-place. As we change the code, the number of ORC
801 // entries may increase for some functions. However, as we remove terminator
802 // redundancy (see below), more space is freed up and we should always be able
803 // to fit new ORC tables in the reserved space.
804 auto createInPlaceWriter = [&](BinarySection &Section) -> BinaryStreamWriter {
805 const size_t Size = Section.getSize();
806 uint8_t *NewContents = new uint8_t[Size];
807 Section.updateContents(NewData: NewContents, NewSize: Size);
808 Section.setOutputFileOffset(Section.getInputFileOffset());
809 return BinaryStreamWriter({NewContents, Size}, BC.AsmInfo->isLittleEndian()
810 ? endianness::little
811 : endianness::big);
812 };
813 BinaryStreamWriter UnwindWriter = createInPlaceWriter(*ORCUnwindSection);
814 BinaryStreamWriter UnwindIPWriter = createInPlaceWriter(*ORCUnwindIPSection);
815
816 uint64_t NumEmitted = 0;
817 std::optional<ORCState> LastEmittedORC;
818 auto emitORCEntry = [&](const uint64_t IP, const ORCState &ORC,
819 MCSymbol *Label = 0, bool Force = false) -> Error {
820 if (LastEmittedORC && ORC == *LastEmittedORC && !Force)
821 return Error::success();
822
823 LastEmittedORC = ORC;
824
825 if (++NumEmitted > NumORCEntries)
826 return createStringError(EC: errc::executable_format_error,
827 S: "exceeded the number of allocated ORC entries");
828
829 if (Label)
830 ORCUnwindIPSection->addRelocation(Offset: UnwindIPWriter.getOffset(), Symbol: Label,
831 Type: Relocation::getPC32(), /*Addend*/ 0);
832
833 const int32_t IPValue =
834 IP - ORCUnwindIPSection->getAddress() - UnwindIPWriter.getOffset();
835 if (Error E = UnwindIPWriter.writeInteger(Value: IPValue))
836 return E;
837
838 if (Error E = UnwindWriter.writeInteger(Value: ORC.SPOffset))
839 return E;
840 if (Error E = UnwindWriter.writeInteger(Value: ORC.BPOffset))
841 return E;
842 if (Error E = UnwindWriter.writeInteger(Value: ORC.Info))
843 return E;
844
845 return Error::success();
846 };
847
848 // Emit new ORC entries for the emitted function.
849 auto emitORC = [&](const FunctionFragment &FF) -> Error {
850 ORCState CurrentState = NullORC;
851 for (BinaryBasicBlock *BB : FF) {
852 for (MCInst &Inst : *BB) {
853 ErrorOr<ORCState> ErrorOrState =
854 BC.MIB->tryGetAnnotationAs<ORCState>(Inst, Name: "ORC");
855 if (!ErrorOrState || *ErrorOrState == CurrentState)
856 continue;
857
858 // Issue label for the instruction.
859 MCSymbol *Label =
860 BC.MIB->getOrCreateInstLabel(Inst, Name: "__ORC_", Ctx: BC.Ctx.get());
861
862 if (Error E = emitORCEntry(0, *ErrorOrState, Label))
863 return E;
864
865 CurrentState = *ErrorOrState;
866 }
867 }
868
869 return Error::success();
870 };
871
872 // Emit ORC entries for cold fragments. We assume that these fragments are
873 // emitted contiguously in memory using reserved space in the kernel. This
874 // assumption is validated in post-emit pass validateORCTables() where we
875 // check that ORC entries are sorted by their addresses.
876 auto emitColdORC = [&]() -> Error {
877 for (BinaryFunction &BF :
878 llvm::make_second_range(c&: BC.getBinaryFunctions())) {
879 if (!BC.shouldEmit(Function: BF))
880 continue;
881 for (FunctionFragment &FF : BF.getLayout().getSplitFragments())
882 if (Error E = emitORC(FF))
883 return E;
884 }
885
886 return Error::success();
887 };
888
889 bool ShouldEmitCold = !BC.BOLTReserved.empty();
890 for (ORCListEntry &Entry : ORCEntries) {
891 if (ShouldEmitCold && Entry.IP > BC.BOLTReserved.start()) {
892 if (Error E = emitColdORC())
893 return E;
894
895 // Emit terminator entry at the end of the reserved region.
896 if (Error E = emitORCEntry(BC.BOLTReserved.end(), NullORC))
897 return E;
898
899 ShouldEmitCold = false;
900 }
901
902 // Emit original entries for functions that we haven't modified.
903 if (!Entry.BF || !BC.shouldEmit(Function: *Entry.BF)) {
904 // Emit terminator only if it marks the start of a function.
905 if (Entry.ORC == NullORC && !Entry.BF)
906 continue;
907 if (Error E = emitORCEntry(Entry.IP, Entry.ORC))
908 return E;
909 continue;
910 }
911
912 // Emit all ORC entries for a function referenced by an entry and skip over
913 // the rest of entries for this function by resetting its ORC attribute.
914 if (Entry.BF->hasORC()) {
915 if (Error E = emitORC(Entry.BF->getLayout().getMainFragment()))
916 return E;
917 Entry.BF->setHasORC(false);
918 }
919 }
920
921 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitted " << NumEmitted
922 << " ORC entries\n");
923
924 // Populate ORC tables with a terminator entry with max address to match the
925 // original table sizes.
926 const uint64_t LastIP = std::numeric_limits<uint64_t>::max();
927 while (UnwindWriter.bytesRemaining()) {
928 if (Error E = emitORCEntry(LastIP, NullORC, nullptr, /*Force*/ true))
929 return E;
930 }
931
932 return Error::success();
933}
934
935Error LinuxKernelRewriter::validateORCTables() {
936 if (!ORCUnwindIPSection)
937 return Error::success();
938
939 AddressExtractor IPAE(
940 ORCUnwindIPSection->getOutputContents(), ORCUnwindIPSection->getAddress(),
941 BC.AsmInfo->isLittleEndian(), BC.AsmInfo->getCodePointerSize());
942 AddressExtractor::Cursor IPCursor(0);
943 uint64_t PrevIP = 0;
944 for (uint32_t Index = 0; Index < NumORCEntries; ++Index) {
945 const uint64_t IP = IPAE.getPCRelAddress32(C&: IPCursor);
946 if (!IPCursor)
947 return createStringError(EC: errc::executable_format_error,
948 Fmt: "out of bounds while reading ORC IP table: %s",
949 Vals: toString(E: IPCursor.takeError()).c_str());
950
951 assert(IP >= PrevIP && "Unsorted ORC table detected");
952 (void)PrevIP;
953 PrevIP = IP;
954 }
955
956 return Error::success();
957}
958
959/// The static call site table is created by objtool and contains entries in the
960/// following format:
961///
962/// struct static_call_site {
963/// s32 addr;
964/// s32 key;
965/// };
966///
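/// Both fields are 32-bit PC-relative offsets: `addr` refers to the patched
/// call (or jump) instruction and `key` to the associated static_call_key;
/// the reader below resolves both with getPCRelAddress32. Entry order is not
/// significant since the kernel re-sorts the table at boot time in
/// static_call_sort_entries().
///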
967Error LinuxKernelRewriter::readStaticCalls() {
968 const BinaryData *StaticCallTable =
969 BC.getBinaryDataByName(Name: "__start_static_call_sites");
970 if (!StaticCallTable)
971 return Error::success();
972
973 StaticCallTableAddress = StaticCallTable->getAddress();
974
975 const BinaryData *Stop = BC.getBinaryDataByName(Name: "__stop_static_call_sites");
976 if (!Stop)
977 return createStringError(EC: errc::executable_format_error,
978 S: "missing __stop_static_call_sites symbol");
979
980 ErrorOr<BinarySection &> ErrorOrSection =
981 BC.getSectionForAddress(Address: StaticCallTableAddress);
982 if (!ErrorOrSection)
983 return createStringError(EC: errc::executable_format_error,
984 S: "no section matching __start_static_call_sites");
985
986 StaticCallSection = *ErrorOrSection;
987 if (!StaticCallSection->containsAddress(Address: Stop->getAddress() - 1))
988 return createStringError(EC: errc::executable_format_error,
989 S: "__stop_static_call_sites not in the same section "
990 "as __start_static_call_sites");
991
992 if ((Stop->getAddress() - StaticCallTableAddress) % STATIC_CALL_ENTRY_SIZE)
993 return createStringError(EC: errc::executable_format_error,
994 S: "static call table size error");
995
996 const uint64_t SectionAddress = StaticCallSection->getAddress();
997 AddressExtractor AE(StaticCallSection->getContents(), SectionAddress,
998 BC.AsmInfo->isLittleEndian(),
999 BC.AsmInfo->getCodePointerSize());
1000 AddressExtractor::Cursor Cursor(StaticCallTableAddress - SectionAddress);
1001 uint32_t EntryID = 0;
1002 while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) {
1003 const uint64_t CallAddress = AE.getPCRelAddress32(C&: Cursor);
1004 const uint64_t KeyAddress = AE.getPCRelAddress32(C&: Cursor);
1005
1006 // Consume the status of the cursor.
1007 if (!Cursor)
1008 return createStringError(EC: errc::executable_format_error,
1009 Fmt: "out of bounds while reading static calls: %s",
1010 Vals: toString(E: Cursor.takeError()).c_str());
1011
1012 ++EntryID;
1013
1014 if (opts::DumpStaticCalls) {
1015 BC.outs() << "Static Call Site: " << EntryID << '\n';
1016 BC.outs() << "\tCallAddress: 0x" << Twine::utohexstr(Val: CallAddress)
1017 << "\n\tKeyAddress: 0x" << Twine::utohexstr(Val: KeyAddress)
1018 << '\n';
1019 }
1020
1021 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(Address: CallAddress);
1022 if (!BF)
1023 continue;
1024
1025 if (!BC.shouldEmit(Function: *BF))
1026 continue;
1027
1028 if (!BF->hasInstructions())
1029 continue;
1030
1031 MCInst *Inst = BF->getInstructionAtOffset(Offset: CallAddress - BF->getAddress());
1032 if (!Inst)
1033 return createStringError(EC: errc::executable_format_error,
1034 Fmt: "no instruction at call site address 0x%" PRIx64,
1035 Vals: CallAddress);
1036
1037 // Check for duplicate entries.
1038 if (BC.MIB->hasAnnotation(Inst: *Inst, Name: "StaticCall"))
1039 return createStringError(EC: errc::executable_format_error,
1040 Fmt: "duplicate static call site at 0x%" PRIx64,
1041 Vals: CallAddress);
1042
1043 BC.MIB->addAnnotation(Inst&: *Inst, Name: "StaticCall", Val: EntryID);
1044
1045 MCSymbol *Label =
1046 BC.MIB->getOrCreateInstLabel(Inst&: *Inst, Name: "__SC_", Ctx: BC.Ctx.get());
1047
1048 StaticCallEntries.push_back(x: {.ID: EntryID, .Function: BF, .Label: Label});
1049 }
1050
1051 BC.outs() << "BOLT-INFO: parsed " << StaticCallEntries.size()
1052 << " static call entries\n";
1053
1054 return Error::success();
1055}
1056
1057/// The static call table is sorted during boot time in
1058/// static_call_sort_entries(). This makes it possible to update existing
1059/// entries in-place ignoring their relative order.
1060Error LinuxKernelRewriter::rewriteStaticCalls() {
1061 if (!StaticCallTableAddress || !StaticCallSection)
1062 return Error::success();
1063
1064 for (auto &Entry : StaticCallEntries) {
1065 if (!Entry.Function)
1066 continue;
1067
1068 BinaryFunction &BF = *Entry.Function;
1069 if (!BC.shouldEmit(Function: BF))
1070 continue;
1071
1072 // Create a relocation against the label.
1073 const uint64_t EntryOffset = StaticCallTableAddress -
1074 StaticCallSection->getAddress() +
1075 (Entry.ID - 1) * STATIC_CALL_ENTRY_SIZE;
1076 StaticCallSection->addRelocation(Offset: EntryOffset, Symbol: Entry.Label,
1077 Type: ELF::R_X86_64_PC32, /*Addend*/ 0);
1078 }
1079
1080 return Error::success();
1081}
1082
1083/// Instructions that access user-space memory can cause page faults. These
1084/// faults will be handled by the kernel and execution will resume at the fixup
1085/// code location if the address was invalid. The kernel uses the exception
1086/// table to match the faulting instruction to its fixup. The table consists of
1087/// the following entries:
1088///
1089/// struct exception_table_entry {
1090/// int insn;
1091/// int fixup;
1092/// int data;
1093/// };
1094///
1095/// More info at:
1096/// https://www.kernel.org/doc/Documentation/x86/exception-tables.txt
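///
/// All three fields are 32-bit: `insn` and `fixup` are PC-relative offsets to
/// the faulting instruction and its fixup code respectively, while `data`
/// carries handler-specific information; the reader below resolves the first
/// two with getPCRelAddress32 and reads `data` as a plain u32.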
1097Error LinuxKernelRewriter::readExceptionTable() {
1098 ExceptionsSection = BC.getUniqueSectionByName(SectionName: "__ex_table");
1099 if (!ExceptionsSection)
1100 return Error::success();
1101
1102 if (ExceptionsSection->getSize() % EXCEPTION_TABLE_ENTRY_SIZE)
1103 return createStringError(EC: errc::executable_format_error,
1104 S: "exception table size error");
1105
1106 AddressExtractor AE(
1107 ExceptionsSection->getContents(), ExceptionsSection->getAddress(),
1108 BC.AsmInfo->isLittleEndian(), BC.AsmInfo->getCodePointerSize());
1109 AddressExtractor::Cursor Cursor(0);
1110 uint32_t EntryID = 0;
1111 while (Cursor && Cursor.tell() < ExceptionsSection->getSize()) {
1112 const uint64_t InstAddress = AE.getPCRelAddress32(C&: Cursor);
1113 const uint64_t FixupAddress = AE.getPCRelAddress32(C&: Cursor);
1114 const uint64_t Data = AE.getU32(C&: Cursor);
1115
1116 // Consume the status of the cursor.
1117 if (!Cursor)
1118 return createStringError(
1119 EC: errc::executable_format_error,
1120 Fmt: "out of bounds while reading exception table: %s",
1121 Vals: toString(E: Cursor.takeError()).c_str());
1122
1123 ++EntryID;
1124
1125 if (opts::DumpExceptions) {
1126 BC.outs() << "Exception Entry: " << EntryID << '\n';
1127 BC.outs() << "\tInsn: 0x" << Twine::utohexstr(Val: InstAddress) << '\n'
1128 << "\tFixup: 0x" << Twine::utohexstr(Val: FixupAddress) << '\n'
1129 << "\tData: 0x" << Twine::utohexstr(Val: Data) << '\n';
1130 }
1131
1132 MCInst *Inst = nullptr;
1133 MCSymbol *FixupLabel = nullptr;
1134
1135 BinaryFunction *InstBF = BC.getBinaryFunctionContainingAddress(Address: InstAddress);
1136 if (InstBF && BC.shouldEmit(Function: *InstBF)) {
1137 Inst = InstBF->getInstructionAtOffset(Offset: InstAddress - InstBF->getAddress());
1138 if (!Inst)
1139 return createStringError(EC: errc::executable_format_error,
1140 Fmt: "no instruction at address 0x%" PRIx64
1141 " in exception table",
1142 Vals: InstAddress);
1143 BC.MIB->addAnnotation(Inst&: *Inst, Name: "ExceptionEntry", Val: EntryID);
1144 FunctionsWithExceptions.insert(V: InstBF);
1145 }
1146
1147 if (!InstBF && opts::Verbosity) {
1148 BC.outs() << "BOLT-INFO: no function matches instruction at 0x"
1149 << Twine::utohexstr(Val: InstAddress)
1150 << " referenced by Linux exception table\n";
1151 }
1152
1153 BinaryFunction *FixupBF =
1154 BC.getBinaryFunctionContainingAddress(Address: FixupAddress);
1155 if (FixupBF && BC.shouldEmit(Function: *FixupBF)) {
1156 const uint64_t Offset = FixupAddress - FixupBF->getAddress();
1157 if (!FixupBF->getInstructionAtOffset(Offset))
1158 return createStringError(EC: errc::executable_format_error,
1159 Fmt: "no instruction at fixup address 0x%" PRIx64
1160 " in exception table",
1161 Vals: FixupAddress);
1162 FixupLabel = Offset ? FixupBF->addEntryPointAtOffset(Offset)
1163 : FixupBF->getSymbol();
1164 if (Inst)
1165 BC.MIB->addAnnotation(Inst&: *Inst, Name: "Fixup", Val: FixupLabel->getName());
1166 FunctionsWithExceptions.insert(V: FixupBF);
1167 }
1168
1169 if (!FixupBF && opts::Verbosity) {
1170 BC.outs() << "BOLT-INFO: no function matches fixup code at 0x"
1171 << Twine::utohexstr(Val: FixupAddress)
1172 << " referenced by Linux exception table\n";
1173 }
1174 }
1175
1176 BC.outs() << "BOLT-INFO: parsed "
1177 << ExceptionsSection->getSize() / EXCEPTION_TABLE_ENTRY_SIZE
1178 << " exception table entries\n";
1179
1180 return Error::success();
1181}
1182
1183/// Depending on the value of CONFIG_BUILDTIME_TABLE_SORT, the kernel expects
1184/// the exception table to be sorted. Hence we have to sort it after code
1185/// reordering.
1186Error LinuxKernelRewriter::rewriteExceptionTable() {
1187 // Disable output of functions with exceptions before rewrite support is
1188 // added.
1189 for (BinaryFunction *BF : FunctionsWithExceptions)
1190 BF->setSimple(false);
1191
1192 return Error::success();
1193}
1194
/// The .parainstructions section contains information for patching paravirtual
/// call instructions at runtime. The entries in the section are in the form:
1197///
1198/// struct paravirt_patch_site {
1199/// u8 *instr; /* original instructions */
1200/// u8 type; /* type of this instruction */
1201/// u8 len; /* length of original instruction */
1202/// };
1203///
/// Note that the structures are aligned at an 8-byte boundary.
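///
/// Unlike most other tables handled in this file, `instr` is stored as an
/// absolute 64-bit pointer rather than a PC-relative offset, which is why the
/// reader below extracts it with getU64.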
1205Error LinuxKernelRewriter::readParaInstructions() {
1206 ParavirtualPatchSection = BC.getUniqueSectionByName(SectionName: ".parainstructions");
1207 if (!ParavirtualPatchSection)
1208 return Error::success();
1209
1210 DataExtractor DE(ParavirtualPatchSection->getContents(),
1211 BC.AsmInfo->isLittleEndian(),
1212 BC.AsmInfo->getCodePointerSize());
1213 uint32_t EntryID = 0;
1214 DataExtractor::Cursor Cursor(0);
1215 while (Cursor && !DE.eof(C: Cursor)) {
1216 const uint64_t NextOffset = alignTo(Size: Cursor.tell(), A: Align(PARA_PATCH_ALIGN));
1217 if (!DE.isValidOffset(offset: NextOffset))
1218 break;
1219
1220 Cursor.seek(NewOffSet: NextOffset);
1221
1222 const uint64_t InstrLocation = DE.getU64(C&: Cursor);
1223 const uint8_t Type = DE.getU8(C&: Cursor);
1224 const uint8_t Len = DE.getU8(C&: Cursor);
1225
1226 if (!Cursor)
1227 return createStringError(
1228 EC: errc::executable_format_error,
1229 Fmt: "out of bounds while reading .parainstructions: %s",
1230 Vals: toString(E: Cursor.takeError()).c_str());
1231
1232 ++EntryID;
1233
1234 if (opts::DumpParavirtualPatchSites) {
1235 BC.outs() << "Paravirtual patch site: " << EntryID << '\n';
1236 BC.outs() << "\tInstr: 0x" << Twine::utohexstr(Val: InstrLocation)
1237 << "\n\tType: 0x" << Twine::utohexstr(Val: Type) << "\n\tLen: 0x"
1238 << Twine::utohexstr(Val: Len) << '\n';
1239 }
1240
1241 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(Address: InstrLocation);
1242 if (!BF && opts::Verbosity) {
1243 BC.outs() << "BOLT-INFO: no function matches address 0x"
1244 << Twine::utohexstr(Val: InstrLocation)
1245 << " referenced by paravirutal patch site\n";
1246 }
1247
1248 if (BF && BC.shouldEmit(Function: *BF)) {
1249 MCInst *Inst =
1250 BF->getInstructionAtOffset(Offset: InstrLocation - BF->getAddress());
1251 if (!Inst)
1252 return createStringError(EC: errc::executable_format_error,
1253 Fmt: "no instruction at address 0x%" PRIx64
1254 " in paravirtual call site %d",
1255 Vals: InstrLocation, Vals: EntryID);
1256 BC.MIB->addAnnotation(Inst&: *Inst, Name: "ParaSite", Val: EntryID);
1257 }
1258 }
1259
1260 BC.outs() << "BOLT-INFO: parsed " << EntryID << " paravirtual patch sites\n";
1261
1262 return Error::success();
1263}
1264
1265void LinuxKernelRewriter::skipFunctionsWithAnnotation(
1266 StringRef Annotation) const {
1267 for (BinaryFunction &BF : llvm::make_second_range(c&: BC.getBinaryFunctions())) {
1268 if (!BC.shouldEmit(Function: BF))
1269 continue;
1270 for (const BinaryBasicBlock &BB : BF) {
1271 const bool HasAnnotation = llvm::any_of(Range: BB, P: [&](const MCInst &Inst) {
1272 return BC.MIB->hasAnnotation(Inst, Name: Annotation);
1273 });
1274 if (HasAnnotation) {
1275 BF.setSimple(false);
1276 break;
1277 }
1278 }
1279 }
1280}
1281
1282Error LinuxKernelRewriter::rewriteParaInstructions() {
1283 // Disable output of functions with paravirtual instructions before the
1284 // rewrite support is complete.
1285 skipFunctionsWithAnnotation(Annotation: "ParaSite");
1286
1287 return Error::success();
1288}
1289
1290/// Process __bug_table section.
1291/// This section contains information useful for kernel debugging, mostly
1292/// utilized by WARN()/WARN_ON() macros and deprecated BUG()/BUG_ON().
1293///
1294/// Each entry in the section is a struct bug_entry that contains a pointer to
1295/// the ud2 instruction corresponding to the bug, corresponding file name (both
1296/// pointers use PC relative offset addressing), line number, and flags.
1297/// The definition of the struct bug_entry can be found in
1298/// `include/asm-generic/bug.h`. The first entry in the struct is an instruction
1299/// address encoded as a PC-relative offset. In theory, it could be an absolute
1300/// address if CONFIG_GENERIC_BUG_RELATIVE_POINTERS is not set, but in practice
1301/// the kernel code relies on it being a relative offset on x86-64.
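///
/// With CONFIG_GENERIC_BUG_RELATIVE_POINTERS and CONFIG_DEBUG_BUGVERBOSE the
/// entry looks roughly like the following sketch (the exact layout is
/// config-dependent):
///
///   struct bug_entry {
///     int bug_addr_disp;  /* PC-relative pointer to the ud2 instruction */
///     int file_disp;      /* PC-relative pointer to the file name */
///     unsigned short line;
///     unsigned short flags;
///   };
///
/// which matches BUG_TABLE_ENTRY_SIZE (12 bytes). Only the first field is read
/// and rewritten here; the rest of each entry is left untouched.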
1302Error LinuxKernelRewriter::readBugTable() {
1303 BugTableSection = BC.getUniqueSectionByName(SectionName: "__bug_table");
1304 if (!BugTableSection)
1305 return Error::success();
1306
1307 if (BugTableSection->getSize() % BUG_TABLE_ENTRY_SIZE)
1308 return createStringError(EC: errc::executable_format_error,
1309 S: "bug table size error");
1310
1311 AddressExtractor AE(
1312 BugTableSection->getContents(), BugTableSection->getAddress(),
1313 BC.AsmInfo->isLittleEndian(), BC.AsmInfo->getCodePointerSize());
1314 AddressExtractor::Cursor Cursor(0);
1315 uint32_t EntryID = 0;
1316 while (Cursor && Cursor.tell() < BugTableSection->getSize()) {
1317 const uint64_t Pos = Cursor.tell();
1318 const uint64_t InstAddress = AE.getPCRelAddress32(C&: Cursor);
1319 Cursor.seek(NewOffSet: Pos + BUG_TABLE_ENTRY_SIZE);
1320
1321 if (!Cursor)
1322 return createStringError(EC: errc::executable_format_error,
1323 Fmt: "out of bounds while reading __bug_table: %s",
1324 Vals: toString(E: Cursor.takeError()).c_str());
1325
1326 ++EntryID;
1327
1328 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(Address: InstAddress);
1329 if (!BF && opts::Verbosity) {
1330 BC.outs() << "BOLT-INFO: no function matches address 0x"
1331 << Twine::utohexstr(Val: InstAddress)
1332 << " referenced by bug table\n";
1333 }
1334
1335 if (BF && BC.shouldEmit(Function: *BF)) {
1336 MCInst *Inst = BF->getInstructionAtOffset(Offset: InstAddress - BF->getAddress());
1337 if (!Inst)
1338 return createStringError(EC: errc::executable_format_error,
1339 Fmt: "no instruction at address 0x%" PRIx64
1340 " referenced by bug table entry %d",
1341 Vals: InstAddress, Vals: EntryID);
1342 BC.MIB->addAnnotation(Inst&: *Inst, Name: "BugEntry", Val: EntryID);
1343
1344 FunctionBugList[BF].push_back(Elt: EntryID);
1345 }
1346 }
1347
1348 BC.outs() << "BOLT-INFO: parsed " << EntryID << " bug table entries\n";
1349
1350 return Error::success();
1351}
1352
1353/// find_bug() uses linear search to match an address to an entry in the bug
1354/// table. Hence, there is no need to sort entries when rewriting the table.
1355/// When we need to erase an entry, we set its instruction address to zero.
1356Error LinuxKernelRewriter::rewriteBugTable() {
1357 if (!BugTableSection)
1358 return Error::success();
1359
1360 for (BinaryFunction &BF : llvm::make_second_range(c&: BC.getBinaryFunctions())) {
1361 if (!BC.shouldEmit(Function: BF))
1362 continue;
1363
1364 if (!FunctionBugList.count(Val: &BF))
1365 continue;
1366
1367 // Bugs that will be emitted for this function.
1368 DenseSet<uint32_t> EmittedIDs;
1369 for (BinaryBasicBlock &BB : BF) {
1370 for (MCInst &Inst : BB) {
1371 if (!BC.MIB->hasAnnotation(Inst, Name: "BugEntry"))
1372 continue;
1373 const uint32_t ID = BC.MIB->getAnnotationAs<uint32_t>(Inst, Name: "BugEntry");
1374 EmittedIDs.insert(V: ID);
1375
1376 // Create a relocation entry for this bug entry.
1377 MCSymbol *Label =
1378 BC.MIB->getOrCreateInstLabel(Inst, Name: "__BUG_", Ctx: BC.Ctx.get());
1379 const uint64_t EntryOffset = (ID - 1) * BUG_TABLE_ENTRY_SIZE;
1380 BugTableSection->addRelocation(Offset: EntryOffset, Symbol: Label, Type: ELF::R_X86_64_PC32,
1381 /*Addend*/ 0);
1382 }
1383 }
1384
    // Clear bug entries that were not emitted for this function, e.g. as a
    // result of DCE, by setting their instruction address to zero.
1387 for (const uint32_t ID : FunctionBugList[&BF]) {
1388 if (!EmittedIDs.count(V: ID)) {
1389 const uint64_t EntryOffset = (ID - 1) * BUG_TABLE_ENTRY_SIZE;
1390 BugTableSection->addRelocation(Offset: EntryOffset, Symbol: nullptr, Type: ELF::R_X86_64_PC32,
1391 /*Addend*/ 0);
1392 }
1393 }
1394 }
1395
1396 return Error::success();
1397}
1398
1399/// The kernel can replace certain instruction sequences depending on hardware
1400/// it is running on and features specified during boot time. The information
/// about alternative instruction sequences is stored in the .altinstructions
/// section. The format of entries in this section is defined in
1403/// arch/x86/include/asm/alternative.h:
1404///
1405/// struct alt_instr {
1406/// s32 instr_offset;
1407/// s32 repl_offset;
1408/// uXX feature;
1409/// u8 instrlen;
1410/// u8 replacementlen;
1411/// u8 padlen; // present in older kernels
1412/// } __packed;
1413///
1414/// Note that the structure is packed.
1415///
/// Since the size of the "feature" field could be either u16 or u32, and the
/// presence of "padlen" is unknown, we attempt to parse the .altinstructions
/// section using all possible combinations (four at this time). Since we
/// validate the contents of the section and its size, the detection works
/// quite well.
1420/// Still, we leave the user the opportunity to specify these features on the
1421/// command line and skip the guesswork.
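///
/// The entry size is thus 4 + 4 + sizeof(feature) + 2, plus 1 when "padlen"
/// is present, i.e. 12, 13, 14, or 15 bytes for the four combinations tried
/// below; a combination is accepted only if the whole section parses cleanly
/// with it.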
1422Error LinuxKernelRewriter::readAltInstructions() {
1423 AltInstrSection = BC.getUniqueSectionByName(SectionName: ".altinstructions");
1424 if (!AltInstrSection)
1425 return Error::success();
1426
1427 // Presence of "padlen" field.
1428 std::vector<bool> PadLenVariants;
1429 if (opts::AltInstHasPadLen.getNumOccurrences())
1430 PadLenVariants.push_back(x: opts::AltInstHasPadLen);
1431 else
1432 PadLenVariants = {false, true};
1433
1434 // Size (in bytes) variants of "feature" field.
1435 std::vector<uint32_t> FeatureSizeVariants;
1436 if (opts::AltInstFeatureSize.getNumOccurrences())
1437 FeatureSizeVariants.push_back(x: opts::AltInstFeatureSize);
1438 else
1439 FeatureSizeVariants = {2, 4};
1440
1441 for (bool AltInstHasPadLen : PadLenVariants) {
1442 for (uint32_t AltInstFeatureSize : FeatureSizeVariants) {
1443 LLVM_DEBUG({
1444 dbgs() << "BOLT-DEBUG: trying AltInstHasPadLen = " << AltInstHasPadLen
1445 << "; AltInstFeatureSize = " << AltInstFeatureSize << ";\n";
1446 });
1447 if (Error E = tryReadAltInstructions(AltInstFeatureSize, AltInstHasPadLen,
1448 /*ParseOnly*/ true)) {
1449 consumeError(Err: std::move(E));
1450 continue;
1451 }
1452
1453 LLVM_DEBUG(dbgs() << "Matched .altinstructions format\n");
1454
1455 if (!opts::AltInstHasPadLen.getNumOccurrences())
1456 BC.outs() << "BOLT-INFO: setting --" << opts::AltInstHasPadLen.ArgStr
1457 << '=' << AltInstHasPadLen << '\n';
1458
1459 if (!opts::AltInstFeatureSize.getNumOccurrences())
1460 BC.outs() << "BOLT-INFO: setting --" << opts::AltInstFeatureSize.ArgStr
1461 << '=' << AltInstFeatureSize << '\n';
1462
1463 return tryReadAltInstructions(AltInstFeatureSize, AltInstHasPadLen,
1464 /*ParseOnly*/ false);
1465 }
1466 }
1467
1468 // We couldn't match the format. Read again to properly propagate the error
1469 // to the user.
1470 return tryReadAltInstructions(AltInstFeatureSize: opts::AltInstFeatureSize,
1471 AltInstHasPadLen: opts::AltInstHasPadLen, /*ParseOnly*/ false);
1472}
1473
1474Error LinuxKernelRewriter::tryReadAltInstructions(uint32_t AltInstFeatureSize,
1475 bool AltInstHasPadLen,
1476 bool ParseOnly) {
1477 AddressExtractor AE(
1478 AltInstrSection->getContents(), AltInstrSection->getAddress(),
1479 BC.AsmInfo->isLittleEndian(), BC.AsmInfo->getCodePointerSize());
1480 AddressExtractor::Cursor Cursor(0);
1481 uint64_t EntryID = 0;
1482 while (Cursor && !AE.eof(C: Cursor)) {
1483 const uint64_t OrgInstAddress = AE.getPCRelAddress32(C&: Cursor);
1484 const uint64_t AltInstAddress = AE.getPCRelAddress32(C&: Cursor);
1485 const uint64_t Feature = AE.getUnsigned(C&: Cursor, Size: AltInstFeatureSize);
1486 const uint8_t OrgSize = AE.getU8(C&: Cursor);
1487 const uint8_t AltSize = AE.getU8(C&: Cursor);
1488
1489 // Older kernels may have the padlen field.
1490 const uint8_t PadLen = AltInstHasPadLen ? AE.getU8(C&: Cursor) : 0;
1491
1492 if (!Cursor)
1493 return createStringError(
1494 EC: errc::executable_format_error,
1495 Fmt: "out of bounds while reading .altinstructions: %s",
1496 Vals: toString(E: Cursor.takeError()).c_str());
1497
1498 ++EntryID;
1499
1500 if (opts::DumpAltInstructions) {
1501 BC.outs() << "Alternative instruction entry: " << EntryID
1502 << "\n\tOrg: 0x" << Twine::utohexstr(Val: OrgInstAddress)
1503 << "\n\tAlt: 0x" << Twine::utohexstr(Val: AltInstAddress)
1504 << "\n\tFeature: 0x" << Twine::utohexstr(Val: Feature)
1505 << "\n\tOrgSize: " << (int)OrgSize
1506 << "\n\tAltSize: " << (int)AltSize << '\n';
1507 if (AltInstHasPadLen)
1508 BC.outs() << "\tPadLen: " << (int)PadLen << '\n';
1509 }
1510
1511 if (AltSize > OrgSize)
1512 return createStringError(EC: errc::executable_format_error,
1513 S: "error reading .altinstructions");
1514
1515 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(Address: OrgInstAddress);
1516 if (!BF && opts::Verbosity) {
1517 BC.outs() << "BOLT-INFO: no function matches address 0x"
1518 << Twine::utohexstr(Val: OrgInstAddress)
1519 << " of instruction from .altinstructions\n";
1520 }
1521
1522 BinaryFunction *AltBF =
1523 BC.getBinaryFunctionContainingAddress(Address: AltInstAddress);
1524 if (!ParseOnly && AltBF && BC.shouldEmit(Function: *AltBF)) {
1525 BC.errs()
1526 << "BOLT-WARNING: alternative instruction sequence found in function "
1527 << *AltBF << '\n';
1528 AltBF->setIgnored();
1529 }
1530
1531 if (!BF || !BF->hasInstructions())
1532 continue;
1533
1534 if (OrgInstAddress + OrgSize > BF->getAddress() + BF->getSize())
1535 return createStringError(EC: errc::executable_format_error,
1536 S: "error reading .altinstructions");
1537
1538 MCInst *Inst =
1539 BF->getInstructionAtOffset(Offset: OrgInstAddress - BF->getAddress());
1540 if (!Inst)
1541 return createStringError(EC: errc::executable_format_error,
1542 Fmt: "no instruction at address 0x%" PRIx64
1543 " referenced by .altinstructions entry %d",
1544 Vals: OrgInstAddress, Vals: EntryID);
1545
1546 if (ParseOnly)
1547 continue;
1548
    // There could be more than one alternative instruction sequence for the
    // same original instruction. Annotate each alternative separately.
1551 std::string AnnotationName = "AltInst";
1552 unsigned N = 2;
1553 while (BC.MIB->hasAnnotation(Inst: *Inst, Name: AnnotationName))
1554 AnnotationName = "AltInst" + std::to_string(val: N++);
1555
1556 BC.MIB->addAnnotation(Inst&: *Inst, Name: AnnotationName, Val: EntryID);
1557
1558 // Annotate all instructions from the original sequence. Note that it's not
1559 // the most efficient way to look for instructions in the address range,
1560 // but since alternative instructions are uncommon, it will do for now.
1561 for (uint32_t Offset = 1; Offset < OrgSize; ++Offset) {
1562 Inst = BF->getInstructionAtOffset(Offset: OrgInstAddress + Offset -
1563 BF->getAddress());
1564 if (Inst)
1565 BC.MIB->addAnnotation(Inst&: *Inst, Name: AnnotationName, Val: EntryID);
1566 }
1567 }
1568
1569 if (!ParseOnly)
1570 BC.outs() << "BOLT-INFO: parsed " << EntryID
1571 << " alternative instruction entries\n";
1572
1573 return Error::success();
1574}
1575
1576void LinuxKernelRewriter::processAltInstructionsPostCFG() {
1577 // Disable optimization and output of functions with alt instructions before
1578 // the rewrite support is complete. Alt instructions can modify the control
1579 // flow, hence we may end up deleting seemingly unreachable code.
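  //
  // Note: the first alternative for an instruction is always annotated with
  // the plain "AltInst" name (additional alternatives get "AltInst2",
  // "AltInst3", ...), so checking for "AltInst" alone is enough to catch every
  // function that has alternatives.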
  skipFunctionsWithAnnotation("AltInst");
}

/// When the Linux kernel needs to handle an error associated with a given PCI
/// device, it uses a table stored in the .pci_fixup section to locate a fixup
/// code specific to the vendor and the problematic device. The section
/// contains a list of the following structures defined in include/linux/pci.h:
///
/// struct pci_fixup {
///   u16 vendor;               /* Or PCI_ANY_ID */
///   u16 device;               /* Or PCI_ANY_ID */
///   u32 class;                /* Or PCI_ANY_ID */
///   unsigned int class_shift; /* should be 0, 8, 16 */
///   int hook_offset;
/// };
///
/// Normally, the hook will point to a function start and we don't have to
/// update the pointer if we are not relocating functions. Hence, while reading
/// the table we validate this assumption. If a function has a fixup code in
/// the middle of its body, we issue a warning and ignore it.
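///
/// Note: hook_offset is stored as a self-relative (PC-relative) 32-bit offset
/// to the fixup function, which is why the parser below resolves it with
/// getPCRelAddress32() rather than treating it as an absolute pointer.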
Error LinuxKernelRewriter::readPCIFixupTable() {
  PCIFixupSection = BC.getUniqueSectionByName(".pci_fixup");
  if (!PCIFixupSection)
    return Error::success();

  if (PCIFixupSection->getSize() % PCI_FIXUP_ENTRY_SIZE)
    return createStringError(errc::executable_format_error,
                             "PCI fixup table size error");

  AddressExtractor AE(
      PCIFixupSection->getContents(), PCIFixupSection->getAddress(),
      BC.AsmInfo->isLittleEndian(), BC.AsmInfo->getCodePointerSize());
  AddressExtractor::Cursor Cursor(0);
  uint64_t EntryID = 0;
  while (Cursor && !AE.eof(Cursor)) {
    const uint16_t Vendor = AE.getU16(Cursor);
    const uint16_t Device = AE.getU16(Cursor);
    const uint32_t Class = AE.getU32(Cursor);
    const uint32_t ClassShift = AE.getU32(Cursor);
    const uint64_t HookAddress = AE.getPCRelAddress32(Cursor);

    if (!Cursor)
      return createStringError(errc::executable_format_error,
                               "out of bounds while reading .pci_fixup: %s",
                               toString(Cursor.takeError()).c_str());

    ++EntryID;

    if (opts::DumpPCIFixups) {
      BC.outs() << "PCI fixup entry: " << EntryID << "\n\tVendor 0x"
                << Twine::utohexstr(Vendor) << "\n\tDevice: 0x"
                << Twine::utohexstr(Device) << "\n\tClass: 0x"
                << Twine::utohexstr(Class) << "\n\tClassShift: 0x"
                << Twine::utohexstr(ClassShift) << "\n\tHookAddress: 0x"
                << Twine::utohexstr(HookAddress) << '\n';
    }

    BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(HookAddress);
    if (!BF && opts::Verbosity) {
      BC.outs() << "BOLT-INFO: no function matches address 0x"
                << Twine::utohexstr(HookAddress)
                << " of hook from .pci_fixup\n";
    }

    if (!BF || !BC.shouldEmit(*BF))
      continue;

    if (const uint64_t Offset = HookAddress - BF->getAddress()) {
      BC.errs() << "BOLT-WARNING: PCI fixup detected in the middle of function "
                << *BF << " at offset 0x" << Twine::utohexstr(Offset) << '\n';
      BF->setSimple(false);
    }
  }

  BC.outs() << "BOLT-INFO: parsed " << EntryID << " PCI fixup entries\n";

  return Error::success();
}

/// Runtime code modification used by static keys is the most ubiquitous
/// self-modifying feature of the Linux kernel. The idea is to eliminate the
/// condition check and associated conditional jump on a hot path if that
/// condition (based on a boolean value of a static key) does not change often.
/// Whenever the condition changes, the kernel runtime modifies all code paths
/// associated with that key, flipping the code between a nop and an
/// (unconditional) jump. The information about the code is stored in a static
/// key jump table and contains a list of entries of the following type from
/// include/linux/jump_label.h:
///
/// struct jump_entry {
///   s32 code;
///   s32 target;
///   long key; // key may be far away from the core kernel under KASLR
/// };
///
/// The list does not have to be stored in any sorted way, but it is sorted at
/// boot time (or module initialization time) first by "key" and then by
/// "code". jump_label_sort_entries() is responsible for sorting the table.
///
/// The key field in the jump_entry structure uses the lower two bits of the
/// key address (which itself is aligned) to store extra information. We are
/// interested in the lower bit, which indicates whether the key is likely to
/// be set on the code path associated with this jump_entry.
///
/// static_key_{enable,disable}() functions modify the code based on the key
/// and jump table entries.
///
/// jump_label_update() updates all code entries for a given key. Batch mode is
/// used for x86.
///
/// The actual patching happens in text_poke_bp_batch(), which overwrites the
/// first byte of the sequence with int3 before proceeding with the actual code
/// replacement.
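///
/// For illustration only (x86-64 encodings, not taken verbatim from the kernel
/// sources): a static-keys jump site is either 2 or 5 bytes long, and the
/// kernel flips it between a jump and a same-sized nop:
///
///   2-byte site:  eb XX          (jmp rel8)   <->  66 90          (nop)
///   5-byte site:  e9 XX XX XX XX (jmp rel32)  <->  0f 1f 44 00 00 (nop)
///
/// This matches the size-2/size-5 checks performed below.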
Error LinuxKernelRewriter::readStaticKeysJumpTable() {
  const BinaryData *StaticKeysJumpTable =
      BC.getBinaryDataByName("__start___jump_table");
  if (!StaticKeysJumpTable)
    return Error::success();

  StaticKeysJumpTableAddress = StaticKeysJumpTable->getAddress();

  const BinaryData *Stop = BC.getBinaryDataByName("__stop___jump_table");
  if (!Stop)
    return createStringError(errc::executable_format_error,
                             "missing __stop___jump_table symbol");

  ErrorOr<BinarySection &> ErrorOrSection =
      BC.getSectionForAddress(StaticKeysJumpTableAddress);
  if (!ErrorOrSection)
    return createStringError(errc::executable_format_error,
                             "no section matching __start___jump_table");

  StaticKeysJumpSection = *ErrorOrSection;
  if (!StaticKeysJumpSection->containsAddress(Stop->getAddress() - 1))
    return createStringError(errc::executable_format_error,
                             "__stop___jump_table not in the same section "
                             "as __start___jump_table");

  if ((Stop->getAddress() - StaticKeysJumpTableAddress) %
      STATIC_KEYS_JUMP_ENTRY_SIZE)
    return createStringError(errc::executable_format_error,
                             "static keys jump table size error");

  const uint64_t SectionAddress = StaticKeysJumpSection->getAddress();
  AddressExtractor AE(StaticKeysJumpSection->getContents(), SectionAddress,
                      BC.AsmInfo->isLittleEndian(),
                      BC.AsmInfo->getCodePointerSize());
  AddressExtractor::Cursor Cursor(StaticKeysJumpTableAddress - SectionAddress);
  uint32_t EntryID = 0;
  while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) {
    const uint64_t JumpAddress = AE.getPCRelAddress32(Cursor);
    const uint64_t TargetAddress = AE.getPCRelAddress32(Cursor);
    const uint64_t KeyAddress = AE.getPCRelAddress64(Cursor);

    // Consume the status of the cursor.
    if (!Cursor)
      return createStringError(
          errc::executable_format_error,
          "out of bounds while reading static keys jump table: %s",
          toString(Cursor.takeError()).c_str());

    ++EntryID;

    JumpInfo.push_back(JumpInfoEntry());
    JumpInfoEntry &Info = JumpInfo.back();
    Info.Likely = KeyAddress & 1;

    if (opts::DumpStaticKeys) {
      BC.outs() << "Static key jump entry: " << EntryID
                << "\n\tJumpAddress: 0x" << Twine::utohexstr(JumpAddress)
                << "\n\tTargetAddress: 0x" << Twine::utohexstr(TargetAddress)
                << "\n\tKeyAddress: 0x" << Twine::utohexstr(KeyAddress)
                << "\n\tIsLikely: " << Info.Likely << '\n';
    }

    BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(JumpAddress);
    if (!BF && opts::Verbosity) {
      BC.outs()
          << "BOLT-INFO: no function matches address 0x"
          << Twine::utohexstr(JumpAddress)
          << " of jump instruction referenced from static keys jump table\n";
    }

    if (!BF || !BC.shouldEmit(*BF))
      continue;

    MCInst *Inst = BF->getInstructionAtOffset(JumpAddress - BF->getAddress());
    if (!Inst)
      return createStringError(
          errc::executable_format_error,
          "no instruction at static keys jump site address 0x%" PRIx64,
          JumpAddress);

    if (!BF->containsAddress(TargetAddress))
      return createStringError(
          errc::executable_format_error,
          "invalid target of static keys jump at 0x%" PRIx64 " : 0x%" PRIx64,
          JumpAddress, TargetAddress);

    const bool IsBranch = BC.MIB->isBranch(*Inst);
    if (!IsBranch && !BC.MIB->isNoop(*Inst))
      return createStringError(errc::executable_format_error,
                               "jump or nop expected at address 0x%" PRIx64,
                               JumpAddress);

    const uint64_t Size = BC.computeInstructionSize(*Inst);
    if (Size != 2 && Size != 5) {
      return createStringError(
          errc::executable_format_error,
          "unexpected static keys jump size at address 0x%" PRIx64,
          JumpAddress);
    }

    MCSymbol *Target = BF->registerBranch(JumpAddress, TargetAddress);
    MCInst StaticKeyBranch;

    // Create a conditional branch instruction. The actual condition code
    // should not matter as long as it's a valid one. The instruction should be
    // treated as a conditional branch for control-flow purposes. Before we
    // emit the code, it will be converted to a different instruction in
    // rewriteStaticKeysJumpTable().
    //
    // NB: for older kernels, under the LongJumpLabels option, we create a long
    // conditional branch to guarantee that the code size estimation takes into
    // account the extra bytes needed for the long branch that will be used by
    // the kernel patching code. Newer kernels can work with both short and
    // long branches. The code for a long conditional branch is larger than
    // that for an unconditional one, so we are pessimistic in our estimation.
    if (opts::LongJumpLabels)
      BC.MIB->createLongCondBranch(StaticKeyBranch, Target, 0, BC.Ctx.get());
    else
      BC.MIB->createCondBranch(StaticKeyBranch, Target, 0, BC.Ctx.get());
    BC.MIB->moveAnnotations(std::move(*Inst), StaticKeyBranch);
    BC.MIB->setDynamicBranch(StaticKeyBranch, EntryID);
    *Inst = StaticKeyBranch;

    // IsBranch = InitialValue ^ LIKELY
    //
    //    0          0            0
    //    1          0            1
    //    1          1            0
    //    0          1            1
    //
    // => InitialValue = IsBranch ^ LIKELY
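    //
    // For example, a nop at the jump site (IsBranch == 0) combined with a
    // "likely" key (LIKELY == 1) means the key's initial value is
    // 0 ^ 1 == 1 (the last row of the table above).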
    Info.InitValue = IsBranch ^ Info.Likely;

    // Add annotations to facilitate manual code analysis.
    BC.MIB->addAnnotation(*Inst, "Likely", Info.Likely);
    BC.MIB->addAnnotation(*Inst, "InitValue", Info.InitValue);
    if (!BC.MIB->getSize(*Inst))
      BC.MIB->setSize(*Inst, Size);

    if (!BC.MIB->getOffset(*Inst))
      BC.MIB->setOffset(*Inst, JumpAddress - BF->getAddress());

    if (opts::LongJumpLabels)
      BC.MIB->setSize(*Inst, 5);
  }

  BC.outs() << "BOLT-INFO: parsed " << EntryID << " static keys jump entries\n";

  return Error::success();
}

// Pre-emit pass. Convert dynamic branch instructions into jumps that can be
// relaxed. In the post-emit pass, we will convert those jumps into nops when
// necessary. We do the unconditional conversion into jumps so that the jumps
// can be relaxed and the optimal size of the jump/nop instruction is selected.
Error LinuxKernelRewriter::rewriteStaticKeysJumpTable() {
  if (!StaticKeysJumpSection)
    return Error::success();

  uint64_t NumShort = 0;
  uint64_t NumLong = 0;
  for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
    if (!BC.shouldEmit(BF))
      continue;

    for (BinaryBasicBlock &BB : BF) {
      for (MCInst &Inst : BB) {
        if (!BC.MIB->isDynamicBranch(Inst))
          continue;

        const uint32_t EntryID = *BC.MIB->getDynamicBranchID(Inst);
        MCSymbol *Target =
            const_cast<MCSymbol *>(BC.MIB->getTargetSymbol(Inst));
        assert(Target && "Target symbol should be set.");

        const JumpInfoEntry &Info = JumpInfo[EntryID - 1];
        const bool IsBranch = Info.Likely ^ Info.InitValue;

        uint32_t Size = *BC.MIB->getSize(Inst);
        if (Size == 2)
          ++NumShort;
        else if (Size == 5)
          ++NumLong;
        else
          llvm_unreachable("Wrong size for static keys jump instruction.");

        MCInst NewInst;
        // Replace the instruction with an unconditional jump even if it needs
        // to be a nop in the binary.
        if (opts::LongJumpLabels) {
          BC.MIB->createLongUncondBranch(NewInst, Target, BC.Ctx.get());
        } else {
          // Newer kernels can handle both short and long jumps for static
          // keys. Optimistically, emit a short jump and check whether it gets
          // relaxed into a long one during the post-emit pass, where the jump
          // is also converted to a nop if needed.
          BC.MIB->createUncondBranch(NewInst, Target, BC.Ctx.get());
        }

        BC.MIB->moveAnnotations(std::move(Inst), NewInst);
        Inst = NewInst;

        // Mark the instruction for nop conversion.
        if (!IsBranch)
          NopIDs.insert(EntryID);

        MCSymbol *Label =
            BC.MIB->getOrCreateInstLabel(Inst, "__SK_", BC.Ctx.get());

        // Create a relocation against the label.
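        // Each jump_entry is 16 bytes (s32 code, s32 target, long key), so the
        // entry's offset within the section is its zero-based index times 16.
        // The relocations below update the "code" and "target" fields to point
        // at the new location of the jump instruction and its target.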
        const uint64_t EntryOffset = StaticKeysJumpTableAddress -
                                     StaticKeysJumpSection->getAddress() +
                                     (EntryID - 1) * 16;
        StaticKeysJumpSection->addRelocation(EntryOffset, Label,
                                             ELF::R_X86_64_PC32,
                                             /*Addend*/ 0);
        StaticKeysJumpSection->addRelocation(EntryOffset + 4, Target,
                                             ELF::R_X86_64_PC32, /*Addend*/ 0);
      }
    }
  }

  BC.outs() << "BOLT-INFO: the input contains " << NumShort << " short and "
            << NumLong << " long static keys jumps in optimized functions\n";

  return Error::success();
}

// Post-emit pass of static keys jump section. Convert jumps to nops.
Error LinuxKernelRewriter::updateStaticKeysJumpTablePostEmit() {
  if (!StaticKeysJumpSection || !StaticKeysJumpSection->isFinalized())
    return Error::success();

  const uint64_t SectionAddress = StaticKeysJumpSection->getAddress();
  AddressExtractor AE(StaticKeysJumpSection->getOutputContents(),
                      SectionAddress, BC.AsmInfo->isLittleEndian(),
                      BC.AsmInfo->getCodePointerSize());
  AddressExtractor::Cursor Cursor(StaticKeysJumpTableAddress - SectionAddress);
  const BinaryData *Stop = BC.getBinaryDataByName("__stop___jump_table");
  uint32_t EntryID = 0;
  uint64_t NumShort = 0;
  uint64_t NumLong = 0;
  while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) {
    const uint64_t JumpAddress = AE.getPCRelAddress32(Cursor);
    const uint64_t TargetAddress = AE.getPCRelAddress32(Cursor);
    const uint64_t KeyAddress = AE.getPCRelAddress64(Cursor);

    // Consume the status of the cursor.
    if (!Cursor)
      return createStringError(errc::executable_format_error,
                               "out of bounds while updating static keys: %s",
                               toString(Cursor.takeError()).c_str());

    ++EntryID;

    LLVM_DEBUG({
      dbgs() << "\n\tJumpAddress: 0x" << Twine::utohexstr(JumpAddress)
             << "\n\tTargetAddress: 0x" << Twine::utohexstr(TargetAddress)
             << "\n\tKeyAddress: 0x" << Twine::utohexstr(KeyAddress) << '\n';
    });
    (void)TargetAddress;
    (void)KeyAddress;

    BinaryFunction *BF =
        BC.getBinaryFunctionContainingAddress(JumpAddress,
                                              /*CheckPastEnd*/ false,
                                              /*UseMaxSize*/ true);
    assert(BF && "Cannot get function for modified static key.");

    if (!BF->isEmitted())
      continue;

    // Disassemble instruction to collect stats even if nop-conversion is
    // unnecessary.
    MutableArrayRef<uint8_t> Contents = MutableArrayRef<uint8_t>(
        reinterpret_cast<uint8_t *>(BF->getImageAddress()), BF->getImageSize());
    assert(Contents.size() && "Non-empty function image expected.");

    MCInst Inst;
    uint64_t Size;
    const uint64_t JumpOffset = JumpAddress - BF->getAddress();
    if (!BC.DisAsm->getInstruction(Inst, Size, Contents.slice(JumpOffset), 0,
                                   nulls())) {
      llvm_unreachable("Unable to disassemble jump instruction.");
    }
    assert(BC.MIB->isBranch(Inst) && "Branch instruction expected.");

    if (Size == 2)
      ++NumShort;
    else if (Size == 5)
      ++NumLong;
    else
      llvm_unreachable("Unexpected size for static keys jump instruction.");

    // Check if we need to convert jump instruction into a nop.
    if (!NopIDs.contains(EntryID))
      continue;

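    // Ask the assembler backend for a nop sequence of the same length as the
    // jump and patch it directly over the jump in the emitted function image.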
    SmallString<15> NopCode;
    raw_svector_ostream VecOS(NopCode);
    BC.MAB->writeNopData(VecOS, Size, BC.STI.get());
    for (uint64_t I = 0; I < Size; ++I)
      Contents[JumpOffset + I] = NopCode[I];
  }

  BC.outs() << "BOLT-INFO: written " << NumShort << " short and " << NumLong
            << " long static keys jumps in optimized functions\n";

  return Error::success();
}

} // namespace

std::unique_ptr<MetadataRewriter>
llvm::bolt::createLinuxKernelRewriter(BinaryContext &BC) {
  return std::make_unique<LinuxKernelRewriter>(BC);
}