1//===- bolt/Rewrite/LinuxKernelRewriter.cpp -------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Support for updating Linux Kernel metadata.
10//
11//===----------------------------------------------------------------------===//
12
13#include "bolt/Core/BinaryFunction.h"
14#include "bolt/Rewrite/MetadataRewriter.h"
15#include "bolt/Rewrite/MetadataRewriters.h"
16#include "bolt/Utils/CommandLineOpts.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/DenseSet.h"
19#include "llvm/MC/MCDisassembler/MCDisassembler.h"
20#include "llvm/Support/BinaryStreamWriter.h"
21#include "llvm/Support/CommandLine.h"
22#include "llvm/Support/Debug.h"
23#include "llvm/Support/Errc.h"
24#include "llvm/Support/ErrorOr.h"
25#include <regex>
26
27#define DEBUG_TYPE "bolt-linux"
28
29using namespace llvm;
30using namespace bolt;
31
32namespace opts {
33
34static cl::opt<bool>
35 AltInstHasPadLen("alt-inst-has-padlen",
36 cl::desc("specify that .altinstructions has padlen field"),
37 cl::init(Val: false), cl::Hidden, cl::cat(BoltCategory));
38
39static cl::opt<uint32_t>
40 AltInstFeatureSize("alt-inst-feature-size",
41 cl::desc("size of feature field in .altinstructions"),
42 cl::init(Val: 2), cl::Hidden, cl::cat(BoltCategory));
43
44static cl::opt<bool>
45 DumpAltInstructions("dump-alt-instructions",
46 cl::desc("dump Linux alternative instructions info"),
47 cl::init(Val: false), cl::Hidden, cl::cat(BoltCategory));
48
49static cl::opt<bool>
50 DumpExceptions("dump-linux-exceptions",
51 cl::desc("dump Linux kernel exception table"),
52 cl::init(Val: false), cl::Hidden, cl::cat(BoltCategory));
53
54static cl::opt<bool>
55 DumpORC("dump-orc", cl::desc("dump raw ORC unwind information (sorted)"),
56 cl::init(Val: false), cl::Hidden, cl::cat(BoltCategory));
57
58static cl::opt<bool> DumpParavirtualPatchSites(
59 "dump-para-sites", cl::desc("dump Linux kernel paravitual patch sites"),
60 cl::init(Val: false), cl::Hidden, cl::cat(BoltCategory));
61
62static cl::opt<bool>
63 DumpPCIFixups("dump-pci-fixups",
64 cl::desc("dump Linux kernel PCI fixup table"),
65 cl::init(Val: false), cl::Hidden, cl::cat(BoltCategory));
66
67static cl::opt<bool> DumpSMPLocks("dump-smp-locks",
68 cl::desc("dump Linux kernel SMP locks"),
69 cl::init(Val: false), cl::Hidden,
70 cl::cat(BoltCategory));
71
72static cl::opt<bool> DumpStaticCalls("dump-static-calls",
73 cl::desc("dump Linux kernel static calls"),
74 cl::init(Val: false), cl::Hidden,
75 cl::cat(BoltCategory));
76
77static cl::opt<bool>
78 DumpStaticKeys("dump-static-keys",
79 cl::desc("dump Linux kernel static keys jump table"),
80 cl::init(Val: false), cl::Hidden, cl::cat(BoltCategory));
81
82static cl::opt<bool> LongJumpLabels(
83 "long-jump-labels",
84 cl::desc("always use long jumps/nops for Linux kernel static keys"),
85 cl::init(Val: false), cl::Hidden, cl::cat(BoltCategory));
86
87static cl::opt<bool>
88 PrintORC("print-orc",
89 cl::desc("print ORC unwind information for instructions"),
90 cl::init(Val: true), cl::Hidden, cl::cat(BoltCategory));
91
92} // namespace opts
93
/// Linux kernel version as a (major, minor, revision) triple. Versions are
/// ordered lexicographically: major first, then minor, then revision.
/// A default-constructed version is 0.0.0.
struct LKVersion {
  unsigned Major{0};
  unsigned Minor{0};
  unsigned Rev{0};

  LKVersion() = default;
  LKVersion(unsigned Major, unsigned Minor, unsigned Rev)
      : Major(Major), Minor(Minor), Rev(Rev) {}

  /// Strict lexicographic ordering on (Major, Minor, Rev).
  bool operator<(const LKVersion &Other) const {
    return std::tie(Major, Minor, Rev) <
           std::tie(Other.Major, Other.Minor, Other.Rev);
  }

  // The remaining relational operators are all derived from operator<.
  bool operator>(const LKVersion &Other) const { return Other < *this; }

  bool operator<=(const LKVersion &Other) const { return !(Other < *this); }

  bool operator>=(const LKVersion &Other) const { return !(*this < Other); }

  /// Two versions are equal when all three components match.
  bool operator==(const LKVersion &Other) const {
    return std::tie(Major, Minor, Rev) ==
           std::tie(Other.Major, Other.Minor, Other.Rev);
  }

  bool operator!=(const LKVersion &Other) const { return !(*this == Other); }
};
121
/// The Linux kernel unwinds stacks using ORC (oops rewind capability).
/// This aggregate describes the ORC state at a single instruction pointer.
struct ORCState {
  int16_t SPOffset;
  int16_t BPOffset;
  int16_t Info;

  /// Field-wise equality.
  bool operator==(const ORCState &Other) const {
    if (SPOffset != Other.SPOffset)
      return false;
    if (BPOffset != Other.BPOffset)
      return false;
    return Info == Other.Info;
  }

  bool operator!=(const ORCState &Other) const { return !operator==(Other); }
};
136
137/// Section terminator ORC entry.
138static ORCState NullORC = {.SPOffset: 0, .BPOffset: 0, .Info: 0};
139
140/// Basic printer for ORC entry. It does not provide the same level of
141/// information as objtool (for now).
142inline raw_ostream &operator<<(raw_ostream &OS, const ORCState &E) {
143 if (!opts::PrintORC)
144 return OS;
145 if (E != NullORC)
146 OS << format(Fmt: "{sp: %d, bp: %d, info: 0x%x}", Vals: E.SPOffset, Vals: E.BPOffset,
147 Vals: E.Info);
148 else
149 OS << "{terminator}";
150
151 return OS;
152}
153
154namespace {
155
156/// Extension to DataExtractor that supports reading addresses stored in
157/// PC-relative format.
158class AddressExtractor : public DataExtractor {
159 uint64_t DataAddress;
160
161public:
162 AddressExtractor(StringRef Data, uint64_t DataAddress, bool IsLittleEndian,
163 uint8_t AddressSize)
164 : DataExtractor(Data, IsLittleEndian, AddressSize),
165 DataAddress(DataAddress) {}
166
167 /// Extract 32-bit PC-relative address/pointer.
168 uint64_t getPCRelAddress32(Cursor &C) {
169 const uint64_t Base = DataAddress + C.tell();
170 return Base + (int32_t)getU32(C);
171 }
172
173 /// Extract 64-bit PC-relative address/pointer.
174 uint64_t getPCRelAddress64(Cursor &C) {
175 const uint64_t Base = DataAddress + C.tell();
176 return Base + (int64_t)getU64(C);
177 }
178};
179
180class LinuxKernelRewriter final : public MetadataRewriter {
181 LKVersion LinuxKernelVersion;
182
183 /// Information required for updating metadata referencing an instruction.
184 struct InstructionFixup {
185 BinarySection &Section; // Section referencing the instruction.
186 uint64_t Offset; // Offset in the section above.
187 BinaryFunction &BF; // Function containing the instruction.
188 MCSymbol &Label; // Label marking the instruction.
189 bool IsPCRelative; // If the reference type is relative.
190 };
191 std::vector<InstructionFixup> Fixups;
192
193 /// Size of an entry in .smp_locks section.
194 static constexpr size_t SMP_LOCKS_ENTRY_SIZE = 4;
195
196 /// Linux ORC sections.
197 ErrorOr<BinarySection &> ORCUnwindSection = std::errc::bad_address;
198 ErrorOr<BinarySection &> ORCUnwindIPSection = std::errc::bad_address;
199
200 /// Size of entries in ORC sections.
201 static constexpr size_t ORC_UNWIND_ENTRY_SIZE = 6;
202 static constexpr size_t ORC_UNWIND_IP_ENTRY_SIZE = 4;
203
204 struct ORCListEntry {
205 uint64_t IP; /// Instruction address.
206 BinaryFunction *BF; /// Binary function corresponding to the entry.
207 ORCState ORC; /// Stack unwind info in ORC format.
208
209 /// ORC entries are sorted by their IPs. Terminator entries (NullORC)
210 /// should precede other entries with the same address.
211 bool operator<(const ORCListEntry &Other) const {
212 if (IP < Other.IP)
213 return 1;
214 if (IP > Other.IP)
215 return 0;
216 return ORC == NullORC && Other.ORC != NullORC;
217 }
218 };
219
220 using ORCListType = std::vector<ORCListEntry>;
221 ORCListType ORCEntries;
222
223 /// Number of entries in the input file ORC sections.
224 uint64_t NumORCEntries = 0;
225
226 /// Section containing static keys jump table.
227 ErrorOr<BinarySection &> StaticKeysJumpSection = std::errc::bad_address;
228 uint64_t StaticKeysJumpTableAddress = 0;
229 static constexpr size_t STATIC_KEYS_JUMP_ENTRY_SIZE = 8;
230
231 struct JumpInfoEntry {
232 bool Likely;
233 bool InitValue;
234 };
235 SmallVector<JumpInfoEntry, 16> JumpInfo;
236
237 /// Static key entries that need nop conversion.
238 DenseSet<uint32_t> NopIDs;
239
240 /// Section containing static call table.
241 ErrorOr<BinarySection &> StaticCallSection = std::errc::bad_address;
242 uint64_t StaticCallTableAddress = 0;
243 static constexpr size_t STATIC_CALL_ENTRY_SIZE = 8;
244
245 struct StaticCallInfo {
246 uint32_t ID; /// Identifier of the entry in the table.
247 BinaryFunction *Function; /// Function containing associated call.
248 MCSymbol *Label; /// Label attached to the call.
249 };
250 using StaticCallListType = std::vector<StaticCallInfo>;
251 StaticCallListType StaticCallEntries;
252
253 /// Section containing the Linux exception table.
254 ErrorOr<BinarySection &> ExceptionsSection = std::errc::bad_address;
255 static constexpr size_t EXCEPTION_TABLE_ENTRY_SIZE = 12;
256
257 /// Functions with exception handling code.
258 DenseSet<BinaryFunction *> FunctionsWithExceptions;
259
260 /// Section with paravirtual patch sites.
261 ErrorOr<BinarySection &> ParavirtualPatchSection = std::errc::bad_address;
262
263 /// Alignment of paravirtual patch structures.
264 static constexpr size_t PARA_PATCH_ALIGN = 8;
265
266 /// .altinstructions section.
267 ErrorOr<BinarySection &> AltInstrSection = std::errc::bad_address;
268
269 /// Section containing Linux bug table.
270 ErrorOr<BinarySection &> BugTableSection = std::errc::bad_address;
271
272 /// Size of bug_entry struct.
273 static constexpr size_t BUG_TABLE_ENTRY_SIZE = 12;
274
275 /// List of bug entries per function.
276 using FunctionBugListType =
277 DenseMap<BinaryFunction *, SmallVector<uint32_t, 2>>;
278 FunctionBugListType FunctionBugList;
279
280 /// .pci_fixup section.
281 ErrorOr<BinarySection &> PCIFixupSection = std::errc::bad_address;
282 static constexpr size_t PCI_FIXUP_ENTRY_SIZE = 16;
283
284 Error detectLinuxKernelVersion();
285
286 /// Process linux kernel special sections and their relocations.
287 void processLKSections();
288
289 /// Process __ksymtab and __ksymtab_gpl.
290 void processLKKSymtab(bool IsGPL = false);
291
292 // Create relocations in sections requiring fixups.
293 //
294 // Make sure functions that will not be emitted are marked as such before this
295 // function is executed.
296 void processInstructionFixups();
297
298 /// Process .smp_locks section.
299 Error processSMPLocks();
300
301 /// Read ORC unwind information and annotate instructions.
302 Error readORCTables();
303
304 /// Update ORC for functions once CFG is constructed.
305 Error processORCPostCFG();
306
307 /// Update ORC data in the binary.
308 Error rewriteORCTables();
309
310 /// Validate written ORC tables after binary emission.
311 Error validateORCTables();
312
313 /// Static call table handling.
314 Error readStaticCalls();
315 Error rewriteStaticCalls();
316
317 Error readExceptionTable();
318 Error rewriteExceptionTable();
319
320 /// Paravirtual instruction patch sites.
321 Error readParaInstructions();
322 Error rewriteParaInstructions();
323
324 /// __bug_table section handling.
325 Error readBugTable();
326 Error rewriteBugTable();
327
328 /// Do no process functions containing instruction annotated with
329 /// \p Annotation.
330 void skipFunctionsWithAnnotation(StringRef Annotation) const;
331
332 /// Handle alternative instruction info from .altinstructions.
333 Error readAltInstructions();
334 void processAltInstructionsPostCFG();
335 Error tryReadAltInstructions(uint32_t AltInstFeatureSize,
336 bool AltInstHasPadLen, bool ParseOnly);
337
338 /// Read .pci_fixup
339 Error readPCIFixupTable();
340
341 /// Handle static keys jump table.
342 Error readStaticKeysJumpTable();
343 Error rewriteStaticKeysJumpTable();
344 Error updateStaticKeysJumpTablePostEmit();
345
346public:
347 LinuxKernelRewriter(BinaryContext &BC)
348 : MetadataRewriter("linux-kernel-rewriter", BC) {}
349
350 Error preCFGInitializer() override {
351 if (Error E = detectLinuxKernelVersion())
352 return E;
353
354 processLKSections();
355
356 if (Error E = processSMPLocks())
357 return E;
358
359 if (Error E = readStaticCalls())
360 return E;
361
362 if (Error E = readExceptionTable())
363 return E;
364
365 if (Error E = readParaInstructions())
366 return E;
367
368 if (Error E = readBugTable())
369 return E;
370
371 if (Error E = readAltInstructions())
372 return E;
373
374 // Some ORC entries could be linked to alternative instruction
375 // sequences. Hence, we read ORC after .altinstructions.
376 if (Error E = readORCTables())
377 return E;
378
379 if (Error E = readPCIFixupTable())
380 return E;
381
382 if (Error E = readStaticKeysJumpTable())
383 return E;
384
385 return Error::success();
386 }
387
388 Error postCFGInitializer() override {
389 if (Error E = processORCPostCFG())
390 return E;
391
392 processAltInstructionsPostCFG();
393
394 return Error::success();
395 }
396
397 Error preEmitFinalizer() override {
398 // Since rewriteExceptionTable() can mark functions as non-simple, run it
399 // before other rewriters that depend on simple/emit status.
400 if (Error E = rewriteExceptionTable())
401 return E;
402
403 if (Error E = rewriteParaInstructions())
404 return E;
405
406 if (Error E = rewriteORCTables())
407 return E;
408
409 if (Error E = rewriteStaticCalls())
410 return E;
411
412 if (Error E = rewriteStaticKeysJumpTable())
413 return E;
414
415 if (Error E = rewriteBugTable())
416 return E;
417
418 processInstructionFixups();
419
420 return Error::success();
421 }
422
423 Error postEmitFinalizer() override {
424 if (Error E = updateStaticKeysJumpTablePostEmit())
425 return E;
426
427 if (Error E = validateORCTables())
428 return E;
429
430 return Error::success();
431 }
432};
433
434Error LinuxKernelRewriter::detectLinuxKernelVersion() {
435 // Check for global and local linux_banner symbol.
436 BinaryData *BD = BC.getBinaryDataByName(Name: "linux_banner");
437 if (!BD)
438 BD = BC.getBinaryDataByName(Name: "linux_banner/1");
439
440 if (!BD)
441 return createStringError(EC: errc::executable_format_error,
442 S: "unable to locate linux_banner");
443
444 const BinarySection &Section = BD->getSection();
445 const std::string S =
446 Section.getContents().substr(Start: BD->getOffset(), N: BD->getSize()).str();
447
448 const std::regex Re(R"---(Linux version ((\d+)\.(\d+)(\.(\d+))?))---");
449 std::smatch Match;
450 if (std::regex_search(s: S, m&: Match, e: Re)) {
451 const unsigned Major = std::stoi(str: Match[2].str());
452 const unsigned Minor = std::stoi(str: Match[3].str());
453 const unsigned Rev = Match[5].matched ? std::stoi(str: Match[5].str()) : 0;
454 LinuxKernelVersion = LKVersion(Major, Minor, Rev);
455 BC.outs() << "BOLT-INFO: Linux kernel version is " << Match[1].str()
456 << "\n";
457 return Error::success();
458 }
459
460 return createStringError(EC: errc::executable_format_error,
461 S: "Linux kernel version is unknown: " + S);
462}
463
464void LinuxKernelRewriter::processLKSections() {
465 processLKKSymtab();
466 processLKKSymtab(IsGPL: true);
467}
468
469/// Process __ksymtab[_gpl] sections of Linux Kernel.
470/// This section lists all the vmlinux symbols that kernel modules can access.
471///
472/// All the entries are 4 bytes each and hence we can read them by one by one
473/// and ignore the ones that are not pointing to the .text section. All pointers
474/// are PC relative offsets. Always, points to the beginning of the function.
475void LinuxKernelRewriter::processLKKSymtab(bool IsGPL) {
476 StringRef SectionName = "__ksymtab";
477 if (IsGPL)
478 SectionName = "__ksymtab_gpl";
479 ErrorOr<BinarySection &> SectionOrError =
480 BC.getUniqueSectionByName(SectionName);
481 assert(SectionOrError &&
482 "__ksymtab[_gpl] section not found in Linux Kernel binary");
483 const uint64_t SectionSize = SectionOrError->getSize();
484 const uint64_t SectionAddress = SectionOrError->getAddress();
485 assert((SectionSize % 4) == 0 &&
486 "The size of the __ksymtab[_gpl] section should be a multiple of 4");
487
488 for (uint64_t I = 0; I < SectionSize; I += 4) {
489 const uint64_t EntryAddress = SectionAddress + I;
490 ErrorOr<int64_t> Offset = BC.getSignedValueAtAddress(Address: EntryAddress, Size: 4);
491 assert(Offset && "Reading valid PC-relative offset for a ksymtab entry");
492 const int32_t SignedOffset = *Offset;
493 const uint64_t RefAddress = EntryAddress + SignedOffset;
494 BinaryFunction *BF = BC.getBinaryFunctionAtAddress(Address: RefAddress);
495 if (!BF)
496 continue;
497
498 BC.addRelocation(Address: EntryAddress, Symbol: BF->getSymbol(), Type: Relocation::getPC32(), Addend: 0,
499 Value: *Offset);
500 }
501}
502
503/// .smp_locks section contains PC-relative references to instructions with LOCK
504/// prefix. The prefix can be converted to NOP at boot time on non-SMP systems.
505Error LinuxKernelRewriter::processSMPLocks() {
506 ErrorOr<BinarySection &> SMPLocksSection =
507 BC.getUniqueSectionByName(SectionName: ".smp_locks");
508 if (!SMPLocksSection)
509 return Error::success();
510
511 const uint64_t SectionSize = SMPLocksSection->getSize();
512 const uint64_t SectionAddress = SMPLocksSection->getAddress();
513 if (SectionSize % SMP_LOCKS_ENTRY_SIZE)
514 return createStringError(EC: errc::executable_format_error,
515 S: "bad size of .smp_locks section");
516
517 AddressExtractor AE(SMPLocksSection->getContents(), SectionAddress,
518 BC.AsmInfo->isLittleEndian(),
519 BC.AsmInfo->getCodePointerSize());
520 AddressExtractor::Cursor Cursor(0);
521 while (Cursor && Cursor.tell() < SectionSize) {
522 const uint64_t Offset = Cursor.tell();
523 const uint64_t IP = AE.getPCRelAddress32(C&: Cursor);
524
525 // Consume the status of the cursor.
526 if (!Cursor)
527 return createStringError(EC: errc::executable_format_error,
528 Fmt: "error while reading .smp_locks: %s",
529 Vals: toString(E: Cursor.takeError()).c_str());
530
531 if (opts::DumpSMPLocks)
532 BC.outs() << "SMP lock at 0x: " << Twine::utohexstr(Val: IP) << '\n';
533
534 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(Address: IP);
535 if (!BF || !BC.shouldEmit(Function: *BF))
536 continue;
537
538 MCInst *Inst = BF->getInstructionAtOffset(Offset: IP - BF->getAddress());
539 if (!Inst)
540 return createStringError(EC: errc::executable_format_error,
541 Fmt: "no instruction matches lock at 0x%" PRIx64, Vals: IP);
542
543 // Check for duplicate entries.
544 if (BC.MIB->hasAnnotation(Inst: *Inst, Name: "SMPLock"))
545 return createStringError(EC: errc::executable_format_error,
546 Fmt: "duplicate SMP lock at 0x%" PRIx64, Vals: IP);
547
548 BC.MIB->addAnnotation(Inst&: *Inst, Name: "SMPLock", Val: true);
549 MCSymbol *Label =
550 BC.MIB->getOrCreateInstLabel(Inst&: *Inst, Name: "__SMPLock_", Ctx: BC.Ctx.get());
551
552 Fixups.push_back(x: {.Section: *SMPLocksSection, .Offset: Offset, .BF: *BF, .Label: *Label,
553 /*IsPCRelative*/ true});
554 }
555
556 const uint64_t NumEntries = SectionSize / SMP_LOCKS_ENTRY_SIZE;
557 BC.outs() << "BOLT-INFO: parsed " << NumEntries << " SMP lock entries\n";
558
559 return Error::success();
560}
561
562void LinuxKernelRewriter::processInstructionFixups() {
563 for (InstructionFixup &Fixup : Fixups) {
564 if (!BC.shouldEmit(Function: Fixup.BF))
565 continue;
566
567 Fixup.Section.addRelocation(Offset: Fixup.Offset, Symbol: &Fixup.Label,
568 Type: Fixup.IsPCRelative ? ELF::R_X86_64_PC32
569 : ELF::R_X86_64_64,
570 /*Addend*/ 0);
571 }
572}
573
574Error LinuxKernelRewriter::readORCTables() {
575 // NOTE: we should ignore relocations for orc tables as the tables are sorted
576 // post-link time and relocations are not updated.
577 ORCUnwindSection = BC.getUniqueSectionByName(SectionName: ".orc_unwind");
578 ORCUnwindIPSection = BC.getUniqueSectionByName(SectionName: ".orc_unwind_ip");
579
580 if (!ORCUnwindSection && !ORCUnwindIPSection)
581 return Error::success();
582
583 if (!ORCUnwindSection || !ORCUnwindIPSection)
584 return createStringError(EC: errc::executable_format_error,
585 S: "missing ORC section");
586
587 NumORCEntries = ORCUnwindIPSection->getSize() / ORC_UNWIND_IP_ENTRY_SIZE;
588 if (ORCUnwindSection->getSize() != NumORCEntries * ORC_UNWIND_ENTRY_SIZE ||
589 ORCUnwindIPSection->getSize() != NumORCEntries * ORC_UNWIND_IP_ENTRY_SIZE)
590 return createStringError(EC: errc::executable_format_error,
591 S: "ORC entries number mismatch detected");
592
593 DataExtractor OrcDE(ORCUnwindSection->getContents(),
594 BC.AsmInfo->isLittleEndian(),
595 BC.AsmInfo->getCodePointerSize());
596 AddressExtractor IPAE(
597 ORCUnwindIPSection->getContents(), ORCUnwindIPSection->getAddress(),
598 BC.AsmInfo->isLittleEndian(), BC.AsmInfo->getCodePointerSize());
599 DataExtractor::Cursor ORCCursor(0);
600 DataExtractor::Cursor IPCursor(0);
601 uint64_t PrevIP = 0;
602 for (uint32_t Index = 0; Index < NumORCEntries; ++Index) {
603 const uint64_t IP = IPAE.getPCRelAddress32(C&: IPCursor);
604 // Consume the status of the cursor.
605 if (!IPCursor)
606 return createStringError(EC: errc::executable_format_error,
607 Fmt: "out of bounds while reading ORC IP table: %s",
608 Vals: toString(E: IPCursor.takeError()).c_str());
609
610 if (IP < PrevIP && opts::Verbosity)
611 BC.errs() << "BOLT-WARNING: out of order IP 0x" << Twine::utohexstr(Val: IP)
612 << " detected while reading ORC\n";
613
614 PrevIP = IP;
615
616 // Store all entries, includes those we are not going to update as the
617 // tables need to be sorted globally before being written out.
618 ORCEntries.push_back(x: ORCListEntry());
619 ORCListEntry &Entry = ORCEntries.back();
620
621 Entry.IP = IP;
622 Entry.ORC.SPOffset = (int16_t)OrcDE.getU16(C&: ORCCursor);
623 Entry.ORC.BPOffset = (int16_t)OrcDE.getU16(C&: ORCCursor);
624 Entry.ORC.Info = (int16_t)OrcDE.getU16(C&: ORCCursor);
625 Entry.BF = nullptr;
626
627 // Consume the status of the cursor.
628 if (!ORCCursor)
629 return createStringError(EC: errc::executable_format_error,
630 Fmt: "out of bounds while reading ORC: %s",
631 Vals: toString(E: ORCCursor.takeError()).c_str());
632
633 if (Entry.ORC == NullORC)
634 continue;
635
636 BinaryFunction *&BF = Entry.BF;
637 BF = BC.getBinaryFunctionContainingAddress(Address: IP, /*CheckPastEnd*/ true);
638
639 // If the entry immediately pointing past the end of the function is not
640 // the terminator entry, then it does not belong to this function.
641 if (BF && BF->getAddress() + BF->getSize() == IP)
642 BF = 0;
643
644 if (!BF) {
645 if (opts::Verbosity)
646 BC.errs() << "BOLT-WARNING: no binary function found matching ORC 0x"
647 << Twine::utohexstr(Val: IP) << ": " << Entry.ORC << '\n';
648 continue;
649 }
650
651 BF->setHasORC(true);
652
653 if (!BF->hasInstructions())
654 continue;
655
656 const uint64_t Offset = IP - BF->getAddress();
657 MCInst *Inst = BF->getInstructionAtOffset(Offset);
658 if (!Inst) {
659 // Check if there is an alternative instruction(s) at this IP. Multiple
660 // alternative instructions can take a place of a single original
661 // instruction and each alternative can have a separate ORC entry.
662 // Since ORC table is shared between all alternative sequences, there's
663 // a requirement that only one (out of many) sequences can have an
664 // instruction from the ORC table to avoid ambiguities/conflicts.
665 //
666 // For now, we have limited support for alternatives. I.e. we still print
667 // functions with them, but will not change the code in the output binary.
668 // As such, we can ignore alternative ORC entries. They will be preserved
669 // in the binary, but will not get printed in the instruction stream.
670 Inst = BF->getInstructionContainingOffset(Offset);
671 if (Inst || BC.MIB->hasAnnotation(Inst: *Inst, Name: "AltInst"))
672 continue;
673
674 return createStringError(
675 EC: errc::executable_format_error,
676 Fmt: "no instruction at address 0x%" PRIx64 " in .orc_unwind_ip", Vals: IP);
677 }
678
679 // Some addresses will have two entries associated with them. The first
680 // one being a "weak" section terminator. Since we ignore the terminator,
681 // we should only assign one entry per instruction.
682 if (BC.MIB->hasAnnotation(Inst: *Inst, Name: "ORC"))
683 return createStringError(
684 EC: errc::executable_format_error,
685 Fmt: "duplicate non-terminal ORC IP 0x%" PRIx64 " in .orc_unwind_ip", Vals: IP);
686
687 BC.MIB->addAnnotation(Inst&: *Inst, Name: "ORC", Val: Entry.ORC);
688 }
689
690 BC.outs() << "BOLT-INFO: parsed " << NumORCEntries << " ORC entries\n";
691
692 if (opts::DumpORC) {
693 BC.outs() << "BOLT-INFO: ORC unwind information:\n";
694 for (const ORCListEntry &E : ORCEntries) {
695 BC.outs() << "0x" << Twine::utohexstr(Val: E.IP) << ": " << E.ORC;
696 if (E.BF)
697 BC.outs() << ": " << *E.BF;
698 BC.outs() << '\n';
699 }
700 }
701
702 // Add entries for functions that don't have explicit ORC info at the start.
703 // We'll have the correct info for them even if ORC for the preceding function
704 // changes.
705 ORCListType NewEntries;
706 for (BinaryFunction &BF : llvm::make_second_range(c&: BC.getBinaryFunctions())) {
707 auto It = llvm::partition_point(Range&: ORCEntries, P: [&](const ORCListEntry &E) {
708 return E.IP <= BF.getAddress();
709 });
710 if (It != ORCEntries.begin())
711 --It;
712
713 if (It->BF == &BF)
714 continue;
715
716 if (It->ORC == NullORC && It->IP == BF.getAddress()) {
717 assert(!It->BF);
718 It->BF = &BF;
719 continue;
720 }
721
722 NewEntries.push_back(x: {.IP: BF.getAddress(), .BF: &BF, .ORC: It->ORC});
723 if (It->ORC != NullORC)
724 BF.setHasORC(true);
725 }
726
727 llvm::copy(Range&: NewEntries, Out: std::back_inserter(x&: ORCEntries));
728 llvm::sort(C&: ORCEntries);
729
730 if (opts::DumpORC) {
731 BC.outs() << "BOLT-INFO: amended ORC unwind information:\n";
732 for (const ORCListEntry &E : ORCEntries) {
733 BC.outs() << "0x" << Twine::utohexstr(Val: E.IP) << ": " << E.ORC;
734 if (E.BF)
735 BC.outs() << ": " << *E.BF;
736 BC.outs() << '\n';
737 }
738 }
739
740 return Error::success();
741}
742
743Error LinuxKernelRewriter::processORCPostCFG() {
744 if (!NumORCEntries)
745 return Error::success();
746
747 // Propagate ORC to the rest of the function. We can annotate every
748 // instruction in every function, but to minimize the overhead, we annotate
749 // the first instruction in every basic block to reflect the state at the
750 // entry. This way, the ORC state can be calculated based on annotations
751 // regardless of the basic block layout. Note that if we insert/delete
752 // instructions, we must take care to attach ORC info to the new/deleted ones.
753 for (BinaryFunction &BF : llvm::make_second_range(c&: BC.getBinaryFunctions())) {
754
755 std::optional<ORCState> CurrentState;
756 for (BinaryBasicBlock &BB : BF) {
757 for (MCInst &Inst : BB) {
758 ErrorOr<ORCState> State =
759 BC.MIB->tryGetAnnotationAs<ORCState>(Inst, Name: "ORC");
760
761 if (State) {
762 CurrentState = *State;
763 continue;
764 }
765
766 // Get state for the start of the function.
767 if (!CurrentState) {
768 // A terminator entry (NullORC) can match the function address. If
769 // there's also a non-terminator entry, it will be placed after the
770 // terminator. Hence, we are looking for the last ORC entry that
771 // matches the address.
772 auto It =
773 llvm::partition_point(Range&: ORCEntries, P: [&](const ORCListEntry &E) {
774 return E.IP <= BF.getAddress();
775 });
776 if (It != ORCEntries.begin())
777 --It;
778
779 assert(It->IP == BF.getAddress() && (!It->BF || It->BF == &BF) &&
780 "ORC info at function entry expected.");
781
782 if (It->ORC == NullORC && BF.hasORC()) {
783 BC.errs() << "BOLT-WARNING: ORC unwind info excludes prologue for "
784 << BF << '\n';
785 }
786
787 It->BF = &BF;
788
789 CurrentState = It->ORC;
790 if (It->ORC != NullORC)
791 BF.setHasORC(true);
792 }
793
794 // While printing ORC, attach info to every instruction for convenience.
795 if (opts::PrintORC || &Inst == &BB.front())
796 BC.MIB->addAnnotation(Inst, Name: "ORC", Val: *CurrentState);
797 }
798 }
799 }
800
801 return Error::success();
802}
803
804Error LinuxKernelRewriter::rewriteORCTables() {
805 if (!NumORCEntries)
806 return Error::success();
807
808 // Update ORC sections in-place. As we change the code, the number of ORC
809 // entries may increase for some functions. However, as we remove terminator
810 // redundancy (see below), more space is freed up and we should always be able
811 // to fit new ORC tables in the reserved space.
812 auto createInPlaceWriter = [&](BinarySection &Section) -> BinaryStreamWriter {
813 const size_t Size = Section.getSize();
814 uint8_t *NewContents = new uint8_t[Size];
815 Section.updateContents(NewData: NewContents, NewSize: Size);
816 Section.setOutputFileOffset(Section.getInputFileOffset());
817 return BinaryStreamWriter({NewContents, Size}, BC.AsmInfo->isLittleEndian()
818 ? endianness::little
819 : endianness::big);
820 };
821 BinaryStreamWriter UnwindWriter = createInPlaceWriter(*ORCUnwindSection);
822 BinaryStreamWriter UnwindIPWriter = createInPlaceWriter(*ORCUnwindIPSection);
823
824 uint64_t NumEmitted = 0;
825 std::optional<ORCState> LastEmittedORC;
826 auto emitORCEntry = [&](const uint64_t IP, const ORCState &ORC,
827 MCSymbol *Label = 0, bool Force = false) -> Error {
828 if (LastEmittedORC && ORC == *LastEmittedORC && !Force)
829 return Error::success();
830
831 LastEmittedORC = ORC;
832
833 if (++NumEmitted > NumORCEntries)
834 return createStringError(EC: errc::executable_format_error,
835 S: "exceeded the number of allocated ORC entries");
836
837 if (Label)
838 ORCUnwindIPSection->addRelocation(Offset: UnwindIPWriter.getOffset(), Symbol: Label,
839 Type: Relocation::getPC32(), /*Addend*/ 0);
840
841 const int32_t IPValue =
842 IP - ORCUnwindIPSection->getAddress() - UnwindIPWriter.getOffset();
843 if (Error E = UnwindIPWriter.writeInteger(Value: IPValue))
844 return E;
845
846 if (Error E = UnwindWriter.writeInteger(Value: ORC.SPOffset))
847 return E;
848 if (Error E = UnwindWriter.writeInteger(Value: ORC.BPOffset))
849 return E;
850 if (Error E = UnwindWriter.writeInteger(Value: ORC.Info))
851 return E;
852
853 return Error::success();
854 };
855
856 // Emit new ORC entries for the emitted function.
857 auto emitORC = [&](const FunctionFragment &FF) -> Error {
858 ORCState CurrentState = NullORC;
859 for (BinaryBasicBlock *BB : FF) {
860 for (MCInst &Inst : *BB) {
861 ErrorOr<ORCState> ErrorOrState =
862 BC.MIB->tryGetAnnotationAs<ORCState>(Inst, Name: "ORC");
863 if (!ErrorOrState || *ErrorOrState == CurrentState)
864 continue;
865
866 // Issue label for the instruction.
867 MCSymbol *Label =
868 BC.MIB->getOrCreateInstLabel(Inst, Name: "__ORC_", Ctx: BC.Ctx.get());
869
870 if (Error E = emitORCEntry(0, *ErrorOrState, Label))
871 return E;
872
873 CurrentState = *ErrorOrState;
874 }
875 }
876
877 return Error::success();
878 };
879
880 // Emit ORC entries for cold fragments. We assume that these fragments are
881 // emitted contiguously in memory using reserved space in the kernel. This
882 // assumption is validated in post-emit pass validateORCTables() where we
883 // check that ORC entries are sorted by their addresses.
884 auto emitColdORC = [&]() -> Error {
885 for (BinaryFunction &BF :
886 llvm::make_second_range(c&: BC.getBinaryFunctions())) {
887 if (!BC.shouldEmit(Function: BF))
888 continue;
889 for (FunctionFragment &FF : BF.getLayout().getSplitFragments())
890 if (Error E = emitORC(FF))
891 return E;
892 }
893
894 return Error::success();
895 };
896
897 bool ShouldEmitCold = !BC.BOLTReserved.empty();
898 for (ORCListEntry &Entry : ORCEntries) {
899 if (ShouldEmitCold && Entry.IP > BC.BOLTReserved.start()) {
900 if (Error E = emitColdORC())
901 return E;
902
903 // Emit terminator entry at the end of the reserved region.
904 if (Error E = emitORCEntry(BC.BOLTReserved.end(), NullORC))
905 return E;
906
907 ShouldEmitCold = false;
908 }
909
910 // Emit original entries for functions that we haven't modified.
911 if (!Entry.BF || !BC.shouldEmit(Function: *Entry.BF)) {
912 // Emit terminator only if it marks the start of a function.
913 if (Entry.ORC == NullORC && !Entry.BF)
914 continue;
915 if (Error E = emitORCEntry(Entry.IP, Entry.ORC))
916 return E;
917 continue;
918 }
919
920 // Emit all ORC entries for a function referenced by an entry and skip over
921 // the rest of entries for this function by resetting its ORC attribute.
922 if (Entry.BF->hasORC()) {
923 if (Error E = emitORC(Entry.BF->getLayout().getMainFragment()))
924 return E;
925 Entry.BF->setHasORC(false);
926 }
927 }
928
929 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitted " << NumEmitted
930 << " ORC entries\n");
931
932 // Populate ORC tables with a terminator entry with max address to match the
933 // original table sizes.
934 const uint64_t LastIP = std::numeric_limits<uint64_t>::max();
935 while (UnwindWriter.bytesRemaining()) {
936 if (Error E = emitORCEntry(LastIP, NullORC, nullptr, /*Force*/ true))
937 return E;
938 }
939
940 return Error::success();
941}
942
943Error LinuxKernelRewriter::validateORCTables() {
944 if (!ORCUnwindIPSection)
945 return Error::success();
946
947 AddressExtractor IPAE(
948 ORCUnwindIPSection->getOutputContents(), ORCUnwindIPSection->getAddress(),
949 BC.AsmInfo->isLittleEndian(), BC.AsmInfo->getCodePointerSize());
950 AddressExtractor::Cursor IPCursor(0);
951 uint64_t PrevIP = 0;
952 for (uint32_t Index = 0; Index < NumORCEntries; ++Index) {
953 const uint64_t IP = IPAE.getPCRelAddress32(C&: IPCursor);
954 if (!IPCursor)
955 return createStringError(EC: errc::executable_format_error,
956 Fmt: "out of bounds while reading ORC IP table: %s",
957 Vals: toString(E: IPCursor.takeError()).c_str());
958
959 assert(IP >= PrevIP && "Unsorted ORC table detected");
960 (void)PrevIP;
961 PrevIP = IP;
962 }
963
964 return Error::success();
965}
966
967/// The static call site table is created by objtool and contains entries in the
968/// following format:
969///
970/// struct static_call_site {
971/// s32 addr;
972/// s32 key;
973/// };
974///
975Error LinuxKernelRewriter::readStaticCalls() {
976 const BinaryData *StaticCallTable =
977 BC.getBinaryDataByName(Name: "__start_static_call_sites");
978 if (!StaticCallTable)
979 return Error::success();
980
981 StaticCallTableAddress = StaticCallTable->getAddress();
982
983 const BinaryData *Stop = BC.getBinaryDataByName(Name: "__stop_static_call_sites");
984 if (!Stop)
985 return createStringError(EC: errc::executable_format_error,
986 S: "missing __stop_static_call_sites symbol");
987
988 ErrorOr<BinarySection &> ErrorOrSection =
989 BC.getSectionForAddress(Address: StaticCallTableAddress);
990 if (!ErrorOrSection)
991 return createStringError(EC: errc::executable_format_error,
992 S: "no section matching __start_static_call_sites");
993
994 StaticCallSection = *ErrorOrSection;
995 if (!StaticCallSection->containsAddress(Address: Stop->getAddress() - 1))
996 return createStringError(EC: errc::executable_format_error,
997 S: "__stop_static_call_sites not in the same section "
998 "as __start_static_call_sites");
999
1000 if ((Stop->getAddress() - StaticCallTableAddress) % STATIC_CALL_ENTRY_SIZE)
1001 return createStringError(EC: errc::executable_format_error,
1002 S: "static call table size error");
1003
1004 const uint64_t SectionAddress = StaticCallSection->getAddress();
1005 AddressExtractor AE(StaticCallSection->getContents(), SectionAddress,
1006 BC.AsmInfo->isLittleEndian(),
1007 BC.AsmInfo->getCodePointerSize());
1008 AddressExtractor::Cursor Cursor(StaticCallTableAddress - SectionAddress);
1009 uint32_t EntryID = 0;
1010 while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) {
1011 const uint64_t CallAddress = AE.getPCRelAddress32(C&: Cursor);
1012 const uint64_t KeyAddress = AE.getPCRelAddress32(C&: Cursor);
1013
1014 // Consume the status of the cursor.
1015 if (!Cursor)
1016 return createStringError(EC: errc::executable_format_error,
1017 Fmt: "out of bounds while reading static calls: %s",
1018 Vals: toString(E: Cursor.takeError()).c_str());
1019
1020 ++EntryID;
1021
1022 if (opts::DumpStaticCalls) {
1023 BC.outs() << "Static Call Site: " << EntryID << '\n';
1024 BC.outs() << "\tCallAddress: 0x" << Twine::utohexstr(Val: CallAddress)
1025 << "\n\tKeyAddress: 0x" << Twine::utohexstr(Val: KeyAddress)
1026 << '\n';
1027 }
1028
1029 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(Address: CallAddress);
1030 if (!BF)
1031 continue;
1032
1033 if (!BC.shouldEmit(Function: *BF))
1034 continue;
1035
1036 if (!BF->hasInstructions())
1037 continue;
1038
1039 MCInst *Inst = BF->getInstructionAtOffset(Offset: CallAddress - BF->getAddress());
1040 if (!Inst)
1041 return createStringError(EC: errc::executable_format_error,
1042 Fmt: "no instruction at call site address 0x%" PRIx64,
1043 Vals: CallAddress);
1044
1045 // Check for duplicate entries.
1046 if (BC.MIB->hasAnnotation(Inst: *Inst, Name: "StaticCall"))
1047 return createStringError(EC: errc::executable_format_error,
1048 Fmt: "duplicate static call site at 0x%" PRIx64,
1049 Vals: CallAddress);
1050
1051 BC.MIB->addAnnotation(Inst&: *Inst, Name: "StaticCall", Val: EntryID);
1052
1053 MCSymbol *Label =
1054 BC.MIB->getOrCreateInstLabel(Inst&: *Inst, Name: "__SC_", Ctx: BC.Ctx.get());
1055
1056 StaticCallEntries.push_back(x: {.ID: EntryID, .Function: BF, .Label: Label});
1057 }
1058
1059 BC.outs() << "BOLT-INFO: parsed " << StaticCallEntries.size()
1060 << " static call entries\n";
1061
1062 return Error::success();
1063}
1064
1065/// The static call table is sorted during boot time in
1066/// static_call_sort_entries(). This makes it possible to update existing
1067/// entries in-place ignoring their relative order.
1068Error LinuxKernelRewriter::rewriteStaticCalls() {
1069 if (!StaticCallTableAddress || !StaticCallSection)
1070 return Error::success();
1071
1072 for (auto &Entry : StaticCallEntries) {
1073 if (!Entry.Function)
1074 continue;
1075
1076 BinaryFunction &BF = *Entry.Function;
1077 if (!BC.shouldEmit(Function: BF))
1078 continue;
1079
1080 // Create a relocation against the label.
1081 const uint64_t EntryOffset = StaticCallTableAddress -
1082 StaticCallSection->getAddress() +
1083 (Entry.ID - 1) * STATIC_CALL_ENTRY_SIZE;
1084 StaticCallSection->addRelocation(Offset: EntryOffset, Symbol: Entry.Label,
1085 Type: ELF::R_X86_64_PC32, /*Addend*/ 0);
1086 }
1087
1088 return Error::success();
1089}
1090
1091/// Instructions that access user-space memory can cause page faults. These
1092/// faults will be handled by the kernel and execution will resume at the fixup
1093/// code location if the address was invalid. The kernel uses the exception
1094/// table to match the faulting instruction to its fixup. The table consists of
1095/// the following entries:
1096///
1097/// struct exception_table_entry {
1098/// int insn;
1099/// int fixup;
1100/// int data;
1101/// };
1102///
1103/// More info at:
1104/// https://www.kernel.org/doc/Documentation/x86/exception-tables.txt
1105Error LinuxKernelRewriter::readExceptionTable() {
1106 ExceptionsSection = BC.getUniqueSectionByName(SectionName: "__ex_table");
1107 if (!ExceptionsSection)
1108 return Error::success();
1109
1110 if (ExceptionsSection->getSize() % EXCEPTION_TABLE_ENTRY_SIZE)
1111 return createStringError(EC: errc::executable_format_error,
1112 S: "exception table size error");
1113
1114 AddressExtractor AE(
1115 ExceptionsSection->getContents(), ExceptionsSection->getAddress(),
1116 BC.AsmInfo->isLittleEndian(), BC.AsmInfo->getCodePointerSize());
1117 AddressExtractor::Cursor Cursor(0);
1118 uint32_t EntryID = 0;
1119 while (Cursor && Cursor.tell() < ExceptionsSection->getSize()) {
1120 const uint64_t InstAddress = AE.getPCRelAddress32(C&: Cursor);
1121 const uint64_t FixupAddress = AE.getPCRelAddress32(C&: Cursor);
1122 const uint64_t Data = AE.getU32(C&: Cursor);
1123
1124 // Consume the status of the cursor.
1125 if (!Cursor)
1126 return createStringError(
1127 EC: errc::executable_format_error,
1128 Fmt: "out of bounds while reading exception table: %s",
1129 Vals: toString(E: Cursor.takeError()).c_str());
1130
1131 ++EntryID;
1132
1133 if (opts::DumpExceptions) {
1134 BC.outs() << "Exception Entry: " << EntryID << '\n';
1135 BC.outs() << "\tInsn: 0x" << Twine::utohexstr(Val: InstAddress) << '\n'
1136 << "\tFixup: 0x" << Twine::utohexstr(Val: FixupAddress) << '\n'
1137 << "\tData: 0x" << Twine::utohexstr(Val: Data) << '\n';
1138 }
1139
1140 MCInst *Inst = nullptr;
1141 MCSymbol *FixupLabel = nullptr;
1142
1143 BinaryFunction *InstBF = BC.getBinaryFunctionContainingAddress(Address: InstAddress);
1144 if (InstBF && BC.shouldEmit(Function: *InstBF)) {
1145 Inst = InstBF->getInstructionAtOffset(Offset: InstAddress - InstBF->getAddress());
1146 if (!Inst)
1147 return createStringError(EC: errc::executable_format_error,
1148 Fmt: "no instruction at address 0x%" PRIx64
1149 " in exception table",
1150 Vals: InstAddress);
1151 BC.MIB->addAnnotation(Inst&: *Inst, Name: "ExceptionEntry", Val: EntryID);
1152 FunctionsWithExceptions.insert(V: InstBF);
1153 }
1154
1155 if (!InstBF && opts::Verbosity) {
1156 BC.outs() << "BOLT-INFO: no function matches instruction at 0x"
1157 << Twine::utohexstr(Val: InstAddress)
1158 << " referenced by Linux exception table\n";
1159 }
1160
1161 BinaryFunction *FixupBF =
1162 BC.getBinaryFunctionContainingAddress(Address: FixupAddress);
1163 if (FixupBF && BC.shouldEmit(Function: *FixupBF)) {
1164 const uint64_t Offset = FixupAddress - FixupBF->getAddress();
1165 if (!FixupBF->getInstructionAtOffset(Offset))
1166 return createStringError(EC: errc::executable_format_error,
1167 Fmt: "no instruction at fixup address 0x%" PRIx64
1168 " in exception table",
1169 Vals: FixupAddress);
1170 FixupLabel = Offset ? FixupBF->addEntryPointAtOffset(Offset)
1171 : FixupBF->getSymbol();
1172 if (Inst)
1173 BC.MIB->addAnnotation(Inst&: *Inst, Name: "Fixup", Val: FixupLabel->getName());
1174 FunctionsWithExceptions.insert(V: FixupBF);
1175 }
1176
1177 if (!FixupBF && opts::Verbosity) {
1178 BC.outs() << "BOLT-INFO: no function matches fixup code at 0x"
1179 << Twine::utohexstr(Val: FixupAddress)
1180 << " referenced by Linux exception table\n";
1181 }
1182 }
1183
1184 BC.outs() << "BOLT-INFO: parsed "
1185 << ExceptionsSection->getSize() / EXCEPTION_TABLE_ENTRY_SIZE
1186 << " exception table entries\n";
1187
1188 return Error::success();
1189}
1190
1191/// Depending on the value of CONFIG_BUILDTIME_TABLE_SORT, the kernel expects
1192/// the exception table to be sorted. Hence we have to sort it after code
1193/// reordering.
1194Error LinuxKernelRewriter::rewriteExceptionTable() {
1195 // Disable output of functions with exceptions before rewrite support is
1196 // added.
1197 for (BinaryFunction *BF : FunctionsWithExceptions)
1198 BF->setSimple(false);
1199
1200 return Error::success();
1201}
1202
1203/// .parainsrtuctions section contains information for patching parvirtual call
1204/// instructions during runtime. The entries in the section are in the form:
1205///
1206/// struct paravirt_patch_site {
1207/// u8 *instr; /* original instructions */
1208/// u8 type; /* type of this instruction */
1209/// u8 len; /* length of original instruction */
1210/// };
1211///
1212/// Note that the structures are aligned at 8-byte boundary.
1213Error LinuxKernelRewriter::readParaInstructions() {
1214 ParavirtualPatchSection = BC.getUniqueSectionByName(SectionName: ".parainstructions");
1215 if (!ParavirtualPatchSection)
1216 return Error::success();
1217
1218 DataExtractor DE(ParavirtualPatchSection->getContents(),
1219 BC.AsmInfo->isLittleEndian(),
1220 BC.AsmInfo->getCodePointerSize());
1221 uint32_t EntryID = 0;
1222 DataExtractor::Cursor Cursor(0);
1223 while (Cursor && !DE.eof(C: Cursor)) {
1224 const uint64_t NextOffset = alignTo(Size: Cursor.tell(), A: Align(PARA_PATCH_ALIGN));
1225 if (!DE.isValidOffset(offset: NextOffset))
1226 break;
1227
1228 Cursor.seek(NewOffSet: NextOffset);
1229
1230 const uint64_t InstrLocation = DE.getU64(C&: Cursor);
1231 const uint8_t Type = DE.getU8(C&: Cursor);
1232 const uint8_t Len = DE.getU8(C&: Cursor);
1233
1234 if (!Cursor)
1235 return createStringError(
1236 EC: errc::executable_format_error,
1237 Fmt: "out of bounds while reading .parainstructions: %s",
1238 Vals: toString(E: Cursor.takeError()).c_str());
1239
1240 ++EntryID;
1241
1242 if (opts::DumpParavirtualPatchSites) {
1243 BC.outs() << "Paravirtual patch site: " << EntryID << '\n';
1244 BC.outs() << "\tInstr: 0x" << Twine::utohexstr(Val: InstrLocation)
1245 << "\n\tType: 0x" << Twine::utohexstr(Val: Type) << "\n\tLen: 0x"
1246 << Twine::utohexstr(Val: Len) << '\n';
1247 }
1248
1249 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(Address: InstrLocation);
1250 if (!BF && opts::Verbosity) {
1251 BC.outs() << "BOLT-INFO: no function matches address 0x"
1252 << Twine::utohexstr(Val: InstrLocation)
1253 << " referenced by paravirutal patch site\n";
1254 }
1255
1256 if (BF && BC.shouldEmit(Function: *BF)) {
1257 MCInst *Inst =
1258 BF->getInstructionAtOffset(Offset: InstrLocation - BF->getAddress());
1259 if (!Inst)
1260 return createStringError(EC: errc::executable_format_error,
1261 Fmt: "no instruction at address 0x%" PRIx64
1262 " in paravirtual call site %d",
1263 Vals: InstrLocation, Vals: EntryID);
1264 BC.MIB->addAnnotation(Inst&: *Inst, Name: "ParaSite", Val: EntryID);
1265 }
1266 }
1267
1268 BC.outs() << "BOLT-INFO: parsed " << EntryID << " paravirtual patch sites\n";
1269
1270 return Error::success();
1271}
1272
1273void LinuxKernelRewriter::skipFunctionsWithAnnotation(
1274 StringRef Annotation) const {
1275 for (BinaryFunction &BF : llvm::make_second_range(c&: BC.getBinaryFunctions())) {
1276 if (!BC.shouldEmit(Function: BF))
1277 continue;
1278 for (const BinaryBasicBlock &BB : BF) {
1279 const bool HasAnnotation = llvm::any_of(Range: BB, P: [&](const MCInst &Inst) {
1280 return BC.MIB->hasAnnotation(Inst, Name: Annotation);
1281 });
1282 if (HasAnnotation) {
1283 BF.setSimple(false);
1284 break;
1285 }
1286 }
1287 }
1288}
1289
1290Error LinuxKernelRewriter::rewriteParaInstructions() {
1291 // Disable output of functions with paravirtual instructions before the
1292 // rewrite support is complete.
1293 skipFunctionsWithAnnotation(Annotation: "ParaSite");
1294
1295 return Error::success();
1296}
1297
1298/// Process __bug_table section.
1299/// This section contains information useful for kernel debugging, mostly
1300/// utilized by WARN()/WARN_ON() macros and deprecated BUG()/BUG_ON().
1301///
1302/// Each entry in the section is a struct bug_entry that contains a pointer to
1303/// the ud2 instruction corresponding to the bug, corresponding file name (both
1304/// pointers use PC relative offset addressing), line number, and flags.
1305/// The definition of the struct bug_entry can be found in
1306/// `include/asm-generic/bug.h`. The first entry in the struct is an instruction
1307/// address encoded as a PC-relative offset. In theory, it could be an absolute
1308/// address if CONFIG_GENERIC_BUG_RELATIVE_POINTERS is not set, but in practice
1309/// the kernel code relies on it being a relative offset on x86-64.
1310Error LinuxKernelRewriter::readBugTable() {
1311 BugTableSection = BC.getUniqueSectionByName(SectionName: "__bug_table");
1312 if (!BugTableSection)
1313 return Error::success();
1314
1315 if (BugTableSection->getSize() % BUG_TABLE_ENTRY_SIZE)
1316 return createStringError(EC: errc::executable_format_error,
1317 S: "bug table size error");
1318
1319 AddressExtractor AE(
1320 BugTableSection->getContents(), BugTableSection->getAddress(),
1321 BC.AsmInfo->isLittleEndian(), BC.AsmInfo->getCodePointerSize());
1322 AddressExtractor::Cursor Cursor(0);
1323 uint32_t EntryID = 0;
1324 while (Cursor && Cursor.tell() < BugTableSection->getSize()) {
1325 const uint64_t Pos = Cursor.tell();
1326 const uint64_t InstAddress = AE.getPCRelAddress32(C&: Cursor);
1327 Cursor.seek(NewOffSet: Pos + BUG_TABLE_ENTRY_SIZE);
1328
1329 if (!Cursor)
1330 return createStringError(EC: errc::executable_format_error,
1331 Fmt: "out of bounds while reading __bug_table: %s",
1332 Vals: toString(E: Cursor.takeError()).c_str());
1333
1334 ++EntryID;
1335
1336 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(Address: InstAddress);
1337 if (!BF && opts::Verbosity) {
1338 BC.outs() << "BOLT-INFO: no function matches address 0x"
1339 << Twine::utohexstr(Val: InstAddress)
1340 << " referenced by bug table\n";
1341 }
1342
1343 if (BF && BC.shouldEmit(Function: *BF)) {
1344 MCInst *Inst = BF->getInstructionAtOffset(Offset: InstAddress - BF->getAddress());
1345 if (!Inst)
1346 return createStringError(EC: errc::executable_format_error,
1347 Fmt: "no instruction at address 0x%" PRIx64
1348 " referenced by bug table entry %d",
1349 Vals: InstAddress, Vals: EntryID);
1350 BC.MIB->addAnnotation(Inst&: *Inst, Name: "BugEntry", Val: EntryID);
1351
1352 FunctionBugList[BF].push_back(Elt: EntryID);
1353 }
1354 }
1355
1356 BC.outs() << "BOLT-INFO: parsed " << EntryID << " bug table entries\n";
1357
1358 return Error::success();
1359}
1360
1361/// find_bug() uses linear search to match an address to an entry in the bug
1362/// table. Hence, there is no need to sort entries when rewriting the table.
1363/// When we need to erase an entry, we set its instruction address to zero.
1364Error LinuxKernelRewriter::rewriteBugTable() {
1365 if (!BugTableSection)
1366 return Error::success();
1367
1368 for (BinaryFunction &BF : llvm::make_second_range(c&: BC.getBinaryFunctions())) {
1369 if (!BC.shouldEmit(Function: BF))
1370 continue;
1371
1372 if (!FunctionBugList.count(Val: &BF))
1373 continue;
1374
1375 // Bugs that will be emitted for this function.
1376 DenseSet<uint32_t> EmittedIDs;
1377 for (BinaryBasicBlock &BB : BF) {
1378 for (MCInst &Inst : BB) {
1379 if (!BC.MIB->hasAnnotation(Inst, Name: "BugEntry"))
1380 continue;
1381 const uint32_t ID = BC.MIB->getAnnotationAs<uint32_t>(Inst, Name: "BugEntry");
1382 EmittedIDs.insert(V: ID);
1383
1384 // Create a relocation entry for this bug entry.
1385 MCSymbol *Label =
1386 BC.MIB->getOrCreateInstLabel(Inst, Name: "__BUG_", Ctx: BC.Ctx.get());
1387 const uint64_t EntryOffset = (ID - 1) * BUG_TABLE_ENTRY_SIZE;
1388 BugTableSection->addRelocation(Offset: EntryOffset, Symbol: Label, Type: ELF::R_X86_64_PC32,
1389 /*Addend*/ 0);
1390 }
1391 }
1392
1393 // Clear bug entries that were not emitted for this function, e.g. as a
1394 // result of DCE, but setting their instruction address to zero.
1395 for (const uint32_t ID : FunctionBugList[&BF]) {
1396 if (!EmittedIDs.count(V: ID)) {
1397 const uint64_t EntryOffset = (ID - 1) * BUG_TABLE_ENTRY_SIZE;
1398 BugTableSection->addRelocation(Offset: EntryOffset, Symbol: nullptr, Type: ELF::R_X86_64_PC32,
1399 /*Addend*/ 0);
1400 }
1401 }
1402 }
1403
1404 return Error::success();
1405}
1406
1407/// The kernel can replace certain instruction sequences depending on hardware
1408/// it is running on and features specified during boot time. The information
1409/// about alternative instruction sequences is stored in .altinstructions
1410/// section. The format of entries in this section is defined in
1411/// arch/x86/include/asm/alternative.h:
1412///
1413/// struct alt_instr {
1414/// s32 instr_offset;
1415/// s32 repl_offset;
1416/// uXX feature;
1417/// u8 instrlen;
1418/// u8 replacementlen;
1419/// u8 padlen; // present in older kernels
1420/// } __packed;
1421///
1422/// Note that the structure is packed.
1423///
1424/// Since the size of the "feature" field could be either u16 or u32, and
1425/// "padlen" presence is unknown, we attempt to parse .altinstructions section
1426/// using all possible combinations (four at this time). Since we validate the
1427/// contents of the section and its size, the detection works quite well.
1428/// Still, we leave the user the opportunity to specify these features on the
1429/// command line and skip the guesswork.
1430Error LinuxKernelRewriter::readAltInstructions() {
1431 AltInstrSection = BC.getUniqueSectionByName(SectionName: ".altinstructions");
1432 if (!AltInstrSection)
1433 return Error::success();
1434
1435 // Presence of "padlen" field.
1436 std::vector<bool> PadLenVariants;
1437 if (opts::AltInstHasPadLen.getNumOccurrences())
1438 PadLenVariants.push_back(x: opts::AltInstHasPadLen);
1439 else
1440 PadLenVariants = {false, true};
1441
1442 // Size (in bytes) variants of "feature" field.
1443 std::vector<uint32_t> FeatureSizeVariants;
1444 if (opts::AltInstFeatureSize.getNumOccurrences())
1445 FeatureSizeVariants.push_back(x: opts::AltInstFeatureSize);
1446 else
1447 FeatureSizeVariants = {2, 4};
1448
1449 for (bool AltInstHasPadLen : PadLenVariants) {
1450 for (uint32_t AltInstFeatureSize : FeatureSizeVariants) {
1451 LLVM_DEBUG({
1452 dbgs() << "BOLT-DEBUG: trying AltInstHasPadLen = " << AltInstHasPadLen
1453 << "; AltInstFeatureSize = " << AltInstFeatureSize << ";\n";
1454 });
1455 if (Error E = tryReadAltInstructions(AltInstFeatureSize, AltInstHasPadLen,
1456 /*ParseOnly*/ true)) {
1457 consumeError(Err: std::move(E));
1458 continue;
1459 }
1460
1461 LLVM_DEBUG(dbgs() << "Matched .altinstructions format\n");
1462
1463 if (!opts::AltInstHasPadLen.getNumOccurrences())
1464 BC.outs() << "BOLT-INFO: setting --" << opts::AltInstHasPadLen.ArgStr
1465 << '=' << AltInstHasPadLen << '\n';
1466
1467 if (!opts::AltInstFeatureSize.getNumOccurrences())
1468 BC.outs() << "BOLT-INFO: setting --" << opts::AltInstFeatureSize.ArgStr
1469 << '=' << AltInstFeatureSize << '\n';
1470
1471 return tryReadAltInstructions(AltInstFeatureSize, AltInstHasPadLen,
1472 /*ParseOnly*/ false);
1473 }
1474 }
1475
1476 // We couldn't match the format. Read again to properly propagate the error
1477 // to the user.
1478 return tryReadAltInstructions(AltInstFeatureSize: opts::AltInstFeatureSize,
1479 AltInstHasPadLen: opts::AltInstHasPadLen, /*ParseOnly*/ false);
1480}
1481
1482Error LinuxKernelRewriter::tryReadAltInstructions(uint32_t AltInstFeatureSize,
1483 bool AltInstHasPadLen,
1484 bool ParseOnly) {
1485 AddressExtractor AE(
1486 AltInstrSection->getContents(), AltInstrSection->getAddress(),
1487 BC.AsmInfo->isLittleEndian(), BC.AsmInfo->getCodePointerSize());
1488 AddressExtractor::Cursor Cursor(0);
1489 uint64_t EntryID = 0;
1490 while (Cursor && !AE.eof(C: Cursor)) {
1491 const uint64_t OrgInstAddress = AE.getPCRelAddress32(C&: Cursor);
1492 const uint64_t AltInstAddress = AE.getPCRelAddress32(C&: Cursor);
1493 const uint64_t Feature = AE.getUnsigned(C&: Cursor, Size: AltInstFeatureSize);
1494 const uint8_t OrgSize = AE.getU8(C&: Cursor);
1495 const uint8_t AltSize = AE.getU8(C&: Cursor);
1496
1497 // Older kernels may have the padlen field.
1498 const uint8_t PadLen = AltInstHasPadLen ? AE.getU8(C&: Cursor) : 0;
1499
1500 if (!Cursor)
1501 return createStringError(
1502 EC: errc::executable_format_error,
1503 Fmt: "out of bounds while reading .altinstructions: %s",
1504 Vals: toString(E: Cursor.takeError()).c_str());
1505
1506 ++EntryID;
1507
1508 if (opts::DumpAltInstructions) {
1509 BC.outs() << "Alternative instruction entry: " << EntryID
1510 << "\n\tOrg: 0x" << Twine::utohexstr(Val: OrgInstAddress)
1511 << "\n\tAlt: 0x" << Twine::utohexstr(Val: AltInstAddress)
1512 << "\n\tFeature: 0x" << Twine::utohexstr(Val: Feature)
1513 << "\n\tOrgSize: " << (int)OrgSize
1514 << "\n\tAltSize: " << (int)AltSize << '\n';
1515 if (AltInstHasPadLen)
1516 BC.outs() << "\tPadLen: " << (int)PadLen << '\n';
1517 }
1518
1519 if (AltSize > OrgSize)
1520 return createStringError(EC: errc::executable_format_error,
1521 S: "error reading .altinstructions");
1522
1523 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(Address: OrgInstAddress);
1524 if (!BF && opts::Verbosity) {
1525 BC.outs() << "BOLT-INFO: no function matches address 0x"
1526 << Twine::utohexstr(Val: OrgInstAddress)
1527 << " of instruction from .altinstructions\n";
1528 }
1529
1530 BinaryFunction *AltBF =
1531 BC.getBinaryFunctionContainingAddress(Address: AltInstAddress);
1532 if (!ParseOnly && AltBF && BC.shouldEmit(Function: *AltBF)) {
1533 BC.errs()
1534 << "BOLT-WARNING: alternative instruction sequence found in function "
1535 << *AltBF << '\n';
1536 AltBF->setIgnored();
1537 }
1538
1539 if (!BF || !BF->hasInstructions())
1540 continue;
1541
1542 if (OrgInstAddress + OrgSize > BF->getAddress() + BF->getSize())
1543 return createStringError(EC: errc::executable_format_error,
1544 S: "error reading .altinstructions");
1545
1546 MCInst *Inst =
1547 BF->getInstructionAtOffset(Offset: OrgInstAddress - BF->getAddress());
1548 if (!Inst)
1549 return createStringError(EC: errc::executable_format_error,
1550 Fmt: "no instruction at address 0x%" PRIx64
1551 " referenced by .altinstructions entry %d",
1552 Vals: OrgInstAddress, Vals: EntryID);
1553
1554 if (ParseOnly)
1555 continue;
1556
1557 // There could be more than one alternative instruction sequences for the
1558 // same original instruction. Annotate each alternative separately.
1559 std::string AnnotationName = "AltInst";
1560 unsigned N = 2;
1561 while (BC.MIB->hasAnnotation(Inst: *Inst, Name: AnnotationName))
1562 AnnotationName = "AltInst" + std::to_string(val: N++);
1563
1564 BC.MIB->addAnnotation(Inst&: *Inst, Name: AnnotationName, Val: EntryID);
1565
1566 // Annotate all instructions from the original sequence. Note that it's not
1567 // the most efficient way to look for instructions in the address range,
1568 // but since alternative instructions are uncommon, it will do for now.
1569 for (uint32_t Offset = 1; Offset < OrgSize; ++Offset) {
1570 Inst = BF->getInstructionAtOffset(Offset: OrgInstAddress + Offset -
1571 BF->getAddress());
1572 if (Inst)
1573 BC.MIB->addAnnotation(Inst&: *Inst, Name: AnnotationName, Val: EntryID);
1574 }
1575 }
1576
1577 if (!ParseOnly)
1578 BC.outs() << "BOLT-INFO: parsed " << EntryID
1579 << " alternative instruction entries\n";
1580
1581 return Error::success();
1582}
1583
1584void LinuxKernelRewriter::processAltInstructionsPostCFG() {
1585 // Disable optimization and output of functions with alt instructions before
1586 // the rewrite support is complete. Alt instructions can modify the control
1587 // flow, hence we may end up deleting seemingly unreachable code.
1588 skipFunctionsWithAnnotation(Annotation: "AltInst");
1589}
1590
1591/// When the Linux kernel needs to handle an error associated with a given PCI
1592/// device, it uses a table stored in .pci_fixup section to locate a fixup code
1593/// specific to the vendor and the problematic device. The section contains a
1594/// list of the following structures defined in include/linux/pci.h:
1595///
1596/// struct pci_fixup {
1597/// u16 vendor; /* Or PCI_ANY_ID */
1598/// u16 device; /* Or PCI_ANY_ID */
1599/// u32 class; /* Or PCI_ANY_ID */
1600/// unsigned int class_shift; /* should be 0, 8, 16 */
1601/// int hook_offset;
1602/// };
1603///
1604/// Normally, the hook will point to a function start and we don't have to
1605/// update the pointer if we are not relocating functions. Hence, while reading
1606/// the table we validate this assumption. If a function has a fixup code in the
1607/// middle of its body, we issue a warning and ignore it.
1608Error LinuxKernelRewriter::readPCIFixupTable() {
1609 PCIFixupSection = BC.getUniqueSectionByName(SectionName: ".pci_fixup");
1610 if (!PCIFixupSection)
1611 return Error::success();
1612
1613 if (PCIFixupSection->getSize() % PCI_FIXUP_ENTRY_SIZE)
1614 return createStringError(EC: errc::executable_format_error,
1615 S: "PCI fixup table size error");
1616
1617 AddressExtractor AE(
1618 PCIFixupSection->getContents(), PCIFixupSection->getAddress(),
1619 BC.AsmInfo->isLittleEndian(), BC.AsmInfo->getCodePointerSize());
1620 AddressExtractor::Cursor Cursor(0);
1621 uint64_t EntryID = 0;
1622 while (Cursor && !AE.eof(C: Cursor)) {
1623 const uint16_t Vendor = AE.getU16(C&: Cursor);
1624 const uint16_t Device = AE.getU16(C&: Cursor);
1625 const uint32_t Class = AE.getU32(C&: Cursor);
1626 const uint32_t ClassShift = AE.getU32(C&: Cursor);
1627 const uint64_t HookAddress = AE.getPCRelAddress32(C&: Cursor);
1628
1629 if (!Cursor)
1630 return createStringError(EC: errc::executable_format_error,
1631 Fmt: "out of bounds while reading .pci_fixup: %s",
1632 Vals: toString(E: Cursor.takeError()).c_str());
1633
1634 ++EntryID;
1635
1636 if (opts::DumpPCIFixups) {
1637 BC.outs() << "PCI fixup entry: " << EntryID << "\n\tVendor 0x"
1638 << Twine::utohexstr(Val: Vendor) << "\n\tDevice: 0x"
1639 << Twine::utohexstr(Val: Device) << "\n\tClass: 0x"
1640 << Twine::utohexstr(Val: Class) << "\n\tClassShift: 0x"
1641 << Twine::utohexstr(Val: ClassShift) << "\n\tHookAddress: 0x"
1642 << Twine::utohexstr(Val: HookAddress) << '\n';
1643 }
1644
1645 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(Address: HookAddress);
1646 if (!BF && opts::Verbosity) {
1647 BC.outs() << "BOLT-INFO: no function matches address 0x"
1648 << Twine::utohexstr(Val: HookAddress)
1649 << " of hook from .pci_fixup\n";
1650 }
1651
1652 if (!BF || !BC.shouldEmit(Function: *BF))
1653 continue;
1654
1655 if (const uint64_t Offset = HookAddress - BF->getAddress()) {
1656 BC.errs() << "BOLT-WARNING: PCI fixup detected in the middle of function "
1657 << *BF << " at offset 0x" << Twine::utohexstr(Val: Offset) << '\n';
1658 BF->setSimple(false);
1659 }
1660 }
1661
1662 BC.outs() << "BOLT-INFO: parsed " << EntryID << " PCI fixup entries\n";
1663
1664 return Error::success();
1665}
1666
1667/// Runtime code modification used by static keys is the most ubiquitous
1668/// self-modifying feature of the Linux kernel. The idea is to eliminate the
1669/// condition check and associated conditional jump on a hot path if that
1670/// condition (based on a boolean value of a static key) does not change often.
1671/// Whenever the condition changes, the kernel runtime modifies all code paths
1672/// associated with that key flipping the code between nop and (unconditional)
1673/// jump. The information about the code is stored in a static key jump table
1674/// and contains the list of entries of the following type from
1675/// include/linux/jump_label.h:
1676//
1677/// struct jump_entry {
1678/// s32 code;
1679/// s32 target;
1680/// long key; // key may be far away from the core kernel under KASLR
1681/// };
1682///
1683/// The list does not have to be stored in any sorted way, but it is sorted at
1684/// boot time (or module initialization time) first by "key" and then by "code".
1685/// jump_label_sort_entries() is responsible for sorting the table.
1686///
1687/// The key in jump_entry structure uses lower two bits of the key address
1688/// (which itself is aligned) to store extra information. We are interested in
1689/// the lower bit which indicates if the key is likely to be set on the code
1690/// path associated with this jump_entry.
1691///
1692/// static_key_{enable,disable}() functions modify the code based on key and
1693/// jump table entries.
1694///
1695/// jump_label_update() updates all code entries for a given key. Batch mode is
1696/// used for x86.
1697///
1698/// The actual patching happens in text_poke_bp_batch() that overrides the first
1699/// byte of the sequence with int3 before proceeding with actual code
1700/// replacement.
1701Error LinuxKernelRewriter::readStaticKeysJumpTable() {
1702 const BinaryData *StaticKeysJumpTable =
1703 BC.getBinaryDataByName(Name: "__start___jump_table");
1704 if (!StaticKeysJumpTable)
1705 return Error::success();
1706
1707 StaticKeysJumpTableAddress = StaticKeysJumpTable->getAddress();
1708
1709 const BinaryData *Stop = BC.getBinaryDataByName(Name: "__stop___jump_table");
1710 if (!Stop)
1711 return createStringError(EC: errc::executable_format_error,
1712 S: "missing __stop___jump_table symbol");
1713
1714 ErrorOr<BinarySection &> ErrorOrSection =
1715 BC.getSectionForAddress(Address: StaticKeysJumpTableAddress);
1716 if (!ErrorOrSection)
1717 return createStringError(EC: errc::executable_format_error,
1718 S: "no section matching __start___jump_table");
1719
1720 StaticKeysJumpSection = *ErrorOrSection;
1721 if (!StaticKeysJumpSection->containsAddress(Address: Stop->getAddress() - 1))
1722 return createStringError(EC: errc::executable_format_error,
1723 S: "__stop___jump_table not in the same section "
1724 "as __start___jump_table");
1725
1726 if ((Stop->getAddress() - StaticKeysJumpTableAddress) %
1727 STATIC_KEYS_JUMP_ENTRY_SIZE)
1728 return createStringError(EC: errc::executable_format_error,
1729 S: "static keys jump table size error");
1730
1731 const uint64_t SectionAddress = StaticKeysJumpSection->getAddress();
1732 AddressExtractor AE(StaticKeysJumpSection->getContents(), SectionAddress,
1733 BC.AsmInfo->isLittleEndian(),
1734 BC.AsmInfo->getCodePointerSize());
1735 AddressExtractor::Cursor Cursor(StaticKeysJumpTableAddress - SectionAddress);
1736 uint32_t EntryID = 0;
1737 while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) {
1738 const uint64_t JumpAddress = AE.getPCRelAddress32(C&: Cursor);
1739 const uint64_t TargetAddress = AE.getPCRelAddress32(C&: Cursor);
1740 const uint64_t KeyAddress = AE.getPCRelAddress64(C&: Cursor);
1741
1742 // Consume the status of the cursor.
1743 if (!Cursor)
1744 return createStringError(
1745 EC: errc::executable_format_error,
1746 Fmt: "out of bounds while reading static keys jump table: %s",
1747 Vals: toString(E: Cursor.takeError()).c_str());
1748
1749 ++EntryID;
1750
1751 JumpInfo.push_back(Elt: JumpInfoEntry());
1752 JumpInfoEntry &Info = JumpInfo.back();
1753 Info.Likely = KeyAddress & 1;
1754
1755 if (opts::DumpStaticKeys) {
1756 BC.outs() << "Static key jump entry: " << EntryID
1757 << "\n\tJumpAddress: 0x" << Twine::utohexstr(Val: JumpAddress)
1758 << "\n\tTargetAddress: 0x" << Twine::utohexstr(Val: TargetAddress)
1759 << "\n\tKeyAddress: 0x" << Twine::utohexstr(Val: KeyAddress)
1760 << "\n\tIsLikely: " << Info.Likely << '\n';
1761 }
1762
1763 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(Address: JumpAddress);
1764 if (!BF && opts::Verbosity) {
1765 BC.outs()
1766 << "BOLT-INFO: no function matches address 0x"
1767 << Twine::utohexstr(Val: JumpAddress)
1768 << " of jump instruction referenced from static keys jump table\n";
1769 }
1770
1771 if (!BF || !BC.shouldEmit(Function: *BF))
1772 continue;
1773
1774 MCInst *Inst = BF->getInstructionAtOffset(Offset: JumpAddress - BF->getAddress());
1775 if (!Inst)
1776 return createStringError(
1777 EC: errc::executable_format_error,
1778 Fmt: "no instruction at static keys jump site address 0x%" PRIx64,
1779 Vals: JumpAddress);
1780
1781 if (!BF->containsAddress(PC: TargetAddress))
1782 return createStringError(
1783 EC: errc::executable_format_error,
1784 Fmt: "invalid target of static keys jump at 0x%" PRIx64 " : 0x%" PRIx64,
1785 Vals: JumpAddress, Vals: TargetAddress);
1786
1787 const bool IsBranch = BC.MIB->isBranch(Inst: *Inst);
1788 if (!IsBranch && !BC.MIB->isNoop(Inst: *Inst))
1789 return createStringError(EC: errc::executable_format_error,
1790 Fmt: "jump or nop expected at address 0x%" PRIx64,
1791 Vals: JumpAddress);
1792
1793 const uint64_t Size = BC.computeInstructionSize(Inst: *Inst);
1794 if (Size != 2 && Size != 5) {
1795 return createStringError(
1796 EC: errc::executable_format_error,
1797 Fmt: "unexpected static keys jump size at address 0x%" PRIx64,
1798 Vals: JumpAddress);
1799 }
1800
1801 MCSymbol *Target = BF->registerBranch(Src: JumpAddress, Dst: TargetAddress);
1802 MCInst StaticKeyBranch;
1803
1804 // Create a conditional branch instruction. The actual conditional code type
1805 // should not matter as long as it's a valid code. The instruction should be
1806 // treated as a conditional branch for control-flow purposes. Before we emit
1807 // the code, it will be converted to a different instruction in
1808 // rewriteStaticKeysJumpTable().
1809 //
1810 // NB: for older kernels, under LongJumpLabels option, we create long
1811 // conditional branch to guarantee that code size estimation takes
1812 // into account the extra bytes needed for long branch that will be used
1813 // by the kernel patching code. Newer kernels can work with both short
1814 // and long branches. The code for long conditional branch is larger
1815 // than unconditional one, so we are pessimistic in our estimations.
1816 if (opts::LongJumpLabels)
1817 BC.MIB->createLongCondBranch(Inst&: StaticKeyBranch, Target, CC: 0, Ctx: BC.Ctx.get());
1818 else
1819 BC.MIB->createCondBranch(Inst&: StaticKeyBranch, Target, CC: 0, Ctx: BC.Ctx.get());
1820 BC.MIB->moveAnnotations(SrcInst: std::move(*Inst), DstInst&: StaticKeyBranch);
1821 BC.MIB->setDynamicBranch(Inst&: StaticKeyBranch, ID: EntryID);
1822 *Inst = StaticKeyBranch;
1823
1824 // IsBranch = InitialValue ^ LIKELY
1825 //
1826 // 0 0 0
1827 // 1 0 1
1828 // 1 1 0
1829 // 0 1 1
1830 //
1831 // => InitialValue = IsBranch ^ LIKELY
1832 Info.InitValue = IsBranch ^ Info.Likely;
1833
1834 // Add annotations to facilitate manual code analysis.
1835 BC.MIB->addAnnotation(Inst&: *Inst, Name: "Likely", Val: Info.Likely);
1836 BC.MIB->addAnnotation(Inst&: *Inst, Name: "InitValue", Val: Info.InitValue);
1837 if (!BC.MIB->getSize(Inst: *Inst))
1838 BC.MIB->setSize(Inst&: *Inst, Size);
1839
1840 if (!BC.MIB->getOffset(Inst: *Inst))
1841 BC.MIB->setOffset(Inst&: *Inst, Offset: JumpAddress - BF->getAddress());
1842
1843 if (opts::LongJumpLabels)
1844 BC.MIB->setSize(Inst&: *Inst, Size: 5);
1845 }
1846
1847 BC.outs() << "BOLT-INFO: parsed " << EntryID << " static keys jump entries\n";
1848
1849 return Error::success();
1850}
1851
1852// Pre-emit pass. Convert dynamic branch instructions into jumps that could be
1853// relaxed. In post-emit pass we will convert those jumps into nops when
1854// necessary. We do the unconditional conversion into jumps so that the jumps
1855// can be relaxed and the optimal size of jump/nop instruction is selected.
1856Error LinuxKernelRewriter::rewriteStaticKeysJumpTable() {
1857 if (!StaticKeysJumpSection)
1858 return Error::success();
1859
1860 uint64_t NumShort = 0;
1861 uint64_t NumLong = 0;
1862 for (BinaryFunction &BF : llvm::make_second_range(c&: BC.getBinaryFunctions())) {
1863 if (!BC.shouldEmit(Function: BF))
1864 continue;
1865
1866 for (BinaryBasicBlock &BB : BF) {
1867 for (MCInst &Inst : BB) {
1868 if (!BC.MIB->isDynamicBranch(Inst))
1869 continue;
1870
1871 const uint32_t EntryID = *BC.MIB->getDynamicBranchID(Inst);
1872 MCSymbol *Target =
1873 const_cast<MCSymbol *>(BC.MIB->getTargetSymbol(Inst));
1874 assert(Target && "Target symbol should be set.");
1875
1876 const JumpInfoEntry &Info = JumpInfo[EntryID - 1];
1877 const bool IsBranch = Info.Likely ^ Info.InitValue;
1878
1879 uint32_t Size = *BC.MIB->getSize(Inst);
1880 if (Size == 2)
1881 ++NumShort;
1882 else if (Size == 5)
1883 ++NumLong;
1884 else
1885 llvm_unreachable("Wrong size for static keys jump instruction.");
1886
1887 MCInst NewInst;
1888 // Replace the instruction with unconditional jump even if it needs to
1889 // be nop in the binary.
1890 if (opts::LongJumpLabels) {
1891 BC.MIB->createLongUncondBranch(Inst&: NewInst, Target, Ctx: BC.Ctx.get());
1892 } else {
1893 // Newer kernels can handle short and long jumps for static keys.
1894 // Optimistically, emit short jump and check if it gets relaxed into
1895 // a long one during post-emit. Only then convert the jump to a nop.
1896 BC.MIB->createUncondBranch(Inst&: NewInst, TBB: Target, Ctx: BC.Ctx.get());
1897 }
1898
1899 BC.MIB->moveAnnotations(SrcInst: std::move(Inst), DstInst&: NewInst);
1900 Inst = NewInst;
1901
1902 // Mark the instruction for nop conversion.
1903 if (!IsBranch)
1904 NopIDs.insert(V: EntryID);
1905
1906 MCSymbol *Label =
1907 BC.MIB->getOrCreateInstLabel(Inst, Name: "__SK_", Ctx: BC.Ctx.get());
1908
1909 // Create a relocation against the label.
1910 const uint64_t EntryOffset = StaticKeysJumpTableAddress -
1911 StaticKeysJumpSection->getAddress() +
1912 (EntryID - 1) * 16;
1913 StaticKeysJumpSection->addRelocation(Offset: EntryOffset, Symbol: Label,
1914 Type: ELF::R_X86_64_PC32,
1915 /*Addend*/ 0);
1916 StaticKeysJumpSection->addRelocation(Offset: EntryOffset + 4, Symbol: Target,
1917 Type: ELF::R_X86_64_PC32, /*Addend*/ 0);
1918 }
1919 }
1920 }
1921
1922 BC.outs() << "BOLT-INFO: the input contains " << NumShort << " short and "
1923 << NumLong << " long static keys jumps in optimized functions\n";
1924
1925 return Error::success();
1926}
1927
1928// Post-emit pass of static keys jump section. Convert jumps to nops.
1929Error LinuxKernelRewriter::updateStaticKeysJumpTablePostEmit() {
1930 if (!StaticKeysJumpSection || !StaticKeysJumpSection->isFinalized())
1931 return Error::success();
1932
1933 const uint64_t SectionAddress = StaticKeysJumpSection->getAddress();
1934 AddressExtractor AE(StaticKeysJumpSection->getOutputContents(),
1935 SectionAddress, BC.AsmInfo->isLittleEndian(),
1936 BC.AsmInfo->getCodePointerSize());
1937 AddressExtractor::Cursor Cursor(StaticKeysJumpTableAddress - SectionAddress);
1938 const BinaryData *Stop = BC.getBinaryDataByName(Name: "__stop___jump_table");
1939 uint32_t EntryID = 0;
1940 uint64_t NumShort = 0;
1941 uint64_t NumLong = 0;
1942 while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) {
1943 const uint64_t JumpAddress = AE.getPCRelAddress32(C&: Cursor);
1944 const uint64_t TargetAddress = AE.getPCRelAddress32(C&: Cursor);
1945 const uint64_t KeyAddress = AE.getPCRelAddress64(C&: Cursor);
1946
1947 // Consume the status of the cursor.
1948 if (!Cursor)
1949 return createStringError(EC: errc::executable_format_error,
1950 Fmt: "out of bounds while updating static keys: %s",
1951 Vals: toString(E: Cursor.takeError()).c_str());
1952
1953 ++EntryID;
1954
1955 LLVM_DEBUG({
1956 dbgs() << "\n\tJumpAddress: 0x" << Twine::utohexstr(JumpAddress)
1957 << "\n\tTargetAddress: 0x" << Twine::utohexstr(TargetAddress)
1958 << "\n\tKeyAddress: 0x" << Twine::utohexstr(KeyAddress) << '\n';
1959 });
1960 (void)TargetAddress;
1961 (void)KeyAddress;
1962
1963 BinaryFunction *BF =
1964 BC.getBinaryFunctionContainingAddress(Address: JumpAddress,
1965 /*CheckPastEnd*/ false,
1966 /*UseMaxSize*/ true);
1967 assert(BF && "Cannot get function for modified static key.");
1968
1969 if (!BF->isEmitted())
1970 continue;
1971
1972 // Disassemble instruction to collect stats even if nop-conversion is
1973 // unnecessary.
1974 MutableArrayRef<uint8_t> Contents = MutableArrayRef<uint8_t>(
1975 reinterpret_cast<uint8_t *>(BF->getImageAddress()), BF->getImageSize());
1976 assert(Contents.size() && "Non-empty function image expected.");
1977
1978 MCInst Inst;
1979 uint64_t Size;
1980 const uint64_t JumpOffset = JumpAddress - BF->getAddress();
1981 if (!BC.DisAsm->getInstruction(Instr&: Inst, Size, Bytes: Contents.slice(N: JumpOffset), Address: 0,
1982 CStream&: nulls())) {
1983 llvm_unreachable("Unable to disassemble jump instruction.");
1984 }
1985 assert(BC.MIB->isBranch(Inst) && "Branch instruction expected.");
1986
1987 if (Size == 2)
1988 ++NumShort;
1989 else if (Size == 5)
1990 ++NumLong;
1991 else
1992 llvm_unreachable("Unexpected size for static keys jump instruction.");
1993
1994 // Check if we need to convert jump instruction into a nop.
1995 if (!NopIDs.contains(V: EntryID))
1996 continue;
1997
1998 SmallString<15> NopCode;
1999 raw_svector_ostream VecOS(NopCode);
2000 BC.MAB->writeNopData(OS&: VecOS, Count: Size, STI: BC.STI.get());
2001 for (uint64_t I = 0; I < Size; ++I)
2002 Contents[JumpOffset + I] = NopCode[I];
2003 }
2004
2005 BC.outs() << "BOLT-INFO: written " << NumShort << " short and " << NumLong
2006 << " long static keys jumps in optimized functions\n";
2007
2008 return Error::success();
2009}
2010
2011} // namespace
2012
2013std::unique_ptr<MetadataRewriter>
2014llvm::bolt::createLinuxKernelRewriter(BinaryContext &BC) {
2015 return std::make_unique<LinuxKernelRewriter>(args&: BC);
2016}
2017

source code of bolt/lib/Rewrite/LinuxKernelRewriter.cpp