1//===- bolt/Rewrite/LinuxKernelRewriter.cpp -------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Support for updating Linux Kernel metadata.
10//
11//===----------------------------------------------------------------------===//
12
13#include "bolt/Core/BinaryFunction.h"
14#include "bolt/Rewrite/MetadataRewriter.h"
15#include "bolt/Rewrite/MetadataRewriters.h"
16#include "bolt/Utils/CommandLineOpts.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/DenseSet.h"
19#include "llvm/MC/MCDisassembler/MCDisassembler.h"
20#include "llvm/Support/BinaryStreamWriter.h"
21#include "llvm/Support/CommandLine.h"
22#include "llvm/Support/Debug.h"
23#include "llvm/Support/Errc.h"
24
25#define DEBUG_TYPE "bolt-linux"
26
27using namespace llvm;
28using namespace bolt;
29
30namespace opts {
31
32static cl::opt<bool>
33 AltInstHasPadLen("alt-inst-has-padlen",
34 cl::desc("specify that .altinstructions has padlen field"),
35 cl::init(Val: false), cl::Hidden, cl::cat(BoltCategory));
36
37static cl::opt<uint32_t>
38 AltInstFeatureSize("alt-inst-feature-size",
39 cl::desc("size of feature field in .altinstructions"),
40 cl::init(Val: 2), cl::Hidden, cl::cat(BoltCategory));
41
42static cl::opt<bool>
43 DumpAltInstructions("dump-alt-instructions",
44 cl::desc("dump Linux alternative instructions info"),
45 cl::init(Val: false), cl::Hidden, cl::cat(BoltCategory));
46
47static cl::opt<bool>
48 DumpExceptions("dump-linux-exceptions",
49 cl::desc("dump Linux kernel exception table"),
50 cl::init(Val: false), cl::Hidden, cl::cat(BoltCategory));
51
52static cl::opt<bool>
53 DumpORC("dump-orc", cl::desc("dump raw ORC unwind information (sorted)"),
54 cl::init(Val: false), cl::Hidden, cl::cat(BoltCategory));
55
56static cl::opt<bool> DumpParavirtualPatchSites(
    "dump-para-sites", cl::desc("dump Linux kernel paravirtual patch sites"),
58 cl::init(Val: false), cl::Hidden, cl::cat(BoltCategory));
59
60static cl::opt<bool>
61 DumpPCIFixups("dump-pci-fixups",
62 cl::desc("dump Linux kernel PCI fixup table"),
63 cl::init(Val: false), cl::Hidden, cl::cat(BoltCategory));
64
65static cl::opt<bool> DumpStaticCalls("dump-static-calls",
66 cl::desc("dump Linux kernel static calls"),
67 cl::init(Val: false), cl::Hidden,
68 cl::cat(BoltCategory));
69
70static cl::opt<bool>
71 DumpStaticKeys("dump-static-keys",
72 cl::desc("dump Linux kernel static keys jump table"),
73 cl::init(Val: false), cl::Hidden, cl::cat(BoltCategory));
74
75static cl::opt<bool> LongJumpLabels(
76 "long-jump-labels",
77 cl::desc("always use long jumps/nops for Linux kernel static keys"),
78 cl::init(Val: false), cl::Hidden, cl::cat(BoltCategory));
79
80static cl::opt<bool>
81 PrintORC("print-orc",
82 cl::desc("print ORC unwind information for instructions"),
83 cl::init(Val: true), cl::Hidden, cl::cat(BoltCategory));
84
85} // namespace opts
86
/// The Linux kernel supports stack unwinding using ORC (Oops Rewind
/// Capability). The ORC state at every IP can be described by the following
/// data structure.
89struct ORCState {
90 int16_t SPOffset;
91 int16_t BPOffset;
92 int16_t Info;
93
94 bool operator==(const ORCState &Other) const {
95 return SPOffset == Other.SPOffset && BPOffset == Other.BPOffset &&
96 Info == Other.Info;
97 }
98
99 bool operator!=(const ORCState &Other) const { return !(*this == Other); }
100};
101
102/// Section terminator ORC entry.
103static ORCState NullORC = {.SPOffset: 0, .BPOffset: 0, .Info: 0};
104
105/// Basic printer for ORC entry. It does not provide the same level of
106/// information as objtool (for now).
107inline raw_ostream &operator<<(raw_ostream &OS, const ORCState &E) {
108 if (!opts::PrintORC)
109 return OS;
110 if (E != NullORC)
111 OS << format(Fmt: "{sp: %d, bp: %d, info: 0x%x}", Vals: E.SPOffset, Vals: E.BPOffset,
112 Vals: E.Info);
113 else
114 OS << "{terminator}";
115
116 return OS;
117}
118
119namespace {
120
121class LinuxKernelRewriter final : public MetadataRewriter {
122 /// Linux Kernel special sections point to a specific instruction in many
123 /// cases. Unlike SDTMarkerInfo, these markers can come from different
124 /// sections.
125 struct LKInstructionMarkerInfo {
126 uint64_t SectionOffset;
127 int32_t PCRelativeOffset;
128 bool IsPCRelative;
129 StringRef SectionName;
130 };
131
  /// Map Linux kernel program locations/instructions to their pointers in
  /// special Linux kernel sections.
134 std::unordered_map<uint64_t, std::vector<LKInstructionMarkerInfo>> LKMarkers;
135
136 /// Linux ORC sections.
137 ErrorOr<BinarySection &> ORCUnwindSection = std::errc::bad_address;
138 ErrorOr<BinarySection &> ORCUnwindIPSection = std::errc::bad_address;
139
140 /// Size of entries in ORC sections.
141 static constexpr size_t ORC_UNWIND_ENTRY_SIZE = 6;
142 static constexpr size_t ORC_UNWIND_IP_ENTRY_SIZE = 4;
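
  /// For reference, each 6-byte .orc_unwind entry corresponds to the kernel's
  /// packed struct orc_entry (arch/x86/include/asm/orc_types.h). The exact bit
  /// layout of the last 16-bit word varies between kernel versions, so this is
  /// only a sketch:
  ///
  ///   struct orc_entry {
  ///     s16 sp_offset;
  ///     s16 bp_offset;
  ///     u16 info; /* packed sp_reg/bp_reg/type (and newer flag) bit-fields */
  ///   } __packed;
  ///
  /// Each 4-byte .orc_unwind_ip entry is a self-relative s32 offset to the
  /// instruction the corresponding ORC state applies to.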
143
144 struct ORCListEntry {
145 uint64_t IP; /// Instruction address.
146 BinaryFunction *BF; /// Binary function corresponding to the entry.
147 ORCState ORC; /// Stack unwind info in ORC format.
148
149 /// ORC entries are sorted by their IPs. Terminator entries (NullORC)
150 /// should precede other entries with the same address.
151 bool operator<(const ORCListEntry &Other) const {
      if (IP < Other.IP)
        return true;
      if (IP > Other.IP)
        return false;
      return ORC == NullORC && Other.ORC != NullORC;
157 }
158 };
159
160 using ORCListType = std::vector<ORCListEntry>;
161 ORCListType ORCEntries;
162
163 /// Number of entries in the input file ORC sections.
164 uint64_t NumORCEntries = 0;
165
166 /// Section containing static keys jump table.
167 ErrorOr<BinarySection &> StaticKeysJumpSection = std::errc::bad_address;
168 uint64_t StaticKeysJumpTableAddress = 0;
169 static constexpr size_t STATIC_KEYS_JUMP_ENTRY_SIZE = 8;
170
171 struct JumpInfoEntry {
172 bool Likely;
173 bool InitValue;
174 };
175 SmallVector<JumpInfoEntry, 16> JumpInfo;
176
177 /// Static key entries that need nop conversion.
178 DenseSet<uint32_t> NopIDs;
179
180 /// Section containing static call table.
181 ErrorOr<BinarySection &> StaticCallSection = std::errc::bad_address;
182 uint64_t StaticCallTableAddress = 0;
183 static constexpr size_t STATIC_CALL_ENTRY_SIZE = 8;
184
185 struct StaticCallInfo {
186 uint32_t ID; /// Identifier of the entry in the table.
187 BinaryFunction *Function; /// Function containing associated call.
188 MCSymbol *Label; /// Label attached to the call.
189 };
190 using StaticCallListType = std::vector<StaticCallInfo>;
191 StaticCallListType StaticCallEntries;
192
193 /// Section containing the Linux exception table.
194 ErrorOr<BinarySection &> ExceptionsSection = std::errc::bad_address;
195 static constexpr size_t EXCEPTION_TABLE_ENTRY_SIZE = 12;
196
197 /// Functions with exception handling code.
198 DenseSet<BinaryFunction *> FunctionsWithExceptions;
199
200 /// Section with paravirtual patch sites.
201 ErrorOr<BinarySection &> ParavirtualPatchSection = std::errc::bad_address;
202
203 /// Alignment of paravirtual patch structures.
204 static constexpr size_t PARA_PATCH_ALIGN = 8;
205
206 /// .altinstructions section.
207 ErrorOr<BinarySection &> AltInstrSection = std::errc::bad_address;
208
209 /// Section containing Linux bug table.
210 ErrorOr<BinarySection &> BugTableSection = std::errc::bad_address;
211
212 /// Size of bug_entry struct.
213 static constexpr size_t BUG_TABLE_ENTRY_SIZE = 12;
214
215 /// List of bug entries per function.
216 using FunctionBugListType =
217 DenseMap<BinaryFunction *, SmallVector<uint32_t, 2>>;
218 FunctionBugListType FunctionBugList;
219
220 /// .pci_fixup section.
221 ErrorOr<BinarySection &> PCIFixupSection = std::errc::bad_address;
222 static constexpr size_t PCI_FIXUP_ENTRY_SIZE = 16;
223
224 /// Insert an LKMarker for a given code pointer \p PC from a non-code section
225 /// \p SectionName.
226 void insertLKMarker(uint64_t PC, uint64_t SectionOffset,
227 int32_t PCRelativeOffset, bool IsPCRelative,
228 StringRef SectionName);
229
  /// Process Linux kernel special sections and their relocations.
231 void processLKSections();
232
233 /// Process __ksymtab and __ksymtab_gpl.
234 void processLKKSymtab(bool IsGPL = false);
235
  /// Process the special Linux kernel section, .smp_locks.
237 void processLKSMPLocks();
238
239 /// Update LKMarkers' locations for the output binary.
240 void updateLKMarkers();
241
242 /// Read ORC unwind information and annotate instructions.
243 Error readORCTables();
244
245 /// Update ORC for functions once CFG is constructed.
246 Error processORCPostCFG();
247
248 /// Update ORC data in the binary.
249 Error rewriteORCTables();
250
251 /// Static call table handling.
252 Error readStaticCalls();
253 Error rewriteStaticCalls();
254
255 Error readExceptionTable();
256 Error rewriteExceptionTable();
257
258 /// Paravirtual instruction patch sites.
259 Error readParaInstructions();
260 Error rewriteParaInstructions();
261
262 /// __bug_table section handling.
263 Error readBugTable();
264 Error rewriteBugTable();
265
  /// Do not process functions containing instructions annotated with
  /// \p Annotation.
268 void skipFunctionsWithAnnotation(StringRef Annotation) const;
269
270 /// Handle alternative instruction info from .altinstructions.
271 Error readAltInstructions();
272 Error rewriteAltInstructions();
273
  /// Read the .pci_fixup section.
275 Error readPCIFixupTable();
276
277 /// Handle static keys jump table.
278 Error readStaticKeysJumpTable();
279 Error rewriteStaticKeysJumpTable();
280 Error updateStaticKeysJumpTablePostEmit();
281
282 /// Mark instructions referenced by kernel metadata.
283 Error markInstructions();
284
285public:
286 LinuxKernelRewriter(BinaryContext &BC)
287 : MetadataRewriter("linux-kernel-rewriter", BC) {}
288
289 Error preCFGInitializer() override {
290 processLKSections();
291 if (Error E = markInstructions())
292 return E;
293
294 if (Error E = readORCTables())
295 return E;
296
297 if (Error E = readStaticCalls())
298 return E;
299
300 if (Error E = readExceptionTable())
301 return E;
302
303 if (Error E = readParaInstructions())
304 return E;
305
306 if (Error E = readBugTable())
307 return E;
308
309 if (Error E = readAltInstructions())
310 return E;
311
312 if (Error E = readPCIFixupTable())
313 return E;
314
315 if (Error E = readStaticKeysJumpTable())
316 return E;
317
318 return Error::success();
319 }
320
321 Error postCFGInitializer() override {
322 if (Error E = processORCPostCFG())
323 return E;
324
325 return Error::success();
326 }
327
328 Error preEmitFinalizer() override {
329 // Since rewriteExceptionTable() can mark functions as non-simple, run it
330 // before other rewriters that depend on simple/emit status.
331 if (Error E = rewriteExceptionTable())
332 return E;
333
334 if (Error E = rewriteAltInstructions())
335 return E;
336
337 if (Error E = rewriteParaInstructions())
338 return E;
339
340 if (Error E = rewriteORCTables())
341 return E;
342
343 if (Error E = rewriteStaticCalls())
344 return E;
345
346 if (Error E = rewriteStaticKeysJumpTable())
347 return E;
348
349 if (Error E = rewriteBugTable())
350 return E;
351
352 return Error::success();
353 }
354
355 Error postEmitFinalizer() override {
356 updateLKMarkers();
357
358 if (Error E = updateStaticKeysJumpTablePostEmit())
359 return E;
360
361 return Error::success();
362 }
363};
364
365Error LinuxKernelRewriter::markInstructions() {
366 for (const uint64_t PC : llvm::make_first_range(c&: LKMarkers)) {
367 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(Address: PC);
368
369 if (!BF || !BC.shouldEmit(Function: *BF))
370 continue;
371
372 const uint64_t Offset = PC - BF->getAddress();
373 MCInst *Inst = BF->getInstructionAtOffset(Offset);
374 if (!Inst)
375 return createStringError(EC: errc::executable_format_error,
376 Msg: "no instruction matches kernel marker offset");
377
378 BC.MIB->setOffset(Inst&: *Inst, Offset: static_cast<uint32_t>(Offset));
379
380 BF->setHasSDTMarker(true);
381 }
382
383 return Error::success();
384}
385
386void LinuxKernelRewriter::insertLKMarker(uint64_t PC, uint64_t SectionOffset,
387 int32_t PCRelativeOffset,
388 bool IsPCRelative,
389 StringRef SectionName) {
390 LKMarkers[PC].emplace_back(args: LKInstructionMarkerInfo{
391 .SectionOffset: SectionOffset, .PCRelativeOffset: PCRelativeOffset, .IsPCRelative: IsPCRelative, .SectionName: SectionName});
392}
393
394void LinuxKernelRewriter::processLKSections() {
395 processLKKSymtab();
396 processLKKSymtab(IsGPL: true);
397 processLKSMPLocks();
398}
399
/// Process the __ksymtab[_gpl] sections of the Linux kernel.
/// These sections list all the vmlinux symbols that kernel modules can access.
///
/// All entries are 4 bytes each, hence we can read them one by one and ignore
/// the ones that do not point into the .text section. All pointers are
/// PC-relative offsets and always point to the beginning of a function.
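///
/// As a sketch of the decoding performed below: an entry at address A holding
/// a signed 32-bit value V references the address A + V, and we only register
/// a relocation when A + V is the start of a known binary function.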
406void LinuxKernelRewriter::processLKKSymtab(bool IsGPL) {
407 StringRef SectionName = "__ksymtab";
408 if (IsGPL)
409 SectionName = "__ksymtab_gpl";
410 ErrorOr<BinarySection &> SectionOrError =
411 BC.getUniqueSectionByName(SectionName);
412 assert(SectionOrError &&
413 "__ksymtab[_gpl] section not found in Linux Kernel binary");
414 const uint64_t SectionSize = SectionOrError->getSize();
415 const uint64_t SectionAddress = SectionOrError->getAddress();
416 assert((SectionSize % 4) == 0 &&
417 "The size of the __ksymtab[_gpl] section should be a multiple of 4");
418
419 for (uint64_t I = 0; I < SectionSize; I += 4) {
420 const uint64_t EntryAddress = SectionAddress + I;
421 ErrorOr<uint64_t> Offset = BC.getSignedValueAtAddress(Address: EntryAddress, Size: 4);
422 assert(Offset && "Reading valid PC-relative offset for a ksymtab entry");
423 const int32_t SignedOffset = *Offset;
424 const uint64_t RefAddress = EntryAddress + SignedOffset;
425 BinaryFunction *BF = BC.getBinaryFunctionAtAddress(Address: RefAddress);
426 if (!BF)
427 continue;
428
429 BC.addRelocation(Address: EntryAddress, Symbol: BF->getSymbol(), Type: Relocation::getPC32(), Addend: 0,
430 Value: *Offset);
431 }
432}
433
/// The .smp_locks section contains PC-relative references to instructions with
/// the LOCK prefix. The prefix can be converted to a NOP at boot time on
/// non-SMP systems.
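///
/// For example, `lock incl (%rdi)` is encoded with the 0xF0 LOCK prefix byte;
/// on a uniprocessor system the kernel can patch that byte in place, using the
/// addresses recorded in this section to locate it (an illustrative example,
/// not derived from this code).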
436void LinuxKernelRewriter::processLKSMPLocks() {
437 ErrorOr<BinarySection &> SectionOrError =
438 BC.getUniqueSectionByName(SectionName: ".smp_locks");
439 if (!SectionOrError)
440 return;
441
442 uint64_t SectionSize = SectionOrError->getSize();
443 const uint64_t SectionAddress = SectionOrError->getAddress();
444 assert((SectionSize % 4) == 0 &&
445 "The size of the .smp_locks section should be a multiple of 4");
446
447 for (uint64_t I = 0; I < SectionSize; I += 4) {
448 const uint64_t EntryAddress = SectionAddress + I;
449 ErrorOr<uint64_t> Offset = BC.getSignedValueAtAddress(Address: EntryAddress, Size: 4);
450 assert(Offset && "Reading valid PC-relative offset for a .smp_locks entry");
451 int32_t SignedOffset = *Offset;
452 uint64_t RefAddress = EntryAddress + SignedOffset;
453
454 BinaryFunction *ContainingBF =
455 BC.getBinaryFunctionContainingAddress(Address: RefAddress);
456 if (!ContainingBF)
457 continue;
458
459 insertLKMarker(PC: RefAddress, SectionOffset: I, PCRelativeOffset: SignedOffset, IsPCRelative: true, SectionName: ".smp_locks");
460 }
461}
462
463void LinuxKernelRewriter::updateLKMarkers() {
  if (LKMarkers.empty())
465 return;
466
467 std::unordered_map<std::string, uint64_t> PatchCounts;
468 for (std::pair<const uint64_t, std::vector<LKInstructionMarkerInfo>>
469 &LKMarkerInfoKV : LKMarkers) {
470 const uint64_t OriginalAddress = LKMarkerInfoKV.first;
471 const BinaryFunction *BF =
472 BC.getBinaryFunctionContainingAddress(Address: OriginalAddress, CheckPastEnd: false, UseMaxSize: true);
473 if (!BF)
474 continue;
475
476 uint64_t NewAddress = BF->translateInputToOutputAddress(Address: OriginalAddress);
477 if (NewAddress == 0)
478 continue;
479
    // Re-apply the kernel virtual base if the translated address lost the
    // high bits.
481 if (OriginalAddress >= 0xffffffff00000000 && NewAddress < 0xffffffff)
482 NewAddress = NewAddress + 0xffffffff00000000;
483
484 if (OriginalAddress == NewAddress)
485 continue;
486
487 for (LKInstructionMarkerInfo &LKMarkerInfo : LKMarkerInfoKV.second) {
488 StringRef SectionName = LKMarkerInfo.SectionName;
489 SimpleBinaryPatcher *LKPatcher;
490 ErrorOr<BinarySection &> BSec = BC.getUniqueSectionByName(SectionName);
491 assert(BSec && "missing section info for kernel section");
492 if (!BSec->getPatcher())
493 BSec->registerPatcher(BPatcher: std::make_unique<SimpleBinaryPatcher>());
494 LKPatcher = static_cast<SimpleBinaryPatcher *>(BSec->getPatcher());
495 PatchCounts[std::string(SectionName)]++;
496 if (LKMarkerInfo.IsPCRelative)
497 LKPatcher->addLE32Patch(Offset: LKMarkerInfo.SectionOffset,
498 NewValue: NewAddress - OriginalAddress +
499 LKMarkerInfo.PCRelativeOffset);
500 else
501 LKPatcher->addLE64Patch(Offset: LKMarkerInfo.SectionOffset, NewValue: NewAddress);
502 }
503 }
504 BC.outs() << "BOLT-INFO: patching linux kernel sections. Total patches per "
505 "section are as follows:\n";
506 for (const std::pair<const std::string, uint64_t> &KV : PatchCounts)
507 BC.outs() << " Section: " << KV.first << ", patch-counts: " << KV.second
508 << '\n';
509}
510
511Error LinuxKernelRewriter::readORCTables() {
  // NOTE: we should ignore relocations for ORC tables as the tables are sorted
  // post-link and the relocations are not updated.
514 ORCUnwindSection = BC.getUniqueSectionByName(SectionName: ".orc_unwind");
515 ORCUnwindIPSection = BC.getUniqueSectionByName(SectionName: ".orc_unwind_ip");
516
517 if (!ORCUnwindSection && !ORCUnwindIPSection)
518 return Error::success();
519
520 if (!ORCUnwindSection || !ORCUnwindIPSection)
521 return createStringError(EC: errc::executable_format_error,
522 Msg: "missing ORC section");
523
524 NumORCEntries = ORCUnwindIPSection->getSize() / ORC_UNWIND_IP_ENTRY_SIZE;
525 if (ORCUnwindSection->getSize() != NumORCEntries * ORC_UNWIND_ENTRY_SIZE ||
526 ORCUnwindIPSection->getSize() != NumORCEntries * ORC_UNWIND_IP_ENTRY_SIZE)
527 return createStringError(EC: errc::executable_format_error,
528 Msg: "ORC entries number mismatch detected");
529
530 const uint64_t IPSectionAddress = ORCUnwindIPSection->getAddress();
531 DataExtractor OrcDE = DataExtractor(ORCUnwindSection->getContents(),
532 BC.AsmInfo->isLittleEndian(),
533 BC.AsmInfo->getCodePointerSize());
534 DataExtractor IPDE = DataExtractor(ORCUnwindIPSection->getContents(),
535 BC.AsmInfo->isLittleEndian(),
536 BC.AsmInfo->getCodePointerSize());
537 DataExtractor::Cursor ORCCursor(0);
538 DataExtractor::Cursor IPCursor(0);
539 uint64_t PrevIP = 0;
540 for (uint32_t Index = 0; Index < NumORCEntries; ++Index) {
541 const uint64_t IP =
542 IPSectionAddress + IPCursor.tell() + (int32_t)IPDE.getU32(C&: IPCursor);
543
544 // Consume the status of the cursor.
545 if (!IPCursor)
546 return createStringError(EC: errc::executable_format_error,
547 Fmt: "out of bounds while reading ORC IP table: %s",
548 Vals: toString(E: IPCursor.takeError()).c_str());
549
550 if (IP < PrevIP && opts::Verbosity)
551 BC.errs() << "BOLT-WARNING: out of order IP 0x" << Twine::utohexstr(Val: IP)
552 << " detected while reading ORC\n";
553
554 PrevIP = IP;
555
    // Store all entries, including those we are not going to update, as the
    // tables need to be sorted globally before being written out.
558 ORCEntries.push_back(x: ORCListEntry());
559 ORCListEntry &Entry = ORCEntries.back();
560
561 Entry.IP = IP;
562 Entry.ORC.SPOffset = (int16_t)OrcDE.getU16(C&: ORCCursor);
563 Entry.ORC.BPOffset = (int16_t)OrcDE.getU16(C&: ORCCursor);
564 Entry.ORC.Info = (int16_t)OrcDE.getU16(C&: ORCCursor);
565 Entry.BF = nullptr;
566
567 // Consume the status of the cursor.
568 if (!ORCCursor)
569 return createStringError(EC: errc::executable_format_error,
570 Fmt: "out of bounds while reading ORC: %s",
571 Vals: toString(E: ORCCursor.takeError()).c_str());
572
573 if (Entry.ORC == NullORC)
574 continue;
575
576 BinaryFunction *&BF = Entry.BF;
577 BF = BC.getBinaryFunctionContainingAddress(Address: IP, /*CheckPastEnd*/ true);
578
    // A non-terminator entry that points immediately past the end of the
    // function does not belong to this function.
    if (BF && BF->getAddress() + BF->getSize() == IP)
      BF = nullptr;
583
584 if (!BF) {
585 if (opts::Verbosity)
586 BC.errs() << "BOLT-WARNING: no binary function found matching ORC 0x"
587 << Twine::utohexstr(Val: IP) << ": " << Entry.ORC << '\n';
588 continue;
589 }
590
591 BF->setHasORC(true);
592
593 if (!BF->hasInstructions())
594 continue;
595
596 MCInst *Inst = BF->getInstructionAtOffset(Offset: IP - BF->getAddress());
597 if (!Inst)
598 return createStringError(
599 EC: errc::executable_format_error,
600 Fmt: "no instruction at address 0x%" PRIx64 " in .orc_unwind_ip", Vals: IP);
601
    // Some addresses will have two entries associated with them, the first
    // one being a "weak" section terminator. Since we ignore terminators,
    // we should only assign one entry per instruction.
605 if (BC.MIB->hasAnnotation(Inst: *Inst, Name: "ORC"))
606 return createStringError(
607 EC: errc::executable_format_error,
608 Fmt: "duplicate non-terminal ORC IP 0x%" PRIx64 " in .orc_unwind_ip", Vals: IP);
609
610 BC.MIB->addAnnotation(Inst&: *Inst, Name: "ORC", Val: Entry.ORC);
611 }
612
613 BC.outs() << "BOLT-INFO: parsed " << NumORCEntries << " ORC entries\n";
614
615 if (opts::DumpORC) {
616 BC.outs() << "BOLT-INFO: ORC unwind information:\n";
617 for (const ORCListEntry &E : ORCEntries) {
618 BC.outs() << "0x" << Twine::utohexstr(Val: E.IP) << ": " << E.ORC;
619 if (E.BF)
620 BC.outs() << ": " << *E.BF;
621 BC.outs() << '\n';
622 }
623 }
624
625 // Add entries for functions that don't have explicit ORC info at the start.
626 // We'll have the correct info for them even if ORC for the preceding function
627 // changes.
628 ORCListType NewEntries;
629 for (BinaryFunction &BF : llvm::make_second_range(c&: BC.getBinaryFunctions())) {
630 auto It = llvm::partition_point(Range&: ORCEntries, P: [&](const ORCListEntry &E) {
631 return E.IP <= BF.getAddress();
632 });
633 if (It != ORCEntries.begin())
634 --It;
635
636 if (It->BF == &BF)
637 continue;
638
639 if (It->ORC == NullORC && It->IP == BF.getAddress()) {
640 assert(!It->BF);
641 It->BF = &BF;
642 continue;
643 }
644
645 NewEntries.push_back(x: {.IP: BF.getAddress(), .BF: &BF, .ORC: It->ORC});
646 if (It->ORC != NullORC)
647 BF.setHasORC(true);
648 }
649
650 llvm::copy(Range&: NewEntries, Out: std::back_inserter(x&: ORCEntries));
651 llvm::sort(C&: ORCEntries);
652
653 if (opts::DumpORC) {
654 BC.outs() << "BOLT-INFO: amended ORC unwind information:\n";
655 for (const ORCListEntry &E : ORCEntries) {
656 BC.outs() << "0x" << Twine::utohexstr(Val: E.IP) << ": " << E.ORC;
657 if (E.BF)
658 BC.outs() << ": " << *E.BF;
659 BC.outs() << '\n';
660 }
661 }
662
663 return Error::success();
664}
665
666Error LinuxKernelRewriter::processORCPostCFG() {
667 if (!NumORCEntries)
668 return Error::success();
669
670 // Propagate ORC to the rest of the function. We can annotate every
671 // instruction in every function, but to minimize the overhead, we annotate
672 // the first instruction in every basic block to reflect the state at the
673 // entry. This way, the ORC state can be calculated based on annotations
674 // regardless of the basic block layout. Note that if we insert/delete
675 // instructions, we must take care to attach ORC info to the new/deleted ones.
676 for (BinaryFunction &BF : llvm::make_second_range(c&: BC.getBinaryFunctions())) {
677
678 std::optional<ORCState> CurrentState;
679 for (BinaryBasicBlock &BB : BF) {
680 for (MCInst &Inst : BB) {
681 ErrorOr<ORCState> State =
682 BC.MIB->tryGetAnnotationAs<ORCState>(Inst, Name: "ORC");
683
684 if (State) {
685 CurrentState = *State;
686 continue;
687 }
688
689 // Get state for the start of the function.
690 if (!CurrentState) {
691 // A terminator entry (NullORC) can match the function address. If
692 // there's also a non-terminator entry, it will be placed after the
693 // terminator. Hence, we are looking for the last ORC entry that
694 // matches the address.
695 auto It =
696 llvm::partition_point(Range&: ORCEntries, P: [&](const ORCListEntry &E) {
697 return E.IP <= BF.getAddress();
698 });
699 if (It != ORCEntries.begin())
700 --It;
701
702 assert(It->IP == BF.getAddress() && (!It->BF || It->BF == &BF) &&
703 "ORC info at function entry expected.");
704
705 if (It->ORC == NullORC && BF.hasORC()) {
706 BC.errs() << "BOLT-WARNING: ORC unwind info excludes prologue for "
707 << BF << '\n';
708 }
709
710 It->BF = &BF;
711
712 CurrentState = It->ORC;
713 if (It->ORC != NullORC)
714 BF.setHasORC(true);
715 }
716
717 // While printing ORC, attach info to every instruction for convenience.
718 if (opts::PrintORC || &Inst == &BB.front())
719 BC.MIB->addAnnotation(Inst, Name: "ORC", Val: *CurrentState);
720 }
721 }
722 }
723
724 return Error::success();
725}
726
727Error LinuxKernelRewriter::rewriteORCTables() {
728 if (!NumORCEntries)
729 return Error::success();
730
731 // Update ORC sections in-place. As we change the code, the number of ORC
732 // entries may increase for some functions. However, as we remove terminator
733 // redundancy (see below), more space is freed up and we should always be able
734 // to fit new ORC tables in the reserved space.
735 auto createInPlaceWriter = [&](BinarySection &Section) -> BinaryStreamWriter {
736 const size_t Size = Section.getSize();
737 uint8_t *NewContents = new uint8_t[Size];
738 Section.updateContents(NewData: NewContents, NewSize: Size);
739 Section.setOutputFileOffset(Section.getInputFileOffset());
740 return BinaryStreamWriter({NewContents, Size}, BC.AsmInfo->isLittleEndian()
741 ? endianness::little
742 : endianness::big);
743 };
744 BinaryStreamWriter UnwindWriter = createInPlaceWriter(*ORCUnwindSection);
745 BinaryStreamWriter UnwindIPWriter = createInPlaceWriter(*ORCUnwindIPSection);
746
747 uint64_t NumEmitted = 0;
748 std::optional<ORCState> LastEmittedORC;
749 auto emitORCEntry = [&](const uint64_t IP, const ORCState &ORC,
                          MCSymbol *Label = nullptr,
                          bool Force = false) -> Error {
751 if (LastEmittedORC && ORC == *LastEmittedORC && !Force)
752 return Error::success();
753
754 LastEmittedORC = ORC;
755
756 if (++NumEmitted > NumORCEntries)
757 return createStringError(EC: errc::executable_format_error,
758 Msg: "exceeded the number of allocated ORC entries");
759
760 if (Label)
761 ORCUnwindIPSection->addRelocation(Offset: UnwindIPWriter.getOffset(), Symbol: Label,
762 Type: Relocation::getPC32(), /*Addend*/ 0);
763
764 const int32_t IPValue =
765 IP - ORCUnwindIPSection->getAddress() - UnwindIPWriter.getOffset();
766 if (Error E = UnwindIPWriter.writeInteger(Value: IPValue))
767 return E;
768
769 if (Error E = UnwindWriter.writeInteger(Value: ORC.SPOffset))
770 return E;
771 if (Error E = UnwindWriter.writeInteger(Value: ORC.BPOffset))
772 return E;
773 if (Error E = UnwindWriter.writeInteger(Value: ORC.Info))
774 return E;
775
776 return Error::success();
777 };
778
779 // Emit new ORC entries for the emitted function.
780 auto emitORC = [&](const BinaryFunction &BF) -> Error {
781 assert(!BF.isSplit() && "Split functions not supported by ORC writer yet.");
782
783 ORCState CurrentState = NullORC;
784 for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
785 for (MCInst &Inst : *BB) {
786 ErrorOr<ORCState> ErrorOrState =
787 BC.MIB->tryGetAnnotationAs<ORCState>(Inst, Name: "ORC");
788 if (!ErrorOrState || *ErrorOrState == CurrentState)
789 continue;
790
791 // Issue label for the instruction.
792 MCSymbol *Label =
793 BC.MIB->getOrCreateInstLabel(Inst, Name: "__ORC_", Ctx: BC.Ctx.get());
794
795 if (Error E = emitORCEntry(0, *ErrorOrState, Label))
796 return E;
797
798 CurrentState = *ErrorOrState;
799 }
800 }
801
802 return Error::success();
803 };
804
805 for (ORCListEntry &Entry : ORCEntries) {
806 // Emit original entries for functions that we haven't modified.
807 if (!Entry.BF || !BC.shouldEmit(Function: *Entry.BF)) {
808 // Emit terminator only if it marks the start of a function.
809 if (Entry.ORC == NullORC && !Entry.BF)
810 continue;
811 if (Error E = emitORCEntry(Entry.IP, Entry.ORC))
812 return E;
813 continue;
814 }
815
    // Emit all ORC entries for a function referenced by an entry and skip
    // over the rest of the entries for this function by resetting its ORC
    // attribute.
818 if (Entry.BF->hasORC()) {
819 if (Error E = emitORC(*Entry.BF))
820 return E;
821 Entry.BF->setHasORC(false);
822 }
823 }
824
825 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitted " << NumEmitted
826 << " ORC entries\n");
827
828 // Replicate terminator entry at the end of sections to match the original
829 // table sizes.
830 const BinaryFunction &LastBF = BC.getBinaryFunctions().rbegin()->second;
831 const uint64_t LastIP = LastBF.getAddress() + LastBF.getMaxSize();
832 while (UnwindWriter.bytesRemaining()) {
833 if (Error E = emitORCEntry(LastIP, NullORC, nullptr, /*Force*/ true))
834 return E;
835 }
836
837 return Error::success();
838}
839
840/// The static call site table is created by objtool and contains entries in the
841/// following format:
842///
843/// struct static_call_site {
844/// s32 addr;
845/// s32 key;
846/// };
847///
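/// Both addr and key are self-relative s32 offsets: the call instruction is at
/// &site->addr + site->addr and the key at &site->key + site->key (the key's
/// low bits may carry flags); this mirrors the address computation performed
/// below and is noted here for reference.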
848Error LinuxKernelRewriter::readStaticCalls() {
849 const BinaryData *StaticCallTable =
850 BC.getBinaryDataByName(Name: "__start_static_call_sites");
851 if (!StaticCallTable)
852 return Error::success();
853
854 StaticCallTableAddress = StaticCallTable->getAddress();
855
856 const BinaryData *Stop = BC.getBinaryDataByName(Name: "__stop_static_call_sites");
857 if (!Stop)
858 return createStringError(EC: errc::executable_format_error,
859 Msg: "missing __stop_static_call_sites symbol");
860
861 ErrorOr<BinarySection &> ErrorOrSection =
862 BC.getSectionForAddress(Address: StaticCallTableAddress);
863 if (!ErrorOrSection)
864 return createStringError(EC: errc::executable_format_error,
865 Msg: "no section matching __start_static_call_sites");
866
867 StaticCallSection = *ErrorOrSection;
868 if (!StaticCallSection->containsAddress(Address: Stop->getAddress() - 1))
869 return createStringError(EC: errc::executable_format_error,
870 Msg: "__stop_static_call_sites not in the same section "
871 "as __start_static_call_sites");
872
873 if ((Stop->getAddress() - StaticCallTableAddress) % STATIC_CALL_ENTRY_SIZE)
874 return createStringError(EC: errc::executable_format_error,
875 Msg: "static call table size error");
876
877 const uint64_t SectionAddress = StaticCallSection->getAddress();
878 DataExtractor DE(StaticCallSection->getContents(),
879 BC.AsmInfo->isLittleEndian(),
880 BC.AsmInfo->getCodePointerSize());
881 DataExtractor::Cursor Cursor(StaticCallTableAddress - SectionAddress);
882 uint32_t EntryID = 0;
883 while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) {
884 const uint64_t CallAddress =
885 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(C&: Cursor);
886 const uint64_t KeyAddress =
887 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(C&: Cursor);
888
889 // Consume the status of the cursor.
890 if (!Cursor)
891 return createStringError(EC: errc::executable_format_error,
892 Fmt: "out of bounds while reading static calls: %s",
893 Vals: toString(E: Cursor.takeError()).c_str());
894
895 ++EntryID;
896
897 if (opts::DumpStaticCalls) {
898 BC.outs() << "Static Call Site: " << EntryID << '\n';
899 BC.outs() << "\tCallAddress: 0x" << Twine::utohexstr(Val: CallAddress)
900 << "\n\tKeyAddress: 0x" << Twine::utohexstr(Val: KeyAddress)
901 << '\n';
902 }
903
904 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(Address: CallAddress);
905 if (!BF)
906 continue;
907
908 if (!BC.shouldEmit(Function: *BF))
909 continue;
910
911 if (!BF->hasInstructions())
912 continue;
913
914 MCInst *Inst = BF->getInstructionAtOffset(Offset: CallAddress - BF->getAddress());
915 if (!Inst)
916 return createStringError(EC: errc::executable_format_error,
917 Fmt: "no instruction at call site address 0x%" PRIx64,
918 Vals: CallAddress);
919
920 // Check for duplicate entries.
921 if (BC.MIB->hasAnnotation(Inst: *Inst, Name: "StaticCall"))
922 return createStringError(EC: errc::executable_format_error,
923 Fmt: "duplicate static call site at 0x%" PRIx64,
924 Vals: CallAddress);
925
926 BC.MIB->addAnnotation(Inst&: *Inst, Name: "StaticCall", Val: EntryID);
927
928 MCSymbol *Label =
929 BC.MIB->getOrCreateInstLabel(Inst&: *Inst, Name: "__SC_", Ctx: BC.Ctx.get());
930
931 StaticCallEntries.push_back(x: {.ID: EntryID, .Function: BF, .Label: Label});
932 }
933
934 BC.outs() << "BOLT-INFO: parsed " << StaticCallEntries.size()
935 << " static call entries\n";
936
937 return Error::success();
938}
939
/// The static call table is sorted at boot time in static_call_sort_entries().
/// This makes it possible to update existing entries in-place, ignoring their
/// relative order.
943Error LinuxKernelRewriter::rewriteStaticCalls() {
944 if (!StaticCallTableAddress || !StaticCallSection)
945 return Error::success();
946
947 for (auto &Entry : StaticCallEntries) {
948 if (!Entry.Function)
949 continue;
950
951 BinaryFunction &BF = *Entry.Function;
952 if (!BC.shouldEmit(Function: BF))
953 continue;
954
955 // Create a relocation against the label.
956 const uint64_t EntryOffset = StaticCallTableAddress -
957 StaticCallSection->getAddress() +
958 (Entry.ID - 1) * STATIC_CALL_ENTRY_SIZE;
959 StaticCallSection->addRelocation(Offset: EntryOffset, Symbol: Entry.Label,
960 Type: ELF::R_X86_64_PC32, /*Addend*/ 0);
961 }
962
963 return Error::success();
964}
965
966/// Instructions that access user-space memory can cause page faults. These
967/// faults will be handled by the kernel and execution will resume at the fixup
968/// code location if the address was invalid. The kernel uses the exception
969/// table to match the faulting instruction to its fixup. The table consists of
970/// the following entries:
971///
972/// struct exception_table_entry {
973/// int insn;
974/// int fixup;
975/// int data;
976/// };
977///
978/// More info at:
979/// https://www.kernel.org/doc/Documentation/x86/exception-tables.txt
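///
/// Both insn and fixup are self-relative s32 offsets: the faulting instruction
/// is at &entry->insn + entry->insn and the fixup code at &entry->fixup +
/// entry->fixup, which is how the addresses are computed below. The data field
/// carries handler-specific information and is only dumped here.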
980Error LinuxKernelRewriter::readExceptionTable() {
981 ExceptionsSection = BC.getUniqueSectionByName(SectionName: "__ex_table");
982 if (!ExceptionsSection)
983 return Error::success();
984
985 if (ExceptionsSection->getSize() % EXCEPTION_TABLE_ENTRY_SIZE)
986 return createStringError(EC: errc::executable_format_error,
987 Msg: "exception table size error");
988
989 const uint64_t SectionAddress = ExceptionsSection->getAddress();
990 DataExtractor DE(ExceptionsSection->getContents(),
991 BC.AsmInfo->isLittleEndian(),
992 BC.AsmInfo->getCodePointerSize());
993 DataExtractor::Cursor Cursor(0);
994 uint32_t EntryID = 0;
995 while (Cursor && Cursor.tell() < ExceptionsSection->getSize()) {
996 const uint64_t InstAddress =
997 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(C&: Cursor);
998 const uint64_t FixupAddress =
999 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(C&: Cursor);
1000 const uint64_t Data = DE.getU32(C&: Cursor);
1001
1002 // Consume the status of the cursor.
1003 if (!Cursor)
1004 return createStringError(
1005 EC: errc::executable_format_error,
1006 Fmt: "out of bounds while reading exception table: %s",
1007 Vals: toString(E: Cursor.takeError()).c_str());
1008
1009 ++EntryID;
1010
1011 if (opts::DumpExceptions) {
1012 BC.outs() << "Exception Entry: " << EntryID << '\n';
1013 BC.outs() << "\tInsn: 0x" << Twine::utohexstr(Val: InstAddress) << '\n'
1014 << "\tFixup: 0x" << Twine::utohexstr(Val: FixupAddress) << '\n'
1015 << "\tData: 0x" << Twine::utohexstr(Val: Data) << '\n';
1016 }
1017
1018 MCInst *Inst = nullptr;
1019 MCSymbol *FixupLabel = nullptr;
1020
1021 BinaryFunction *InstBF = BC.getBinaryFunctionContainingAddress(Address: InstAddress);
1022 if (InstBF && BC.shouldEmit(Function: *InstBF)) {
1023 Inst = InstBF->getInstructionAtOffset(Offset: InstAddress - InstBF->getAddress());
1024 if (!Inst)
1025 return createStringError(EC: errc::executable_format_error,
1026 Fmt: "no instruction at address 0x%" PRIx64
1027 " in exception table",
1028 Vals: InstAddress);
1029 BC.MIB->addAnnotation(Inst&: *Inst, Name: "ExceptionEntry", Val: EntryID);
1030 FunctionsWithExceptions.insert(V: InstBF);
1031 }
1032
1033 if (!InstBF && opts::Verbosity) {
1034 BC.outs() << "BOLT-INFO: no function matches instruction at 0x"
1035 << Twine::utohexstr(Val: InstAddress)
1036 << " referenced by Linux exception table\n";
1037 }
1038
1039 BinaryFunction *FixupBF =
1040 BC.getBinaryFunctionContainingAddress(Address: FixupAddress);
1041 if (FixupBF && BC.shouldEmit(Function: *FixupBF)) {
1042 const uint64_t Offset = FixupAddress - FixupBF->getAddress();
1043 if (!FixupBF->getInstructionAtOffset(Offset))
1044 return createStringError(EC: errc::executable_format_error,
1045 Fmt: "no instruction at fixup address 0x%" PRIx64
1046 " in exception table",
1047 Vals: FixupAddress);
1048 FixupLabel = Offset ? FixupBF->addEntryPointAtOffset(Offset)
1049 : FixupBF->getSymbol();
1050 if (Inst)
1051 BC.MIB->addAnnotation(Inst&: *Inst, Name: "Fixup", Val: FixupLabel->getName());
1052 FunctionsWithExceptions.insert(V: FixupBF);
1053 }
1054
1055 if (!FixupBF && opts::Verbosity) {
1056 BC.outs() << "BOLT-INFO: no function matches fixup code at 0x"
1057 << Twine::utohexstr(Val: FixupAddress)
1058 << " referenced by Linux exception table\n";
1059 }
1060 }
1061
1062 BC.outs() << "BOLT-INFO: parsed "
1063 << ExceptionsSection->getSize() / EXCEPTION_TABLE_ENTRY_SIZE
1064 << " exception table entries\n";
1065
1066 return Error::success();
1067}
1068
1069/// Depending on the value of CONFIG_BUILDTIME_TABLE_SORT, the kernel expects
1070/// the exception table to be sorted. Hence we have to sort it after code
1071/// reordering.
1072Error LinuxKernelRewriter::rewriteExceptionTable() {
1073 // Disable output of functions with exceptions before rewrite support is
1074 // added.
1075 for (BinaryFunction *BF : FunctionsWithExceptions)
1076 BF->setSimple(false);
1077
1078 return Error::success();
1079}
1080
/// The .parainstructions section contains information for patching paravirtual
/// call instructions at runtime. The entries in the section are in the form:
1083///
1084/// struct paravirt_patch_site {
1085/// u8 *instr; /* original instructions */
1086/// u8 type; /* type of this instruction */
1087/// u8 len; /* length of original instruction */
1088/// };
1089///
/// Note that the structures are aligned on an 8-byte boundary.
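///
/// With 8-byte pointers, the packed entry occupies 10 bytes, so with the
/// 8-byte alignment consecutive entries effectively start every 16 bytes. The
/// reader below simply re-aligns the cursor before each entry instead of
/// assuming a fixed stride.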
1091Error LinuxKernelRewriter::readParaInstructions() {
1092 ParavirtualPatchSection = BC.getUniqueSectionByName(SectionName: ".parainstructions");
1093 if (!ParavirtualPatchSection)
1094 return Error::success();
1095
1096 DataExtractor DE = DataExtractor(ParavirtualPatchSection->getContents(),
1097 BC.AsmInfo->isLittleEndian(),
1098 BC.AsmInfo->getCodePointerSize());
1099 uint32_t EntryID = 0;
1100 DataExtractor::Cursor Cursor(0);
1101 while (Cursor && !DE.eof(C: Cursor)) {
1102 const uint64_t NextOffset = alignTo(Size: Cursor.tell(), A: Align(PARA_PATCH_ALIGN));
1103 if (!DE.isValidOffset(offset: NextOffset))
1104 break;
1105
1106 Cursor.seek(NewOffSet: NextOffset);
1107
1108 const uint64_t InstrLocation = DE.getU64(C&: Cursor);
1109 const uint8_t Type = DE.getU8(C&: Cursor);
1110 const uint8_t Len = DE.getU8(C&: Cursor);
1111
1112 if (!Cursor)
1113 return createStringError(
1114 EC: errc::executable_format_error,
1115 Fmt: "out of bounds while reading .parainstructions: %s",
1116 Vals: toString(E: Cursor.takeError()).c_str());
1117
1118 ++EntryID;
1119
1120 if (opts::DumpParavirtualPatchSites) {
1121 BC.outs() << "Paravirtual patch site: " << EntryID << '\n';
1122 BC.outs() << "\tInstr: 0x" << Twine::utohexstr(Val: InstrLocation)
1123 << "\n\tType: 0x" << Twine::utohexstr(Val: Type) << "\n\tLen: 0x"
1124 << Twine::utohexstr(Val: Len) << '\n';
1125 }
1126
1127 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(Address: InstrLocation);
1128 if (!BF && opts::Verbosity) {
1129 BC.outs() << "BOLT-INFO: no function matches address 0x"
1130 << Twine::utohexstr(Val: InstrLocation)
                << " referenced by paravirtual patch site\n";
1132 }
1133
1134 if (BF && BC.shouldEmit(Function: *BF)) {
1135 MCInst *Inst =
1136 BF->getInstructionAtOffset(Offset: InstrLocation - BF->getAddress());
1137 if (!Inst)
1138 return createStringError(EC: errc::executable_format_error,
1139 Fmt: "no instruction at address 0x%" PRIx64
1140 " in paravirtual call site %d",
1141 Vals: InstrLocation, Vals: EntryID);
1142 BC.MIB->addAnnotation(Inst&: *Inst, Name: "ParaSite", Val: EntryID);
1143 }
1144 }
1145
1146 BC.outs() << "BOLT-INFO: parsed " << EntryID << " paravirtual patch sites\n";
1147
1148 return Error::success();
1149}
1150
1151void LinuxKernelRewriter::skipFunctionsWithAnnotation(
1152 StringRef Annotation) const {
1153 for (BinaryFunction &BF : llvm::make_second_range(c&: BC.getBinaryFunctions())) {
1154 if (!BC.shouldEmit(Function: BF))
1155 continue;
1156 for (const BinaryBasicBlock &BB : BF) {
1157 const bool HasAnnotation = llvm::any_of(Range: BB, P: [&](const MCInst &Inst) {
1158 return BC.MIB->hasAnnotation(Inst, Name: Annotation);
1159 });
1160 if (HasAnnotation) {
1161 BF.setSimple(false);
1162 break;
1163 }
1164 }
1165 }
1166}
1167
1168Error LinuxKernelRewriter::rewriteParaInstructions() {
1169 // Disable output of functions with paravirtual instructions before the
1170 // rewrite support is complete.
1171 skipFunctionsWithAnnotation(Annotation: "ParaSite");
1172
1173 return Error::success();
1174}
1175
1176/// Process __bug_table section.
1177/// This section contains information useful for kernel debugging, mostly
1178/// utilized by WARN()/WARN_ON() macros and deprecated BUG()/BUG_ON().
1179///
/// Each entry in the section is a struct bug_entry that contains a pointer to
/// the ud2 instruction corresponding to the bug, the corresponding file name
/// (both pointers use PC-relative offset addressing), the line number, and
/// flags.
1183/// The definition of the struct bug_entry can be found in
1184/// `include/asm-generic/bug.h`. The first entry in the struct is an instruction
1185/// address encoded as a PC-relative offset. In theory, it could be an absolute
1186/// address if CONFIG_GENERIC_BUG_RELATIVE_POINTERS is not set, but in practice
1187/// the kernel code relies on it being a relative offset on x86-64.
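///
/// For an x86-64 kernel built with CONFIG_GENERIC_BUG_RELATIVE_POINTERS, the
/// 12-byte entry layout is roughly the following (a sketch; see
/// include/asm-generic/bug.h for the authoritative definition):
///
/// struct bug_entry {
///   s32 bug_addr_disp;  /* PC-relative offset to the ud2 instruction */
///   s32 file_disp;      /* PC-relative offset to the file name */
///   u16 line;
///   u16 flags;
/// };
///
/// Only the first field is read below; the rest of the entry is skipped over.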
1188Error LinuxKernelRewriter::readBugTable() {
1189 BugTableSection = BC.getUniqueSectionByName(SectionName: "__bug_table");
1190 if (!BugTableSection)
1191 return Error::success();
1192
1193 if (BugTableSection->getSize() % BUG_TABLE_ENTRY_SIZE)
1194 return createStringError(EC: errc::executable_format_error,
1195 Msg: "bug table size error");
1196
1197 const uint64_t SectionAddress = BugTableSection->getAddress();
1198 DataExtractor DE(BugTableSection->getContents(), BC.AsmInfo->isLittleEndian(),
1199 BC.AsmInfo->getCodePointerSize());
1200 DataExtractor::Cursor Cursor(0);
1201 uint32_t EntryID = 0;
1202 while (Cursor && Cursor.tell() < BugTableSection->getSize()) {
1203 const uint64_t Pos = Cursor.tell();
1204 const uint64_t InstAddress =
1205 SectionAddress + Pos + (int32_t)DE.getU32(C&: Cursor);
1206 Cursor.seek(NewOffSet: Pos + BUG_TABLE_ENTRY_SIZE);
1207
1208 if (!Cursor)
1209 return createStringError(EC: errc::executable_format_error,
1210 Fmt: "out of bounds while reading __bug_table: %s",
1211 Vals: toString(E: Cursor.takeError()).c_str());
1212
1213 ++EntryID;
1214
1215 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(Address: InstAddress);
1216 if (!BF && opts::Verbosity) {
1217 BC.outs() << "BOLT-INFO: no function matches address 0x"
1218 << Twine::utohexstr(Val: InstAddress)
1219 << " referenced by bug table\n";
1220 }
1221
1222 if (BF && BC.shouldEmit(Function: *BF)) {
1223 MCInst *Inst = BF->getInstructionAtOffset(Offset: InstAddress - BF->getAddress());
1224 if (!Inst)
1225 return createStringError(EC: errc::executable_format_error,
1226 Fmt: "no instruction at address 0x%" PRIx64
1227 " referenced by bug table entry %d",
1228 Vals: InstAddress, Vals: EntryID);
1229 BC.MIB->addAnnotation(Inst&: *Inst, Name: "BugEntry", Val: EntryID);
1230
1231 FunctionBugList[BF].push_back(Elt: EntryID);
1232 }
1233 }
1234
1235 BC.outs() << "BOLT-INFO: parsed " << EntryID << " bug table entries\n";
1236
1237 return Error::success();
1238}
1239
1240/// find_bug() uses linear search to match an address to an entry in the bug
1241/// table. Hence, there is no need to sort entries when rewriting the table.
1242/// When we need to erase an entry, we set its instruction address to zero.
1243Error LinuxKernelRewriter::rewriteBugTable() {
1244 if (!BugTableSection)
1245 return Error::success();
1246
1247 for (BinaryFunction &BF : llvm::make_second_range(c&: BC.getBinaryFunctions())) {
1248 if (!BC.shouldEmit(Function: BF))
1249 continue;
1250
1251 if (!FunctionBugList.count(Val: &BF))
1252 continue;
1253
1254 // Bugs that will be emitted for this function.
1255 DenseSet<uint32_t> EmittedIDs;
1256 for (BinaryBasicBlock &BB : BF) {
1257 for (MCInst &Inst : BB) {
1258 if (!BC.MIB->hasAnnotation(Inst, Name: "BugEntry"))
1259 continue;
1260 const uint32_t ID = BC.MIB->getAnnotationAs<uint32_t>(Inst, Name: "BugEntry");
1261 EmittedIDs.insert(V: ID);
1262
1263 // Create a relocation entry for this bug entry.
1264 MCSymbol *Label =
1265 BC.MIB->getOrCreateInstLabel(Inst, Name: "__BUG_", Ctx: BC.Ctx.get());
1266 const uint64_t EntryOffset = (ID - 1) * BUG_TABLE_ENTRY_SIZE;
1267 BugTableSection->addRelocation(Offset: EntryOffset, Symbol: Label, Type: ELF::R_X86_64_PC32,
1268 /*Addend*/ 0);
1269 }
1270 }
1271
    // Clear bug entries that were not emitted for this function, e.g. as a
    // result of DCE, by setting their instruction address to zero.
1274 for (const uint32_t ID : FunctionBugList[&BF]) {
1275 if (!EmittedIDs.count(V: ID)) {
1276 const uint64_t EntryOffset = (ID - 1) * BUG_TABLE_ENTRY_SIZE;
1277 BugTableSection->addRelocation(Offset: EntryOffset, Symbol: nullptr, Type: ELF::R_X86_64_PC32,
1278 /*Addend*/ 0);
1279 }
1280 }
1281 }
1282
1283 return Error::success();
1284}
1285
/// The kernel can replace certain instruction sequences depending on the
/// hardware it is running on and the features specified at boot time. The
/// information about alternative instruction sequences is stored in the
/// .altinstructions section. The format of entries in this section is defined
/// in arch/x86/include/asm/alternative.h:
1291///
1292/// struct alt_instr {
1293/// s32 instr_offset;
1294/// s32 repl_offset;
1295/// uXX feature;
1296/// u8 instrlen;
1297/// u8 replacementlen;
1298/// u8 padlen; // present in older kernels
1299/// } __packed;
1300///
/// Note that the structure is packed.
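///
/// Both instr_offset and repl_offset are self-relative s32 values: the
/// original instruction lives at &entry->instr_offset + entry->instr_offset
/// and the replacement at &entry->repl_offset + entry->repl_offset, matching
/// the decoding below. The width of the feature field differs between kernel
/// versions and is therefore configurable via --alt-inst-feature-size.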
1302Error LinuxKernelRewriter::readAltInstructions() {
1303 AltInstrSection = BC.getUniqueSectionByName(SectionName: ".altinstructions");
1304 if (!AltInstrSection)
1305 return Error::success();
1306
1307 const uint64_t Address = AltInstrSection->getAddress();
1308 DataExtractor DE = DataExtractor(AltInstrSection->getContents(),
1309 BC.AsmInfo->isLittleEndian(),
1310 BC.AsmInfo->getCodePointerSize());
1311 uint64_t EntryID = 0;
1312 DataExtractor::Cursor Cursor(0);
1313 while (Cursor && !DE.eof(C: Cursor)) {
1314 const uint64_t OrgInstAddress =
1315 Address + Cursor.tell() + (int32_t)DE.getU32(C&: Cursor);
1316 const uint64_t AltInstAddress =
1317 Address + Cursor.tell() + (int32_t)DE.getU32(C&: Cursor);
1318 const uint64_t Feature = DE.getUnsigned(C&: Cursor, Size: opts::AltInstFeatureSize);
1319 const uint8_t OrgSize = DE.getU8(C&: Cursor);
1320 const uint8_t AltSize = DE.getU8(C&: Cursor);
1321
1322 // Older kernels may have the padlen field.
1323 const uint8_t PadLen = opts::AltInstHasPadLen ? DE.getU8(C&: Cursor) : 0;
1324
1325 if (!Cursor)
1326 return createStringError(
1327 EC: errc::executable_format_error,
1328 Fmt: "out of bounds while reading .altinstructions: %s",
1329 Vals: toString(E: Cursor.takeError()).c_str());
1330
1331 ++EntryID;
1332
1333 if (opts::DumpAltInstructions) {
1334 BC.outs() << "Alternative instruction entry: " << EntryID
1335 << "\n\tOrg: 0x" << Twine::utohexstr(Val: OrgInstAddress)
1336 << "\n\tAlt: 0x" << Twine::utohexstr(Val: AltInstAddress)
1337 << "\n\tFeature: 0x" << Twine::utohexstr(Val: Feature)
1338 << "\n\tOrgSize: " << (int)OrgSize
1339 << "\n\tAltSize: " << (int)AltSize << '\n';
1340 if (opts::AltInstHasPadLen)
1341 BC.outs() << "\tPadLen: " << (int)PadLen << '\n';
1342 }
1343
1344 if (AltSize > OrgSize)
1345 return createStringError(EC: errc::executable_format_error,
1346 Msg: "error reading .altinstructions");
1347
1348 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(Address: OrgInstAddress);
1349 if (!BF && opts::Verbosity) {
1350 BC.outs() << "BOLT-INFO: no function matches address 0x"
1351 << Twine::utohexstr(Val: OrgInstAddress)
1352 << " of instruction from .altinstructions\n";
1353 }
1354
1355 BinaryFunction *AltBF =
1356 BC.getBinaryFunctionContainingAddress(Address: AltInstAddress);
1357 if (AltBF && BC.shouldEmit(Function: *AltBF)) {
1358 BC.errs()
1359 << "BOLT-WARNING: alternative instruction sequence found in function "
1360 << *AltBF << '\n';
1361 AltBF->setIgnored();
1362 }
1363
1364 if (!BF || !BC.shouldEmit(Function: *BF))
1365 continue;
1366
1367 if (OrgInstAddress + OrgSize > BF->getAddress() + BF->getSize())
1368 return createStringError(EC: errc::executable_format_error,
1369 Msg: "error reading .altinstructions");
1370
1371 MCInst *Inst =
1372 BF->getInstructionAtOffset(Offset: OrgInstAddress - BF->getAddress());
1373 if (!Inst)
1374 return createStringError(EC: errc::executable_format_error,
1375 Fmt: "no instruction at address 0x%" PRIx64
1376 " referenced by .altinstructions entry %d",
1377 Vals: OrgInstAddress, Vals: EntryID);
1378
    // There could be more than one alternative instruction sequence for the
    // same original instruction. Annotate each alternative separately.
1381 std::string AnnotationName = "AltInst";
1382 unsigned N = 2;
1383 while (BC.MIB->hasAnnotation(Inst: *Inst, Name: AnnotationName))
1384 AnnotationName = "AltInst" + std::to_string(val: N++);
1385
1386 BC.MIB->addAnnotation(Inst&: *Inst, Name: AnnotationName, Val: EntryID);
1387
1388 // Annotate all instructions from the original sequence. Note that it's not
1389 // the most efficient way to look for instructions in the address range,
1390 // but since alternative instructions are uncommon, it will do for now.
1391 for (uint32_t Offset = 1; Offset < OrgSize; ++Offset) {
1392 Inst = BF->getInstructionAtOffset(Offset: OrgInstAddress + Offset -
1393 BF->getAddress());
1394 if (Inst)
1395 BC.MIB->addAnnotation(Inst&: *Inst, Name: AnnotationName, Val: EntryID);
1396 }
1397 }
1398
1399 BC.outs() << "BOLT-INFO: parsed " << EntryID
1400 << " alternative instruction entries\n";
1401
1402 return Error::success();
1403}
1404
1405Error LinuxKernelRewriter::rewriteAltInstructions() {
1406 // Disable output of functions with alt instructions before the rewrite
1407 // support is complete.
1408 skipFunctionsWithAnnotation(Annotation: "AltInst");
1409
1410 return Error::success();
1411}
1412
1413/// When the Linux kernel needs to handle an error associated with a given PCI
/// device, it uses a table stored in the .pci_fixup section to locate fixup
/// code specific to the vendor and the problematic device. The section
/// contains a list of the following structures defined in include/linux/pci.h:
1417///
1418/// struct pci_fixup {
1419/// u16 vendor; /* Or PCI_ANY_ID */
1420/// u16 device; /* Or PCI_ANY_ID */
1421/// u32 class; /* Or PCI_ANY_ID */
1422/// unsigned int class_shift; /* should be 0, 8, 16 */
1423/// int hook_offset;
1424/// };
1425///
/// Normally, the hook points to the start of a function, so we don't have to
/// update the pointer if we are not relocating functions. Hence, while reading
/// the table, we validate this assumption. If a hook points into the middle of
/// a function body, we issue a warning and skip that function.
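///
/// Note that hook_offset is a self-relative s32: the fixup hook is located at
/// &entry->hook_offset + entry->hook_offset, which is how HookAddress is
/// computed below.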
1430Error LinuxKernelRewriter::readPCIFixupTable() {
1431 PCIFixupSection = BC.getUniqueSectionByName(SectionName: ".pci_fixup");
1432 if (!PCIFixupSection)
1433 return Error::success();
1434
1435 if (PCIFixupSection->getSize() % PCI_FIXUP_ENTRY_SIZE)
1436 return createStringError(EC: errc::executable_format_error,
1437 Msg: "PCI fixup table size error");
1438
1439 const uint64_t Address = PCIFixupSection->getAddress();
1440 DataExtractor DE = DataExtractor(PCIFixupSection->getContents(),
1441 BC.AsmInfo->isLittleEndian(),
1442 BC.AsmInfo->getCodePointerSize());
1443 uint64_t EntryID = 0;
1444 DataExtractor::Cursor Cursor(0);
1445 while (Cursor && !DE.eof(C: Cursor)) {
1446 const uint16_t Vendor = DE.getU16(C&: Cursor);
1447 const uint16_t Device = DE.getU16(C&: Cursor);
1448 const uint32_t Class = DE.getU32(C&: Cursor);
1449 const uint32_t ClassShift = DE.getU32(C&: Cursor);
1450 const uint64_t HookAddress =
1451 Address + Cursor.tell() + (int32_t)DE.getU32(C&: Cursor);
1452
1453 if (!Cursor)
1454 return createStringError(EC: errc::executable_format_error,
1455 Fmt: "out of bounds while reading .pci_fixup: %s",
1456 Vals: toString(E: Cursor.takeError()).c_str());
1457
1458 ++EntryID;
1459
1460 if (opts::DumpPCIFixups) {
1461 BC.outs() << "PCI fixup entry: " << EntryID << "\n\tVendor 0x"
1462 << Twine::utohexstr(Val: Vendor) << "\n\tDevice: 0x"
1463 << Twine::utohexstr(Val: Device) << "\n\tClass: 0x"
1464 << Twine::utohexstr(Val: Class) << "\n\tClassShift: 0x"
1465 << Twine::utohexstr(Val: ClassShift) << "\n\tHookAddress: 0x"
1466 << Twine::utohexstr(Val: HookAddress) << '\n';
1467 }
1468
1469 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(Address: HookAddress);
1470 if (!BF && opts::Verbosity) {
1471 BC.outs() << "BOLT-INFO: no function matches address 0x"
1472 << Twine::utohexstr(Val: HookAddress)
1473 << " of hook from .pci_fixup\n";
1474 }
1475
1476 if (!BF || !BC.shouldEmit(Function: *BF))
1477 continue;
1478
1479 if (const uint64_t Offset = HookAddress - BF->getAddress()) {
1480 BC.errs() << "BOLT-WARNING: PCI fixup detected in the middle of function "
1481 << *BF << " at offset 0x" << Twine::utohexstr(Val: Offset) << '\n';
1482 BF->setSimple(false);
1483 }
1484 }
1485
1486 BC.outs() << "BOLT-INFO: parsed " << EntryID << " PCI fixup entries\n";
1487
1488 return Error::success();
1489}
1490
1491/// Runtime code modification used by static keys is the most ubiquitous
1492/// self-modifying feature of the Linux kernel. The idea is to eliminate the
1493/// condition check and associated conditional jump on a hot path if that
1494/// condition (based on a boolean value of a static key) does not change often.
/// Whenever the condition changes, the kernel runtime modifies all code paths
/// associated with that key, flipping the code between a nop and an
/// (unconditional) jump. The information about the code is stored in a static
/// key jump table that contains a list of entries of the following type from
/// include/linux/jump_label.h:
///
1501/// struct jump_entry {
1502/// s32 code;
1503/// s32 target;
1504/// long key; // key may be far away from the core kernel under KASLR
1505/// };
1506///
1507/// The list does not have to be stored in any sorted way, but it is sorted at
1508/// boot time (or module initialization time) first by "key" and then by "code".
1509/// jump_label_sort_entries() is responsible for sorting the table.
1510///
/// The key field in the jump_entry structure uses the lower two bits of the
/// key address (which is itself aligned) to store extra information. We are
/// interested in the lowest bit, which indicates whether the key is likely to
/// be set on the code path associated with this jump_entry.
1515///
1516/// static_key_{enable,disable}() functions modify the code based on key and
1517/// jump table entries.
1518///
1519/// jump_label_update() updates all code entries for a given key. Batch mode is
1520/// used for x86.
1521///
/// The actual patching happens in text_poke_bp_batch(), which overwrites the
/// first byte of the sequence with int3 before proceeding with the actual code
/// replacement.
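///
/// In the jump_entry above, code and target are self-relative s32 offsets and
/// key is a self-relative s64: as decoded below, the jump instruction is at
/// &entry->code + entry->code, its target at &entry->target + entry->target,
/// and the key at &entry->key + entry->key (with the flag bits described
/// above packed into the low bits of the key offset).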
1525Error LinuxKernelRewriter::readStaticKeysJumpTable() {
1526 const BinaryData *StaticKeysJumpTable =
1527 BC.getBinaryDataByName(Name: "__start___jump_table");
1528 if (!StaticKeysJumpTable)
1529 return Error::success();
1530
1531 StaticKeysJumpTableAddress = StaticKeysJumpTable->getAddress();
1532
1533 const BinaryData *Stop = BC.getBinaryDataByName(Name: "__stop___jump_table");
1534 if (!Stop)
1535 return createStringError(EC: errc::executable_format_error,
1536 Msg: "missing __stop___jump_table symbol");
1537
1538 ErrorOr<BinarySection &> ErrorOrSection =
1539 BC.getSectionForAddress(Address: StaticKeysJumpTableAddress);
1540 if (!ErrorOrSection)
1541 return createStringError(EC: errc::executable_format_error,
1542 Msg: "no section matching __start___jump_table");
1543
1544 StaticKeysJumpSection = *ErrorOrSection;
1545 if (!StaticKeysJumpSection->containsAddress(Address: Stop->getAddress() - 1))
1546 return createStringError(EC: errc::executable_format_error,
1547 Msg: "__stop___jump_table not in the same section "
1548 "as __start___jump_table");
1549
1550 if ((Stop->getAddress() - StaticKeysJumpTableAddress) %
1551 STATIC_KEYS_JUMP_ENTRY_SIZE)
1552 return createStringError(EC: errc::executable_format_error,
1553 Msg: "static keys jump table size error");
1554
1555 const uint64_t SectionAddress = StaticKeysJumpSection->getAddress();
1556 DataExtractor DE(StaticKeysJumpSection->getContents(),
1557 BC.AsmInfo->isLittleEndian(),
1558 BC.AsmInfo->getCodePointerSize());
1559 DataExtractor::Cursor Cursor(StaticKeysJumpTableAddress - SectionAddress);
1560 uint32_t EntryID = 0;
1561 while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) {
1562 const uint64_t JumpAddress =
1563 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(C&: Cursor);
1564 const uint64_t TargetAddress =
1565 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(C&: Cursor);
1566 const uint64_t KeyAddress =
1567 SectionAddress + Cursor.tell() + (int64_t)DE.getU64(C&: Cursor);
1568
1569 // Consume the status of the cursor.
1570 if (!Cursor)
1571 return createStringError(
1572 EC: errc::executable_format_error,
1573 Fmt: "out of bounds while reading static keys jump table: %s",
1574 Vals: toString(E: Cursor.takeError()).c_str());
1575
1576 ++EntryID;
1577
1578 JumpInfo.push_back(Elt: JumpInfoEntry());
1579 JumpInfoEntry &Info = JumpInfo.back();
1580 Info.Likely = KeyAddress & 1;
1581
1582 if (opts::DumpStaticKeys) {
1583 BC.outs() << "Static key jump entry: " << EntryID
1584 << "\n\tJumpAddress: 0x" << Twine::utohexstr(Val: JumpAddress)
1585 << "\n\tTargetAddress: 0x" << Twine::utohexstr(Val: TargetAddress)
1586 << "\n\tKeyAddress: 0x" << Twine::utohexstr(Val: KeyAddress)
1587 << "\n\tIsLikely: " << Info.Likely << '\n';
1588 }
1589
1590 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(Address: JumpAddress);
1591 if (!BF && opts::Verbosity) {
1592 BC.outs()
1593 << "BOLT-INFO: no function matches address 0x"
1594 << Twine::utohexstr(Val: JumpAddress)
1595 << " of jump instruction referenced from static keys jump table\n";
1596 }
1597
1598 if (!BF || !BC.shouldEmit(Function: *BF))
1599 continue;
1600
1601 MCInst *Inst = BF->getInstructionAtOffset(Offset: JumpAddress - BF->getAddress());
1602 if (!Inst)
1603 return createStringError(
1604 EC: errc::executable_format_error,
1605 Fmt: "no instruction at static keys jump site address 0x%" PRIx64,
1606 Vals: JumpAddress);
1607
1608 if (!BF->containsAddress(PC: TargetAddress))
1609 return createStringError(
1610 EC: errc::executable_format_error,
1611 Fmt: "invalid target of static keys jump at 0x%" PRIx64 " : 0x%" PRIx64,
1612 Vals: JumpAddress, Vals: TargetAddress);
1613
1614 const bool IsBranch = BC.MIB->isBranch(Inst: *Inst);
1615 if (!IsBranch && !BC.MIB->isNoop(Inst: *Inst))
1616 return createStringError(EC: errc::executable_format_error,
1617 Fmt: "jump or nop expected at address 0x%" PRIx64,
1618 Vals: JumpAddress);
1619
1620 const uint64_t Size = BC.computeInstructionSize(Inst: *Inst);
1621 if (Size != 2 && Size != 5) {
1622 return createStringError(
1623 EC: errc::executable_format_error,
1624 Fmt: "unexpected static keys jump size at address 0x%" PRIx64,
1625 Vals: JumpAddress);
1626 }
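 // Illustrative note: on x86-64 these two sizes correspond to the encodings
 // the kernel patches at jump label sites: a 2-byte short "jmp rel8"
 // (EB xx) and a 5-byte near "jmp rel32" (E9 xx xx xx xx), or same-sized nops.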
1627
1628 MCSymbol *Target = BF->registerBranch(Src: JumpAddress, Dst: TargetAddress);
1629 MCInst StaticKeyBranch;
1630
1631 // Create a conditional branch instruction. The actual condition code type
1632 // should not matter as long as it is valid. The instruction should be
1633 // treated as a conditional branch for control-flow purposes. Before we emit
1634 // the code, it will be converted to a different instruction in
1635 // rewriteStaticKeysJumpTable().
1636 //
1637 // NB: for older kernels, under the LongJumpLabels option, we create a long
1638 // conditional branch to guarantee that the code size estimation accounts
1639 // for the extra bytes needed for the long branch used by the kernel
1640 // patching code. Newer kernels can work with both short and long branches.
1641 // The code for a long conditional branch is larger than that of an
1642 // unconditional one, so we are pessimistic in our estimate.
1643 if (opts::LongJumpLabels)
1644 BC.MIB->createLongCondBranch(Inst&: StaticKeyBranch, Target, CC: 0, Ctx: BC.Ctx.get());
1645 else
1646 BC.MIB->createCondBranch(Inst&: StaticKeyBranch, Target, CC: 0, Ctx: BC.Ctx.get());
1647 BC.MIB->moveAnnotations(SrcInst: std::move(*Inst), DstInst&: StaticKeyBranch);
1648 BC.MIB->setDynamicBranch(Inst&: StaticKeyBranch, ID: EntryID);
1649 *Inst = StaticKeyBranch;
1650
1651 // IsBranch = InitialValue ^ LIKELY
1652 //
1653 // IsBranch InitialValue LIKELY
1654 // 0 0 0
1655 // 1 0 1
1656 // 1 1 0
1657 // 0 1 1
1658 // => InitialValue = IsBranch ^ LIKELY
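 // Worked example: a branch present in the input (IsBranch = 1) for a key
 // marked unlikely (LIKELY = 0) gives InitialValue = 1 ^ 0 = 1.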
1659 Info.InitValue = IsBranch ^ Info.Likely;
1660
1661 // Add annotations to facilitate manual code analysis.
1662 BC.MIB->addAnnotation(Inst&: *Inst, Name: "Likely", Val: Info.Likely);
1663 BC.MIB->addAnnotation(Inst&: *Inst, Name: "InitValue", Val: Info.InitValue);
1664 if (!BC.MIB->getSize(Inst: *Inst))
1665 BC.MIB->setSize(Inst&: *Inst, Size);
1666
1667 if (opts::LongJumpLabels)
1668 BC.MIB->setSize(Inst&: *Inst, Size: 5);
1669 }
1670
1671 BC.outs() << "BOLT-INFO: parsed " << EntryID << " static keys jump entries\n";
1672
1673 return Error::success();
1674}
1675
1676// Pre-emit pass. Convert dynamic branch instructions into jumps that can be
1677// relaxed. In the post-emit pass we will convert those jumps into nops when
1678// necessary. We unconditionally convert to jumps so that the jumps can be
1679// relaxed and the optimal jump/nop instruction size is selected.
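//
// Illustrative flow for a single entry (a sketch, not upstream code):
//
//   pre-emit:  dynamic branch #N  ->  jmp __SK_N   (relaxable)
//   emit:      the assembler relaxes the jump to 2 or 5 bytes as needed
//   post-emit: if entry #N must start out as a nop, the emitted jump bytes
//              are overwritten in place with a same-sized nop sequence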
1680Error LinuxKernelRewriter::rewriteStaticKeysJumpTable() {
1681 if (!StaticKeysJumpSection)
1682 return Error::success();
1683
1684 uint64_t NumShort = 0;
1685 uint64_t NumLong = 0;
1686 for (BinaryFunction &BF : llvm::make_second_range(c&: BC.getBinaryFunctions())) {
1687 if (!BC.shouldEmit(Function: BF))
1688 continue;
1689
1690 for (BinaryBasicBlock &BB : BF) {
1691 for (MCInst &Inst : BB) {
1692 if (!BC.MIB->isDynamicBranch(Inst))
1693 continue;
1694
1695 const uint32_t EntryID = *BC.MIB->getDynamicBranchID(Inst);
1696 MCSymbol *Target =
1697 const_cast<MCSymbol *>(BC.MIB->getTargetSymbol(Inst));
1698 assert(Target && "Target symbol should be set.");
1699
1700 const JumpInfoEntry &Info = JumpInfo[EntryID - 1];
1701 const bool IsBranch = Info.Likely ^ Info.InitValue;
1702
1703 uint32_t Size = *BC.MIB->getSize(Inst);
1704 if (Size == 2)
1705 ++NumShort;
1706 else if (Size == 5)
1707 ++NumLong;
1708 else
1709 llvm_unreachable("Wrong size for static keys jump instruction.");
1710
1711 MCInst NewInst;
1712 // Replace the instruction with an unconditional jump even if it needs to
1713 // be a nop in the binary.
1714 if (opts::LongJumpLabels) {
1715 BC.MIB->createLongUncondBranch(Inst&: NewInst, Target, Ctx: BC.Ctx.get());
1716 } else {
1717 // Newer kernels can handle short and long jumps for static keys.
1718 // Optimistically, emit short jump and check if it gets relaxed into
1719 // a long one during post-emit. Only then convert the jump to a nop.
1720 BC.MIB->createUncondBranch(Inst&: NewInst, TBB: Target, Ctx: BC.Ctx.get());
1721 }
1722
1723 BC.MIB->moveAnnotations(SrcInst: std::move(Inst), DstInst&: NewInst);
1724 Inst = NewInst;
1725
1726 // Mark the instruction for nop conversion.
1727 if (!IsBranch)
1728 NopIDs.insert(V: EntryID);
1729
1730 MCSymbol *Label =
1731 BC.MIB->getOrCreateInstLabel(Inst, Name: "__SK_", Ctx: BC.Ctx.get());
1732
1733 // Create a relocation against the label.
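 // Each jump_entry is 16 bytes (s32 code, s32 target, long key), so
 // entry N starts at (N - 1) * 16 from the start of the jump table; the
 // two relocations below update the entry's code and target fields.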
1734 const uint64_t EntryOffset = StaticKeysJumpTableAddress -
1735 StaticKeysJumpSection->getAddress() +
1736 (EntryID - 1) * 16;
1737 StaticKeysJumpSection->addRelocation(Offset: EntryOffset, Symbol: Label,
1738 Type: ELF::R_X86_64_PC32,
1739 /*Addend*/ 0);
1740 StaticKeysJumpSection->addRelocation(Offset: EntryOffset + 4, Symbol: Target,
1741 Type: ELF::R_X86_64_PC32, /*Addend*/ 0);
1742 }
1743 }
1744 }
1745
1746 BC.outs() << "BOLT-INFO: the input contains " << NumShort << " short and "
1747 << NumLong << " long static keys jumps in optimized functions\n";
1748
1749 return Error::success();
1750}
1751
1752// Post-emit pass of static keys jump section. Convert jumps to nops.
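//
// A minimal sketch (an assumption, not upstream code) of the in-place
// conversion performed below for one jump site inside an emitted function
// image, with hypothetical Image, JumpOffset, and JumpSize:
//
//   SmallString<15> Nops;
//   raw_svector_ostream OS(Nops);
//   BC.MAB->writeNopData(OS, /*Count=*/JumpSize, BC.STI.get());
//   memcpy(Image + JumpOffset, Nops.data(), JumpSize);
//
// On x86-64 this typically yields "66 90" for a 2-byte site and
// "0F 1F 44 00 00" for a 5-byte site.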
1753Error LinuxKernelRewriter::updateStaticKeysJumpTablePostEmit() {
1754 if (!StaticKeysJumpSection || !StaticKeysJumpSection->isFinalized())
1755 return Error::success();
1756
1757 const uint64_t SectionAddress = StaticKeysJumpSection->getAddress();
1758 DataExtractor DE(StaticKeysJumpSection->getOutputContents(),
1759 BC.AsmInfo->isLittleEndian(),
1760 BC.AsmInfo->getCodePointerSize());
1761 DataExtractor::Cursor Cursor(StaticKeysJumpTableAddress - SectionAddress);
1762 const BinaryData *Stop = BC.getBinaryDataByName(Name: "__stop___jump_table");
1763 uint32_t EntryID = 0;
1764 uint64_t NumShort = 0;
1765 uint64_t NumLong = 0;
1766 while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) {
1767 const uint64_t JumpAddress =
1768 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(C&: Cursor);
1769 const uint64_t TargetAddress =
1770 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(C&: Cursor);
1771 const uint64_t KeyAddress =
1772 SectionAddress + Cursor.tell() + (int64_t)DE.getU64(C&: Cursor);
1773
1774 // Consume the status of the cursor.
1775 if (!Cursor)
1776 return createStringError(EC: errc::executable_format_error,
1777 Fmt: "out of bounds while updating static keys: %s",
1778 Vals: toString(E: Cursor.takeError()).c_str());
1779
1780 ++EntryID;
1781
1782 LLVM_DEBUG({
1783 dbgs() << "\n\tJumpAddress: 0x" << Twine::utohexstr(JumpAddress)
1784 << "\n\tTargetAddress: 0x" << Twine::utohexstr(TargetAddress)
1785 << "\n\tKeyAddress: 0x" << Twine::utohexstr(KeyAddress) << '\n';
1786 });
1787 (void)TargetAddress;
1788 (void)KeyAddress;
1789
1790 BinaryFunction *BF =
1791 BC.getBinaryFunctionContainingAddress(Address: JumpAddress,
1792 /*CheckPastEnd*/ false,
1793 /*UseMaxSize*/ true);
1794 assert(BF && "Cannot get function for modified static key.");
1795
1796 if (!BF->isEmitted())
1797 continue;
1798
1799 // Disassemble the instruction to collect size statistics even if the
1800 // nop conversion is unnecessary.
1801 MutableArrayRef<uint8_t> Contents = MutableArrayRef<uint8_t>(
1802 reinterpret_cast<uint8_t *>(BF->getImageAddress()), BF->getImageSize());
1803 assert(Contents.size() && "Non-empty function image expected.");
1804
1805 MCInst Inst;
1806 uint64_t Size;
1807 const uint64_t JumpOffset = JumpAddress - BF->getAddress();
1808 if (!BC.DisAsm->getInstruction(Instr&: Inst, Size, Bytes: Contents.slice(N: JumpOffset), Address: 0,
1809 CStream&: nulls())) {
1810 llvm_unreachable("Unable to disassemble jump instruction.");
1811 }
1812 assert(BC.MIB->isBranch(Inst) && "Branch instruction expected.");
1813
1814 if (Size == 2)
1815 ++NumShort;
1816 else if (Size == 5)
1817 ++NumLong;
1818 else
1819 llvm_unreachable("Unexpected size for static keys jump instruction.");
1820
1821 // Check if we need to convert jump instruction into a nop.
1822 if (!NopIDs.contains(V: EntryID))
1823 continue;
1824
1825 SmallString<15> NopCode;
1826 raw_svector_ostream VecOS(NopCode);
1827 BC.MAB->writeNopData(OS&: VecOS, Count: Size, STI: BC.STI.get());
1828 for (uint64_t I = 0; I < Size; ++I)
1829 Contents[JumpOffset + I] = NopCode[I];
1830 }
1831
1832 BC.outs() << "BOLT-INFO: written " << NumShort << " short and " << NumLong
1833 << " long static keys jumps in optimized functions\n";
1834
1835 return Error::success();
1836}
1837
1838} // namespace
1839
1840std::unique_ptr<MetadataRewriter>
1841llvm::bolt::createLinuxKernelRewriter(BinaryContext &BC) {
1842 return std::make_unique<LinuxKernelRewriter>(args&: BC);
1843}
1844
