| 1 | //===- bolt/runtime/hugify.cpp -------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===---------------------------------------------------------------------===// |
| 8 | |
| 9 | #if (defined(__x86_64__) || defined(__aarch64__) || defined(__arm64__)) && \ |
| 10 | !defined(__APPLE__) |
| 11 | |
| 12 | #include "common.h" |
| 13 | |
| 14 | #pragma GCC visibility push(hidden) |
| 15 | |
// Uncomment the define below to get very verbose logging on stderr while
// debugging the hugify runtime.
// #define ENABLE_DEBUG

// DEBUG(X) expands to a braced block holding X when ENABLE_DEBUG is defined
// and to an empty block otherwise. Call sites put the terminating ';' inside
// the argument and none after the closing parenthesis, so the expansion must
// stay a plain compound statement.
#ifdef ENABLE_DEBUG
#define DEBUG(X) { X; }
#else
#define DEBUG(X) {}
#endif
| 26 | |
// Function contains a trampoline to _start,
// so we can resume regular execution of the function that we hooked.
| 29 | extern void __bolt_hugify_start_program(); |
| 30 | |
// The __hot_start and __hot_end symbols are set by BOLT. We use them to figure
// out the range for marking huge pages.
| 33 | extern uint64_t __hot_start; |
| 34 | extern uint64_t __hot_end; |
| 35 | |
| 36 | static void getKernelVersion(uint32_t *Val) { |
| 37 | // release should be in the format: %d.%d.%d |
| 38 | // major, minor, release |
| 39 | struct UtsNameTy UtsName; |
| 40 | int Ret = __uname(Buf: &UtsName); |
| 41 | const char *Buf = UtsName.release; |
| 42 | const char *End = Buf + strLen(Str: Buf); |
| 43 | const char Delims[2][2] = {"." , "." }; |
| 44 | |
| 45 | for (int i = 0; i < 3; ++i) { |
| 46 | if (!scanUInt32(Buf, End, Ret&: Val[i])) { |
| 47 | return; |
| 48 | } |
| 49 | if (i < sizeof(Delims) / sizeof(Delims[0])) { |
| 50 | const char *Ptr = Delims[i]; |
| 51 | while (*Ptr != '\0') { |
| 52 | if (*Ptr != *Buf) { |
| 53 | return; |
| 54 | } |
| 55 | ++Ptr; |
| 56 | ++Buf; |
| 57 | } |
| 58 | } |
| 59 | } |
| 60 | } |
| 61 | |
| 62 | /// Check whether the kernel supports THP via corresponding sysfs entry. |
| 63 | /// thp works only starting from 5.10 |
| 64 | static bool hasPagecacheTHPSupport() { |
| 65 | char Buf[64]; |
| 66 | |
| 67 | int FD = __open(pathname: "/sys/kernel/mm/transparent_hugepage/enabled" , |
| 68 | flags: 0 /* O_RDONLY */, mode: 0); |
| 69 | if (FD < 0) |
| 70 | return false; |
| 71 | |
| 72 | memset(Buf, C: 0, Size: sizeof(Buf)); |
| 73 | const size_t Res = __read(fd: FD, buf: Buf, count: sizeof(Buf)); |
| 74 | if (Res < 0) |
| 75 | return false; |
| 76 | |
| 77 | if (!strStr(Haystack: Buf, Needle: "[always]" ) && !strStr(Haystack: Buf, Needle: "[madvise]" )) { |
| 78 | DEBUG(report("[hugify] THP support is not enabled.\n" );) |
| 79 | return false; |
| 80 | } |
| 81 | |
| 82 | struct KernelVersionTy { |
| 83 | uint32_t major; |
| 84 | uint32_t minor; |
| 85 | uint32_t release; |
| 86 | }; |
| 87 | |
| 88 | KernelVersionTy KernelVersion; |
| 89 | |
| 90 | getKernelVersion(Val: (uint32_t *)&KernelVersion); |
| 91 | if (KernelVersion.major >= 6 || |
| 92 | (KernelVersion.major == 5 && KernelVersion.minor >= 10)) |
| 93 | return true; |
| 94 | |
| 95 | return false; |
| 96 | } |
| 97 | |
| 98 | static void hugifyForOldKernel(uint8_t *From, uint8_t *To) { |
| 99 | const size_t Size = To - From; |
| 100 | |
| 101 | uint8_t *Mem = reinterpret_cast<uint8_t *>( |
| 102 | __mmap(addr: 0, size: Size, prot: 0x3 /* PROT_READ | PROT_WRITE */, |
| 103 | flags: 0x22 /* MAP_PRIVATE | MAP_ANONYMOUS */, fd: -1, offset: 0)); |
| 104 | |
| 105 | if (Mem == ((void *)-1) /* MAP_FAILED */) { |
| 106 | char Msg[] = "[hugify] could not allocate memory for text move\n" ; |
| 107 | reportError(Msg, Size: sizeof(Msg)); |
| 108 | } |
| 109 | |
| 110 | DEBUG(reportNumber("[hugify] allocated temporary address: " , (uint64_t)Mem, |
| 111 | 16);) |
| 112 | DEBUG(reportNumber("[hugify] allocated size: " , (uint64_t)Size, 16);) |
| 113 | |
| 114 | // Copy the hot code to a temporary location. |
| 115 | memcpy(Dest: Mem, Src: From, Len: Size); |
| 116 | |
| 117 | __prctl(Option: 41 /* PR_SET_THP_DISABLE */, Arg2: 0, Arg3: 0, Arg4: 0, Arg5: 0); |
| 118 | // Maps out the existing hot code. |
| 119 | if (__mmap(addr: reinterpret_cast<uint64_t>(From), size: Size, |
| 120 | prot: 0x3 /* PROT_READ | PROT_WRITE */, |
| 121 | flags: 0x32 /* MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE */, fd: -1, |
| 122 | offset: 0) == ((void *)-1) /*MAP_FAILED*/) { |
| 123 | char Msg[] = |
| 124 | "[hugify] failed to mmap memory for large page move terminating\n" ; |
| 125 | reportError(Msg, Size: sizeof(Msg)); |
| 126 | } |
| 127 | |
| 128 | // Mark the hot code page to be huge page. |
| 129 | if (__madvise(addr: From, length: Size, advice: 14 /* MADV_HUGEPAGE */) == -1) { |
| 130 | char Msg[] = "[hugify] setting MADV_HUGEPAGE is failed\n" ; |
| 131 | reportError(Msg, Size: sizeof(Msg)); |
| 132 | } |
| 133 | |
| 134 | // Copy the hot code back. |
| 135 | memcpy(Dest: From, Src: Mem, Len: Size); |
| 136 | |
| 137 | // Change permission back to read-only, ignore failure |
| 138 | __mprotect(addr: From, len: Size, prot: 0x5 /* PROT_READ | PROT_EXEC */); |
| 139 | |
| 140 | __munmap(addr: Mem, size: Size); |
| 141 | } |
| 142 | |
| 143 | extern "C" void __bolt_hugify_self_impl() { |
| 144 | uint8_t *HotStart = (uint8_t *)&__hot_start; |
| 145 | uint8_t *HotEnd = (uint8_t *)&__hot_end; |
| 146 | // Make sure the start and end are aligned with huge page address |
| 147 | const size_t HugePageBytes = 2L * 1024 * 1024; |
| 148 | uint8_t *From = HotStart - ((intptr_t)HotStart & (HugePageBytes - 1)); |
| 149 | uint8_t *To = HotEnd + (HugePageBytes - 1); |
| 150 | To -= (intptr_t)To & (HugePageBytes - 1); |
| 151 | |
| 152 | DEBUG(reportNumber("[hugify] hot start: " , (uint64_t)HotStart, 16);) |
| 153 | DEBUG(reportNumber("[hugify] hot end: " , (uint64_t)HotEnd, 16);) |
| 154 | DEBUG(reportNumber("[hugify] aligned huge page from: " , (uint64_t)From, 16);) |
| 155 | DEBUG(reportNumber("[hugify] aligned huge page to: " , (uint64_t)To, 16);) |
| 156 | |
| 157 | if (!hasPagecacheTHPSupport()) { |
| 158 | DEBUG(report( |
| 159 | "[hugify] workaround with memory alignment for kernel < 5.10\n" );) |
| 160 | hugifyForOldKernel(From, To); |
| 161 | return; |
| 162 | } |
| 163 | |
| 164 | if (__madvise(addr: From, length: (To - From), advice: 14 /* MADV_HUGEPAGE */) == -1) { |
| 165 | char Msg[] = "[hugify] failed to allocate large page\n" ; |
| 166 | // TODO: allow user to control the failure behavior. |
| 167 | reportError(Msg, Size: sizeof(Msg)); |
| 168 | } |
| 169 | } |
| 170 | |
| 171 | /// This is hooking ELF's entry, it needs to save all machine state. |
| 172 | extern "C" __attribute((naked)) void __bolt_hugify_self() { |
| 173 | // clang-format off |
| 174 | #if defined(__x86_64__) |
| 175 | __asm__ __volatile__(SAVE_ALL "call __bolt_hugify_self_impl\n" RESTORE_ALL |
| 176 | "jmp __bolt_hugify_start_program\n" |
| 177 | :::); |
| 178 | #elif defined(__aarch64__) || defined(__arm64__) |
| 179 | __asm__ __volatile__(SAVE_ALL "bl __bolt_hugify_self_impl\n" RESTORE_ALL |
| 180 | "adrp x16, __bolt_hugify_start_program\n" |
| 181 | "add x16, x16, #:lo12:__bolt_hugify_start_program\n" |
| 182 | "br x16\n" |
| 183 | :::); |
| 184 | #else |
| 185 | __exit(1); |
| 186 | #endif |
| 187 | // clang-format on |
| 188 | } |
| 189 | #endif |
| 190 | |