| 1 | //===-- clear_cache.c - Implement __clear_cache ---------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "int_lib.h" |
| 10 | #if defined(__linux__) |
| 11 | #include <assert.h> |
| 12 | #endif |
| 13 | #include <stddef.h> |
| 14 | |
| 15 | #if __APPLE__ |
| 16 | #include <libkern/OSCacheControl.h> |
| 17 | #endif |
| 18 | |
| 19 | #if defined(_WIN32) |
| 20 | // Forward declare Win32 APIs since the GCC mode driver does not handle the |
| 21 | // newer SDKs as well as needed. |
| 22 | uint32_t FlushInstructionCache(uintptr_t hProcess, void *lpBaseAddress, |
| 23 | uintptr_t dwSize); |
| 24 | uintptr_t GetCurrentProcess(void); |
| 25 | #endif |
| 26 | |
| 27 | #if defined(__FreeBSD__) && defined(__arm__) |
| 28 | // clang-format off |
| 29 | #include <sys/types.h> |
| 30 | #include <machine/sysarch.h> |
| 31 | // clang-format on |
| 32 | #endif |
| 33 | |
| 34 | #if defined(__NetBSD__) && defined(__arm__) |
| 35 | #include <machine/sysarch.h> |
| 36 | #endif |
| 37 | |
| 38 | #if defined(__OpenBSD__) && (defined(__arm__) || defined(__mips__) || defined(__riscv)) |
| 39 | // clang-format off |
| 40 | #include <sys/types.h> |
| 41 | #include <machine/sysarch.h> |
| 42 | // clang-format on |
| 43 | #endif |
| 44 | |
| 45 | #if defined(__linux__) && defined(__mips__) |
| 46 | #include <sys/cachectl.h> |
| 47 | #include <sys/syscall.h> |
| 48 | #include <unistd.h> |
| 49 | #endif |
| 50 | |
| 51 | #if defined(__linux__) && defined(__riscv) |
| 52 | // to get platform-specific syscall definitions |
| 53 | #include <linux/unistd.h> |
| 54 | #endif |
| 55 | |
| 56 | // The compiler generates calls to __clear_cache() when creating |
| 57 | // trampoline functions on the stack for use with nested functions. |
| 58 | // It is expected to invalidate the instruction cache for the |
| 59 | // specified range. |
| 60 | |
| 61 | void __clear_cache(void *start, void *end) { |
| 62 | #if defined(_WIN32) && \ |
| 63 | (defined(__arm__) || defined(__aarch64__) || defined(__arm64ec__)) |
| 64 | FlushInstructionCache(GetCurrentProcess(), start, end - start); |
| 65 | #elif __i386__ || __x86_64__ || defined(_M_IX86) || defined(_M_X64) |
| 66 | // Intel processors have a unified instruction and data cache |
| 67 | // so there is nothing to do |
| 68 | #elif defined(__s390__) |
| 69 | // no-op |
| 70 | #elif defined(__arm__) && !defined(__APPLE__) |
| 71 | #if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) |
| 72 | struct arm_sync_icache_args arg; |
| 73 | |
| 74 | arg.addr = (uintptr_t)start; |
| 75 | arg.len = (uintptr_t)end - (uintptr_t)start; |
| 76 | |
| 77 | sysarch(ARM_SYNC_ICACHE, &arg); |
| 78 | #elif defined(__linux__) |
| 79 | // We used to include asm/unistd.h for the __ARM_NR_cacheflush define, but |
| 80 | // it also brought many other unused defines, as well as a dependency on |
| 81 | // kernel headers to be installed. |
| 82 | // |
| 83 | // This value is stable at least since Linux 3.13 and should remain so for |
| 84 | // compatibility reasons, warranting it's re-definition here. |
| 85 | #define __ARM_NR_cacheflush 0x0f0002 |
| 86 | register int start_reg __asm("r0" ) = (int)(intptr_t)start; |
| 87 | const register int end_reg __asm("r1" ) = (int)(intptr_t)end; |
| 88 | const register int flags __asm("r2" ) = 0; |
| 89 | const register int syscall_nr __asm("r7" ) = __ARM_NR_cacheflush; |
| 90 | __asm __volatile("svc 0x0" |
| 91 | : "=r" (start_reg) |
| 92 | : "r" (syscall_nr), "r" (start_reg), "r" (end_reg), "r" (flags)); |
| 93 | assert(start_reg == 0 && "Cache flush syscall failed." ); |
| 94 | #else |
| 95 | compilerrt_abort(); |
| 96 | #endif |
| 97 | #elif defined(__linux__) && defined(__loongarch__) |
| 98 | __asm__ volatile("ibar 0" ); |
| 99 | #elif defined(__mips__) |
| 100 | const uintptr_t start_int = (uintptr_t)start; |
| 101 | const uintptr_t end_int = (uintptr_t)end; |
| 102 | uintptr_t synci_step; |
| 103 | __asm__ volatile("rdhwr %0, $1" : "=r" (synci_step)); |
| 104 | if (synci_step != 0) { |
| 105 | #if __mips_isa_rev >= 6 |
| 106 | for (uintptr_t p = start_int; p < end_int; p += synci_step) |
| 107 | __asm__ volatile("synci 0(%0)" : : "r" (p)); |
| 108 | |
| 109 | // The last "move $at, $0" is the target of jr.hb instead of delay slot. |
| 110 | __asm__ volatile(".set noat\n" |
| 111 | "sync\n" |
| 112 | "addiupc $at, 12\n" |
| 113 | "jr.hb $at\n" |
| 114 | "move $at, $0\n" |
| 115 | ".set at" ); |
| 116 | #elif defined(__linux__) || defined(__OpenBSD__) |
| 117 | // Pre-R6 may not be globalized. And some implementations may give strange |
| 118 | // synci_step. So, let's use libc call for it. |
| 119 | _flush_cache(start, end_int - start_int, BCACHE); |
| 120 | #else |
| 121 | (void)start_int; |
| 122 | (void)end_int; |
| 123 | compilerrt_abort(); |
| 124 | #endif |
| 125 | } |
| 126 | #elif defined(__aarch64__) && !defined(__APPLE__) |
| 127 | uint64_t xstart = (uint64_t)(uintptr_t)start; |
| 128 | uint64_t xend = (uint64_t)(uintptr_t)end; |
| 129 | |
| 130 | // Get Cache Type Info. |
| 131 | static uint64_t ctr_el0 = 0; |
| 132 | if (ctr_el0 == 0) |
| 133 | __asm __volatile("mrs %0, ctr_el0" : "=r" (ctr_el0)); |
| 134 | |
| 135 | // The DC and IC instructions must use 64-bit registers so we don't use |
| 136 | // uintptr_t in case this runs in an IPL32 environment. |
| 137 | uint64_t addr; |
| 138 | |
| 139 | // If CTR_EL0.IDC is set, data cache cleaning to the point of unification |
| 140 | // is not required for instruction to data coherence. |
| 141 | if (((ctr_el0 >> 28) & 0x1) == 0x0) { |
| 142 | const size_t dcache_line_size = 4 << ((ctr_el0 >> 16) & 15); |
| 143 | for (addr = xstart & ~(dcache_line_size - 1); addr < xend; |
| 144 | addr += dcache_line_size) |
| 145 | __asm __volatile("dc cvau, %0" ::"r" (addr)); |
| 146 | } |
| 147 | __asm __volatile("dsb ish" ); |
| 148 | |
| 149 | // If CTR_EL0.DIC is set, instruction cache invalidation to the point of |
| 150 | // unification is not required for instruction to data coherence. |
| 151 | if (((ctr_el0 >> 29) & 0x1) == 0x0) { |
| 152 | const size_t icache_line_size = 4 << ((ctr_el0 >> 0) & 15); |
| 153 | for (addr = xstart & ~(icache_line_size - 1); addr < xend; |
| 154 | addr += icache_line_size) |
| 155 | __asm __volatile("ic ivau, %0" ::"r" (addr)); |
| 156 | __asm __volatile("dsb ish" ); |
| 157 | } |
| 158 | __asm __volatile("isb sy" ); |
| 159 | #elif defined(__powerpc__) |
| 160 | // Newer CPUs have a bigger line size made of multiple blocks, so the |
| 161 | // following value is a minimal common denominator for what used to be |
| 162 | // a single block cache line and is therefore inneficient. |
| 163 | const size_t line_size = 32; |
| 164 | const size_t len = (uintptr_t)end - (uintptr_t)start; |
| 165 | |
| 166 | const uintptr_t mask = ~(line_size - 1); |
| 167 | const uintptr_t start_line = ((uintptr_t)start) & mask; |
| 168 | const uintptr_t end_line = ((uintptr_t)start + len + line_size - 1) & mask; |
| 169 | |
| 170 | for (uintptr_t line = start_line; line < end_line; line += line_size) |
| 171 | __asm__ volatile("dcbf 0, %0" : : "r" (line)); |
| 172 | __asm__ volatile("sync" ); |
| 173 | |
| 174 | for (uintptr_t line = start_line; line < end_line; line += line_size) |
| 175 | __asm__ volatile("icbi 0, %0" : : "r" (line)); |
| 176 | __asm__ volatile("isync" ); |
| 177 | #elif defined(__sparc__) |
| 178 | const size_t dword_size = 8; |
| 179 | const size_t len = (uintptr_t)end - (uintptr_t)start; |
| 180 | |
| 181 | const uintptr_t mask = ~(dword_size - 1); |
| 182 | const uintptr_t start_dword = ((uintptr_t)start) & mask; |
| 183 | const uintptr_t end_dword = ((uintptr_t)start + len + dword_size - 1) & mask; |
| 184 | |
| 185 | for (uintptr_t dword = start_dword; dword < end_dword; dword += dword_size) |
| 186 | __asm__ volatile("flush %0" : : "r" (dword)); |
| 187 | #elif defined(__riscv) && defined(__linux__) |
| 188 | // See: arch/riscv/include/asm/cacheflush.h, arch/riscv/kernel/sys_riscv.c |
| 189 | register void *start_reg __asm("a0" ) = start; |
| 190 | const register void *end_reg __asm("a1" ) = end; |
| 191 | // "0" means that we clear cache for all threads (SYS_RISCV_FLUSH_ICACHE_ALL) |
| 192 | const register long flags __asm("a2" ) = 0; |
| 193 | const register long syscall_nr __asm("a7" ) = __NR_riscv_flush_icache; |
| 194 | __asm __volatile("ecall" |
| 195 | : "=r" (start_reg) |
| 196 | : "r" (start_reg), "r" (end_reg), "r" (flags), "r" (syscall_nr)); |
| 197 | assert(start_reg == 0 && "Cache flush syscall failed." ); |
| 198 | #elif defined(__riscv) && defined(__OpenBSD__) |
| 199 | struct riscv_sync_icache_args arg; |
| 200 | |
| 201 | arg.addr = (uintptr_t)start; |
| 202 | arg.len = (uintptr_t)end - (uintptr_t)start; |
| 203 | |
| 204 | sysarch(RISCV_SYNC_ICACHE, &arg); |
| 205 | #elif defined(__ve__) |
| 206 | __asm__ volatile("fencec 2" ); |
| 207 | #else |
| 208 | #if __APPLE__ |
| 209 | // On Darwin, sys_icache_invalidate() provides this functionality |
| 210 | sys_icache_invalidate(start, end - start); |
| 211 | #else |
| 212 | compilerrt_abort(); |
| 213 | #endif |
| 214 | #endif |
| 215 | } |
| 216 | |