1 | //===-- clear_cache.c - Implement __clear_cache ---------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "int_lib.h" |
10 | #if defined(__linux__) |
11 | #include <assert.h> |
12 | #endif |
13 | #include <stddef.h> |
14 | |
15 | #if __APPLE__ |
16 | #include <libkern/OSCacheControl.h> |
17 | #endif |
18 | |
19 | #if defined(_WIN32) |
20 | // Forward declare Win32 APIs since the GCC mode driver does not handle the |
21 | // newer SDKs as well as needed. |
22 | uint32_t FlushInstructionCache(uintptr_t hProcess, void *lpBaseAddress, |
23 | uintptr_t dwSize); |
24 | uintptr_t GetCurrentProcess(void); |
25 | #endif |
26 | |
27 | #if defined(__FreeBSD__) && defined(__arm__) |
28 | // clang-format off |
29 | #include <sys/types.h> |
30 | #include <machine/sysarch.h> |
31 | // clang-format on |
32 | #endif |
33 | |
34 | #if defined(__NetBSD__) && defined(__arm__) |
35 | #include <machine/sysarch.h> |
36 | #endif |
37 | |
38 | #if defined(__OpenBSD__) && (defined(__arm__) || defined(__mips__) || defined(__riscv)) |
39 | // clang-format off |
40 | #include <sys/types.h> |
41 | #include <machine/sysarch.h> |
42 | // clang-format on |
43 | #endif |
44 | |
45 | #if defined(__linux__) && defined(__mips__) |
46 | #include <sys/cachectl.h> |
47 | #include <sys/syscall.h> |
48 | #include <unistd.h> |
49 | #endif |
50 | |
51 | #if defined(__linux__) && defined(__riscv) |
52 | // to get platform-specific syscall definitions |
53 | #include <linux/unistd.h> |
54 | #endif |
55 | |
// The compiler generates calls to __clear_cache() when creating
// trampoline functions on the stack for use with nested functions.
// It is expected to invalidate the instruction cache for the
// specified range [start, end).
//
// On targets whose instruction fetch is coherent with data stores
// (x86, SystemZ) this is a no-op; on all other targets it performs the
// architecture-specific cache-maintenance sequence or system call.

void __clear_cache(void *start, void *end) {
#if defined(_WIN32) && \
    (defined(__arm__) || defined(__aarch64__) || defined(__arm64ec__))
  // Windows exposes instruction-cache flushing as a system service.
  // Note: `end - start` is arithmetic on void* (byte units), a GNU C
  // extension also accepted by clang.
  FlushInstructionCache(GetCurrentProcess(), start, end - start);
#elif __i386__ || __x86_64__ || defined(_M_IX86) || defined(_M_X64)
  // Intel processors have a unified instruction and data cache
  // so there is nothing to do
#elif defined(__s390__)
  // no-op: SystemZ keeps instruction fetch coherent with stores.
#elif defined(__arm__) && !defined(__APPLE__)
#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
  // The BSDs expose icache synchronization via the sysarch(2)
  // ARM_SYNC_ICACHE operation, which takes an address/length pair.
  struct arm_sync_icache_args arg;

  arg.addr = (uintptr_t)start;
  arg.len = (uintptr_t)end - (uintptr_t)start;

  sysarch(ARM_SYNC_ICACHE, &arg);
#elif defined(__linux__)
  // We used to include asm/unistd.h for the __ARM_NR_cacheflush define, but
  // it also brought many other unused defines, as well as a dependency on
  // kernel headers to be installed.
  //
  // This value is stable at least since Linux 3.13 and should remain so for
  // compatibility reasons, warranting its re-definition here.
#define __ARM_NR_cacheflush 0x0f0002
  // Invoke the cacheflush syscall directly with the EABI convention:
  // r0 = start, r1 = end, r2 = flags (0), r7 = syscall number.
  // On return, r0 (start_reg) holds the syscall result.
  register int start_reg __asm("r0") = (int)(intptr_t)start;
  const register int end_reg __asm("r1") = (int)(intptr_t)end;
  const register int flags __asm("r2") = 0;
  const register int syscall_nr __asm("r7") = __ARM_NR_cacheflush;
  __asm __volatile("svc 0x0"
                   : "=r"(start_reg)
                   : "r"(syscall_nr), "r"(start_reg), "r"(end_reg), "r"(flags));
  // assert is only available here because <assert.h> is included for
  // __linux__ at the top of the file.
  assert(start_reg == 0 && "Cache flush syscall failed.");
#else
  compilerrt_abort();
#endif
#elif defined(__linux__) && defined(__loongarch__)
  // LoongArch: "ibar 0" is an instruction-fetch barrier; the whole icache
  // is synchronized, so no per-line loop over [start, end) is needed.
  __asm__ volatile("ibar 0");
#elif defined(__mips__)
  const uintptr_t start_int = (uintptr_t)start;
  const uintptr_t end_int = (uintptr_t)end;
  uintptr_t synci_step;
  // RDHWR $1 reads the SYNCI step hardware register. A step of 0 means
  // SYNCI is not required on this implementation, so we skip everything.
  __asm__ volatile("rdhwr %0, $1" : "=r"(synci_step));
  if (synci_step != 0) {
#if __mips_isa_rev >= 6
    // MIPSr6: SYNCI each line, then SYNC + JR.HB to clear instruction
    // hazards before returning.
    for (uintptr_t p = start_int; p < end_int; p += synci_step)
      __asm__ volatile("synci 0(%0)" : : "r"(p));

    // The last "move $at, $0" is the target of jr.hb instead of delay slot.
    __asm__ volatile(".set noat\n"
                     "sync\n"
                     "addiupc $at, 12\n"
                     "jr.hb $at\n"
                     "move $at, $0\n"
                     ".set at");
#elif defined(__linux__) || defined(__OpenBSD__)
    // Pre-R6 may not be globalized. And some implementations may give strange
    // synci_step. So, let's use libc call for it.
    _flush_cache(start, end_int - start_int, BCACHE);
#else
    // Silence unused-variable warnings on configurations we cannot handle.
    (void)start_int;
    (void)end_int;
    compilerrt_abort();
#endif
  }
#elif defined(__aarch64__) && !defined(__APPLE__)
  uint64_t xstart = (uint64_t)(uintptr_t)start;
  uint64_t xend = (uint64_t)(uintptr_t)end;

  // Get Cache Type Info.
  // CTR_EL0 is read lazily and cached in a static; the unsynchronized
  // check is benign because every thread would store the same value.
  // NOTE(review): this assumes CTR_EL0 reads identically on all cores the
  // thread may run on (Linux sanitizes it on heterogeneous systems) —
  // confirm for other kernels.
  static uint64_t ctr_el0 = 0;
  if (ctr_el0 == 0)
    __asm __volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));

  // The DC and IC instructions must use 64-bit registers so we don't use
  // uintptr_t in case this runs in an IPL32 environment.
  uint64_t addr;

  // If CTR_EL0.IDC is set, data cache cleaning to the point of unification
  // is not required for instruction to data coherence.
  if (((ctr_el0 >> 28) & 0x1) == 0x0) {
    // CTR_EL0[19:16] (DminLine) is log2 of the smallest D-cache line in
    // 4-byte words, so 4 << DminLine yields the line size in bytes.
    const size_t dcache_line_size = 4 << ((ctr_el0 >> 16) & 15);
    // Clean each D-cache line to the point of unification (DC CVAU),
    // starting from the line-aligned base of the range.
    for (addr = xstart & ~(dcache_line_size - 1); addr < xend;
         addr += dcache_line_size)
      __asm __volatile("dc cvau, %0" ::"r"(addr));
  }
  // Ensure the cleans are visible before invalidating the I-cache.
  __asm __volatile("dsb ish");

  // If CTR_EL0.DIC is set, instruction cache invalidation to the point of
  // unification is not required for instruction to data coherence.
  if (((ctr_el0 >> 29) & 0x1) == 0x0) {
    // CTR_EL0[3:0] (IminLine) encodes the I-cache line size the same way.
    const size_t icache_line_size = 4 << ((ctr_el0 >> 0) & 15);
    for (addr = xstart & ~(icache_line_size - 1); addr < xend;
         addr += icache_line_size)
      __asm __volatile("ic ivau, %0" ::"r"(addr));
    __asm __volatile("dsb ish");
  }
  // Flush the pipeline so subsequent instruction fetches see the new code.
  __asm __volatile("isb sy");
#elif defined(__powerpc__)
  // Newer CPUs have a bigger line size made of multiple blocks, so the
  // following value is a minimal common denominator for what used to be
  // a single block cache line and is therefore inefficient.
  const size_t line_size = 32;
  const size_t len = (uintptr_t)end - (uintptr_t)start;

  const uintptr_t mask = ~(line_size - 1);
  const uintptr_t start_line = ((uintptr_t)start) & mask;
  const uintptr_t end_line = ((uintptr_t)start + len + line_size - 1) & mask;

  // First flush modified data-cache blocks back to memory (dcbf)...
  for (uintptr_t line = start_line; line < end_line; line += line_size)
    __asm__ volatile("dcbf 0, %0" : : "r"(line));
  __asm__ volatile("sync");

  // ...then invalidate the corresponding instruction-cache blocks (icbi).
  for (uintptr_t line = start_line; line < end_line; line += line_size)
    __asm__ volatile("icbi 0, %0" : : "r"(line));
  __asm__ volatile("isync");
#elif defined(__sparc__)
  // SPARC's FLUSH instruction synchronizes the I-cache one doubleword
  // (8 bytes) at a time, so walk the range in aligned 8-byte steps.
  const size_t dword_size = 8;
  const size_t len = (uintptr_t)end - (uintptr_t)start;

  const uintptr_t mask = ~(dword_size - 1);
  const uintptr_t start_dword = ((uintptr_t)start) & mask;
  const uintptr_t end_dword = ((uintptr_t)start + len + dword_size - 1) & mask;

  for (uintptr_t dword = start_dword; dword < end_dword; dword += dword_size)
    __asm__ volatile("flush %0" : : "r"(dword));
#elif defined(__riscv) && defined(__linux__)
  // See: arch/riscv/include/asm/cacheflush.h, arch/riscv/kernel/sys_riscv.c
  // Issue the riscv_flush_icache syscall: a0 = start, a1 = end,
  // a2 = flags, a7 = syscall number; a0 holds the result on return.
  register void *start_reg __asm("a0") = start;
  const register void *end_reg __asm("a1") = end;
  // "0" means that we clear cache for all threads (SYS_RISCV_FLUSH_ICACHE_ALL)
  const register long flags __asm("a2") = 0;
  const register long syscall_nr __asm("a7") = __NR_riscv_flush_icache;
  __asm __volatile("ecall"
                   : "=r"(start_reg)
                   : "r"(start_reg), "r"(end_reg), "r"(flags), "r"(syscall_nr));
  assert(start_reg == 0 && "Cache flush syscall failed.");
#elif defined(__riscv) && defined(__OpenBSD__)
  // OpenBSD/riscv provides icache synchronization via sysarch(2), mirroring
  // the ARM path above.
  struct riscv_sync_icache_args arg;

  arg.addr = (uintptr_t)start;
  arg.len = (uintptr_t)end - (uintptr_t)start;

  sysarch(RISCV_SYNC_ICACHE, &arg);
#elif defined(__ve__)
  // NEC SX-Aurora VE: "fencec 2" synchronizes the instruction cache for
  // the whole address space, so the range arguments are not needed.
  __asm__ volatile("fencec 2");
#else
#if __APPLE__
  // On Darwin, sys_icache_invalidate() provides this functionality
  sys_icache_invalidate(start, end - start);
#else
  // No implementation for this target: trap rather than silently return
  // with a stale instruction cache.
  compilerrt_abort();
#endif
#endif
}
216 | |