1 | //===-- clear_cache.c - Implement __clear_cache ---------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "int_lib.h" |
10 | #if defined(__linux__) |
11 | #include <assert.h> |
12 | #endif |
13 | #include <stddef.h> |
14 | |
15 | #if __APPLE__ |
16 | #include <libkern/OSCacheControl.h> |
17 | #endif |
18 | |
19 | #if defined(_WIN32) |
20 | // Forward declare Win32 APIs since the GCC mode driver does not handle the |
21 | // newer SDKs as well as needed. |
22 | uint32_t FlushInstructionCache(uintptr_t hProcess, void *lpBaseAddress, |
23 | uintptr_t dwSize); |
24 | uintptr_t GetCurrentProcess(void); |
25 | #endif |
26 | |
27 | #if defined(__FreeBSD__) && defined(__arm__) |
28 | // clang-format off |
29 | #include <sys/types.h> |
30 | #include <machine/sysarch.h> |
31 | // clang-format on |
32 | #endif |
33 | |
34 | #if defined(__NetBSD__) && defined(__arm__) |
35 | #include <machine/sysarch.h> |
36 | #endif |
37 | |
38 | #if defined(__OpenBSD__) && (defined(__arm__) || defined(__mips__) || defined(__riscv)) |
39 | // clang-format off |
40 | #include <sys/types.h> |
41 | #include <machine/sysarch.h> |
42 | // clang-format on |
43 | #endif |
44 | |
45 | #if defined(__linux__) && defined(__mips__) |
46 | #include <sys/cachectl.h> |
47 | #include <sys/syscall.h> |
48 | #include <unistd.h> |
49 | #endif |
50 | |
51 | #if defined(__linux__) && defined(__riscv) |
52 | // to get platform-specific syscall definitions |
53 | #include <linux/unistd.h> |
54 | #endif |
55 | |
// The compiler generates calls to __clear_cache() when creating
// trampoline functions on the stack for use with nested functions.
// It is expected to invalidate the instruction cache for the
// specified range.
//
// start is the address of the first byte of the range and end is the address
// one past its last byte. The implementation is selected at compile time from
// the target architecture and operating system; targets for which no
// implementation is provided call compilerrt_abort().

void __clear_cache(void *start, void *end) {
#if __i386__ || __x86_64__ || defined(_M_IX86) || defined(_M_X64)
  // Intel processors have a unified instruction and data cache
  // so there is nothing to do
#elif defined(_WIN32) && (defined(__arm__) || defined(__aarch64__))
  // Compute the length through uintptr_t, as the other branches do, rather
  // than relying on the GNU extension that permits arithmetic on void *.
  FlushInstructionCache(GetCurrentProcess(), start,
                        (uintptr_t)end - (uintptr_t)start);
#elif defined(__arm__) && !defined(__APPLE__)
#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
  struct arm_sync_icache_args arg;

  arg.addr = (uintptr_t)start;
  arg.len = (uintptr_t)end - (uintptr_t)start;

  sysarch(ARM_SYNC_ICACHE, &arg);
#elif defined(__linux__)
// We used to include asm/unistd.h for the __ARM_NR_cacheflush define, but
// it also brought many other unused defines, as well as a dependency on
// kernel headers to be installed.
//
// This value is stable at least since Linux 3.13 and should remain so for
// compatibility reasons, warranting its redefinition here.
#define __ARM_NR_cacheflush 0x0f0002
  // Arguments are pinned to r0-r2 and the syscall number to r7; the result
  // of the call is read back from r0 through start_reg.
  register int start_reg __asm("r0") = (int)(intptr_t)start;
  const register int end_reg __asm("r1") = (int)(intptr_t)end;
  const register int flags __asm("r2") = 0;
  const register int syscall_nr __asm("r7") = __ARM_NR_cacheflush;
  __asm __volatile("svc 0x0"
                   : "=r"(start_reg)
                   : "r"(syscall_nr), "r"(start_reg), "r"(end_reg), "r"(flags));
  assert(start_reg == 0 && "Cache flush syscall failed.");
#else
  compilerrt_abort();
#endif
#elif defined(__linux__) && defined(__loongarch__)
  __asm__ volatile("ibar 0");
#elif defined(__mips__)
  const uintptr_t start_int = (uintptr_t)start;
  const uintptr_t end_int = (uintptr_t)end;
  uintptr_t synci_step;
  // Hardware register $1 holds the address step to use with synci; a value of
  // zero means there is nothing to synchronize.
  __asm__ volatile("rdhwr %0, $1" : "=r"(synci_step));
  if (synci_step != 0) {
#if __mips_isa_rev >= 6
    // synci acts on the whole cache line containing the given address, so the
    // walk has to begin at the start of the line that contains start_int.
    // Stepping from an unaligned address could step over the final line of
    // the range without ever touching it.
    for (uintptr_t p = start_int & ~(synci_step - 1); p < end_int;
         p += synci_step)
      __asm__ volatile("synci 0(%0)" : : "r"(p));

    // The last "move $at, $0" is the target of jr.hb instead of delay slot.
    __asm__ volatile(".set noat\n"
                     "sync\n"
                     "addiupc $at, 12\n"
                     "jr.hb $at\n"
                     "move $at, $0\n"
                     ".set at");
#elif defined(__linux__) || defined(__OpenBSD__)
    // Pre-R6 may not be globalized. And some implementations may give strange
    // synci_step. So, let's use libc call for it.
    _flush_cache(start, end_int - start_int, BCACHE);
#else
    (void)start_int;
    (void)end_int;
    compilerrt_abort();
#endif
  }
#elif defined(__aarch64__) && !defined(__APPLE__)
  uint64_t xstart = (uint64_t)(uintptr_t)start;
  uint64_t xend = (uint64_t)(uintptr_t)end;

  // Get Cache Type Info. The register is read on the first call and the
  // value is kept in a function-local static for later calls.
  static uint64_t ctr_el0 = 0;
  if (ctr_el0 == 0)
    __asm __volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));

  // The DC and IC instructions must use 64-bit registers so we don't use
  // uintptr_t in case this runs in an ILP32 environment.
  uint64_t addr;

  // If CTR_EL0.IDC is set, data cache cleaning to the point of unification
  // is not required for instruction to data coherence.
  if (((ctr_el0 >> 28) & 0x1) == 0x0) {
    // Line size in bytes is 4 << (bits 19:16 of CTR_EL0).
    const size_t dcache_line_size = 4 << ((ctr_el0 >> 16) & 15);
    for (addr = xstart & ~(dcache_line_size - 1); addr < xend;
         addr += dcache_line_size)
      __asm __volatile("dc cvau, %0" ::"r"(addr));
  }
  __asm __volatile("dsb ish");

  // If CTR_EL0.DIC is set, instruction cache invalidation to the point of
  // unification is not required for instruction to data coherence.
  if (((ctr_el0 >> 29) & 0x1) == 0x0) {
    // Line size in bytes is 4 << (bits 3:0 of CTR_EL0).
    const size_t icache_line_size = 4 << ((ctr_el0 >> 0) & 15);
    for (addr = xstart & ~(icache_line_size - 1); addr < xend;
         addr += icache_line_size)
      __asm __volatile("ic ivau, %0" ::"r"(addr));
    __asm __volatile("dsb ish");
  }
  __asm __volatile("isb sy");
#elif defined(__powerpc__)
  // Newer CPUs have a bigger line size made of multiple blocks, so the
  // following value is a minimal common denominator for what used to be
  // a single block cache line and is therefore inefficient.
  const size_t line_size = 32;
  const size_t len = (uintptr_t)end - (uintptr_t)start;

  // Round the range outwards to whole lines: start down, end up.
  const uintptr_t mask = ~(line_size - 1);
  const uintptr_t start_line = ((uintptr_t)start) & mask;
  const uintptr_t end_line = ((uintptr_t)start + len + line_size - 1) & mask;

  for (uintptr_t line = start_line; line < end_line; line += line_size)
    __asm__ volatile("dcbf 0, %0" : : "r"(line));
  __asm__ volatile("sync");

  for (uintptr_t line = start_line; line < end_line; line += line_size)
    __asm__ volatile("icbi 0, %0" : : "r"(line));
  __asm__ volatile("isync");
#elif defined(__sparc__)
  const size_t dword_size = 8;
  const size_t len = (uintptr_t)end - (uintptr_t)start;

  // Round the range outwards to whole doublewords: start down, end up.
  const uintptr_t mask = ~(dword_size - 1);
  const uintptr_t start_dword = ((uintptr_t)start) & mask;
  const uintptr_t end_dword = ((uintptr_t)start + len + dword_size - 1) & mask;

  for (uintptr_t dword = start_dword; dword < end_dword; dword += dword_size)
    __asm__ volatile("flush %0" : : "r"(dword));
#elif defined(__riscv) && defined(__linux__)
  // See: arch/riscv/include/asm/cacheflush.h, arch/riscv/kernel/sys_riscv.c
  register void *start_reg __asm("a0") = start;
  const register void *end_reg __asm("a1") = end;
  // "0" means that we clear cache for all threads (SYS_RISCV_FLUSH_ICACHE_ALL)
  const register long flags __asm("a2") = 0;
  const register long syscall_nr __asm("a7") = __NR_riscv_flush_icache;
  __asm __volatile("ecall"
                   : "=r"(start_reg)
                   : "r"(start_reg), "r"(end_reg), "r"(flags), "r"(syscall_nr));
  assert(start_reg == 0 && "Cache flush syscall failed.");
#elif defined(__riscv) && defined(__OpenBSD__)
  struct riscv_sync_icache_args arg;

  arg.addr = (uintptr_t)start;
  arg.len = (uintptr_t)end - (uintptr_t)start;

  sysarch(RISCV_SYNC_ICACHE, &arg);
#elif defined(__ve__)
  __asm__ volatile("fencec 2");
#else
#if __APPLE__
  // On Darwin, sys_icache_invalidate() provides this functionality.
  // The length is computed through uintptr_t, as in the other branches,
  // rather than with GNU-extension arithmetic on void *.
  sys_icache_invalidate(start, (size_t)((uintptr_t)end - (uintptr_t)start));
#else
  compilerrt_abort();
#endif
#endif
}
213 | |