1 | /* SPDX-License-Identifier: GPL-2.0-only */ |
2 | /* |
3 | * linux/arch/arm/mm/cache-v7.S |
4 | * |
5 | * Copyright (C) 2001 Deep Blue Solutions Ltd. |
6 | * Copyright (C) 2005 ARM Ltd. |
7 | * |
8 | * This is the "shell" of the ARMv7 processor support. |
9 | */ |
10 | #include <linux/linkage.h> |
11 | #include <linux/init.h> |
12 | #include <asm/assembler.h> |
13 | #include <asm/errno.h> |
14 | #include <asm/unwind.h> |
15 | #include <asm/hardware/cache-b15-rac.h> |
16 | |
17 | #include "proc-macros.S" |
18 | |
19 | .arch armv7-a |
20 | |
21 | #ifdef CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND |
22 | .globl icache_size /* global symbol, so it can be referenced from outside this file */ |
23 | .data |
24 | .align 2 |
25 | icache_size: /* I-cache line stride in bytes; v7_coherent_user_range loads it instead of using icache_line_size */ |
26 | .long 64 /* initial value; it lives in writable .data, so presumably it may be changed at run time - confirm */ |
27 | .text |
28 | #endif /* CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND */ |
29 | /* |
30 | * v7_invalidate_l1(): invalidate, without cleaning, every set/way of the L1 data cache. |
31 | * The secondary kernel init calls v7_flush_dcache_all before it enables the L1; however, |
32 | * the L1 comes out of reset in an undefined state, so the clean + invalidate performed by |
33 | * v7_flush_dcache_all causes a bunch of cache lines with uninitialized data and |
34 | * uninitialized tags to get written out to memory, which does really unpleasant things |
35 | * to the main processor. We fix this by performing an invalidate, rather than a |
36 | * clean + invalidate, before jumping into the kernel. |
37 | * |
38 | * This function needs to be called for both secondary cores startup and |
39 | * primary core resume procedures. Takes no arguments and uses no stack; r0-r3 and ip are overwritten. |
40 | */ |
41 | ENTRY(v7_invalidate_l1) |
42 | mov r0, #0 |
43 | mcr p15, 2, r0, c0, c0, 0 @ select L1 data cache in CSSELR |
44 | isb |
45 | mrc p15, 1, r0, c0, c0, 0 @ read cache geometry from CCSIDR |
46 | |
47 | movw r3, #0x3ff |
48 | and r3, r3, r0, lsr #3 @ 'Associativity' in CCSIDR[12:3], i.e. NumWays-1 |
49 | clz r1, r3 @ WayShift = number of leading zeros in NumWays-1 |
50 | mov r2, #1 |
51 | mov r3, r3, lsl r1 @ NumWays-1 shifted into bits [31:...] |
52 | movs r1, r2, lsl r1 @ #1 shifted left by same amount: the per-way decrement |
53 | moveq r1, #1 @ r1 needs value > 0 even if only 1 way |
54 | |
55 | and r2, r0, #0x7 |
56 | add r2, r2, #4 @ SetShift = CCSIDR[2:0] + 4 |
57 | |
58 | 1: movw ip, #0x7fff |
59 | and r0, ip, r0, lsr #13 @ 'NumSets' in CCSIDR[27:13], used as the highest set index |
60 | |
61 | 2: mov ip, r0, lsl r2 @ current set << SetShift |
62 | orr ip, ip, r3 @ Reg = (Way << WayShift) | (Set << SetShift) |
63 | mcr p15, 0, ip, c7, c6, 2 |
64 | subs r0, r0, #1 @ Set--, inner loop repeats until it goes negative |
65 | bpl 2b |
66 | subs r3, r3, r1 @ Way--, a borrow (carry clear) means way 0 was just done |
67 | bcc 3f |
68 | mrc p15, 1, r0, c0, c0, 0 @ re-read cache geometry from CCSIDR (r0 was consumed as the set counter) |
69 | b 1b |
70 | 3: dsb st |
71 | isb |
72 | ret lr |
73 | ENDPROC(v7_invalidate_l1) |
74 | |
75 | /* |
76 | * v7_flush_icache_all() |
77 | * |
78 | * Flush (invalidate) the whole I-cache. ALT_SMP()/ALT_UP() supply two alternatives: |
79 | * the Inner Shareable invalidate for SMP and the local I+BTB invalidate for UP. No dsb/isb is issued here. |
80 | * Registers: |
81 | * r0 - set to 0 (the value written by the mcr); no other register is modified |
82 | */ |
83 | ENTRY(v7_flush_icache_all) |
84 | mov r0, #0 |
85 | ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable |
86 | ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate |
87 | ret lr |
88 | ENDPROC(v7_flush_icache_all) |
89 | |
90 | /* |
91 | * v7_flush_dcache_louis() |
92 | * |
93 | * Flush the D-cache up to the Level of Unification Inner Shareable (LoUU is used on UP). |
94 | * r0 = CLIDR and r3 = LoU * 2 are handed to start_flush_levels inside v7_flush_dcache_all, which returns to our caller; LoU == 0 returns at once. |
95 | * Corrupted registers: r0-r6, r9-r10 |
96 | */ |
97 | |
98 | ENTRY(v7_flush_dcache_louis) |
99 | dmb @ ensure ordering with previous memory accesses |
100 | mrc p15, 1, r0, c0, c0, 1 @ read clidr, r0 = clidr |
101 | ALT_SMP(mov r3, r0, lsr #20) @ move LoUIS into position |
102 | ALT_UP( mov r3, r0, lsr #26) @ move LoUU into position |
103 | ands r3, r3, #7 << 1 @ extract LoU*2 field from clidr |
104 | bne start_flush_levels @ LoU != 0, start flushing (branch without link) |
105 | #ifdef CONFIG_ARM_ERRATA_643719 |
106 | ALT_SMP(mrc p15, 0, r2, c0, c0, 0) @ read main ID register |
107 | ALT_UP( ret lr) @ LoUU is zero, so nothing to do |
108 | movw r1, #:lower16:(0x410fc090 >> 4) @ ID of ARM Cortex A9 r0p? |
109 | movt r1, #:upper16:(0x410fc090 >> 4) |
110 | teq r1, r2, lsr #4 @ test for errata affected core and if so... |
111 | moveq r3, #1 << 1 @ fix LoUIS value: force it to 1 (r3 holds LoU * 2) |
112 | beq start_flush_levels @ start flushing cache levels |
113 | #endif |
114 | ret lr |
115 | ENDPROC(v7_flush_dcache_louis) |
116 | |
117 | /* |
118 | * v7_flush_dcache_all() |
119 | * |
120 | * Flush the whole D-cache: clean & invalidate by set/way every data or unified cache level |
121 | * below the Level of Coherency (LoC) read from CLIDR, starting at level 0. |
122 | * Corrupted registers: r0-r6, r9-r10 |
123 | * |
124 | * Takes no arguments. start_flush_levels is a second entry point (used by v7_flush_dcache_louis) that expects r0 = CLIDR and r3 = 2 * number of levels to flush. |
125 | */ |
126 | ENTRY(v7_flush_dcache_all) |
127 | dmb @ ensure ordering with previous memory accesses |
128 | mrc p15, 1, r0, c0, c0, 1 @ read clidr |
129 | mov r3, r0, lsr #23 @ move LoC into position |
130 | ands r3, r3, #7 << 1 @ extract LoC*2 from clidr |
131 | beq finished @ if loc is 0, then no need to clean |
132 | start_flush_levels: |
133 | mov r10, #0 @ start clean at cache level 0 |
134 | flush_levels: |
135 | add r2, r10, r10, lsr #1 @ work out 3x current cache level (r10 holds level * 2) |
136 | mov r1, r0, lsr r2 @ extract cache type bits from clidr |
137 | and r1, r1, #7 @ mask of the bits for current cache only |
138 | cmp r1, #2 @ see what cache we have at this level |
139 | blt skip @ skip if no cache, or just i-cache |
140 | #ifdef CONFIG_PREEMPTION |
141 | save_and_disable_irqs_notrace r9 @ make cssr&csidr read atomic |
142 | #endif |
143 | mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr |
144 | isb @ isb to sync the new cssr&csidr |
145 | mrc p15, 1, r1, c0, c0, 0 @ read the new csidr |
146 | #ifdef CONFIG_PREEMPTION |
147 | restore_irqs_notrace r9 |
148 | #endif |
149 | and r2, r1, #7 @ extract the length of the cache lines |
150 | add r2, r2, #4 @ add 4 (line length offset) |
151 | movw r4, #0x3ff |
152 | ands r4, r4, r1, lsr #3 @ find maximum number on the way size, Z is set if it is 0 |
153 | clz r5, r4 @ find bit position of way size increment |
154 | movw r6, #0x7fff |
155 | and r1, r6, r1, lsr #13 @ extract max number of the index size |
156 | mov r6, #1 |
157 | movne r4, r4, lsl r5 @ # of ways shifted into bits [31:...] (flags are still those of the ands above) |
158 | movne r6, r6, lsl r5 @ 1 shifted left by same amount |
159 | loop1: |
160 | mov r9, r1 @ create working copy of max index |
161 | loop2: |
162 | mov r5, r9, lsl r2 @ factor set number into r5 |
163 | orr r5, r5, r4 @ factor way number into r5 |
164 | orr r5, r5, r10 @ factor cache level into r5 |
165 | mcr p15, 0, r5, c7, c14, 2 @ clean & invalidate by set/way |
166 | subs r9, r9, #1 @ decrement the index, loop2 repeats until it goes negative |
167 | bge loop2 |
168 | subs r4, r4, r6 @ decrement the way, carry stays set until way 0 has been done |
169 | bcs loop1 |
170 | skip: |
171 | add r10, r10, #2 @ increment cache number (r10 steps by 2 per level) |
172 | cmp r3, r10 |
173 | #ifdef CONFIG_ARM_ERRATA_814220 |
174 | dsb |
175 | #endif |
176 | bgt flush_levels |
177 | finished: |
178 | mov r10, #0 @ switch back to cache level 0 |
179 | mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr |
180 | dsb st |
181 | isb |
182 | ret lr |
183 | ENDPROC(v7_flush_dcache_all) |
184 | |
185 | /* |
186 | * v7_flush_kern_cache_all() |
187 | * |
188 | * Flush the entire cache system. |
189 | * The data cache flush is now achieved using atomic clean / invalidates |
190 | * working outwards from L1 cache. This is done using Set/Way based cache |
191 | * maintenance instructions. |
192 | * The instruction cache can still be invalidated back to the point of |
193 | * unification in a single instruction. |
194 | * r4-r6, r9-r10 and lr are saved on the stack around the call to v7_flush_dcache_all; r0 is 0 on return. |
195 | */ |
196 | ENTRY(v7_flush_kern_cache_all) |
197 | stmfd sp!, {r4-r6, r9-r10, lr} |
198 | bl v7_flush_dcache_all |
199 | mov r0, #0 |
200 | ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable |
201 | ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate |
202 | ldmfd sp!, {r4-r6, r9-r10, lr} |
203 | ret lr |
204 | ENDPROC(v7_flush_kern_cache_all) |
205 | |
206 | /* |
207 | * v7_flush_kern_cache_louis(void) |
208 | * |
209 | * Flush the data cache up to Level of Unification Inner Shareable, then |
210 | * invalidate the I-cache to the point of unification. r4-r6, r9-r10 and lr are saved on the stack around the call to v7_flush_dcache_louis; r0 is 0 on return. |
211 | */ |
212 | ENTRY(v7_flush_kern_cache_louis) |
213 | stmfd sp!, {r4-r6, r9-r10, lr} |
214 | bl v7_flush_dcache_louis |
215 | mov r0, #0 |
216 | ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable |
217 | ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate |
218 | ldmfd sp!, {r4-r6, r9-r10, lr} |
219 | ret lr |
220 | ENDPROC(v7_flush_kern_cache_louis) |
221 | |
222 | /* |
223 | * v7_flush_user_cache_all() |
224 | * |
225 | * Flush all cache entries in a particular address space. Nothing is done here: |
226 | * execution falls through to v7_flush_user_cache_range, which returns at once. |
227 | * - mm - mm_struct describing address space |
228 | */ |
229 | ENTRY(v7_flush_user_cache_all) |
230 | /*FALLTHROUGH*/ |
231 | |
232 | /* |
233 | * v7_flush_user_cache_range(start, end, flags) |
234 | * |
235 | * Flush a range of cache entries in the specified address space. |
236 | * This implementation performs no cache maintenance; it simply returns. |
237 | * - start - start address (may not be aligned) |
238 | * - end - end address (exclusive, may not be aligned) |
239 | * - flags - vm_area_struct flags describing address space |
240 | * |
241 | * It is assumed that: |
242 | * - we have a VIPT cache. |
243 | */ |
244 | ENTRY(v7_flush_user_cache_range) |
245 | ret lr |
246 | ENDPROC(v7_flush_user_cache_all) |
247 | ENDPROC(v7_flush_user_cache_range) |
248 | |
249 | /* |
250 | * v7_coherent_kern_range(start,end) |
251 | * |
252 | * Ensure that the I and D caches are coherent within specified |
253 | * region. This is typically used when code has been written to |
254 | * a memory region, and will be executed. |
255 | * This entry point has no body of its own: it falls through into v7_coherent_user_range. |
256 | * - start - virtual start address of region |
257 | * - end - virtual end address of region |
258 | * |
259 | * It is assumed that: |
260 | * - the Icache does not read data from the write buffer |
261 | */ |
262 | ENTRY(v7_coherent_kern_range) |
263 | /* FALLTHROUGH */ |
264 | |
265 | /* |
266 | * v7_coherent_user_range(start,end) |
267 | * |
268 | * Ensure that the I and D caches are coherent within specified |
269 | * region. This is typically used when code has been written to |
270 | * a memory region, and will be executed. Two passes are made over the region: clean each D line, then invalidate each I line; a BTB invalidate follows. |
271 | * Returns 0 in r0 on the normal path, or -EFAULT through the 9001 fault path. r2, r3 and r12 are scratch. |
272 | * - start - virtual start address of region (r0, rounded down to a line boundary for each pass) |
273 | * - end - virtual end address of region (r1, exclusive: each pass stops once the address reaches it) |
274 | * |
275 | * It is assumed that: |
276 | * - the Icache does not read data from the write buffer |
277 | */ |
278 | ENTRY(v7_coherent_user_range) |
279 | UNWIND(.fnstart ) |
280 | dcache_line_size r2, r3 |
281 | sub r3, r2, #1 |
282 | bic r12, r0, r3 |
283 | #ifdef CONFIG_ARM_ERRATA_764369 |
284 | ALT_SMP(W(dsb)) |
285 | ALT_UP(W(nop)) |
286 | #endif |
287 | 1: |
288 | USER( mcr p15, 0, r12, c7, c11, 1 ) @ clean D line to the point of unification |
289 | add r12, r12, r2 |
290 | cmp r12, r1 |
291 | blo 1b |
292 | dsb ishst |
293 | #ifdef CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND |
294 | ldr r3, =icache_size |
295 | ldr r2, [r3, #0] |
296 | #else |
297 | icache_line_size r2, r3 |
298 | #endif |
299 | sub r3, r2, #1 |
300 | bic r12, r0, r3 |
301 | 2: |
302 | USER( mcr p15, 0, r12, c7, c5, 1 ) @ invalidate I line |
303 | add r12, r12, r2 |
304 | cmp r12, r1 |
305 | blo 2b |
306 | mov r0, #0 |
307 | ALT_SMP(mcr p15, 0, r0, c7, c1, 6) @ invalidate BTB Inner Shareable |
308 | ALT_UP(mcr p15, 0, r0, c7, c5, 6) @ invalidate BTB |
309 | dsb ishst |
310 | isb |
311 | ret lr |
312 | |
313 | /* |
314 | * Fault handling for the cache operations above. If a virtual address in the range (the operations take the line address in r12) |
315 | * isn't mapped, fail with -EFAULT. Control presumably arrives here through a fixup set up by USER() - confirm in <asm/assembler.h>. |
316 | */ |
317 | 9001: |
318 | #ifdef CONFIG_ARM_ERRATA_775420 |
319 | dsb |
320 | #endif |
321 | mov r0, #-EFAULT |
322 | ret lr |
323 | UNWIND(.fnend ) |
324 | ENDPROC(v7_coherent_kern_range) |
325 | ENDPROC(v7_coherent_user_range) |
326 | |
327 | /* |
328 | * v7_flush_kern_dcache_area(void *addr, size_t size) |
329 | * |
330 | * Ensure that the data held in the page kaddr is written back |
331 | * to the page in question. Every line from addr (rounded down to a D-cache line boundary) up to addr + size is cleaned & invalidated, then a dsb st is issued. |
332 | * r0-r3 are overwritten. The loop tests its condition after the operation, so at least one line is always processed. |
333 | * - addr - kernel address |
334 | * - size - region size |
335 | */ |
336 | ENTRY(v7_flush_kern_dcache_area) |
337 | dcache_line_size r2, r3 |
338 | add r1, r0, r1 |
339 | sub r3, r2, #1 |
340 | bic r0, r0, r3 |
341 | #ifdef CONFIG_ARM_ERRATA_764369 |
342 | ALT_SMP(W(dsb)) |
343 | ALT_UP(W(nop)) |
344 | #endif |
345 | 1: |
346 | mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line / unified line |
347 | add r0, r0, r2 |
348 | cmp r0, r1 |
349 | blo 1b |
350 | dsb st |
351 | ret lr |
352 | ENDPROC(v7_flush_kern_dcache_area) |
353 | |
354 | /* |
355 | * v7_dma_inv_range(start,end) |
356 | * File-local label (not declared with ENTRY); reached by branches from v7_dma_map_area and v7_dma_unmap_area. |
357 | * Invalidate the data cache within the specified region; we will |
358 | * be performing a DMA operation in this region and we want to |
359 | * purge old data in the cache. |
360 | * A start or end that is not line-aligned has its partial line cleaned & invalidated instead, presumably so that unrelated data sharing that line is written back rather than discarded. r0-r3 are overwritten. |
361 | * - start - virtual start address of region |
362 | * - end - virtual end address of region |
363 | */ |
364 | v7_dma_inv_range: |
365 | dcache_line_size r2, r3 |
366 | sub r3, r2, #1 |
367 | tst r0, r3 |
368 | bic r0, r0, r3 |
369 | #ifdef CONFIG_ARM_ERRATA_764369 |
370 | ALT_SMP(W(dsb)) |
371 | ALT_UP(W(nop)) |
372 | #endif |
373 | mcrne p15, 0, r0, c7, c14, 1 @ clean & invalidate D / U line (start was unaligned, flags from the tst on r0) |
374 | addne r0, r0, r2 |
375 | |
376 | tst r1, r3 |
377 | bic r1, r1, r3 |
378 | mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D / U line (end was unaligned) |
379 | cmp r0, r1 |
380 | 1: |
381 | mcrlo p15, 0, r0, c7, c6, 1 @ invalidate D / U line (whole lines while r0 is below r1) |
382 | addlo r0, r0, r2 |
383 | cmplo r0, r1 |
384 | blo 1b |
385 | dsb st |
386 | ret lr |
387 | ENDPROC(v7_dma_inv_range) |
388 | |
389 | /* |
390 | * v7_dma_clean_range(start,end): clean every D-cache line from start (rounded down to a line boundary) up to end, then dsb st. File-local label, reached from v7_dma_map_area. r0, r2 and r3 are overwritten. |
391 | * - start - virtual start address of region |
392 | * - end - virtual end address of region (exclusive; the loop tests after the operation, so at least one line is always cleaned) |
393 | */ |
394 | v7_dma_clean_range: |
395 | dcache_line_size r2, r3 |
396 | sub r3, r2, #1 |
397 | bic r0, r0, r3 |
398 | #ifdef CONFIG_ARM_ERRATA_764369 |
399 | ALT_SMP(W(dsb)) |
400 | ALT_UP(W(nop)) |
401 | #endif |
402 | 1: |
403 | mcr p15, 0, r0, c7, c10, 1 @ clean D / U line |
404 | add r0, r0, r2 |
405 | cmp r0, r1 |
406 | blo 1b |
407 | dsb st |
408 | ret lr |
409 | ENDPROC(v7_dma_clean_range) |
410 | |
411 | /* |
412 | * v7_dma_flush_range(start,end): clean & invalidate every D-cache line from start (rounded down to a line boundary) up to end, then dsb st. r0, r2 and r3 are overwritten. |
413 | * - start - virtual start address of region |
414 | * - end - virtual end address of region (exclusive; the loop tests after the operation, so at least one line is always processed) |
415 | */ |
416 | ENTRY(v7_dma_flush_range) |
417 | dcache_line_size r2, r3 |
418 | sub r3, r2, #1 |
419 | bic r0, r0, r3 |
420 | #ifdef CONFIG_ARM_ERRATA_764369 |
421 | ALT_SMP(W(dsb)) |
422 | ALT_UP(W(nop)) |
423 | #endif |
424 | 1: |
425 | mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D / U line |
426 | add r0, r0, r2 |
427 | cmp r0, r1 |
428 | blo 1b |
429 | dsb st |
430 | ret lr |
431 | ENDPROC(v7_dma_flush_range) |
432 | |
433 | /* |
434 | * v7_dma_map_area(start, size, dir): branches (without link) to a range routine, which returns straight to our caller |
435 | * - start - kernel virtual start address |
436 | * - size - size of region (turned into an end address, start + size, before branching) |
437 | * - dir - DMA direction: DMA_FROM_DEVICE goes to v7_dma_inv_range, any other value to v7_dma_clean_range |
438 | */ |
439 | ENTRY(v7_dma_map_area) |
440 | add r1, r1, r0 |
441 | teq r2, #DMA_FROM_DEVICE |
442 | beq v7_dma_inv_range |
443 | b v7_dma_clean_range |
444 | ENDPROC(v7_dma_map_area) |
445 | |
446 | /* |
447 | * v7_dma_unmap_area(start, size, dir) |
448 | * - start - kernel virtual start address |
449 | * - size - size of region (turned into an end address, start + size, before branching) |
450 | * - dir - DMA direction: DMA_TO_DEVICE returns without any cache operation, any other value branches to v7_dma_inv_range |
451 | */ |
452 | ENTRY(v7_dma_unmap_area) |
453 | add r1, r1, r0 |
454 | teq r2, #DMA_TO_DEVICE |
455 | bne v7_dma_inv_range |
456 | ret lr |
457 | ENDPROC(v7_dma_unmap_area) |
458 | |
459 | __INITDATA |
460 | |
461 | @ define struct cpu_cache_fns for the v7_* routines above (see <asm/cacheflush.h> and proc-macros.S) |
462 | define_cache_functions v7 |
463 | |
464 | /* The Broadcom Brahma-B15 read-ahead cache requires some modifications |
465 | * to the v7_cache_fns, we only override the ones we need. Every b15_* name listed below is an alias of its v7_* routine; b15_flush_kern_cache_all is aliased only when CONFIG_CACHE_B15_RAC is off (with it on, the symbol is presumably defined elsewhere - confirm). |
466 | */ |
467 | #ifndef CONFIG_CACHE_B15_RAC |
468 | globl_equ b15_flush_kern_cache_all, v7_flush_kern_cache_all |
469 | #endif |
470 | globl_equ b15_flush_icache_all, v7_flush_icache_all |
471 | globl_equ b15_flush_kern_cache_louis, v7_flush_kern_cache_louis |
472 | globl_equ b15_flush_user_cache_all, v7_flush_user_cache_all |
473 | globl_equ b15_flush_user_cache_range, v7_flush_user_cache_range |
474 | globl_equ b15_coherent_kern_range, v7_coherent_kern_range |
475 | globl_equ b15_coherent_user_range, v7_coherent_user_range |
476 | globl_equ b15_flush_kern_dcache_area, v7_flush_kern_dcache_area |
477 | |
478 | globl_equ b15_dma_map_area, v7_dma_map_area |
479 | globl_equ b15_dma_unmap_area, v7_dma_unmap_area |
480 | globl_equ b15_dma_flush_range, v7_dma_flush_range |
481 | |
482 | define_cache_functions b15 |
483 | |