/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  linux/arch/arm/boot/compressed/head.S
 *
 *  Copyright (C) 1996-2002 Russell King
 *  Copyright (C) 2004 Hyok S. Choi (MPU support)
 */
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/v7m.h>

#include "efi-header.S"

#ifdef __ARMEB__
#define OF_DT_MAGIC 0xd00dfeed
#else
#define OF_DT_MAGIC 0xedfe0dd0
#endif
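/*
 * The FDT magic (0xd00dfeed) is stored big-endian in the blob, so a
 * native word load of the first header word reads back byte-swapped
 * on a little-endian CPU. Define the constant accordingly so it can
 * be compared directly against a word loaded from the header.
 */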

 AR_CLASS(	.arch	armv7-a	)
 M_CLASS(	.arch	armv7-m	)

/*
 * Debugging stuff
 *
 * Note that these macros must not contain any code which is not
 * 100% relocatable.  Any attempt to do so will result in a crash.
 * Please select one of the following when turning on debugging.
 */
#ifdef DEBUG

#if defined(CONFIG_DEBUG_ICEDCC)

#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
		.macro	loadsp, rb, tmp1, tmp2
		.endm
		.macro	writeb, ch, rb, tmp
		mcr	p14, 0, \ch, c0, c5, 0
		.endm
#elif defined(CONFIG_CPU_XSCALE)
		.macro	loadsp, rb, tmp1, tmp2
		.endm
		.macro	writeb, ch, rb, tmp
		mcr	p14, 0, \ch, c8, c0, 0
		.endm
#else
		.macro	loadsp, rb, tmp1, tmp2
		.endm
		.macro	writeb, ch, rb, tmp
		mcr	p14, 0, \ch, c1, c0, 0
		.endm
#endif

#else

#include CONFIG_DEBUG_LL_INCLUDE

		.macro	writeb, ch, rb, tmp
#ifdef CONFIG_DEBUG_UART_FLOW_CONTROL
		waituartcts \tmp, \rb
#endif
		waituarttxrdy \tmp, \rb
		senduart \ch, \rb
		busyuart \tmp, \rb
		.endm

#if defined(CONFIG_ARCH_SA1100)
		.macro	loadsp, rb, tmp1, tmp2
		mov	\rb, #0x80000000	@ physical base address
		add	\rb, \rb, #0x00010000	@ Ser1
		.endm
#else
		.macro	loadsp, rb, tmp1, tmp2
		addruart \rb, \tmp1, \tmp2
		.endm
#endif
#endif
#endif

		.macro	kputc, val
		mov	r0, \val
		bl	putc
		.endm

		.macro	kphex, val, len
		mov	r0, \val
		mov	r1, #\len
		bl	phex
		.endm

/*
 * Debug kernel copy by printing the memory addresses involved
 */
		.macro	dbgkc, begin, end, cbegin, cend
#ifdef DEBUG
		kputc	#'C'
		kputc	#':'
		kputc	#'0'
		kputc	#'x'
		kphex	\begin, 8	/* Start of compressed kernel */
		kputc	#'-'
		kputc	#'0'
		kputc	#'x'
		kphex	\end, 8		/* End of compressed kernel */
		kputc	#'-'
		kputc	#'>'
		kputc	#'0'
		kputc	#'x'
		kphex	\cbegin, 8	/* Start of kernel copy */
		kputc	#'-'
		kputc	#'0'
		kputc	#'x'
		kphex	\cend, 8	/* End of kernel copy */
		kputc	#'\n'
#endif
		.endm
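/*
 * Example of the resulting output, with illustrative addresses:
 *   C:0x40008000-0x40108000->0x40800000-0x40900000
 */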

/*
 * Debug print of the final appended DTB location
 */
		.macro	dbgadtb, begin, size
#ifdef DEBUG
		kputc	#'D'
		kputc	#'T'
		kputc	#'B'
		kputc	#':'
		kputc	#'0'
		kputc	#'x'
		kphex	\begin, 8	/* Start of appended DTB */
		kputc	#' '
		kputc	#'('
		kputc	#'0'
		kputc	#'x'
		kphex	\size, 8	/* Size of appended DTB */
		kputc	#')'
		kputc	#'\n'
#endif
		.endm
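/*
 * Example output (illustrative values): DTB:0x60000000 (0x00004000)
 */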

		.macro	enable_cp15_barriers, reg
		mrc	p15, 0, \reg, c1, c0, 0	@ read SCTLR
		tst	\reg, #(1 << 5)		@ CP15BEN bit set?
		bne	.L_\@
		orr	\reg, \reg, #(1 << 5)	@ CP15 barrier instructions
		mcr	p15, 0, \reg, c1, c0, 0	@ write SCTLR
 ARM(		.inst	0xf57ff06f		@ v7+ isb	)
 THUMB(		isb					)
.L_\@:
		.endm

/*
 * The kernel build system appends the size of the
 * decompressed kernel at the end of the compressed data
 * in little-endian form.
 */
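/*
 * In C terms, roughly (a sketch): with u8 *p = input_data_end - 4,
 * the result is p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24, i.e.
 * get_unaligned_le32() of the size word the build system appended.
 */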
		.macro	get_inflated_image_size, res:req, tmp1:req, tmp2:req
		adr	\res, .Linflated_image_size_offset
		ldr	\tmp1, [\res]
		add	\tmp1, \tmp1, \res	@ address of inflated image size

		ldrb	\res, [\tmp1]		@ get_unaligned_le32
		ldrb	\tmp2, [\tmp1, #1]
		orr	\res, \res, \tmp2, lsl #8
		ldrb	\tmp2, [\tmp1, #2]
		ldrb	\tmp1, [\tmp1, #3]
		orr	\res, \res, \tmp2, lsl #16
		orr	\res, \res, \tmp1, lsl #24
		.endm

		.macro	be32tocpu, val, tmp
#ifndef __ARMEB__
		/* convert to little endian */
		rev_l	\val, \tmp
#endif
		.endm

		.section ".start", "ax"
/*
 * sort out different calling conventions
 */
		.align
		/*
		 * Always enter in ARM state for CPUs that support the ARM ISA.
		 * As of today (2014) that's exactly the members of the A and R
		 * classes.
		 */
 AR_CLASS(	.arm	)
start:
		.type	start,#function
		/*
		 * These 7 nops along with the 1 nop immediately below for
		 * !THUMB2 form 8 nops that make the compressed kernel bootable
		 * on legacy ARM systems that assumed the kernel was in a.out
		 * binary format.  The boot loaders on these systems would
		 * jump 32 bytes into the image to skip the a.out header.
		 * With these 8 nops filling exactly 32 bytes, things still
		 * work as expected on these legacy systems.  Thumb2 mode keeps
		 * 7 of the nops as it turns out that some boot loaders
		 * were patching the initial instructions of the kernel, i.e.
		 * had started to exploit this "patch area".
		 */
		__initial_nops
		.rept	5
		__nop
		.endr
#ifndef CONFIG_THUMB2_KERNEL
		__nop
#else
 AR_CLASS(	sub	pc, pc, #3	)	@ A/R: switch to Thumb2 mode
  M_CLASS(	nop.w			)	@ M: already in Thumb2 mode
		.thumb
#endif
		W(b)	1f

		.word	_magic_sig	@ Magic numbers to help the loader
		.word	_magic_start	@ absolute load/run zImage address
		.word	_magic_end	@ zImage end address
		.word	0x04030201	@ endianness flag
		.word	0x45454545	@ another magic number to indicate
		.word	_magic_table	@ additional data table

		__EFI_HEADER
1:
 ARM_BE8(	setend	be	)		@ go BE8 if compiled for BE8
 AR_CLASS(	mrs	r9, cpsr	)
#ifdef CONFIG_ARM_VIRT_EXT
		bl	__hyp_stub_install	@ get into SVC mode, reversibly
#endif
		mov	r7, r1			@ save architecture ID
		mov	r8, r2			@ save atags pointer

#ifndef CONFIG_CPU_V7M
		/*
		 * Booting from Angel - need to enter SVC mode and disable
		 * FIQs/IRQs (numeric definitions from angel arm.h source).
		 * We only do this if we were in user mode on entry.
		 */
		mrs	r2, cpsr		@ get current mode
		tst	r2, #3			@ not user?
		bne	not_angel
		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
 ARM(		swi	0x123456	)	@ angel_SWI_ARM
 THUMB(		svc	0xab		)	@ angel_SWI_THUMB
not_angel:
		safe_svcmode_maskall r0
		msr	spsr_cxsf, r9		@ Save the CPU boot mode in
						@ SPSR
#endif
		/*
		 * Note that some cache flushing and other stuff may
		 * be needed here - is there an Angel SWI call for this?
		 */

		/*
		 * some architecture specific code can be inserted
		 * by the linker here, but it should preserve r7, r8, and r9.
		 */

		.text

#ifdef CONFIG_AUTO_ZRELADDR
		/*
		 * Find the start of physical memory.  As we are executing
		 * without the MMU on, we are in the physical address space.
		 * We just need to get rid of any offset by aligning the
		 * address.
		 *
		 * This alignment is a balance between the requirements of
		 * different platforms - we have chosen 128MB to allow
		 * platforms which align the start of their physical memory
		 * to 128MB to use this feature, while allowing the zImage
		 * to be placed within the first 128MB of memory on other
		 * platforms.  Increasing the alignment means we place
		 * stricter alignment requirements on the start of physical
		 * memory, but relaxing it means that we break people who
		 * are already placing their zImage in (e.g.) the top 64MB
		 * of this range.
		 */
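		/*
		 * For example, with illustrative values: executing at
		 * pc = 0x6a08c140 gives r0 = 0x6a08c140 & 0xf8000000
		 * = 0x68000000, i.e. pc rounded down to a 128MB boundary.
		 */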
		mov	r0, pc
		and	r0, r0, #0xf8000000
#ifdef CONFIG_USE_OF
		adr	r1, LC1
#ifdef CONFIG_ARM_APPENDED_DTB
		/*
		 * Look for an appended DTB.  If found, we cannot use it to
		 * validate the calculated start of physical memory, as its
		 * memory nodes may need to be augmented by ATAGS stored at
		 * an offset from the same start of physical memory.
		 */
		ldr	r2, [r1, #4]	@ get &_edata
		add	r2, r2, r1	@ relocate it
		ldr	r2, [r2]	@ get DTB signature
		ldr	r3, =OF_DT_MAGIC
		cmp	r2, r3		@ do we have a DTB there?
		beq	1f		@ if yes, skip validation
#endif /* CONFIG_ARM_APPENDED_DTB */

		/*
		 * Make sure we have some stack before calling C code.
		 * No GOT fixup has occurred yet, but none of the code we're
		 * about to call uses any global variables.
		 */
		ldr	sp, [r1]	@ get stack location
		add	sp, sp, r1	@ apply relocation

		/* Validate calculated start against passed DTB */
		mov	r1, r8
		bl	fdt_check_mem_start
1:
#endif /* CONFIG_USE_OF */
		/* Determine final kernel image address. */
		add	r4, r0, #TEXT_OFFSET
#else
		ldr	r4, =zreladdr
#endif

		/*
		 * Set up a page table only if it won't overwrite ourselves.
		 * That means r4 < pc || r4 - 16k page directory > &_end.
		 * Given that r4 > &_end is quite infrequent, we add roughly
		 * an additional 1MB of room for a possible appended DTB.
		 */
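		/*
		 * The headroom checked below is defined at .Lheadroom as
		 * _end - restart + 16384 + 1024*1024, i.e. everything from
		 * restart to _end, plus the 16k page directory, plus 1MB
		 * of slack for a possible appended DTB.
		 */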
		mov	r0, pc
		cmp	r0, r4
		ldrcc	r0, .Lheadroom
		addcc	r0, r0, pc
		cmpcc	r4, r0
		orrcc	r4, r4, #1		@ remember we skipped cache_on
		blcs	cache_on

restart:	adr	r0, LC1
		ldr	sp, [r0]
		ldr	r6, [r0, #4]
		add	sp, sp, r0
		add	r6, r6, r0

		get_inflated_image_size	r9, r10, lr

#ifndef CONFIG_ZBOOT_ROM
		/* malloc space is above the relocated stack (64k max) */
		add	r10, sp, #MALLOC_SIZE
#else
		/*
		 * With ZBOOT_ROM the bss/stack is non-relocatable,
		 * but someone could still run this code from RAM,
		 * in which case our reference is _edata.
		 */
		mov	r10, r6
#endif

		mov	r5, #0			@ init dtb size to 0
#ifdef CONFIG_ARM_APPENDED_DTB
/*
 *   r4  = final kernel address (possibly with LSB set)
 *   r5  = appended dtb size (still unknown)
 *   r6  = _edata
 *   r7  = architecture ID
 *   r8  = atags/device tree pointer
 *   r9  = size of decompressed image
 *   r10 = end of this image, including bss/stack/malloc space if non XIP
 *   sp  = stack pointer
 *
 * If there are device trees (DTBs) appended to the zImage, advance r10 so
 * that the DTB data will get relocated along with the kernel if necessary.
 */

		ldr	lr, [r6, #0]
		ldr	r1, =OF_DT_MAGIC
		cmp	lr, r1
		bne	dtb_check_done		@ not found

#ifdef CONFIG_ARM_ATAG_DTB_COMPAT
		/*
		 * OK... Let's do some funky business here.
		 * If we do have a DTB appended to zImage, and we do have
		 * an ATAG list around, we want the latter to be translated
		 * and folded into the former here.  No GOT fixup has occurred
		 * yet, but none of the code we're about to call uses any
		 * global variables.
		 */

		/* Get the initial DTB size */
		ldr	r5, [r6, #4]
		be32tocpu r5, r1
		dbgadtb	r6, r5
		/* 50% DTB growth should be good enough */
		add	r5, r5, r5, lsr #1
		/* preserve 64-bit alignment */
		add	r5, r5, #7
		bic	r5, r5, #7
		/* clamp to 32KB min and 1MB max */
		cmp	r5, #(1 << 15)
		movlo	r5, #(1 << 15)
		cmp	r5, #(1 << 20)
		movhi	r5, #(1 << 20)
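		/*
		 * Worked example with illustrative numbers: a 40KB DTB grows
		 * by 50% to 60KB, is already 8-byte aligned, and falls inside
		 * the 32KB..1MB clamp, so 60KB of work space is reserved.
		 */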
		/* temporarily relocate the stack past the DTB work space */
		add	sp, sp, r5

		mov	r0, r8
		mov	r1, r6
		mov	r2, r5
		bl	atags_to_fdt

		/*
		 * If the returned value is 1, there is no ATAG at the location
		 * pointed to by r8.  Try the typical 0x100 offset from the
		 * start of RAM and hope for the best.
		 */
		cmp	r0, #1
		sub	r0, r4, #TEXT_OFFSET
		bic	r0, r0, #1
		add	r0, r0, #0x100
		mov	r1, r6
		mov	r2, r5
		bleq	atags_to_fdt

		sub	sp, sp, r5
#endif

		mov	r8, r6			@ use the appended device tree

		/*
		 * Make sure that the DTB doesn't end up in the final
		 * kernel's .bss area.  To do so, we adjust the decompressed
		 * kernel size to compensate if that .bss size is larger
		 * than the relocated code.
		 */
		ldr	r5, =_kernel_bss_size
		adr	r1, wont_overwrite
		sub	r1, r6, r1
		subs	r1, r5, r1
		addhi	r9, r9, r1
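		/*
		 * A reading of the arithmetic above:
		 * r1 = _kernel_bss_size - (_edata - wont_overwrite); if the
		 * kernel's .bss is larger than the zImage tail that follows
		 * wont_overwrite, grow the recorded image size by the
		 * difference so the DTB placed after the image also clears
		 * the final .bss.
		 */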

		/* Get the current DTB size */
		ldr	r5, [r6, #4]
		be32tocpu r5, r1

		/* preserve 64-bit alignment */
		add	r5, r5, #7
		bic	r5, r5, #7

		/* relocate some pointers past the appended dtb */
		add	r6, r6, r5
		add	r10, r10, r5
		add	sp, sp, r5
dtb_check_done:
#endif

/*
 * Check to see if we will overwrite ourselves.
 *   r4  = final kernel address (possibly with LSB set)
 *   r9  = size of decompressed image
 *   r10 = end of this image, including bss/stack/malloc space if non XIP
 * We basically want:
 *   r4 - 16k page directory >= r10 -> OK
 *   r4 + image length <= address of wont_overwrite -> OK
 * Note: the possible LSB in r4 is harmless here.
 */
		add	r10, r10, #16384
		cmp	r4, r10
		bhs	wont_overwrite
		add	r10, r4, r9
		adr	r9, wont_overwrite
		cmp	r10, r9
		bls	wont_overwrite

/*
 * Relocate ourselves past the end of the decompressed kernel.
 *   r6  = _edata
 *   r10 = end of the decompressed kernel
 * Because we always copy ahead, we need to do it from the end and go
 * backward in case the source and destination overlap.
 */
		/*
		 * Bump to the next 256-byte boundary with the size of
		 * the relocation code added.  This avoids overwriting
		 * ourselves when the offset is small.
		 */
		add	r10, r10, #((reloc_code_end - restart + 256) & ~255)
		bic	r10, r10, #255

		/* Get start of code we want to copy and align it down. */
		adr	r5, restart
		bic	r5, r5, #31

		/* Relocate the hyp vector base if necessary */
#ifdef CONFIG_ARM_VIRT_EXT
		mrs	r0, spsr
		and	r0, r0, #MODE_MASK
		cmp	r0, #HYP_MODE
		bne	1f

		/*
		 * Compute the address of the hyp vectors after relocation.
		 * Call __hyp_set_vectors with the new address so that we
		 * can HVC again after the copy.
		 */
		adr_l	r0, __hyp_stub_vectors
		sub	r0, r0, r5
		add	r0, r0, r10
		bl	__hyp_set_vectors
1:
#endif

		sub	r9, r6, r5		@ size to copy
		add	r9, r9, #31		@ rounded up to a multiple
		bic	r9, r9, #31		@ ... of 32 bytes
		add	r6, r9, r5
		add	r9, r9, r10

#ifdef DEBUG
		sub	r10, r6, r5
		sub	r10, r9, r10
		/*
		 * We are about to copy the kernel to a new memory area.
		 * The boundaries of the new memory area can be found in
		 * r10 and r9, whilst r5 and r6 contain the boundaries
		 * of the memory we are going to copy.
		 * Calling dbgkc will help with the printing of this
		 * information.
		 */
		dbgkc	r5, r6, r10, r9
#endif

1:		ldmdb	r6!, {r0 - r3, r10 - r12, lr}
		cmp	r6, r5
		stmdb	r9!, {r0 - r3, r10 - r12, lr}
		bhi	1b

		/* Preserve offset to relocated code. */
		sub	r6, r9, r6

		mov	r0, r9			@ start of relocated zImage
		add	r1, sp, r6		@ end of relocated zImage
		bl	cache_clean_flush

		badr	r0, restart
		add	r0, r0, r6
		mov	pc, r0

wont_overwrite:
		adr	r0, LC0
		ldmia	r0, {r1, r2, r3, r11, r12}
		sub	r0, r0, r1		@ calculate the delta offset

		/*
		 * If delta is zero, we are running at the address we were linked at.
		 *   r0  = delta
		 *   r2  = BSS start
		 *   r3  = BSS end
		 *   r4  = kernel execution address (possibly with LSB set)
		 *   r5  = appended dtb size (0 if not present)
		 *   r7  = architecture ID
		 *   r8  = atags pointer
		 *   r11 = GOT start
		 *   r12 = GOT end
		 *   sp  = stack pointer
		 */
		orrs	r1, r0, r5
		beq	not_relocated

		add	r11, r11, r0
		add	r12, r12, r0

#ifndef CONFIG_ZBOOT_ROM
		/*
		 * If we're running fully PIC (i.e. CONFIG_ZBOOT_ROM=n),
		 * we need to fix up pointers into the BSS region.
		 * Note that the stack pointer has already been fixed up.
		 */
		add	r2, r2, r0
		add	r3, r3, r0

		/*
		 * Relocate all entries in the GOT table.
		 * Bump bss entries to _edata + dtb size
		 */
1:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
		add	r1, r1, r0		@ This fixes up C references
		cmp	r1, r2			@ if entry >= bss_start &&
		cmphs	r3, r1			@       bss_end > entry
		addhi	r1, r1, r5		@    entry += dtb size
		str	r1, [r11], #4		@ next entry
		cmp	r11, r12
		blo	1b

		/* bump our bss pointers too */
		add	r2, r2, r5
		add	r3, r3, r5

#else

		/*
		 * Relocate entries in the GOT table.  We only relocate
		 * the entries that are outside the (relocated) BSS region.
		 */
1:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
		cmp	r1, r2			@ entry < bss_start ||
		cmphs	r3, r1			@ _end < entry
		addlo	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r11], #4		@ C references.
		cmp	r11, r12
		blo	1b
#endif

not_relocated:	mov	r0, #0
1:		str	r0, [r2], #4		@ clear bss
		str	r0, [r2], #4
		str	r0, [r2], #4
		str	r0, [r2], #4
		cmp	r2, r3
		blo	1b

		/*
		 * Did we skip the cache setup earlier?
		 * That is indicated by the LSB in r4.
		 * Do it now if so.
		 */
		tst	r4, #1
		bic	r4, r4, #1
		blne	cache_on

		/*
		 * The C runtime environment should now be setup sufficiently.
		 * Set up some pointers, and start decompressing.
		 *   r4  = kernel execution address
		 *   r7  = architecture ID
		 *   r8  = atags pointer
		 */
		mov	r0, r4
		mov	r1, sp			@ malloc space above stack
		add	r2, sp, #MALLOC_SIZE	@ 64k max
		mov	r3, r7
		bl	decompress_kernel

		get_inflated_image_size	r1, r2, r3

		mov	r0, r4			@ start of inflated image
		add	r1, r1, r0		@ end of inflated image
		bl	cache_clean_flush
		bl	cache_off

#ifdef CONFIG_ARM_VIRT_EXT
		mrs	r0, spsr		@ Get saved CPU boot mode
		and	r0, r0, #MODE_MASK
		cmp	r0, #HYP_MODE		@ if not booted in HYP mode...
		bne	__enter_kernel		@ boot kernel directly

		adr_l	r0, __hyp_reentry_vectors
		bl	__hyp_set_vectors
		__HVC(0)			@ otherwise bounce to hyp mode

		b	.			@ should never be reached
#else
		b	__enter_kernel
#endif

		.align	2
		.type	LC0, #object
LC0:		.word	LC0			@ r1
		.word	__bss_start		@ r2
		.word	_end			@ r3
		.word	_got_start		@ r11
		.word	_got_end		@ ip
		.size	LC0, . - LC0

		.type	LC1, #object
LC1:		.word	.L_user_stack_end - LC1	@ sp
		.word	_edata - LC1		@ r6
		.size	LC1, . - LC1

.Lheadroom:
		.word	_end - restart + 16384 + 1024*1024

.Linflated_image_size_offset:
		.long	(input_data_end - 4) - .

#ifdef CONFIG_ARCH_RPC
		.globl	params
params:		ldr	r0, =0x10000100		@ params_phys for RPC
		mov	pc, lr
		.ltorg
		.align
#endif

/*
 * dcache_line_size - get the minimum D-cache line size from the CTR register
 * on ARMv7.
 */
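/*
 * CTR bits [19:16] hold DminLine, the log2 of the smallest D-cache
 * line size in words; e.g. DminLine = 4 gives 4 << 4 = 64 bytes.
 */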
		.macro	dcache_line_size, reg, tmp
#ifdef CONFIG_CPU_V7M
		movw	\tmp, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_CTR
		movt	\tmp, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_CTR
		ldr	\tmp, [\tmp]
#else
		mrc	p15, 0, \tmp, c0, c0, 1	@ read ctr
#endif
		lsr	\tmp, \tmp, #16
		and	\tmp, \tmp, #0xf	@ cache line size encoding
		mov	\reg, #4		@ bytes per word
		mov	\reg, \reg, lsl \tmp	@ actual cache line size
		.endm

/*
 * Turn on the cache.  We need to setup some page tables so that we
 * can have both the I and D caches on.
 *
 * We place the page tables 16k down from the kernel execution address,
 * and we hope that nothing else is using it.  If we're using it, we
 * will go pop!
 *
 * On entry,
 *  r4 = kernel execution address
 *  r7 = architecture number
 *  r8 = atags pointer
 * On exit,
 *  r0, r1, r2, r3, r9, r10, r12 corrupted
 * This routine must preserve:
 *  r4, r7, r8
 */
		.align	5
cache_on:	mov	r3, #8			@ cache_on function
		b	call_cache_fn

/*
 * Initialize the highest-priority protection region, PR7,
 * to cover the entire 32-bit address space, cacheable and bufferable.
 */
__armv4_mpu_cache_on:
		mov	r0, #0x3f		@ 4G, the whole
		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
		mcr	p15, 0, r0, c6, c7, 1

		mov	r0, #0x80		@ PR7
		mcr	p15, 0, r0, c2, c0, 0	@ D-cache on
		mcr	p15, 0, r0, c2, c0, 1	@ I-cache on
		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on

		mov	r0, #0xc000
		mcr	p15, 0, r0, c5, c0, 1	@ I-access permission
		mcr	p15, 0, r0, c5, c0, 0	@ D-access permission

		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
						@ ...I .... ..D. WC.M
		orr	r0, r0, #0x002d		@ .... .... ..1. 11.1
		orr	r0, r0, #0x1000		@ ...1 .... .... ....

		mcr	p15, 0, r0, c1, c0, 0	@ write control reg

		mov	r0, #0
		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
		mov	pc, lr

__armv3_mpu_cache_on:
		mov	r0, #0x3f		@ 4G, the whole
		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting

		mov	r0, #0x80		@ PR7
		mcr	p15, 0, r0, c2, c0, 0	@ cache on
		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on

		mov	r0, #0xc000
		mcr	p15, 0, r0, c5, c0, 0	@ access permission

		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		/*
		 * ?? ARMv3 MMU does not allow reading the control register,
		 * does this really work on ARMv3 MPU?
		 */
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
						@ .... .... .... WC.M
		orr	r0, r0, #0x000d		@ .... .... .... 11.1
		/* ?? this overwrites the value constructed above? */
		mov	r0, #0
		mcr	p15, 0, r0, c1, c0, 0	@ write control reg

		/* ?? invalidate for the second time? */
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
#define CB_BITS 0x08
#else
#define CB_BITS 0x0c
#endif

__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
		bic	r3, r3, #0xff		@ Align the pointer
		bic	r3, r3, #0x3f00
		/*
		 * Initialise the page tables, turning on the cacheable and
		 * bufferable bits for the RAM area only.
		 */
		mov	r0, r3
		mov	r9, r0, lsr #18
		mov	r9, r9, lsl #18		@ start of RAM
		add	r10, r9, #0x10000000	@ a reasonable RAM size
		mov	r1, #0x12		@ XN|U + section mapping
		orr	r1, r1, #3 << 10	@ AP=11
		add	r2, r3, #16384
1:		cmp	r1, r9			@ if virt > start of RAM
		cmphs	r10, r1			@   && end of RAM > virt
		bic	r1, r1, #0x1c		@ clear XN|U + C + B
		orrlo	r1, r1, #0x10		@ Set XN|U for non-RAM
		orrhs	r1, r1, r6		@ set RAM section settings
		str	r1, [r0], #4		@ 1:1 mapping
		add	r1, r1, #1048576
		teq	r0, r2
		bne	1b
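		/*
		 * Illustrative descriptor value: with r6 = CB_BITS | 0x12
		 * (the __armv4_mmu_cache_on case) and RAM starting at
		 * 0x60000000, the first RAM section is mapped by
		 * 0x60000000 | 3 << 10 | 0x1e = 0x60000c1e
		 * (AP=11, C and B set, section mapping).
		 */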
		/*
		 * If we are ever running from Flash, then we surely want the
		 * cache to be enabled for our execution instance as well.  We
		 * map 2MB of it so there is no mapping overlap problem for up
		 * to a 1MB compressed kernel.  If we are executing from RAM,
		 * this would merely duplicate the mapping set up above.
		 */
		orr	r1, r6, #0x04		@ ensure B is set for this
		orr	r1, r1, #3 << 10
		mov	r2, pc
		mov	r2, r2, lsr #20
		orr	r1, r1, r2, lsl #20
		add	r0, r3, r2, lsl #2
		str	r1, [r0], #4
		add	r1, r1, #1048576
		str	r1, [r0]
		mov	pc, lr
ENDPROC(__setup_mmu)

@ Enable unaligned access on v6, to allow better code generation
@ for the decompressor C code:
__armv6_mmu_cache_on:
		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
		bic	r0, r0, #2		@ A (no unaligned access fault)
		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
		mcr	p15, 0, r0, c1, c0, 0	@ write SCTLR
		b	__armv4_mmu_cache_on

__arm926ejs_mmu_cache_on:
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
		mov	r0, #4			@ put dcache in WT mode
		mcr	p15, 7, r0, c15, c0, 0
#endif

__armv4_mmu_cache_on:
		mov	r12, lr
#ifdef CONFIG_MMU
		mov	r6, #CB_BITS | 0x12	@ U
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
		orr	r0, r0, #0x0030
 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
		bl	__common_mmu_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
#endif
		mov	pc, r12

__armv7_mmu_cache_on:
		enable_cp15_barriers	r11
		mov	r12, lr
#ifdef CONFIG_MMU
		mrc	p15, 0, r11, c0, c1, 4	@ read ID_MMFR0
		tst	r11, #0xf		@ VMSA
		movne	r6, #CB_BITS | 0x02	@ !XN
		blne	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		tst	r11, #0xf		@ VMSA
		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
#endif
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		bic	r0, r0, #1 << 28	@ clear SCTLR.TRE
		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
		orr	r0, r0, #0x003c		@ write buffer
		bic	r0, r0, #2		@ A (no unaligned access fault)
		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
						@ (needed for ARM1176)
#ifdef CONFIG_MMU
 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
		mrcne	p15, 0, r6, c2, c0, 2	@ read ttb control reg
		orrne	r0, r0, #1		@ MMU enabled
		movne	r1, #0xfffffffd		@ domain 0 = client
		bic	r6, r6, #1 << 31	@ 32-bit translation system
		bic	r6, r6, #(7 << 0) | (1 << 4)	@ use only ttbr0
		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
		mcrne	p15, 0, r6, c2, c0, 2	@ load ttb control
#endif
		mcr	p15, 0, r0, c7, c5, 4	@ ISB
		mcr	p15, 0, r0, c1, c0, 0	@ load control register
		mrc	p15, 0, r0, c1, c0, 0	@ and read it back
		mov	r0, #0
		mcr	p15, 0, r0, c7, c5, 4	@ ISB
		mov	pc, r12

__fa526_cache_on:
		mov	r12, lr
		mov	r6, #CB_BITS | 0x12	@ U
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c7, 0	@ Invalidate whole cache
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x1000		@ I-cache enable
		bl	__common_mmu_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
		mov	pc, r12

__common_mmu_cache_on:
#ifndef CONFIG_THUMB2_KERNEL
#ifndef DEBUG
		orr	r0, r0, #0x000d		@ Write buffer, mmu
#endif
		mov	r1, #-1
		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
		b	1f
		.align	5			@ cache line aligned
1:		mcr	p15, 0, r0, c1, c0, 0	@ load control register
		mrc	p15, 0, r0, c1, c0, 0	@ and read it back to
		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
#endif

#define PROC_ENTRY_SIZE (4*5)

/*
 * Here follow the relocatable cache support functions for the
 * various processors.  This is a generic hook for locating an
 * entry and jumping to an instruction at the specified offset
 * from the start of the block.  Please note this is all position
 * independent code.
 *
 *  r1 = corrupted
 *  r2 = corrupted
 *  r3 = block offset
 *  r9 = corrupted
 *  r12 = corrupted
 */
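/*
 * The offset in r3 selects which of the three branch instructions in
 * each entry is taken: cache_on passes 8, cache_off 12, and
 * cache_clean_flush 16, matching the entry layout of one ID word, one
 * mask word, and three method slots (PROC_ENTRY_SIZE = 4*5 bytes).
 */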

call_cache_fn:	adr	r12, proc_types
#ifdef CONFIG_CPU_CP15
		mrc	p15, 0, r9, c0, c0	@ get processor ID
#elif defined(CONFIG_CPU_V7M)
		/*
		 * On v7-M the processor id is located in the V7M_SCB_CPUID
		 * register, but as cache handling is IMPLEMENTATION DEFINED on
		 * v7-M (if existent at all) we just return early here.
		 * If V7M_SCB_CPUID were used, the cpu ID functions (i.e.
		 * __armv7_mmu_cache_{on,off,flush}) would be selected, which
		 * use cp15 registers that are not implemented on v7-M.
		 */
		bx	lr
#else
		ldr	r9, =CONFIG_PROCESSOR_ID
#endif
1:		ldr	r1, [r12, #0]		@ get value
		ldr	r2, [r12, #4]		@ get mask
		eor	r1, r1, r9		@ (real ^ match)
		tst	r1, r2			@       & mask
 ARM(		addeq	pc, r12, r3		) @ call cache function
 THUMB(		addeq	r12, r3			)
 THUMB(		moveq	pc, r12			) @ call cache function
		add	r12, r12, #PROC_ENTRY_SIZE
		b	1b

/*
 * Table for cache operations.  This is basically:
 *   - CPU ID match
 *   - CPU ID mask
 *   - 'cache on' method instruction
 *   - 'cache off' method instruction
 *   - 'cache flush' method instruction
 *
 * We match an entry using: ((real_id ^ match) & mask) == 0
 *
 * Writethrough caches generally only need 'on' and 'off'
 * methods.  Writeback caches _must_ have the flush method
 * defined.
 */
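/*
 * For example, using the ARMv5TE architecture entry below: a CPU ID
 * whose architecture field (bits [19:16]) reads 5 satisfies
 * ((id ^ 0x00050000) & 0x000f0000) == 0 and selects the
 * __armv4_mmu_cache_* methods, unless an earlier, more specific
 * entry in the table matched first.
 */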
		.align	2
		.type	proc_types,#object
proc_types:
		.word	0x41000000		@ old ARM ID
		.word	0xff00f000
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)

		.word	0x41007000		@ ARM7/710
		.word	0xfff8fe00
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)

		.word	0x41807200		@ ARM720T (writethrough)
		.word	0xffffff00
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		mov	pc, lr
 THUMB(		nop				)

		.word	0x41007400		@ ARM74x
		.word	0xff00ff00
		W(b)	__armv3_mpu_cache_on
		W(b)	__armv3_mpu_cache_off
		W(b)	__armv3_mpu_cache_flush

		.word	0x41009400		@ ARM94x
		.word	0xff00ff00
		W(b)	__armv4_mpu_cache_on
		W(b)	__armv4_mpu_cache_off
		W(b)	__armv4_mpu_cache_flush

		.word	0x41069260		@ ARM926EJ-S (v5TEJ)
		.word	0xff0ffff0
		W(b)	__arm926ejs_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush

		.word	0x00007000		@ ARM7 IDs
		.word	0x0000f000
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)

		@ Everything from here on will be the new ID system.

		.word	0x4401a100		@ sa110 / sa1100
		.word	0xffffffe0
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x6901b110		@ sa1110
		.word	0xfffffff0
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x56056900
		.word	0xffffff00		@ PXA9xx
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x56158000		@ PXA168
		.word	0xfffff000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush

		.word	0x56050000		@ Feroceon
		.word	0xff0f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush

#ifdef CONFIG_CPU_FEROCEON_OLD_ID
		/* this conflicts with the standard ARMv5TE entry */
		.long	0x41009260		@ Old Feroceon
		.long	0xff00fff0
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv5tej_mmu_cache_flush
#endif

		.word	0x66015261		@ FA526
		.word	0xff01fff1
		W(b)	__fa526_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__fa526_cache_flush

		@ These match on the architecture ID

		.word	0x00020000		@ ARMv4T
		.word	0x000f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x00050000		@ ARMv5TE
		.word	0x000f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x00060000		@ ARMv5TEJ
		.word	0x000f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush

		.word	0x0007b000		@ ARMv6
		.word	0x000ff000
		W(b)	__armv6_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv6_mmu_cache_flush

		.word	0x000f0000		@ new CPU Id
		.word	0x000f0000
		W(b)	__armv7_mmu_cache_on
		W(b)	__armv7_mmu_cache_off
		W(b)	__armv7_mmu_cache_flush

		.word	0			@ unrecognised type
		.word	0
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)

		.size	proc_types, . - proc_types

		/*
		 * If you get a "non-constant expression in ".if" statement"
		 * error from the assembler on this line, check that you have
		 * not accidentally written a "b" instruction where you should
		 * have written W(b).
		 */
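		/*
		 * In Thumb-2, a plain "b" to a nearby target may assemble to
		 * a 16-bit encoding, whereas W(b) forces the 32-bit encoding
		 * and keeps every proc_types entry exactly PROC_ENTRY_SIZE
		 * bytes long.
		 */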
		.if (. - proc_types) % PROC_ENTRY_SIZE != 0
		.error "The size of one or more proc_types entries is wrong."
		.endif

/*
 * Turn off the Cache and MMU.  ARMv3 does not support
 * reading the control register, but ARMv4 does.
 *
 * On exit,
 *  r0, r1, r2, r3, r9, r12 corrupted
 * This routine must preserve:
 *  r4, r7, r8
 */
		.align	5
cache_off:	mov	r3, #12			@ cache_off function
		b	call_cache_fn

__armv4_mpu_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MPU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c7, c6, 0	@ flush D-Cache
		mcr	p15, 0, r0, c7, c5, 0	@ flush I-Cache
		mov	pc, lr

__armv3_mpu_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0, 0	@ turn MPU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

__armv4_mmu_cache_off:
#ifdef CONFIG_MMU
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
#endif
		mov	pc, lr

__armv7_mmu_cache_off:
		mrc	p15, 0, r0, c1, c0
#ifdef CONFIG_MMU
		bic	r0, r0, #0x0005
#else
		bic	r0, r0, #0x0004
#endif
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mov	r0, #0
#ifdef CONFIG_MMU
		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB
#endif
		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
		mcr	p15, 0, r0, c7, c10, 4	@ DSB
		mcr	p15, 0, r0, c7, c5, 4	@ ISB
		mov	pc, lr

/*
 * Clean and flush the cache to maintain consistency.
 *
 * On entry,
 *  r0 = start address
 *  r1 = end address (exclusive)
 * On exit,
 *  r1, r2, r3, r9, r10, r11, r12 corrupted
 * This routine must preserve:
 *  r4, r6, r7, r8
 */
		.align	5
cache_clean_flush:
		mov	r3, #16
		mov	r11, r1
		b	call_cache_fn

__armv4_mpu_cache_flush:
		tst	r4, #1
		movne	pc, lr
		mov	r2, #1
		mov	r3, #0
		mcr	p15, 0, ip, c7, c6, 0	@ invalidate D cache
		mov	r1, #7 << 5		@ 8 segments
1:		orr	r3, r1, #63 << 26	@ 64 entries
2:		mcr	p15, 0, r3, c7, c14, 2	@ clean & invalidate D index
		subs	r3, r3, #1 << 26
		bcs	2b			@ entries 63 to 0
		subs	r1, r1, #1 << 5
		bcs	1b			@ segments 7 to 0

		teq	r2, #0
		mcrne	p15, 0, ip, c7, c5, 0	@ invalidate I cache
		mcr	p15, 0, ip, c7, c10, 4	@ drain WB
		mov	pc, lr

__fa526_cache_flush:
		tst	r4, #1
		movne	pc, lr
		mov	r1, #0
		mcr	p15, 0, r1, c7, c14, 0	@ clean and invalidate D cache
		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv6_mmu_cache_flush:
		mov	r1, #0
		tst	r4, #1
		mcreq	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
		mcreq	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv7_mmu_cache_flush:
		enable_cp15_barriers	r10
		tst	r4, #1
		bne	iflush
		mrc	p15, 0, r10, c0, c1, 5	@ read ID_MMFR1
		tst	r10, #0xf << 16		@ hierarchical cache (ARMv7)
		mov	r10, #0
		beq	hierarchical
		mcr	p15, 0, r10, c7, c14, 0	@ clean+invalidate D
		b	iflush
hierarchical:
		dcache_line_size r1, r2	@ r1 := dcache min line size
		sub	r2, r1, #1	@ r2 := line size mask
		bic	r0, r0, r2	@ round down start to line size
		sub	r11, r11, #1	@ end address is exclusive
		bic	r11, r11, r2	@ round down end to line size
0:		cmp	r0, r11		@ finished?
		bgt	iflush
		mcr	p15, 0, r0, c7, c14, 1	@ Dcache clean/invalidate by VA
		add	r0, r0, r1
		b	0b
iflush:
		mcr	p15, 0, r10, c7, c10, 4	@ DSB
		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I+BTB
		mcr	p15, 0, r10, c7, c10, 4	@ DSB
		mcr	p15, 0, r10, c7, c5, 4	@ ISB
		mov	pc, lr

__armv5tej_mmu_cache_flush:
		tst	r4, #1
		movne	pc, lr
1:		mrc	p15, 0, APSR_nzcv, c7, c14, 3	@ test,clean,invalidate D cache
		bne	1b
		mcr	p15, 0, r0, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r0, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv4_mmu_cache_flush:
		tst	r4, #1
		movne	pc, lr
		mov	r2, #64*1024		@ default: 32K dcache size (*2)
		mov	r11, #32		@ default: 32 byte line size
		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
		teq	r3, r9			@ cache ID register present?
		beq	no_cache_id
		mov	r1, r3, lsr #18
		and	r1, r1, #7
		mov	r2, #1024
		mov	r2, r2, lsl r1		@ base dcache size *2
		tst	r3, #1 << 14		@ test M bit
		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
		mov	r3, r3, lsr #12
		and	r3, r3, #3
		mov	r11, #8
		mov	r11, r11, lsl r3	@ cache line size in bytes
no_cache_id:
		mov	r1, pc
		bic	r1, r1, #63		@ align to longest cache line
		add	r2, r1, r2
1:
 ARM(		ldr	r3, [r1], r11		) @ s/w flush D cache
 THUMB(		ldr	r3, [r1]		) @ s/w flush D cache
 THUMB(		add	r1, r1, r11		)
		teq	r1, r2
		bne	1b

		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv3_mmu_cache_flush:
__armv3_mpu_cache_flush:
		tst	r4, #1
		movne	pc, lr
		mov	r1, #0
		mcr	p15, 0, r1, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

/*
 * Various debugging routines for printing hex characters and
 * memory, which again must be relocatable.
 */
#ifdef DEBUG
		.align	2
		.type	phexbuf,#object
phexbuf:	.space	12
		.size	phexbuf, . - phexbuf

@ phex corrupts {r0, r1, r2, r3}
phex:		adr	r3, phexbuf
		mov	r2, #0
		strb	r2, [r3, r1]
1:		subs	r1, r1, #1
		movmi	r0, r3
		bmi	puts
		and	r2, r0, #15
		mov	r0, r0, lsr #4
		cmp	r2, #10
		addge	r2, r2, #7
		add	r2, r2, #'0'
		strb	r2, [r3, r1]
		b	1b

@ puts corrupts {r0, r1, r2, r3}
puts:		loadsp	r3, r2, r1
1:		ldrb	r2, [r0], #1
		teq	r2, #0
		moveq	pc, lr
2:		writeb	r2, r3, r1
		mov	r1, #0x00020000
3:		subs	r1, r1, #1
		bne	3b
		teq	r2, #'\n'
		moveq	r2, #'\r'
		beq	2b
		teq	r0, #0
		bne	1b
		mov	pc, lr

@ putc corrupts {r0, r1, r2, r3}
putc:
		mov	r2, r0
		loadsp	r3, r1, r0
		mov	r0, #0
		b	2b

@ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
memdump:	mov	r12, r0
		mov	r10, lr
		mov	r11, #0
2:		mov	r0, r11, lsl #2
		add	r0, r0, r12
		mov	r1, #8
		bl	phex
		mov	r0, #':'
		bl	putc
1:		mov	r0, #' '
		bl	putc
		ldr	r0, [r12, r11, lsl #2]
		mov	r1, #8
		bl	phex
		and	r0, r11, #7
		teq	r0, #3
		moveq	r0, #' '
		bleq	putc
		and	r0, r11, #7
		add	r11, r11, #1
		teq	r0, #7
		bne	1b
		mov	r0, #'\n'
		bl	putc
		cmp	r11, #64
		blt	2b
		mov	pc, r10
#endif

		.ltorg

#ifdef CONFIG_ARM_VIRT_EXT
		.align	5
__hyp_reentry_vectors:
		W(b)	.			@ reset
		W(b)	.			@ undef
#ifdef CONFIG_EFI_STUB
		W(b)	__enter_kernel_from_hyp	@ hvc from HYP
#else
		W(b)	.			@ svc
#endif
		W(b)	.			@ pabort
		W(b)	.			@ dabort
		W(b)	__enter_kernel		@ hyp
		W(b)	.			@ irq
		W(b)	.			@ fiq
#endif /* CONFIG_ARM_VIRT_EXT */

__enter_kernel:
		mov	r0, #0			@ must be 0
		mov	r1, r7			@ restore architecture number
		mov	r2, r8			@ restore atags pointer
 ARM(		mov	pc, r4		)	@ call kernel
 M_CLASS(	add	r4, r4, #1	)	@ enter in Thumb mode for M class
 THUMB(		bx	r4		)	@ entry point is always ARM for A/R classes

reloc_code_end:

#ifdef CONFIG_EFI_STUB
__enter_kernel_from_hyp:
		mrc	p15, 4, r0, c1, c0, 0	@ read HSCTLR
		bic	r0, r0, #0x5		@ disable MMU and caches
		mcr	p15, 4, r0, c1, c0, 0	@ write HSCTLR
		isb
		b	__enter_kernel

ENTRY(efi_enter_kernel)
		mov	r4, r0			@ preserve image base
		mov	r8, r1			@ preserve DT pointer

		adr_l	r0, call_cache_fn
		adr	r1, 0f			@ clean the region of code we
		bl	cache_clean_flush	@ may run with the MMU off

#ifdef CONFIG_ARM_VIRT_EXT
		@
		@ The EFI spec does not support booting on ARM in HYP mode,
		@ since it mandates that the MMU and caches are on, with all
		@ 32-bit addressable DRAM mapped 1:1 using short descriptors.
		@
		@ While the EDK2 reference implementation adheres to this,
		@ U-Boot might decide to enter the EFI stub in HYP mode
		@ anyway, with the MMU and caches either on or off.
		@
		mrs	r0, cpsr		@ get the current mode
		msr	spsr_cxsf, r0		@ record boot mode
		and	r0, r0, #MODE_MASK	@ are we running in HYP mode?
		cmp	r0, #HYP_MODE
		bne	.Lefi_svc

		mrc	p15, 4, r1, c1, c0, 0	@ read HSCTLR
		tst	r1, #0x1		@ MMU enabled at HYP?
		beq	1f

		@
		@ When running in HYP mode with the caches on, we're better
		@ off just carrying on using the cached 1:1 mapping that the
		@ firmware provided. Set up the HYP vectors so HVC instructions
		@ issued from HYP mode take us to the correct handler code. We
		@ will disable the MMU before jumping to the kernel proper.
		@
 ARM(		bic	r1, r1, #(1 << 30)	) @ clear HSCTLR.TE
 THUMB(		orr	r1, r1, #(1 << 30)	) @ set HSCTLR.TE
		mcr	p15, 4, r1, c1, c0, 0
		adr	r0, __hyp_reentry_vectors
		mcr	p15, 4, r0, c12, c0, 0	@ set HYP vector base (HVBAR)
		isb
		b	.Lefi_hyp

		@
		@ When running in HYP mode with the caches off, we need to drop
		@ into SVC mode now, and let the decompressor set up its cached
		@ 1:1 mapping as usual.
		@
1:		mov	r9, r4			@ preserve image base
		bl	__hyp_stub_install	@ install HYP stub vectors
		safe_svcmode_maskall	r1	@ drop to SVC mode
		msr	spsr_cxsf, r0		@ record boot mode
		orr	r4, r9, #1		@ restore image base and set LSB
		b	.Lefi_hyp
.Lefi_svc:
#endif
		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
		tst	r0, #0x1		@ MMU enabled?
		orreq	r4, r4, #1		@ set LSB if not

.Lefi_hyp:
		mov	r0, r8			@ DT start
		add	r1, r8, r2		@ DT end
		bl	cache_clean_flush

		adr	r0, 0f			@ switch to our stack
		ldr	sp, [r0]
		add	sp, sp, r0

		mov	r5, #0			@ appended DTB size
		mov	r7, #0xFFFFFFFF		@ machine ID
		b	wont_overwrite
ENDPROC(efi_enter_kernel)
0:		.long	.L_user_stack_end - .
#endif

		.align
		.section ".stack", "aw", %nobits
.L_user_stack:	.space	4096
.L_user_stack_end: