1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * linux/arch/arm/mm/mmu.c |
4 | * |
5 | * Copyright (C) 1995-2005 Russell King |
6 | */ |
7 | #include <linux/module.h> |
8 | #include <linux/kernel.h> |
9 | #include <linux/errno.h> |
10 | #include <linux/init.h> |
11 | #include <linux/mman.h> |
12 | #include <linux/nodemask.h> |
13 | #include <linux/memblock.h> |
14 | #include <linux/fs.h> |
15 | #include <linux/vmalloc.h> |
16 | #include <linux/sizes.h> |
17 | |
18 | #include <asm/cp15.h> |
19 | #include <asm/cputype.h> |
20 | #include <asm/cachetype.h> |
21 | #include <asm/sections.h> |
22 | #include <asm/setup.h> |
23 | #include <asm/smp_plat.h> |
24 | #include <asm/tcm.h> |
25 | #include <asm/tlb.h> |
26 | #include <asm/highmem.h> |
27 | #include <asm/system_info.h> |
28 | #include <asm/traps.h> |
29 | #include <asm/procinfo.h> |
30 | #include <asm/page.h> |
31 | #include <asm/pgalloc.h> |
32 | #include <asm/kasan_def.h> |
33 | |
34 | #include <asm/mach/arch.h> |
35 | #include <asm/mach/map.h> |
36 | #include <asm/mach/pci.h> |
37 | #include <asm/fixmap.h> |
38 | |
39 | #include "fault.h" |
40 | #include "mm.h" |
41 | |
42 | extern unsigned long __atags_pointer; |
43 | |
44 | /* |
45 | * empty_zero_page is a special page that is used for |
46 | * zero-initialized data and COW. |
47 | */ |
48 | struct page *empty_zero_page; |
49 | EXPORT_SYMBOL(empty_zero_page); |
50 | |
51 | /* |
52 | * The pmd table for the upper-most set of pages. |
53 | */ |
54 | pmd_t *top_pmd; |
55 | |
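/*
 * First-level descriptor template used when pointing at user page
 * tables; build_mem_type_table() ORs in PMD_PXNTABLE below when the
 * CPU implements the PXN bit.
 */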
56 | pmdval_t user_pmd_table = _PAGE_USER_TABLE; |
57 | |
58 | #define CPOLICY_UNCACHED 0 |
59 | #define CPOLICY_BUFFERED 1 |
60 | #define CPOLICY_WRITETHROUGH 2 |
61 | #define CPOLICY_WRITEBACK 3 |
62 | #define CPOLICY_WRITEALLOC 4 |
63 | |
64 | static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK; |
65 | static unsigned int ecc_mask __initdata = 0; |
66 | pgprot_t pgprot_user; |
67 | pgprot_t pgprot_kernel; |
68 | |
69 | EXPORT_SYMBOL(pgprot_user); |
70 | EXPORT_SYMBOL(pgprot_kernel); |
71 | |
72 | struct cachepolicy { |
73 | const char policy[16]; |
74 | unsigned int cr_mask; |
75 | pmdval_t pmd; |
76 | pteval_t pte; |
77 | }; |
78 | |
79 | static struct cachepolicy cache_policies[] __initdata = { |
80 | { |
81 | .policy = "uncached", |
82 | .cr_mask = CR_W|CR_C, |
83 | .pmd = PMD_SECT_UNCACHED, |
84 | .pte = L_PTE_MT_UNCACHED, |
85 | }, { |
86 | .policy = "buffered", |
87 | .cr_mask = CR_C, |
88 | .pmd = PMD_SECT_BUFFERED, |
89 | .pte = L_PTE_MT_BUFFERABLE, |
90 | }, { |
91 | .policy = "writethrough", |
92 | .cr_mask = 0, |
93 | .pmd = PMD_SECT_WT, |
94 | .pte = L_PTE_MT_WRITETHROUGH, |
95 | }, { |
96 | .policy = "writeback", |
97 | .cr_mask = 0, |
98 | .pmd = PMD_SECT_WB, |
99 | .pte = L_PTE_MT_WRITEBACK, |
100 | }, { |
101 | .policy = "writealloc", |
102 | .cr_mask = 0, |
103 | .pmd = PMD_SECT_WBWA, |
104 | .pte = L_PTE_MT_WRITEALLOC, |
105 | } |
106 | }; |
107 | |
108 | #ifdef CONFIG_CPU_CP15 |
109 | static unsigned long initial_pmd_value __initdata = 0; |
110 | |
111 | /* |
112 | * Initialise the cache_policy variable with the initial state specified |
113 | * via the "pmd" value. This is used to ensure that on ARMv6 and later, |
114 | * the C code sets the page tables up with the same policy as the head |
115 | * assembly code, which avoids an illegal state where the TLBs can get |
116 | * confused. See comments in early_cachepolicy() for more information. |
117 | */ |
118 | void __init init_default_cache_policy(unsigned long pmd) |
119 | { |
120 | int i; |
121 | |
122 | initial_pmd_value = pmd; |
123 | |
124 | pmd &= PMD_SECT_CACHE_MASK; |
125 | |
126 | for (i = 0; i < ARRAY_SIZE(cache_policies); i++) |
127 | if (cache_policies[i].pmd == pmd) { |
128 | cachepolicy = i; |
129 | break; |
130 | } |
131 | |
132 | if (i == ARRAY_SIZE(cache_policies)) |
133 | pr_err("ERROR: could not find cache policy\n"); |
134 | } |
135 | |
136 | /* |
137 | * These are useful for identifying cache coherency problems by allowing |
138 | * the cache or the cache and writebuffer to be turned off. (Note: the |
139 | * write buffer should not be on and the cache off). |
140 | */ |
141 | static int __init early_cachepolicy(char *p) |
142 | { |
143 | int i, selected = -1; |
144 | |
145 | for (i = 0; i < ARRAY_SIZE(cache_policies); i++) { |
146 | int len = strlen(cache_policies[i].policy); |
147 | |
148 | if (memcmp(p, cache_policies[i].policy, len) == 0) { |
149 | selected = i; |
150 | break; |
151 | } |
152 | } |
153 | |
154 | if (selected == -1) |
155 | pr_err("ERROR: unknown or unsupported cache policy\n"); |
156 | |
157 | /* |
158 | * This restriction is partly to do with the way we boot; it is |
159 | * unpredictable to have memory mapped using two different sets of |
160 | * memory attributes (shared, type, and cache attribs). We cannot |
161 | * change these attributes once the initial assembly has set up the |
162 | * page tables. |
163 | */ |
164 | if (cpu_architecture() >= CPU_ARCH_ARMv6 && selected != cachepolicy) { |
165 | pr_warn("Only cachepolicy=%s supported on ARMv6 and later\n", |
166 | cache_policies[cachepolicy].policy); |
167 | return 0; |
168 | } |
169 | |
170 | if (selected != cachepolicy) { |
171 | unsigned long cr = __clear_cr(cache_policies[selected].cr_mask); |
172 | cachepolicy = selected; |
173 | flush_cache_all(); |
174 | set_cr(cr); |
175 | } |
176 | return 0; |
177 | } |
178 | early_param("cachepolicy", early_cachepolicy); |
179 | |
180 | static int __init early_nocache(char *__unused) |
181 | { |
182 | char *p = "buffered"; |
183 | pr_warn("nocache is deprecated; use cachepolicy=%s\n", p); |
184 | early_cachepolicy(p); |
185 | return 0; |
186 | } |
187 | early_param("nocache", early_nocache); |
188 | |
189 | static int __init early_nowrite(char *__unused) |
190 | { |
191 | char *p = "uncached"; |
192 | pr_warn("nowb is deprecated; use cachepolicy=%s\n", p); |
193 | early_cachepolicy(p); |
194 | return 0; |
195 | } |
196 | early_param("nowb", early_nowrite); |
197 | |
198 | #ifndef CONFIG_ARM_LPAE |
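/*
 * "ecc=on|off" controls the protection/ECC bit (PMD_PROTECTION) set in
 * first-level descriptors on CPUs that implement it; the bit has no
 * equivalent in the LPAE descriptor format.
 */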
199 | static int __init early_ecc(char *p) |
200 | { |
201 | if (memcmp(p, "on", 2) == 0) |
202 | ecc_mask = PMD_PROTECTION; |
203 | else if (memcmp(p, "off", 3) == 0) |
204 | ecc_mask = 0; |
205 | return 0; |
206 | } |
207 | early_param("ecc", early_ecc); |
208 | #endif |
209 | |
210 | #else /* ifdef CONFIG_CPU_CP15 */ |
211 | |
212 | static int __init early_cachepolicy(char *p) |
213 | { |
214 | pr_warn("cachepolicy kernel parameter not supported without cp15\n"); |
215 | return 0; |
216 | } |
217 | early_param("cachepolicy", early_cachepolicy); |
218 | |
219 | static int __init noalign_setup(char *__unused) |
220 | { |
221 | pr_warn("noalign kernel parameter not supported without cp15\n"); |
222 | return 1; |
223 | } |
224 | __setup("noalign", noalign_setup); |
225 | |
226 | #endif /* ifdef CONFIG_CPU_CP15 / else */ |
227 | |
228 | #define PROT_PTE_DEVICE L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_XN |
229 | #define PROT_PTE_S2_DEVICE PROT_PTE_DEVICE |
230 | #define PROT_SECT_DEVICE PMD_TYPE_SECT|PMD_SECT_AP_WRITE |
231 | |
232 | static struct mem_type mem_types[] __ro_after_init = { |
233 | [MT_DEVICE] = { /* Strongly ordered / ARMv6 shared device */ |
234 | .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED | |
235 | L_PTE_SHARED, |
236 | .prot_l1 = PMD_TYPE_TABLE, |
237 | .prot_sect = PROT_SECT_DEVICE | PMD_SECT_S, |
238 | .domain = DOMAIN_IO, |
239 | }, |
240 | [MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */ |
241 | .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_NONSHARED, |
242 | .prot_l1 = PMD_TYPE_TABLE, |
243 | .prot_sect = PROT_SECT_DEVICE, |
244 | .domain = DOMAIN_IO, |
245 | }, |
246 | [MT_DEVICE_CACHED] = { /* ioremap_cache */ |
247 | .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_CACHED, |
248 | .prot_l1 = PMD_TYPE_TABLE, |
249 | .prot_sect = PROT_SECT_DEVICE | PMD_SECT_WB, |
250 | .domain = DOMAIN_IO, |
251 | }, |
252 | [MT_DEVICE_WC] = { /* ioremap_wc */ |
253 | .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_WC, |
254 | .prot_l1 = PMD_TYPE_TABLE, |
255 | .prot_sect = PROT_SECT_DEVICE, |
256 | .domain = DOMAIN_IO, |
257 | }, |
258 | [MT_UNCACHED] = { |
259 | .prot_pte = PROT_PTE_DEVICE, |
260 | .prot_l1 = PMD_TYPE_TABLE, |
261 | .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN, |
262 | .domain = DOMAIN_IO, |
263 | }, |
264 | [MT_CACHECLEAN] = { |
265 | .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN, |
266 | .domain = DOMAIN_KERNEL, |
267 | }, |
268 | #ifndef CONFIG_ARM_LPAE |
269 | [MT_MINICLEAN] = { |
270 | .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN | PMD_SECT_MINICACHE, |
271 | .domain = DOMAIN_KERNEL, |
272 | }, |
273 | #endif |
274 | [MT_LOW_VECTORS] = { |
275 | .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | |
276 | L_PTE_RDONLY, |
277 | .prot_l1 = PMD_TYPE_TABLE, |
278 | .domain = DOMAIN_VECTORS, |
279 | }, |
280 | [MT_HIGH_VECTORS] = { |
281 | .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | |
282 | L_PTE_USER | L_PTE_RDONLY, |
283 | .prot_l1 = PMD_TYPE_TABLE, |
284 | .domain = DOMAIN_VECTORS, |
285 | }, |
286 | [MT_MEMORY_RWX] = { |
287 | .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY, |
288 | .prot_l1 = PMD_TYPE_TABLE, |
289 | .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, |
290 | .domain = DOMAIN_KERNEL, |
291 | }, |
292 | [MT_MEMORY_RW] = { |
293 | .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | |
294 | L_PTE_XN, |
295 | .prot_l1 = PMD_TYPE_TABLE, |
296 | .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, |
297 | .domain = DOMAIN_KERNEL, |
298 | }, |
299 | [MT_MEMORY_RO] = { |
300 | .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | |
301 | L_PTE_XN | L_PTE_RDONLY, |
302 | .prot_l1 = PMD_TYPE_TABLE, |
303 | #ifdef CONFIG_ARM_LPAE |
304 | .prot_sect = PMD_TYPE_SECT | L_PMD_SECT_RDONLY | PMD_SECT_AP2, |
305 | #else |
306 | .prot_sect = PMD_TYPE_SECT, |
307 | #endif |
308 | .domain = DOMAIN_KERNEL, |
309 | }, |
310 | [MT_ROM] = { |
311 | .prot_sect = PMD_TYPE_SECT, |
312 | .domain = DOMAIN_KERNEL, |
313 | }, |
314 | [MT_MEMORY_RWX_NONCACHED] = { |
315 | .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | |
316 | L_PTE_MT_BUFFERABLE, |
317 | .prot_l1 = PMD_TYPE_TABLE, |
318 | .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, |
319 | .domain = DOMAIN_KERNEL, |
320 | }, |
321 | [MT_MEMORY_RW_DTCM] = { |
322 | .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | |
323 | L_PTE_XN, |
324 | .prot_l1 = PMD_TYPE_TABLE, |
325 | .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN, |
326 | .domain = DOMAIN_KERNEL, |
327 | }, |
328 | [MT_MEMORY_RWX_ITCM] = { |
329 | .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY, |
330 | .prot_l1 = PMD_TYPE_TABLE, |
331 | .domain = DOMAIN_KERNEL, |
332 | }, |
333 | [MT_MEMORY_RW_SO] = { |
334 | .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | |
335 | L_PTE_MT_UNCACHED | L_PTE_XN, |
336 | .prot_l1 = PMD_TYPE_TABLE, |
337 | .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_S | |
338 | PMD_SECT_UNCACHED | PMD_SECT_XN, |
339 | .domain = DOMAIN_KERNEL, |
340 | }, |
341 | [MT_MEMORY_DMA_READY] = { |
342 | .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | |
343 | L_PTE_XN, |
344 | .prot_l1 = PMD_TYPE_TABLE, |
345 | .domain = DOMAIN_KERNEL, |
346 | }, |
347 | }; |
348 | |
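/*
 * Return the protection/attribute template for the given mapping type,
 * or NULL if the type is out of range.
 */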
349 | const struct mem_type *get_mem_type(unsigned int type) |
350 | { |
351 | return type < ARRAY_SIZE(mem_types) ? &mem_types[type] : NULL; |
352 | } |
353 | EXPORT_SYMBOL(get_mem_type); |
354 | |
355 | static pte_t *(*pte_offset_fixmap)(pmd_t *dir, unsigned long addr); |
356 | |
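/*
 * Statically allocated PTE table backing the early fixmap, used for
 * fixmap mappings created before the normal allocators are available.
 */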
357 | static pte_t bm_pte[PTRS_PER_PTE + PTE_HWTABLE_PTRS] |
358 | __aligned(PTE_HWTABLE_OFF + PTE_HWTABLE_SIZE) __initdata; |
359 | |
360 | static pte_t * __init pte_offset_early_fixmap(pmd_t *dir, unsigned long addr) |
361 | { |
362 | return &bm_pte[pte_index(addr)]; |
363 | } |
364 | |
365 | static pte_t *pte_offset_late_fixmap(pmd_t *dir, unsigned long addr) |
366 | { |
367 | return pte_offset_kernel(dir, addr); |
368 | } |
369 | |
370 | static inline pmd_t * __init fixmap_pmd(unsigned long addr) |
371 | { |
372 | return pmd_off_k(addr); |
373 | } |
374 | |
375 | void __init early_fixmap_init(void) |
376 | { |
377 | pmd_t *pmd; |
378 | |
379 | /* |
380 | * The early fixmap range spans multiple pmds, for which |
381 | * we are not prepared: |
382 | */ |
383 | BUILD_BUG_ON((__fix_to_virt(__end_of_early_ioremap_region) >> PMD_SHIFT) |
384 | != FIXADDR_TOP >> PMD_SHIFT); |
385 | |
386 | pmd = fixmap_pmd(FIXADDR_TOP); |
387 | pmd_populate_kernel(&init_mm, pmd, bm_pte); |
388 | |
389 | pte_offset_fixmap = pte_offset_early_fixmap; |
390 | } |
391 | |
392 | /* |
393 | * To avoid TLB flush broadcasts, this uses local_flush_tlb_kernel_range(). |
394 | * As a result, this can only be called with preemption disabled, as under |
395 | * stop_machine(). |
396 | */ |
397 | void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) |
398 | { |
399 | unsigned long vaddr = __fix_to_virt(idx); |
400 | pte_t *pte = pte_offset_fixmap(pmd_off_k(vaddr), vaddr); |
401 | |
402 | /* Make sure fixmap region does not exceed available allocation. */ |
403 | BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) < FIXADDR_START); |
404 | BUG_ON(idx >= __end_of_fixed_addresses); |
405 | |
406 | /* We support only device mappings before pgprot_kernel is set. */ |
407 | if (WARN_ON(pgprot_val(prot) != pgprot_val(FIXMAP_PAGE_IO) && |
408 | pgprot_val(prot) && pgprot_val(pgprot_kernel) == 0)) |
409 | return; |
410 | |
411 | if (pgprot_val(prot)) |
412 | set_pte_at(NULL, vaddr, pte, |
413 | pfn_pte(phys >> PAGE_SHIFT, prot)); |
414 | else |
415 | pte_clear(NULL, vaddr, pte); |
416 | local_flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE); |
417 | } |
418 | |
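/*
 * Base userspace protections for each VM_READ/VM_WRITE/VM_EXEC/VM_SHARED
 * combination; build_mem_type_table() folds the boot-time cache policy
 * and shareability bits into these entries.
 */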
419 | static pgprot_t protection_map[16] __ro_after_init = { |
420 | [VM_NONE] = __PAGE_NONE, |
421 | [VM_READ] = __PAGE_READONLY, |
422 | [VM_WRITE] = __PAGE_COPY, |
423 | [VM_WRITE | VM_READ] = __PAGE_COPY, |
424 | [VM_EXEC] = __PAGE_READONLY_EXEC, |
425 | [VM_EXEC | VM_READ] = __PAGE_READONLY_EXEC, |
426 | [VM_EXEC | VM_WRITE] = __PAGE_COPY_EXEC, |
427 | [VM_EXEC | VM_WRITE | VM_READ] = __PAGE_COPY_EXEC, |
428 | [VM_SHARED] = __PAGE_NONE, |
429 | [VM_SHARED | VM_READ] = __PAGE_READONLY, |
430 | [VM_SHARED | VM_WRITE] = __PAGE_SHARED, |
431 | [VM_SHARED | VM_WRITE | VM_READ] = __PAGE_SHARED, |
432 | [VM_SHARED | VM_EXEC] = __PAGE_READONLY_EXEC, |
433 | [VM_SHARED | VM_EXEC | VM_READ] = __PAGE_READONLY_EXEC, |
434 | [VM_SHARED | VM_EXEC | VM_WRITE] = __PAGE_SHARED_EXEC, |
435 | [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = __PAGE_SHARED_EXEC |
436 | }; |
437 | DECLARE_VM_GET_PAGE_PROT |
438 | |
439 | /* |
440 | * Adjust the PMD section entries according to the CPU in use. |
441 | */ |
442 | static void __init build_mem_type_table(void) |
443 | { |
444 | struct cachepolicy *cp; |
445 | unsigned int cr = get_cr(); |
446 | pteval_t user_pgprot, kern_pgprot, vecs_pgprot; |
447 | int cpu_arch = cpu_architecture(); |
448 | int i; |
449 | |
450 | if (cpu_arch < CPU_ARCH_ARMv6) { |
451 | #if defined(CONFIG_CPU_DCACHE_DISABLE) |
452 | if (cachepolicy > CPOLICY_BUFFERED) |
453 | cachepolicy = CPOLICY_BUFFERED; |
454 | #elif defined(CONFIG_CPU_DCACHE_WRITETHROUGH) |
455 | if (cachepolicy > CPOLICY_WRITETHROUGH) |
456 | cachepolicy = CPOLICY_WRITETHROUGH; |
457 | #endif |
458 | } |
459 | if (cpu_arch < CPU_ARCH_ARMv5) { |
460 | if (cachepolicy >= CPOLICY_WRITEALLOC) |
461 | cachepolicy = CPOLICY_WRITEBACK; |
462 | ecc_mask = 0; |
463 | } |
464 | |
465 | if (is_smp()) { |
466 | if (cachepolicy != CPOLICY_WRITEALLOC) { |
467 | pr_warn("Forcing write-allocate cache policy for SMP\n"); |
468 | cachepolicy = CPOLICY_WRITEALLOC; |
469 | } |
470 | if (!(initial_pmd_value & PMD_SECT_S)) { |
471 | pr_warn("Forcing shared mappings for SMP\n"); |
472 | initial_pmd_value |= PMD_SECT_S; |
473 | } |
474 | } |
475 | |
476 | /* |
477 | * Strip out features not present on earlier architectures. |
478 | * Pre-ARMv5 CPUs don't have TEX bits. Pre-ARMv6 CPUs or those |
479 | * without extended page tables don't have the 'Shared' bit. |
480 | */ |
481 | if (cpu_arch < CPU_ARCH_ARMv5) |
482 | for (i = 0; i < ARRAY_SIZE(mem_types); i++) |
483 | mem_types[i].prot_sect &= ~PMD_SECT_TEX(7); |
484 | if ((cpu_arch < CPU_ARCH_ARMv6 || !(cr & CR_XP)) && !cpu_is_xsc3()) |
485 | for (i = 0; i < ARRAY_SIZE(mem_types); i++) |
486 | mem_types[i].prot_sect &= ~PMD_SECT_S; |
487 | |
488 | /* |
489 | * ARMv5 and lower, bit 4 must be set for page tables (was: cache |
490 | * "update-able on write" bit on ARM610). However, Xscale and |
491 | * Xscale3 require this bit to be cleared. |
492 | */ |
493 | if (cpu_is_xscale_family()) { |
494 | for (i = 0; i < ARRAY_SIZE(mem_types); i++) { |
495 | mem_types[i].prot_sect &= ~PMD_BIT4; |
496 | mem_types[i].prot_l1 &= ~PMD_BIT4; |
497 | } |
498 | } else if (cpu_arch < CPU_ARCH_ARMv6) { |
499 | for (i = 0; i < ARRAY_SIZE(mem_types); i++) { |
500 | if (mem_types[i].prot_l1) |
501 | mem_types[i].prot_l1 |= PMD_BIT4; |
502 | if (mem_types[i].prot_sect) |
503 | mem_types[i].prot_sect |= PMD_BIT4; |
504 | } |
505 | } |
506 | |
507 | /* |
508 | * Mark the device areas according to the CPU/architecture. |
509 | */ |
510 | if (cpu_is_xsc3() || (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP))) { |
511 | if (!cpu_is_xsc3()) { |
512 | /* |
513 | * Mark device regions on ARMv6+ as execute-never |
514 | * to prevent speculative instruction fetches. |
515 | */ |
516 | mem_types[MT_DEVICE].prot_sect |= PMD_SECT_XN; |
517 | mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_XN; |
518 | mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_XN; |
519 | mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_XN; |
520 | |
521 | /* Also setup NX memory mapping */ |
522 | mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_XN; |
523 | mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_XN; |
524 | } |
525 | if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) { |
526 | /* |
527 | * For ARMv7 with TEX remapping, |
528 | * - shared device is SXCB=1100 |
529 | * - nonshared device is SXCB=0100 |
530 | * - write combine device mem is SXCB=0001 |
531 | * (Uncached Normal memory) |
532 | */ |
533 | mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1); |
534 | mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(1); |
535 | mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE; |
536 | } else if (cpu_is_xsc3()) { |
537 | /* |
538 | * For Xscale3, |
539 | * - shared device is TEXCB=00101 |
540 | * - nonshared device is TEXCB=01000 |
541 | * - write combine device mem is TEXCB=00100 |
542 | * (Inner/Outer Uncacheable in xsc3 parlance) |
543 | */ |
544 | mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1) | PMD_SECT_BUFFERED; |
545 | mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2); |
546 | mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1); |
547 | } else { |
548 | /* |
549 | * For ARMv6 and ARMv7 without TEX remapping, |
550 | * - shared device is TEXCB=00001 |
551 | * - nonshared device is TEXCB=01000 |
552 | * - write combine device mem is TEXCB=00100 |
553 | * (Uncached Normal in ARMv6 parlance). |
554 | */ |
555 | mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED; |
556 | mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2); |
557 | mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1); |
558 | } |
559 | } else { |
560 | /* |
561 | * On others, write combining is "Uncached/Buffered" |
562 | */ |
563 | mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE; |
564 | } |
565 | |
566 | /* |
567 | * Now deal with the memory-type mappings |
568 | */ |
569 | cp = &cache_policies[cachepolicy]; |
570 | vecs_pgprot = kern_pgprot = user_pgprot = cp->pte; |
571 | |
572 | #ifndef CONFIG_ARM_LPAE |
573 | /* |
574 | * We don't use domains on ARMv6 (since this causes problems with |
575 | * v6/v7 kernels), so we must use a separate memory type for user |
576 | * r/o, kernel r/w to map the vectors page. |
577 | */ |
578 | if (cpu_arch == CPU_ARCH_ARMv6) |
579 | vecs_pgprot |= L_PTE_MT_VECTORS; |
580 | |
581 | /* |
582 | * Check whether the CPU supports the PXN bit in the |
583 | * Short-descriptor translation table format descriptors. |
584 | */ |
585 | if (cpu_arch == CPU_ARCH_ARMv7 && |
586 | (read_cpuid_ext(CPUID_EXT_MMFR0) & 0xF) >= 4) { |
587 | user_pmd_table |= PMD_PXNTABLE; |
588 | } |
589 | #endif |
590 | |
591 | /* |
592 | * ARMv6 and above have extended page tables. |
593 | */ |
594 | if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) { |
595 | #ifndef CONFIG_ARM_LPAE |
596 | /* |
597 | * Mark cache clean areas and XIP ROM read only |
598 | * from SVC mode and no access from userspace. |
599 | */ |
600 | mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; |
601 | mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; |
602 | mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; |
603 | mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; |
604 | #endif |
605 | |
606 | /* |
607 | * If the initial page tables were created with the S bit |
608 | * set, then we need to do the same here for the same |
609 | * reasons given in early_cachepolicy(). |
610 | */ |
611 | if (initial_pmd_value & PMD_SECT_S) { |
612 | user_pgprot |= L_PTE_SHARED; |
613 | kern_pgprot |= L_PTE_SHARED; |
614 | vecs_pgprot |= L_PTE_SHARED; |
615 | mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_S; |
616 | mem_types[MT_DEVICE_WC].prot_pte |= L_PTE_SHARED; |
617 | mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_S; |
618 | mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED; |
619 | mem_types[MT_MEMORY_RWX].prot_sect |= PMD_SECT_S; |
620 | mem_types[MT_MEMORY_RWX].prot_pte |= L_PTE_SHARED; |
621 | mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_S; |
622 | mem_types[MT_MEMORY_RW].prot_pte |= L_PTE_SHARED; |
623 | mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_S; |
624 | mem_types[MT_MEMORY_RO].prot_pte |= L_PTE_SHARED; |
625 | mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED; |
626 | mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_S; |
627 | mem_types[MT_MEMORY_RWX_NONCACHED].prot_pte |= L_PTE_SHARED; |
628 | } |
629 | } |
630 | |
631 | /* |
632 | * Non-cacheable Normal - intended for memory areas that must |
633 | * not cause dirty cache line writebacks when used |
634 | */ |
635 | if (cpu_arch >= CPU_ARCH_ARMv6) { |
636 | if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) { |
637 | /* Non-cacheable Normal is XCB = 001 */ |
638 | mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= |
639 | PMD_SECT_BUFFERED; |
640 | } else { |
641 | /* For both ARMv6 and non-TEX-remapping ARMv7 */ |
642 | mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= |
643 | PMD_SECT_TEX(1); |
644 | } |
645 | } else { |
646 | mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE; |
647 | } |
648 | |
649 | #ifdef CONFIG_ARM_LPAE |
650 | /* |
651 | * Do not generate access flag faults for the kernel mappings. |
652 | */ |
653 | for (i = 0; i < ARRAY_SIZE(mem_types); i++) { |
654 | mem_types[i].prot_pte |= PTE_EXT_AF; |
655 | if (mem_types[i].prot_sect) |
656 | mem_types[i].prot_sect |= PMD_SECT_AF; |
657 | } |
658 | kern_pgprot |= PTE_EXT_AF; |
659 | vecs_pgprot |= PTE_EXT_AF; |
660 | |
661 | /* |
662 | * Set PXN for user mappings |
663 | */ |
664 | user_pgprot |= PTE_EXT_PXN; |
665 | #endif |
666 | |
667 | for (i = 0; i < 16; i++) { |
668 | pteval_t v = pgprot_val(protection_map[i]); |
669 | protection_map[i] = __pgprot(v | user_pgprot); |
670 | } |
671 | |
672 | mem_types[MT_LOW_VECTORS].prot_pte |= vecs_pgprot; |
673 | mem_types[MT_HIGH_VECTORS].prot_pte |= vecs_pgprot; |
674 | |
675 | pgprot_user = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot); |
676 | pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | |
677 | L_PTE_DIRTY | kern_pgprot); |
678 | |
679 | mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask; |
680 | mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask; |
681 | mem_types[MT_MEMORY_RWX].prot_sect |= ecc_mask | cp->pmd; |
682 | mem_types[MT_MEMORY_RWX].prot_pte |= kern_pgprot; |
683 | mem_types[MT_MEMORY_RW].prot_sect |= ecc_mask | cp->pmd; |
684 | mem_types[MT_MEMORY_RW].prot_pte |= kern_pgprot; |
685 | mem_types[MT_MEMORY_RO].prot_sect |= ecc_mask | cp->pmd; |
686 | mem_types[MT_MEMORY_RO].prot_pte |= kern_pgprot; |
687 | mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot; |
688 | mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= ecc_mask; |
689 | mem_types[MT_ROM].prot_sect |= cp->pmd; |
690 | |
691 | switch (cp->pmd) { |
692 | case PMD_SECT_WT: |
693 | mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WT; |
694 | break; |
695 | case PMD_SECT_WB: |
696 | case PMD_SECT_WBWA: |
697 | mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB; |
698 | break; |
699 | } |
700 | pr_info("Memory policy: %sData cache %s\n", |
701 | ecc_mask ? "ECC enabled, " : "", cp->policy); |
702 | |
703 | for (i = 0; i < ARRAY_SIZE(mem_types); i++) { |
704 | struct mem_type *t = &mem_types[i]; |
705 | if (t->prot_l1) |
706 | t->prot_l1 |= PMD_DOMAIN(t->domain); |
707 | if (t->prot_sect) |
708 | t->prot_sect |= PMD_DOMAIN(t->domain); |
709 | } |
710 | } |
711 | |
712 | #ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE |
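/*
 * Choose protections for /dev/mem style physical mappings: addresses
 * without a valid struct page are mapped uncached, O_SYNC mappings get
 * write-combining, and ordinary RAM keeps the caller's protections.
 */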
713 | pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, |
714 | unsigned long size, pgprot_t vma_prot) |
715 | { |
716 | if (!pfn_valid(pfn)) |
717 | return pgprot_noncached(vma_prot); |
718 | else if (file->f_flags & O_SYNC) |
719 | return pgprot_writecombine(vma_prot); |
720 | return vma_prot; |
721 | } |
722 | EXPORT_SYMBOL(phys_mem_access_prot); |
723 | #endif |
724 | |
725 | #define vectors_base() (vectors_high() ? 0xffff0000 : 0) |
726 | |
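/*
 * Page table allocators: early_alloc() hands out zeroed memory from
 * memblock during early boot, while late_alloc() uses the ptdesc
 * page-table allocator for mappings created via create_mapping_late().
 */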
727 | static void __init *early_alloc(unsigned long sz) |
728 | { |
729 | void *ptr = memblock_alloc(sz, sz); |
730 | |
731 | if (!ptr) |
732 | panic("%s: Failed to allocate %lu bytes align=0x%lx\n", |
733 | __func__, sz, sz); |
734 | |
735 | return ptr; |
736 | } |
737 | |
738 | static void *__init late_alloc(unsigned long sz) |
739 | { |
740 | void *ptdesc = pagetable_alloc(GFP_PGTABLE_KERNEL & ~__GFP_HIGHMEM, |
741 | get_order(sz)); |
742 | |
743 | if (!ptdesc || !pagetable_pte_ctor(ptdesc)) |
744 | BUG(); |
745 | return ptdesc_to_virt(ptdesc); |
746 | } |
747 | |
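/*
 * Return the PTE slot for addr, allocating a new PTE table and
 * populating the PMD entry with prot if none is present yet.
 */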
748 | static pte_t * __init arm_pte_alloc(pmd_t *pmd, unsigned long addr, |
749 | unsigned long prot, |
750 | void *(*alloc)(unsigned long sz)) |
751 | { |
752 | if (pmd_none(*pmd)) { |
753 | pte_t *pte = alloc(PTE_HWTABLE_OFF + PTE_HWTABLE_SIZE); |
754 | __pmd_populate(pmd, __pa(pte), prot); |
755 | } |
756 | BUG_ON(pmd_bad(*pmd)); |
757 | return pte_offset_kernel(pmd, addr); |
758 | } |
759 | |
760 | static pte_t * __init early_pte_alloc(pmd_t *pmd, unsigned long addr, |
761 | unsigned long prot) |
762 | { |
763 | return arm_pte_alloc(pmd, addr, prot, early_alloc); |
764 | } |
765 | |
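/*
 * Fill in PTEs for [addr, end) starting at page frame pfn, using the
 * page protections of the given memory type.
 */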
766 | static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, |
767 | unsigned long end, unsigned long pfn, |
768 | const struct mem_type *type, |
769 | void *(*alloc)(unsigned long sz), |
770 | bool ng) |
771 | { |
772 | pte_t *pte = arm_pte_alloc(pmd, addr, type->prot_l1, alloc); |
773 | do { |
774 | set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)), |
775 | ng ? PTE_EXT_NG : 0); |
776 | pfn++; |
777 | } while (pte++, addr += PAGE_SIZE, addr != end); |
778 | } |
779 | |
780 | static void __init __map_init_section(pmd_t *pmd, unsigned long addr, |
781 | unsigned long end, phys_addr_t phys, |
782 | const struct mem_type *type, bool ng) |
783 | { |
784 | pmd_t *p = pmd; |
785 | |
786 | #ifndef CONFIG_ARM_LPAE |
787 | /* |
788 | * In classic MMU format, puds and pmds are folded in to |
789 | * the pgds. pmd_offset gives the PGD entry. PGDs refer to a |
790 | * group of L1 entries making up one logical pointer to |
791 | * an L2 table (2MB), whereas PMDs refer to the individual |
792 | * L1 entries (1MB). Hence increment to get the correct |
793 | * offset for odd 1MB sections. |
794 | * (See arch/arm/include/asm/pgtable-2level.h) |
795 | */ |
796 | if (addr & SECTION_SIZE) |
797 | pmd++; |
798 | #endif |
799 | do { |
800 | *pmd = __pmd(phys | type->prot_sect | (ng ? PMD_SECT_nG : 0)); |
801 | phys += SECTION_SIZE; |
802 | } while (pmd++, addr += SECTION_SIZE, addr != end); |
803 | |
804 | flush_pmd_entry(p); |
805 | } |
806 | |
807 | static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, |
808 | unsigned long end, phys_addr_t phys, |
809 | const struct mem_type *type, |
810 | void *(*alloc)(unsigned long sz), bool ng) |
811 | { |
812 | pmd_t *pmd = pmd_offset(pud, addr); |
813 | unsigned long next; |
814 | |
815 | do { |
816 | /* |
817 | * With LPAE, we must loop over to map |
818 | * all the pmds for the given range. |
819 | */ |
820 | next = pmd_addr_end(addr, end); |
821 | |
822 | /* |
823 | * Try a section mapping - addr, next and phys must all be |
824 | * aligned to a section boundary. |
825 | */ |
826 | if (type->prot_sect && |
827 | ((addr | next | phys) & ~SECTION_MASK) == 0) { |
828 | __map_init_section(pmd, addr, next, phys, type, ng); |
829 | } else { |
830 | alloc_init_pte(pmd, addr, next, |
831 | __phys_to_pfn(phys), type, alloc, ng); |
832 | } |
833 | |
834 | phys += next - addr; |
835 | |
836 | } while (pmd++, addr = next, addr != end); |
837 | } |
838 | |
839 | static void __init alloc_init_pud(p4d_t *p4d, unsigned long addr, |
840 | unsigned long end, phys_addr_t phys, |
841 | const struct mem_type *type, |
842 | void *(*alloc)(unsigned long sz), bool ng) |
843 | { |
844 | pud_t *pud = pud_offset(p4d, addr); |
845 | unsigned long next; |
846 | |
847 | do { |
848 | next = pud_addr_end(addr, end); |
849 | alloc_init_pmd(pud, addr, next, phys, type, alloc, ng); |
850 | phys += next - addr; |
851 | } while (pud++, addr = next, addr != end); |
852 | } |
853 | |
854 | static void __init alloc_init_p4d(pgd_t *pgd, unsigned long addr, |
855 | unsigned long end, phys_addr_t phys, |
856 | const struct mem_type *type, |
857 | void *(*alloc)(unsigned long sz), bool ng) |
858 | { |
859 | p4d_t *p4d = p4d_offset(pgd, addr); |
860 | unsigned long next; |
861 | |
862 | do { |
863 | next = p4d_addr_end(addr, end); |
864 | alloc_init_pud(p4d, addr, next, phys, type, alloc, ng); |
865 | phys += next - addr; |
866 | } while (p4d++, addr = next, addr != end); |
867 | } |
868 | |
869 | #ifndef CONFIG_ARM_LPAE |
870 | static void __init create_36bit_mapping(struct mm_struct *mm, |
871 | struct map_desc *md, |
872 | const struct mem_type *type, |
873 | bool ng) |
874 | { |
875 | unsigned long addr, length, end; |
876 | phys_addr_t phys; |
877 | pgd_t *pgd; |
878 | |
879 | addr = md->virtual; |
880 | phys = __pfn_to_phys(md->pfn); |
881 | length = PAGE_ALIGN(md->length); |
882 | |
883 | if (!(cpu_architecture() >= CPU_ARCH_ARMv6 || cpu_is_xsc3())) { |
884 | pr_err("MM: CPU does not support supersection mapping for 0x%08llx at 0x%08lx\n" , |
885 | (long long)__pfn_to_phys((u64)md->pfn), addr); |
886 | return; |
887 | } |
888 | |
889 | /* N.B. ARMv6 supersections are only defined to work with domain 0. |
890 | * Since domain assignments can in fact be arbitrary, the |
891 | * 'domain == 0' check below is required to ensure that ARMv6 |
892 | * supersections are only allocated for domain 0 regardless |
893 | * of the actual domain assignments in use. |
894 | */ |
895 | if (type->domain) { |
896 | pr_err("MM: invalid domain in supersection mapping for 0x%08llx at 0x%08lx\n" , |
897 | (long long)__pfn_to_phys((u64)md->pfn), addr); |
898 | return; |
899 | } |
900 | |
901 | if ((addr | length | __pfn_to_phys(md->pfn)) & ~SUPERSECTION_MASK) { |
902 | pr_err("MM: cannot create mapping for 0x%08llx at 0x%08lx invalid alignment\n" , |
903 | (long long)__pfn_to_phys((u64)md->pfn), addr); |
904 | return; |
905 | } |
906 | |
907 | /* |
908 | * Shift bits [35:32] of address into bits [23:20] of PMD |
909 | * (See ARMv6 spec). |
910 | */ |
911 | phys |= (((md->pfn >> (32 - PAGE_SHIFT)) & 0xF) << 20); |
912 | |
913 | pgd = pgd_offset(mm, addr); |
914 | end = addr + length; |
915 | do { |
916 | p4d_t *p4d = p4d_offset(pgd, addr); |
917 | pud_t *pud = pud_offset(p4d, addr); |
918 | pmd_t *pmd = pmd_offset(pud, addr); |
919 | int i; |
920 | |
921 | for (i = 0; i < 16; i++) |
922 | *pmd++ = __pmd(phys | type->prot_sect | PMD_SECT_SUPER | |
923 | (ng ? PMD_SECT_nG : 0)); |
924 | |
925 | addr += SUPERSECTION_SIZE; |
926 | phys += SUPERSECTION_SIZE; |
927 | pgd += SUPERSECTION_SIZE >> PGDIR_SHIFT; |
928 | } while (addr != end); |
929 | } |
930 | #endif /* !CONFIG_ARM_LPAE */ |
931 | |
932 | static void __init __create_mapping(struct mm_struct *mm, struct map_desc *md, |
933 | void *(*alloc)(unsigned long sz), |
934 | bool ng) |
935 | { |
936 | unsigned long addr, length, end; |
937 | phys_addr_t phys; |
938 | const struct mem_type *type; |
939 | pgd_t *pgd; |
940 | |
941 | type = &mem_types[md->type]; |
942 | |
943 | #ifndef CONFIG_ARM_LPAE |
944 | /* |
945 | * Catch 36-bit addresses |
946 | */ |
947 | if (md->pfn >= 0x100000) { |
948 | create_36bit_mapping(mm, md, type, ng); |
949 | return; |
950 | } |
951 | #endif |
952 | |
953 | addr = md->virtual & PAGE_MASK; |
954 | phys = __pfn_to_phys(md->pfn); |
955 | length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK)); |
956 | |
957 | if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) { |
958 | pr_warn("BUG: map for 0x%08llx at 0x%08lx cannot be mapped using pages, ignoring.\n", |
959 | (long long)__pfn_to_phys(md->pfn), addr); |
960 | return; |
961 | } |
962 | |
963 | pgd = pgd_offset(mm, addr); |
964 | end = addr + length; |
965 | do { |
966 | unsigned long next = pgd_addr_end(addr, end); |
967 | |
968 | alloc_init_p4d(pgd, addr, next, phys, type, alloc, ng); |
969 | |
970 | phys += next - addr; |
971 | addr = next; |
972 | } while (pgd++, addr != end); |
973 | } |
974 | |
975 | /* |
976 | * Create the page directory entries and any necessary |
977 | * page tables for the mapping specified by `md'. We |
978 | * are able to cope here with varying sizes and address |
979 | * offsets, and we take full advantage of sections and |
980 | * supersections. |
981 | */ |
982 | static void __init create_mapping(struct map_desc *md) |
983 | { |
984 | if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) { |
985 | pr_warn("BUG: not creating mapping for 0x%08llx at 0x%08lx in user region\n" , |
986 | (long long)__pfn_to_phys((u64)md->pfn), md->virtual); |
987 | return; |
988 | } |
989 | |
990 | if (md->type == MT_DEVICE && |
991 | md->virtual >= PAGE_OFFSET && md->virtual < FIXADDR_START && |
992 | (md->virtual < VMALLOC_START || md->virtual >= VMALLOC_END)) { |
993 | pr_warn("BUG: mapping for 0x%08llx at 0x%08lx out of vmalloc space\n" , |
994 | (long long)__pfn_to_phys((u64)md->pfn), md->virtual); |
995 | } |
996 | |
997 | __create_mapping(&init_mm, md, early_alloc, false); |
998 | } |
999 | |
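/*
 * As create_mapping(), but may target an mm other than init_mm, takes
 * page tables from late_alloc() and can create non-global (ng) entries.
 */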
1000 | void __init create_mapping_late(struct mm_struct *mm, struct map_desc *md, |
1001 | bool ng) |
1002 | { |
1003 | #ifdef CONFIG_ARM_LPAE |
1004 | p4d_t *p4d; |
1005 | pud_t *pud; |
1006 | |
1007 | p4d = p4d_alloc(mm, pgd_offset(mm, md->virtual), md->virtual); |
1008 | if (WARN_ON(!p4d)) |
1009 | return; |
1010 | pud = pud_alloc(mm, p4d, md->virtual); |
1011 | if (WARN_ON(!pud)) |
1012 | return; |
1013 | pmd_alloc(mm, pud, 0); |
1014 | #endif |
1015 | __create_mapping(mm, md, late_alloc, ng); |
1016 | } |
1017 | |
1018 | /* |
1019 | * Create the architecture specific mappings |
1020 | */ |
1021 | void __init iotable_init(struct map_desc *io_desc, int nr) |
1022 | { |
1023 | struct map_desc *md; |
1024 | struct vm_struct *vm; |
1025 | struct static_vm *svm; |
1026 | |
1027 | if (!nr) |
1028 | return; |
1029 | |
1030 | svm = memblock_alloc(sizeof(*svm) * nr, __alignof__(*svm)); |
1031 | if (!svm) |
1032 | panic("%s: Failed to allocate %zu bytes align=0x%zx\n", |
1033 | __func__, sizeof(*svm) * nr, __alignof__(*svm)); |
1034 | |
1035 | for (md = io_desc; nr; md++, nr--) { |
1036 | create_mapping(md); |
1037 | |
1038 | vm = &svm->vm; |
1039 | vm->addr = (void *)(md->virtual & PAGE_MASK); |
1040 | vm->size = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK)); |
1041 | vm->phys_addr = __pfn_to_phys(md->pfn); |
1042 | vm->flags = VM_IOREMAP | VM_ARM_STATIC_MAPPING; |
1043 | vm->flags |= VM_ARM_MTYPE(md->type); |
1044 | vm->caller = iotable_init; |
1045 | add_static_vm_early(svm++); |
1046 | } |
1047 | } |
1048 | |
1049 | void __init vm_reserve_area_early(unsigned long addr, unsigned long size, |
1050 | void *caller) |
1051 | { |
1052 | struct vm_struct *vm; |
1053 | struct static_vm *svm; |
1054 | |
1055 | svm = memblock_alloc(sizeof(*svm), __alignof__(*svm)); |
1056 | if (!svm) |
1057 | panic("%s: Failed to allocate %zu bytes align=0x%zx\n", |
1058 | __func__, sizeof(*svm), __alignof__(*svm)); |
1059 | |
1060 | vm = &svm->vm; |
1061 | vm->addr = (void *)addr; |
1062 | vm->size = size; |
1063 | vm->flags = VM_IOREMAP | VM_ARM_EMPTY_MAPPING; |
1064 | vm->caller = caller; |
1065 | add_static_vm_early(svm); |
1066 | } |
1067 | |
1068 | #ifndef CONFIG_ARM_LPAE |
1069 | |
1070 | /* |
1071 | * The Linux PMD is made of two consecutive section entries covering 2MB |
1072 | * (see definition in include/asm/pgtable-2level.h). However a call to |
1073 | * create_mapping() may optimize static mappings by using individual |
1074 | * 1MB section mappings. This leaves the actual PMD potentially half |
1075 | * initialized if the top or bottom section entry isn't used, leaving it |
1076 | * open to problems if a subsequent ioremap() or vmalloc() tries to use |
1077 | * the virtual space left free by that unused section entry. |
1078 | * |
1079 | * Let's avoid the issue by inserting dummy vm entries covering the unused |
1080 | * PMD halves once the static mappings are in place. |
1081 | */ |
1082 | |
1083 | static void __init pmd_empty_section_gap(unsigned long addr) |
1084 | { |
1085 | vm_reserve_area_early(addr, SECTION_SIZE, pmd_empty_section_gap); |
1086 | } |
1087 | |
1088 | static void __init fill_pmd_gaps(void) |
1089 | { |
1090 | struct static_vm *svm; |
1091 | struct vm_struct *vm; |
1092 | unsigned long addr, next = 0; |
1093 | pmd_t *pmd; |
1094 | |
1095 | list_for_each_entry(svm, &static_vmlist, list) { |
1096 | vm = &svm->vm; |
1097 | addr = (unsigned long)vm->addr; |
1098 | if (addr < next) |
1099 | continue; |
1100 | |
1101 | /* |
1102 | * Check if this vm starts on an odd section boundary. |
1103 | * If so and the first section entry for this PMD is free |
1104 | * then we block the corresponding virtual address. |
1105 | */ |
1106 | if ((addr & ~PMD_MASK) == SECTION_SIZE) { |
1107 | pmd = pmd_off_k(addr); |
1108 | if (pmd_none(*pmd)) |
1109 | pmd_empty_section_gap(addr & PMD_MASK); |
1110 | } |
1111 | |
1112 | /* |
1113 | * Then check if this vm ends on an odd section boundary. |
1114 | * If so and the second section entry for this PMD is empty |
1115 | * then we block the corresponding virtual address. |
1116 | */ |
1117 | addr += vm->size; |
1118 | if ((addr & ~PMD_MASK) == SECTION_SIZE) { |
1119 | pmd = pmd_off_k(addr) + 1; |
1120 | if (pmd_none(*pmd)) |
1121 | pmd_empty_section_gap(addr); |
1122 | } |
1123 | |
1124 | /* no need to look at any vm entry until we hit the next PMD */ |
1125 | next = (addr + PMD_SIZE - 1) & PMD_MASK; |
1126 | } |
1127 | } |
1128 | |
1129 | #else |
1130 | #define fill_pmd_gaps() do { } while (0) |
1131 | #endif |
1132 | |
1133 | #if defined(CONFIG_PCI) && !defined(CONFIG_NEED_MACH_IO_H) |
1134 | static void __init pci_reserve_io(void) |
1135 | { |
1136 | struct static_vm *svm; |
1137 | |
1138 | svm = find_static_vm_vaddr((void *)PCI_IO_VIRT_BASE); |
1139 | if (svm) |
1140 | return; |
1141 | |
1142 | vm_reserve_area_early(PCI_IO_VIRT_BASE, SZ_2M, pci_reserve_io); |
1143 | } |
1144 | #else |
1145 | #define pci_reserve_io() do { } while (0) |
1146 | #endif |
1147 | |
1148 | #ifdef CONFIG_DEBUG_LL |
1149 | void __init debug_ll_io_init(void) |
1150 | { |
1151 | struct map_desc map; |
1152 | |
1153 | debug_ll_addr(&map.pfn, &map.virtual); |
1154 | if (!map.pfn || !map.virtual) |
1155 | return; |
1156 | map.pfn = __phys_to_pfn(map.pfn); |
1157 | map.virtual &= PAGE_MASK; |
1158 | map.length = PAGE_SIZE; |
1159 | map.type = MT_DEVICE; |
1160 | iotable_init(&map, 1); |
1161 | } |
1162 | #endif |
1163 | |
1164 | static unsigned long __initdata vmalloc_size = 240 * SZ_1M; |
1165 | |
1166 | /* |
1167 | * vmalloc=size forces the vmalloc area to be exactly 'size' |
1168 | * bytes. This can be used to increase (or decrease) the vmalloc |
1169 | * area - the default is 240MiB. |
1170 | */ |
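/* Example: booting with "vmalloc=512M" requests a 512 MiB vmalloc area. */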
1171 | static int __init early_vmalloc(char *arg) |
1172 | { |
1173 | unsigned long vmalloc_reserve = memparse(arg, NULL); |
1174 | unsigned long vmalloc_max; |
1175 | |
1176 | if (vmalloc_reserve < SZ_16M) { |
1177 | vmalloc_reserve = SZ_16M; |
1178 | pr_warn("vmalloc area is too small, limiting to %luMiB\n" , |
1179 | vmalloc_reserve >> 20); |
1180 | } |
1181 | |
1182 | vmalloc_max = VMALLOC_END - (PAGE_OFFSET + SZ_32M + VMALLOC_OFFSET); |
1183 | if (vmalloc_reserve > vmalloc_max) { |
1184 | vmalloc_reserve = vmalloc_max; |
1185 | pr_warn("vmalloc area is too big, limiting to %luMiB\n" , |
1186 | vmalloc_reserve >> 20); |
1187 | } |
1188 | |
1189 | vmalloc_size = vmalloc_reserve; |
1190 | return 0; |
1191 | } |
1192 | early_param("vmalloc", early_vmalloc); |
1193 | |
1194 | phys_addr_t arm_lowmem_limit __initdata = 0; |
1195 | |
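/*
 * Work out arm_lowmem_limit (the top of lowmem) from the vmalloc
 * reservation and restrict memblock allocations to memory that is
 * covered by the initial section mappings.
 */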
1196 | void __init adjust_lowmem_bounds(void) |
1197 | { |
1198 | phys_addr_t block_start, block_end, memblock_limit = 0; |
1199 | u64 vmalloc_limit, i; |
1200 | phys_addr_t lowmem_limit = 0; |
1201 | |
1202 | /* |
1203 | * Let's use our own (unoptimized) equivalent of __pa() that is |
1204 | * not affected by wrap-arounds when sizeof(phys_addr_t) == 4. |
1205 | * The result is used as the upper bound on physical memory address |
1206 | * and may itself be outside the valid range for which phys_addr_t |
1207 | * and therefore __pa() is defined. |
1208 | */ |
1209 | vmalloc_limit = (u64)VMALLOC_END - vmalloc_size - VMALLOC_OFFSET - |
1210 | PAGE_OFFSET + PHYS_OFFSET; |
1211 | |
1212 | /* |
1213 | * The first usable region must be PMD aligned. Mark its start |
1214 | * as MEMBLOCK_NOMAP if it isn't |
1215 | */ |
1216 | for_each_mem_range(i, &block_start, &block_end) { |
1217 | if (!IS_ALIGNED(block_start, PMD_SIZE)) { |
1218 | phys_addr_t len; |
1219 | |
1220 | len = round_up(block_start, PMD_SIZE) - block_start; |
1221 | memblock_mark_nomap(block_start, len); |
1222 | } |
1223 | break; |
1224 | } |
1225 | |
1226 | for_each_mem_range(i, &block_start, &block_end) { |
1227 | if (block_start < vmalloc_limit) { |
1228 | if (block_end > lowmem_limit) |
1229 | /* |
1230 | * Compare as u64 to ensure vmalloc_limit does |
1231 | * not get truncated. block_end should always |
1232 | * fit in phys_addr_t so there should be no |
1233 | * issue with assignment. |
1234 | */ |
1235 | lowmem_limit = min_t(u64, |
1236 | vmalloc_limit, |
1237 | block_end); |
1238 | |
1239 | /* |
1240 | * Find the first non-pmd-aligned page, and point |
1241 | * memblock_limit at it. This relies on rounding the |
1242 | * limit down to be pmd-aligned, which happens at the |
1243 | * end of this function. |
1244 | * |
1245 | * With this algorithm, the start or end of almost any |
1246 | * bank can be non-pmd-aligned. The only exception is |
1247 | * that the start of the bank 0 must be section- |
1248 | * aligned, since otherwise memory would need to be |
1249 | * allocated when mapping the start of bank 0, which |
1250 | * occurs before any free memory is mapped. |
1251 | */ |
1252 | if (!memblock_limit) { |
1253 | if (!IS_ALIGNED(block_start, PMD_SIZE)) |
1254 | memblock_limit = block_start; |
1255 | else if (!IS_ALIGNED(block_end, PMD_SIZE)) |
1256 | memblock_limit = lowmem_limit; |
1257 | } |
1258 | |
1259 | } |
1260 | } |
1261 | |
1262 | arm_lowmem_limit = lowmem_limit; |
1263 | |
1264 | high_memory = __va(arm_lowmem_limit - 1) + 1; |
1265 | |
1266 | if (!memblock_limit) |
1267 | memblock_limit = arm_lowmem_limit; |
1268 | |
1269 | /* |
1270 | * Round the memblock limit down to a pmd size. This |
1271 | * helps to ensure that we will allocate memory from the |
1272 | * last full pmd, which should be mapped. |
1273 | */ |
1274 | memblock_limit = round_down(memblock_limit, PMD_SIZE); |
1275 | |
1276 | if (!IS_ENABLED(CONFIG_HIGHMEM) || cache_is_vipt_aliasing()) { |
1277 | if (memblock_end_of_DRAM() > arm_lowmem_limit) { |
1278 | phys_addr_t end = memblock_end_of_DRAM(); |
1279 | |
1280 | pr_notice("Ignoring RAM at %pa-%pa\n", |
1281 | &memblock_limit, &end); |
1282 | pr_notice("Consider using a HIGHMEM enabled kernel.\n"); |
1283 | |
1284 | memblock_remove(memblock_limit, end - memblock_limit); |
1285 | } |
1286 | } |
1287 | |
1288 | memblock_set_current_limit(memblock_limit); |
1289 | } |
1290 | |
1291 | static __init void prepare_page_table(void) |
1292 | { |
1293 | unsigned long addr; |
1294 | phys_addr_t end; |
1295 | |
1296 | /* |
1297 | * Clear out all the mappings below the kernel image. |
1298 | */ |
1299 | #ifdef CONFIG_KASAN |
1300 | /* |
1301 | * KASan's shadow memory inserts itself between the TASK_SIZE |
1302 | * and MODULES_VADDR. Do not clear the KASan shadow memory mappings. |
1303 | */ |
1304 | for (addr = 0; addr < KASAN_SHADOW_START; addr += PMD_SIZE) |
1305 | pmd_clear(pmd_off_k(addr)); |
1306 | /* |
1307 | * Skip over the KASan shadow area. KASAN_SHADOW_END is sometimes |
1308 | * equal to MODULES_VADDR and then we exit the pmd clearing. If we |
1309 | * are using a thumb-compiled kernel, there will be 8MB more |
1310 | * to clear, as KASan is always offset 16 MB below MODULES_VADDR. |
1311 | */ |
1312 | for (addr = KASAN_SHADOW_END; addr < MODULES_VADDR; addr += PMD_SIZE) |
1313 | pmd_clear(pmd_off_k(addr)); |
1314 | #else |
1315 | for (addr = 0; addr < MODULES_VADDR; addr += PMD_SIZE) |
1316 | pmd_clear(pmd_off_k(addr)); |
1317 | #endif |
1318 | |
1319 | #ifdef CONFIG_XIP_KERNEL |
1320 | /* The XIP kernel is mapped in the module area -- skip over it */ |
1321 | addr = ((unsigned long)_exiprom + PMD_SIZE - 1) & PMD_MASK; |
1322 | #endif |
1323 | for ( ; addr < PAGE_OFFSET; addr += PMD_SIZE) |
1324 | pmd_clear(pmd_off_k(addr)); |
1325 | |
1326 | /* |
1327 | * Find the end of the first block of lowmem. |
1328 | */ |
1329 | end = memblock.memory.regions[0].base + memblock.memory.regions[0].size; |
1330 | if (end >= arm_lowmem_limit) |
1331 | end = arm_lowmem_limit; |
1332 | |
1333 | /* |
1334 | * Clear out all the kernel space mappings, except for the first |
1335 | * memory bank, up to the vmalloc region. |
1336 | */ |
1337 | for (addr = __phys_to_virt(end); |
1338 | addr < VMALLOC_START; addr += PMD_SIZE) |
1339 | pmd_clear(pmd_off_k(addr)); |
1340 | } |
1341 | |
1342 | #ifdef CONFIG_ARM_LPAE |
1343 | /* the first page is reserved for pgd */ |
1344 | #define SWAPPER_PG_DIR_SIZE (PAGE_SIZE + \ |
1345 | PTRS_PER_PGD * PTRS_PER_PMD * sizeof(pmd_t)) |
1346 | #else |
1347 | #define SWAPPER_PG_DIR_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) |
1348 | #endif |
1349 | |
1350 | /* |
1351 | * Reserve the special regions of memory |
1352 | */ |
1353 | void __init arm_mm_memblock_reserve(void) |
1354 | { |
1355 | /* |
1356 | * Reserve the page tables. These are already in use, |
1357 | * and can only be in node 0. |
1358 | */ |
1359 | memblock_reserve(__pa(swapper_pg_dir), SWAPPER_PG_DIR_SIZE); |
1360 | |
1361 | #ifdef CONFIG_SA1111 |
1362 | /* |
1363 | * Because of the SA1111 DMA bug, we want to preserve our |
1364 | * precious DMA-able memory... |
1365 | */ |
1366 | memblock_reserve(PHYS_OFFSET, __pa(swapper_pg_dir) - PHYS_OFFSET); |
1367 | #endif |
1368 | } |
1369 | |
1370 | /* |
1371 | * Set up the device mappings. Since we clear out the page tables for all |
1372 | * mappings above VMALLOC_START, except early fixmap, we might remove debug |
1373 | * device mappings. This means earlycon can be used to debug this function. |
1374 | * Any other function or debugging method which may touch any device _will_ |
1375 | * crash the kernel. |
1376 | */ |
1377 | static void __init devicemaps_init(const struct machine_desc *mdesc) |
1378 | { |
1379 | struct map_desc map; |
1380 | unsigned long addr; |
1381 | void *vectors; |
1382 | |
1383 | /* |
1384 | * Allocate the vector page early. |
1385 | */ |
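/*
 * Two contiguous pages: the vectors page itself plus the page that is
 * mapped kernel read-only just above it further down.
 */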
1386 | vectors = early_alloc(PAGE_SIZE * 2); |
1387 | |
1388 | early_trap_init(vectors); |
1389 | |
1390 | /* |
1391 | * Clear page table except top pmd used by early fixmaps |
1392 | */ |
1393 | for (addr = VMALLOC_START; addr < (FIXADDR_TOP & PMD_MASK); addr += PMD_SIZE) |
1394 | pmd_clear(pmd_off_k(addr)); |
1395 | |
1396 | if (__atags_pointer) { |
1397 | /* create a read-only mapping of the device tree */ |
1398 | map.pfn = __phys_to_pfn(__atags_pointer & SECTION_MASK); |
1399 | map.virtual = FDT_FIXED_BASE; |
1400 | map.length = FDT_FIXED_SIZE; |
1401 | map.type = MT_MEMORY_RO; |
1402 | create_mapping(&map); |
1403 | } |
1404 | |
1405 | /* |
1406 | * Map the kernel if it is XIP. |
1407 | * It is always first in the modulearea. |
1408 | */ |
1409 | #ifdef CONFIG_XIP_KERNEL |
1410 | map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK); |
1411 | map.virtual = MODULES_VADDR; |
1412 | map.length = ((unsigned long)_exiprom - map.virtual + ~SECTION_MASK) & SECTION_MASK; |
1413 | map.type = MT_ROM; |
1414 | create_mapping(&map); |
1415 | #endif |
1416 | |
1417 | /* |
1418 | * Map the cache flushing regions. |
1419 | */ |
1420 | #ifdef FLUSH_BASE |
1421 | map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS); |
1422 | map.virtual = FLUSH_BASE; |
1423 | map.length = SZ_1M; |
1424 | map.type = MT_CACHECLEAN; |
1425 | create_mapping(&map); |
1426 | #endif |
1427 | #ifdef FLUSH_BASE_MINICACHE |
1428 | map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS + SZ_1M); |
1429 | map.virtual = FLUSH_BASE_MINICACHE; |
1430 | map.length = SZ_1M; |
1431 | map.type = MT_MINICLEAN; |
1432 | create_mapping(&map); |
1433 | #endif |
1434 | |
1435 | /* |
1436 | * Create a mapping for the machine vectors at the high-vectors |
1437 | * location (0xffff0000). If we aren't using high-vectors, also |
1438 | * create a mapping at the low-vectors virtual address. |
1439 | */ |
1440 | map.pfn = __phys_to_pfn(virt_to_phys(vectors)); |
1441 | map.virtual = 0xffff0000; |
1442 | map.length = PAGE_SIZE; |
1443 | #ifdef CONFIG_KUSER_HELPERS |
1444 | map.type = MT_HIGH_VECTORS; |
1445 | #else |
1446 | map.type = MT_LOW_VECTORS; |
1447 | #endif |
1448 | create_mapping(&map); |
1449 | |
1450 | if (!vectors_high()) { |
1451 | map.virtual = 0; |
1452 | map.length = PAGE_SIZE * 2; |
1453 | map.type = MT_LOW_VECTORS; |
1454 | create_mapping(&map); |
1455 | } |
1456 | |
1457 | /* Now create a kernel read-only mapping */ |
1458 | map.pfn += 1; |
1459 | map.virtual = 0xffff0000 + PAGE_SIZE; |
1460 | map.length = PAGE_SIZE; |
1461 | map.type = MT_LOW_VECTORS; |
1462 | create_mapping(&map); |
1463 | |
1464 | /* |
1465 | * Ask the machine support to map in the statically mapped devices. |
1466 | */ |
1467 | if (mdesc->map_io) |
1468 | mdesc->map_io(); |
1469 | else |
1470 | debug_ll_io_init(); |
1471 | fill_pmd_gaps(); |
1472 | |
1473 | /* Reserve fixed i/o space in VMALLOC region */ |
1474 | pci_reserve_io(); |
1475 | |
1476 | /* |
1477 | * Finally flush the caches and tlb to ensure that we're in a |
1478 | * consistent state wrt the writebuffer. This also ensures that |
1479 | * any write-allocated cache lines in the vector page are written |
1480 | * back. After this point, we can start to touch devices again. |
1481 | */ |
1482 | local_flush_tlb_all(); |
1483 | flush_cache_all(); |
1484 | |
1485 | /* Enable asynchronous aborts */ |
1486 | early_abt_enable(); |
1487 | } |
1488 | |
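/*
 * Pre-allocate the PTE tables covering the highmem pkmap area (when
 * enabled) and the fixmap region so entries can be installed later
 * without allocating memory.
 */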
1489 | static void __init kmap_init(void) |
1490 | { |
1491 | #ifdef CONFIG_HIGHMEM |
1492 | pkmap_page_table = early_pte_alloc(pmd_off_k(PKMAP_BASE), |
1493 | PKMAP_BASE, _PAGE_KERNEL_TABLE); |
1494 | #endif |
1495 | |
1496 | early_pte_alloc(pmd_off_k(FIXADDR_START), FIXADDR_START, |
1497 | _PAGE_KERNEL_TABLE); |
1498 | } |
1499 | |
1500 | static void __init map_lowmem(void) |
1501 | { |
1502 | phys_addr_t start, end; |
1503 | u64 i; |
1504 | |
1505 | /* Map all the lowmem memory banks. */ |
1506 | for_each_mem_range(i, &start, &end) { |
1507 | struct map_desc map; |
1508 | |
1509 | pr_debug("map lowmem start: 0x%08llx, end: 0x%08llx\n" , |
1510 | (long long)start, (long long)end); |
1511 | if (end > arm_lowmem_limit) |
1512 | end = arm_lowmem_limit; |
1513 | if (start >= end) |
1514 | break; |
1515 | |
1516 | /* |
1517 | * If our kernel image is in the VMALLOC area we need to remove |
1518 | * the kernel physical memory from lowmem since the kernel will |
1519 | * be mapped separately. |
1520 | * |
1521 | * The kernel will typically be at the very start of lowmem, |
1522 | * but any placement relative to memory ranges is possible. |
1523 | * |
1524 | * If the memblock contains the kernel, we have to chisel out |
1525 | * the kernel memory from it and map each part separately. We |
1526 | * get 6 different theoretical cases: |
1527 | * |
1528 | * +--------+ +--------+ |
1529 | * +-- start --+ +--------+ | Kernel | | Kernel | |
1530 | * | | | Kernel | | case 2 | | case 5 | |
1531 | * | | | case 1 | +--------+ | | +--------+ |
1532 | * | Memory | +--------+ | | | Kernel | |
1533 | * | range | +--------+ | | | case 6 | |
1534 | * | | | Kernel | +--------+ | | +--------+ |
1535 | * | | | case 3 | | Kernel | | | |
1536 | * +-- end ----+ +--------+ | case 4 | | | |
1537 | * +--------+ +--------+ |
1538 | */ |
1539 | |
1540 | /* Case 5: kernel covers range, don't map anything, should be rare */ |
1541 | if ((start > kernel_sec_start) && (end < kernel_sec_end)) |
1542 | break; |
1543 | |
1544 | /* Cases where the kernel is starting inside the range */ |
1545 | if ((kernel_sec_start >= start) && (kernel_sec_start <= end)) { |
1546 | /* Case 6: kernel is embedded in the range, we need two mappings */ |
1547 | if ((start < kernel_sec_start) && (end > kernel_sec_end)) { |
1548 | /* Map memory below the kernel */ |
1549 | map.pfn = __phys_to_pfn(start); |
1550 | map.virtual = __phys_to_virt(start); |
1551 | map.length = kernel_sec_start - start; |
1552 | map.type = MT_MEMORY_RW; |
1553 | create_mapping(&map); |
1554 | /* Map memory above the kernel */ |
1555 | map.pfn = __phys_to_pfn(kernel_sec_end); |
1556 | map.virtual = __phys_to_virt(kernel_sec_end); |
1557 | map.length = end - kernel_sec_end; |
1558 | map.type = MT_MEMORY_RW; |
1559 | create_mapping(&map); |
1560 | break; |
1561 | } |
1562 | /* Case 1: kernel and range start at the same address, should be common */ |
1563 | if (kernel_sec_start == start) |
1564 | start = kernel_sec_end; |
1565 | /* Case 3: kernel and range end at the same address, should be rare */ |
1566 | if (kernel_sec_end == end) |
1567 | end = kernel_sec_start; |
1568 | } else if ((kernel_sec_start < start) && (kernel_sec_end > start) && (kernel_sec_end < end)) { |
1569 | /* Case 2: kernel ends inside range, starts below it */ |
1570 | start = kernel_sec_end; |
1571 | } else if ((kernel_sec_start > start) && (kernel_sec_start < end) && (kernel_sec_end > end)) { |
1572 | /* Case 4: kernel starts inside range, ends above it */ |
1573 | end = kernel_sec_start; |
1574 | } |
1575 | map.pfn = __phys_to_pfn(start); |
1576 | map.virtual = __phys_to_virt(start); |
1577 | map.length = end - start; |
1578 | map.type = MT_MEMORY_RW; |
1579 | create_mapping(&map); |
1580 | } |
1581 | } |
1582 | |
1583 | static void __init map_kernel(void) |
1584 | { |
1585 | /* |
1586 | * We use the well known kernel section start and end and split the area in the |
1587 | * middle like this: |
1588 | * . . |
1589 | * | RW memory | |
1590 | * +----------------+ kernel_x_start |
1591 | * | Executable | |
1592 | * | kernel memory | |
1593 | * +----------------+ kernel_x_end / kernel_nx_start |
1594 | * | Non-executable | |
1595 | * | kernel memory | |
1596 | * +----------------+ kernel_nx_end |
1597 | * | RW memory | |
1598 | * . . |
1599 | * |
1600 | * Notice that we are dealing with section sized mappings here so all of this |
1601 | * will be bumped to the closest section boundary. This means that some of the |
1602 | * non-executable part of the kernel memory is actually mapped as executable. |
1603 | * This will only persist until we turn on proper memory management later on |
1604 | * and we remap the whole kernel with page granularity. |
1605 | */ |
1606 | phys_addr_t kernel_x_start = kernel_sec_start; |
1607 | phys_addr_t kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE); |
1608 | phys_addr_t kernel_nx_start = kernel_x_end; |
1609 | phys_addr_t kernel_nx_end = kernel_sec_end; |
1610 | struct map_desc map; |
1611 | |
1612 | map.pfn = __phys_to_pfn(kernel_x_start); |
1613 | map.virtual = __phys_to_virt(kernel_x_start); |
1614 | map.length = kernel_x_end - kernel_x_start; |
1615 | map.type = MT_MEMORY_RWX; |
	create_mapping(&map);
1617 | |
1618 | /* If the nx part is small it may end up covered by the tail of the RWX section */ |
1619 | if (kernel_x_end == kernel_nx_end) |
1620 | return; |
1621 | |
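	/* Map the remaining, non-executable part of the kernel image read-write */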
1622 | map.pfn = __phys_to_pfn(kernel_nx_start); |
1623 | map.virtual = __phys_to_virt(kernel_nx_start); |
1624 | map.length = kernel_nx_end - kernel_nx_start; |
1625 | map.type = MT_MEMORY_RW; |
	create_mapping(&map);
1627 | } |
1628 | |
1629 | #ifdef CONFIG_ARM_PV_FIXUP |
1630 | typedef void pgtables_remap(long long offset, unsigned long pgd); |
1631 | pgtables_remap lpae_pgtables_remap_asm; |
1632 | |
1633 | /* |
1634 | * early_paging_init() recreates boot time page table setup, allowing machines |
1635 | * to switch over to a high (>4G) address space on LPAE systems |
1636 | */ |
1637 | static void __init early_paging_init(const struct machine_desc *mdesc) |
1638 | { |
1639 | pgtables_remap *lpae_pgtables_remap; |
1640 | unsigned long pa_pgd; |
1641 | unsigned int cr, ttbcr; |
1642 | long long offset; |
1643 | |
1644 | if (!mdesc->pv_fixup) |
1645 | return; |
1646 | |
1647 | offset = mdesc->pv_fixup(); |
1648 | if (offset == 0) |
1649 | return; |
1650 | |
1651 | /* |
1652 | * Offset the kernel section physical offsets so that the kernel |
1653 | * mapping will work out later on. |
1654 | */ |
1655 | kernel_sec_start += offset; |
1656 | kernel_sec_end += offset; |
1657 | |
1658 | /* |
1659 | * Get the address of the remap function in the 1:1 identity |
	 * mapping set up by the early page table assembly code. We
1661 | * must get this prior to the pv update. The following barrier |
1662 | * ensures that this is complete before we fixup any P:V offsets. |
1663 | */ |
1664 | lpae_pgtables_remap = (pgtables_remap *)(unsigned long)__pa(lpae_pgtables_remap_asm); |
1665 | pa_pgd = __pa(swapper_pg_dir); |
1666 | barrier(); |
1667 | |
	pr_info("Switching physical address space to 0x%08llx\n",
1669 | (u64)PHYS_OFFSET + offset); |
1670 | |
1671 | /* Re-set the phys pfn offset, and the pv offset */ |
1672 | __pv_offset += offset; |
1673 | __pv_phys_pfn_offset += PFN_DOWN(offset); |
1674 | |
1675 | /* Run the patch stub to update the constants */ |
1676 | fixup_pv_table(&__pv_table_begin, |
1677 | (&__pv_table_end - &__pv_table_begin) << 2); |
1678 | |
1679 | /* |
	 * We are changing not only the virtual to physical mapping, but also
1681 | * the physical addresses used to access memory. We need to flush |
1682 | * all levels of cache in the system with caching disabled to |
1683 | * ensure that all data is written back, and nothing is prefetched |
1684 | * into the caches. We also need to prevent the TLB walkers |
1685 | * allocating into the caches too. Note that this is ARMv7 LPAE |
1686 | * specific. |
1687 | */ |
1688 | cr = get_cr(); |
1689 | set_cr(cr & ~(CR_I | CR_C)); |
1690 | asm("mrc p15, 0, %0, c2, c0, 2" : "=r" (ttbcr)); |
1691 | asm volatile("mcr p15, 0, %0, c2, c0, 2" |
1692 | : : "r" (ttbcr & ~(3 << 8 | 3 << 10))); |
1693 | flush_cache_all(); |
1694 | |
1695 | /* |
1696 | * Fixup the page tables - this must be in the idmap region as |
1697 | * we need to disable the MMU to do this safely, and hence it |
1698 | * needs to be assembly. It's fairly simple, as we're using the |
	 * temporary tables set up by the initial assembly code.
1700 | */ |
1701 | lpae_pgtables_remap(offset, pa_pgd); |
1702 | |
1703 | /* Re-enable the caches and cacheable TLB walks */ |
1704 | asm volatile("mcr p15, 0, %0, c2, c0, 2" : : "r" (ttbcr)); |
1705 | set_cr(cr); |
1706 | } |
1707 | |
1708 | #else |
1709 | |
1710 | static void __init early_paging_init(const struct machine_desc *mdesc) |
1711 | { |
1712 | long long offset; |
1713 | |
1714 | if (!mdesc->pv_fixup) |
1715 | return; |
1716 | |
1717 | offset = mdesc->pv_fixup(); |
1718 | if (offset == 0) |
1719 | return; |
1720 | |
	pr_crit("Physical address space modification is only to support Keystone2.\n");
	pr_crit("Please enable ARM_LPAE and ARM_PATCH_PHYS_VIRT support to use this\n");
	pr_crit("feature. Your kernel may crash now, have a good day.\n");
1724 | add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); |
1725 | } |
1726 | |
1727 | #endif |
1728 | |
1729 | static void __init early_fixmap_shutdown(void) |
1730 | { |
1731 | int i; |
	unsigned long va = fix_to_virt(__end_of_permanent_fixed_addresses - 1);
1733 | |
1734 | pte_offset_fixmap = pte_offset_late_fixmap; |
	pmd_clear(fixmap_pmd(va));
1736 | local_flush_tlb_kernel_page(va); |
1737 | |
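	/*
	 * Walk the permanent fixmap slots and re-install any device
	 * mappings established by the early fixmap code, this time via
	 * create_mapping() in the ordinary page tables.
	 */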
1738 | for (i = 0; i < __end_of_permanent_fixed_addresses; i++) { |
1739 | pte_t *pte; |
1740 | struct map_desc map; |
1741 | |
		map.virtual = fix_to_virt(i);
		pte = pte_offset_early_fixmap(pmd_off_k(map.virtual), map.virtual);
1744 | |
		/* Only I/O device mappings are supported at the moment */
1746 | if (pte_none(*pte) || |
1747 | (pte_val(*pte) & L_PTE_MT_MASK) != L_PTE_MT_DEV_SHARED) |
1748 | continue; |
1749 | |
		map.pfn = pte_pfn(*pte);
1751 | map.type = MT_DEVICE; |
1752 | map.length = PAGE_SIZE; |
1753 | |
		create_mapping(&map);
1755 | } |
1756 | } |
1757 | |
1758 | /* |
1759 | * paging_init() sets up the page tables, initialises the zone memory |
1760 | * maps, and sets up the zero page, bad page and bad page tables. |
1761 | */ |
1762 | void __init paging_init(const struct machine_desc *mdesc) |
1763 | { |
1764 | void *zero_page; |
1765 | |
	pr_debug("physical kernel sections: 0x%08llx-0x%08llx\n",
1767 | kernel_sec_start, kernel_sec_end); |
1768 | |
1769 | prepare_page_table(); |
1770 | map_lowmem(); |
	memblock_set_current_limit(arm_lowmem_limit);
	pr_debug("lowmem limit is %08llx\n", (long long)arm_lowmem_limit);
1773 | /* |
1774 | * After this point early_alloc(), i.e. the memblock allocator, can |
1775 | * be used |
1776 | */ |
1777 | map_kernel(); |
1778 | dma_contiguous_remap(); |
1779 | early_fixmap_shutdown(); |
1780 | devicemaps_init(mdesc); |
1781 | kmap_init(); |
1782 | tcm_init(); |
1783 | |
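	/* Cache the pmd covering the vectors page at 0xffff0000 */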
	top_pmd = pmd_off_k(0xffff0000);
1785 | |
1786 | /* allocate the zero page. */ |
1787 | zero_page = early_alloc(PAGE_SIZE); |
1788 | |
1789 | bootmem_init(); |
1790 | |
1791 | empty_zero_page = virt_to_page(zero_page); |
1792 | __flush_dcache_folio(NULL, page_folio(empty_zero_page)); |
1793 | } |
1794 | |
1795 | void __init early_mm_init(const struct machine_desc *mdesc) |
1796 | { |
1797 | build_mem_type_table(); |
1798 | early_paging_init(mdesc); |
1799 | } |
1800 | |
1801 | void set_ptes(struct mm_struct *mm, unsigned long addr, |
1802 | pte_t *ptep, pte_t pteval, unsigned int nr) |
1803 | { |
1804 | unsigned long ext = 0; |
1805 | |
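	/*
	 * For user space mappings, keep the instruction/data caches
	 * coherent for regular (non-special) pages and mark the entries
	 * non-global so they are tagged with the owning ASID.
	 */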
1806 | if (addr < TASK_SIZE && pte_valid_user(pteval)) { |
		if (!pte_special(pteval))
1808 | __sync_icache_dcache(pteval); |
1809 | ext |= PTE_EXT_NG; |
1810 | } |
1811 | |
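	/*
	 * Write 'nr' consecutive page table entries, bumping the pfn for
	 * each successive entry.
	 */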
1812 | for (;;) { |
1813 | set_pte_ext(ptep, pteval, ext); |
1814 | if (--nr == 0) |
1815 | break; |
1816 | ptep++; |
1817 | pteval = pte_next_pfn(pteval); |
1818 | } |
1819 | } |
1820 | |