1 | // SPDX-License-Identifier: GPL-2.0 |
2 | #include <linux/sched/task.h> |
3 | #include <linux/pgtable.h> |
4 | #include <linux/kasan.h> |
5 | #include <asm/page-states.h> |
6 | #include <asm/pgalloc.h> |
7 | #include <asm/facility.h> |
8 | #include <asm/sections.h> |
9 | #include <asm/ctlreg.h> |
10 | #include <asm/physmem_info.h> |
11 | #include <asm/maccess.h> |
12 | #include <asm/abs_lowcore.h> |
13 | #include "decompressor.h" |
14 | #include "boot.h" |
15 | |
/* Invalid ASCE; its value is assigned in setup_vmem(). */
struct ctlreg __bootdata_preserved(s390_invalid_asce);

#ifdef CONFIG_PROC_FS
/* Page counters with PG_DIRECT_MAP_MAX slots (presumably one per mapping size). */
atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);
#endif

/*
 * There is no init_mm symbol to link against in this file, so init_mm and
 * the page directories are reached through the offsets stored in vmlinux.
 */
#define init_mm			(*(struct mm_struct *)vmlinux.init_mm_off)
#define swapper_pg_dir		vmlinux.swapper_pg_dir_off
#define invalid_pg_dir		vmlinux.invalid_pg_dir_off
25 | |
/*
 * Selects what pgtable_populate() puts into page-table entries that are
 * still empty.
 */
enum populate_mode {
	POPULATE_NONE,			/* _pa() returns -1 for this mode */
	POPULATE_DIRECT,		/* physical address equals the virtual one */
	POPULATE_ABS_LOWCORE,		/* physical address from __abs_lowcore_pa() */
#ifdef CONFIG_KASAN
	POPULATE_KASAN_MAP_SHADOW,	/* back with newly allocated, zeroed memory */
	POPULATE_KASAN_ZERO_SHADOW,	/* reuse the shared kasan early shadow tables */
	POPULATE_KASAN_SHALLOW		/* only make sure the pgd entries exist */
#endif
};
36 | |
/* Forward declaration: kasan_populate() calls this before it is defined. */
static void pgtable_populate(unsigned long addr, unsigned long end,
			     enum populate_mode mode);
38 | |
39 | #ifdef CONFIG_KASAN |
40 | |
41 | #define kasan_early_shadow_page vmlinux.kasan_early_shadow_page_off |
42 | #define kasan_early_shadow_pte ((pte_t *)vmlinux.kasan_early_shadow_pte_off) |
43 | #define kasan_early_shadow_pmd ((pmd_t *)vmlinux.kasan_early_shadow_pmd_off) |
44 | #define kasan_early_shadow_pud ((pud_t *)vmlinux.kasan_early_shadow_pud_off) |
45 | #define kasan_early_shadow_p4d ((p4d_t *)vmlinux.kasan_early_shadow_p4d_off) |
46 | #define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x)) |
47 | |
48 | static pte_t pte_z; |
49 | |
50 | static inline void kasan_populate(unsigned long start, unsigned long end, enum populate_mode mode) |
51 | { |
52 | start = PAGE_ALIGN_DOWN(__sha(start)); |
53 | end = PAGE_ALIGN(__sha(end)); |
54 | pgtable_populate(start, end, mode); |
55 | } |
56 | |
57 | static void kasan_populate_shadow(void) |
58 | { |
59 | pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY); |
60 | pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY); |
61 | p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY); |
62 | unsigned long memgap_start = 0; |
63 | unsigned long untracked_end; |
64 | unsigned long start, end; |
65 | int i; |
66 | |
67 | pte_z = __pte(__pa(kasan_early_shadow_page) | pgprot_val(PAGE_KERNEL_RO)); |
68 | if (!machine.has_nx) |
69 | pte_z = clear_pte_bit(pte_z, __pgprot(_PAGE_NOEXEC)); |
70 | crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z)); |
71 | crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z)); |
72 | crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z)); |
73 | memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE); |
74 | __arch_set_page_dat(kasan_early_shadow_p4d, 1UL << CRST_ALLOC_ORDER); |
75 | __arch_set_page_dat(kasan_early_shadow_pud, 1UL << CRST_ALLOC_ORDER); |
76 | __arch_set_page_dat(kasan_early_shadow_pmd, 1UL << CRST_ALLOC_ORDER); |
77 | __arch_set_page_dat(kasan_early_shadow_pte, 1); |
78 | |
79 | /* |
80 | * Current memory layout: |
81 | * +- 0 -------------+ +- shadow start -+ |
82 | * |1:1 ident mapping| /|1/8 of ident map| |
83 | * | | / | | |
84 | * +-end of ident map+ / +----------------+ |
85 | * | ... gap ... | / | kasan | |
86 | * | | / | zero page | |
87 | * +- vmalloc area -+ / | mapping | |
88 | * | vmalloc_size | / | (untracked) | |
89 | * +- modules vaddr -+ / +----------------+ |
90 | * | 2Gb |/ | unmapped | allocated per module |
91 | * +- shadow start -+ +----------------+ |
92 | * | 1/8 addr space | | zero pg mapping| (untracked) |
93 | * +- shadow end ----+---------+- shadow end ---+ |
94 | * |
95 | * Current memory layout (KASAN_VMALLOC): |
96 | * +- 0 -------------+ +- shadow start -+ |
97 | * |1:1 ident mapping| /|1/8 of ident map| |
98 | * | | / | | |
99 | * +-end of ident map+ / +----------------+ |
100 | * | ... gap ... | / | kasan zero page| (untracked) |
101 | * | | / | mapping | |
102 | * +- vmalloc area -+ / +----------------+ |
103 | * | vmalloc_size | / |shallow populate| |
104 | * +- modules vaddr -+ / +----------------+ |
105 | * | 2Gb |/ |shallow populate| |
106 | * +- shadow start -+ +----------------+ |
107 | * | 1/8 addr space | | zero pg mapping| (untracked) |
108 | * +- shadow end ----+---------+- shadow end ---+ |
109 | */ |
110 | |
111 | for_each_physmem_usable_range(i, &start, &end) { |
112 | kasan_populate(start, end, POPULATE_KASAN_MAP_SHADOW); |
113 | if (memgap_start && physmem_info.info_source == MEM_DETECT_DIAG260) |
114 | kasan_populate(memgap_start, start, POPULATE_KASAN_ZERO_SHADOW); |
115 | memgap_start = end; |
116 | } |
117 | if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { |
118 | untracked_end = VMALLOC_START; |
119 | /* shallowly populate kasan shadow for vmalloc and modules */ |
120 | kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW); |
121 | } else { |
122 | untracked_end = MODULES_VADDR; |
123 | } |
124 | /* populate kasan shadow for untracked memory */ |
125 | kasan_populate(ident_map_size, untracked_end, POPULATE_KASAN_ZERO_SHADOW); |
126 | kasan_populate(MODULES_END, _REGION1_SIZE, POPULATE_KASAN_ZERO_SHADOW); |
127 | } |
128 | |
129 | static bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr, |
130 | unsigned long end, enum populate_mode mode) |
131 | { |
132 | if (mode == POPULATE_KASAN_ZERO_SHADOW && |
133 | IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE) { |
134 | pgd_populate(&init_mm, pgd, kasan_early_shadow_p4d); |
135 | return true; |
136 | } |
137 | return false; |
138 | } |
139 | |
140 | static bool kasan_p4d_populate_zero_shadow(p4d_t *p4d, unsigned long addr, |
141 | unsigned long end, enum populate_mode mode) |
142 | { |
143 | if (mode == POPULATE_KASAN_ZERO_SHADOW && |
144 | IS_ALIGNED(addr, P4D_SIZE) && end - addr >= P4D_SIZE) { |
145 | p4d_populate(&init_mm, p4d, kasan_early_shadow_pud); |
146 | return true; |
147 | } |
148 | return false; |
149 | } |
150 | |
151 | static bool kasan_pud_populate_zero_shadow(pud_t *pud, unsigned long addr, |
152 | unsigned long end, enum populate_mode mode) |
153 | { |
154 | if (mode == POPULATE_KASAN_ZERO_SHADOW && |
155 | IS_ALIGNED(addr, PUD_SIZE) && end - addr >= PUD_SIZE) { |
156 | pud_populate(&init_mm, pud, kasan_early_shadow_pmd); |
157 | return true; |
158 | } |
159 | return false; |
160 | } |
161 | |
162 | static bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr, |
163 | unsigned long end, enum populate_mode mode) |
164 | { |
165 | if (mode == POPULATE_KASAN_ZERO_SHADOW && |
166 | IS_ALIGNED(addr, PMD_SIZE) && end - addr >= PMD_SIZE) { |
167 | pmd_populate(&init_mm, pmd, kasan_early_shadow_pte); |
168 | return true; |
169 | } |
170 | return false; |
171 | } |
172 | |
173 | static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode) |
174 | { |
175 | if (mode == POPULATE_KASAN_ZERO_SHADOW) { |
176 | set_pte(pte, pte_z); |
177 | return true; |
178 | } |
179 | return false; |
180 | } |
181 | #else |
182 | |
/* !CONFIG_KASAN: there is no shadow memory to populate. */
static inline void kasan_populate_shadow(void)
{
}
184 | |
185 | static inline bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr, |
186 | unsigned long end, enum populate_mode mode) |
187 | { |
188 | return false; |
189 | } |
190 | |
191 | static inline bool kasan_p4d_populate_zero_shadow(p4d_t *p4d, unsigned long addr, |
192 | unsigned long end, enum populate_mode mode) |
193 | { |
194 | return false; |
195 | } |
196 | |
197 | static inline bool kasan_pud_populate_zero_shadow(pud_t *pud, unsigned long addr, |
198 | unsigned long end, enum populate_mode mode) |
199 | { |
200 | return false; |
201 | } |
202 | |
203 | static inline bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr, |
204 | unsigned long end, enum populate_mode mode) |
205 | { |
206 | return false; |
207 | } |
208 | |
209 | static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode) |
210 | { |
211 | return false; |
212 | } |
213 | |
214 | #endif |
215 | |
216 | /* |
217 | * Mimic virt_to_kpte() in lack of init_mm symbol. Skip pmd NULL check though. |
218 | */ |
219 | static inline pte_t *__virt_to_kpte(unsigned long va) |
220 | { |
221 | return pte_offset_kernel(pmd_offset(pud_offset(p4d: p4d_offset(pgd_offset_k(va), address: va), address: va), address: va), address: va); |
222 | } |
223 | |
224 | static void *boot_crst_alloc(unsigned long val) |
225 | { |
226 | unsigned long size = PAGE_SIZE << CRST_ALLOC_ORDER; |
227 | unsigned long *table; |
228 | |
229 | table = (unsigned long *)physmem_alloc_top_down(RR_VMEM, size, size); |
230 | crst_table_init(table, val); |
231 | __arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER); |
232 | return table; |
233 | } |
234 | |
235 | static pte_t *boot_pte_alloc(void) |
236 | { |
237 | static void *pte_leftover; |
238 | pte_t *pte; |
239 | |
240 | /* |
241 | * handling pte_leftovers this way helps to avoid memory fragmentation |
242 | * during POPULATE_KASAN_MAP_SHADOW when EDAT is off |
243 | */ |
244 | if (!pte_leftover) { |
245 | pte_leftover = (void *)physmem_alloc_top_down(RR_VMEM, PAGE_SIZE, PAGE_SIZE); |
246 | pte = pte_leftover + _PAGE_TABLE_SIZE; |
247 | __arch_set_page_dat(pte, 1); |
248 | } else { |
249 | pte = pte_leftover; |
250 | pte_leftover = NULL; |
251 | } |
252 | |
253 | memset64(s: (u64 *)pte, v: _PAGE_INVALID, PTRS_PER_PTE); |
254 | return pte; |
255 | } |
256 | |
257 | static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_mode mode) |
258 | { |
259 | switch (mode) { |
260 | case POPULATE_NONE: |
261 | return -1; |
262 | case POPULATE_DIRECT: |
263 | return addr; |
264 | case POPULATE_ABS_LOWCORE: |
265 | return __abs_lowcore_pa(addr); |
266 | #ifdef CONFIG_KASAN |
267 | case POPULATE_KASAN_MAP_SHADOW: |
268 | addr = physmem_alloc_top_down(RR_VMEM, size, size); |
269 | memset((void *)addr, 0, size); |
270 | return addr; |
271 | #endif |
272 | default: |
273 | return -1; |
274 | } |
275 | } |
276 | |
277 | static bool can_large_pud(pud_t *pu_dir, unsigned long addr, unsigned long end) |
278 | { |
279 | return machine.has_edat2 && |
280 | IS_ALIGNED(addr, PUD_SIZE) && (end - addr) >= PUD_SIZE; |
281 | } |
282 | |
283 | static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end) |
284 | { |
285 | return machine.has_edat1 && |
286 | IS_ALIGNED(addr, PMD_SIZE) && (end - addr) >= PMD_SIZE; |
287 | } |
288 | |
289 | static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end, |
290 | enum populate_mode mode) |
291 | { |
292 | unsigned long pages = 0; |
293 | pte_t *pte, entry; |
294 | |
295 | pte = pte_offset_kernel(pmd, address: addr); |
296 | for (; addr < end; addr += PAGE_SIZE, pte++) { |
297 | if (pte_none(pte: *pte)) { |
298 | if (kasan_pte_populate_zero_shadow(pte, mode)) |
299 | continue; |
300 | entry = __pte(val: _pa(addr, PAGE_SIZE, mode)); |
301 | entry = set_pte_bit(entry, PAGE_KERNEL); |
302 | if (!machine.has_nx) |
303 | entry = clear_pte_bit(entry, __pgprot(_PAGE_NOEXEC)); |
304 | set_pte(ptep: pte, pte: entry); |
305 | pages++; |
306 | } |
307 | } |
308 | if (mode == POPULATE_DIRECT) |
309 | update_page_count(level: PG_DIRECT_MAP_4K, pages); |
310 | } |
311 | |
312 | static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end, |
313 | enum populate_mode mode) |
314 | { |
315 | unsigned long next, pages = 0; |
316 | pmd_t *pmd, entry; |
317 | pte_t *pte; |
318 | |
319 | pmd = pmd_offset(pud, address: addr); |
320 | for (; addr < end; addr = next, pmd++) { |
321 | next = pmd_addr_end(addr, end); |
322 | if (pmd_none(pmd: *pmd)) { |
323 | if (kasan_pmd_populate_zero_shadow(pmd, addr, end: next, mode)) |
324 | continue; |
325 | if (can_large_pmd(pm_dir: pmd, addr, end: next)) { |
326 | entry = __pmd(val: _pa(addr, size: _SEGMENT_SIZE, mode)); |
327 | entry = set_pmd_bit(entry, SEGMENT_KERNEL); |
328 | if (!machine.has_nx) |
329 | entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC)); |
330 | set_pmd(pmdp: pmd, pmd: entry); |
331 | pages++; |
332 | continue; |
333 | } |
334 | pte = boot_pte_alloc(); |
335 | pmd_populate(mm: &init_mm, pmd, pte); |
336 | } else if (pmd_leaf(pte: *pmd)) { |
337 | continue; |
338 | } |
339 | pgtable_pte_populate(pmd, addr, end: next, mode); |
340 | } |
341 | if (mode == POPULATE_DIRECT) |
342 | update_page_count(level: PG_DIRECT_MAP_1M, pages); |
343 | } |
344 | |
345 | static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end, |
346 | enum populate_mode mode) |
347 | { |
348 | unsigned long next, pages = 0; |
349 | pud_t *pud, entry; |
350 | pmd_t *pmd; |
351 | |
352 | pud = pud_offset(p4d, address: addr); |
353 | for (; addr < end; addr = next, pud++) { |
354 | next = pud_addr_end(addr, end); |
355 | if (pud_none(pud: *pud)) { |
356 | if (kasan_pud_populate_zero_shadow(pud, addr, end: next, mode)) |
357 | continue; |
358 | if (can_large_pud(pu_dir: pud, addr, end: next)) { |
359 | entry = __pud(val: _pa(addr, size: _REGION3_SIZE, mode)); |
360 | entry = set_pud_bit(entry, REGION3_KERNEL); |
361 | if (!machine.has_nx) |
362 | entry = clear_pud_bit(entry, __pgprot(_REGION_ENTRY_NOEXEC)); |
363 | set_pud(pudp: pud, pud: entry); |
364 | pages++; |
365 | continue; |
366 | } |
367 | pmd = boot_crst_alloc(val: _SEGMENT_ENTRY_EMPTY); |
368 | pud_populate(mm: &init_mm, pud, pmd); |
369 | } else if (pud_leaf(pud: *pud)) { |
370 | continue; |
371 | } |
372 | pgtable_pmd_populate(pud, addr, end: next, mode); |
373 | } |
374 | if (mode == POPULATE_DIRECT) |
375 | update_page_count(level: PG_DIRECT_MAP_2G, pages); |
376 | } |
377 | |
378 | static void pgtable_p4d_populate(pgd_t *pgd, unsigned long addr, unsigned long end, |
379 | enum populate_mode mode) |
380 | { |
381 | unsigned long next; |
382 | p4d_t *p4d; |
383 | pud_t *pud; |
384 | |
385 | p4d = p4d_offset(pgd, address: addr); |
386 | for (; addr < end; addr = next, p4d++) { |
387 | next = p4d_addr_end(addr, end); |
388 | if (p4d_none(p4d: *p4d)) { |
389 | if (kasan_p4d_populate_zero_shadow(p4d, addr, end: next, mode)) |
390 | continue; |
391 | pud = boot_crst_alloc(val: _REGION3_ENTRY_EMPTY); |
392 | p4d_populate(mm: &init_mm, p4d, pud); |
393 | } |
394 | pgtable_pud_populate(p4d, addr, end: next, mode); |
395 | } |
396 | } |
397 | |
398 | static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode) |
399 | { |
400 | unsigned long next; |
401 | pgd_t *pgd; |
402 | p4d_t *p4d; |
403 | |
404 | pgd = pgd_offset(&init_mm, addr); |
405 | for (; addr < end; addr = next, pgd++) { |
406 | next = pgd_addr_end(addr, end); |
407 | if (pgd_none(pgd: *pgd)) { |
408 | if (kasan_pgd_populate_zero_shadow(pgd, addr, end: next, mode)) |
409 | continue; |
410 | p4d = boot_crst_alloc(val: _REGION2_ENTRY_EMPTY); |
411 | pgd_populate(mm: &init_mm, pgd, p4d); |
412 | } |
413 | #ifdef CONFIG_KASAN |
414 | if (mode == POPULATE_KASAN_SHALLOW) |
415 | continue; |
416 | #endif |
417 | pgtable_p4d_populate(pgd, addr, end: next, mode); |
418 | } |
419 | } |
420 | |
421 | void setup_vmem(unsigned long asce_limit) |
422 | { |
423 | unsigned long start, end; |
424 | unsigned long asce_type; |
425 | unsigned long asce_bits; |
426 | int i; |
427 | |
428 | /* |
429 | * Mark whole memory as no-dat. This must be done before any |
430 | * page tables are allocated, or kernel image builtin pages |
431 | * are marked as dat tables. |
432 | */ |
433 | for_each_physmem_online_range(i, &start, &end) |
434 | __arch_set_page_nodat((void *)start, (end - start) >> PAGE_SHIFT); |
435 | |
436 | if (asce_limit == _REGION1_SIZE) { |
437 | asce_type = _REGION2_ENTRY_EMPTY; |
438 | asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH; |
439 | } else { |
440 | asce_type = _REGION3_ENTRY_EMPTY; |
441 | asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH; |
442 | } |
443 | s390_invalid_asce.val = invalid_pg_dir | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH; |
444 | |
445 | crst_table_init((unsigned long *)swapper_pg_dir, asce_type); |
446 | crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY); |
447 | __arch_set_page_dat((void *)swapper_pg_dir, 1UL << CRST_ALLOC_ORDER); |
448 | __arch_set_page_dat((void *)invalid_pg_dir, 1UL << CRST_ALLOC_ORDER); |
449 | |
450 | /* |
451 | * To allow prefixing the lowcore must be mapped with 4KB pages. |
452 | * To prevent creation of a large page at address 0 first map |
453 | * the lowcore and create the identity mapping only afterwards. |
454 | */ |
455 | pgtable_populate(0, sizeof(struct lowcore), POPULATE_DIRECT); |
456 | for_each_physmem_usable_range(i, &start, &end) |
457 | pgtable_populate(addr: start, end, mode: POPULATE_DIRECT); |
458 | pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore), |
459 | POPULATE_ABS_LOWCORE); |
460 | pgtable_populate(__memcpy_real_area, __memcpy_real_area + PAGE_SIZE, |
461 | POPULATE_NONE); |
462 | memcpy_real_ptep = __virt_to_kpte(__memcpy_real_area); |
463 | |
464 | kasan_populate_shadow(); |
465 | |
466 | S390_lowcore.kernel_asce.val = swapper_pg_dir | asce_bits; |
467 | S390_lowcore.user_asce = s390_invalid_asce; |
468 | |
469 | local_ctl_load(1, &S390_lowcore.kernel_asce); |
470 | local_ctl_load(7, &S390_lowcore.user_asce); |
471 | local_ctl_load(13, &S390_lowcore.kernel_asce); |
472 | |
473 | init_mm.context.asce = S390_lowcore.kernel_asce.val; |
474 | } |
475 | |