// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright IBM Corp. 2006
 */

#include <linux/memory_hotplug.h>
#include <linux/memblock.h>
#include <linux/pfn.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <linux/sort.h>
#include <asm/page-states.h>
#include <asm/cacheflush.h>
#include <asm/nospec-branch.h>
#include <asm/ctlreg.h>
#include <asm/pgalloc.h>
#include <asm/setup.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/set_memory.h>

static DEFINE_MUTEX(vmem_mutex);

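/*
 * Allocate pages for a page table. Use the buddy allocator once slab is
 * up and running; fall back to memblock during early boot.
 */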
static void __ref *vmem_alloc_pages(unsigned int order)
{
	unsigned long size = PAGE_SIZE << order;

	if (slab_is_available())
		return (void *)__get_free_pages(GFP_KERNEL, order);
	return memblock_alloc(size, size);
}

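/*
 * Free pages that back a page table or vmemmap range. Pages belonging to
 * an altmap are returned there instead of to the buddy allocator.
 */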
static void vmem_free_pages(unsigned long addr, int order, struct vmem_altmap *altmap)
{
	if (altmap) {
		vmem_altmap_free(altmap, 1 << order);
		return;
	}
	/* We don't expect boot memory to be removed ever. */
	if (!slab_is_available() ||
	    WARN_ON_ONCE(PageReserved(virt_to_page((void *)addr))))
		return;
	free_pages(addr, order);
}

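/*
 * Allocate a CRST (region or segment) table and initialize all of its
 * entries with the given value.
 */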
void *vmem_crst_alloc(unsigned long val)
{
	unsigned long *table;

	table = vmem_alloc_pages(CRST_ALLOC_ORDER);
	if (!table)
		return NULL;
	crst_table_init(table, val);
	__arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
	return table;
}

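/*
 * Allocate a page table with all entries set to invalid. As with
 * vmem_alloc_pages(), early allocations come from memblock.
 */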
pte_t __ref *vmem_pte_alloc(void)
{
	unsigned long size = PTRS_PER_PTE * sizeof(pte_t);
	pte_t *pte;

	if (slab_is_available())
		pte = (pte_t *) page_table_alloc(&init_mm);
	else
		pte = (pte_t *) memblock_alloc(size, size);
	if (!pte)
		return NULL;
	memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
	__arch_set_page_dat(pte, 1);
	return pte;
}

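/* Free a page table allocated with vmem_pte_alloc(). */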
static void vmem_pte_free(unsigned long *table)
{
	/* We don't expect boot memory to be removed ever. */
	if (!slab_is_available() ||
	    WARN_ON_ONCE(PageReserved(virt_to_page(table))))
		return;
	page_table_free(&init_mm, table);
}

#define PAGE_UNUSED 0xFD

/*
 * The unused vmemmap range, which was not yet memset(PAGE_UNUSED), ranges
 * from unused_sub_pmd_start to the next PMD_SIZE boundary.
 */
static unsigned long unused_sub_pmd_start;

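/*
 * memset(PAGE_UNUSED) the range remembered in unused_sub_pmd_start, if
 * any, and forget it. Called whenever the sub-PMD tracking moves on to a
 * different range.
 */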
static void vmemmap_flush_unused_sub_pmd(void)
{
	if (!unused_sub_pmd_start)
		return;
	memset((void *)unused_sub_pmd_start, PAGE_UNUSED,
	       ALIGN(unused_sub_pmd_start, PMD_SIZE) - unused_sub_pmd_start);
	unused_sub_pmd_start = 0;
}

static void vmemmap_mark_sub_pmd_used(unsigned long start, unsigned long end)
{
	/*
	 * As we expect to add in the same granularity as we remove, it's
	 * sufficient to mark only some piece used to block the memmap page from
	 * getting removed (just in case the memmap never gets initialized,
	 * e.g., because the memory block never gets onlined).
	 */
	memset((void *)start, 0, sizeof(struct page));
}

static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
{
	/*
	 * We only optimize if the new used range directly follows the
	 * previously unused range (esp., when populating consecutive sections).
	 */
	if (unused_sub_pmd_start == start) {
		unused_sub_pmd_start = end;
		if (likely(IS_ALIGNED(unused_sub_pmd_start, PMD_SIZE)))
			unused_sub_pmd_start = 0;
		return;
	}
	vmemmap_flush_unused_sub_pmd();
	vmemmap_mark_sub_pmd_used(start, end);
}

static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
{
	unsigned long page = ALIGN_DOWN(start, PMD_SIZE);

	vmemmap_flush_unused_sub_pmd();

	/* Could be our memmap page is filled with PAGE_UNUSED already ... */
	vmemmap_mark_sub_pmd_used(start, end);

	/* Mark the unused parts of the new memmap page PAGE_UNUSED. */
	if (!IS_ALIGNED(start, PMD_SIZE))
		memset((void *)page, PAGE_UNUSED, start - page);
	/*
	 * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of
	 * consecutive sections. Remember for the last added PMD the last
	 * unused range in the populated PMD.
	 */
	if (!IS_ALIGNED(end, PMD_SIZE))
		unused_sub_pmd_start = end;
}

/* Returns true if the PMD is completely unused and can be freed. */
static bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end)
{
	unsigned long page = ALIGN_DOWN(start, PMD_SIZE);

	vmemmap_flush_unused_sub_pmd();
	memset((void *)start, PAGE_UNUSED, end - start);
	return !memchr_inv((void *)page, PAGE_UNUSED, PMD_SIZE);
}

/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
				  unsigned long end, bool add, bool direct,
				  struct vmem_altmap *altmap)
{
	unsigned long prot, pages = 0;
	int ret = -ENOMEM;
	pte_t *pte;

	prot = pgprot_val(PAGE_KERNEL);
	if (!MACHINE_HAS_NX)
		prot &= ~_PAGE_NOEXEC;

	pte = pte_offset_kernel(pmd, addr);
	for (; addr < end; addr += PAGE_SIZE, pte++) {
		if (!add) {
			if (pte_none(*pte))
				continue;
			if (!direct)
				vmem_free_pages((unsigned long)pfn_to_virt(pte_pfn(*pte)),
						get_order(PAGE_SIZE), altmap);
			pte_clear(&init_mm, addr, pte);
		} else if (pte_none(*pte)) {
			if (!direct) {
				void *new_page = vmemmap_alloc_block_buf(PAGE_SIZE, NUMA_NO_NODE, altmap);

				if (!new_page)
					goto out;
				set_pte(pte, __pte(__pa(new_page) | prot));
			} else {
				set_pte(pte, __pte(__pa(addr) | prot));
			}
		} else {
			continue;
		}
		pages++;
	}
	ret = 0;
out:
	if (direct)
		update_page_count(PG_DIRECT_MAP_4K, add ? pages : -pages);
	return ret;
}

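/* Free the page table mapped by a PMD entry if all of its PTEs are empty. */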
static void try_free_pte_table(pmd_t *pmd, unsigned long start)
{
	pte_t *pte;
	int i;

	/* We can safely assume this is fully in 1:1 mapping & vmemmap area */
	pte = pte_offset_kernel(pmd, start);
	for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
		if (!pte_none(*pte))
			return;
	}
	vmem_pte_free((unsigned long *) pmd_deref(*pmd));
	pmd_clear(pmd);
}

/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
				  unsigned long end, bool add, bool direct,
				  struct vmem_altmap *altmap)
{
	unsigned long next, prot, pages = 0;
	int ret = -ENOMEM;
	pmd_t *pmd;
	pte_t *pte;

	prot = pgprot_val(SEGMENT_KERNEL);
	if (!MACHINE_HAS_NX)
		prot &= ~_SEGMENT_ENTRY_NOEXEC;

	pmd = pmd_offset(pud, addr);
	for (; addr < end; addr = next, pmd++) {
		next = pmd_addr_end(addr, end);
		if (!add) {
			if (pmd_none(*pmd))
				continue;
			if (pmd_leaf(*pmd)) {
				if (IS_ALIGNED(addr, PMD_SIZE) &&
				    IS_ALIGNED(next, PMD_SIZE)) {
					if (!direct)
						vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE), altmap);
					pmd_clear(pmd);
					pages++;
				} else if (!direct && vmemmap_unuse_sub_pmd(addr, next)) {
					vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE), altmap);
					pmd_clear(pmd);
				}
				continue;
			}
		} else if (pmd_none(*pmd)) {
			if (IS_ALIGNED(addr, PMD_SIZE) &&
			    IS_ALIGNED(next, PMD_SIZE) &&
			    MACHINE_HAS_EDAT1 && direct &&
			    !debug_pagealloc_enabled()) {
				set_pmd(pmd, __pmd(__pa(addr) | prot));
				pages++;
				continue;
			} else if (!direct && MACHINE_HAS_EDAT1) {
				void *new_page;

				/*
				 * Use 1MB frames for vmemmap if available. We
				 * always use large frames even if they are only
				 * partially used. Otherwise we would also need
				 * page tables, since vmemmap_populate gets
				 * called for each section separately.
				 */
				new_page = vmemmap_alloc_block_buf(PMD_SIZE, NUMA_NO_NODE, altmap);
				if (new_page) {
					set_pmd(pmd, __pmd(__pa(new_page) | prot));
					if (!IS_ALIGNED(addr, PMD_SIZE) ||
					    !IS_ALIGNED(next, PMD_SIZE)) {
						vmemmap_use_new_sub_pmd(addr, next);
					}
					continue;
				}
			}
			pte = vmem_pte_alloc();
			if (!pte)
				goto out;
			pmd_populate(&init_mm, pmd, pte);
		} else if (pmd_leaf(*pmd)) {
			if (!direct)
				vmemmap_use_sub_pmd(addr, next);
			continue;
		}
		ret = modify_pte_table(pmd, addr, next, add, direct, altmap);
		if (ret)
			goto out;
		if (!add)
			try_free_pte_table(pmd, addr & PMD_MASK);
	}
	ret = 0;
out:
	if (direct)
		update_page_count(PG_DIRECT_MAP_1M, add ? pages : -pages);
	return ret;
}

static void try_free_pmd_table(pud_t *pud, unsigned long start)
{
	pmd_t *pmd;
	int i;

	pmd = pmd_offset(pud, start);
	for (i = 0; i < PTRS_PER_PMD; i++, pmd++)
		if (!pmd_none(*pmd))
			return;
	vmem_free_pages(pud_deref(*pud), CRST_ALLOC_ORDER, NULL);
	pud_clear(pud);
}

static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
			    bool add, bool direct, struct vmem_altmap *altmap)
{
	unsigned long next, prot, pages = 0;
	int ret = -ENOMEM;
	pud_t *pud;
	pmd_t *pmd;

	prot = pgprot_val(REGION3_KERNEL);
	if (!MACHINE_HAS_NX)
		prot &= ~_REGION_ENTRY_NOEXEC;
	pud = pud_offset(p4d, addr);
	for (; addr < end; addr = next, pud++) {
		next = pud_addr_end(addr, end);
		if (!add) {
			if (pud_none(*pud))
				continue;
			if (pud_leaf(*pud)) {
				if (IS_ALIGNED(addr, PUD_SIZE) &&
				    IS_ALIGNED(next, PUD_SIZE)) {
					pud_clear(pud);
					pages++;
				}
				continue;
			}
		} else if (pud_none(*pud)) {
			if (IS_ALIGNED(addr, PUD_SIZE) &&
			    IS_ALIGNED(next, PUD_SIZE) &&
			    MACHINE_HAS_EDAT2 && direct &&
			    !debug_pagealloc_enabled()) {
				set_pud(pud, __pud(__pa(addr) | prot));
				pages++;
				continue;
			}
			pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
			if (!pmd)
				goto out;
			pud_populate(&init_mm, pud, pmd);
		} else if (pud_leaf(*pud)) {
			continue;
		}
		ret = modify_pmd_table(pud, addr, next, add, direct, altmap);
		if (ret)
			goto out;
		if (!add)
			try_free_pmd_table(pud, addr & PUD_MASK);
	}
	ret = 0;
out:
	if (direct)
		update_page_count(PG_DIRECT_MAP_2G, add ? pages : -pages);
	return ret;
}

static void try_free_pud_table(p4d_t *p4d, unsigned long start)
{
	pud_t *pud;
	int i;

	pud = pud_offset(p4d, start);
	for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
		if (!pud_none(*pud))
			return;
	}
	vmem_free_pages(p4d_deref(*p4d), CRST_ALLOC_ORDER, NULL);
	p4d_clear(p4d);
}

static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end,
			    bool add, bool direct, struct vmem_altmap *altmap)
{
	unsigned long next;
	int ret = -ENOMEM;
	p4d_t *p4d;
	pud_t *pud;

	p4d = p4d_offset(pgd, addr);
	for (; addr < end; addr = next, p4d++) {
		next = p4d_addr_end(addr, end);
		if (!add) {
			if (p4d_none(*p4d))
				continue;
		} else if (p4d_none(*p4d)) {
			pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
			if (!pud)
				goto out;
			p4d_populate(&init_mm, p4d, pud);
		}
		ret = modify_pud_table(p4d, addr, next, add, direct, altmap);
		if (ret)
			goto out;
		if (!add)
			try_free_pud_table(p4d, addr & P4D_MASK);
	}
	ret = 0;
out:
	return ret;
}

static void try_free_p4d_table(pgd_t *pgd, unsigned long start)
{
	p4d_t *p4d;
	int i;

	p4d = p4d_offset(pgd, start);
	for (i = 0; i < PTRS_PER_P4D; i++, p4d++) {
		if (!p4d_none(*p4d))
			return;
	}
	vmem_free_pages(pgd_deref(*pgd), CRST_ALLOC_ORDER, NULL);
	pgd_clear(pgd);
}

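/*
 * Walk the kernel page tables and add or remove mappings for the given
 * range, either in the 1:1 direct mapping (direct) or in the vmemmap.
 * Empty intermediate tables are freed on removal.
 */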
static int modify_pagetable(unsigned long start, unsigned long end, bool add,
			    bool direct, struct vmem_altmap *altmap)
{
	unsigned long addr, next;
	int ret = -ENOMEM;
	pgd_t *pgd;
	p4d_t *p4d;

	if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end)))
		return -EINVAL;
	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
	if (WARN_ON_ONCE(end > VMALLOC_START))
		return -EINVAL;
	for (addr = start; addr < end; addr = next) {
		next = pgd_addr_end(addr, end);
		pgd = pgd_offset_k(addr);

		if (!add) {
			if (pgd_none(*pgd))
				continue;
		} else if (pgd_none(*pgd)) {
			p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
			if (!p4d)
				goto out;
			pgd_populate(&init_mm, pgd, p4d);
		}
		ret = modify_p4d_table(pgd, addr, next, add, direct, altmap);
		if (ret)
			goto out;
		if (!add)
			try_free_p4d_table(pgd, addr & PGDIR_MASK);
	}
	ret = 0;
out:
	if (!add)
		flush_tlb_kernel_range(start, end);
	return ret;
}

static int add_pagetable(unsigned long start, unsigned long end, bool direct,
			 struct vmem_altmap *altmap)
{
	return modify_pagetable(start, end, true, direct, altmap);
}

static int remove_pagetable(unsigned long start, unsigned long end, bool direct,
			    struct vmem_altmap *altmap)
{
	return modify_pagetable(start, end, false, direct, altmap);
}

/*
 * Add a physical memory range to the 1:1 mapping.
 */
static int vmem_add_range(unsigned long start, unsigned long size)
{
	start = (unsigned long)__va(start);
	return add_pagetable(start, start + size, true, NULL);
}

/*
 * Remove a physical memory range from the 1:1 mapping.
 */
static void vmem_remove_range(unsigned long start, unsigned long size)
{
	start = (unsigned long)__va(start);
	remove_pagetable(start, start + size, true, NULL);
}

/*
 * Add a backed mem_map array to the virtual mem_map array.
 */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
			       struct vmem_altmap *altmap)
{
	int ret;

	mutex_lock(&vmem_mutex);
	/* We don't care about the node, just use NUMA_NO_NODE on allocations */
	ret = add_pagetable(start, end, false, altmap);
	if (ret)
		remove_pagetable(start, end, false, altmap);
	mutex_unlock(&vmem_mutex);
	return ret;
}

#ifdef CONFIG_MEMORY_HOTPLUG

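/* Remove a backed mem_map range from the virtual mem_map array. */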
void vmemmap_free(unsigned long start, unsigned long end,
		  struct vmem_altmap *altmap)
{
	mutex_lock(&vmem_mutex);
	remove_pagetable(start, end, false, altmap);
	mutex_unlock(&vmem_mutex);
}

#endif

void vmem_remove_mapping(unsigned long start, unsigned long size)
{
	mutex_lock(&vmem_mutex);
	vmem_remove_range(start, size);
	mutex_unlock(&vmem_mutex);
}

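/*
 * Memory hotplug is limited to the range that can be covered by the
 * identity mapping; max_mappable is set up during early boot.
 */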
struct range arch_get_mappable_range(void)
{
	struct range mhp_range;

	mhp_range.start = 0;
	mhp_range.end = max_mappable - 1;
	return mhp_range;
}

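/*
 * Add a physical memory range to the 1:1 mapping, after checking it
 * against the architecturally mappable range.
 */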
int vmem_add_mapping(unsigned long start, unsigned long size)
{
	struct range range = arch_get_mappable_range();
	int ret;

	if (start < range.start ||
	    start + size > range.end + 1 ||
	    start + size < start)
		return -ERANGE;

	mutex_lock(&vmem_mutex);
	ret = vmem_add_range(start, size);
	if (ret)
		vmem_remove_range(start, size);
	mutex_unlock(&vmem_mutex);
	return ret;
}

/*
 * Allocate a new or return an existing page-table entry, but do not map it
 * to any physical address. If missing, allocate segment- and region-table
 * entries along the way. Meeting a large segment- or region-table entry
 * while traversing is an error, since the function is expected to be
 * called against virtual regions reserved for 4KB mappings only.
 */
pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc)
{
	pte_t *ptep = NULL;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	pgd = pgd_offset_k(addr);
	if (pgd_none(*pgd)) {
		if (!alloc)
			goto out;
		p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
		if (!p4d)
			goto out;
		pgd_populate(&init_mm, pgd, p4d);
	}
	p4d = p4d_offset(pgd, addr);
	if (p4d_none(*p4d)) {
		if (!alloc)
			goto out;
		pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
		if (!pud)
			goto out;
		p4d_populate(&init_mm, p4d, pud);
	}
	pud = pud_offset(p4d, addr);
	if (pud_none(*pud)) {
		if (!alloc)
			goto out;
		pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
		if (!pmd)
			goto out;
		pud_populate(&init_mm, pud, pmd);
	} else if (WARN_ON_ONCE(pud_leaf(*pud))) {
		goto out;
	}
	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd)) {
		if (!alloc)
			goto out;
		pte = vmem_pte_alloc();
		if (!pte)
			goto out;
		pmd_populate(&init_mm, pmd, pte);
	} else if (WARN_ON_ONCE(pmd_leaf(*pmd))) {
		goto out;
	}
	ptep = pte_offset_kernel(pmd, addr);
out:
	return ptep;
}

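/*
 * Map a single 4KB page at a page-aligned virtual address. A previous
 * translation, if any, is invalidated with IPTE before the new PTE is
 * installed.
 */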
int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc)
{
	pte_t *ptep, pte;

	if (!IS_ALIGNED(addr, PAGE_SIZE))
		return -EINVAL;
	ptep = vmem_get_alloc_pte(addr, alloc);
	if (!ptep)
		return -ENOMEM;
	__ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
	pte = mk_pte_phys(phys, prot);
	set_pte(ptep, pte);
	return 0;
}

int vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot)
{
	int rc;

	mutex_lock(&vmem_mutex);
	rc = __vmem_map_4k_page(addr, phys, prot, true);
	mutex_unlock(&vmem_mutex);
	return rc;
}

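/* Remove a mapping established with vmem_map_4k_page(). */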
void vmem_unmap_4k_page(unsigned long addr)
{
	pte_t *ptep;

	mutex_lock(&vmem_mutex);
	ptep = virt_to_kpte(addr);
	__ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
	pte_clear(&init_mm, addr, ptep);
	mutex_unlock(&vmem_mutex);
}

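/*
 * Set up protection attributes for the kernel image and enable
 * instruction-execution protection if the machine supports it.
 */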
void __init vmem_map_init(void)
{
	__set_memory_rox(_stext, _etext);
	__set_memory_ro(_etext, __end_rodata);
	__set_memory_rox(_sinittext, _einittext);
	__set_memory_rox(__stext_amode31, __etext_amode31);
	/*
	 * If the BEAR-enhancement facility is not installed, the first
	 * prefix page is used to return to the previous context with
	 * an LPSWE instruction and therefore must be executable.
	 */
	if (!static_key_enabled(&cpu_has_bear))
		set_memory_x(0, 1);
	if (debug_pagealloc_enabled()) {
		/*
		 * Use RELOC_HIDE() as long as __va(0) translates to NULL,
		 * since performing pointer arithmetic on a NULL pointer
		 * has undefined behavior and generates compiler warnings.
		 */
		__set_memory_4k(__va(0), RELOC_HIDE(__va(0), ident_map_size));
	}
	if (MACHINE_HAS_NX)
		system_ctl_set_bit(0, CR0_INSTRUCTION_EXEC_PROTECTION_BIT);
	pr_info("Write protected kernel read-only data: %luk\n",
		(unsigned long)(__end_rodata - _stext) >> 10);
}