1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * This kernel test validates architecture page table helpers and |
4 | * accessors and helps in verifying their continued compliance with |
5 | * expected generic MM semantics. |
6 | * |
7 | * Copyright (C) 2019 ARM Ltd. |
8 | * |
9 | * Author: Anshuman Khandual <anshuman.khandual@arm.com> |
10 | */ |
11 | #define pr_fmt(fmt) "debug_vm_pgtable: [%-25s]: " fmt, __func__ |
12 | |
13 | #include <linux/gfp.h> |
14 | #include <linux/highmem.h> |
15 | #include <linux/hugetlb.h> |
16 | #include <linux/kernel.h> |
17 | #include <linux/kconfig.h> |
18 | #include <linux/memblock.h> |
19 | #include <linux/mm.h> |
20 | #include <linux/mman.h> |
21 | #include <linux/mm_types.h> |
22 | #include <linux/module.h> |
23 | #include <linux/pfn_t.h> |
24 | #include <linux/printk.h> |
25 | #include <linux/pgtable.h> |
26 | #include <linux/random.h> |
27 | #include <linux/spinlock.h> |
28 | #include <linux/swap.h> |
29 | #include <linux/swapops.h> |
30 | #include <linux/start_kernel.h> |
31 | #include <linux/sched/mm.h> |
32 | #include <linux/io.h> |
33 | |
34 | #include <asm/cacheflush.h> |
35 | #include <asm/pgalloc.h> |
36 | #include <asm/tlbflush.h> |
37 | |
38 | /* |
 * Please refer to Documentation/mm/arch_pgtable_helpers.rst for the semantics
 * expectations that are being validated here. All future changes in here
 * or in the documentation need to be in sync.
 *
 * On the s390 platform, the lower 4 bits are used to identify a given page
 * table entry type. But these bits might affect the ability to clear entries
 * with pxx_clear() because of how dynamic page table folding works on s390.
 * So, while loading up the entries, do not change the lower 4 bits. This has
 * no effect on any other platform. Also avoid bit 62 on ppc64, which is
 * used to mark a pte entry.
49 | */ |
50 | #define S390_SKIP_MASK GENMASK(3, 0) |
51 | #if __BITS_PER_LONG == 64 |
52 | #define PPC64_SKIP_MASK GENMASK(62, 62) |
53 | #else |
54 | #define PPC64_SKIP_MASK 0x0 |
55 | #endif |
56 | #define ARCH_SKIP_MASK (S390_SKIP_MASK | PPC64_SKIP_MASK) |
57 | #define RANDOM_ORVALUE (GENMASK(BITS_PER_LONG - 1, 0) & ~ARCH_SKIP_MASK) |
58 | #define RANDOM_NZVALUE GENMASK(7, 0) |
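
/*
 * Illustration: on a 64-bit build with both skip masks in effect,
 * RANDOM_ORVALUE has every bit set except bits 3:0 and bit 62,
 * i.e. 0xbffffffffffffff0.
 */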
59 | |
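/*
 * Collects everything the tests operate on: the scratch mm and vma, a
 * pointer into each page table level for the test address, the start_*
 * entries saved so they can be freed in destroy_args(), and the pfns
 * (both dynamically allocated and fixed) used to build entries. A pfn
 * of ULONG_MAX means the backing (huge) page could not be allocated
 * and the tests depending on it are skipped.
 */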
60 | struct pgtable_debug_args { |
61 | struct mm_struct *mm; |
62 | struct vm_area_struct *vma; |
63 | |
64 | pgd_t *pgdp; |
65 | p4d_t *p4dp; |
66 | pud_t *pudp; |
67 | pmd_t *pmdp; |
68 | pte_t *ptep; |
69 | |
70 | p4d_t *start_p4dp; |
71 | pud_t *start_pudp; |
72 | pmd_t *start_pmdp; |
73 | pgtable_t start_ptep; |
74 | |
75 | unsigned long vaddr; |
76 | pgprot_t page_prot; |
77 | pgprot_t page_prot_none; |
78 | |
79 | bool is_contiguous_page; |
80 | unsigned long pud_pfn; |
81 | unsigned long pmd_pfn; |
82 | unsigned long pte_pfn; |
83 | |
84 | unsigned long fixed_alignment; |
85 | unsigned long fixed_pgd_pfn; |
86 | unsigned long fixed_p4d_pfn; |
87 | unsigned long fixed_pud_pfn; |
88 | unsigned long fixed_pmd_pfn; |
89 | unsigned long fixed_pte_pfn; |
90 | }; |
91 | |
92 | static void __init pte_basic_tests(struct pgtable_debug_args *args, int idx) |
93 | { |
	pgprot_t prot = vm_get_page_prot(idx);
	pte_t pte = pfn_pte(args->fixed_pte_pfn, prot);
	unsigned long val = idx, *ptr = &val;

	pr_debug("Validating PTE basic (%pGv)\n", ptr);
99 | |
100 | /* |
101 | * This test needs to be executed after the given page table entry |
102 | * is created with pfn_pte() to make sure that vm_get_page_prot(idx) |
103 | * does not have the dirty bit enabled from the beginning. This is |
	 * important for platforms like arm64 where (!PTE_RDONLY) indicates
	 * that the dirty bit is set.
106 | */ |
107 | WARN_ON(pte_dirty(pte_wrprotect(pte))); |
108 | |
109 | WARN_ON(!pte_same(pte, pte)); |
110 | WARN_ON(!pte_young(pte_mkyoung(pte_mkold(pte)))); |
111 | WARN_ON(!pte_dirty(pte_mkdirty(pte_mkclean(pte)))); |
112 | WARN_ON(!pte_write(pte_mkwrite(pte_wrprotect(pte), args->vma))); |
113 | WARN_ON(pte_young(pte_mkold(pte_mkyoung(pte)))); |
114 | WARN_ON(pte_dirty(pte_mkclean(pte_mkdirty(pte)))); |
115 | WARN_ON(pte_write(pte_wrprotect(pte_mkwrite(pte, args->vma)))); |
116 | WARN_ON(pte_dirty(pte_wrprotect(pte_mkclean(pte)))); |
117 | WARN_ON(!pte_dirty(pte_wrprotect(pte_mkdirty(pte)))); |
118 | } |
119 | |
120 | static void __init pte_advanced_tests(struct pgtable_debug_args *args) |
121 | { |
122 | struct page *page; |
123 | pte_t pte; |
124 | |
125 | /* |
	 * Architectures optimize set_pte_at() by avoiding the TLB flush.
	 * This requires that set_pte_at() not be used to update an
	 * existing pte entry. Clear the pte before calling set_pte_at().
129 | * |
130 | * flush_dcache_page() is called after set_pte_at() to clear |
	 * PG_arch_1 for the page on ARM64. The page flag isn't cleared
	 * when the page is released, and the page allocation check will
	 * fail when the page is allocated again. For architectures other
	 * than ARM64, the unexpected overhead of cache flushing is
	 * acceptable.
135 | */ |
136 | page = (args->pte_pfn != ULONG_MAX) ? pfn_to_page(args->pte_pfn) : NULL; |
137 | if (!page) |
138 | return; |
139 | |
	pr_debug("Validating PTE advanced\n");
141 | if (WARN_ON(!args->ptep)) |
142 | return; |
143 | |
	pte = pfn_pte(args->pte_pfn, args->page_prot);
	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
	flush_dcache_page(page);
	ptep_set_wrprotect(args->mm, args->vaddr, args->ptep);
	pte = ptep_get(args->ptep);
	WARN_ON(pte_write(pte));
	ptep_get_and_clear(args->mm, args->vaddr, args->ptep);
	pte = ptep_get(args->ptep);
	WARN_ON(!pte_none(pte));

	pte = pfn_pte(args->pte_pfn, args->page_prot);
	pte = pte_wrprotect(pte);
	pte = pte_mkclean(pte);
	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
	flush_dcache_page(page);
	pte = pte_mkwrite(pte, args->vma);
	pte = pte_mkdirty(pte);
	ptep_set_access_flags(args->vma, args->vaddr, args->ptep, pte, 1);
	pte = ptep_get(args->ptep);
	WARN_ON(!(pte_write(pte) && pte_dirty(pte)));
	ptep_get_and_clear_full(args->mm, args->vaddr, args->ptep, 1);
	pte = ptep_get(args->ptep);
	WARN_ON(!pte_none(pte));

	pte = pfn_pte(args->pte_pfn, args->page_prot);
	pte = pte_mkyoung(pte);
	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
	flush_dcache_page(page);
	ptep_test_and_clear_young(args->vma, args->vaddr, args->ptep);
	pte = ptep_get(args->ptep);
	WARN_ON(pte_young(pte));

	ptep_get_and_clear_full(args->mm, args->vaddr, args->ptep, 1);
177 | } |
178 | |
179 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
180 | static void __init pmd_basic_tests(struct pgtable_debug_args *args, int idx) |
181 | { |
	pgprot_t prot = vm_get_page_prot(idx);
183 | unsigned long val = idx, *ptr = &val; |
184 | pmd_t pmd; |
185 | |
186 | if (!has_transparent_hugepage()) |
187 | return; |
188 | |
	pr_debug("Validating PMD basic (%pGv)\n", ptr);
	pmd = pfn_pmd(args->fixed_pmd_pfn, prot);
191 | |
192 | /* |
193 | * This test needs to be executed after the given page table entry |
194 | * is created with pfn_pmd() to make sure that vm_get_page_prot(idx) |
195 | * does not have the dirty bit enabled from the beginning. This is |
	 * important for platforms like arm64 where (!PTE_RDONLY) indicates
	 * that the dirty bit is set.
198 | */ |
199 | WARN_ON(pmd_dirty(pmd_wrprotect(pmd))); |
202 | WARN_ON(!pmd_same(pmd, pmd)); |
203 | WARN_ON(!pmd_young(pmd_mkyoung(pmd_mkold(pmd)))); |
204 | WARN_ON(!pmd_dirty(pmd_mkdirty(pmd_mkclean(pmd)))); |
205 | WARN_ON(!pmd_write(pmd_mkwrite(pmd_wrprotect(pmd), args->vma))); |
206 | WARN_ON(pmd_young(pmd_mkold(pmd_mkyoung(pmd)))); |
207 | WARN_ON(pmd_dirty(pmd_mkclean(pmd_mkdirty(pmd)))); |
208 | WARN_ON(pmd_write(pmd_wrprotect(pmd_mkwrite(pmd, args->vma)))); |
209 | WARN_ON(pmd_dirty(pmd_wrprotect(pmd_mkclean(pmd)))); |
210 | WARN_ON(!pmd_dirty(pmd_wrprotect(pmd_mkdirty(pmd)))); |
211 | /* |
	 * A huge page does not point to the next level page table
	 * entry. Hence this must qualify as pmd_bad().
214 | */ |
215 | WARN_ON(!pmd_bad(pmd_mkhuge(pmd))); |
216 | } |
217 | |
218 | static void __init pmd_advanced_tests(struct pgtable_debug_args *args) |
219 | { |
220 | struct page *page; |
221 | pmd_t pmd; |
222 | unsigned long vaddr = args->vaddr; |
223 | |
224 | if (!has_transparent_hugepage()) |
225 | return; |
226 | |
227 | page = (args->pmd_pfn != ULONG_MAX) ? pfn_to_page(args->pmd_pfn) : NULL; |
228 | if (!page) |
229 | return; |
230 | |
231 | /* |
	 * flush_dcache_page() is called after set_pmd_at() to clear
	 * PG_arch_1 for the page on ARM64. The page flag isn't cleared
	 * when the page is released, and the page allocation check will
	 * fail when the page is allocated again. For architectures other
	 * than ARM64, the unexpected overhead of cache flushing is
	 * acceptable.
237 | */ |
	pr_debug("Validating PMD advanced\n");
239 | /* Align the address wrt HPAGE_PMD_SIZE */ |
240 | vaddr &= HPAGE_PMD_MASK; |
241 | |
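	/*
	 * Deposit a preallocated page table, as the THP fault path would;
	 * it is withdrawn again at the end of this test.
	 */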
	pgtable_trans_huge_deposit(args->mm, args->pmdp, args->start_ptep);
243 | |
	pmd = pfn_pmd(args->pmd_pfn, args->page_prot);
	set_pmd_at(args->mm, vaddr, args->pmdp, pmd);
	flush_dcache_page(page);
	pmdp_set_wrprotect(args->mm, vaddr, args->pmdp);
	pmd = READ_ONCE(*args->pmdp);
	WARN_ON(pmd_write(pmd));
	pmdp_huge_get_and_clear(args->mm, vaddr, args->pmdp);
	pmd = READ_ONCE(*args->pmdp);
	WARN_ON(!pmd_none(pmd));

	pmd = pfn_pmd(args->pmd_pfn, args->page_prot);
	pmd = pmd_wrprotect(pmd);
	pmd = pmd_mkclean(pmd);
	set_pmd_at(args->mm, vaddr, args->pmdp, pmd);
	flush_dcache_page(page);
	pmd = pmd_mkwrite(pmd, args->vma);
	pmd = pmd_mkdirty(pmd);
	pmdp_set_access_flags(args->vma, vaddr, args->pmdp, pmd, 1);
	pmd = READ_ONCE(*args->pmdp);
	WARN_ON(!(pmd_write(pmd) && pmd_dirty(pmd)));
	pmdp_huge_get_and_clear_full(args->vma, vaddr, args->pmdp, 1);
	pmd = READ_ONCE(*args->pmdp);
	WARN_ON(!pmd_none(pmd));

	pmd = pmd_mkhuge(pfn_pmd(args->pmd_pfn, args->page_prot));
	pmd = pmd_mkyoung(pmd);
	set_pmd_at(args->mm, vaddr, args->pmdp, pmd);
	flush_dcache_page(page);
	pmdp_test_and_clear_young(args->vma, vaddr, args->pmdp);
	pmd = READ_ONCE(*args->pmdp);
	WARN_ON(pmd_young(pmd));

	/* Clear the pmd entry and withdraw the deposited page table */
	pmdp_huge_get_and_clear(args->mm, vaddr, args->pmdp);
	pgtable_trans_huge_withdraw(args->mm, args->pmdp);
279 | } |
280 | |
281 | static void __init pmd_leaf_tests(struct pgtable_debug_args *args) |
282 | { |
283 | pmd_t pmd; |
284 | |
285 | if (!has_transparent_hugepage()) |
286 | return; |
287 | |
	pr_debug("Validating PMD leaf\n");
	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);
290 | |
291 | /* |
292 | * PMD based THP is a leaf entry. |
293 | */ |
294 | pmd = pmd_mkhuge(pmd); |
295 | WARN_ON(!pmd_leaf(pmd)); |
296 | } |
297 | |
298 | #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD |
299 | static void __init pud_basic_tests(struct pgtable_debug_args *args, int idx) |
300 | { |
	pgprot_t prot = vm_get_page_prot(idx);
302 | unsigned long val = idx, *ptr = &val; |
303 | pud_t pud; |
304 | |
305 | if (!has_transparent_pud_hugepage()) |
306 | return; |
307 | |
	pr_debug("Validating PUD basic (%pGv)\n", ptr);
	pud = pfn_pud(args->fixed_pud_pfn, prot);
310 | |
311 | /* |
312 | * This test needs to be executed after the given page table entry |
313 | * is created with pfn_pud() to make sure that vm_get_page_prot(idx) |
314 | * does not have the dirty bit enabled from the beginning. This is |
	 * important for platforms like arm64 where (!PTE_RDONLY) indicates
	 * that the dirty bit is set.
317 | */ |
318 | WARN_ON(pud_dirty(pud_wrprotect(pud))); |
319 | |
320 | WARN_ON(!pud_same(pud, pud)); |
321 | WARN_ON(!pud_young(pud_mkyoung(pud_mkold(pud)))); |
322 | WARN_ON(!pud_dirty(pud_mkdirty(pud_mkclean(pud)))); |
323 | WARN_ON(pud_dirty(pud_mkclean(pud_mkdirty(pud)))); |
324 | WARN_ON(!pud_write(pud_mkwrite(pud_wrprotect(pud)))); |
325 | WARN_ON(pud_write(pud_wrprotect(pud_mkwrite(pud)))); |
326 | WARN_ON(pud_young(pud_mkold(pud_mkyoung(pud)))); |
327 | WARN_ON(pud_dirty(pud_wrprotect(pud_mkclean(pud)))); |
328 | WARN_ON(!pud_dirty(pud_wrprotect(pud_mkdirty(pud)))); |
329 | |
330 | if (mm_pmd_folded(args->mm)) |
331 | return; |
332 | |
333 | /* |
	 * A huge page does not point to the next level page table
	 * entry. Hence this must qualify as pud_bad().
336 | */ |
337 | WARN_ON(!pud_bad(pud_mkhuge(pud))); |
338 | } |
339 | |
340 | static void __init pud_advanced_tests(struct pgtable_debug_args *args) |
341 | { |
342 | struct page *page; |
343 | unsigned long vaddr = args->vaddr; |
344 | pud_t pud; |
345 | |
346 | if (!has_transparent_pud_hugepage()) |
347 | return; |
348 | |
349 | page = (args->pud_pfn != ULONG_MAX) ? pfn_to_page(args->pud_pfn) : NULL; |
350 | if (!page) |
351 | return; |
352 | |
353 | /* |
	 * flush_dcache_page() is called after set_pud_at() to clear
	 * PG_arch_1 for the page on ARM64. The page flag isn't cleared
	 * when the page is released, and the page allocation check will
	 * fail when the page is allocated again. For architectures other
	 * than ARM64, the unexpected overhead of cache flushing is
	 * acceptable.
359 | */ |
	pr_debug("Validating PUD advanced\n");
361 | /* Align the address wrt HPAGE_PUD_SIZE */ |
362 | vaddr &= HPAGE_PUD_MASK; |
363 | |
	pud = pfn_pud(args->pud_pfn, args->page_prot);
	set_pud_at(args->mm, vaddr, args->pudp, pud);
	flush_dcache_page(page);
	pudp_set_wrprotect(args->mm, vaddr, args->pudp);
	pud = READ_ONCE(*args->pudp);
	WARN_ON(pud_write(pud));

#ifndef __PAGETABLE_PMD_FOLDED
	pudp_huge_get_and_clear(args->mm, vaddr, args->pudp);
	pud = READ_ONCE(*args->pudp);
	WARN_ON(!pud_none(pud));
#endif /* __PAGETABLE_PMD_FOLDED */
	pud = pfn_pud(args->pud_pfn, args->page_prot);
	pud = pud_wrprotect(pud);
	pud = pud_mkclean(pud);
	set_pud_at(args->mm, vaddr, args->pudp, pud);
	flush_dcache_page(page);
	pud = pud_mkwrite(pud);
	pud = pud_mkdirty(pud);
	pudp_set_access_flags(args->vma, vaddr, args->pudp, pud, 1);
	pud = READ_ONCE(*args->pudp);
	WARN_ON(!(pud_write(pud) && pud_dirty(pud)));

#ifndef __PAGETABLE_PMD_FOLDED
	pudp_huge_get_and_clear_full(args->vma, vaddr, args->pudp, 1);
	pud = READ_ONCE(*args->pudp);
	WARN_ON(!pud_none(pud));
#endif /* __PAGETABLE_PMD_FOLDED */

	pud = pfn_pud(args->pud_pfn, args->page_prot);
	pud = pud_mkyoung(pud);
	set_pud_at(args->mm, vaddr, args->pudp, pud);
	flush_dcache_page(page);
	pudp_test_and_clear_young(args->vma, vaddr, args->pudp);
	pud = READ_ONCE(*args->pudp);
	WARN_ON(pud_young(pud));

	pudp_huge_get_and_clear(args->mm, vaddr, args->pudp);
402 | } |
403 | |
404 | static void __init pud_leaf_tests(struct pgtable_debug_args *args) |
405 | { |
406 | pud_t pud; |
407 | |
408 | if (!has_transparent_pud_hugepage()) |
409 | return; |
410 | |
	pr_debug("Validating PUD leaf\n");
	pud = pfn_pud(args->fixed_pud_pfn, args->page_prot);
413 | /* |
414 | * PUD based THP is a leaf entry. |
415 | */ |
416 | pud = pud_mkhuge(pud); |
417 | WARN_ON(!pud_leaf(pud)); |
418 | } |
419 | #else /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ |
420 | static void __init pud_basic_tests(struct pgtable_debug_args *args, int idx) { } |
421 | static void __init pud_advanced_tests(struct pgtable_debug_args *args) { } |
422 | static void __init pud_leaf_tests(struct pgtable_debug_args *args) { } |
423 | #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ |
424 | #else /* !CONFIG_TRANSPARENT_HUGEPAGE */ |
425 | static void __init pmd_basic_tests(struct pgtable_debug_args *args, int idx) { } |
426 | static void __init pud_basic_tests(struct pgtable_debug_args *args, int idx) { } |
427 | static void __init pmd_advanced_tests(struct pgtable_debug_args *args) { } |
428 | static void __init pud_advanced_tests(struct pgtable_debug_args *args) { } |
429 | static void __init pmd_leaf_tests(struct pgtable_debug_args *args) { } |
430 | static void __init pud_leaf_tests(struct pgtable_debug_args *args) { } |
431 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ |
432 | |
433 | #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP |
434 | static void __init pmd_huge_tests(struct pgtable_debug_args *args) |
435 | { |
436 | pmd_t pmd; |
437 | |
	if (!arch_vmap_pmd_supported(args->page_prot) ||
439 | args->fixed_alignment < PMD_SIZE) |
440 | return; |
441 | |
	pr_debug("Validating PMD huge\n");
443 | /* |
	 * The x86 implementation of pmd_set_huge() verifies that the given
	 * PMD is not a populated non-leaf entry.
446 | */ |
447 | WRITE_ONCE(*args->pmdp, __pmd(0)); |
448 | WARN_ON(!pmd_set_huge(args->pmdp, __pfn_to_phys(args->fixed_pmd_pfn), args->page_prot)); |
449 | WARN_ON(!pmd_clear_huge(args->pmdp)); |
450 | pmd = READ_ONCE(*args->pmdp); |
451 | WARN_ON(!pmd_none(pmd)); |
452 | } |
453 | |
454 | static void __init pud_huge_tests(struct pgtable_debug_args *args) |
455 | { |
456 | pud_t pud; |
457 | |
	if (!arch_vmap_pud_supported(args->page_prot) ||
459 | args->fixed_alignment < PUD_SIZE) |
460 | return; |
461 | |
	pr_debug("Validating PUD huge\n");
463 | /* |
	 * The x86 implementation of pud_set_huge() verifies that the given
	 * PUD is not a populated non-leaf entry.
466 | */ |
467 | WRITE_ONCE(*args->pudp, __pud(0)); |
468 | WARN_ON(!pud_set_huge(args->pudp, __pfn_to_phys(args->fixed_pud_pfn), args->page_prot)); |
469 | WARN_ON(!pud_clear_huge(args->pudp)); |
470 | pud = READ_ONCE(*args->pudp); |
471 | WARN_ON(!pud_none(pud)); |
472 | } |
473 | #else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ |
474 | static void __init pmd_huge_tests(struct pgtable_debug_args *args) { } |
475 | static void __init pud_huge_tests(struct pgtable_debug_args *args) { } |
476 | #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ |
477 | |
478 | static void __init p4d_basic_tests(struct pgtable_debug_args *args) |
479 | { |
480 | p4d_t p4d; |
481 | |
	pr_debug("Validating P4D basic\n");
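	/*
	 * Fill the entry with a junk non-zero pattern; p4d_same()
	 * must hold for any value.
	 */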
483 | memset(&p4d, RANDOM_NZVALUE, sizeof(p4d_t)); |
484 | WARN_ON(!p4d_same(p4d, p4d)); |
485 | } |
486 | |
487 | static void __init pgd_basic_tests(struct pgtable_debug_args *args) |
488 | { |
489 | pgd_t pgd; |
490 | |
	pr_debug("Validating PGD basic\n");
492 | memset(&pgd, RANDOM_NZVALUE, sizeof(pgd_t)); |
493 | WARN_ON(!pgd_same(pgd, pgd)); |
494 | } |
495 | |
496 | #ifndef __PAGETABLE_PUD_FOLDED |
497 | static void __init pud_clear_tests(struct pgtable_debug_args *args) |
498 | { |
499 | pud_t pud = READ_ONCE(*args->pudp); |
500 | |
501 | if (mm_pmd_folded(args->mm)) |
502 | return; |
503 | |
	pr_debug("Validating PUD clear\n");
	pud = __pud(pud_val(pud) | RANDOM_ORVALUE);
	WRITE_ONCE(*args->pudp, pud);
	pud_clear(args->pudp);
508 | pud = READ_ONCE(*args->pudp); |
509 | WARN_ON(!pud_none(pud)); |
510 | } |
511 | |
512 | static void __init pud_populate_tests(struct pgtable_debug_args *args) |
513 | { |
514 | pud_t pud; |
515 | |
516 | if (mm_pmd_folded(args->mm)) |
517 | return; |
518 | |
	pr_debug("Validating PUD populate\n");
	/*
	 * This entry points to the next level page table page.
	 * Hence this must not qualify as pud_bad().
	 */
	pud_populate(args->mm, args->pudp, args->start_pmdp);
525 | pud = READ_ONCE(*args->pudp); |
526 | WARN_ON(pud_bad(pud)); |
527 | } |
528 | #else /* !__PAGETABLE_PUD_FOLDED */ |
529 | static void __init pud_clear_tests(struct pgtable_debug_args *args) { } |
530 | static void __init pud_populate_tests(struct pgtable_debug_args *args) { } |
#endif /* __PAGETABLE_PUD_FOLDED */
532 | |
533 | #ifndef __PAGETABLE_P4D_FOLDED |
534 | static void __init p4d_clear_tests(struct pgtable_debug_args *args) |
535 | { |
536 | p4d_t p4d = READ_ONCE(*args->p4dp); |
537 | |
538 | if (mm_pud_folded(args->mm)) |
539 | return; |
540 | |
	pr_debug("Validating P4D clear\n");
	p4d = __p4d(p4d_val(p4d) | RANDOM_ORVALUE);
	WRITE_ONCE(*args->p4dp, p4d);
	p4d_clear(args->p4dp);
545 | p4d = READ_ONCE(*args->p4dp); |
546 | WARN_ON(!p4d_none(p4d)); |
547 | } |
548 | |
549 | static void __init p4d_populate_tests(struct pgtable_debug_args *args) |
550 | { |
551 | p4d_t p4d; |
552 | |
553 | if (mm_pud_folded(args->mm)) |
554 | return; |
555 | |
	pr_debug("Validating P4D populate\n");
	/*
	 * This entry points to the next level page table page.
	 * Hence this must not qualify as p4d_bad().
	 */
	pud_clear(args->pudp);
	p4d_clear(args->p4dp);
	p4d_populate(args->mm, args->p4dp, args->start_pudp);
564 | p4d = READ_ONCE(*args->p4dp); |
565 | WARN_ON(p4d_bad(p4d)); |
566 | } |
567 | |
568 | static void __init pgd_clear_tests(struct pgtable_debug_args *args) |
569 | { |
570 | pgd_t pgd = READ_ONCE(*(args->pgdp)); |
571 | |
	if (mm_p4d_folded(args->mm))
573 | return; |
574 | |
	pr_debug("Validating PGD clear\n");
	pgd = __pgd(pgd_val(pgd) | RANDOM_ORVALUE);
577 | WRITE_ONCE(*args->pgdp, pgd); |
578 | pgd_clear(args->pgdp); |
579 | pgd = READ_ONCE(*args->pgdp); |
580 | WARN_ON(!pgd_none(pgd)); |
581 | } |
582 | |
583 | static void __init pgd_populate_tests(struct pgtable_debug_args *args) |
584 | { |
585 | pgd_t pgd; |
586 | |
	if (mm_p4d_folded(args->mm))
588 | return; |
589 | |
	pr_debug("Validating PGD populate\n");
	/*
	 * This entry points to the next level page table page.
	 * Hence this must not qualify as pgd_bad().
	 */
	p4d_clear(args->p4dp);
	pgd_clear(args->pgdp);
	pgd_populate(args->mm, args->pgdp, args->start_p4dp);
598 | pgd = READ_ONCE(*args->pgdp); |
599 | WARN_ON(pgd_bad(pgd)); |
600 | } |
601 | #else /* !__PAGETABLE_P4D_FOLDED */ |
602 | static void __init p4d_clear_tests(struct pgtable_debug_args *args) { } |
603 | static void __init pgd_clear_tests(struct pgtable_debug_args *args) { } |
604 | static void __init p4d_populate_tests(struct pgtable_debug_args *args) { } |
605 | static void __init pgd_populate_tests(struct pgtable_debug_args *args) { } |
#endif /* __PAGETABLE_P4D_FOLDED */
607 | |
608 | static void __init pte_clear_tests(struct pgtable_debug_args *args) |
609 | { |
610 | struct page *page; |
	pte_t pte = pfn_pte(args->pte_pfn, args->page_prot);
612 | |
613 | page = (args->pte_pfn != ULONG_MAX) ? pfn_to_page(args->pte_pfn) : NULL; |
614 | if (!page) |
615 | return; |
616 | |
617 | /* |
	 * flush_dcache_page() is called after set_pte_at() to clear
	 * PG_arch_1 for the page on ARM64. The page flag isn't cleared
	 * when the page is released, and the page allocation check will
	 * fail when the page is allocated again. For architectures other
	 * than ARM64, the unexpected overhead of cache flushing is
	 * acceptable.
623 | */ |
	pr_debug("Validating PTE clear\n");
625 | if (WARN_ON(!args->ptep)) |
626 | return; |
627 | |
628 | #ifndef CONFIG_RISCV |
	pte = __pte(pte_val(pte) | RANDOM_ORVALUE);
630 | #endif |
631 | set_pte_at(args->mm, args->vaddr, args->ptep, pte); |
632 | flush_dcache_page(page); |
633 | barrier(); |
	ptep_clear(args->mm, args->vaddr, args->ptep);
	pte = ptep_get(args->ptep);
636 | WARN_ON(!pte_none(pte)); |
637 | } |
638 | |
639 | static void __init pmd_clear_tests(struct pgtable_debug_args *args) |
640 | { |
641 | pmd_t pmd = READ_ONCE(*args->pmdp); |
642 | |
	pr_debug("Validating PMD clear\n");
	pmd = __pmd(pmd_val(pmd) | RANDOM_ORVALUE);
	WRITE_ONCE(*args->pmdp, pmd);
	pmd_clear(args->pmdp);
647 | pmd = READ_ONCE(*args->pmdp); |
648 | WARN_ON(!pmd_none(pmd)); |
649 | } |
650 | |
651 | static void __init pmd_populate_tests(struct pgtable_debug_args *args) |
652 | { |
653 | pmd_t pmd; |
654 | |
	pr_debug("Validating PMD populate\n");
	/*
	 * This entry points to the next level page table page.
	 * Hence this must not qualify as pmd_bad().
	 */
	pmd_populate(args->mm, args->pmdp, args->start_ptep);
661 | pmd = READ_ONCE(*args->pmdp); |
662 | WARN_ON(pmd_bad(pmd)); |
663 | } |
664 | |
665 | static void __init pte_special_tests(struct pgtable_debug_args *args) |
666 | { |
	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);
668 | |
669 | if (!IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL)) |
670 | return; |
671 | |
	pr_debug("Validating PTE special\n");
673 | WARN_ON(!pte_special(pte_mkspecial(pte))); |
674 | } |
675 | |
676 | static void __init pte_protnone_tests(struct pgtable_debug_args *args) |
677 | { |
	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot_none);
679 | |
680 | if (!IS_ENABLED(CONFIG_NUMA_BALANCING)) |
681 | return; |
682 | |
	pr_debug("Validating PTE protnone\n");
684 | WARN_ON(!pte_protnone(pte)); |
685 | WARN_ON(!pte_present(pte)); |
686 | } |
687 | |
688 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
689 | static void __init pmd_protnone_tests(struct pgtable_debug_args *args) |
690 | { |
691 | pmd_t pmd; |
692 | |
693 | if (!IS_ENABLED(CONFIG_NUMA_BALANCING)) |
694 | return; |
695 | |
696 | if (!has_transparent_hugepage()) |
697 | return; |
698 | |
	pr_debug("Validating PMD protnone\n");
	pmd = pmd_mkhuge(pfn_pmd(args->fixed_pmd_pfn, args->page_prot_none));
701 | WARN_ON(!pmd_protnone(pmd)); |
702 | WARN_ON(!pmd_present(pmd)); |
703 | } |
704 | #else /* !CONFIG_TRANSPARENT_HUGEPAGE */ |
705 | static void __init pmd_protnone_tests(struct pgtable_debug_args *args) { } |
706 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ |
707 | |
708 | #ifdef CONFIG_ARCH_HAS_PTE_DEVMAP |
709 | static void __init pte_devmap_tests(struct pgtable_debug_args *args) |
710 | { |
	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);
712 | |
	pr_debug("Validating PTE devmap\n");
714 | WARN_ON(!pte_devmap(pte_mkdevmap(pte))); |
715 | } |
716 | |
717 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
718 | static void __init pmd_devmap_tests(struct pgtable_debug_args *args) |
719 | { |
720 | pmd_t pmd; |
721 | |
722 | if (!has_transparent_hugepage()) |
723 | return; |
724 | |
	pr_debug("Validating PMD devmap\n");
	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);
727 | WARN_ON(!pmd_devmap(pmd_mkdevmap(pmd))); |
728 | } |
729 | |
730 | #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD |
731 | static void __init pud_devmap_tests(struct pgtable_debug_args *args) |
732 | { |
733 | pud_t pud; |
734 | |
735 | if (!has_transparent_pud_hugepage()) |
736 | return; |
737 | |
	pr_debug("Validating PUD devmap\n");
	pud = pfn_pud(args->fixed_pud_pfn, args->page_prot);
740 | WARN_ON(!pud_devmap(pud_mkdevmap(pud))); |
741 | } |
742 | #else /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ |
743 | static void __init pud_devmap_tests(struct pgtable_debug_args *args) { } |
744 | #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ |
#else /* !CONFIG_TRANSPARENT_HUGEPAGE */
746 | static void __init pmd_devmap_tests(struct pgtable_debug_args *args) { } |
747 | static void __init pud_devmap_tests(struct pgtable_debug_args *args) { } |
748 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ |
#else /* !CONFIG_ARCH_HAS_PTE_DEVMAP */
750 | static void __init pte_devmap_tests(struct pgtable_debug_args *args) { } |
751 | static void __init pmd_devmap_tests(struct pgtable_debug_args *args) { } |
752 | static void __init pud_devmap_tests(struct pgtable_debug_args *args) { } |
753 | #endif /* CONFIG_ARCH_HAS_PTE_DEVMAP */ |
754 | |
755 | static void __init pte_soft_dirty_tests(struct pgtable_debug_args *args) |
756 | { |
	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);
758 | |
759 | if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) |
760 | return; |
761 | |
	pr_debug("Validating PTE soft dirty\n");
763 | WARN_ON(!pte_soft_dirty(pte_mksoft_dirty(pte))); |
764 | WARN_ON(pte_soft_dirty(pte_clear_soft_dirty(pte))); |
765 | } |
766 | |
767 | static void __init pte_swap_soft_dirty_tests(struct pgtable_debug_args *args) |
768 | { |
	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);
770 | |
771 | if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) |
772 | return; |
773 | |
	pr_debug("Validating PTE swap soft dirty\n");
775 | WARN_ON(!pte_swp_soft_dirty(pte_swp_mksoft_dirty(pte))); |
776 | WARN_ON(pte_swp_soft_dirty(pte_swp_clear_soft_dirty(pte))); |
777 | } |
778 | |
779 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
780 | static void __init pmd_soft_dirty_tests(struct pgtable_debug_args *args) |
781 | { |
782 | pmd_t pmd; |
783 | |
784 | if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) |
785 | return; |
786 | |
787 | if (!has_transparent_hugepage()) |
788 | return; |
789 | |
	pr_debug("Validating PMD soft dirty\n");
	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);
792 | WARN_ON(!pmd_soft_dirty(pmd_mksoft_dirty(pmd))); |
793 | WARN_ON(pmd_soft_dirty(pmd_clear_soft_dirty(pmd))); |
794 | } |
795 | |
796 | static void __init pmd_swap_soft_dirty_tests(struct pgtable_debug_args *args) |
797 | { |
798 | pmd_t pmd; |
799 | |
800 | if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) || |
801 | !IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION)) |
802 | return; |
803 | |
804 | if (!has_transparent_hugepage()) |
805 | return; |
806 | |
	pr_debug("Validating PMD swap soft dirty\n");
	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);
809 | WARN_ON(!pmd_swp_soft_dirty(pmd_swp_mksoft_dirty(pmd))); |
810 | WARN_ON(pmd_swp_soft_dirty(pmd_swp_clear_soft_dirty(pmd))); |
811 | } |
812 | #else /* !CONFIG_TRANSPARENT_HUGEPAGE */ |
813 | static void __init pmd_soft_dirty_tests(struct pgtable_debug_args *args) { } |
814 | static void __init pmd_swap_soft_dirty_tests(struct pgtable_debug_args *args) { } |
815 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ |
816 | |
817 | static void __init pte_swap_exclusive_tests(struct pgtable_debug_args *args) |
818 | { |
819 | unsigned long max_swap_offset; |
820 | swp_entry_t entry, entry2; |
821 | pte_t pte; |
822 | |
	pr_debug("Validating PTE swap exclusive\n");

	/* See generic_max_swapfile_size(): probe the maximum offset */
	max_swap_offset = swp_offset(pte_to_swp_entry(swp_entry_to_pte(swp_entry(0, ~0UL))));

	/* Create a swp entry with all possible bits set */
	entry = swp_entry((1 << MAX_SWAPFILES_SHIFT) - 1, max_swap_offset);
830 | |
831 | pte = swp_entry_to_pte(entry); |
832 | WARN_ON(pte_swp_exclusive(pte)); |
833 | WARN_ON(!is_swap_pte(pte)); |
834 | entry2 = pte_to_swp_entry(pte); |
835 | WARN_ON(memcmp(&entry, &entry2, sizeof(entry))); |
836 | |
837 | pte = pte_swp_mkexclusive(pte); |
838 | WARN_ON(!pte_swp_exclusive(pte)); |
839 | WARN_ON(!is_swap_pte(pte)); |
840 | WARN_ON(pte_swp_soft_dirty(pte)); |
841 | entry2 = pte_to_swp_entry(pte); |
842 | WARN_ON(memcmp(&entry, &entry2, sizeof(entry))); |
843 | |
844 | pte = pte_swp_clear_exclusive(pte); |
845 | WARN_ON(pte_swp_exclusive(pte)); |
846 | WARN_ON(!is_swap_pte(pte)); |
847 | entry2 = pte_to_swp_entry(pte); |
848 | WARN_ON(memcmp(&entry, &entry2, sizeof(entry))); |
849 | } |
850 | |
851 | static void __init pte_swap_tests(struct pgtable_debug_args *args) |
852 | { |
853 | swp_entry_t swp; |
854 | pte_t pte; |
855 | |
	pr_debug("Validating PTE swap\n");
	pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);
858 | swp = __pte_to_swp_entry(pte); |
859 | pte = __swp_entry_to_pte(swp); |
860 | WARN_ON(args->fixed_pte_pfn != pte_pfn(pte)); |
861 | } |
862 | |
863 | #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION |
864 | static void __init pmd_swap_tests(struct pgtable_debug_args *args) |
865 | { |
866 | swp_entry_t swp; |
867 | pmd_t pmd; |
868 | |
869 | if (!has_transparent_hugepage()) |
870 | return; |
871 | |
	pr_debug("Validating PMD swap\n");
	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);
874 | swp = __pmd_to_swp_entry(pmd); |
875 | pmd = __swp_entry_to_pmd(swp); |
876 | WARN_ON(args->fixed_pmd_pfn != pmd_pfn(pmd)); |
877 | } |
878 | #else /* !CONFIG_ARCH_ENABLE_THP_MIGRATION */ |
879 | static void __init pmd_swap_tests(struct pgtable_debug_args *args) { } |
880 | #endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ |
881 | |
882 | static void __init swap_migration_tests(struct pgtable_debug_args *args) |
883 | { |
884 | struct page *page; |
885 | swp_entry_t swp; |
886 | |
887 | if (!IS_ENABLED(CONFIG_MIGRATION)) |
888 | return; |
889 | |
890 | /* |
	 * swap_migration_tests() requires a dedicated page as it needs to
	 * be locked before creating a migration entry from it. Locking the
	 * page that actually maps kernel text ('start_kernel') can be really
	 * problematic. Let's use the allocated page explicitly for this
	 * purpose.
896 | */ |
897 | page = (args->pte_pfn != ULONG_MAX) ? pfn_to_page(args->pte_pfn) : NULL; |
898 | if (!page) |
899 | return; |
900 | |
	pr_debug("Validating swap migration\n");
902 | |
903 | /* |
904 | * make_[readable|writable]_migration_entry() expects given page to |
905 | * be locked, otherwise it stumbles upon a BUG_ON(). |
906 | */ |
907 | __SetPageLocked(page); |
908 | swp = make_writable_migration_entry(page_to_pfn(page)); |
909 | WARN_ON(!is_migration_entry(swp)); |
910 | WARN_ON(!is_writable_migration_entry(swp)); |
911 | |
	swp = make_readable_migration_entry(swp_offset(swp));
913 | WARN_ON(!is_migration_entry(swp)); |
914 | WARN_ON(is_writable_migration_entry(swp)); |
915 | |
916 | swp = make_readable_migration_entry(page_to_pfn(page)); |
917 | WARN_ON(!is_migration_entry(swp)); |
918 | WARN_ON(is_writable_migration_entry(swp)); |
919 | __ClearPageLocked(page); |
920 | } |
921 | |
922 | #ifdef CONFIG_HUGETLB_PAGE |
923 | static void __init hugetlb_basic_tests(struct pgtable_debug_args *args) |
924 | { |
925 | struct page *page; |
926 | pte_t pte; |
927 | |
	pr_debug("Validating HugeTLB basic\n");
929 | /* |
930 | * Accessing the page associated with the pfn is safe here, |
931 | * as it was previously derived from a real kernel symbol. |
932 | */ |
933 | page = pfn_to_page(args->fixed_pmd_pfn); |
	pte = mk_huge_pte(page, args->page_prot);
935 | |
936 | WARN_ON(!huge_pte_dirty(huge_pte_mkdirty(pte))); |
937 | WARN_ON(!huge_pte_write(huge_pte_mkwrite(huge_pte_wrprotect(pte)))); |
938 | WARN_ON(huge_pte_write(huge_pte_wrprotect(huge_pte_mkwrite(pte)))); |
939 | |
940 | #ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB |
	pte = pfn_pte(args->fixed_pmd_pfn, args->page_prot);
942 | |
943 | WARN_ON(!pte_huge(arch_make_huge_pte(pte, PMD_SHIFT, VM_ACCESS_FLAGS))); |
944 | #endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */ |
945 | } |
946 | #else /* !CONFIG_HUGETLB_PAGE */ |
947 | static void __init hugetlb_basic_tests(struct pgtable_debug_args *args) { } |
948 | #endif /* CONFIG_HUGETLB_PAGE */ |
949 | |
950 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
951 | static void __init pmd_thp_tests(struct pgtable_debug_args *args) |
952 | { |
953 | pmd_t pmd; |
954 | |
955 | if (!has_transparent_hugepage()) |
956 | return; |
957 | |
	pr_debug("Validating PMD based THP\n");
	/*
	 * pmd_trans_huge() and pmd_present() must return true after MMU
	 * invalidation with pmd_mkinvalid(). This behavior is an
	 * optimization for transparent huge pages. pmd_trans_huge() must
	 * be true if pmd_page() returns a valid THP, to avoid taking the
	 * pmd_lock when others walk over non-transhuge pmds (i.e. there
	 * are no THPs allocated). In particular, when splitting a THP and
	 * removing the present bit from the pmd, pmd_trans_huge() still
	 * needs to return true. pmd_present() should be true whenever
	 * pmd_trans_huge() returns true.
	 */
	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);
971 | WARN_ON(!pmd_trans_huge(pmd_mkhuge(pmd))); |
972 | |
973 | #ifndef __HAVE_ARCH_PMDP_INVALIDATE |
974 | WARN_ON(!pmd_trans_huge(pmd_mkinvalid(pmd_mkhuge(pmd)))); |
975 | WARN_ON(!pmd_present(pmd_mkinvalid(pmd_mkhuge(pmd)))); |
976 | #endif /* __HAVE_ARCH_PMDP_INVALIDATE */ |
977 | } |
978 | |
979 | #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD |
980 | static void __init pud_thp_tests(struct pgtable_debug_args *args) |
981 | { |
982 | pud_t pud; |
983 | |
984 | if (!has_transparent_pud_hugepage()) |
985 | return; |
986 | |
	pr_debug("Validating PUD based THP\n");
	pud = pfn_pud(args->fixed_pud_pfn, args->page_prot);
989 | WARN_ON(!pud_trans_huge(pud_mkhuge(pud))); |
990 | |
991 | /* |
	 * pud_mkinvalid() has been dropped for now. Re-enable these
	 * tests once it comes back with a modified pud_present().
994 | * |
995 | * WARN_ON(!pud_trans_huge(pud_mkinvalid(pud_mkhuge(pud)))); |
996 | * WARN_ON(!pud_present(pud_mkinvalid(pud_mkhuge(pud)))); |
997 | */ |
998 | } |
999 | #else /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ |
1000 | static void __init pud_thp_tests(struct pgtable_debug_args *args) { } |
1001 | #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ |
1002 | #else /* !CONFIG_TRANSPARENT_HUGEPAGE */ |
1003 | static void __init pmd_thp_tests(struct pgtable_debug_args *args) { } |
1004 | static void __init pud_thp_tests(struct pgtable_debug_args *args) { } |
1005 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ |
1006 | |
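/*
 * Pick a random page-aligned address in [FIRST_USER_ADDRESS, TASK_SIZE).
 * Nothing is mapped there; the address only selects which slots in each
 * page table level the tests operate on.
 */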
1007 | static unsigned long __init get_random_vaddr(void) |
1008 | { |
1009 | unsigned long random_vaddr, random_pages, total_user_pages; |
1010 | |
1011 | total_user_pages = (TASK_SIZE - FIRST_USER_ADDRESS) / PAGE_SIZE; |
1012 | |
1013 | random_pages = get_random_long() % total_user_pages; |
1014 | random_vaddr = FIRST_USER_ADDRESS + random_pages * PAGE_SIZE; |
1015 | |
1016 | return random_vaddr; |
1017 | } |
1018 | |
1019 | static void __init destroy_args(struct pgtable_debug_args *args) |
1020 | { |
1021 | struct page *page = NULL; |
1022 | |
1023 | /* Free (huge) page */ |
1024 | if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && |
1025 | has_transparent_pud_hugepage() && |
1026 | args->pud_pfn != ULONG_MAX) { |
1027 | if (args->is_contiguous_page) { |
			free_contig_range(args->pud_pfn,
					(1 << (HPAGE_PUD_SHIFT - PAGE_SHIFT)));
1030 | } else { |
1031 | page = pfn_to_page(args->pud_pfn); |
1032 | __free_pages(page, HPAGE_PUD_SHIFT - PAGE_SHIFT); |
1033 | } |
1034 | |
1035 | args->pud_pfn = ULONG_MAX; |
1036 | args->pmd_pfn = ULONG_MAX; |
1037 | args->pte_pfn = ULONG_MAX; |
1038 | } |
1039 | |
1040 | if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && |
1041 | has_transparent_hugepage() && |
1042 | args->pmd_pfn != ULONG_MAX) { |
1043 | if (args->is_contiguous_page) { |
			free_contig_range(args->pmd_pfn, (1 << HPAGE_PMD_ORDER));
1045 | } else { |
1046 | page = pfn_to_page(args->pmd_pfn); |
1047 | __free_pages(page, HPAGE_PMD_ORDER); |
1048 | } |
1049 | |
1050 | args->pmd_pfn = ULONG_MAX; |
1051 | args->pte_pfn = ULONG_MAX; |
1052 | } |
1053 | |
1054 | if (args->pte_pfn != ULONG_MAX) { |
1055 | page = pfn_to_page(args->pte_pfn); |
1056 | __free_page(page); |
1057 | |
1058 | args->pte_pfn = ULONG_MAX; |
1059 | } |
1060 | |
1061 | /* Free page table entries */ |
1062 | if (args->start_ptep) { |
		pte_free(args->mm, args->start_ptep);
		mm_dec_nr_ptes(args->mm);
1065 | } |
1066 | |
1067 | if (args->start_pmdp) { |
		pmd_free(args->mm, args->start_pmdp);
		mm_dec_nr_pmds(args->mm);
1070 | } |
1071 | |
1072 | if (args->start_pudp) { |
		pud_free(args->mm, args->start_pudp);
		mm_dec_nr_puds(args->mm);
1075 | } |
1076 | |
1077 | if (args->start_p4dp) |
		p4d_free(args->mm, args->start_p4dp);
1079 | |
1080 | /* Free vma and mm struct */ |
1081 | if (args->vma) |
1082 | vm_area_free(args->vma); |
1083 | |
1084 | if (args->mm) |
		mmdrop(args->mm);
1086 | } |
1087 | |
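/*
 * Orders beyond MAX_ORDER cannot be served by the buddy allocator, so
 * try alloc_contig_pages() first (when CONFIG_CONTIG_ALLOC is enabled)
 * and fall back to alloc_pages() for orders the buddy allocator covers.
 */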
1088 | static struct page * __init |
1089 | debug_vm_pgtable_alloc_huge_page(struct pgtable_debug_args *args, int order) |
1090 | { |
1091 | struct page *page = NULL; |
1092 | |
1093 | #ifdef CONFIG_CONTIG_ALLOC |
1094 | if (order > MAX_ORDER) { |
		page = alloc_contig_pages((1 << order), GFP_KERNEL,
					  first_online_node, NULL);
1097 | if (page) { |
1098 | args->is_contiguous_page = true; |
1099 | return page; |
1100 | } |
1101 | } |
1102 | #endif |
1103 | |
1104 | if (order <= MAX_ORDER) |
1105 | page = alloc_pages(GFP_KERNEL, order); |
1106 | |
1107 | return page; |
1108 | } |
1109 | |
1110 | /* |
1111 | * Check if a physical memory range described by <pstart, pend> contains |
1112 | * an area that is of size psize, and aligned to psize. |
1113 | * |
1114 | * Don't use address 0, an all-zeroes physical address might mask bugs, and |
1115 | * it's not used on x86. |
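 *
 * For example, given the range [0x40200000, 0x80000000), psize == PMD_SIZE
 * (2MiB on x86_64) gives aligned_start == 0x40200000, which fits, while
 * psize == PUD_SIZE (1GiB) rounds up to 0x80000000 and the aligned end
 * overruns the range, so *physp and *alignp are left untouched.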
1116 | */ |
1117 | static void __init phys_align_check(phys_addr_t pstart, |
1118 | phys_addr_t pend, unsigned long psize, |
1119 | phys_addr_t *physp, unsigned long *alignp) |
1120 | { |
1121 | phys_addr_t aligned_start, aligned_end; |
1122 | |
1123 | if (pstart == 0) |
1124 | pstart = PAGE_SIZE; |
1125 | |
1126 | aligned_start = ALIGN(pstart, psize); |
1127 | aligned_end = aligned_start + psize; |
1128 | |
1129 | if (aligned_end > aligned_start && aligned_end <= pend) { |
1130 | *alignp = psize; |
1131 | *physp = aligned_start; |
1132 | } |
1133 | } |
1134 | |
1135 | static void __init init_fixed_pfns(struct pgtable_debug_args *args) |
1136 | { |
1137 | u64 idx; |
1138 | phys_addr_t phys, pstart, pend; |
1139 | |
1140 | /* |
1141 | * Initialize the fixed pfns. To do this, try to find a |
1142 | * valid physical range, preferably aligned to PUD_SIZE, |
1143 | * but settling for aligned to PMD_SIZE as a fallback. If |
1144 | * neither of those is found, use the physical address of |
1145 | * the start_kernel symbol. |
1146 | * |
1147 | * The memory doesn't need to be allocated, it just needs to exist |
1148 | * as usable memory. It won't be touched. |
1149 | * |
1150 | * The alignment is recorded, and can be checked to see if we |
1151 | * can run the tests that require an actual valid physical |
1152 | * address range on some architectures ({pmd,pud}_huge_test |
1153 | * on x86). |
1154 | */ |
1155 | |
1156 | phys = __pa_symbol(&start_kernel); |
1157 | args->fixed_alignment = PAGE_SIZE; |
1158 | |
1159 | for_each_mem_range(idx, &pstart, &pend) { |
1160 | /* First check for a PUD-aligned area */ |
		phys_align_check(pstart, pend, PUD_SIZE, &phys,
				 &args->fixed_alignment);
1163 | |
1164 | /* If a PUD-aligned area is found, we're done */ |
1165 | if (args->fixed_alignment == PUD_SIZE) |
1166 | break; |
1167 | |
1168 | /* |
1169 | * If no PMD-aligned area found yet, check for one, |
1170 | * but continue the loop to look for a PUD-aligned area. |
1171 | */ |
1172 | if (args->fixed_alignment < PMD_SIZE) |
			phys_align_check(pstart, pend, PMD_SIZE, &phys,
					 &args->fixed_alignment);
1175 | } |
1176 | |
1177 | args->fixed_pgd_pfn = __phys_to_pfn(phys & PGDIR_MASK); |
1178 | args->fixed_p4d_pfn = __phys_to_pfn(phys & P4D_MASK); |
1179 | args->fixed_pud_pfn = __phys_to_pfn(phys & PUD_MASK); |
1180 | args->fixed_pmd_pfn = __phys_to_pfn(phys & PMD_MASK); |
1181 | args->fixed_pte_pfn = __phys_to_pfn(phys & PAGE_MASK); |
1182 | WARN_ON(!pfn_valid(args->fixed_pte_pfn)); |
1183 | } |
1186 | static int __init init_args(struct pgtable_debug_args *args) |
1187 | { |
1188 | struct page *page = NULL; |
1189 | int ret = 0; |
1190 | |
1191 | /* |
1192 | * Initialize the debugging data. |
1193 | * |
1194 | * vm_get_page_prot(VM_NONE) or vm_get_page_prot(VM_SHARED|VM_NONE) |
1195 | * will help create page table entries with PROT_NONE permission as |
1196 | * required for pxx_protnone_tests(). |
1197 | */ |
1198 | memset(args, 0, sizeof(*args)); |
1199 | args->vaddr = get_random_vaddr(); |
1200 | args->page_prot = vm_get_page_prot(VM_ACCESS_FLAGS); |
1201 | args->page_prot_none = vm_get_page_prot(VM_NONE); |
1202 | args->is_contiguous_page = false; |
1203 | args->pud_pfn = ULONG_MAX; |
1204 | args->pmd_pfn = ULONG_MAX; |
1205 | args->pte_pfn = ULONG_MAX; |
1206 | args->fixed_pgd_pfn = ULONG_MAX; |
1207 | args->fixed_p4d_pfn = ULONG_MAX; |
1208 | args->fixed_pud_pfn = ULONG_MAX; |
1209 | args->fixed_pmd_pfn = ULONG_MAX; |
1210 | args->fixed_pte_pfn = ULONG_MAX; |
1211 | |
1212 | /* Allocate mm and vma */ |
1213 | args->mm = mm_alloc(); |
1214 | if (!args->mm) { |
		pr_err("Failed to allocate mm struct\n");
1216 | ret = -ENOMEM; |
1217 | goto error; |
1218 | } |
1219 | |
1220 | args->vma = vm_area_alloc(args->mm); |
1221 | if (!args->vma) { |
		pr_err("Failed to allocate vma\n");
1223 | ret = -ENOMEM; |
1224 | goto error; |
1225 | } |
1226 | |
1227 | /* |
	 * Allocate page table entries. They will be modified in the tests.
	 * Let's save the page table entries so that they can be released
	 * when the tests are completed.
1231 | */ |
1232 | args->pgdp = pgd_offset(args->mm, args->vaddr); |
	args->p4dp = p4d_alloc(args->mm, args->pgdp, args->vaddr);
1234 | if (!args->p4dp) { |
		pr_err("Failed to allocate p4d entries\n");
1236 | ret = -ENOMEM; |
1237 | goto error; |
1238 | } |
	args->start_p4dp = p4d_offset(args->pgdp, 0UL);
1240 | WARN_ON(!args->start_p4dp); |
1241 | |
	args->pudp = pud_alloc(args->mm, args->p4dp, args->vaddr);
1243 | if (!args->pudp) { |
		pr_err("Failed to allocate pud entries\n");
1245 | ret = -ENOMEM; |
1246 | goto error; |
1247 | } |
	args->start_pudp = pud_offset(args->p4dp, 0UL);
1249 | WARN_ON(!args->start_pudp); |
1250 | |
	args->pmdp = pmd_alloc(args->mm, args->pudp, args->vaddr);
1252 | if (!args->pmdp) { |
		pr_err("Failed to allocate pmd entries\n");
1254 | ret = -ENOMEM; |
1255 | goto error; |
1256 | } |
	args->start_pmdp = pmd_offset(args->pudp, 0UL);
1258 | WARN_ON(!args->start_pmdp); |
1259 | |
1260 | if (pte_alloc(args->mm, args->pmdp)) { |
		pr_err("Failed to allocate pte entries\n");
1262 | ret = -ENOMEM; |
1263 | goto error; |
1264 | } |
1265 | args->start_ptep = pmd_pgtable(READ_ONCE(*args->pmdp)); |
1266 | WARN_ON(!args->start_ptep); |
1267 | |
1268 | init_fixed_pfns(args); |
1269 | |
1270 | /* |
1271 | * Allocate (huge) pages because some of the tests need to access |
1272 | * the data in the pages. The corresponding tests will be skipped |
1273 | * if we fail to allocate (huge) pages. |
1274 | */ |
1275 | if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && |
1276 | has_transparent_pud_hugepage()) { |
1277 | page = debug_vm_pgtable_alloc_huge_page(args, |
1278 | HPAGE_PUD_SHIFT - PAGE_SHIFT); |
1279 | if (page) { |
1280 | args->pud_pfn = page_to_pfn(page); |
1281 | args->pmd_pfn = args->pud_pfn; |
1282 | args->pte_pfn = args->pud_pfn; |
1283 | return 0; |
1284 | } |
1285 | } |
1286 | |
1287 | if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && |
1288 | has_transparent_hugepage()) { |
1289 | page = debug_vm_pgtable_alloc_huge_page(args, HPAGE_PMD_ORDER); |
1290 | if (page) { |
1291 | args->pmd_pfn = page_to_pfn(page); |
1292 | args->pte_pfn = args->pmd_pfn; |
1293 | return 0; |
1294 | } |
1295 | } |
1296 | |
1297 | page = alloc_page(GFP_KERNEL); |
1298 | if (page) |
1299 | args->pte_pfn = page_to_pfn(page); |
1300 | |
1301 | return 0; |
1302 | |
1303 | error: |
1304 | destroy_args(args); |
1305 | return ret; |
1306 | } |
1307 | |
1308 | static int __init debug_vm_pgtable(void) |
1309 | { |
1310 | struct pgtable_debug_args args; |
1311 | spinlock_t *ptl = NULL; |
1312 | int idx, ret; |
1313 | |
	pr_info("Validating architecture page table helpers\n");
	ret = init_args(&args);
1316 | if (ret) |
1317 | return ret; |
1318 | |
1319 | /* |
1320 | * Iterate over each possible vm_flags to make sure that all |
1321 | * the basic page table transformation validations just hold |
1322 | * true irrespective of the starting protection value for a |
1323 | * given page table entry. |
1324 | * |
	 * Protection based vm_flags combinations are always linear
	 * and increasing, i.e., starting from VM_NONE and going up to
	 * (VM_SHARED | VM_READ | VM_WRITE | VM_EXEC).
1328 | */ |
1329 | #define VM_FLAGS_START (VM_NONE) |
1330 | #define VM_FLAGS_END (VM_SHARED | VM_EXEC | VM_WRITE | VM_READ) |
1331 | |
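	/*
	 * For example, idx == (VM_READ | VM_WRITE) exercises the helpers
	 * with the protection bits of a read/write mapping, while
	 * idx == VM_NONE covers PROT_NONE style entries.
	 */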
1332 | for (idx = VM_FLAGS_START; idx <= VM_FLAGS_END; idx++) { |
		pte_basic_tests(&args, idx);
		pmd_basic_tests(&args, idx);
		pud_basic_tests(&args, idx);
1336 | } |
1337 | |
1338 | /* |
	 * Both the P4D and PGD level tests are very basic and do not
	 * involve creating page table entries from the protection
	 * value and the given pfn. Hence just keep them out of the
	 * above iteration for now to save some test execution time.
1344 | */ |
	p4d_basic_tests(&args);
	pgd_basic_tests(&args);

	pmd_leaf_tests(&args);
	pud_leaf_tests(&args);

	pte_special_tests(&args);
	pte_protnone_tests(&args);
	pmd_protnone_tests(&args);

	pte_devmap_tests(&args);
	pmd_devmap_tests(&args);
	pud_devmap_tests(&args);

	pte_soft_dirty_tests(&args);
	pmd_soft_dirty_tests(&args);
	pte_swap_soft_dirty_tests(&args);
	pmd_swap_soft_dirty_tests(&args);

	pte_swap_exclusive_tests(&args);

	pte_swap_tests(&args);
	pmd_swap_tests(&args);

	swap_migration_tests(&args);

	pmd_thp_tests(&args);
	pud_thp_tests(&args);

	hugetlb_basic_tests(&args);
1375 | |
1376 | /* |
	 * Page table modifying tests. They need to hold the
	 * proper page table locks.
1379 | */ |
1380 | |
	args.ptep = pte_offset_map_lock(args.mm, args.pmdp, args.vaddr, &ptl);
	pte_clear_tests(&args);
	pte_advanced_tests(&args);
	if (args.ptep)
		pte_unmap_unlock(args.ptep, ptl);

	ptl = pmd_lock(args.mm, args.pmdp);
	pmd_clear_tests(&args);
	pmd_advanced_tests(&args);
	pmd_huge_tests(&args);
	pmd_populate_tests(&args);
	spin_unlock(ptl);

	ptl = pud_lock(args.mm, args.pudp);
	pud_clear_tests(&args);
	pud_advanced_tests(&args);
	pud_huge_tests(&args);
	pud_populate_tests(&args);
	spin_unlock(ptl);

	spin_lock(&(args.mm->page_table_lock));
	p4d_clear_tests(&args);
	pgd_clear_tests(&args);
	p4d_populate_tests(&args);
	pgd_populate_tests(&args);
	spin_unlock(&(args.mm->page_table_lock));

	destroy_args(&args);
1409 | return 0; |
1410 | } |
1411 | late_initcall(debug_vm_pgtable); |
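
/*
 * Usage note: this test is built only when CONFIG_DEBUG_VM_PGTABLE is
 * enabled. It runs once at late_initcall() time; a broken helper shows
 * up as a WARN_ON() splat in the kernel log, with the pr_debug()
 * progress messages available when dynamic debug is enabled for this
 * file.
 */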
1412 | |