1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Copyright (C) 2019 SiFive |
4 | */ |
5 | |
6 | #include <linux/efi.h> |
7 | #include <linux/init.h> |
8 | #include <linux/debugfs.h> |
9 | #include <linux/seq_file.h> |
10 | #include <linux/ptdump.h> |
11 | |
12 | #include <linux/pgtable.h> |
13 | #include <asm/kasan.h> |
14 | |
/*
 * Print helpers that tolerate a NULL seq_file: the W+X checker walks the
 * page tables with st->seq == NULL, in which case nothing is emitted but
 * the range-coalescing state machine still runs.
 */
#define pt_dump_seq_printf(m, fmt, args...)	\
({						\
	if (m)					\
		seq_printf(m, fmt, ##args);	\
})

#define pt_dump_seq_puts(m, fmt)	\
({					\
	if (m)				\
		seq_printf(m, fmt);	\
})
26 | |
27 | /* |
28 | * The page dumper groups page table entries of the same type into a single |
29 | * description. It uses pg_state to track the range information while |
30 | * iterating over the pte entries. When the continuity is broken it then |
31 | * dumps out a description of the range. |
32 | */ |
33 | struct pg_state { |
34 | struct ptdump_state ptdump; |
35 | struct seq_file *seq; |
36 | const struct addr_marker *marker; |
37 | unsigned long start_address; |
38 | unsigned long start_pa; |
39 | unsigned long last_pa; |
40 | int level; |
41 | u64 current_prot; |
42 | bool check_wx; |
43 | unsigned long wx_pages; |
44 | }; |
45 | |
/* Address marker: names the region that begins at start_address */
struct addr_marker {
	unsigned long start_address;	/* first virtual address of the region */
	const char *name;		/* label printed as "---[ name ]---" */
};
51 | |
/* Private information for debugfs: describes one dumpable address space */
struct ptd_mm_info {
	struct mm_struct *mm;		/* mm whose page tables are walked */
	const struct addr_marker *markers; /* region labels, terminated by {-1, NULL} */
	unsigned long base_addr;	/* first virtual address to walk */
	unsigned long end;		/* last virtual address to walk */
};
59 | |
/*
 * Indices into address_markers[] below; the #ifdef structure here must stay
 * in sync with the initializer order of that array.
 */
enum address_markers_idx {
	FIXMAP_START_NR,
	FIXMAP_END_NR,
	PCI_IO_START_NR,
	PCI_IO_END_NR,
#ifdef CONFIG_SPARSEMEM_VMEMMAP
	VMEMMAP_START_NR,
	VMEMMAP_END_NR,
#endif
	VMALLOC_START_NR,
	VMALLOC_END_NR,
	PAGE_OFFSET_NR,
#ifdef CONFIG_KASAN
	KASAN_SHADOW_START_NR,
	KASAN_SHADOW_END_NR,
#endif
#ifdef CONFIG_64BIT
	MODULES_MAPPING_NR,
	KERNEL_MAPPING_NR,
#endif
	END_OF_SPACE_NR
};
82 | |
/*
 * Region labels for the kernel address space. The start addresses are all 0
 * here because several of them are not compile-time constants; they are
 * filled in by ptdump_init(). Terminated by the {-1, NULL} sentinel.
 */
static struct addr_marker address_markers[] = {
	{0, "Fixmap start"},
	{0, "Fixmap end"},
	{0, "PCI I/O start"},
	{0, "PCI I/O end"},
#ifdef CONFIG_SPARSEMEM_VMEMMAP
	{0, "vmemmap start"},
	{0, "vmemmap end"},
#endif
	{0, "vmalloc() area"},
	{0, "vmalloc() end"},
	{0, "Linear mapping"},
#ifdef CONFIG_KASAN
	{0, "Kasan shadow start"},
	{0, "Kasan shadow end"},
#endif
#ifdef CONFIG_64BIT
	{0, "Modules/BPF mapping"},
	{0, "Kernel mapping"},
#endif
	{-1, NULL},
};
105 | |
/* Walk descriptor for init_mm; base_addr is set to KERN_VIRT_START at init */
static struct ptd_mm_info kernel_ptd_info = {
	.mm = &init_mm,
	.markers = address_markers,
	.base_addr = 0,
	.end = ULONG_MAX,
};
112 | |
#ifdef CONFIG_EFI
/* UEFI runtime services live in the low 1G of efi_mm; walk the first 2G */
static struct addr_marker efi_addr_markers[] = {
	{ 0, "UEFI runtime start" },
	{ SZ_1G, "UEFI runtime end" },
	{ -1, NULL }
};

static struct ptd_mm_info efi_ptd_info = {
	.mm = &efi_mm,
	.markers = efi_addr_markers,
	.base_addr = 0,
	.end = SZ_2G,
};
#endif
127 | |
/* Page Table Entry: one printable attribute of a PTE */
struct prot_bits {
	u64 mask;		/* bit(s) of the PTE this entry describes */
	const char *set;	/* printed when any masked bit is set (may be a format) */
	const char *clear;	/* printed when all masked bits are clear */
};
134 | |
/*
 * PTE attribute table, printed left-to-right in this order by dump_prot().
 * The _PAGE_SOFT and _PAGE_MTMASK_SVPBMT entries carry format strings and
 * are special-cased in dump_prot(); the rest are plain single-character
 * set/clear flags.
 */
static const struct prot_bits pte_bits[] = {
	{
#ifdef CONFIG_64BIT
		.mask = _PAGE_NAPOT,
		.set = "N",
		.clear = ".",
	}, {
		.mask = _PAGE_MTMASK_SVPBMT,
		.set = "MT(%s)",
		.clear = " .. ",
	}, {
#endif
		.mask = _PAGE_SOFT,
		.set = "RSW(%d)",
		.clear = " .. ",
	}, {
		.mask = _PAGE_DIRTY,
		.set = "D",
		.clear = ".",
	}, {
		.mask = _PAGE_ACCESSED,
		.set = "A",
		.clear = ".",
	}, {
		.mask = _PAGE_GLOBAL,
		.set = "G",
		.clear = ".",
	}, {
		.mask = _PAGE_USER,
		.set = "U",
		.clear = ".",
	}, {
		.mask = _PAGE_EXEC,
		.set = "X",
		.clear = ".",
	}, {
		.mask = _PAGE_WRITE,
		.set = "W",
		.clear = ".",
	}, {
		.mask = _PAGE_READ,
		.set = "R",
		.clear = ".",
	}, {
		.mask = _PAGE_PRESENT,
		.set = "V",
		.clear = "."
	}
};
184 | |
/* Page Level: per-level name and the union of all attribute masks */
struct pg_level {
	const char *name;	/* level label printed after the size column */
	u64 mask;		/* OR of all pte_bits[] masks; filled by ptdump_init() */
};

/*
 * Indexed by the walker's level number (0 = pgd .. 4 = pte). The P4D/PUD
 * names are additionally fixed up at runtime in ptdump_init() based on
 * pgtable_l5_enabled/pgtable_l4_enabled, since the folded levels print as
 * "PGD".
 */
static struct pg_level pg_level[] = {
	{ /* pgd */
		.name = "PGD",
	}, { /* p4d */
		.name = (CONFIG_PGTABLE_LEVELS > 4) ? "P4D" : "PGD",
	}, { /* pud */
		.name = (CONFIG_PGTABLE_LEVELS > 3) ? "PUD" : "PGD",
	}, { /* pmd */
		.name = (CONFIG_PGTABLE_LEVELS > 2) ? "PMD" : "PGD",
	}, { /* pte */
		.name = "PTE",
	},
};
204 | |
205 | static void dump_prot(struct pg_state *st) |
206 | { |
207 | unsigned int i; |
208 | |
209 | for (i = 0; i < ARRAY_SIZE(pte_bits); i++) { |
210 | char s[7]; |
211 | unsigned long val; |
212 | |
213 | val = st->current_prot & pte_bits[i].mask; |
214 | if (val) { |
215 | if (pte_bits[i].mask == _PAGE_SOFT) |
216 | sprintf(s, pte_bits[i].set, val >> 8); |
217 | #ifdef CONFIG_64BIT |
218 | else if (pte_bits[i].mask == _PAGE_MTMASK_SVPBMT) { |
219 | if (val == _PAGE_NOCACHE_SVPBMT) |
220 | sprintf(s, pte_bits[i].set, "NC" ); |
221 | else if (val == _PAGE_IO_SVPBMT) |
222 | sprintf(s, pte_bits[i].set, "IO" ); |
223 | else |
224 | sprintf(s, pte_bits[i].set, "??" ); |
225 | } |
226 | #endif |
227 | else |
228 | sprintf(s, "%s" , pte_bits[i].set); |
229 | } else { |
230 | sprintf(s, "%s" , pte_bits[i].clear); |
231 | } |
232 | |
233 | pt_dump_seq_printf(st->seq, " %s" , s); |
234 | } |
235 | } |
236 | |
237 | #ifdef CONFIG_64BIT |
238 | #define ADDR_FORMAT "0x%016lx" |
239 | #else |
240 | #define ADDR_FORMAT "0x%08lx" |
241 | #endif |
242 | static void dump_addr(struct pg_state *st, unsigned long addr) |
243 | { |
244 | static const char units[] = "KMGTPE" ; |
245 | const char *unit = units; |
246 | unsigned long delta; |
247 | |
248 | pt_dump_seq_printf(st->seq, ADDR_FORMAT "-" ADDR_FORMAT " " , |
249 | st->start_address, addr); |
250 | |
251 | pt_dump_seq_printf(st->seq, " " ADDR_FORMAT " " , st->start_pa); |
252 | delta = (addr - st->start_address) >> 10; |
253 | |
254 | while (!(delta & 1023) && unit[1]) { |
255 | delta >>= 10; |
256 | unit++; |
257 | } |
258 | |
259 | pt_dump_seq_printf(st->seq, "%9lu%c %s" , delta, *unit, |
260 | pg_level[st->level].name); |
261 | } |
262 | |
263 | static void note_prot_wx(struct pg_state *st, unsigned long addr) |
264 | { |
265 | if (!st->check_wx) |
266 | return; |
267 | |
268 | if ((st->current_prot & (_PAGE_WRITE | _PAGE_EXEC)) != |
269 | (_PAGE_WRITE | _PAGE_EXEC)) |
270 | return; |
271 | |
272 | WARN_ONCE(1, "riscv/mm: Found insecure W+X mapping at address %p/%pS\n" , |
273 | (void *)st->start_address, (void *)st->start_address); |
274 | |
275 | st->wx_pages += (addr - st->start_address) / PAGE_SIZE; |
276 | } |
277 | |
/*
 * ptdump callback, invoked for every entry during the page table walk.
 * Coalesces consecutive entries with identical level and protection bits
 * into a single run, emitting one description line when continuity breaks
 * and a "---[ name ]---" header whenever the address crosses a marker.
 */
static void note_page(struct ptdump_state *pt_st, unsigned long addr,
		      int level, u64 val)
{
	struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
	u64 pa = PFN_PHYS(pte_pfn(__pte(val)));
	u64 prot = 0;

	/* level < 0 marks a hole; treat its protection as empty */
	if (level >= 0)
		prot = val & pg_level[level].mask;

	if (st->level == -1) {
		/* First entry of the walk: open the initial run */
		st->level = level;
		st->current_prot = prot;
		st->start_address = addr;
		st->start_pa = pa;
		st->last_pa = pa;
		pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
	} else if (prot != st->current_prot ||
		   level != st->level || addr >= st->marker[1].start_address) {
		/* Continuity broken: flush the finished run (holes print nothing) */
		if (st->current_prot) {
			note_prot_wx(st, addr);
			dump_addr(st, addr);
			dump_prot(st);
			pt_dump_seq_puts(st->seq, "\n");
		}

		/* Emit headers for every marker boundary we just crossed */
		while (addr >= st->marker[1].start_address) {
			st->marker++;
			pt_dump_seq_printf(st->seq, "---[ %s ]---\n",
					   st->marker->name);
		}

		/* Start a new run at addr */
		st->start_address = addr;
		st->start_pa = pa;
		st->last_pa = pa;
		st->current_prot = prot;
		st->level = level;
	} else {
		/* Same run continues; just remember the latest physical address */
		st->last_pa = pa;
	}
}
319 | |
320 | static void ptdump_walk(struct seq_file *s, struct ptd_mm_info *pinfo) |
321 | { |
322 | struct pg_state st = { |
323 | .seq = s, |
324 | .marker = pinfo->markers, |
325 | .level = -1, |
326 | .ptdump = { |
327 | .note_page = note_page, |
328 | .range = (struct ptdump_range[]) { |
329 | {pinfo->base_addr, pinfo->end}, |
330 | {0, 0} |
331 | } |
332 | } |
333 | }; |
334 | |
335 | ptdump_walk_pgd(st: &st.ptdump, mm: pinfo->mm, NULL); |
336 | } |
337 | |
338 | bool ptdump_check_wx(void) |
339 | { |
340 | struct pg_state st = { |
341 | .seq = NULL, |
342 | .marker = (struct addr_marker[]) { |
343 | {0, NULL}, |
344 | {-1, NULL}, |
345 | }, |
346 | .level = -1, |
347 | .check_wx = true, |
348 | .ptdump = { |
349 | .note_page = note_page, |
350 | .range = (struct ptdump_range[]) { |
351 | {KERN_VIRT_START, ULONG_MAX}, |
352 | {0, 0} |
353 | } |
354 | } |
355 | }; |
356 | |
357 | ptdump_walk_pgd(st: &st.ptdump, mm: &init_mm, NULL); |
358 | |
359 | if (st.wx_pages) { |
360 | pr_warn("Checked W+X mappings: failed, %lu W+X pages found\n" , |
361 | st.wx_pages); |
362 | |
363 | return false; |
364 | } else { |
365 | pr_info("Checked W+X mappings: passed, no W+X pages found\n" ); |
366 | |
367 | return true; |
368 | } |
369 | } |
370 | |
371 | static int ptdump_show(struct seq_file *m, void *v) |
372 | { |
373 | ptdump_walk(s: m, pinfo: m->private); |
374 | |
375 | return 0; |
376 | } |
377 | |
378 | DEFINE_SHOW_ATTRIBUTE(ptdump); |
379 | |
380 | static int __init ptdump_init(void) |
381 | { |
382 | unsigned int i, j; |
383 | |
384 | address_markers[FIXMAP_START_NR].start_address = FIXADDR_START; |
385 | address_markers[FIXMAP_END_NR].start_address = FIXADDR_TOP; |
386 | address_markers[PCI_IO_START_NR].start_address = PCI_IO_START; |
387 | address_markers[PCI_IO_END_NR].start_address = PCI_IO_END; |
388 | #ifdef CONFIG_SPARSEMEM_VMEMMAP |
389 | address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START; |
390 | address_markers[VMEMMAP_END_NR].start_address = VMEMMAP_END; |
391 | #endif |
392 | address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; |
393 | address_markers[VMALLOC_END_NR].start_address = VMALLOC_END; |
394 | address_markers[PAGE_OFFSET_NR].start_address = PAGE_OFFSET; |
395 | #ifdef CONFIG_KASAN |
396 | address_markers[KASAN_SHADOW_START_NR].start_address = KASAN_SHADOW_START; |
397 | address_markers[KASAN_SHADOW_END_NR].start_address = KASAN_SHADOW_END; |
398 | #endif |
399 | #ifdef CONFIG_64BIT |
400 | address_markers[MODULES_MAPPING_NR].start_address = MODULES_VADDR; |
401 | address_markers[KERNEL_MAPPING_NR].start_address = kernel_map.virt_addr; |
402 | #endif |
403 | |
404 | kernel_ptd_info.base_addr = KERN_VIRT_START; |
405 | |
406 | pg_level[1].name = pgtable_l5_enabled ? "P4D" : "PGD" ; |
407 | pg_level[2].name = pgtable_l4_enabled ? "PUD" : "PGD" ; |
408 | |
409 | for (i = 0; i < ARRAY_SIZE(pg_level); i++) |
410 | for (j = 0; j < ARRAY_SIZE(pte_bits); j++) |
411 | pg_level[i].mask |= pte_bits[j].mask; |
412 | |
413 | debugfs_create_file(name: "kernel_page_tables" , mode: 0400, NULL, data: &kernel_ptd_info, |
414 | fops: &ptdump_fops); |
415 | #ifdef CONFIG_EFI |
416 | if (efi_enabled(EFI_RUNTIME_SERVICES)) |
417 | debugfs_create_file(name: "efi_page_tables" , mode: 0400, NULL, data: &efi_ptd_info, |
418 | fops: &ptdump_fops); |
419 | #endif |
420 | |
421 | return 0; |
422 | } |
423 | |
424 | device_initcall(ptdump_init); |
425 | |