1 | // SPDX-License-Identifier: GPL-2.0 |
2 | #include <linux/set_memory.h> |
3 | #include <linux/ptdump.h> |
4 | #include <linux/seq_file.h> |
5 | #include <linux/debugfs.h> |
6 | #include <linux/mm.h> |
7 | #include <linux/kfence.h> |
8 | #include <linux/kasan.h> |
9 | #include <asm/kasan.h> |
10 | #include <asm/abs_lowcore.h> |
11 | #include <asm/nospec-branch.h> |
12 | #include <asm/sections.h> |
13 | #include <asm/maccess.h> |
14 | |
/*
 * Exclusive upper bound of the virtual address range covered by the kernel
 * ASCE. Computed once in pt_dump_init(); used as the end of the walked range
 * and as the address reported for the walker's final flush in note_page().
 */
static unsigned long max_addr;
16 | |
/*
 * A named boundary in the kernel address space. While dumping, note_page()
 * prints "---[ name ]---" once the walk reaches start_address.
 */
struct addr_marker {
	unsigned long start_address;	/* first address the marker applies to */
	const char *name;		/* label printed in the dump; NULL in terminators */
};
21 | |
/*
 * Build-time positions of the entries in address_markers[].
 *
 * pt_dump_init() uses these indices to fill in the addresses that are only
 * known at run time and then sorts the table by address. After that sort an
 * index does not necessarily refer to the same marker any more.
 */
enum address_markers_idx {
	/* Identity mapping below the kernel image */
	IDENTITY_BEFORE_NR = 0,
	IDENTITY_BEFORE_END_NR,
	/* Amode31 area */
	AMODE31_START_NR,
	AMODE31_END_NR,
	/* Kernel image */
	KERNEL_START_NR,
	KERNEL_END_NR,
#ifdef CONFIG_KFENCE
	/* KFence pool */
	KFENCE_START_NR,
	KFENCE_END_NR,
#endif
	/* Identity mapping above the kernel image */
	IDENTITY_AFTER_NR,
	IDENTITY_AFTER_END_NR,
	/* vmemmap area */
	VMEMMAP_NR,
	VMEMMAP_END_NR,
	/* vmalloc area */
	VMALLOC_NR,
	VMALLOC_END_NR,
	/* Modules area */
	MODULES_NR,
	MODULES_END_NR,
	/* Absolute lowcore mapping */
	ABS_LOWCORE_NR,
	ABS_LOWCORE_END_NR,
	/* Real memory copy area */
	MEMCPY_REAL_NR,
	MEMCPY_REAL_END_NR,
#ifdef CONFIG_KASAN
	/* KASAN shadow memory */
	KASAN_SHADOW_START_NR,
	KASAN_SHADOW_END_NR,
#endif
};
50 | |
51 | static struct addr_marker address_markers[] = { |
52 | [IDENTITY_BEFORE_NR] = {.start_address: 0, .name: "Identity Mapping Start" }, |
53 | [IDENTITY_BEFORE_END_NR] = {.start_address: (unsigned long)_stext, .name: "Identity Mapping End" }, |
54 | [AMODE31_START_NR] = {.start_address: 0, .name: "Amode31 Area Start" }, |
55 | [AMODE31_END_NR] = {.start_address: 0, .name: "Amode31 Area End" }, |
56 | [KERNEL_START_NR] = {.start_address: (unsigned long)_stext, .name: "Kernel Image Start" }, |
57 | [KERNEL_END_NR] = {.start_address: (unsigned long)_end, .name: "Kernel Image End" }, |
58 | #ifdef CONFIG_KFENCE |
59 | [KFENCE_START_NR] = {.start_address: 0, .name: "KFence Pool Start" }, |
60 | [KFENCE_END_NR] = {.start_address: 0, .name: "KFence Pool End" }, |
61 | #endif |
62 | [IDENTITY_AFTER_NR] = {.start_address: (unsigned long)_end, .name: "Identity Mapping Start" }, |
63 | [IDENTITY_AFTER_END_NR] = {.start_address: 0, .name: "Identity Mapping End" }, |
64 | [VMEMMAP_NR] = {.start_address: 0, .name: "vmemmap Area Start" }, |
65 | [VMEMMAP_END_NR] = {.start_address: 0, .name: "vmemmap Area End" }, |
66 | [VMALLOC_NR] = {.start_address: 0, .name: "vmalloc Area Start" }, |
67 | [VMALLOC_END_NR] = {.start_address: 0, .name: "vmalloc Area End" }, |
68 | [MODULES_NR] = {.start_address: 0, .name: "Modules Area Start" }, |
69 | [MODULES_END_NR] = {.start_address: 0, .name: "Modules Area End" }, |
70 | [ABS_LOWCORE_NR] = {.start_address: 0, .name: "Lowcore Area Start" }, |
71 | [ABS_LOWCORE_END_NR] = {.start_address: 0, .name: "Lowcore Area End" }, |
72 | [MEMCPY_REAL_NR] = {.start_address: 0, .name: "Real Memory Copy Area Start" }, |
73 | [MEMCPY_REAL_END_NR] = {.start_address: 0, .name: "Real Memory Copy Area End" }, |
74 | #ifdef CONFIG_KASAN |
75 | [KASAN_SHADOW_START_NR] = {KASAN_SHADOW_START, "Kasan Shadow Start" }, |
76 | [KASAN_SHADOW_END_NR] = {KASAN_SHADOW_END, "Kasan Shadow End" }, |
77 | #endif |
78 | { -1, NULL } |
79 | }; |
80 | |
81 | struct pg_state { |
82 | struct ptdump_state ptdump; |
83 | struct seq_file *seq; |
84 | int level; |
85 | unsigned int current_prot; |
86 | bool check_wx; |
87 | unsigned long wx_pages; |
88 | unsigned long start_address; |
89 | const struct addr_marker *marker; |
90 | }; |
91 | |
/*
 * Formatted output to the seq_file @m. A NULL @m turns the call into a
 * no-op, which is how the W+X check reuses the dump code without printing.
 * @m is evaluated exactly once.
 */
#define pt_dump_seq_printf(m, fmt, ...)				\
({								\
	struct seq_file *__seq = (m);				\
								\
	if (__seq)						\
		seq_printf(__seq, fmt, ##__VA_ARGS__);		\
})
99 | |
/*
 * Write the plain string @fmt to the seq_file @m. A NULL @m turns the call
 * into a no-op. @m is evaluated exactly once.
 *
 * Despite its name, @fmt is not a format string: callers pass strings that
 * are selected at run time, so it is written verbatim with seq_puts()
 * instead of being interpreted by seq_printf().
 */
#define pt_dump_seq_puts(m, fmt)				\
({								\
	struct seq_file *__m = (m);				\
								\
	if (__m)						\
		seq_puts(__m, fmt);				\
})
107 | |
108 | static void print_prot(struct seq_file *m, unsigned int pr, int level) |
109 | { |
110 | static const char * const level_name[] = |
111 | { "ASCE" , "PGD" , "PUD" , "PMD" , "PTE" }; |
112 | |
113 | pt_dump_seq_printf(m, "%s " , level_name[level]); |
114 | if (pr & _PAGE_INVALID) { |
115 | pt_dump_seq_printf(m, "I\n" ); |
116 | return; |
117 | } |
118 | pt_dump_seq_puts(m, (pr & _PAGE_PROTECT) ? "RO " : "RW " ); |
119 | pt_dump_seq_puts(m, (pr & _PAGE_NOEXEC) ? "NX\n" : "X\n" ); |
120 | } |
121 | |
122 | static void note_prot_wx(struct pg_state *st, unsigned long addr) |
123 | { |
124 | if (!st->check_wx) |
125 | return; |
126 | if (st->current_prot & _PAGE_INVALID) |
127 | return; |
128 | if (st->current_prot & _PAGE_PROTECT) |
129 | return; |
130 | if (st->current_prot & _PAGE_NOEXEC) |
131 | return; |
132 | /* |
133 | * The first lowcore page is W+X if spectre mitigations are using |
134 | * trampolines or the BEAR enhancements facility is not installed, |
135 | * in which case we have two lpswe instructions in lowcore that need |
136 | * to be executable. |
137 | */ |
138 | if (addr == PAGE_SIZE && (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear))) |
139 | return; |
140 | WARN_ONCE(IS_ENABLED(CONFIG_DEBUG_WX), |
141 | "s390/mm: Found insecure W+X mapping at address %pS\n" , |
142 | (void *)st->start_address); |
143 | st->wx_pages += (addr - st->start_address) / PAGE_SIZE; |
144 | } |
145 | |
146 | static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val) |
147 | { |
148 | int width = sizeof(unsigned long) * 2; |
149 | static const char units[] = "KMGTPE" ; |
150 | const char *unit = units; |
151 | unsigned long delta; |
152 | struct pg_state *st; |
153 | struct seq_file *m; |
154 | unsigned int prot; |
155 | |
156 | st = container_of(pt_st, struct pg_state, ptdump); |
157 | m = st->seq; |
158 | prot = val & (_PAGE_PROTECT | _PAGE_NOEXEC); |
159 | if (level == 4 && (val & _PAGE_INVALID)) |
160 | prot = _PAGE_INVALID; |
161 | /* For pmd_none() & friends val gets passed as zero. */ |
162 | if (level != 4 && !val) |
163 | prot = _PAGE_INVALID; |
164 | /* Final flush from generic code. */ |
165 | if (level == -1) |
166 | addr = max_addr; |
167 | if (st->level == -1) { |
168 | pt_dump_seq_printf(m, "---[ %s ]---\n" , st->marker->name); |
169 | st->start_address = addr; |
170 | st->current_prot = prot; |
171 | st->level = level; |
172 | } else if (prot != st->current_prot || level != st->level || |
173 | addr >= st->marker[1].start_address) { |
174 | note_prot_wx(st, addr); |
175 | pt_dump_seq_printf(m, "0x%0*lx-0x%0*lx " , |
176 | width, st->start_address, |
177 | width, addr); |
178 | delta = (addr - st->start_address) >> 10; |
179 | while (!(delta & 0x3ff) && unit[1]) { |
180 | delta >>= 10; |
181 | unit++; |
182 | } |
183 | pt_dump_seq_printf(m, "%9lu%c " , delta, *unit); |
184 | print_prot(m, pr: st->current_prot, level: st->level); |
185 | while (addr >= st->marker[1].start_address) { |
186 | st->marker++; |
187 | pt_dump_seq_printf(m, "---[ %s ]---\n" , st->marker->name); |
188 | } |
189 | st->start_address = addr; |
190 | st->current_prot = prot; |
191 | st->level = level; |
192 | } |
193 | } |
194 | |
195 | bool ptdump_check_wx(void) |
196 | { |
197 | struct pg_state st = { |
198 | .ptdump = { |
199 | .note_page = note_page, |
200 | .range = (struct ptdump_range[]) { |
201 | {.start = 0, .end = max_addr}, |
202 | {.start = 0, .end = 0}, |
203 | } |
204 | }, |
205 | .seq = NULL, |
206 | .level = -1, |
207 | .current_prot = 0, |
208 | .check_wx = true, |
209 | .wx_pages = 0, |
210 | .start_address = 0, |
211 | .marker = (struct addr_marker[]) { |
212 | { .start_address = 0, .name = NULL}, |
213 | { .start_address = -1, .name = NULL}, |
214 | }, |
215 | }; |
216 | |
217 | if (!MACHINE_HAS_NX) |
218 | return true; |
219 | ptdump_walk_pgd(st: &st.ptdump, mm: &init_mm, NULL); |
220 | if (st.wx_pages) { |
221 | pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n" , st.wx_pages); |
222 | |
223 | return false; |
224 | } else { |
225 | pr_info("Checked W+X mappings: passed, no %sW+X pages found\n" , |
226 | (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)) ? |
227 | "unexpected " : "" ); |
228 | |
229 | return true; |
230 | } |
231 | } |
232 | |
#ifdef CONFIG_PTDUMP_DEBUGFS
/*
 * ptdump_show - seq_file show callback that dumps the kernel page tables
 * @m: seq_file receiving the dump
 * @v: unused
 *
 * Walks the page tables of init_mm from address 0 up to max_addr and prints
 * every range together with the markers from address_markers[]. W+X
 * accounting is disabled for this walk.
 *
 * NOTE(review): the walk runs between get_online_mems()/put_online_mems()
 * and under cpa_mutex, presumably to keep memory hotplug and page attribute
 * changes from modifying the page tables meanwhile - confirm.
 *
 * Return: always 0.
 */
static int ptdump_show(struct seq_file *m, void *v)
{
	struct pg_state st = {
		.ptdump = {
			.note_page = note_page,
			.range = (struct ptdump_range[]) {
				{.start = 0, .end = max_addr},
				{.start = 0, .end = 0},
			}
		},
		.seq = m,
		.level = -1,
		.current_prot = 0,
		.check_wx = false,
		.wx_pages = 0,
		.start_address = 0,
		.marker = address_markers,
	};

	get_online_mems();
	mutex_lock(&cpa_mutex);
	ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
	mutex_unlock(&cpa_mutex);
	put_online_mems();
	return 0;
}
/* Provides the ptdump_fops that pt_dump_init() registers with debugfs. */
DEFINE_SHOW_ATTRIBUTE(ptdump);
#endif /* CONFIG_PTDUMP_DEBUGFS */
262 | |
263 | /* |
264 | * Heapsort from lib/sort.c is not a stable sorting algorithm, do a simple |
265 | * insertion sort to preserve the original order of markers with the same |
266 | * start address. |
267 | */ |
268 | static void sort_address_markers(void) |
269 | { |
270 | struct addr_marker tmp; |
271 | int i, j; |
272 | |
273 | for (i = 1; i < ARRAY_SIZE(address_markers) - 1; i++) { |
274 | tmp = address_markers[i]; |
275 | for (j = i - 1; j >= 0 && address_markers[j].start_address > tmp.start_address; j--) |
276 | address_markers[j + 1] = address_markers[j]; |
277 | address_markers[j + 1] = tmp; |
278 | } |
279 | } |
280 | |
281 | static int pt_dump_init(void) |
282 | { |
283 | #ifdef CONFIG_KFENCE |
284 | unsigned long kfence_start = (unsigned long)__kfence_pool; |
285 | #endif |
286 | /* |
287 | * Figure out the maximum virtual address being accessible with the |
288 | * kernel ASCE. We need this to keep the page table walker functions |
289 | * from accessing non-existent entries. |
290 | */ |
291 | max_addr = (S390_lowcore.kernel_asce.val & _REGION_ENTRY_TYPE_MASK) >> 2; |
292 | max_addr = 1UL << (max_addr * 11 + 31); |
293 | address_markers[IDENTITY_AFTER_END_NR].start_address = ident_map_size; |
294 | address_markers[AMODE31_START_NR].start_address = (unsigned long)__samode31; |
295 | address_markers[AMODE31_END_NR].start_address = (unsigned long)__eamode31; |
296 | address_markers[MODULES_NR].start_address = MODULES_VADDR; |
297 | address_markers[MODULES_END_NR].start_address = MODULES_END; |
298 | address_markers[ABS_LOWCORE_NR].start_address = __abs_lowcore; |
299 | address_markers[ABS_LOWCORE_END_NR].start_address = __abs_lowcore + ABS_LOWCORE_MAP_SIZE; |
300 | address_markers[MEMCPY_REAL_NR].start_address = __memcpy_real_area; |
301 | address_markers[MEMCPY_REAL_END_NR].start_address = __memcpy_real_area + MEMCPY_REAL_SIZE; |
302 | address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap; |
303 | address_markers[VMEMMAP_END_NR].start_address = (unsigned long)vmemmap + vmemmap_size; |
304 | address_markers[VMALLOC_NR].start_address = VMALLOC_START; |
305 | address_markers[VMALLOC_END_NR].start_address = VMALLOC_END; |
306 | #ifdef CONFIG_KFENCE |
307 | address_markers[KFENCE_START_NR].start_address = kfence_start; |
308 | address_markers[KFENCE_END_NR].start_address = kfence_start + KFENCE_POOL_SIZE; |
309 | #endif |
310 | sort_address_markers(); |
311 | #ifdef CONFIG_PTDUMP_DEBUGFS |
312 | debugfs_create_file(name: "kernel_page_tables" , mode: 0400, NULL, NULL, fops: &ptdump_fops); |
313 | #endif /* CONFIG_PTDUMP_DEBUGFS */ |
314 | return 0; |
315 | } |
316 | device_initcall(pt_dump_init); |
317 | |