1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * linux/arch/arm/mm/fault.c |
4 | * |
5 | * Copyright (C) 1995 Linus Torvalds |
6 | * Modifications for ARM processor (c) 1995-2004 Russell King |
7 | */ |
8 | #include <linux/extable.h> |
9 | #include <linux/signal.h> |
10 | #include <linux/mm.h> |
11 | #include <linux/hardirq.h> |
12 | #include <linux/init.h> |
13 | #include <linux/kprobes.h> |
14 | #include <linux/uaccess.h> |
15 | #include <linux/page-flags.h> |
16 | #include <linux/sched/signal.h> |
17 | #include <linux/sched/debug.h> |
18 | #include <linux/highmem.h> |
19 | #include <linux/perf_event.h> |
20 | #include <linux/kfence.h> |
21 | |
22 | #include <asm/system_misc.h> |
23 | #include <asm/system_info.h> |
24 | #include <asm/tlbflush.h> |
25 | |
26 | #include "fault.h" |
27 | |
28 | #ifdef CONFIG_MMU |
29 | |
30 | /* |
31 | * This is useful to dump out the page tables associated with |
32 | * 'addr' in mm 'mm'. |
33 | */ |
34 | void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr) |
35 | { |
36 | pgd_t *pgd; |
37 | |
38 | if (!mm) |
39 | mm = &init_mm; |
40 | |
41 | pgd = pgd_offset(mm, addr); |
42 | printk("%s[%08lx] *pgd=%08llx" , lvl, addr, (long long)pgd_val(*pgd)); |
43 | |
44 | do { |
45 | p4d_t *p4d; |
46 | pud_t *pud; |
47 | pmd_t *pmd; |
48 | pte_t *pte; |
49 | |
50 | p4d = p4d_offset(pgd, address: addr); |
51 | if (p4d_none(p4d: *p4d)) |
52 | break; |
53 | |
54 | if (p4d_bad(p4d: *p4d)) { |
55 | pr_cont("(bad)" ); |
56 | break; |
57 | } |
58 | |
59 | pud = pud_offset(p4d, address: addr); |
60 | if (PTRS_PER_PUD != 1) |
61 | pr_cont(", *pud=%08llx" , (long long)pud_val(*pud)); |
62 | |
63 | if (pud_none(pud: *pud)) |
64 | break; |
65 | |
66 | if (pud_bad(pud: *pud)) { |
67 | pr_cont("(bad)" ); |
68 | break; |
69 | } |
70 | |
71 | pmd = pmd_offset(pud, address: addr); |
72 | if (PTRS_PER_PMD != 1) |
73 | pr_cont(", *pmd=%08llx" , (long long)pmd_val(*pmd)); |
74 | |
75 | if (pmd_none(pmd: *pmd)) |
76 | break; |
77 | |
78 | if (pmd_bad(pmd: *pmd)) { |
79 | pr_cont("(bad)" ); |
80 | break; |
81 | } |
82 | |
83 | /* We must not map this if we have highmem enabled */ |
84 | if (PageHighMem(pfn_to_page(pmd_val(*pmd) >> PAGE_SHIFT))) |
85 | break; |
86 | |
87 | pte = pte_offset_map(pmd, addr); |
88 | if (!pte) |
89 | break; |
90 | |
91 | pr_cont(", *pte=%08llx" , (long long)pte_val(*pte)); |
92 | #ifndef CONFIG_ARM_LPAE |
93 | pr_cont(", *ppte=%08llx" , |
94 | (long long)pte_val(pte[PTE_HWTABLE_PTRS])); |
95 | #endif |
96 | pte_unmap(pte); |
97 | } while(0); |
98 | |
99 | pr_cont("\n" ); |
100 | } |
101 | #else /* CONFIG_MMU */ |
/*
 * !CONFIG_MMU variant of show_pte(): an intentionally empty stub that
 * keeps callers building when the page table walker above is compiled out.
 */
void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr)
{
}
104 | #endif /* CONFIG_MMU */ |
105 | |
106 | static inline bool is_write_fault(unsigned int fsr) |
107 | { |
108 | return (fsr & FSR_WRITE) && !(fsr & FSR_CM); |
109 | } |
110 | |
111 | static inline bool is_translation_fault(unsigned int fsr) |
112 | { |
113 | int fs = fsr_fs(fsr); |
114 | #ifdef CONFIG_ARM_LPAE |
115 | if ((fs & FS_MMU_NOLL_MASK) == FS_TRANS_NOLL) |
116 | return true; |
117 | #else |
118 | if (fs == FS_L1_TRANS || fs == FS_L2_TRANS) |
119 | return true; |
120 | #endif |
121 | return false; |
122 | } |
123 | |
124 | static void die_kernel_fault(const char *msg, struct mm_struct *mm, |
125 | unsigned long addr, unsigned int fsr, |
126 | struct pt_regs *regs) |
127 | { |
128 | bust_spinlocks(yes: 1); |
129 | pr_alert("8<--- cut here ---\n" ); |
130 | pr_alert("Unable to handle kernel %s at virtual address %08lx when %s\n" , |
131 | msg, addr, fsr & FSR_LNX_PF ? "execute" : |
132 | fsr & FSR_WRITE ? "write" : "read" ); |
133 | |
134 | show_pte(KERN_ALERT, mm, addr); |
135 | die("Oops" , regs, fsr); |
136 | bust_spinlocks(yes: 0); |
137 | make_task_dead(SIGKILL); |
138 | } |
139 | |
140 | /* |
141 | * Oops. The kernel tried to access some page that wasn't present. |
142 | */ |
143 | static void |
144 | __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, |
145 | struct pt_regs *regs) |
146 | { |
147 | const char *msg; |
148 | /* |
149 | * Are we prepared to handle this kernel fault? |
150 | */ |
151 | if (fixup_exception(regs)) |
152 | return; |
153 | |
154 | /* |
155 | * No handler, we'll have to terminate things with extreme prejudice. |
156 | */ |
157 | if (addr < PAGE_SIZE) { |
158 | msg = "NULL pointer dereference" ; |
159 | } else { |
160 | if (is_translation_fault(fsr) && |
161 | kfence_handle_page_fault(addr, is_write: is_write_fault(fsr), regs)) |
162 | return; |
163 | |
164 | msg = "paging request" ; |
165 | } |
166 | |
167 | die_kernel_fault(msg, mm, addr, fsr, regs); |
168 | } |
169 | |
170 | /* |
171 | * Something tried to access memory that isn't in our memory map.. |
172 | * User mode accesses just cause a SIGSEGV |
173 | */ |
174 | static void |
175 | __do_user_fault(unsigned long addr, unsigned int fsr, unsigned int sig, |
176 | int code, struct pt_regs *regs) |
177 | { |
178 | struct task_struct *tsk = current; |
179 | |
180 | if (addr > TASK_SIZE) |
181 | harden_branch_predictor(); |
182 | |
183 | #ifdef CONFIG_DEBUG_USER |
184 | if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) || |
185 | ((user_debug & UDBG_BUS) && (sig == SIGBUS))) { |
186 | pr_err("8<--- cut here ---\n" ); |
187 | pr_err("%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n" , |
188 | tsk->comm, sig, addr, fsr); |
189 | show_pte(KERN_ERR, tsk->mm, addr); |
190 | show_regs(regs); |
191 | } |
192 | #endif |
193 | #ifndef CONFIG_KUSER_HELPERS |
194 | if ((sig == SIGSEGV) && ((addr & PAGE_MASK) == 0xffff0000)) |
195 | printk_ratelimited(KERN_DEBUG |
196 | "%s: CONFIG_KUSER_HELPERS disabled at 0x%08lx\n" , |
197 | tsk->comm, addr); |
198 | #endif |
199 | |
200 | tsk->thread.address = addr; |
201 | tsk->thread.error_code = fsr; |
202 | tsk->thread.trap_no = 14; |
203 | force_sig_fault(sig, code, addr: (void __user *)addr); |
204 | } |
205 | |
206 | void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs) |
207 | { |
208 | struct task_struct *tsk = current; |
209 | struct mm_struct *mm = tsk->active_mm; |
210 | |
211 | /* |
212 | * If we are in kernel mode at this point, we |
213 | * have no context to handle this fault with. |
214 | */ |
215 | if (user_mode(regs)) |
216 | __do_user_fault(addr, fsr, SIGSEGV, SEGV_MAPERR, regs); |
217 | else |
218 | __do_kernel_fault(mm, addr, fsr, regs); |
219 | } |
220 | |
221 | #ifdef CONFIG_MMU |
/*
 * Fault codes private to this file.  do_page_fault() stores them in its
 * vm_fault_t result: BADMAP when no VMA was found for the address,
 * BADACCESS when the VMA does not permit the attempted access.
 */
#define VM_FAULT_BADMAP		((__force vm_fault_t)0x10000)
#define VM_FAULT_BADACCESS	((__force vm_fault_t)0x20000)
224 | |
225 | static inline bool is_permission_fault(unsigned int fsr) |
226 | { |
227 | int fs = fsr_fs(fsr); |
228 | #ifdef CONFIG_ARM_LPAE |
229 | if ((fs & FS_MMU_NOLL_MASK) == FS_PERM_NOLL) |
230 | return true; |
231 | #else |
232 | if (fs == FS_L1_PERM || fs == FS_L2_PERM) |
233 | return true; |
234 | #endif |
235 | return false; |
236 | } |
237 | |
238 | static int __kprobes |
239 | do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) |
240 | { |
241 | struct mm_struct *mm = current->mm; |
242 | struct vm_area_struct *vma; |
243 | int sig, code; |
244 | vm_fault_t fault; |
245 | unsigned int flags = FAULT_FLAG_DEFAULT; |
246 | unsigned long vm_flags = VM_ACCESS_FLAGS; |
247 | |
248 | if (kprobe_page_fault(regs, trap: fsr)) |
249 | return 0; |
250 | |
251 | |
252 | /* Enable interrupts if they were enabled in the parent context. */ |
253 | if (interrupts_enabled(regs)) |
254 | local_irq_enable(); |
255 | |
256 | /* |
257 | * If we're in an interrupt or have no user |
258 | * context, we must not take the fault.. |
259 | */ |
260 | if (faulthandler_disabled() || !mm) |
261 | goto no_context; |
262 | |
263 | if (user_mode(regs)) |
264 | flags |= FAULT_FLAG_USER; |
265 | |
266 | if (is_write_fault(fsr)) { |
267 | flags |= FAULT_FLAG_WRITE; |
268 | vm_flags = VM_WRITE; |
269 | } |
270 | |
271 | if (fsr & FSR_LNX_PF) { |
272 | vm_flags = VM_EXEC; |
273 | |
274 | if (is_permission_fault(fsr) && !user_mode(regs)) |
275 | die_kernel_fault(msg: "execution of memory" , |
276 | mm, addr, fsr, regs); |
277 | } |
278 | |
279 | perf_sw_event(event_id: PERF_COUNT_SW_PAGE_FAULTS, nr: 1, regs, addr); |
280 | |
281 | retry: |
282 | vma = lock_mm_and_find_vma(mm, address: addr, regs); |
283 | if (unlikely(!vma)) { |
284 | fault = VM_FAULT_BADMAP; |
285 | goto bad_area; |
286 | } |
287 | |
288 | /* |
289 | * ok, we have a good vm_area for this memory access, check the |
290 | * permissions on the VMA allow for the fault which occurred. |
291 | */ |
292 | if (!(vma->vm_flags & vm_flags)) |
293 | fault = VM_FAULT_BADACCESS; |
294 | else |
295 | fault = handle_mm_fault(vma, address: addr & PAGE_MASK, flags, regs); |
296 | |
297 | /* If we need to retry but a fatal signal is pending, handle the |
298 | * signal first. We do not need to release the mmap_lock because |
299 | * it would already be released in __lock_page_or_retry in |
300 | * mm/filemap.c. */ |
301 | if (fault_signal_pending(fault_flags: fault, regs)) { |
302 | if (!user_mode(regs)) |
303 | goto no_context; |
304 | return 0; |
305 | } |
306 | |
307 | /* The fault is fully completed (including releasing mmap lock) */ |
308 | if (fault & VM_FAULT_COMPLETED) |
309 | return 0; |
310 | |
311 | if (!(fault & VM_FAULT_ERROR)) { |
312 | if (fault & VM_FAULT_RETRY) { |
313 | flags |= FAULT_FLAG_TRIED; |
314 | goto retry; |
315 | } |
316 | } |
317 | |
318 | mmap_read_unlock(mm); |
319 | |
320 | /* |
321 | * Handle the "normal" case first - VM_FAULT_MAJOR |
322 | */ |
323 | if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS)))) |
324 | return 0; |
325 | |
326 | bad_area: |
327 | /* |
328 | * If we are in kernel mode at this point, we |
329 | * have no context to handle this fault with. |
330 | */ |
331 | if (!user_mode(regs)) |
332 | goto no_context; |
333 | |
334 | if (fault & VM_FAULT_OOM) { |
335 | /* |
336 | * We ran out of memory, call the OOM killer, and return to |
337 | * userspace (which will retry the fault, or kill us if we |
338 | * got oom-killed) |
339 | */ |
340 | pagefault_out_of_memory(); |
341 | return 0; |
342 | } |
343 | |
344 | if (fault & VM_FAULT_SIGBUS) { |
345 | /* |
346 | * We had some memory, but were unable to |
347 | * successfully fix up this page fault. |
348 | */ |
349 | sig = SIGBUS; |
350 | code = BUS_ADRERR; |
351 | } else { |
352 | /* |
353 | * Something tried to access memory that |
354 | * isn't in our memory map.. |
355 | */ |
356 | sig = SIGSEGV; |
357 | code = fault == VM_FAULT_BADACCESS ? |
358 | SEGV_ACCERR : SEGV_MAPERR; |
359 | } |
360 | |
361 | __do_user_fault(addr, fsr, sig, code, regs); |
362 | return 0; |
363 | |
364 | no_context: |
365 | __do_kernel_fault(mm, addr, fsr, regs); |
366 | return 0; |
367 | } |
368 | #else /* CONFIG_MMU */ |
/*
 * !CONFIG_MMU variant of do_page_fault(): does nothing and returns 0
 * for every fault.
 */
static int
do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
	return 0;
}
374 | #endif /* CONFIG_MMU */ |
375 | |
/*
 * First Level Translation Fault Handler
 *
 * We enter here because the first level page table doesn't contain
 * a valid entry for the address.
 *
 * If the address is in kernel space (>= TASK_SIZE), then we are
 * probably faulting in the vmalloc() area.
 *
 * If the init_task's first level page tables contain the relevant
 * entry, we copy it to this task.  If not, we send the process
 * a signal, fixup the exception, or oops the kernel.
 *
 * NOTE! We MUST NOT take any locks for this case. We may be in an
 * interrupt or a critical region, and should only copy the information
 * from the master page table, nothing more.
 */
393 | #ifdef CONFIG_MMU |
394 | static int __kprobes |
395 | do_translation_fault(unsigned long addr, unsigned int fsr, |
396 | struct pt_regs *regs) |
397 | { |
398 | unsigned int index; |
399 | pgd_t *pgd, *pgd_k; |
400 | p4d_t *p4d, *p4d_k; |
401 | pud_t *pud, *pud_k; |
402 | pmd_t *pmd, *pmd_k; |
403 | |
404 | if (addr < TASK_SIZE) |
405 | return do_page_fault(addr, fsr, regs); |
406 | |
407 | if (user_mode(regs)) |
408 | goto bad_area; |
409 | |
410 | index = pgd_index(addr); |
411 | |
412 | pgd = cpu_get_pgd() + index; |
413 | pgd_k = init_mm.pgd + index; |
414 | |
415 | p4d = p4d_offset(pgd, address: addr); |
416 | p4d_k = p4d_offset(pgd: pgd_k, address: addr); |
417 | |
418 | if (p4d_none(p4d: *p4d_k)) |
419 | goto bad_area; |
420 | if (!p4d_present(p4d: *p4d)) |
421 | set_p4d(p4dp: p4d, p4d: *p4d_k); |
422 | |
423 | pud = pud_offset(p4d, address: addr); |
424 | pud_k = pud_offset(p4d: p4d_k, address: addr); |
425 | |
426 | if (pud_none(pud: *pud_k)) |
427 | goto bad_area; |
428 | if (!pud_present(pud: *pud)) |
429 | set_pud(pudp: pud, pud: *pud_k); |
430 | |
431 | pmd = pmd_offset(pud, address: addr); |
432 | pmd_k = pmd_offset(pud: pud_k, address: addr); |
433 | |
434 | #ifdef CONFIG_ARM_LPAE |
435 | /* |
436 | * Only one hardware entry per PMD with LPAE. |
437 | */ |
438 | index = 0; |
439 | #else |
440 | /* |
441 | * On ARM one Linux PGD entry contains two hardware entries (see page |
442 | * tables layout in pgtable.h). We normally guarantee that we always |
443 | * fill both L1 entries. But create_mapping() doesn't follow the rule. |
444 | * It can create inidividual L1 entries, so here we have to call |
445 | * pmd_none() check for the entry really corresponded to address, not |
446 | * for the first of pair. |
447 | */ |
448 | index = (addr >> SECTION_SHIFT) & 1; |
449 | #endif |
450 | if (pmd_none(pmd: pmd_k[index])) |
451 | goto bad_area; |
452 | |
453 | copy_pmd(pmd, pmd_k); |
454 | return 0; |
455 | |
456 | bad_area: |
457 | do_bad_area(addr, fsr, regs); |
458 | return 0; |
459 | } |
460 | #else /* CONFIG_MMU */ |
/*
 * !CONFIG_MMU variant of do_translation_fault(): does nothing and
 * returns 0 for every fault.
 */
static int
do_translation_fault(unsigned long addr, unsigned int fsr,
		     struct pt_regs *regs)
{
	return 0;
}
467 | #endif /* CONFIG_MMU */ |
468 | |
469 | /* |
470 | * Some section permission faults need to be handled gracefully. |
471 | * They can happen due to a __{get,put}_user during an oops. |
472 | */ |
473 | #ifndef CONFIG_ARM_LPAE |
/*
 * Section fault handler for non-LPAE builds: hands the fault straight to
 * do_bad_area() and returns 0 unconditionally.
 */
static int
do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
	do_bad_area(addr, fsr, regs);

	return 0;
}
480 | #endif /* CONFIG_ARM_LPAE */ |
481 | |
/*
 * Catch-all abort handler: ignores its arguments and always returns 1,
 * which do_DataAbort()/do_PrefetchAbort() treat as "not handled" and
 * follow up by reporting the fault.
 */
static int
do_bad(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
	return 1;
}
490 | |
/*
 * One entry per fault status value in the fsr_info[] / ifsr_info[] tables.
 * Member order matters to any positional initializers of this struct.
 */
struct fsr_info {
	/* Handler for this status; a non-zero return means "not handled". */
	int	(*fn)(unsigned long addr, unsigned int fsr, struct pt_regs *regs);
	/* Signal number passed to arm_notify_die() for an unhandled fault. */
	int	sig;
	/* si_code passed to arm_notify_die() for an unhandled fault. */
	int	code;
	/* Human readable description printed in the "Unhandled ..." message. */
	const char *name;
};
497 | |
498 | /* FSR definition */ |
499 | #ifdef CONFIG_ARM_LPAE |
500 | #include "fsr-3level.c" |
501 | #else |
502 | #include "fsr-2level.c" |
503 | #endif |
504 | |
505 | void __init |
506 | hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *), |
507 | int sig, int code, const char *name) |
508 | { |
509 | if (nr < 0 || nr >= ARRAY_SIZE(fsr_info)) |
510 | BUG(); |
511 | |
512 | fsr_info[nr].fn = fn; |
513 | fsr_info[nr].sig = sig; |
514 | fsr_info[nr].code = code; |
515 | fsr_info[nr].name = name; |
516 | } |
517 | |
518 | /* |
519 | * Dispatch a data abort to the relevant handler. |
520 | */ |
521 | asmlinkage void |
522 | do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs) |
523 | { |
524 | const struct fsr_info *inf = fsr_info + fsr_fs(fsr); |
525 | |
526 | if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs)) |
527 | return; |
528 | |
529 | pr_alert("8<--- cut here ---\n" ); |
530 | pr_alert("Unhandled fault: %s (0x%03x) at 0x%08lx\n" , |
531 | inf->name, fsr, addr); |
532 | show_pte(KERN_ALERT, current->mm, addr); |
533 | |
534 | arm_notify_die("" , regs, inf->sig, inf->code, (void __user *)addr, |
535 | fsr, 0); |
536 | } |
537 | |
538 | void __init |
539 | hook_ifault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *), |
540 | int sig, int code, const char *name) |
541 | { |
542 | if (nr < 0 || nr >= ARRAY_SIZE(ifsr_info)) |
543 | BUG(); |
544 | |
545 | ifsr_info[nr].fn = fn; |
546 | ifsr_info[nr].sig = sig; |
547 | ifsr_info[nr].code = code; |
548 | ifsr_info[nr].name = name; |
549 | } |
550 | |
551 | asmlinkage void |
552 | do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs) |
553 | { |
554 | const struct fsr_info *inf = ifsr_info + fsr_fs(fsr: ifsr); |
555 | |
556 | if (!inf->fn(addr, ifsr | FSR_LNX_PF, regs)) |
557 | return; |
558 | |
559 | pr_alert("Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n" , |
560 | inf->name, ifsr, addr); |
561 | |
562 | arm_notify_die("" , regs, inf->sig, inf->code, (void __user *)addr, |
563 | ifsr, 0); |
564 | } |
565 | |
566 | /* |
567 | * Abort handler to be used only during first unmasking of asynchronous aborts |
568 | * on the boot CPU. This makes sure that the machine will not die if the |
569 | * firmware/bootloader left an imprecise abort pending for us to trip over. |
570 | */ |
571 | static int __init early_abort_handler(unsigned long addr, unsigned int fsr, |
572 | struct pt_regs *regs) |
573 | { |
574 | pr_warn("Hit pending asynchronous external abort (FSR=0x%08x) during " |
575 | "first unmask, this is most likely caused by a " |
576 | "firmware/bootloader bug.\n" , fsr); |
577 | |
578 | return 0; |
579 | } |
580 | |
581 | void __init early_abt_enable(void) |
582 | { |
583 | fsr_info[FSR_FS_AEA].fn = early_abort_handler; |
584 | local_abt_enable(); |
585 | fsr_info[FSR_FS_AEA].fn = do_bad; |
586 | } |
587 | |
588 | #ifndef CONFIG_ARM_LPAE |
589 | static int __init exceptions_init(void) |
590 | { |
591 | if (cpu_architecture() >= CPU_ARCH_ARMv6) { |
592 | hook_fault_code(nr: 4, fn: do_translation_fault, SIGSEGV, SEGV_MAPERR, |
593 | name: "I-cache maintenance fault" ); |
594 | } |
595 | |
596 | if (cpu_architecture() >= CPU_ARCH_ARMv7) { |
597 | /* |
598 | * TODO: Access flag faults introduced in ARMv6K. |
599 | * Runtime check for 'K' extension is needed |
600 | */ |
601 | hook_fault_code(nr: 3, fn: do_bad, SIGSEGV, SEGV_MAPERR, |
602 | name: "section access flag fault" ); |
603 | hook_fault_code(nr: 6, fn: do_bad, SIGSEGV, SEGV_MAPERR, |
604 | name: "section access flag fault" ); |
605 | } |
606 | |
607 | return 0; |
608 | } |
609 | |
610 | arch_initcall(exceptions_init); |
611 | #endif |
612 | |