1// SPDX-License-Identifier: GPL-2.0
2#include <linux/slab.h>
3#include <linux/file.h>
4#include <linux/fdtable.h>
5#include <linux/freezer.h>
6#include <linux/mm.h>
7#include <linux/stat.h>
8#include <linux/fcntl.h>
9#include <linux/swap.h>
10#include <linux/ctype.h>
11#include <linux/string.h>
12#include <linux/init.h>
13#include <linux/pagemap.h>
14#include <linux/perf_event.h>
15#include <linux/highmem.h>
16#include <linux/spinlock.h>
17#include <linux/key.h>
18#include <linux/personality.h>
19#include <linux/binfmts.h>
20#include <linux/coredump.h>
21#include <linux/sort.h>
22#include <linux/sched/coredump.h>
23#include <linux/sched/signal.h>
24#include <linux/sched/task_stack.h>
25#include <linux/utsname.h>
26#include <linux/pid_namespace.h>
27#include <linux/module.h>
28#include <linux/namei.h>
29#include <linux/mount.h>
30#include <linux/security.h>
31#include <linux/syscalls.h>
32#include <linux/tsacct_kern.h>
33#include <linux/cn_proc.h>
34#include <linux/audit.h>
35#include <linux/kmod.h>
36#include <linux/fsnotify.h>
37#include <linux/fs_struct.h>
38#include <linux/pipe_fs_i.h>
39#include <linux/oom.h>
40#include <linux/compat.h>
41#include <linux/fs.h>
42#include <linux/path.h>
43#include <linux/timekeeping.h>
44#include <linux/sysctl.h>
45#include <linux/elf.h>
46#include <linux/pidfs.h>
47#include <linux/net.h>
48#include <linux/socket.h>
49#include <net/af_unix.h>
50#include <net/net_namespace.h>
51#include <net/sock.h>
52#include <uapi/linux/pidfd.h>
53#include <uapi/linux/un.h>
54
55#include <linux/uaccess.h>
56#include <asm/mmu_context.h>
57#include <asm/tlb.h>
58#include <asm/exec.h>
59
60#include <trace/events/task.h>
61#include "internal.h"
62
63#include <trace/events/sched.h>
64
65static bool dump_vma_snapshot(struct coredump_params *cprm);
66static void free_vma_snapshot(struct coredump_params *cprm);
67
68#define CORE_FILE_NOTE_SIZE_DEFAULT (4*1024*1024)
69/* Define a reasonable max cap */
70#define CORE_FILE_NOTE_SIZE_MAX (16*1024*1024)
71/*
72 * File descriptor number for the pidfd for the thread-group leader of
73 * the coredumping task installed into the usermode helper's file
74 * descriptor table.
75 */
76#define COREDUMP_PIDFD_NUMBER 3
77
78static int core_uses_pid;
79static unsigned int core_pipe_limit;
80static unsigned int core_sort_vma;
81static char core_pattern[CORENAME_MAX_SIZE] = "core";
82static int core_name_size = CORENAME_MAX_SIZE;
83unsigned int core_file_note_size_limit = CORE_FILE_NOTE_SIZE_DEFAULT;
84
85enum coredump_type_t {
86 COREDUMP_FILE = 1,
87 COREDUMP_PIPE = 2,
88 COREDUMP_SOCK = 3,
89};
90
91struct core_name {
92 char *corename;
93 int used, size;
94 enum coredump_type_t core_type;
95};
96
97static int expand_corename(struct core_name *cn, int size)
98{
99 char *corename;
100
101 size = kmalloc_size_roundup(size);
102 corename = krealloc(cn->corename, size, GFP_KERNEL);
103
104 if (!corename)
105 return -ENOMEM;
106
107 if (size > core_name_size) /* racy but harmless */
108 core_name_size = size;
109
110 cn->size = size;
111 cn->corename = corename;
112 return 0;
113}
114
115static __printf(2, 0) int cn_vprintf(struct core_name *cn, const char *fmt,
116 va_list arg)
117{
118 int free, need;
119 va_list arg_copy;
120
121again:
122 free = cn->size - cn->used;
123
124 va_copy(arg_copy, arg);
125 need = vsnprintf(buf: cn->corename + cn->used, size: free, fmt, args: arg_copy);
126 va_end(arg_copy);
127
128 if (need < free) {
129 cn->used += need;
130 return 0;
131 }
132
133 if (!expand_corename(cn, size: cn->size + need - free + 1))
134 goto again;
135
136 return -ENOMEM;
137}
138
139static __printf(2, 3) int cn_printf(struct core_name *cn, const char *fmt, ...)
140{
141 va_list arg;
142 int ret;
143
144 va_start(arg, fmt);
145 ret = cn_vprintf(cn, fmt, arg);
146 va_end(arg);
147
148 return ret;
149}
150
151static __printf(2, 3)
152int cn_esc_printf(struct core_name *cn, const char *fmt, ...)
153{
154 int cur = cn->used;
155 va_list arg;
156 int ret;
157
158 va_start(arg, fmt);
159 ret = cn_vprintf(cn, fmt, arg);
160 va_end(arg);
161
162 if (ret == 0) {
163 /*
164 * Ensure that this coredump name component can't cause the
165 * resulting corefile path to consist of a ".." or ".".
166 */
167 if ((cn->used - cur == 1 && cn->corename[cur] == '.') ||
168 (cn->used - cur == 2 && cn->corename[cur] == '.'
169 && cn->corename[cur+1] == '.'))
170 cn->corename[cur] = '!';
171
172 /*
173 * Empty names are fishy and could be used to create a "//" in a
174 * corefile name, causing the coredump to happen one directory
175 * level too high. Enforce that all components of the core
176 * pattern are at least one character long.
177 */
178 if (cn->used == cur)
179 ret = cn_printf(cn, fmt: "!");
180 }
181
182 for (; cur < cn->used; ++cur) {
183 if (cn->corename[cur] == '/')
184 cn->corename[cur] = '!';
185 }
186 return ret;
187}
188
189static int cn_print_exe_file(struct core_name *cn, bool name_only)
190{
191 struct file *exe_file;
192 char *pathbuf, *path, *ptr;
193 int ret;
194
195 exe_file = get_mm_exe_file(current->mm);
196 if (!exe_file)
197 return cn_esc_printf(cn, fmt: "%s (path unknown)", current->comm);
198
199 pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
200 if (!pathbuf) {
201 ret = -ENOMEM;
202 goto put_exe_file;
203 }
204
205 path = file_path(exe_file, pathbuf, PATH_MAX);
206 if (IS_ERR(ptr: path)) {
207 ret = PTR_ERR(ptr: path);
208 goto free_buf;
209 }
210
211 if (name_only) {
212 ptr = strrchr(path, '/');
213 if (ptr)
214 path = ptr + 1;
215 }
216 ret = cn_esc_printf(cn, fmt: "%s", path);
217
218free_buf:
219 kfree(objp: pathbuf);
220put_exe_file:
221 fput(exe_file);
222 return ret;
223}
224
225/* format_corename will inspect the pattern parameter, and output a
226 * name into corename, which must have space for at least
227 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
228 */
229static int format_corename(struct core_name *cn, struct coredump_params *cprm,
230 size_t **argv, int *argc)
231{
232 const struct cred *cred = current_cred();
233 const char *pat_ptr = core_pattern;
234 bool was_space = false;
235 int pid_in_pattern = 0;
236 int err = 0;
237
238 cn->used = 0;
239 cn->corename = NULL;
240 if (*pat_ptr == '|')
241 cn->core_type = COREDUMP_PIPE;
242 else if (*pat_ptr == '@')
243 cn->core_type = COREDUMP_SOCK;
244 else
245 cn->core_type = COREDUMP_FILE;
246 if (expand_corename(cn, size: core_name_size))
247 return -ENOMEM;
248 cn->corename[0] = '\0';
249
250 switch (cn->core_type) {
251 case COREDUMP_PIPE: {
252 int argvs = sizeof(core_pattern) / 2;
253 (*argv) = kmalloc_array(argvs, sizeof(**argv), GFP_KERNEL);
254 if (!(*argv))
255 return -ENOMEM;
256 (*argv)[(*argc)++] = 0;
257 ++pat_ptr;
258 if (!(*pat_ptr))
259 return -ENOMEM;
260 break;
261 }
262 case COREDUMP_SOCK: {
263 /* skip the @ */
264 pat_ptr++;
265 if (!(*pat_ptr))
266 return -ENOMEM;
267
268 err = cn_printf(cn, fmt: "%s", pat_ptr);
269 if (err)
270 return err;
271
272 /* Require absolute paths. */
273 if (cn->corename[0] != '/')
274 return -EINVAL;
275
276 /*
277 * Ensure we can uses spaces to indicate additional
278 * parameters in the future.
279 */
280 if (strchr(cn->corename, ' ')) {
281 coredump_report_failure("Coredump socket may not %s contain spaces", cn->corename);
282 return -EINVAL;
283 }
284
285 /*
286 * Currently no need to parse any other options.
287 * Relevant information can be retrieved from the peer
288 * pidfd retrievable via SO_PEERPIDFD by the receiver or
289 * via /proc/<pid>, using the SO_PEERPIDFD to guard
290 * against pid recycling when opening /proc/<pid>.
291 */
292 return 0;
293 }
294 case COREDUMP_FILE:
295 break;
296 default:
297 WARN_ON_ONCE(true);
298 return -EINVAL;
299 }
300
301 /* Repeat as long as we have more pattern to process and more output
302 space */
303 while (*pat_ptr) {
304 /*
305 * Split on spaces before doing template expansion so that
306 * %e and %E don't get split if they have spaces in them
307 */
308 if (cn->core_type == COREDUMP_PIPE) {
309 if (isspace(*pat_ptr)) {
310 if (cn->used != 0)
311 was_space = true;
312 pat_ptr++;
313 continue;
314 } else if (was_space) {
315 was_space = false;
316 err = cn_printf(cn, fmt: "%c", '\0');
317 if (err)
318 return err;
319 (*argv)[(*argc)++] = cn->used;
320 }
321 }
322 if (*pat_ptr != '%') {
323 err = cn_printf(cn, fmt: "%c", *pat_ptr++);
324 } else {
325 switch (*++pat_ptr) {
326 /* single % at the end, drop that */
327 case 0:
328 goto out;
329 /* Double percent, output one percent */
330 case '%':
331 err = cn_printf(cn, fmt: "%c", '%');
332 break;
333 /* pid */
334 case 'p':
335 pid_in_pattern = 1;
336 err = cn_printf(cn, fmt: "%d",
337 task_tgid_vnr(current));
338 break;
339 /* global pid */
340 case 'P':
341 err = cn_printf(cn, fmt: "%d",
342 task_tgid_nr(current));
343 break;
344 case 'i':
345 err = cn_printf(cn, fmt: "%d",
346 task_pid_vnr(current));
347 break;
348 case 'I':
349 err = cn_printf(cn, fmt: "%d",
350 task_pid_nr(current));
351 break;
352 /* uid */
353 case 'u':
354 err = cn_printf(cn, fmt: "%u",
355 from_kuid(to: &init_user_ns,
356 uid: cred->uid));
357 break;
358 /* gid */
359 case 'g':
360 err = cn_printf(cn, fmt: "%u",
361 from_kgid(to: &init_user_ns,
362 gid: cred->gid));
363 break;
364 case 'd':
365 err = cn_printf(cn, fmt: "%d",
366 __get_dumpable(mm_flags: cprm->mm_flags));
367 break;
368 /* signal that caused the coredump */
369 case 's':
370 err = cn_printf(cn, fmt: "%d",
371 cprm->siginfo->si_signo);
372 break;
373 /* UNIX time of coredump */
374 case 't': {
375 time64_t time;
376
377 time = ktime_get_real_seconds();
378 err = cn_printf(cn, fmt: "%lld", time);
379 break;
380 }
381 /* hostname */
382 case 'h':
383 down_read(sem: &uts_sem);
384 err = cn_esc_printf(cn, fmt: "%s",
385 utsname()->nodename);
386 up_read(sem: &uts_sem);
387 break;
388 /* executable, could be changed by prctl PR_SET_NAME etc */
389 case 'e':
390 err = cn_esc_printf(cn, fmt: "%s", current->comm);
391 break;
392 /* file name of executable */
393 case 'f':
394 err = cn_print_exe_file(cn, name_only: true);
395 break;
396 case 'E':
397 err = cn_print_exe_file(cn, name_only: false);
398 break;
399 /* core limit size */
400 case 'c':
401 err = cn_printf(cn, fmt: "%lu",
402 rlimit(RLIMIT_CORE));
403 break;
404 /* CPU the task ran on */
405 case 'C':
406 err = cn_printf(cn, fmt: "%d", cprm->cpu);
407 break;
408 /* pidfd number */
409 case 'F': {
410 /*
411 * Installing a pidfd only makes sense if
412 * we actually spawn a usermode helper.
413 */
414 if (cn->core_type != COREDUMP_PIPE)
415 break;
416
417 /*
418 * Note that we'll install a pidfd for the
419 * thread-group leader. We know that task
420 * linkage hasn't been removed yet and even if
421 * this @current isn't the actual thread-group
422 * leader we know that the thread-group leader
423 * cannot be reaped until @current has exited.
424 */
425 cprm->pid = task_tgid(current);
426 err = cn_printf(cn, fmt: "%d", COREDUMP_PIDFD_NUMBER);
427 break;
428 }
429 default:
430 break;
431 }
432 ++pat_ptr;
433 }
434
435 if (err)
436 return err;
437 }
438
439out:
440 /* Backward compatibility with core_uses_pid:
441 *
442 * If core_pattern does not include a %p (as is the default)
443 * and core_uses_pid is set, then .%pid will be appended to
444 * the filename. Do not do this for piped commands. */
445 if (cn->core_type == COREDUMP_FILE && !pid_in_pattern && core_uses_pid)
446 return cn_printf(cn, fmt: ".%d", task_tgid_vnr(current));
447
448 return 0;
449}
450
451static int zap_process(struct signal_struct *signal, int exit_code)
452{
453 struct task_struct *t;
454 int nr = 0;
455
456 signal->flags = SIGNAL_GROUP_EXIT;
457 signal->group_exit_code = exit_code;
458 signal->group_stop_count = 0;
459
460 __for_each_thread(signal, t) {
461 task_clear_jobctl_pending(task: t, JOBCTL_PENDING_MASK);
462 if (t != current && !(t->flags & PF_POSTCOREDUMP)) {
463 sigaddset(set: &t->pending.signal, SIGKILL);
464 signal_wake_up(t, fatal: 1);
465 nr++;
466 }
467 }
468
469 return nr;
470}
471
472static int zap_threads(struct task_struct *tsk,
473 struct core_state *core_state, int exit_code)
474{
475 struct signal_struct *signal = tsk->signal;
476 int nr = -EAGAIN;
477
478 spin_lock_irq(lock: &tsk->sighand->siglock);
479 if (!(signal->flags & SIGNAL_GROUP_EXIT) && !signal->group_exec_task) {
480 /* Allow SIGKILL, see prepare_signal() */
481 signal->core_state = core_state;
482 nr = zap_process(signal, exit_code);
483 clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
484 tsk->flags |= PF_DUMPCORE;
485 atomic_set(v: &core_state->nr_threads, i: nr);
486 }
487 spin_unlock_irq(lock: &tsk->sighand->siglock);
488 return nr;
489}
490
491static int coredump_wait(int exit_code, struct core_state *core_state)
492{
493 struct task_struct *tsk = current;
494 int core_waiters = -EBUSY;
495
496 init_completion(x: &core_state->startup);
497 core_state->dumper.task = tsk;
498 core_state->dumper.next = NULL;
499
500 core_waiters = zap_threads(tsk, core_state, exit_code);
501 if (core_waiters > 0) {
502 struct core_thread *ptr;
503
504 wait_for_completion_state(x: &core_state->startup,
505 TASK_UNINTERRUPTIBLE|TASK_FREEZABLE);
506 /*
507 * Wait for all the threads to become inactive, so that
508 * all the thread context (extended register state, like
509 * fpu etc) gets copied to the memory.
510 */
511 ptr = core_state->dumper.next;
512 while (ptr != NULL) {
513 wait_task_inactive(ptr->task, TASK_ANY);
514 ptr = ptr->next;
515 }
516 }
517
518 return core_waiters;
519}
520
521static void coredump_finish(bool core_dumped)
522{
523 struct core_thread *curr, *next;
524 struct task_struct *task;
525
526 spin_lock_irq(lock: &current->sighand->siglock);
527 if (core_dumped && !__fatal_signal_pending(current))
528 current->signal->group_exit_code |= 0x80;
529 next = current->signal->core_state->dumper.next;
530 current->signal->core_state = NULL;
531 spin_unlock_irq(lock: &current->sighand->siglock);
532
533 while ((curr = next) != NULL) {
534 next = curr->next;
535 task = curr->task;
536 /*
537 * see coredump_task_exit(), curr->task must not see
538 * ->task == NULL before we read ->next.
539 */
540 smp_mb();
541 curr->task = NULL;
542 wake_up_process(tsk: task);
543 }
544}
545
546static bool dump_interrupted(void)
547{
548 /*
549 * SIGKILL or freezing() interrupt the coredumping. Perhaps we
550 * can do try_to_freeze() and check __fatal_signal_pending(),
551 * but then we need to teach dump_write() to restart and clear
552 * TIF_SIGPENDING.
553 */
554 return fatal_signal_pending(current) || freezing(current);
555}
556
/*
 * Wait for the usermode pipe helper to finish.  Called when
 * core_pipe_limit is set so the helper can still inspect
 * /proc/<pid> of the crashing task before it is reaped.
 */
static void wait_for_dump_helpers(struct file *file)
{
	struct pipe_inode_info *pipe = file->private_data;

	/*
	 * Pose as a reader and drop our writer reference, so the helper
	 * sees EOF when it has consumed everything we wrote.
	 */
	pipe_lock(pipe);
	pipe->readers++;
	pipe->writers--;
	wake_up_interruptible_sync(&pipe->rd_wait);
	kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
	pipe_unlock(pipe);

	/*
	 * We actually want wait_event_freezable() but then we need
	 * to clear TIF_SIGPENDING and improve dump_interrupted().
	 */
	/* readers == 1 means we are the last one left: the helper exited. */
	wait_event_interruptible(pipe->rd_wait, pipe->readers == 1);

	/* Restore the counts so normal pipe teardown stays balanced. */
	pipe_lock(pipe);
	pipe->readers--;
	pipe->writers++;
	pipe_unlock(pipe);
}
579
580/*
581 * umh_coredump_setup
582 * helper function to customize the process used
583 * to collect the core in userspace. Specifically
584 * it sets up a pipe and installs it as fd 0 (stdin)
585 * for the process. Returns 0 on success, or
586 * PTR_ERR on failure.
587 * Note that it also sets the core limit to 1. This
588 * is a special value that we use to trap recursive
589 * core dumps
590 */
591static int umh_coredump_setup(struct subprocess_info *info, struct cred *new)
592{
593 struct file *files[2];
594 struct coredump_params *cp = (struct coredump_params *)info->data;
595 int err;
596
597 if (cp->pid) {
598 struct file *pidfs_file __free(fput) = NULL;
599
600 pidfs_file = pidfs_alloc_file(pid: cp->pid, flags: 0);
601 if (IS_ERR(ptr: pidfs_file))
602 return PTR_ERR(ptr: pidfs_file);
603
604 pidfs_coredump(cprm: cp);
605
606 /*
607 * Usermode helpers are childen of either
608 * system_unbound_wq or of kthreadd. So we know that
609 * we're starting off with a clean file descriptor
610 * table. So we should always be able to use
611 * COREDUMP_PIDFD_NUMBER as our file descriptor value.
612 */
613 err = replace_fd(COREDUMP_PIDFD_NUMBER, file: pidfs_file, flags: 0);
614 if (err < 0)
615 return err;
616 }
617
618 err = create_pipe_files(files, 0);
619 if (err)
620 return err;
621
622 cp->file = files[1];
623
624 err = replace_fd(fd: 0, file: files[0], flags: 0);
625 fput(files[0]);
626 if (err < 0)
627 return err;
628
629 /* and disallow core files too */
630 current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
631
632 return 0;
633}
634
635void do_coredump(const kernel_siginfo_t *siginfo)
636{
637 struct core_state core_state;
638 struct core_name cn;
639 struct mm_struct *mm = current->mm;
640 struct linux_binfmt * binfmt;
641 const struct cred *old_cred;
642 struct cred *cred;
643 int retval = 0;
644 size_t *argv = NULL;
645 int argc = 0;
646 /* require nonrelative corefile path and be extra careful */
647 bool need_suid_safe = false;
648 bool core_dumped = false;
649 static atomic_t core_dump_count = ATOMIC_INIT(0);
650 struct coredump_params cprm = {
651 .siginfo = siginfo,
652 .limit = rlimit(RLIMIT_CORE),
653 /*
654 * We must use the same mm->flags while dumping core to avoid
655 * inconsistency of bit flags, since this flag is not protected
656 * by any locks.
657 */
658 .mm_flags = mm->flags,
659 .vma_meta = NULL,
660 .cpu = raw_smp_processor_id(),
661 };
662
663 audit_core_dumps(signr: siginfo->si_signo);
664
665 binfmt = mm->binfmt;
666 if (!binfmt || !binfmt->core_dump)
667 goto fail;
668 if (!__get_dumpable(mm_flags: cprm.mm_flags))
669 goto fail;
670
671 cred = prepare_creds();
672 if (!cred)
673 goto fail;
674 /*
675 * We cannot trust fsuid as being the "true" uid of the process
676 * nor do we know its entire history. We only know it was tainted
677 * so we dump it as root in mode 2, and only into a controlled
678 * environment (pipe handler or fully qualified path).
679 */
680 if (__get_dumpable(mm_flags: cprm.mm_flags) == SUID_DUMP_ROOT) {
681 /* Setuid core dump mode */
682 cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */
683 need_suid_safe = true;
684 }
685
686 retval = coredump_wait(siginfo->si_signo, &core_state);
687 if (retval < 0)
688 goto fail_creds;
689
690 old_cred = override_creds(cred);
691
692 retval = format_corename(&cn, &cprm, &argv, &argc);
693 if (retval < 0) {
694 coredump_report_failure("format_corename failed, aborting core");
695 goto fail_unlock;
696 }
697
698 switch (cn.core_type) {
699 case COREDUMP_FILE: {
700 struct mnt_idmap *idmap;
701 struct inode *inode;
702 int open_flags = O_CREAT | O_WRONLY | O_NOFOLLOW |
703 O_LARGEFILE | O_EXCL;
704
705 if (cprm.limit < binfmt->min_coredump)
706 goto fail_unlock;
707
708 if (need_suid_safe && cn.corename[0] != '/') {
709 coredump_report_failure(
710 "this process can only dump core to a fully qualified path, skipping core dump");
711 goto fail_unlock;
712 }
713
714 /*
715 * Unlink the file if it exists unless this is a SUID
716 * binary - in that case, we're running around with root
717 * privs and don't want to unlink another user's coredump.
718 */
719 if (!need_suid_safe) {
720 /*
721 * If it doesn't exist, that's fine. If there's some
722 * other problem, we'll catch it at the filp_open().
723 */
724 do_unlinkat(AT_FDCWD, getname_kernel(cn.corename));
725 }
726
727 /*
728 * There is a race between unlinking and creating the
729 * file, but if that causes an EEXIST here, that's
730 * fine - another process raced with us while creating
731 * the corefile, and the other process won. To userspace,
732 * what matters is that at least one of the two processes
733 * writes its coredump successfully, not which one.
734 */
735 if (need_suid_safe) {
736 /*
737 * Using user namespaces, normal user tasks can change
738 * their current->fs->root to point to arbitrary
739 * directories. Since the intention of the "only dump
740 * with a fully qualified path" rule is to control where
741 * coredumps may be placed using root privileges,
742 * current->fs->root must not be used. Instead, use the
743 * root directory of init_task.
744 */
745 struct path root;
746
747 task_lock(&init_task);
748 get_fs_root(init_task.fs, &root);
749 task_unlock(&init_task);
750 cprm.file = file_open_root(&root, cn.corename,
751 open_flags, 0600);
752 path_put(&root);
753 } else {
754 cprm.file = filp_open(cn.corename, open_flags, 0600);
755 }
756 if (IS_ERR(cprm.file))
757 goto fail_unlock;
758
759 inode = file_inode(cprm.file);
760 if (inode->i_nlink > 1)
761 goto close_fail;
762 if (d_unhashed(cprm.file->f_path.dentry))
763 goto close_fail;
764 /*
765 * AK: actually i see no reason to not allow this for named
766 * pipes etc, but keep the previous behaviour for now.
767 */
768 if (!S_ISREG(inode->i_mode))
769 goto close_fail;
770 /*
771 * Don't dump core if the filesystem changed owner or mode
772 * of the file during file creation. This is an issue when
773 * a process dumps core while its cwd is e.g. on a vfat
774 * filesystem.
775 */
776 idmap = file_mnt_idmap(cprm.file);
777 if (!vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode),
778 current_fsuid())) {
779 coredump_report_failure("Core dump to %s aborted: "
780 "cannot preserve file owner", cn.corename);
781 goto close_fail;
782 }
783 if ((inode->i_mode & 0677) != 0600) {
784 coredump_report_failure("Core dump to %s aborted: "
785 "cannot preserve file permissions", cn.corename);
786 goto close_fail;
787 }
788 if (!(cprm.file->f_mode & FMODE_CAN_WRITE))
789 goto close_fail;
790 if (do_truncate(idmap, cprm.file->f_path.dentry,
791 0, 0, cprm.file))
792 goto close_fail;
793 break;
794 }
795 case COREDUMP_PIPE: {
796 int argi;
797 int dump_count;
798 char **helper_argv;
799 struct subprocess_info *sub_info;
800
801 if (cprm.limit == 1) {
802 /* See umh_coredump_setup() which sets RLIMIT_CORE = 1.
803 *
804 * Normally core limits are irrelevant to pipes, since
805 * we're not writing to the file system, but we use
806 * cprm.limit of 1 here as a special value, this is a
807 * consistent way to catch recursive crashes.
808 * We can still crash if the core_pattern binary sets
809 * RLIM_CORE = !1, but it runs as root, and can do
810 * lots of stupid things.
811 *
812 * Note that we use task_tgid_vnr here to grab the pid
813 * of the process group leader. That way we get the
814 * right pid if a thread in a multi-threaded
815 * core_pattern process dies.
816 */
817 coredump_report_failure("RLIMIT_CORE is set to 1, aborting core");
818 goto fail_unlock;
819 }
820 cprm.limit = RLIM_INFINITY;
821
822 dump_count = atomic_inc_return(&core_dump_count);
823 if (core_pipe_limit && (core_pipe_limit < dump_count)) {
824 coredump_report_failure("over core_pipe_limit, skipping core dump");
825 goto fail_dropcount;
826 }
827
828 helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv),
829 GFP_KERNEL);
830 if (!helper_argv) {
831 coredump_report_failure("%s failed to allocate memory", __func__);
832 goto fail_dropcount;
833 }
834 for (argi = 0; argi < argc; argi++)
835 helper_argv[argi] = cn.corename + argv[argi];
836 helper_argv[argi] = NULL;
837
838 retval = -ENOMEM;
839 sub_info = call_usermodehelper_setup(helper_argv[0],
840 helper_argv, NULL, GFP_KERNEL,
841 umh_coredump_setup, NULL, &cprm);
842 if (sub_info)
843 retval = call_usermodehelper_exec(sub_info,
844 UMH_WAIT_EXEC);
845
846 kfree(helper_argv);
847 if (retval) {
848 coredump_report_failure("|%s pipe failed", cn.corename);
849 goto close_fail;
850 }
851 break;
852 }
853 case COREDUMP_SOCK: {
854#ifdef CONFIG_UNIX
855 struct file *file __free(fput) = NULL;
856 struct sockaddr_un addr = {
857 .sun_family = AF_UNIX,
858 };
859 ssize_t addr_len;
860 struct socket *socket;
861
862 addr_len = strscpy(addr.sun_path, cn.corename);
863 if (addr_len < 0)
864 goto close_fail;
865 addr_len += offsetof(struct sockaddr_un, sun_path) + 1;
866
867 /*
868 * It is possible that the userspace process which is
869 * supposed to handle the coredump and is listening on
870 * the AF_UNIX socket coredumps. Userspace should just
871 * mark itself non dumpable.
872 */
873
874 retval = sock_create_kern(&init_net, AF_UNIX, SOCK_STREAM, 0, &socket);
875 if (retval < 0)
876 goto close_fail;
877
878 file = sock_alloc_file(socket, 0, NULL);
879 if (IS_ERR(file))
880 goto close_fail;
881
882 /*
883 * Set the thread-group leader pid which is used for the
884 * peer credentials during connect() below. Then
885 * immediately register it in pidfs...
886 */
887 cprm.pid = task_tgid(current);
888 retval = pidfs_register_pid(cprm.pid);
889 if (retval)
890 goto close_fail;
891
892 /*
893 * ... and set the coredump information so userspace
894 * has it available after connect()...
895 */
896 pidfs_coredump(&cprm);
897
898 retval = kernel_connect(socket, (struct sockaddr *)(&addr),
899 addr_len, O_NONBLOCK | SOCK_COREDUMP);
900
901 /*
902 * ... Make sure to only put our reference after connect() took
903 * its own reference keeping the pidfs entry alive ...
904 */
905 pidfs_put_pid(cprm.pid);
906
907 if (retval) {
908 if (retval == -EAGAIN)
909 coredump_report_failure("Coredump socket %s receive queue full", addr.sun_path);
910 else
911 coredump_report_failure("Coredump socket connection %s failed %d", addr.sun_path, retval);
912 goto close_fail;
913 }
914
915 /* ... and validate that @sk_peer_pid matches @cprm.pid. */
916 if (WARN_ON_ONCE(unix_peer(socket->sk)->sk_peer_pid != cprm.pid))
917 goto close_fail;
918
919 cprm.limit = RLIM_INFINITY;
920 cprm.file = no_free_ptr(file);
921#else
922 coredump_report_failure("Core dump socket support %s disabled", cn.corename);
923 goto close_fail;
924#endif
925 break;
926 }
927 default:
928 WARN_ON_ONCE(true);
929 goto close_fail;
930 }
931
932 /* get us an unshared descriptor table; almost always a no-op */
933 /* The cell spufs coredump code reads the file descriptor tables */
934 retval = unshare_files();
935 if (retval)
936 goto close_fail;
937 if (!dump_interrupted()) {
938 /*
939 * umh disabled with CONFIG_STATIC_USERMODEHELPER_PATH="" would
940 * have this set to NULL.
941 */
942 if (!cprm.file) {
943 coredump_report_failure("Core dump to |%s disabled", cn.corename);
944 goto close_fail;
945 }
946 if (!dump_vma_snapshot(&cprm))
947 goto close_fail;
948
949 file_start_write(cprm.file);
950 core_dumped = binfmt->core_dump(&cprm);
951 /*
952 * Ensures that file size is big enough to contain the current
953 * file postion. This prevents gdb from complaining about
954 * a truncated file if the last "write" to the file was
955 * dump_skip.
956 */
957 if (cprm.to_skip) {
958 cprm.to_skip--;
959 dump_emit(&cprm, "", 1);
960 }
961 file_end_write(cprm.file);
962 free_vma_snapshot(&cprm);
963 }
964
965#ifdef CONFIG_UNIX
966 /* Let userspace know we're done processing the coredump. */
967 if (sock_from_file(cprm.file))
968 kernel_sock_shutdown(sock_from_file(cprm.file), SHUT_WR);
969#endif
970
971 /*
972 * When core_pipe_limit is set we wait for the coredump server
973 * or usermodehelper to finish before exiting so it can e.g.,
974 * inspect /proc/<pid>.
975 */
976 if (core_pipe_limit) {
977 switch (cn.core_type) {
978 case COREDUMP_PIPE:
979 wait_for_dump_helpers(cprm.file);
980 break;
981#ifdef CONFIG_UNIX
982 case COREDUMP_SOCK: {
983 ssize_t n;
984
985 /*
986 * We use a simple read to wait for the coredump
987 * processing to finish. Either the socket is
988 * closed or we get sent unexpected data. In
989 * both cases, we're done.
990 */
991 n = __kernel_read(cprm.file, &(char){ 0 }, 1, NULL);
992 if (n != 0)
993 coredump_report_failure("Unexpected data on coredump socket");
994 break;
995 }
996#endif
997 default:
998 break;
999 }
1000 }
1001
1002close_fail:
1003 if (cprm.file)
1004 filp_close(cprm.file, NULL);
1005fail_dropcount:
1006 if (cn.core_type == COREDUMP_PIPE)
1007 atomic_dec(&core_dump_count);
1008fail_unlock:
1009 kfree(argv);
1010 kfree(cn.corename);
1011 coredump_finish(core_dumped);
1012 revert_creds(old_cred);
1013fail_creds:
1014 put_cred(cred);
1015fail:
1016 return;
1017}
1018
1019/*
1020 * Core dumping helper functions. These are the only things you should
1021 * do on a core-file: use only these functions to write out all the
1022 * necessary info.
1023 */
/*
 * Low-level write of @nr bytes at @addr to the core file.  Enforces
 * cprm->limit, bails out if the dump was interrupted, and only updates
 * the position/accounting on a complete write.  Returns 1 on success,
 * 0 on any failure or short write.
 */
static int __dump_emit(struct coredump_params *cprm, const void *addr, int nr)
{
	struct file *file = cprm->file;
	loff_t pos = file->f_pos;
	ssize_t n;

	if (cprm->written + nr > cprm->limit)
		return 0;
	if (dump_interrupted())
		return 0;
	n = __kernel_write(file, addr, nr, &pos);
	/* A short write is treated as failure; f_pos is left untouched. */
	if (n != nr)
		return 0;
	file->f_pos = pos;
	/* ->written counts bytes physically written; ->pos tracks layout. */
	cprm->written += n;
	cprm->pos += n;

	return 1;
}
1043
1044static int __dump_skip(struct coredump_params *cprm, size_t nr)
1045{
1046 static char zeroes[PAGE_SIZE];
1047 struct file *file = cprm->file;
1048
1049 if (file->f_mode & FMODE_LSEEK) {
1050 if (dump_interrupted() || vfs_llseek(file, offset: nr, SEEK_CUR) < 0)
1051 return 0;
1052 cprm->pos += nr;
1053 return 1;
1054 }
1055
1056 while (nr > PAGE_SIZE) {
1057 if (!__dump_emit(cprm, addr: zeroes, PAGE_SIZE))
1058 return 0;
1059 nr -= PAGE_SIZE;
1060 }
1061
1062 return __dump_emit(cprm, addr: zeroes, nr);
1063}
1064
1065int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
1066{
1067 if (cprm->to_skip) {
1068 if (!__dump_skip(cprm, nr: cprm->to_skip))
1069 return 0;
1070 cprm->to_skip = 0;
1071 }
1072 return __dump_emit(cprm, addr, nr);
1073}
1074EXPORT_SYMBOL(dump_emit);
1075
/*
 * Schedule a skip so the next emitted byte lands at absolute dump
 * position @pos.  The skip is performed lazily by the next
 * dump_emit()/dump_emit_page() call.
 */
void dump_skip_to(struct coredump_params *cprm, unsigned long pos)
{
	cprm->to_skip = pos - cprm->pos;
}
EXPORT_SYMBOL(dump_skip_to);
1081
/*
 * Schedule @nr additional bytes of zero padding; accumulated in
 * ->to_skip and flushed lazily by the next emit.
 */
void dump_skip(struct coredump_params *cprm, size_t nr)
{
	cprm->to_skip += nr;
}
EXPORT_SYMBOL(dump_skip);
1087
1088#ifdef CONFIG_ELF_CORE
1089static int dump_emit_page(struct coredump_params *cprm, struct page *page)
1090{
1091 struct bio_vec bvec;
1092 struct iov_iter iter;
1093 struct file *file = cprm->file;
1094 loff_t pos;
1095 ssize_t n;
1096
1097 if (!page)
1098 return 0;
1099
1100 if (cprm->to_skip) {
1101 if (!__dump_skip(cprm, nr: cprm->to_skip))
1102 return 0;
1103 cprm->to_skip = 0;
1104 }
1105 if (cprm->written + PAGE_SIZE > cprm->limit)
1106 return 0;
1107 if (dump_interrupted())
1108 return 0;
1109 pos = file->f_pos;
1110 bvec_set_page(bv: &bvec, page, PAGE_SIZE, offset: 0);
1111 iov_iter_bvec(i: &iter, ITER_SOURCE, bvec: &bvec, nr_segs: 1, PAGE_SIZE);
1112 n = __kernel_write_iter(file: cprm->file, from: &iter, pos: &pos);
1113 if (n != PAGE_SIZE)
1114 return 0;
1115 file->f_pos = pos;
1116 cprm->written += PAGE_SIZE;
1117 cprm->pos += PAGE_SIZE;
1118
1119 return 1;
1120}
1121
1122/*
1123 * If we might get machine checks from kernel accesses during the
1124 * core dump, let's get those errors early rather than during the
1125 * IO. This is not performance-critical enough to warrant having
1126 * all the machine check logic in the iovec paths.
1127 */
1128#ifdef copy_mc_to_kernel
1129
1130#define dump_page_alloc() alloc_page(GFP_KERNEL)
1131#define dump_page_free(x) __free_page(x)
1132static struct page *dump_page_copy(struct page *src, struct page *dst)
1133{
1134 void *buf = kmap_local_page(page: src);
1135 size_t left = copy_mc_to_kernel(page_address(dst), from: buf, PAGE_SIZE);
1136 kunmap_local(buf);
1137 return left ? NULL : dst;
1138}
1139
1140#else
1141
1142/* We just want to return non-NULL; it's never used. */
1143#define dump_page_alloc() ERR_PTR(-EINVAL)
1144#define dump_page_free(x) ((void)(x))
1145static inline struct page *dump_page_copy(struct page *src, struct page *dst)
1146{
1147 return src;
1148}
1149#endif
1150
1151int dump_user_range(struct coredump_params *cprm, unsigned long start,
1152 unsigned long len)
1153{
1154 unsigned long addr;
1155 struct page *dump_page;
1156 int locked, ret;
1157
1158 dump_page = dump_page_alloc();
1159 if (!dump_page)
1160 return 0;
1161
1162 ret = 0;
1163 locked = 0;
1164 for (addr = start; addr < start + len; addr += PAGE_SIZE) {
1165 struct page *page;
1166
1167 if (!locked) {
1168 if (mmap_read_lock_killable(current->mm))
1169 goto out;
1170 locked = 1;
1171 }
1172
1173 /*
1174 * To avoid having to allocate page tables for virtual address
1175 * ranges that have never been used yet, and also to make it
1176 * easy to generate sparse core files, use a helper that returns
1177 * NULL when encountering an empty page table entry that would
1178 * otherwise have been filled with the zero page.
1179 */
1180 page = get_dump_page(addr, locked: &locked);
1181 if (page) {
1182 if (locked) {
1183 mmap_read_unlock(current->mm);
1184 locked = 0;
1185 }
1186 int stop = !dump_emit_page(cprm, page: dump_page_copy(src: page, dst: dump_page));
1187 put_page(page);
1188 if (stop)
1189 goto out;
1190 } else {
1191 dump_skip(cprm, PAGE_SIZE);
1192 }
1193
1194 if (dump_interrupted())
1195 goto out;
1196
1197 if (!need_resched())
1198 continue;
1199 if (locked) {
1200 mmap_read_unlock(current->mm);
1201 locked = 0;
1202 }
1203 cond_resched();
1204 }
1205 ret = 1;
1206out:
1207 if (locked)
1208 mmap_read_unlock(current->mm);
1209
1210 dump_page_free(dump_page);
1211 return ret;
1212}
1213#endif
1214
1215int dump_align(struct coredump_params *cprm, int align)
1216{
1217 unsigned mod = (cprm->pos + cprm->to_skip) & (align - 1);
1218 if (align & (align - 1))
1219 return 0;
1220 if (mod)
1221 cprm->to_skip += align - mod;
1222 return 1;
1223}
1224EXPORT_SYMBOL(dump_align);
1225
1226#ifdef CONFIG_SYSCTL
1227
1228void validate_coredump_safety(void)
1229{
1230 if (suid_dumpable == SUID_DUMP_ROOT &&
1231 core_pattern[0] != '/' && core_pattern[0] != '|' && core_pattern[0] != '@') {
1232
1233 coredump_report_failure("Unsafe core_pattern used with fs.suid_dumpable=2: "
1234 "pipe handler or fully qualified core dump path required. "
1235 "Set kernel.core_pattern before fs.suid_dumpable.");
1236 }
1237}
1238
1239static inline bool check_coredump_socket(void)
1240{
1241 if (core_pattern[0] != '@')
1242 return true;
1243
1244 /*
1245 * Coredump socket must be located in the initial mount
1246 * namespace. Don't give the impression that anything else is
1247 * supported right now.
1248 */
1249 if (current->nsproxy->mnt_ns != init_task.nsproxy->mnt_ns)
1250 return false;
1251
1252 /* Must be an absolute path. */
1253 if (*(core_pattern + 1) != '/')
1254 return false;
1255
1256 return true;
1257}
1258
1259static int proc_dostring_coredump(const struct ctl_table *table, int write,
1260 void *buffer, size_t *lenp, loff_t *ppos)
1261{
1262 int error;
1263 ssize_t retval;
1264 char old_core_pattern[CORENAME_MAX_SIZE];
1265
1266 retval = strscpy(old_core_pattern, core_pattern, CORENAME_MAX_SIZE);
1267
1268 error = proc_dostring(table, write, buffer, lenp, ppos);
1269 if (error)
1270 return error;
1271 if (!check_coredump_socket()) {
1272 strscpy(core_pattern, old_core_pattern, retval + 1);
1273 return -EINVAL;
1274 }
1275
1276 validate_coredump_safety();
1277 return error;
1278}
1279
/* Clamp range for kernel.core_file_note_size_limit: [default, hard cap]. */
static const unsigned int core_file_note_size_min = CORE_FILE_NOTE_SIZE_DEFAULT;
static const unsigned int core_file_note_size_max = CORE_FILE_NOTE_SIZE_MAX;
/* Newline-separated list of supported dump modes, exposed read-only. */
static char core_modes[] = {
	"file\npipe"
#ifdef CONFIG_UNIX
	"\nsocket"
#endif
};
1288
/* "kernel.*" sysctls controlling core dump behaviour. */
static const struct ctl_table coredump_sysctls[] = {
	{
		/* Append ".<pid>" to the core file name. */
		.procname = "core_uses_pid",
		.data = &core_uses_pid,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		/* Core name pattern; validated by the custom handler above. */
		.procname = "core_pattern",
		.data = core_pattern,
		.maxlen = CORENAME_MAX_SIZE,
		.mode = 0644,
		.proc_handler = proc_dostring_coredump,
	},
	{
		/* Max number of concurrent dumps piped to user space. */
		.procname = "core_pipe_limit",
		.data = &core_pipe_limit,
		.maxlen = sizeof(unsigned int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_INT_MAX,
	},
	{
		/* Size limit for the ELF note section, clamped above. */
		.procname = "core_file_note_size_limit",
		.data = &core_file_note_size_limit,
		.maxlen = sizeof(unsigned int),
		.mode = 0644,
		.proc_handler = proc_douintvec_minmax,
		.extra1 = (unsigned int *)&core_file_note_size_min,
		.extra2 = (unsigned int *)&core_file_note_size_max,
	},
	{
		/* Boolean: sort VMAs by dump size (see dump_vma_snapshot()). */
		.procname = "core_sort_vma",
		.data = &core_sort_vma,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_douintvec_minmax,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
	},
	{
		/* Read-only list of supported dump modes (core_modes[]). */
		.procname = "core_modes",
		.data = core_modes,
		.maxlen = sizeof(core_modes) - 1,
		.mode = 0444,
		.proc_handler = proc_dostring,
	},
};
1339
/* Register the coredump sysctls under "kernel" during fs initcalls. */
static int __init init_fs_coredump_sysctls(void)
{
	register_sysctl_init("kernel", coredump_sysctls);
	return 0;
}
fs_initcall(init_fs_coredump_sysctls);
1346#endif /* CONFIG_SYSCTL */
1347
1348/*
1349 * The purpose of always_dump_vma() is to make sure that special kernel mappings
1350 * that are useful for post-mortem analysis are included in every core dump.
1351 * In that way we ensure that the core dump is fully interpretable later
1352 * without matching up the same kernel and hardware config to see what PC values
1353 * meant. These special mappings include - vDSO, vsyscall, and other
1354 * architecture specific mappings
1355 */
1356static bool always_dump_vma(struct vm_area_struct *vma)
1357{
1358 /* Any vsyscall mappings? */
1359 if (vma == get_gate_vma(mm: vma->vm_mm))
1360 return true;
1361
1362 /*
1363 * Assume that all vmas with a .name op should always be dumped.
1364 * If this changes, a new vm_ops field can easily be added.
1365 */
1366 if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
1367 return true;
1368
1369 /*
1370 * arch_vma_name() returns non-NULL for special architecture mappings,
1371 * such as vDSO sections.
1372 */
1373 if (arch_vma_name(vma))
1374 return true;
1375
1376 return false;
1377}
1378
1379#define DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER 1
1380
1381/*
1382 * Decide how much of @vma's contents should be included in a core dump.
1383 */
1384static unsigned long vma_dump_size(struct vm_area_struct *vma,
1385 unsigned long mm_flags)
1386{
1387#define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
1388
1389 /* always dump the vdso and vsyscall sections */
1390 if (always_dump_vma(vma))
1391 goto whole;
1392
1393 if (vma->vm_flags & VM_DONTDUMP)
1394 return 0;
1395
1396 /* support for DAX */
1397 if (vma_is_dax(vma)) {
1398 if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
1399 goto whole;
1400 if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
1401 goto whole;
1402 return 0;
1403 }
1404
1405 /* Hugetlb memory check */
1406 if (is_vm_hugetlb_page(vma)) {
1407 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1408 goto whole;
1409 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1410 goto whole;
1411 return 0;
1412 }
1413
1414 /* Do not dump I/O mapped devices or special mappings */
1415 if (vma->vm_flags & VM_IO)
1416 return 0;
1417
1418 /* By default, dump shared memory if mapped from an anonymous file. */
1419 if (vma->vm_flags & VM_SHARED) {
1420 if (file_inode(f: vma->vm_file)->i_nlink == 0 ?
1421 FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1422 goto whole;
1423 return 0;
1424 }
1425
1426 /* Dump segments that have been written to. */
1427 if ((!IS_ENABLED(CONFIG_MMU) || vma->anon_vma) && FILTER(ANON_PRIVATE))
1428 goto whole;
1429 if (vma->vm_file == NULL)
1430 return 0;
1431
1432 if (FILTER(MAPPED_PRIVATE))
1433 goto whole;
1434
1435 /*
1436 * If this is the beginning of an executable file mapping,
1437 * dump the first page to aid in determining what was mapped here.
1438 */
1439 if (FILTER(ELF_HEADERS) &&
1440 vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1441 if ((READ_ONCE(file_inode(vma->vm_file)->i_mode) & 0111) != 0)
1442 return PAGE_SIZE;
1443
1444 /*
1445 * ELF libraries aren't always executable.
1446 * We'll want to check whether the mapping starts with the ELF
1447 * magic, but not now - we're holding the mmap lock,
1448 * so copy_from_user() doesn't work here.
1449 * Use a placeholder instead, and fix it up later in
1450 * dump_vma_snapshot().
1451 */
1452 return DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER;
1453 }
1454
1455#undef FILTER
1456
1457 return 0;
1458
1459whole:
1460 return vma->vm_end - vma->vm_start;
1461}
1462
1463/*
1464 * Helper function for iterating across a vma list. It ensures that the caller
1465 * will visit `gate_vma' prior to terminating the search.
1466 */
1467static struct vm_area_struct *coredump_next_vma(struct vma_iterator *vmi,
1468 struct vm_area_struct *vma,
1469 struct vm_area_struct *gate_vma)
1470{
1471 if (gate_vma && (vma == gate_vma))
1472 return NULL;
1473
1474 vma = vma_next(vmi);
1475 if (vma)
1476 return vma;
1477 return gate_vma;
1478}
1479
1480static void free_vma_snapshot(struct coredump_params *cprm)
1481{
1482 if (cprm->vma_meta) {
1483 int i;
1484 for (i = 0; i < cprm->vma_count; i++) {
1485 struct file *file = cprm->vma_meta[i].file;
1486 if (file)
1487 fput(file);
1488 }
1489 kvfree(addr: cprm->vma_meta);
1490 cprm->vma_meta = NULL;
1491 }
1492}
1493
1494static int cmp_vma_size(const void *vma_meta_lhs_ptr, const void *vma_meta_rhs_ptr)
1495{
1496 const struct core_vma_metadata *vma_meta_lhs = vma_meta_lhs_ptr;
1497 const struct core_vma_metadata *vma_meta_rhs = vma_meta_rhs_ptr;
1498
1499 if (vma_meta_lhs->dump_size < vma_meta_rhs->dump_size)
1500 return -1;
1501 if (vma_meta_lhs->dump_size > vma_meta_rhs->dump_size)
1502 return 1;
1503 return 0;
1504}
1505
1506/*
1507 * Under the mmap_lock, take a snapshot of relevant information about the task's
1508 * VMAs.
1509 */
1510static bool dump_vma_snapshot(struct coredump_params *cprm)
1511{
1512 struct vm_area_struct *gate_vma, *vma = NULL;
1513 struct mm_struct *mm = current->mm;
1514 VMA_ITERATOR(vmi, mm, 0);
1515 int i = 0;
1516
1517 /*
1518 * Once the stack expansion code is fixed to not change VMA bounds
1519 * under mmap_lock in read mode, this can be changed to take the
1520 * mmap_lock in read mode.
1521 */
1522 if (mmap_write_lock_killable(mm))
1523 return false;
1524
1525 cprm->vma_data_size = 0;
1526 gate_vma = get_gate_vma(mm);
1527 cprm->vma_count = mm->map_count + (gate_vma ? 1 : 0);
1528
1529 cprm->vma_meta = kvmalloc_array(cprm->vma_count, sizeof(*cprm->vma_meta), GFP_KERNEL);
1530 if (!cprm->vma_meta) {
1531 mmap_write_unlock(mm);
1532 return false;
1533 }
1534
1535 while ((vma = coredump_next_vma(vmi: &vmi, vma, gate_vma)) != NULL) {
1536 struct core_vma_metadata *m = cprm->vma_meta + i;
1537
1538 m->start = vma->vm_start;
1539 m->end = vma->vm_end;
1540 m->flags = vma->vm_flags;
1541 m->dump_size = vma_dump_size(vma, mm_flags: cprm->mm_flags);
1542 m->pgoff = vma->vm_pgoff;
1543 m->file = vma->vm_file;
1544 if (m->file)
1545 get_file(f: m->file);
1546 i++;
1547 }
1548
1549 mmap_write_unlock(mm);
1550
1551 for (i = 0; i < cprm->vma_count; i++) {
1552 struct core_vma_metadata *m = cprm->vma_meta + i;
1553
1554 if (m->dump_size == DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER) {
1555 char elfmag[SELFMAG];
1556
1557 if (copy_from_user(to: elfmag, from: (void __user *)m->start, SELFMAG) ||
1558 memcmp(p: elfmag, ELFMAG, SELFMAG) != 0) {
1559 m->dump_size = 0;
1560 } else {
1561 m->dump_size = PAGE_SIZE;
1562 }
1563 }
1564
1565 cprm->vma_data_size += m->dump_size;
1566 }
1567
1568 if (core_sort_vma)
1569 sort(base: cprm->vma_meta, num: cprm->vma_count, size: sizeof(*cprm->vma_meta),
1570 cmp_func: cmp_vma_size, NULL);
1571
1572 return true;
1573}
1574

/* end of fs/coredump.c */