coredump.c source code [linux/fs/coredump.c]

1	// SPDX-License-Identifier: GPL-2.0
2	#include <linux/slab.h>
3	#include <linux/file.h>
4	#include <linux/fdtable.h>
5	#include <linux/freezer.h>
6	#include <linux/mm.h>
7	#include <linux/stat.h>
8	#include <linux/fcntl.h>
9	#include <linux/swap.h>
10	#include <linux/ctype.h>
11	#include <linux/string.h>
12	#include <linux/init.h>
13	#include <linux/pagemap.h>
14	#include <linux/perf_event.h>
15	#include <linux/highmem.h>
16	#include <linux/spinlock.h>
17	#include <linux/key.h>
18	#include <linux/personality.h>
19	#include <linux/binfmts.h>
20	#include <linux/coredump.h>
21	#include <linux/sort.h>
22	#include <linux/sched/coredump.h>
23	#include <linux/sched/signal.h>
24	#include <linux/sched/task_stack.h>
25	#include <linux/utsname.h>
26	#include <linux/pid_namespace.h>
27	#include <linux/module.h>
28	#include <linux/namei.h>
29	#include <linux/mount.h>
30	#include <linux/security.h>
31	#include <linux/syscalls.h>
32	#include <linux/tsacct_kern.h>
33	#include <linux/cn_proc.h>
34	#include <linux/audit.h>
35	#include <linux/kmod.h>
36	#include <linux/fsnotify.h>
37	#include <linux/fs_struct.h>
38	#include <linux/pipe_fs_i.h>
39	#include <linux/oom.h>
40	#include <linux/compat.h>
41	#include <linux/fs.h>
42	#include <linux/path.h>
43	#include <linux/timekeeping.h>
44	#include <linux/sysctl.h>
45	#include <linux/elf.h>
46	#include <linux/pidfs.h>
47	#include <linux/net.h>
48	#include <linux/socket.h>
49	#include <net/af_unix.h>
50	#include <net/net_namespace.h>
51	#include <net/sock.h>
52	#include <uapi/linux/pidfd.h>
53	#include <uapi/linux/un.h>
54
55	#include <linux/uaccess.h>
56	#include <asm/mmu_context.h>
57	#include <asm/tlb.h>
58	#include <asm/exec.h>
59
60	#include <trace/events/task.h>
61	#include "internal.h"
62
63	#include <trace/events/sched.h>
64
65	static bool dump_vma_snapshot(struct coredump_params *cprm);
66	static void free_vma_snapshot(struct coredump_params *cprm);
67
68	#define CORE_FILE_NOTE_SIZE_DEFAULT (410241024)
69	/ Define a reasonable max cap /
70	#define CORE_FILE_NOTE_SIZE_MAX (1610241024)
71	/*
72	* File descriptor number for the pidfd for the thread-group leader of
73	* the coredumping task installed into the usermode helper's file
74	* descriptor table.
75	*/
76	#define COREDUMP_PIDFD_NUMBER 3
77
78	static int core_uses_pid;
79	static unsigned int core_pipe_limit;
80	static unsigned int core_sort_vma;
81	static char core_pattern[CORENAME_MAX_SIZE] = "core";
82	static int core_name_size = CORENAME_MAX_SIZE;
83	unsigned int core_file_note_size_limit = CORE_FILE_NOTE_SIZE_DEFAULT;
84
85	enum coredump_type_t {
86	COREDUMP_FILE = `1`,
87	COREDUMP_PIPE = `2`,
88	COREDUMP_SOCK = `3`,
89	};
90
91	struct core_name {
92	char *corename;
93	int used, size;
94	enum coredump_type_t core_type;
95	};
96
97	static int expand_corename(struct core_name cn, int* size)
98	{
99	char *corename;
100
101	size = kmalloc_size_roundup(size);
102	corename = krealloc(cn->corename, size, GFP_KERNEL);
103
104	if (!corename)
105	return -ENOMEM;
106
107	if (size > core_name_size) / racy but harmless /
108	core_name_size = size;
109
110	cn->size = size;
111	cn->corename = corename;
112	return `0`;
113	}
114
115	static __printf(`2`, `0`) int cn_vprintf(struct core_name cn, const* char *fmt,
116	va_list arg)
117	{
118	int free, need;
119	va_list arg_copy;
120
121	again:
122	free = cn->size - cn->used;
123
124	va_copy(arg_copy, arg);
125	need = vsnprintf(buf: cn->corename + cn->used, size: free, fmt, args: arg_copy);
126	va_end(arg_copy);
127
128	if (need < free) {
129	cn->used += need;
130	return `0`;
131	}
132
133	if (!expand_corename(cn, size: cn->size + need - free + `1`))
134	goto again;
135
136	return -ENOMEM;
137	}
138
139	static __printf(`2`, `3`) int cn_printf(struct core_name cn, const* char *fmt, ...)
140	{
141	va_list arg;
142	int ret;
143
144	va_start(arg, fmt);
145	ret = cn_vprintf(cn, fmt, arg);
146	va_end(arg);
147
148	return ret;
149	}
150
151	static __printf(`2`, `3`)
152	int cn_esc_printf(struct core_name cn, const* char *fmt, ...)
153	{
154	int cur = cn->used;
155	va_list arg;
156	int ret;
157
158	va_start(arg, fmt);
159	ret = cn_vprintf(cn, fmt, arg);
160	va_end(arg);
161
162	if (ret == `0`) {
163	/*
164	* Ensure that this coredump name component can't cause the
165	* resulting corefile path to consist of a ".." or ".".
166	*/
167	if ((cn->used - cur == `1` && cn->corename[cur] == `'.'`) \|\|
168	(cn->used - cur == `2` && cn->corename[cur] == `'.'`
169	&& cn->corename[cur+`1`] == `'.'`))
170	cn->corename[cur] = `'!'`;
171
172	/*
173	* Empty names are fishy and could be used to create a "//" in a
174	* corefile name, causing the coredump to happen one directory
175	* level too high. Enforce that all components of the core
176	* pattern are at least one character long.
177	*/
178	if (cn->used == cur)
179	ret = cn_printf(cn, fmt: "!");
180	}
181
182	for (; cur < cn->used; ++cur) {
183	if (cn->corename[cur] == `'/'`)
184	cn->corename[cur] = `'!'`;
185	}
186	return ret;
187	}
188
189	static int cn_print_exe_file(struct core_name *cn, bool name_only)
190	{
191	struct file *exe_file;
192	char pathbuf, path, *ptr;
193	int ret;
194
195	exe_file = get_mm_exe_file(current->mm);
196	if (!exe_file)
197	return cn_esc_printf(cn, fmt: "%s (path unknown)", current->comm);
198
199	pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
200	if (!pathbuf) {
201	ret = -ENOMEM;
202	goto put_exe_file;
203	}
204
205	path = file_path(exe_file, pathbuf, PATH_MAX);
206	if (IS_ERR(ptr: path)) {
207	ret = PTR_ERR(ptr: path);
208	goto free_buf;
209	}
210
211	if (name_only) {
212	ptr = strrchr(path, `'/'`);
213	if (ptr)
214	path = ptr + `1`;
215	}
216	ret = cn_esc_printf(cn, fmt: "%s", path);
217
218	free_buf:
219	kfree(objp: pathbuf);
220	put_exe_file:
221	fput(exe_file);
222	return ret;
223	}
224
225	/ format_corename will inspect the pattern parameter, and output a*
226	* name into corename, which must have space for at least
227	* CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
228	*/
229	static int format_corename(struct core_name cn, struct* coredump_params *cprm,
230	size_t *argv, int* *argc)
231	{
232	const struct cred *cred = current_cred();
233	const char *pat_ptr = core_pattern;
234	bool was_space = false;
235	int pid_in_pattern = `0`;
236	int err = `0`;
237
238	cn->used = `0`;
239	cn->corename = NULL;
240	if (*pat_ptr == `'\|'`)
241	cn->core_type = COREDUMP_PIPE;
242	else if (*pat_ptr == `'@'`)
243	cn->core_type = COREDUMP_SOCK;
244	else
245	cn->core_type = COREDUMP_FILE;
246	if (expand_corename(cn, size: core_name_size))
247	return -ENOMEM;
248	cn->corename[`0`] = `'\0'`;
249
250	switch (cn->core_type) {
251	case COREDUMP_PIPE: {
252	int argvs = sizeof(core_pattern) / `2`;
253	(argv) = kmalloc_array(argvs, sizeof(*argv), GFP_KERNEL);
254	if (!(*argv))
255	return -ENOMEM;
256	(argv)[(argc)++] = `0`;
257	++pat_ptr;
258	if (!(*pat_ptr))
259	return -ENOMEM;
260	break;
261	}
262	case COREDUMP_SOCK: {
263	/ skip the @ /
264	pat_ptr++;
265	if (!(*pat_ptr))
266	return -ENOMEM;
267
268	err = cn_printf(cn, fmt: "%s", pat_ptr);
269	if (err)
270	return err;
271
272	/ Require absolute paths. /
273	if (cn->corename[`0`] != `'/'`)
274	return -EINVAL;
275
276	/*
277	* Ensure we can uses spaces to indicate additional
278	* parameters in the future.
279	*/
280	if (strchr(cn->corename, `' '`)) {
281	coredump_report_failure("Coredump socket may not %s contain spaces", cn->corename);
282	return -EINVAL;
283	}
284
285	/*
286	* Currently no need to parse any other options.
287	* Relevant information can be retrieved from the peer
288	* pidfd retrievable via SO_PEERPIDFD by the receiver or
289	* via /proc/<pid>, using the SO_PEERPIDFD to guard
290	* against pid recycling when opening /proc/<pid>.
291	*/
292	return `0`;
293	}
294	case COREDUMP_FILE:
295	break;
296	default:
297	WARN_ON_ONCE(true);
298	return -EINVAL;
299	}
300
301	/ Repeat as long as we have more pattern to process and more output*
302	space /*
303	while (*pat_ptr) {
304	/*
305	* Split on spaces before doing template expansion so that
306	* %e and %E don't get split if they have spaces in them
307	*/
308	if (cn->core_type == COREDUMP_PIPE) {
309	if (isspace(*pat_ptr)) {
310	if (cn->used != `0`)
311	was_space = true;
312	pat_ptr++;
313	continue;
314	} else if (was_space) {
315	was_space = false;
316	err = cn_printf(cn, fmt: "%c", `'\0'`);
317	if (err)
318	return err;
319	(argv)[(argc)++] = cn->used;
320	}
321	}
322	if (*pat_ptr != `'%'`) {
323	err = cn_printf(cn, fmt: "%c", *pat_ptr++);
324	} else {
325	switch (*++pat_ptr) {
326	/ single % at the end, drop that /
327	case `0`:
328	goto out;
329	/ Double percent, output one percent /
330	case `'%'`:
331	err = cn_printf(cn, fmt: "%c", `'%'`);
332	break;
333	/ pid /
334	case `'p'`:
335	pid_in_pattern = `1`;
336	err = cn_printf(cn, fmt: "%d",
337	task_tgid_vnr(current));
338	break;
339	/ global pid /
340	case `'P'`:
341	err = cn_printf(cn, fmt: "%d",
342	task_tgid_nr(current));
343	break;
344	case `'i'`:
345	err = cn_printf(cn, fmt: "%d",
346	task_pid_vnr(current));
347	break;
348	case `'I'`:
349	err = cn_printf(cn, fmt: "%d",
350	task_pid_nr(current));
351	break;
352	/ uid /
353	case `'u'`:
354	err = cn_printf(cn, fmt: "%u",
355	from_kuid(to: &init_user_ns,
356	uid: cred->uid));
357	break;
358	/ gid /
359	case `'g'`:
360	err = cn_printf(cn, fmt: "%u",
361	from_kgid(to: &init_user_ns,
362	gid: cred->gid));
363	break;
364	case `'d'`:
365	err = cn_printf(cn, fmt: "%d",
366	__get_dumpable(mm_flags: cprm->mm_flags));
367	break;
368	/ signal that caused the coredump /
369	case `'s'`:
370	err = cn_printf(cn, fmt: "%d",
371	cprm->siginfo->si_signo);
372	break;
373	/ UNIX time of coredump /
374	case `'t'`: {
375	time64_t time;
376
377	time = ktime_get_real_seconds();
378	err = cn_printf(cn, fmt: "%lld", time);
379	break;
380	}
381	/ hostname /
382	case `'h'`:
383	down_read(sem: &uts_sem);
384	err = cn_esc_printf(cn, fmt: "%s",
385	utsname()->nodename);
386	up_read(sem: &uts_sem);
387	break;
388	/ executable, could be changed by prctl PR_SET_NAME etc /
389	case `'e'`:
390	err = cn_esc_printf(cn, fmt: "%s", current->comm);
391	break;
392	/ file name of executable /
393	case `'f'`:
394	err = cn_print_exe_file(cn, name_only: true);
395	break;
396	case `'E'`:
397	err = cn_print_exe_file(cn, name_only: false);
398	break;
399	/ core limit size /
400	case `'c'`:
401	err = cn_printf(cn, fmt: "%lu",
402	rlimit(RLIMIT_CORE));
403	break;
404	/ CPU the task ran on /
405	case `'C'`:
406	err = cn_printf(cn, fmt: "%d", cprm->cpu);
407	break;
408	/ pidfd number /
409	case `'F'`: {
410	/*
411	* Installing a pidfd only makes sense if
412	* we actually spawn a usermode helper.
413	*/
414	if (cn->core_type != COREDUMP_PIPE)
415	break;
416
417	/*
418	* Note that we'll install a pidfd for the
419	* thread-group leader. We know that task
420	* linkage hasn't been removed yet and even if
421	* this @current isn't the actual thread-group
422	* leader we know that the thread-group leader
423	* cannot be reaped until @current has exited.
424	*/
425	cprm->pid = task_tgid(current);
426	err = cn_printf(cn, fmt: "%d", COREDUMP_PIDFD_NUMBER);
427	break;
428	}
429	default:
430	break;
431	}
432	++pat_ptr;
433	}
434
435	if (err)
436	return err;
437	}
438
439	out:
440	/ Backward compatibility with core_uses_pid:*
441	*
442	* If core_pattern does not include a %p (as is the default)
443	* and core_uses_pid is set, then .%pid will be appended to
444	* the filename. Do not do this for piped commands. */
445	if (cn->core_type == COREDUMP_FILE && !pid_in_pattern && core_uses_pid)
446	return cn_printf(cn, fmt: ".%d", task_tgid_vnr(current));
447
448	return `0`;
449	}
450
451	static int zap_process(struct signal_struct signal, int* exit_code)
452	{
453	struct task_struct *t;
454	int nr = `0`;
455
456	signal->flags = SIGNAL_GROUP_EXIT;
457	signal->group_exit_code = exit_code;
458	signal->group_stop_count = `0`;
459
460	__for_each_thread(signal, t) {
461	task_clear_jobctl_pending(task: t, JOBCTL_PENDING_MASK);
462	if (t != current && !(t->flags & PF_POSTCOREDUMP)) {
463	sigaddset(set: &t->pending.signal, SIGKILL);
464	signal_wake_up(t, fatal: `1`);
465	nr++;
466	}
467	}
468
469	return nr;
470	}
471
472	static int zap_threads(struct task_struct *tsk,
473	struct core_state core_state, int* exit_code)
474	{
475	struct signal_struct *signal = tsk->signal;
476	int nr = -EAGAIN;
477
478	spin_lock_irq(lock: &tsk->sighand->siglock);
479	if (!(signal->flags & SIGNAL_GROUP_EXIT) && !signal->group_exec_task) {
480	/ Allow SIGKILL, see prepare_signal() /
481	signal->core_state = core_state;
482	nr = zap_process(signal, exit_code);
483	clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
484	tsk->flags \|= PF_DUMPCORE;
485	atomic_set(v: &core_state->nr_threads, i: nr);
486	}
487	spin_unlock_irq(lock: &tsk->sighand->siglock);
488	return nr;
489	}
490
491	static int coredump_wait(int exit_code, struct core_state *core_state)
492	{
493	struct task_struct *tsk = current;
494	int core_waiters = -EBUSY;
495
496	init_completion(x: &core_state->startup);
497	core_state->dumper.task = tsk;
498	core_state->dumper.next = NULL;
499
500	core_waiters = zap_threads(tsk, core_state, exit_code);
501	if (core_waiters > `0`) {
502	struct core_thread *ptr;
503
504	wait_for_completion_state(x: &core_state->startup,
505	TASK_UNINTERRUPTIBLE\|TASK_FREEZABLE);
506	/*
507	* Wait for all the threads to become inactive, so that
508	* all the thread context (extended register state, like
509	* fpu etc) gets copied to the memory.
510	*/
511	ptr = core_state->dumper.next;
512	while (ptr != NULL) {
513	wait_task_inactive(ptr->task, TASK_ANY);
514	ptr = ptr->next;
515	}
516	}
517
518	return core_waiters;
519	}
520
521	static void coredump_finish(bool core_dumped)
522	{
523	struct core_thread curr, next;
524	struct task_struct *task;
525
526	spin_lock_irq(lock: &current->sighand->siglock);
527	if (core_dumped && !__fatal_signal_pending(current))
528	current->signal->group_exit_code \|= `0x80`;
529	next = current->signal->core_state->dumper.next;
530	current->signal->core_state = NULL;
531	spin_unlock_irq(lock: &current->sighand->siglock);
532
533	while ((curr = next) != NULL) {
534	next = curr->next;
535	task = curr->task;
536	/*
537	* see coredump_task_exit(), curr->task must not see
538	* ->task == NULL before we read ->next.
539	*/
540	smp_mb();
541	curr->task = NULL;
542	wake_up_process(tsk: task);
543	}
544	}
545
546	static bool dump_interrupted(void)
547	{
548	/*
549	* SIGKILL or freezing() interrupt the coredumping. Perhaps we
550	* can do try_to_freeze() and check __fatal_signal_pending(),
551	* but then we need to teach dump_write() to restart and clear
552	* TIF_SIGPENDING.
553	*/
554	return fatal_signal_pending(current) \|\| freezing(current);
555	}
556
557	static void wait_for_dump_helpers(struct file *file)
558	{
559	struct pipe_inode_info *pipe = file->private_data;
560
561	pipe_lock(pipe);
562	pipe->readers++;
563	pipe->writers--;
564	wake_up_interruptible_sync(&pipe->rd_wait);
565	kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
566	pipe_unlock(pipe);
567
568	/*
569	* We actually want wait_event_freezable() but then we need
570	* to clear TIF_SIGPENDING and improve dump_interrupted().
571	*/
572	wait_event_interruptible(pipe->rd_wait, pipe->readers == `1`);
573
574	pipe_lock(pipe);
575	pipe->readers--;
576	pipe->writers++;
577	pipe_unlock(pipe);
578	}
579
580	/*
581	* umh_coredump_setup
582	* helper function to customize the process used
583	* to collect the core in userspace. Specifically
584	* it sets up a pipe and installs it as fd 0 (stdin)
585	* for the process. Returns 0 on success, or
586	* PTR_ERR on failure.
587	* Note that it also sets the core limit to 1. This
588	* is a special value that we use to trap recursive
589	* core dumps
590	*/
591	static int umh_coredump_setup(struct subprocess_info info, struct* cred *new)
592	{
593	struct file *files[`2`];
594	struct coredump_params cp = (struct* coredump_params *)info->data;
595	int err;
596
597	if (cp->pid) {
598	struct file *pidfs_file __free(fput) = NULL;
599
600	pidfs_file = pidfs_alloc_file(pid: cp->pid, flags: `0`);
601	if (IS_ERR(ptr: pidfs_file))
602	return PTR_ERR(ptr: pidfs_file);
603
604	pidfs_coredump(cprm: cp);
605
606	/*
607	* Usermode helpers are childen of either
608	* system_unbound_wq or of kthreadd. So we know that
609	* we're starting off with a clean file descriptor
610	* table. So we should always be able to use
611	* COREDUMP_PIDFD_NUMBER as our file descriptor value.
612	*/
613	err = replace_fd(COREDUMP_PIDFD_NUMBER, file: pidfs_file, flags: `0`);
614	if (err < `0`)
615	return err;
616	}
617
618	err = create_pipe_files(files, `0`);
619	if (err)
620	return err;
621
622	cp->file = files[`1`];
623
624	err = replace_fd(fd: `0`, file: files[`0`], flags: `0`);
625	fput(files[`0`]);
626	if (err < `0`)
627	return err;
628
629	/ and disallow core files too /
630	current->signal->rlim[RLIMIT_CORE] = (struct rlimit){`1`, `1`};
631
632	return `0`;
633	}
634
635	void do_coredump(const kernel_siginfo_t *siginfo)
636	{
637	struct core_state core_state;
638	struct core_name cn;
639	struct mm_struct *mm = current->mm;
640	struct linux_binfmt * binfmt;
641	const struct cred *old_cred;
642	struct cred *cred;
643	int retval = `0`;
644	size_t *argv = NULL;
645	int argc = `0`;
646	/ require nonrelative corefile path and be extra careful /
647	bool need_suid_safe = false;
648	bool core_dumped = false;
649	static atomic_t core_dump_count = ATOMIC_INIT(`0`);
650	struct coredump_params cprm = {
651	.siginfo = siginfo,
652	.limit = rlimit(RLIMIT_CORE),
653	/*
654	* We must use the same mm->flags while dumping core to avoid
655	* inconsistency of bit flags, since this flag is not protected
656	* by any locks.
657	*/
658	.mm_flags = mm->flags,
659	.vma_meta = NULL,
660	.cpu = raw_smp_processor_id(),
661	};
662
663	audit_core_dumps(signr: siginfo->si_signo);
664
665	binfmt = mm->binfmt;
666	if (!binfmt \|\| !binfmt->core_dump)
667	goto fail;
668	if (!__get_dumpable(mm_flags: cprm.mm_flags))
669	goto fail;
670
671	cred = prepare_creds();
672	if (!cred)
673	goto fail;
674	/*
675	* We cannot trust fsuid as being the "true" uid of the process
676	* nor do we know its entire history. We only know it was tainted
677	* so we dump it as root in mode 2, and only into a controlled
678	* environment (pipe handler or fully qualified path).
679	*/
680	if (__get_dumpable(mm_flags: cprm.mm_flags) == SUID_DUMP_ROOT) {
681	/ Setuid core dump mode /
682	cred->fsuid = GLOBAL_ROOT_UID; / Dump root private /
683	need_suid_safe = true;
684	}
685
686	retval = coredump_wait(siginfo->si_signo, &core_state);
687	if (retval < `0`)
688	goto fail_creds;
689
690	old_cred = override_creds(cred);
691
692	retval = format_corename(&cn, &cprm, &argv, &argc);
693	if (retval < `0`) {
694	coredump_report_failure("format_corename failed, aborting core");
695	goto fail_unlock;
696	}
697
698	switch (cn.core_type) {
699	case COREDUMP_FILE: {
700	struct mnt_idmap *idmap;
701	struct inode *inode;
702	int open_flags = O_CREAT \| O_WRONLY \| O_NOFOLLOW \|
703	O_LARGEFILE \| O_EXCL;
704
705	if (cprm.limit < binfmt->min_coredump)
706	goto fail_unlock;
707
708	if (need_suid_safe && cn.corename[`0`] != `'/'`) {
709	coredump_report_failure(
710	"this process can only dump core to a fully qualified path, skipping core dump");
711	goto fail_unlock;
712	}
713
714	/*
715	* Unlink the file if it exists unless this is a SUID
716	* binary - in that case, we're running around with root
717	* privs and don't want to unlink another user's coredump.
718	*/
719	if (!need_suid_safe) {
720	/*
721	* If it doesn't exist, that's fine. If there's some
722	* other problem, we'll catch it at the filp_open().
723	*/
724	do_unlinkat(AT_FDCWD, getname_kernel(cn.corename));
725	}
726
727	/*
728	* There is a race between unlinking and creating the
729	* file, but if that causes an EEXIST here, that's
730	* fine - another process raced with us while creating
731	* the corefile, and the other process won. To userspace,
732	* what matters is that at least one of the two processes
733	* writes its coredump successfully, not which one.
734	*/
735	if (need_suid_safe) {
736	/*
737	* Using user namespaces, normal user tasks can change
738	* their current->fs->root to point to arbitrary
739	* directories. Since the intention of the "only dump
740	* with a fully qualified path" rule is to control where
741	* coredumps may be placed using root privileges,
742	* current->fs->root must not be used. Instead, use the
743	* root directory of init_task.
744	*/
745	struct path root;
746
747	task_lock(&init_task);
748	get_fs_root(init_task.fs, &root);
749	task_unlock(&init_task);
750	cprm.file = file_open_root(&root, cn.corename,
751	open_flags, `0600`);
752	path_put(&root);
753	} else {
754	cprm.file = filp_open(cn.corename, open_flags, `0600`);
755	}
756	if (IS_ERR(cprm.file))
757	goto fail_unlock;
758
759	inode = file_inode(cprm.file);
760	if (inode->i_nlink > `1`)
761	goto close_fail;
762	if (d_unhashed(cprm.file->f_path.dentry))
763	goto close_fail;
764	/*
765	* AK: actually i see no reason to not allow this for named
766	* pipes etc, but keep the previous behaviour for now.
767	*/
768	if (!S_ISREG(inode->i_mode))
769	goto close_fail;
770	/*
771	* Don't dump core if the filesystem changed owner or mode
772	* of the file during file creation. This is an issue when
773	* a process dumps core while its cwd is e.g. on a vfat
774	* filesystem.
775	*/
776	idmap = file_mnt_idmap(cprm.file);
777	if (!vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode),
778	current_fsuid())) {
779	coredump_report_failure("Core dump to %s aborted: "
780	"cannot preserve file owner", cn.corename);
781	goto close_fail;
782	}
783	if ((inode->i_mode & `0677`) != `0600`) {
784	coredump_report_failure("Core dump to %s aborted: "
785	"cannot preserve file permissions", cn.corename);
786	goto close_fail;
787	}
788	if (!(cprm.file->f_mode & FMODE_CAN_WRITE))
789	goto close_fail;
790	if (do_truncate(idmap, cprm.file->f_path.dentry,
791	`0`, `0`, cprm.file))
792	goto close_fail;
793	break;
794	}
795	case COREDUMP_PIPE: {
796	int argi;
797	int dump_count;
798	char **helper_argv;
799	struct subprocess_info *sub_info;
800
801	if (cprm.limit == `1`) {
802	/ See umh_coredump_setup() which sets RLIMIT_CORE = 1.*
803	*
804	* Normally core limits are irrelevant to pipes, since
805	* we're not writing to the file system, but we use
806	* cprm.limit of 1 here as a special value, this is a
807	* consistent way to catch recursive crashes.
808	* We can still crash if the core_pattern binary sets
809	* RLIM_CORE = !1, but it runs as root, and can do
810	* lots of stupid things.
811	*
812	* Note that we use task_tgid_vnr here to grab the pid
813	* of the process group leader. That way we get the
814	* right pid if a thread in a multi-threaded
815	* core_pattern process dies.
816	*/
817	coredump_report_failure("RLIMIT_CORE is set to 1, aborting core");
818	goto fail_unlock;
819	}
820	cprm.limit = RLIM_INFINITY;
821
822	dump_count = atomic_inc_return(&core_dump_count);
823	if (core_pipe_limit && (core_pipe_limit < dump_count)) {
824	coredump_report_failure("over core_pipe_limit, skipping core dump");
825	goto fail_dropcount;
826	}
827
828	helper_argv = kmalloc_array(argc + `1`, sizeof(*helper_argv),
829	GFP_KERNEL);
830	if (!helper_argv) {
831	coredump_report_failure("%s failed to allocate memory", __func__);
832	goto fail_dropcount;
833	}
834	for (argi = `0`; argi < argc; argi++)
835	helper_argv[argi] = cn.corename + argv[argi];
836	helper_argv[argi] = NULL;
837
838	retval = -ENOMEM;
839	sub_info = call_usermodehelper_setup(helper_argv[`0`],
840	helper_argv, NULL, GFP_KERNEL,
841	umh_coredump_setup, NULL, &cprm);
842	if (sub_info)
843	retval = call_usermodehelper_exec(sub_info,
844	UMH_WAIT_EXEC);
845
846	kfree(helper_argv);
847	if (retval) {
848	coredump_report_failure("\|%s pipe failed", cn.corename);
849	goto close_fail;
850	}
851	break;
852	}
853	case COREDUMP_SOCK: {
854	#ifdef CONFIG_UNIX
855	struct file *file __free(fput) = NULL;
856	struct sockaddr_un addr = {
857	.sun_family = AF_UNIX,
858	};
859	ssize_t addr_len;
860	struct socket *socket;
861
862	addr_len = strscpy(addr.sun_path, cn.corename);
863	if (addr_len < `0`)
864	goto close_fail;
865	addr_len += offsetof(struct sockaddr_un, sun_path) + `1`;
866
867	/*
868	* It is possible that the userspace process which is
869	* supposed to handle the coredump and is listening on
870	* the AF_UNIX socket coredumps. Userspace should just
871	* mark itself non dumpable.
872	*/
873
874	retval = sock_create_kern(&init_net, AF_UNIX, SOCK_STREAM, `0`, &socket);
875	if (retval < `0`)
876	goto close_fail;
877
878	file = sock_alloc_file(socket, `0`, NULL);
879	if (IS_ERR(file))
880	goto close_fail;
881
882	/*
883	* Set the thread-group leader pid which is used for the
884	* peer credentials during connect() below. Then
885	* immediately register it in pidfs...
886	*/
887	cprm.pid = task_tgid(current);
888	retval = pidfs_register_pid(cprm.pid);
889	if (retval)
890	goto close_fail;
891
892	/*
893	* ... and set the coredump information so userspace
894	* has it available after connect()...
895	*/
896	pidfs_coredump(&cprm);
897
898	retval = kernel_connect(socket, (struct sockaddr *)(&addr),
899	addr_len, O_NONBLOCK \| SOCK_COREDUMP);
900
901	/*
902	* ... Make sure to only put our reference after connect() took
903	* its own reference keeping the pidfs entry alive ...
904	*/
905	pidfs_put_pid(cprm.pid);
906
907	if (retval) {
908	if (retval == -EAGAIN)
909	coredump_report_failure("Coredump socket %s receive queue full", addr.sun_path);
910	else
911	coredump_report_failure("Coredump socket connection %s failed %d", addr.sun_path, retval);
912	goto close_fail;
913	}
914
915	/ ... and validate that @sk_peer_pid matches @cprm.pid. /
916	if (WARN_ON_ONCE(unix_peer(socket->sk)->sk_peer_pid != cprm.pid))
917	goto close_fail;
918
919	cprm.limit = RLIM_INFINITY;
920	cprm.file = no_free_ptr(file);
921	#else
922	coredump_report_failure("Core dump socket support %s disabled", cn.corename);
923	goto close_fail;
924	#endif
925	break;
926	}
927	default:
928	WARN_ON_ONCE(true);
929	goto close_fail;
930	}
931
932	/ get us an unshared descriptor table; almost always a no-op /
933	/ The cell spufs coredump code reads the file descriptor tables /
934	retval = unshare_files();
935	if (retval)
936	goto close_fail;
937	if (!dump_interrupted()) {
938	/*
939	* umh disabled with CONFIG_STATIC_USERMODEHELPER_PATH="" would
940	* have this set to NULL.
941	*/
942	if (!cprm.file) {
943	coredump_report_failure("Core dump to \|%s disabled", cn.corename);
944	goto close_fail;
945	}
946	if (!dump_vma_snapshot(&cprm))
947	goto close_fail;
948
949	file_start_write(cprm.file);
950	core_dumped = binfmt->core_dump(&cprm);
951	/*
952	* Ensures that file size is big enough to contain the current
953	* file postion. This prevents gdb from complaining about
954	* a truncated file if the last "write" to the file was
955	* dump_skip.
956	*/
957	if (cprm.to_skip) {
958	cprm.to_skip--;
959	dump_emit(&cprm, "", `1`);
960	}
961	file_end_write(cprm.file);
962	free_vma_snapshot(&cprm);
963	}
964
965	#ifdef CONFIG_UNIX
966	/ Let userspace know we're done processing the coredump. /
967	if (sock_from_file(cprm.file))
968	kernel_sock_shutdown(sock_from_file(cprm.file), SHUT_WR);
969	#endif
970
971	/*
972	* When core_pipe_limit is set we wait for the coredump server
973	* or usermodehelper to finish before exiting so it can e.g.,
974	* inspect /proc/<pid>.
975	*/
976	if (core_pipe_limit) {
977	switch (cn.core_type) {
978	case COREDUMP_PIPE:
979	wait_for_dump_helpers(cprm.file);
980	break;
981	#ifdef CONFIG_UNIX
982	case COREDUMP_SOCK: {
983	ssize_t n;
984
985	/*
986	* We use a simple read to wait for the coredump
987	* processing to finish. Either the socket is
988	* closed or we get sent unexpected data. In
989	* both cases, we're done.
990	*/
991	n = __kernel_read(cprm.file, &(char){ `0` }, `1`, NULL);
992	if (n != `0`)
993	coredump_report_failure("Unexpected data on coredump socket");
994	break;
995	}
996	#endif
997	default:
998	break;
999	}
1000	}
1001
1002	close_fail:
1003	if (cprm.file)
1004	filp_close(cprm.file, NULL);
1005	fail_dropcount:
1006	if (cn.core_type == COREDUMP_PIPE)
1007	atomic_dec(&core_dump_count);
1008	fail_unlock:
1009	kfree(argv);
1010	kfree(cn.corename);
1011	coredump_finish(core_dumped);
1012	revert_creds(old_cred);
1013	fail_creds:
1014	put_cred(cred);
1015	fail:
1016	return;
1017	}
1018
1019	/*
1020	* Core dumping helper functions. These are the only things you should
1021	* do on a core-file: use only these functions to write out all the
1022	* necessary info.
1023	*/
1024	static int __dump_emit(struct coredump_params cprm, const* void addr, int* nr)
1025	{
1026	struct file *file = cprm->file;
1027	loff_t pos = file->f_pos;
1028	ssize_t n;
1029
1030	if (cprm->written + nr > cprm->limit)
1031	return `0`;
1032	if (dump_interrupted())
1033	return `0`;
1034	n = __kernel_write(file, addr, nr, &pos);
1035	if (n != nr)
1036	return `0`;
1037	file->f_pos = pos;
1038	cprm->written += n;
1039	cprm->pos += n;
1040
1041	return `1`;
1042	}
1043
1044	static int __dump_skip(struct coredump_params *cprm, size_t nr)
1045	{
1046	static char zeroes[PAGE_SIZE];
1047	struct file *file = cprm->file;
1048
1049	if (file->f_mode & FMODE_LSEEK) {
1050	if (dump_interrupted() \|\| vfs_llseek(file, offset: nr, SEEK_CUR) < `0`)
1051	return `0`;
1052	cprm->pos += nr;
1053	return `1`;
1054	}
1055
1056	while (nr > PAGE_SIZE) {
1057	if (!__dump_emit(cprm, addr: zeroes, PAGE_SIZE))
1058	return `0`;
1059	nr -= PAGE_SIZE;
1060	}
1061
1062	return __dump_emit(cprm, addr: zeroes, nr);
1063	}
1064
1065	int dump_emit(struct coredump_params cprm, const* void addr, int* nr)
1066	{
1067	if (cprm->to_skip) {
1068	if (!__dump_skip(cprm, nr: cprm->to_skip))
1069	return `0`;
1070	cprm->to_skip = `0`;
1071	}
1072	return __dump_emit(cprm, addr, nr);
1073	}
1074	EXPORT_SYMBOL(dump_emit);
1075
1076	void dump_skip_to(struct coredump_params cprm, unsigned* long pos)
1077	{
1078	cprm->to_skip = pos - cprm->pos;
1079	}
1080	EXPORT_SYMBOL(dump_skip_to);
1081
1082	void dump_skip(struct coredump_params *cprm, size_t nr)
1083	{
1084	cprm->to_skip += nr;
1085	}
1086	EXPORT_SYMBOL(dump_skip);
1087
1088	#ifdef CONFIG_ELF_CORE
1089	static int dump_emit_page(struct coredump_params cprm, struct* page *page)
1090	{
1091	struct bio_vec bvec;
1092	struct iov_iter iter;
1093	struct file *file = cprm->file;
1094	loff_t pos;
1095	ssize_t n;
1096
1097	if (!page)
1098	return `0`;
1099
1100	if (cprm->to_skip) {
1101	if (!__dump_skip(cprm, nr: cprm->to_skip))
1102	return `0`;
1103	cprm->to_skip = `0`;
1104	}
1105	if (cprm->written + PAGE_SIZE > cprm->limit)
1106	return `0`;
1107	if (dump_interrupted())
1108	return `0`;
1109	pos = file->f_pos;
1110	bvec_set_page(bv: &bvec, page, PAGE_SIZE, offset: `0`);
1111	iov_iter_bvec(i: &iter, ITER_SOURCE, bvec: &bvec, nr_segs: `1`, PAGE_SIZE);
1112	n = __kernel_write_iter(file: cprm->file, from: &iter, pos: &pos);
1113	if (n != PAGE_SIZE)
1114	return `0`;
1115	file->f_pos = pos;
1116	cprm->written += PAGE_SIZE;
1117	cprm->pos += PAGE_SIZE;
1118
1119	return `1`;
1120	}
1121
1122	/*
1123	* If we might get machine checks from kernel accesses during the
1124	* core dump, let's get those errors early rather than during the
1125	* IO. This is not performance-critical enough to warrant having
1126	* all the machine check logic in the iovec paths.
1127	*/
1128	#ifdef copy_mc_to_kernel
1129
1130	#define dump_page_alloc() alloc_page(GFP_KERNEL)
1131	#define dump_page_free(x) __free_page(x)
1132	static struct page dump_page_copy(struct* page src, struct* page *dst)
1133	{
1134	void *buf = kmap_local_page(page: src);
1135	size_t left = copy_mc_to_kernel(page_address(dst), from: buf, PAGE_SIZE);
1136	kunmap_local(buf);
1137	return left ? NULL : dst;
1138	}
1139
1140	#else
1141
1142	/ We just want to return non-NULL; it's never used. /
1143	#define dump_page_alloc() ERR_PTR(-EINVAL)
1144	#define dump_page_free(x) ((void)(x))
1145	static inline struct page dump_page_copy(struct* page src, struct* page *dst)
1146	{
1147	return src;
1148	}
1149	#endif
1150
1151	int dump_user_range(struct coredump_params cprm, unsigned* long start,
1152	unsigned long len)
1153	{
1154	unsigned long addr;
1155	struct page *dump_page;
1156	int locked, ret;
1157
1158	dump_page = dump_page_alloc();
1159	if (!dump_page)
1160	return `0`;
1161
1162	ret = `0`;
1163	locked = `0`;
1164	for (addr = start; addr < start + len; addr += PAGE_SIZE) {
1165	struct page *page;
1166
1167	if (!locked) {
1168	if (mmap_read_lock_killable(current->mm))
1169	goto out;
1170	locked = `1`;
1171	}
1172
1173	/*
1174	* To avoid having to allocate page tables for virtual address
1175	* ranges that have never been used yet, and also to make it
1176	* easy to generate sparse core files, use a helper that returns
1177	* NULL when encountering an empty page table entry that would
1178	* otherwise have been filled with the zero page.
1179	*/
1180	page = get_dump_page(addr, locked: &locked);
1181	if (page) {
1182	if (locked) {
1183	mmap_read_unlock(current->mm);
1184	locked = `0`;
1185	}
1186	int stop = !dump_emit_page(cprm, page: dump_page_copy(src: page, dst: dump_page));
1187	put_page(page);
1188	if (stop)
1189	goto out;
1190	} else {
1191	dump_skip(cprm, PAGE_SIZE);
1192	}
1193
1194	if (dump_interrupted())
1195	goto out;
1196
1197	if (!need_resched())
1198	continue;
1199	if (locked) {
1200	mmap_read_unlock(current->mm);
1201	locked = `0`;
1202	}
1203	cond_resched();
1204	}
1205	ret = `1`;
1206	out:
1207	if (locked)
1208	mmap_read_unlock(current->mm);
1209
1210	dump_page_free(dump_page);
1211	return ret;
1212	}
1213	#endif
1214
1215	int dump_align(struct coredump_params cprm, int* align)
1216	{
1217	unsigned mod = (cprm->pos + cprm->to_skip) & (align - `1`);
1218	if (align & (align - `1`))
1219	return `0`;
1220	if (mod)
1221	cprm->to_skip += align - mod;
1222	return `1`;
1223	}
1224	EXPORT_SYMBOL(dump_align);
1225
1226	#ifdef CONFIG_SYSCTL
1227
1228	void validate_coredump_safety(void)
1229	{
1230	if (suid_dumpable == SUID_DUMP_ROOT &&
1231	core_pattern[`0`] != `'/'` && core_pattern[`0`] != `'\|'` && core_pattern[`0`] != `'@'`) {
1232
1233	coredump_report_failure("Unsafe core_pattern used with fs.suid_dumpable=2: "
1234	"pipe handler or fully qualified core dump path required. "
1235	"Set kernel.core_pattern before fs.suid_dumpable.");
1236	}
1237	}
1238
1239	static inline bool check_coredump_socket(void)
1240	{
1241	if (core_pattern[`0`] != `'@'`)
1242	return true;
1243
1244	/*
1245	* Coredump socket must be located in the initial mount
1246	* namespace. Don't give the impression that anything else is
1247	* supported right now.
1248	*/
1249	if (current->nsproxy->mnt_ns != init_task.nsproxy->mnt_ns)
1250	return false;
1251
1252	/ Must be an absolute path. /
1253	if (*(core_pattern + `1`) != `'/'`)
1254	return false;
1255
1256	return true;
1257	}
1258
1259	static int proc_dostring_coredump(const struct ctl_table table, int* write,
1260	void buffer, size_t lenp, loff_t *ppos)
1261	{
1262	int error;
1263	ssize_t retval;
1264	char old_core_pattern[CORENAME_MAX_SIZE];
1265
1266	retval = strscpy(old_core_pattern, core_pattern, CORENAME_MAX_SIZE);
1267
1268	error = proc_dostring(table, write, buffer, lenp, ppos);
1269	if (error)
1270	return error;
1271	if (!check_coredump_socket()) {
1272	strscpy(core_pattern, old_core_pattern, retval + `1`);
1273	return -EINVAL;
1274	}
1275
1276	validate_coredump_safety();
1277	return error;
1278	}
1279
1280	static const unsigned int core_file_note_size_min = CORE_FILE_NOTE_SIZE_DEFAULT;
1281	static const unsigned int core_file_note_size_max = CORE_FILE_NOTE_SIZE_MAX;
1282	static char core_modes[] = {
1283	"file\npipe"
1284	#ifdef CONFIG_UNIX
1285	"\nsocket"
1286	#endif
1287	};
1288
1289	static const struct ctl_table coredump_sysctls[] = {
1290	{
1291	.procname = "core_uses_pid",
1292	.data = &core_uses_pid,
1293	.maxlen = sizeof(int),
1294	.mode = `0644`,
1295	.proc_handler = proc_dointvec,
1296	},
1297	{
1298	.procname = "core_pattern",
1299	.data = core_pattern,
1300	.maxlen = CORENAME_MAX_SIZE,
1301	.mode = `0644`,
1302	.proc_handler = proc_dostring_coredump,
1303	},
1304	{
1305	.procname = "core_pipe_limit",
1306	.data = &core_pipe_limit,
1307	.maxlen = sizeof(unsigned int),
1308	.mode = `0644`,
1309	.proc_handler = proc_dointvec_minmax,
1310	.extra1 = SYSCTL_ZERO,
1311	.extra2 = SYSCTL_INT_MAX,
1312	},
1313	{
1314	.procname = "core_file_note_size_limit",
1315	.data = &core_file_note_size_limit,
1316	.maxlen = sizeof(unsigned int),
1317	.mode = `0644`,
1318	.proc_handler = proc_douintvec_minmax,
1319	.extra1 = (unsigned int *)&core_file_note_size_min,
1320	.extra2 = (unsigned int *)&core_file_note_size_max,
1321	},
1322	{
1323	.procname = "core_sort_vma",
1324	.data = &core_sort_vma,
1325	.maxlen = sizeof(int),
1326	.mode = `0644`,
1327	.proc_handler = proc_douintvec_minmax,
1328	.extra1 = SYSCTL_ZERO,
1329	.extra2 = SYSCTL_ONE,
1330	},
1331	{
1332	.procname = "core_modes",
1333	.data = core_modes,
1334	.maxlen = sizeof(core_modes) - `1`,
1335	.mode = `0444`,
1336	.proc_handler = proc_dostring,
1337	},
1338	};
1339
1340	static int __init init_fs_coredump_sysctls(void)
1341	{
1342	register_sysctl_init("kernel", coredump_sysctls);
1343	return `0`;
1344	}
1345	fs_initcall(init_fs_coredump_sysctls);
1346	#endif /* CONFIG_SYSCTL */
1347
1348	/*
1349	* The purpose of always_dump_vma() is to make sure that special kernel mappings
1350	* that are useful for post-mortem analysis are included in every core dump.
1351	* In that way we ensure that the core dump is fully interpretable later
1352	* without matching up the same kernel and hardware config to see what PC values
1353	* meant. These special mappings include - vDSO, vsyscall, and other
1354	* architecture specific mappings
1355	*/
1356	static bool always_dump_vma(struct vm_area_struct *vma)
1357	{
1358	/ Any vsyscall mappings? /
1359	if (vma == get_gate_vma(mm: vma->vm_mm))
1360	return true;
1361
1362	/*
1363	* Assume that all vmas with a .name op should always be dumped.
1364	* If this changes, a new vm_ops field can easily be added.
1365	*/
1366	if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
1367	return true;
1368
1369	/*
1370	* arch_vma_name() returns non-NULL for special architecture mappings,
1371	* such as vDSO sections.
1372	*/
1373	if (arch_vma_name(vma))
1374	return true;
1375
1376	return false;
1377	}
1378
1379	#define DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER 1
1380
1381	/*
1382	* Decide how much of @vma's contents should be included in a core dump.
1383	*/
1384	static unsigned long vma_dump_size(struct vm_area_struct *vma,
1385	unsigned long mm_flags)
1386	{
1387	#define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
1388
1389	/ always dump the vdso and vsyscall sections /
1390	if (always_dump_vma(vma))
1391	goto whole;
1392
1393	if (vma->vm_flags & VM_DONTDUMP)
1394	return `0`;
1395
1396	/ support for DAX /
1397	if (vma_is_dax(vma)) {
1398	if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
1399	goto whole;
1400	if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
1401	goto whole;
1402	return `0`;
1403	}
1404
1405	/ Hugetlb memory check /
1406	if (is_vm_hugetlb_page(vma)) {
1407	if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1408	goto whole;
1409	if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1410	goto whole;
1411	return `0`;
1412	}
1413
1414	/ Do not dump I/O mapped devices or special mappings /
1415	if (vma->vm_flags & VM_IO)
1416	return `0`;
1417
1418	/ By default, dump shared memory if mapped from an anonymous file. /
1419	if (vma->vm_flags & VM_SHARED) {
1420	if (file_inode(f: vma->vm_file)->i_nlink == `0` ?
1421	FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1422	goto whole;
1423	return `0`;
1424	}
1425
1426	/ Dump segments that have been written to. /
1427	if ((!IS_ENABLED(CONFIG_MMU) \|\| vma->anon_vma) && FILTER(ANON_PRIVATE))
1428	goto whole;
1429	if (vma->vm_file == NULL)
1430	return `0`;
1431
1432	if (FILTER(MAPPED_PRIVATE))
1433	goto whole;
1434
1435	/*
1436	* If this is the beginning of an executable file mapping,
1437	* dump the first page to aid in determining what was mapped here.
1438	*/
1439	if (FILTER(ELF_HEADERS) &&
1440	vma->vm_pgoff == `0` && (vma->vm_flags & VM_READ)) {
1441	if ((READ_ONCE(file_inode(vma->vm_file)->i_mode) & `0111`) != `0`)
1442	return PAGE_SIZE;
1443
1444	/*
1445	* ELF libraries aren't always executable.
1446	* We'll want to check whether the mapping starts with the ELF
1447	* magic, but not now - we're holding the mmap lock,
1448	* so copy_from_user() doesn't work here.
1449	* Use a placeholder instead, and fix it up later in
1450	* dump_vma_snapshot().
1451	*/
1452	return DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER;
1453	}
1454
1455	#undef FILTER
1456
1457	return `0`;
1458
1459	whole:
1460	return vma->vm_end - vma->vm_start;
1461	}
1462
1463	/*
1464	* Helper function for iterating across a vma list. It ensures that the caller
1465	* will visit `gate_vma' prior to terminating the search.
1466	*/
1467	static struct vm_area_struct coredump_next_vma(struct* vma_iterator *vmi,
1468	struct vm_area_struct *vma,
1469	struct vm_area_struct *gate_vma)
1470	{
1471	if (gate_vma && (vma == gate_vma))
1472	return NULL;
1473
1474	vma = vma_next(vmi);
1475	if (vma)
1476	return vma;
1477	return gate_vma;
1478	}
1479
1480	static void free_vma_snapshot(struct coredump_params *cprm)
1481	{
1482	if (cprm->vma_meta) {
1483	int i;
1484	for (i = `0`; i < cprm->vma_count; i++) {
1485	struct file *file = cprm->vma_meta[i].file;
1486	if (file)
1487	fput(file);
1488	}
1489	kvfree(addr: cprm->vma_meta);
1490	cprm->vma_meta = NULL;
1491	}
1492	}
1493
1494	static int cmp_vma_size(const void vma_meta_lhs_ptr, const* void *vma_meta_rhs_ptr)
1495	{
1496	const struct core_vma_metadata *vma_meta_lhs = vma_meta_lhs_ptr;
1497	const struct core_vma_metadata *vma_meta_rhs = vma_meta_rhs_ptr;
1498
1499	if (vma_meta_lhs->dump_size < vma_meta_rhs->dump_size)
1500	return -`1`;
1501	if (vma_meta_lhs->dump_size > vma_meta_rhs->dump_size)
1502	return `1`;
1503	return `0`;
1504	}
1505
1506	/*
1507	* Under the mmap_lock, take a snapshot of relevant information about the task's
1508	* VMAs.
1509	*/
1510	static bool dump_vma_snapshot(struct coredump_params *cprm)
1511	{
1512	struct vm_area_struct gate_vma, vma = NULL;
1513	struct mm_struct *mm = current->mm;
1514	VMA_ITERATOR(vmi, mm, `0`);
1515	int i = `0`;
1516
1517	/*
1518	* Once the stack expansion code is fixed to not change VMA bounds
1519	* under mmap_lock in read mode, this can be changed to take the
1520	* mmap_lock in read mode.
1521	*/
1522	if (mmap_write_lock_killable(mm))
1523	return false;
1524
1525	cprm->vma_data_size = `0`;
1526	gate_vma = get_gate_vma(mm);
1527	cprm->vma_count = mm->map_count + (gate_vma ? `1` : `0`);
1528
1529	cprm->vma_meta = kvmalloc_array(cprm->vma_count, sizeof(*cprm->vma_meta), GFP_KERNEL);
1530	if (!cprm->vma_meta) {
1531	mmap_write_unlock(mm);
1532	return false;
1533	}
1534
1535	while ((vma = coredump_next_vma(vmi: &vmi, vma, gate_vma)) != NULL) {
1536	struct core_vma_metadata *m = cprm->vma_meta + i;
1537
1538	m->start = vma->vm_start;
1539	m->end = vma->vm_end;
1540	m->flags = vma->vm_flags;
1541	m->dump_size = vma_dump_size(vma, mm_flags: cprm->mm_flags);
1542	m->pgoff = vma->vm_pgoff;
1543	m->file = vma->vm_file;
1544	if (m->file)
1545	get_file(f: m->file);
1546	i++;
1547	}
1548
1549	mmap_write_unlock(mm);
1550
1551	for (i = `0`; i < cprm->vma_count; i++) {
1552	struct core_vma_metadata *m = cprm->vma_meta + i;
1553
1554	if (m->dump_size == DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER) {
1555	char elfmag[SELFMAG];
1556
1557	if (copy_from_user(to: elfmag, from: (void __user *)m->start, SELFMAG) \|\|
1558	memcmp(p: elfmag, ELFMAG, SELFMAG) != `0`) {
1559	m->dump_size = `0`;
1560	} else {
1561	m->dump_size = PAGE_SIZE;
1562	}
1563	}
1564
1565	cprm->vma_data_size += m->dump_size;
1566	}
1567
1568	if (core_sort_vma)
1569	sort(base: cprm->vma_meta, num: cprm->vma_count, size: sizeof(*cprm->vma_meta),
1570	cmp_func: cmp_vma_size, NULL);
1571
1572	return true;
1573	}
1574

Provided by KDAB

Definitions

source code of linux/fs/coredump.c