base.c source code [linux/fs/proc/base.c]

1	// SPDX-License-Identifier: GPL-2.0
2	/*
3	* linux/fs/proc/base.c
4	*
5	* Copyright (C) 1991, 1992 Linus Torvalds
6	*
7	* proc base directory handling functions
8	*
9	* 1999, Al Viro. Rewritten. Now it covers the whole per-process part.
10	* Instead of using magical inumbers to determine the kind of object
11	* we allocate and fill in-core inodes upon lookup. They don't even
12	* go into icache. We cache the reference to task_struct upon lookup too.
13	* Eventually it should become a filesystem in its own. We don't use the
14	* rest of procfs anymore.
15	*
16	*
17	* Changelog:
18	* 17-Jan-2005
19	* Allan Bezerra
20	* Bruna Moreira <bruna.moreira@indt.org.br>
21	* Edjard Mota <edjard.mota@indt.org.br>
22	* Ilias Biris <ilias.biris@indt.org.br>
23	* Mauricio Lin <mauricio.lin@indt.org.br>
24	*
25	* Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
26	*
27	* A new process specific entry (smaps) included in /proc. It shows the
28	* size of rss for each memory area. The maps entry lacks information
29	* about physical memory size (rss) for each mapped file, i.e.,
30	* rss information for executables and library files.
31	* This additional information is useful for any tools that need to know
32	* about physical memory consumption for a process specific library.
33	*
34	* Changelog:
35	* 21-Feb-2005
36	* Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
37	* Pud inclusion in the page table walking.
38	*
39	* ChangeLog:
40	* 10-Mar-2005
41	* 10LE Instituto Nokia de Tecnologia - INdT:
42	* A better way to walks through the page table as suggested by Hugh Dickins.
43	*
44	* Simo Piiroinen <simo.piiroinen@nokia.com>:
45	* Smaps information related to shared, private, clean and dirty pages.
46	*
47	* Paul Mundt <paul.mundt@nokia.com>:
48	* Overall revision about smaps.
49	*/
50
51	#include <linux/uaccess.h>
52
53	#include <linux/errno.h>
54	#include <linux/time.h>
55	#include <linux/proc_fs.h>
56	#include <linux/stat.h>
57	#include <linux/task_io_accounting_ops.h>
58	#include <linux/init.h>
59	#include <linux/capability.h>
60	#include <linux/file.h>
61	#include <linux/generic-radix-tree.h>
62	#include <linux/string.h>
63	#include <linux/seq_file.h>
64	#include <linux/namei.h>
65	#include <linux/mnt_namespace.h>
66	#include <linux/mm.h>
67	#include <linux/swap.h>
68	#include <linux/rcupdate.h>
69	#include <linux/kallsyms.h>
70	#include <linux/stacktrace.h>
71	#include <linux/resource.h>
72	#include <linux/module.h>
73	#include <linux/mount.h>
74	#include <linux/security.h>
75	#include <linux/ptrace.h>
76	#include <linux/printk.h>
77	#include <linux/cache.h>
78	#include <linux/cgroup.h>
79	#include <linux/cpuset.h>
80	#include <linux/audit.h>
81	#include <linux/poll.h>
82	#include <linux/nsproxy.h>
83	#include <linux/oom.h>
84	#include <linux/elf.h>
85	#include <linux/pid_namespace.h>
86	#include <linux/user_namespace.h>
87	#include <linux/fs_parser.h>
88	#include <linux/fs_struct.h>
89	#include <linux/slab.h>
90	#include <linux/sched/autogroup.h>
91	#include <linux/sched/mm.h>
92	#include <linux/sched/coredump.h>
93	#include <linux/sched/debug.h>
94	#include <linux/sched/stat.h>
95	#include <linux/posix-timers.h>
96	#include <linux/time_namespace.h>
97	#include <linux/resctrl.h>
98	#include <linux/cn_proc.h>
99	#include <linux/ksm.h>
100	#include <uapi/linux/lsm.h>
101	#include <trace/events/oom.h>
102	#include "internal.h"
103	#include "fd.h"
104
105	#include "../../lib/kstrtox.h"
106
/* NOTE:
 *	Implementing inode permission operations in /proc is almost
 *	certainly an error.  Permission checks need to happen during
 *	each system call, not at open time.  The reason is that most of
 *	what we wish to check for permissions in /proc varies at runtime.
 *
 *	The classic example of a problem is opening file descriptors
 *	in /proc for a task before it execs a suid executable.
 */
116
117	static u8 nlink_tid __ro_after_init;
118	static u8 nlink_tgid __ro_after_init;
119
/*
 * Policy for whether /proc/PID/mem accesses add FOLL_FORCE
 * (consumed by proc_mem_foll_force()).
 */
enum proc_mem_force {
	PROC_MEM_FORCE_ALWAYS,	/* always add FOLL_FORCE */
	PROC_MEM_FORCE_PTRACE,	/* only when the caller is ptracing the task */
	PROC_MEM_FORCE_NEVER	/* never add FOLL_FORCE */
};
125
126	static enum proc_mem_force proc_mem_force_override __ro_after_init =
127	IS_ENABLED(CONFIG_PROC_MEM_NO_FORCE) ? PROC_MEM_FORCE_NEVER :
128	IS_ENABLED(CONFIG_PROC_MEM_FORCE_PTRACE) ? PROC_MEM_FORCE_PTRACE :
129	PROC_MEM_FORCE_ALWAYS;
130
131	static const struct constant_table proc_mem_force_table[] __initconst = {
132	{ "always", PROC_MEM_FORCE_ALWAYS },
133	{ "ptrace", PROC_MEM_FORCE_PTRACE },
134	{ "never", PROC_MEM_FORCE_NEVER },
135	{ }
136	};
137
138	static int __init early_proc_mem_force_override(char *buf)
139	{
140	if (!buf)
141	return -EINVAL;
142
143	/*
144	* lookup_constant() defaults to proc_mem_force_override to preseve
145	* the initial Kconfig choice in case an invalid param gets passed.
146	*/
147	proc_mem_force_override = lookup_constant(tbl: proc_mem_force_table,
148	name: buf, not_found: proc_mem_force_override);
149
150	return `0`;
151	}
152	early_param("proc_mem.force_override", early_proc_mem_force_override);
153
154	struct pid_entry {
155	const char *name;
156	unsigned int len;
157	umode_t mode;
158	const struct inode_operations *iop;
159	const struct file_operations *fop;
160	union proc_op op;
161	};
162
/*
 * Initialiser helpers for struct pid_entry tables.
 *
 * NOD() is the generic form; NAME must be a string literal because its
 * length is taken with sizeof.  The remaining macros fix the file type:
 *   DIR  - directory with the given inode and file operations
 *   LNK  - symlink (mode S_IRWXUGO) resolved through .proc_get_link
 *   REG  - regular file with the given file operations
 *   ONE  - regular file served by proc_single_file_operations/.proc_show
 *   ATTR - regular file served by proc_pid_attr_operations for one LSM id
 */
#define NOD(NAME, MODE, IOP, FOP, OP) {			\
	.name = (NAME),					\
	.len  = sizeof(NAME) - 1,			\
	.mode = MODE,					\
	.iop  = IOP,					\
	.fop  = FOP,					\
	.op   = OP,					\
}

#define DIR(NAME, MODE, iops, fops)	\
	NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {} )
#define LNK(NAME, get_link)					\
	NOD(NAME, (S_IFLNK|S_IRWXUGO),				\
		&proc_pid_link_inode_operations, NULL,		\
		{ .proc_get_link = get_link } )
#define REG(NAME, MODE, fops)				\
	NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {})
#define ONE(NAME, MODE, show)				\
	NOD(NAME, (S_IFREG|(MODE)),			\
		NULL, &proc_single_file_operations,	\
		{ .proc_show = show } )
#define ATTR(LSMID, NAME, MODE)				\
	NOD(NAME, (S_IFREG|(MODE)),			\
		NULL, &proc_pid_attr_operations,	\
		{ .lsmid = LSMID })
188
189	/*
190	* Count the number of hardlinks for the pid_entry table, excluding the .
191	* and .. links.
192	*/
193	static unsigned int __init pid_entry_nlink(const struct pid_entry *entries,
194	unsigned int n)
195	{
196	unsigned int i;
197	unsigned int count;
198
199	count = `2`;
200	for (i = `0`; i < n; ++i) {
201	if (S_ISDIR(entries[i].mode))
202	++count;
203	}
204
205	return count;
206	}
207
208	static int get_task_root(struct task_struct task, struct* path *root)
209	{
210	int result = -ENOENT;
211
212	task_lock(p: task);
213	if (task->fs) {
214	get_fs_root(fs: task->fs, root);
215	result = `0`;
216	}
217	task_unlock(p: task);
218	return result;
219	}
220
221	static int proc_cwd_link(struct dentry dentry, struct* path *path)
222	{
223	struct task_struct *task = get_proc_task(inode: d_inode(dentry));
224	int result = -ENOENT;
225
226	if (task) {
227	task_lock(p: task);
228	if (task->fs) {
229	get_fs_pwd(fs: task->fs, pwd: path);
230	result = `0`;
231	}
232	task_unlock(p: task);
233	put_task_struct(t: task);
234	}
235	return result;
236	}
237
238	static int proc_root_link(struct dentry dentry, struct* path *path)
239	{
240	struct task_struct *task = get_proc_task(inode: d_inode(dentry));
241	int result = -ENOENT;
242
243	if (task) {
244	result = get_task_root(task, root: path);
245	put_task_struct(t: task);
246	}
247	return result;
248	}
249
250	/*
251	* If the user used setproctitle(), we just get the string from
252	* user space at arg_start, and limit it to a maximum of one page.
253	*/
254	static ssize_t get_mm_proctitle(struct mm_struct mm, char* __user *buf,
255	size_t count, unsigned long pos,
256	unsigned long arg_start)
257	{
258	char *page;
259	int ret, got;
260
261	if (pos >= PAGE_SIZE)
262	return `0`;
263
264	page = (char *)__get_free_page(GFP_KERNEL);
265	if (!page)
266	return -ENOMEM;
267
268	ret = `0`;
269	got = access_remote_vm(mm, addr: arg_start, buf: page, PAGE_SIZE, gup_flags: FOLL_ANON);
270	if (got > `0`) {
271	int len = strnlen(p: page, maxlen: got);
272
273	/ Include the NUL character if it was found /
274	if (len < got)
275	len++;
276
277	if (len > pos) {
278	len -= pos;
279	if (len > count)
280	len = count;
281	len -= copy_to_user(to: buf, from: page+pos, n: len);
282	if (!len)
283	len = -EFAULT;
284	ret = len;
285	}
286	}
287	free_page((unsigned long)page);
288	return ret;
289	}
290
291	static ssize_t get_mm_cmdline(struct mm_struct mm, char* __user *buf,
292	size_t count, loff_t *ppos)
293	{
294	unsigned long arg_start, arg_end, env_start, env_end;
295	unsigned long pos, len;
296	char *page, c;
297
298	/ Check if process spawned far enough to have cmdline. /
299	if (!mm->env_end)
300	return `0`;
301
302	spin_lock(lock: &mm->arg_lock);
303	arg_start = mm->arg_start;
304	arg_end = mm->arg_end;
305	env_start = mm->env_start;
306	env_end = mm->env_end;
307	spin_unlock(lock: &mm->arg_lock);
308
309	if (arg_start >= arg_end)
310	return `0`;
311
312	/*
313	* We allow setproctitle() to overwrite the argument
314	* strings, and overflow past the original end. But
315	* only when it overflows into the environment area.
316	*/
317	if (env_start != arg_end \|\| env_end < env_start)
318	env_start = env_end = arg_end;
319	len = env_end - arg_start;
320
321	/ We're not going to care if "ppos" has high bits set /*
322	pos = *ppos;
323	if (pos >= len)
324	return `0`;
325	if (count > len - pos)
326	count = len - pos;
327	if (!count)
328	return `0`;
329
330	/*
331	* Magical special case: if the argv[] end byte is not
332	* zero, the user has overwritten it with setproctitle(3).
333	*
334	* Possible future enhancement: do this only once when
335	* pos is 0, and set a flag in the 'struct file'.
336	*/
337	if (access_remote_vm(mm, addr: arg_end-`1`, buf: &c, len: `1`, gup_flags: FOLL_ANON) == `1` && c)
338	return get_mm_proctitle(mm, buf, count, pos, arg_start);
339
340	/*
341	* For the non-setproctitle() case we limit things strictly
342	* to the [arg_start, arg_end[ range.
343	*/
344	pos += arg_start;
345	if (pos < arg_start \|\| pos >= arg_end)
346	return `0`;
347	if (count > arg_end - pos)
348	count = arg_end - pos;
349
350	page = (char *)__get_free_page(GFP_KERNEL);
351	if (!page)
352	return -ENOMEM;
353
354	len = `0`;
355	while (count) {
356	int got;
357	size_t size = min_t(size_t, PAGE_SIZE, count);
358
359	got = access_remote_vm(mm, addr: pos, buf: page, len: size, gup_flags: FOLL_ANON);
360	if (got <= `0`)
361	break;
362	got -= copy_to_user(to: buf, from: page, n: got);
363	if (unlikely(!got)) {
364	if (!len)
365	len = -EFAULT;
366	break;
367	}
368	pos += got;
369	buf += got;
370	len += got;
371	count -= got;
372	}
373
374	free_page((unsigned long)page);
375	return len;
376	}
377
378	static ssize_t get_task_cmdline(struct task_struct tsk, char* __user *buf,
379	size_t count, loff_t *pos)
380	{
381	struct mm_struct *mm;
382	ssize_t ret;
383
384	mm = get_task_mm(task: tsk);
385	if (!mm)
386	return `0`;
387
388	ret = get_mm_cmdline(mm, buf, count, ppos: pos);
389	mmput(mm);
390	return ret;
391	}
392
393	static ssize_t proc_pid_cmdline_read(struct file file, char* __user *buf,
394	size_t count, loff_t *pos)
395	{
396	struct task_struct *tsk;
397	ssize_t ret;
398
399	BUG_ON(*pos < `0`);
400
401	tsk = get_proc_task(inode: file_inode(f: file));
402	if (!tsk)
403	return -ESRCH;
404	ret = get_task_cmdline(tsk, buf, count, pos);
405	put_task_struct(t: tsk);
406	if (ret > `0`)
407	*pos += ret;
408	return ret;
409	}
410
411	static const struct file_operations proc_pid_cmdline_ops = {
412	.read = proc_pid_cmdline_read,
413	.llseek = generic_file_llseek,
414	};
415
#ifdef CONFIG_KALLSYMS
/*
 * Provides a wchan file via kallsyms in a proper one-value-per-file format.
 * Returns the resolved symbol to user space.
 *
 * When the caller lacks PTRACE_MODE_READ_FSCREDS access to @task, or no
 * symbol can be resolved, a single '0' character is emitted instead.
 * Always returns 0.
 */
static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
			  struct pid *pid, struct task_struct *task)
{
	unsigned long wchan;
	char symname[KSYM_NAME_LEN];

	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
		goto print0;

	wchan = get_wchan(task);
	if (wchan && !lookup_symbol_name(wchan, symname)) {
		seq_puts(m, symname);
		return 0;
	}

print0:
	seq_putc(m, '0');
	return 0;
}
#endif /* CONFIG_KALLSYMS */
441
442	static int lock_trace(struct task_struct *task)
443	{
444	int err = down_read_killable(sem: &task->signal->exec_update_lock);
445	if (err)
446	return err;
447	if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) {
448	up_read(sem: &task->signal->exec_update_lock);
449	return -EPERM;
450	}
451	return `0`;
452	}
453
454	static void unlock_trace(struct task_struct *task)
455	{
456	up_read(sem: &task->signal->exec_update_lock);
457	}
458
#ifdef CONFIG_STACKTRACE

#define MAX_STACK_TRACE_DEPTH	64

/*
 * Print the kernel stack trace of @task, one "[<0>] symbol" line per
 * frame, up to MAX_STACK_TRACE_DEPTH frames.
 *
 * Returns 0 on success, -EACCES if the opener lacks CAP_SYS_ADMIN in the
 * initial user namespace, -ENOMEM on allocation failure, or the error from
 * lock_trace().
 */
static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
			  struct pid *pid, struct task_struct *task)
{
	unsigned long *entries;
	int err;

	/*
	 * The ability to racily run the kernel stack unwinder on a running task
	 * and then observe the unwinder output is scary; while it is useful for
	 * debugging kernel issues, it can also allow an attacker to leak kernel
	 * stack contents.
	 * Doing this in a manner that is at least safe from races would require
	 * some work to ensure that the remote task can not be scheduled; and
	 * even then, this would still expose the unwinder as local attack
	 * surface.
	 * Therefore, this interface is restricted to root.
	 */
	if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN))
		return -EACCES;

	entries = kmalloc_array(MAX_STACK_TRACE_DEPTH, sizeof(*entries),
				GFP_KERNEL);
	if (!entries)
		return -ENOMEM;

	err = lock_trace(task);
	if (!err) {
		unsigned int i, nr_entries;

		nr_entries = stack_trace_save_tsk(task, entries,
						  MAX_STACK_TRACE_DEPTH, 0);

		for (i = 0; i < nr_entries; i++) {
			seq_printf(m, "[<0>] %pB\n", (void *)entries[i]);
		}

		unlock_trace(task);
	}
	kfree(entries);

	return err;
}
#endif
506
#ifdef CONFIG_SCHED_INFO
/*
 * Provides /proc/PID/schedstat
 *
 * Emits three numbers: se.sum_exec_runtime, sched_info.run_delay and
 * sched_info.pcount of @task, or "0 0 0" when sched_info_on() is false.
 * Always returns 0.
 */
static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns,
			      struct pid *pid, struct task_struct *task)
{
	if (unlikely(!sched_info_on()))
		seq_puts(m, "0 0 0\n");
	else
		seq_printf(m, "%llu %llu %lu\n",
		   (unsigned long long)task->se.sum_exec_runtime,
		   (unsigned long long)task->sched_info.run_delay,
		   task->sched_info.pcount);

	return 0;
}
#endif
525
526	#ifdef CONFIG_LATENCYTOP
527	static int lstats_show_proc(struct seq_file m, void* *v)
528	{
529	int i;
530	struct inode *inode = m->private;
531	struct task_struct *task = get_proc_task(inode);
532
533	if (!task)
534	return -ESRCH;
535	seq_puts(m, s: "Latency Top version : v0.1\n");
536	for (i = `0`; i < LT_SAVECOUNT; i++) {
537	struct latency_record *lr = &task->latency_record[i];
538	if (lr->backtrace[`0`]) {
539	int q;
540	seq_printf(m, fmt: "%i %li %li",
541	lr->count, lr->time, lr->max);
542	for (q = `0`; q < LT_BACKTRACEDEPTH; q++) {
543	unsigned long bt = lr->backtrace[q];
544
545	if (!bt)
546	break;
547	seq_printf(m, fmt: " %ps", (void *)bt);
548	}
549	seq_putc(m, c: `'\n'`);
550	}
551
552	}
553	put_task_struct(t: task);
554	return `0`;
555	}
556
557	static int lstats_open(struct inode inode, struct* file *file)
558	{
559	return single_open(file, lstats_show_proc, inode);
560	}
561
562	static ssize_t lstats_write(struct file file, const* char __user *buf,
563	size_t count, loff_t *offs)
564	{
565	struct task_struct *task = get_proc_task(inode: file_inode(f: file));
566
567	if (!task)
568	return -ESRCH;
569	clear_tsk_latency_tracing(p: task);
570	put_task_struct(t: task);
571
572	return count;
573	}
574
575	static const struct file_operations proc_lstats_operations = {
576	.open = lstats_open,
577	.read = seq_read,
578	.write = lstats_write,
579	.llseek = seq_lseek,
580	.release = single_release,
581	};
582
583	#endif
584
585	static int proc_oom_score(struct seq_file m, struct* pid_namespace *ns,
586	struct pid pid, struct* task_struct *task)
587	{
588	unsigned long totalpages = totalram_pages() + total_swap_pages;
589	unsigned long points = `0`;
590	long badness;
591
592	badness = oom_badness(p: task, totalpages);
593	/*
594	* Special case OOM_SCORE_ADJ_MIN for all others scale the
595	* badness value into [0, 2000] range which we have been
596	* exporting for a long time so userspace might depend on it.
597	*/
598	if (badness != LONG_MIN)
599	points = (`1000` + badness * `1000` / (long)totalpages) * `2` / `3`;
600
601	seq_printf(m, fmt: "%lu\n", points);
602
603	return `0`;
604	}
605
/* Display strings for one resource limit (see lnames[]). */
struct limit_names {
	const char *name;	/* human-readable limit name */
	const char *unit;	/* unit label, or NULL to print none */
};
610
611	static const struct limit_names lnames[RLIM_NLIMITS] = {
612	[RLIMIT_CPU] = {"Max cpu time", "seconds"},
613	[RLIMIT_FSIZE] = {"Max file size", "bytes"},
614	[RLIMIT_DATA] = {"Max data size", "bytes"},
615	[RLIMIT_STACK] = {"Max stack size", "bytes"},
616	[RLIMIT_CORE] = {"Max core file size", "bytes"},
617	[RLIMIT_RSS] = {"Max resident set", "bytes"},
618	[RLIMIT_NPROC] = {"Max processes", "processes"},
619	[RLIMIT_NOFILE] = {"Max open files", "files"},
620	[RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"},
621	[RLIMIT_AS] = {"Max address space", "bytes"},
622	[RLIMIT_LOCKS] = {"Max file locks", "locks"},
623	[RLIMIT_SIGPENDING] = {"Max pending signals", "signals"},
624	[RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"},
625	[RLIMIT_NICE] = {"Max nice priority", NULL},
626	[RLIMIT_RTPRIO] = {"Max realtime priority", NULL},
627	[RLIMIT_RTTIME] = {"Max realtime timeout", "us"},
628	};
629
630	/ Display limits for a process /
631	static int proc_pid_limits(struct seq_file m, struct* pid_namespace *ns,
632	struct pid pid, struct* task_struct *task)
633	{
634	unsigned int i;
635	unsigned long flags;
636
637	struct rlimit rlim[RLIM_NLIMITS];
638
639	if (!lock_task_sighand(task, flags: &flags))
640	return `0`;
641	memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
642	unlock_task_sighand(task, flags: &flags);
643
644	/*
645	* print the file header
646	*/
647	seq_puts(m, s: "Limit "
648	"Soft Limit "
649	"Hard Limit "
650	"Units \n");
651
652	for (i = `0`; i < RLIM_NLIMITS; i++) {
653	if (rlim[i].rlim_cur == RLIM_INFINITY)
654	seq_printf(m, fmt: "%-25s %-20s ",
655	lnames[i].name, "unlimited");
656	else
657	seq_printf(m, fmt: "%-25s %-20lu ",
658	lnames[i].name, rlim[i].rlim_cur);
659
660	if (rlim[i].rlim_max == RLIM_INFINITY)
661	seq_printf(m, fmt: "%-20s ", "unlimited");
662	else
663	seq_printf(m, fmt: "%-20lu ", rlim[i].rlim_max);
664
665	if (lnames[i].unit)
666	seq_printf(m, fmt: "%-10s\n", lnames[i].unit);
667	else
668	seq_putc(m, c: `'\n'`);
669	}
670
671	return `0`;
672	}
673
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
/*
 * Show what system call @task is currently in.
 *
 * Output is one of:
 *   "running"                          - task_current_syscall() failed
 *   "<nr> <sp> <ip>"                   - the reported syscall nr is negative
 *   "<nr> <arg0..arg5> <sp> <ip>"      - otherwise
 *
 * Returns 0, or the error from lock_trace().
 */
static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
			    struct pid *pid, struct task_struct *task)
{
	struct syscall_info info;
	u64 *args = &info.data.args[0];
	int res;

	res = lock_trace(task);
	if (res)
		return res;

	if (task_current_syscall(task, &info))
		seq_puts(m, "running\n");
	else if (info.data.nr < 0)
		seq_printf(m, "%d 0x%llx 0x%llx\n",
			   info.data.nr, info.sp, info.data.instruction_pointer);
	else
		seq_printf(m,
		       "%d 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx\n",
		       info.data.nr,
		       args[0], args[1], args[2], args[3], args[4], args[5],
		       info.sp, info.data.instruction_pointer);
	unlock_trace(task);

	return 0;
}
#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
702
/************************************************************************/
/*                       Here the fs part begins                        */
/************************************************************************/
706
707	/ permission checks /
708	static bool proc_fd_access_allowed(struct inode *inode)
709	{
710	struct task_struct *task;
711	bool allowed = false;
712	/ Allow access to a task's file descriptors if it is us or we*
713	* may use ptrace attach to the process and find out that
714	* information.
715	*/
716	task = get_proc_task(inode);
717	if (task) {
718	allowed = ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
719	put_task_struct(t: task);
720	}
721	return allowed;
722	}
723
724	int proc_setattr(struct mnt_idmap idmap, struct* dentry *dentry,
725	struct iattr *attr)
726	{
727	int error;
728	struct inode *inode = d_inode(dentry);
729
730	if (attr->ia_valid & ATTR_MODE)
731	return -EPERM;
732
733	error = setattr_prepare(&nop_mnt_idmap, dentry, attr);
734	if (error)
735	return error;
736
737	setattr_copy(&nop_mnt_idmap, inode, attr);
738	return `0`;
739	}
740
741	/*
742	* May current process learn task's sched/cmdline info (for hide_pid_min=1)
743	* or euid/egid (for hide_pid_min=2)?
744	*/
745	static bool has_pid_permissions(struct proc_fs_info *fs_info,
746	struct task_struct *task,
747	enum proc_hidepid hide_pid_min)
748	{
749	/*
750	* If 'hidpid' mount option is set force a ptrace check,
751	* we indicate that we are using a filesystem syscall
752	* by passing PTRACE_MODE_READ_FSCREDS
753	*/
754	if (fs_info->hide_pid == HIDEPID_NOT_PTRACEABLE)
755	return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
756
757	if (fs_info->hide_pid < hide_pid_min)
758	return true;
759	if (in_group_p(fs_info->pid_gid))
760	return true;
761	return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
762	}
763
764
765	static int proc_pid_permission(struct mnt_idmap *idmap,
766	struct inode inode, int* mask)
767	{
768	struct proc_fs_info *fs_info = proc_sb_info(sb: inode->i_sb);
769	struct task_struct *task;
770	bool has_perms;
771
772	task = get_proc_task(inode);
773	if (!task)
774	return -ESRCH;
775	has_perms = has_pid_permissions(fs_info, task, hide_pid_min: HIDEPID_NO_ACCESS);
776	put_task_struct(t: task);
777
778	if (!has_perms) {
779	if (fs_info->hide_pid == HIDEPID_INVISIBLE) {
780	/*
781	* Let's make getdents(), stat(), and open()
782	* consistent with each other. If a process
783	* may not stat() a file, it shouldn't be seen
784	* in procfs at all.
785	*/
786	return -ENOENT;
787	}
788
789	return -EPERM;
790	}
791	return generic_permission(&nop_mnt_idmap, inode, mask);
792	}
793
794
795
796	static const struct inode_operations proc_def_inode_operations = {
797	.setattr = proc_setattr,
798	};
799
800	static int proc_single_show(struct seq_file m, void* *v)
801	{
802	struct inode *inode = m->private;
803	struct pid_namespace *ns = proc_pid_ns(sb: inode->i_sb);
804	struct pid *pid = proc_pid(inode);
805	struct task_struct *task;
806	int ret;
807
808	task = get_pid_task(pid, PIDTYPE_PID);
809	if (!task)
810	return -ESRCH;
811
812	ret = PROC_I(inode)->op.proc_show(m, ns, pid, task);
813
814	put_task_struct(t: task);
815	return ret;
816	}
817
818	static int proc_single_open(struct inode inode, struct* file *filp)
819	{
820	return single_open(filp, proc_single_show, inode);
821	}
822
823	static const struct file_operations proc_single_file_operations = {
824	.open = proc_single_open,
825	.read = seq_read,
826	.llseek = seq_lseek,
827	.release = single_release,
828	};
829
830	/*
831	* proc_mem_open() can return errno, NULL or mm_struct*.
832	*
833	* - Returns NULL if the task has no mm (PF_KTHREAD or PF_EXITING)
834	* - Returns mm_struct* on success
835	* - Returns error code on failure
836	*/
837	struct mm_struct proc_mem_open(struct* inode inode, unsigned* int mode)
838	{
839	struct task_struct *task = get_proc_task(inode);
840	struct mm_struct *mm;
841
842	if (!task)
843	return ERR_PTR(error: -ESRCH);
844
845	mm = mm_access(task, mode: mode \| PTRACE_MODE_FSCREDS);
846	put_task_struct(t: task);
847
848	if (IS_ERR(ptr: mm))
849	return mm == ERR_PTR(error: -ESRCH) ? NULL : mm;
850
851	/ ensure this mm_struct can't be freed /
852	mmgrab(mm);
853	/ but do not pin its memory /
854	mmput(mm);
855
856	return mm;
857	}
858
859	static int __mem_open(struct inode inode, struct* file file, unsigned* int mode)
860	{
861	struct mm_struct *mm = proc_mem_open(inode, mode);
862
863	if (IS_ERR_OR_NULL(ptr: mm))
864	return mm ? PTR_ERR(ptr: mm) : -ESRCH;
865
866	file->private_data = mm;
867	return `0`;
868	}
869
870	static int mem_open(struct inode inode, struct* file *file)
871	{
872	if (WARN_ON_ONCE(!(file->f_op->fop_flags & FOP_UNSIGNED_OFFSET)))
873	return -EINVAL;
874	return __mem_open(inode, file, PTRACE_MODE_ATTACH);
875	}
876
877	static bool proc_mem_foll_force(struct file file, struct* mm_struct *mm)
878	{
879	struct task_struct *task;
880	bool ptrace_active = false;
881
882	switch (proc_mem_force_override) {
883	case PROC_MEM_FORCE_NEVER:
884	return false;
885	case PROC_MEM_FORCE_PTRACE:
886	task = get_proc_task(inode: file_inode(f: file));
887	if (task) {
888	ptrace_active = READ_ONCE(task->ptrace) &&
889	READ_ONCE(task->mm) == mm &&
890	READ_ONCE(task->parent) == current;
891	put_task_struct(t: task);
892	}
893	return ptrace_active;
894	default:
895	return true;
896	}
897	}
898
899	static ssize_t mem_rw(struct file file, char* __user *buf,
900	size_t count, loff_t ppos, int* write)
901	{
902	struct mm_struct *mm = file->private_data;
903	unsigned long addr = *ppos;
904	ssize_t copied;
905	char *page;
906	unsigned int flags;
907
908	if (!mm)
909	return `0`;
910
911	page = (char *)__get_free_page(GFP_KERNEL);
912	if (!page)
913	return -ENOMEM;
914
915	copied = `0`;
916	if (!mmget_not_zero(mm))
917	goto free;
918
919	flags = write ? FOLL_WRITE : `0`;
920	if (proc_mem_foll_force(file, mm))
921	flags \|= FOLL_FORCE;
922
923	while (count > `0`) {
924	size_t this_len = min_t(size_t, count, PAGE_SIZE);
925
926	if (write && copy_from_user(to: page, from: buf, n: this_len)) {
927	copied = -EFAULT;
928	break;
929	}
930
931	this_len = access_remote_vm(mm, addr, buf: page, len: this_len, gup_flags: flags);
932	if (!this_len) {
933	if (!copied)
934	copied = -EIO;
935	break;
936	}
937
938	if (!write && copy_to_user(to: buf, from: page, n: this_len)) {
939	copied = -EFAULT;
940	break;
941	}
942
943	buf += this_len;
944	addr += this_len;
945	copied += this_len;
946	count -= this_len;
947	}
948	*ppos = addr;
949
950	mmput(mm);
951	free:
952	free_page((unsigned long) page);
953	return copied;
954	}
955
956	static ssize_t mem_read(struct file file, char* __user *buf,
957	size_t count, loff_t *ppos)
958	{
959	return mem_rw(file, buf, count, ppos, write: `0`);
960	}
961
962	static ssize_t mem_write(struct file file, const* char __user *buf,
963	size_t count, loff_t *ppos)
964	{
965	return mem_rw(file, buf: (char __user*)buf, count, ppos, write: `1`);
966	}
967
968	loff_t mem_lseek(struct file file, loff_t offset, int* orig)
969	{
970	switch (orig) {
971	case `0`:
972	file->f_pos = offset;
973	break;
974	case `1`:
975	file->f_pos += offset;
976	break;
977	default:
978	return -EINVAL;
979	}
980	force_successful_syscall_return();
981	return file->f_pos;
982	}
983
984	static int mem_release(struct inode inode, struct* file *file)
985	{
986	struct mm_struct *mm = file->private_data;
987	if (mm)
988	mmdrop(mm);
989	return `0`;
990	}
991
992	static const struct file_operations proc_mem_operations = {
993	.llseek = mem_lseek,
994	.read = mem_read,
995	.write = mem_write,
996	.open = mem_open,
997	.release = mem_release,
998	.fop_flags = FOP_UNSIGNED_OFFSET,
999	};
1000
1001	static int environ_open(struct inode inode, struct* file *file)
1002	{
1003	return __mem_open(inode, file, PTRACE_MODE_READ);
1004	}
1005
1006	static ssize_t environ_read(struct file file, char* __user *buf,
1007	size_t count, loff_t *ppos)
1008	{
1009	char *page;
1010	unsigned long src = *ppos;
1011	int ret = `0`;
1012	struct mm_struct *mm = file->private_data;
1013	unsigned long env_start, env_end;
1014
1015	/ Ensure the process spawned far enough to have an environment. /
1016	if (!mm \|\| !mm->env_end)
1017	return `0`;
1018
1019	page = (char *)__get_free_page(GFP_KERNEL);
1020	if (!page)
1021	return -ENOMEM;
1022
1023	ret = `0`;
1024	if (!mmget_not_zero(mm))
1025	goto free;
1026
1027	spin_lock(lock: &mm->arg_lock);
1028	env_start = mm->env_start;
1029	env_end = mm->env_end;
1030	spin_unlock(lock: &mm->arg_lock);
1031
1032	while (count > `0`) {
1033	size_t this_len, max_len;
1034	int retval;
1035
1036	if (src >= (env_end - env_start))
1037	break;
1038
1039	this_len = env_end - (env_start + src);
1040
1041	max_len = min_t(size_t, PAGE_SIZE, count);
1042	this_len = min(max_len, this_len);
1043
1044	retval = access_remote_vm(mm, addr: (env_start + src), buf: page, len: this_len, gup_flags: FOLL_ANON);
1045
1046	if (retval <= `0`) {
1047	ret = retval;
1048	break;
1049	}
1050
1051	if (copy_to_user(to: buf, from: page, n: retval)) {
1052	ret = -EFAULT;
1053	break;
1054	}
1055
1056	ret += retval;
1057	src += retval;
1058	buf += retval;
1059	count -= retval;
1060	}
1061	*ppos = src;
1062	mmput(mm);
1063
1064	free:
1065	free_page((unsigned long) page);
1066	return ret;
1067	}
1068
1069	static const struct file_operations proc_environ_operations = {
1070	.open = environ_open,
1071	.read = environ_read,
1072	.llseek = generic_file_llseek,
1073	.release = mem_release,
1074	};
1075
1076	static int auxv_open(struct inode inode, struct* file *file)
1077	{
1078	return __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS);
1079	}
1080
1081	static ssize_t auxv_read(struct file file, char* __user *buf,
1082	size_t count, loff_t *ppos)
1083	{
1084	struct mm_struct *mm = file->private_data;
1085	unsigned int nwords = `0`;
1086
1087	if (!mm)
1088	return `0`;
1089	do {
1090	nwords += `2`;
1091	} while (mm->saved_auxv[nwords - `2`] != `0`); / AT_NULL /
1092	return simple_read_from_buffer(to: buf, count, ppos, from: mm->saved_auxv,
1093	available: nwords * sizeof(mm->saved_auxv[`0`]));
1094	}
1095
1096	static const struct file_operations proc_auxv_operations = {
1097	.open = auxv_open,
1098	.read = auxv_read,
1099	.llseek = generic_file_llseek,
1100	.release = mem_release,
1101	};
1102
1103	static ssize_t oom_adj_read(struct file file, char* __user *buf, size_t count,
1104	loff_t *ppos)
1105	{
1106	struct task_struct *task = get_proc_task(inode: file_inode(f: file));
1107	char buffer[PROC_NUMBUF];
1108	int oom_adj = OOM_ADJUST_MIN;
1109	size_t len;
1110
1111	if (!task)
1112	return -ESRCH;
1113	if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX)
1114	oom_adj = OOM_ADJUST_MAX;
1115	else
1116	oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) /
1117	OOM_SCORE_ADJ_MAX;
1118	put_task_struct(t: task);
1119	if (oom_adj > OOM_ADJUST_MAX)
1120	oom_adj = OOM_ADJUST_MAX;
1121	len = snprintf(buf: buffer, size: sizeof(buffer), fmt: "%d\n", oom_adj);
1122	return simple_read_from_buffer(to: buf, count, ppos, from: buffer, available: len);
1123	}
1124
1125	static int __set_oom_adj(struct file file, int* oom_adj, bool legacy)
1126	{
1127	struct mm_struct *mm = NULL;
1128	struct task_struct *task;
1129	int err = `0`;
1130
1131	task = get_proc_task(inode: file_inode(f: file));
1132	if (!task)
1133	return -ESRCH;
1134
1135	mutex_lock(&oom_adj_mutex);
1136	if (legacy) {
1137	if (oom_adj < task->signal->oom_score_adj &&
1138	!capable(CAP_SYS_RESOURCE)) {
1139	err = -EACCES;
1140	goto err_unlock;
1141	}
1142	/*
1143	* /proc/pid/oom_adj is provided for legacy purposes, ask users to use
1144	* /proc/pid/oom_score_adj instead.
1145	*/
1146	pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
1147	current->comm, task_pid_nr(current), task_pid_nr(task),
1148	task_pid_nr(task));
1149	} else {
1150	if ((short)oom_adj < task->signal->oom_score_adj_min &&
1151	!capable(CAP_SYS_RESOURCE)) {
1152	err = -EACCES;
1153	goto err_unlock;
1154	}
1155	}
1156
1157	/*
1158	* Make sure we will check other processes sharing the mm if this is
1159	* not vfrok which wants its own oom_score_adj.
1160	* pin the mm so it doesn't go away and get reused after task_unlock
1161	*/
1162	if (!task->vfork_done) {
1163	struct task_struct *p = find_lock_task_mm(p: task);
1164
1165	if (p) {
1166	if (mm_flags_test(MMF_MULTIPROCESS, mm: p->mm)) {
1167	mm = p->mm;
1168	mmgrab(mm);
1169	}
1170	task_unlock(p);
1171	}
1172	}
1173
1174	task->signal->oom_score_adj = oom_adj;
1175	if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
1176	task->signal->oom_score_adj_min = (short)oom_adj;
1177	trace_oom_score_adj_update(task);
1178
1179	if (mm) {
1180	struct task_struct *p;
1181
1182	rcu_read_lock();
1183	for_each_process(p) {
1184	if (same_thread_group(p1: task, p2: p))
1185	continue;
1186
1187	/ do not touch kernel threads or the global init /
1188	if (p->flags & PF_KTHREAD \|\| is_global_init(tsk: p))
1189	continue;
1190
1191	task_lock(p);
1192	if (!p->vfork_done && process_shares_mm(p, mm)) {
1193	p->signal->oom_score_adj = oom_adj;
1194	if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
1195	p->signal->oom_score_adj_min = (short)oom_adj;
1196	}
1197	task_unlock(p);
1198	}
1199	rcu_read_unlock();
1200	mmdrop(mm);
1201	}
1202	err_unlock:
1203	mutex_unlock(lock: &oom_adj_mutex);
1204	put_task_struct(t: task);
1205	return err;
1206	}
1207
1208	/*
1209	* /proc/pid/oom_adj exists solely for backwards compatibility with previous
1210	* kernels. The effective policy is defined by oom_score_adj, which has a
1211	* different scale: oom_adj grew exponentially and oom_score_adj grows linearly.
1212	* Values written to oom_adj are simply mapped linearly to oom_score_adj.
1213	* Processes that become oom disabled via oom_adj will still be oom disabled
1214	* with this implementation.
1215	*
1216	* oom_adj cannot be removed since existing userspace binaries use it.
1217	*/
1218	static ssize_t oom_adj_write(struct file file, const* char __user *buf,
1219	size_t count, loff_t *ppos)
1220	{
1221	char buffer[PROC_NUMBUF] = {};
1222	int oom_adj;
1223	int err;
1224
1225	if (count > sizeof(buffer) - `1`)
1226	count = sizeof(buffer) - `1`;
1227	if (copy_from_user(to: buffer, from: buf, n: count)) {
1228	err = -EFAULT;
1229	goto out;
1230	}
1231
1232	err = kstrtoint(s: strstrip(str: buffer), base: `0`, res: &oom_adj);
1233	if (err)
1234	goto out;
1235	if ((oom_adj < OOM_ADJUST_MIN \|\| oom_adj > OOM_ADJUST_MAX) &&
1236	oom_adj != OOM_DISABLE) {
1237	err = -EINVAL;
1238	goto out;
1239	}
1240
1241	/*
1242	* Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
1243	* value is always attainable.
1244	*/
1245	if (oom_adj == OOM_ADJUST_MAX)
1246	oom_adj = OOM_SCORE_ADJ_MAX;
1247	else
1248	oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;
1249
1250	err = __set_oom_adj(file, oom_adj, legacy: true);
1251	out:
1252	return err < `0` ? err : count;
1253	}
1254
1255	static const struct file_operations proc_oom_adj_operations = {
1256	.read = oom_adj_read,
1257	.write = oom_adj_write,
1258	.llseek = generic_file_llseek,
1259	};
1260
1261	static ssize_t oom_score_adj_read(struct file file, char* __user *buf,
1262	size_t count, loff_t *ppos)
1263	{
1264	struct task_struct *task = get_proc_task(inode: file_inode(f: file));
1265	char buffer[PROC_NUMBUF];
1266	short oom_score_adj = OOM_SCORE_ADJ_MIN;
1267	size_t len;
1268
1269	if (!task)
1270	return -ESRCH;
1271	oom_score_adj = task->signal->oom_score_adj;
1272	put_task_struct(t: task);
1273	len = snprintf(buf: buffer, size: sizeof(buffer), fmt: "%hd\n", oom_score_adj);
1274	return simple_read_from_buffer(to: buf, count, ppos, from: buffer, available: len);
1275	}
1276
1277	static ssize_t oom_score_adj_write(struct file file, const* char __user *buf,
1278	size_t count, loff_t *ppos)
1279	{
1280	char buffer[PROC_NUMBUF] = {};
1281	int oom_score_adj;
1282	int err;
1283
1284	if (count > sizeof(buffer) - `1`)
1285	count = sizeof(buffer) - `1`;
1286	if (copy_from_user(to: buffer, from: buf, n: count)) {
1287	err = -EFAULT;
1288	goto out;
1289	}
1290
1291	err = kstrtoint(s: strstrip(str: buffer), base: `0`, res: &oom_score_adj);
1292	if (err)
1293	goto out;
1294	if (oom_score_adj < OOM_SCORE_ADJ_MIN \|\|
1295	oom_score_adj > OOM_SCORE_ADJ_MAX) {
1296	err = -EINVAL;
1297	goto out;
1298	}
1299
1300	err = __set_oom_adj(file, oom_adj: oom_score_adj, legacy: false);
1301	out:
1302	return err < `0` ? err : count;
1303	}
1304
1305	static const struct file_operations proc_oom_score_adj_operations = {
1306	.read = oom_score_adj_read,
1307	.write = oom_score_adj_write,
1308	.llseek = default_llseek,
1309	};
1310
1311	#ifdef CONFIG_AUDIT
1312	#define TMPBUFLEN 11
1313	static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
1314	size_t count, loff_t *ppos)
1315	{
1316	struct inode * inode = file_inode(f: file);
1317	struct task_struct *task = get_proc_task(inode);
1318	ssize_t length;
1319	char tmpbuf[TMPBUFLEN];
1320
1321	if (!task)
1322	return -ESRCH;
1323	length = scnprintf(buf: tmpbuf, TMPBUFLEN, fmt: "%u",
1324	from_kuid(to: file->f_cred->user_ns,
1325	uid: audit_get_loginuid(tsk: task)));
1326	put_task_struct(t: task);
1327	return simple_read_from_buffer(to: buf, count, ppos, from: tmpbuf, available: length);
1328	}
1329
1330	static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
1331	size_t count, loff_t *ppos)
1332	{
1333	struct inode * inode = file_inode(f: file);
1334	uid_t loginuid;
1335	kuid_t kloginuid;
1336	int rv;
1337
1338	/ Don't let kthreads write their own loginuid /
1339	if (current->flags & PF_KTHREAD)
1340	return -EPERM;
1341
1342	rcu_read_lock();
1343	if (current != pid_task(pid: proc_pid(inode), PIDTYPE_PID)) {
1344	rcu_read_unlock();
1345	return -EPERM;
1346	}
1347	rcu_read_unlock();
1348
1349	if (*ppos != `0`) {
1350	/ No partial writes. /
1351	return -EINVAL;
1352	}
1353
1354	rv = kstrtou32_from_user(s: buf, count, base: `10`, res: &loginuid);
1355	if (rv < `0`)
1356	return rv;
1357
1358	/ is userspace tring to explicitly UNSET the loginuid? /
1359	if (loginuid == AUDIT_UID_UNSET) {
1360	kloginuid = INVALID_UID;
1361	} else {
1362	kloginuid = make_kuid(from: file->f_cred->user_ns, uid: loginuid);
1363	if (!uid_valid(uid: kloginuid))
1364	return -EINVAL;
1365	}
1366
1367	rv = audit_set_loginuid(loginuid: kloginuid);
1368	if (rv < `0`)
1369	return rv;
1370	return count;
1371	}
1372
1373	static const struct file_operations proc_loginuid_operations = {
1374	.read = proc_loginuid_read,
1375	.write = proc_loginuid_write,
1376	.llseek = generic_file_llseek,
1377	};
1378
1379	static ssize_t proc_sessionid_read(struct file * file, char __user * buf,
1380	size_t count, loff_t *ppos)
1381	{
1382	struct inode * inode = file_inode(f: file);
1383	struct task_struct *task = get_proc_task(inode);
1384	ssize_t length;
1385	char tmpbuf[TMPBUFLEN];
1386
1387	if (!task)
1388	return -ESRCH;
1389	length = scnprintf(buf: tmpbuf, TMPBUFLEN, fmt: "%u",
1390	audit_get_sessionid(tsk: task));
1391	put_task_struct(t: task);
1392	return simple_read_from_buffer(to: buf, count, ppos, from: tmpbuf, available: length);
1393	}
1394
1395	static const struct file_operations proc_sessionid_operations = {
1396	.read = proc_sessionid_read,
1397	.llseek = generic_file_llseek,
1398	};
1399	#endif
1400
1401	#ifdef CONFIG_FAULT_INJECTION
1402	static ssize_t proc_fault_inject_read(struct file * file, char __user * buf,
1403	size_t count, loff_t *ppos)
1404	{
1405	struct task_struct *task = get_proc_task(inode: file_inode(f: file));
1406	char buffer[PROC_NUMBUF];
1407	size_t len;
1408	int make_it_fail;
1409
1410	if (!task)
1411	return -ESRCH;
1412	make_it_fail = task->make_it_fail;
1413	put_task_struct(t: task);
1414
1415	len = snprintf(buf: buffer, size: sizeof(buffer), fmt: "%i\n", make_it_fail);
1416
1417	return simple_read_from_buffer(to: buf, count, ppos, from: buffer, available: len);
1418	}
1419
1420	static ssize_t proc_fault_inject_write(struct file * file,
1421	const char __user * buf, size_t count, loff_t *ppos)
1422	{
1423	struct task_struct *task;
1424	char buffer[PROC_NUMBUF] = {};
1425	int make_it_fail;
1426	int rv;
1427
1428	if (!capable(CAP_SYS_RESOURCE))
1429	return -EPERM;
1430
1431	if (count > sizeof(buffer) - `1`)
1432	count = sizeof(buffer) - `1`;
1433	if (copy_from_user(to: buffer, from: buf, n: count))
1434	return -EFAULT;
1435	rv = kstrtoint(s: strstrip(str: buffer), base: `0`, res: &make_it_fail);
1436	if (rv < `0`)
1437	return rv;
1438	if (make_it_fail < `0` \|\| make_it_fail > `1`)
1439	return -EINVAL;
1440
1441	task = get_proc_task(inode: file_inode(f: file));
1442	if (!task)
1443	return -ESRCH;
1444	task->make_it_fail = make_it_fail;
1445	put_task_struct(t: task);
1446
1447	return count;
1448	}
1449
1450	static const struct file_operations proc_fault_inject_operations = {
1451	.read = proc_fault_inject_read,
1452	.write = proc_fault_inject_write,
1453	.llseek = generic_file_llseek,
1454	};
1455
1456	static ssize_t proc_fail_nth_write(struct file file, const* char __user *buf,
1457	size_t count, loff_t *ppos)
1458	{
1459	struct task_struct *task;
1460	int err;
1461	unsigned int n;
1462
1463	err = kstrtouint_from_user(s: buf, count, base: `0`, res: &n);
1464	if (err)
1465	return err;
1466
1467	task = get_proc_task(inode: file_inode(f: file));
1468	if (!task)
1469	return -ESRCH;
1470	task->fail_nth = n;
1471	put_task_struct(t: task);
1472
1473	return count;
1474	}
1475
1476	static ssize_t proc_fail_nth_read(struct file file, char* __user *buf,
1477	size_t count, loff_t *ppos)
1478	{
1479	struct task_struct *task;
1480	char numbuf[PROC_NUMBUF];
1481	ssize_t len;
1482
1483	task = get_proc_task(inode: file_inode(f: file));
1484	if (!task)
1485	return -ESRCH;
1486	len = snprintf(buf: numbuf, size: sizeof(numbuf), fmt: "%u\n", task->fail_nth);
1487	put_task_struct(t: task);
1488	return simple_read_from_buffer(to: buf, count, ppos, from: numbuf, available: len);
1489	}
1490
1491	static const struct file_operations proc_fail_nth_operations = {
1492	.read = proc_fail_nth_read,
1493	.write = proc_fail_nth_write,
1494	};
1495	#endif
1496
1497
1498	/*
1499	* Print out various scheduling related per-task fields:
1500	*/
1501	static int sched_show(struct seq_file m, void* *v)
1502	{
1503	struct inode *inode = m->private;
1504	struct pid_namespace *ns = proc_pid_ns(sb: inode->i_sb);
1505	struct task_struct *p;
1506
1507	p = get_proc_task(inode);
1508	if (!p)
1509	return -ESRCH;
1510	proc_sched_show_task(p, ns, m);
1511
1512	put_task_struct(t: p);
1513
1514	return `0`;
1515	}
1516
1517	static ssize_t
1518	sched_write(struct file file, const* char __user *buf,
1519	size_t count, loff_t *offset)
1520	{
1521	struct inode *inode = file_inode(f: file);
1522	struct task_struct *p;
1523
1524	p = get_proc_task(inode);
1525	if (!p)
1526	return -ESRCH;
1527	proc_sched_set_task(p);
1528
1529	put_task_struct(t: p);
1530
1531	return count;
1532	}
1533
1534	static int sched_open(struct inode inode, struct* file *filp)
1535	{
1536	return single_open(filp, sched_show, inode);
1537	}
1538
1539	static const struct file_operations proc_pid_sched_operations = {
1540	.open = sched_open,
1541	.read = seq_read,
1542	.write = sched_write,
1543	.llseek = seq_lseek,
1544	.release = single_release,
1545	};
1546
1547	#ifdef CONFIG_SCHED_AUTOGROUP
1548	/*
1549	* Print out autogroup related information:
1550	*/
1551	static int sched_autogroup_show(struct seq_file m, void* *v)
1552	{
1553	struct inode *inode = m->private;
1554	struct task_struct *p;
1555
1556	p = get_proc_task(inode);
1557	if (!p)
1558	return -ESRCH;
1559	proc_sched_autogroup_show_task(p, m);
1560
1561	put_task_struct(t: p);
1562
1563	return `0`;
1564	}
1565
1566	static ssize_t
1567	sched_autogroup_write(struct file file, const* char __user *buf,
1568	size_t count, loff_t *offset)
1569	{
1570	struct inode *inode = file_inode(f: file);
1571	struct task_struct *p;
1572	char buffer[PROC_NUMBUF] = {};
1573	int nice;
1574	int err;
1575
1576	if (count > sizeof(buffer) - `1`)
1577	count = sizeof(buffer) - `1`;
1578	if (copy_from_user(to: buffer, from: buf, n: count))
1579	return -EFAULT;
1580
1581	err = kstrtoint(s: strstrip(str: buffer), base: `0`, res: &nice);
1582	if (err < `0`)
1583	return err;
1584
1585	p = get_proc_task(inode);
1586	if (!p)
1587	return -ESRCH;
1588
1589	err = proc_sched_autogroup_set_nice(p, nice);
1590	if (err)
1591	count = err;
1592
1593	put_task_struct(t: p);
1594
1595	return count;
1596	}
1597
1598	static int sched_autogroup_open(struct inode inode, struct* file *filp)
1599	{
1600	int ret;
1601
1602	ret = single_open(filp, sched_autogroup_show, NULL);
1603	if (!ret) {
1604	struct seq_file *m = filp->private_data;
1605
1606	m->private = inode;
1607	}
1608	return ret;
1609	}
1610
1611	static const struct file_operations proc_pid_sched_autogroup_operations = {
1612	.open = sched_autogroup_open,
1613	.read = seq_read,
1614	.write = sched_autogroup_write,
1615	.llseek = seq_lseek,
1616	.release = single_release,
1617	};
1618
1619	#endif /* CONFIG_SCHED_AUTOGROUP */
1620
1621	#ifdef CONFIG_TIME_NS
1622	static int timens_offsets_show(struct seq_file m, void* *v)
1623	{
1624	struct task_struct *p;
1625
1626	p = get_proc_task(inode: file_inode(f: m->file));
1627	if (!p)
1628	return -ESRCH;
1629	proc_timens_show_offsets(p, m);
1630
1631	put_task_struct(t: p);
1632
1633	return `0`;
1634	}
1635
1636	static ssize_t timens_offsets_write(struct file file, const* char __user *buf,
1637	size_t count, loff_t *ppos)
1638	{
1639	struct inode *inode = file_inode(f: file);
1640	struct proc_timens_offset offsets[`2`];
1641	char kbuf = NULL, pos, *next_line;
1642	struct task_struct *p;
1643	int ret, noffsets;
1644
1645	/ Only allow < page size writes at the beginning of the file /
1646	if ((*ppos != `0`) \|\| (count >= PAGE_SIZE))
1647	return -EINVAL;
1648
1649	/ Slurp in the user data /
1650	kbuf = memdup_user_nul(buf, count);
1651	if (IS_ERR(ptr: kbuf))
1652	return PTR_ERR(ptr: kbuf);
1653
1654	/ Parse the user data /
1655	ret = -EINVAL;
1656	noffsets = `0`;
1657	for (pos = kbuf; pos; pos = next_line) {
1658	struct proc_timens_offset *off = &offsets[noffsets];
1659	char clock[`10`];
1660	int err;
1661
1662	/ Find the end of line and ensure we don't look past it /
1663	next_line = strchr(pos, `'\n'`);
1664	if (next_line) {
1665	*next_line = `'\0'`;
1666	next_line++;
1667	if (*next_line == `'\0'`)
1668	next_line = NULL;
1669	}
1670
1671	err = sscanf(pos, "%9s %lld %lu", clock,
1672	&off->val.tv_sec, &off->val.tv_nsec);
1673	if (err != `3` \|\| off->val.tv_nsec >= NSEC_PER_SEC)
1674	goto out;
1675
1676	clock[sizeof(clock) - `1`] = `0`;
1677	if (strcmp(clock, "monotonic") == `0` \|\|
1678	strcmp(clock, __stringify(CLOCK_MONOTONIC)) == `0`)
1679	off->clockid = CLOCK_MONOTONIC;
1680	else if (strcmp(clock, "boottime") == `0` \|\|
1681	strcmp(clock, __stringify(CLOCK_BOOTTIME)) == `0`)
1682	off->clockid = CLOCK_BOOTTIME;
1683	else
1684	goto out;
1685
1686	noffsets++;
1687	if (noffsets == ARRAY_SIZE(offsets)) {
1688	if (next_line)
1689	count = next_line - kbuf;
1690	break;
1691	}
1692	}
1693
1694	ret = -ESRCH;
1695	p = get_proc_task(inode);
1696	if (!p)
1697	goto out;
1698	ret = proc_timens_set_offset(file, p, offsets, n: noffsets);
1699	put_task_struct(t: p);
1700	if (ret)
1701	goto out;
1702
1703	ret = count;
1704	out:
1705	kfree(objp: kbuf);
1706	return ret;
1707	}
1708
1709	static int timens_offsets_open(struct inode inode, struct* file *filp)
1710	{
1711	return single_open(filp, timens_offsets_show, inode);
1712	}
1713
1714	static const struct file_operations proc_timens_offsets_operations = {
1715	.open = timens_offsets_open,
1716	.read = seq_read,
1717	.write = timens_offsets_write,
1718	.llseek = seq_lseek,
1719	.release = single_release,
1720	};
1721	#endif /* CONFIG_TIME_NS */
1722
1723	static ssize_t comm_write(struct file file, const* char __user *buf,
1724	size_t count, loff_t *offset)
1725	{
1726	struct inode *inode = file_inode(f: file);
1727	struct task_struct *p;
1728	char buffer[TASK_COMM_LEN] = {};
1729	const size_t maxlen = sizeof(buffer) - `1`;
1730
1731	if (copy_from_user(to: buffer, from: buf, n: count > maxlen ? maxlen : count))
1732	return -EFAULT;
1733
1734	p = get_proc_task(inode);
1735	if (!p)
1736	return -ESRCH;
1737
1738	if (same_thread_group(current, p2: p)) {
1739	set_task_comm(p, buffer);
1740	proc_comm_connector(task: p);
1741	}
1742	else
1743	count = -EINVAL;
1744
1745	put_task_struct(t: p);
1746
1747	return count;
1748	}
1749
1750	static int comm_show(struct seq_file m, void* *v)
1751	{
1752	struct inode *inode = m->private;
1753	struct task_struct *p;
1754
1755	p = get_proc_task(inode);
1756	if (!p)
1757	return -ESRCH;
1758
1759	proc_task_name(m, p, escape: false);
1760	seq_putc(m, c: `'\n'`);
1761
1762	put_task_struct(t: p);
1763
1764	return `0`;
1765	}
1766
1767	static int comm_open(struct inode inode, struct* file *filp)
1768	{
1769	return single_open(filp, comm_show, inode);
1770	}
1771
1772	static const struct file_operations proc_pid_set_comm_operations = {
1773	.open = comm_open,
1774	.read = seq_read,
1775	.write = comm_write,
1776	.llseek = seq_lseek,
1777	.release = single_release,
1778	};
1779
1780	static int proc_exe_link(struct dentry dentry, struct* path *exe_path)
1781	{
1782	struct task_struct *task;
1783	struct file *exe_file;
1784
1785	task = get_proc_task(inode: d_inode(dentry));
1786	if (!task)
1787	return -ENOENT;
1788	exe_file = get_task_exe_file(task);
1789	put_task_struct(t: task);
1790	if (exe_file) {
1791	*exe_path = exe_file->f_path;
1792	path_get(&exe_file->f_path);
1793	fput(exe_file);
1794	return `0`;
1795	} else
1796	return -ENOENT;
1797	}
1798
1799	static const char proc_pid_get_link(struct* dentry *dentry,
1800	struct inode *inode,
1801	struct delayed_call *done)
1802	{
1803	struct path path;
1804	int error = -EACCES;
1805
1806	if (!dentry)
1807	return ERR_PTR(error: -ECHILD);
1808
1809	/ Are we allowed to snoop on the tasks file descriptors? /
1810	if (!proc_fd_access_allowed(inode))
1811	goto out;
1812
1813	error = PROC_I(inode)->op.proc_get_link(dentry, &path);
1814	if (error)
1815	goto out;
1816
1817	error = nd_jump_link(path: &path);
1818	out:
1819	return ERR_PTR(error);
1820	}
1821
1822	static int do_proc_readlink(const struct path path, char* __user buffer, int* buflen)
1823	{
1824	char *tmp = kmalloc(PATH_MAX, GFP_KERNEL);
1825	char *pathname;
1826	int len;
1827
1828	if (!tmp)
1829	return -ENOMEM;
1830
1831	pathname = d_path(path, tmp, PATH_MAX);
1832	len = PTR_ERR(ptr: pathname);
1833	if (IS_ERR(ptr: pathname))
1834	goto out;
1835	len = tmp + PATH_MAX - `1` - pathname;
1836
1837	if (len > buflen)
1838	len = buflen;
1839	if (copy_to_user(to: buffer, from: pathname, n: len))
1840	len = -EFAULT;
1841	out:
1842	kfree(objp: tmp);
1843	return len;
1844	}
1845
1846	static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen)
1847	{
1848	int error = -EACCES;
1849	struct inode *inode = d_inode(dentry);
1850	struct path path;
1851
1852	/ Are we allowed to snoop on the tasks file descriptors? /
1853	if (!proc_fd_access_allowed(inode))
1854	goto out;
1855
1856	error = PROC_I(inode)->op.proc_get_link(dentry, &path);
1857	if (error)
1858	goto out;
1859
1860	error = do_proc_readlink(path: &path, buffer, buflen);
1861	path_put(&path);
1862	out:
1863	return error;
1864	}
1865
1866	const struct inode_operations proc_pid_link_inode_operations = {
1867	.readlink = proc_pid_readlink,
1868	.get_link = proc_pid_get_link,
1869	.setattr = proc_setattr,
1870	};
1871
1872
1873	/ building an inode /
1874
1875	void task_dump_owner(struct task_struct *task, umode_t mode,
1876	kuid_t ruid, kgid_t rgid)
1877	{
1878	/ Depending on the state of dumpable compute who should own a*
1879	* proc file for a task.
1880	*/
1881	const struct cred *cred;
1882	kuid_t uid;
1883	kgid_t gid;
1884
1885	if (unlikely(task->flags & PF_KTHREAD)) {
1886	*ruid = GLOBAL_ROOT_UID;
1887	*rgid = GLOBAL_ROOT_GID;
1888	return;
1889	}
1890
1891	/ Default to the tasks effective ownership /
1892	rcu_read_lock();
1893	cred = __task_cred(task);
1894	uid = cred->euid;
1895	gid = cred->egid;
1896	rcu_read_unlock();
1897
1898	/*
1899	* Before the /proc/pid/status file was created the only way to read
1900	* the effective uid of a /process was to stat /proc/pid. Reading
1901	* /proc/pid/status is slow enough that procps and other packages
1902	* kept stating /proc/pid. To keep the rules in /proc simple I have
1903	* made this apply to all per process world readable and executable
1904	* directories.
1905	*/
1906	if (mode != (S_IFDIR\|S_IRUGO\|S_IXUGO)) {
1907	struct mm_struct *mm;
1908	task_lock(task);
1909	mm = task->mm;
1910	/ Make non-dumpable tasks owned by some root /
1911	if (mm) {
1912	if (get_dumpable(mm) != SUID_DUMP_USER) {
1913	struct user_namespace *user_ns = mm->user_ns;
1914
1915	uid = make_kuid(user_ns, `0`);
1916	if (!uid_valid(uid))
1917	uid = GLOBAL_ROOT_UID;
1918
1919	gid = make_kgid(user_ns, `0`);
1920	if (!gid_valid(gid))
1921	gid = GLOBAL_ROOT_GID;
1922	}
1923	} else {
1924	uid = GLOBAL_ROOT_UID;
1925	gid = GLOBAL_ROOT_GID;
1926	}
1927	task_unlock(task);
1928	}
1929	*ruid = uid;
1930	*rgid = gid;
1931	}
1932
1933	void proc_pid_evict_inode(struct proc_inode *ei)
1934	{
1935	struct pid *pid = ei->pid;
1936
1937	if (S_ISDIR(ei->vfs_inode.i_mode)) {
1938	spin_lock(lock: &pid->lock);
1939	hlist_del_init_rcu(n: &ei->sibling_inodes);
1940	spin_unlock(lock: &pid->lock);
1941	}
1942	}
1943
1944	struct inode proc_pid_make_inode(struct* super_block *sb,
1945	struct task_struct *task, umode_t mode)
1946	{
1947	struct inode * inode;
1948	struct proc_inode *ei;
1949	struct pid *pid;
1950
1951	/ We need a new inode /
1952
1953	inode = new_inode(sb);
1954	if (!inode)
1955	goto out;
1956
1957	/ Common stuff /
1958	ei = PROC_I(inode);
1959	inode->i_mode = mode;
1960	inode->i_ino = get_next_ino();
1961	simple_inode_init_ts(inode);
1962	inode->i_op = &proc_def_inode_operations;
1963
1964	/*
1965	* grab the reference to task.
1966	*/
1967	pid = get_task_pid(task, type: PIDTYPE_PID);
1968	if (!pid)
1969	goto out_unlock;
1970
1971	/ Let the pid remember us for quick removal /
1972	ei->pid = pid;
1973
1974	task_dump_owner(task, mode: `0`, ruid: &inode->i_uid, rgid: &inode->i_gid);
1975	security_task_to_inode(p: task, inode);
1976
1977	out:
1978	return inode;
1979
1980	out_unlock:
1981	iput(inode);
1982	return NULL;
1983	}
1984
1985	/*
1986	* Generating an inode and adding it into @pid->inodes, so that task will
1987	* invalidate inode's dentry before being released.
1988	*
1989	* This helper is used for creating dir-type entries under '/proc' and
1990	* '/proc/<tgid>/task'. Other entries(eg. fd, stat) under '/proc/<tgid>'
1991	* can be released by invalidating '/proc/<tgid>' dentry.
1992	* In theory, dentries under '/proc/<tgid>/task' can also be released by
1993	* invalidating '/proc/<tgid>' dentry, we reserve it to handle single
1994	* thread exiting situation: Any one of threads should invalidate its
1995	* '/proc/<tgid>/task/<pid>' dentry before released.
1996	*/
1997	static struct inode proc_pid_make_base_inode(struct* super_block *sb,
1998	struct task_struct *task, umode_t mode)
1999	{
2000	struct inode *inode;
2001	struct proc_inode *ei;
2002	struct pid *pid;
2003
2004	inode = proc_pid_make_inode(sb, task, mode);
2005	if (!inode)
2006	return NULL;
2007
2008	/ Let proc_flush_pid find this directory inode /
2009	ei = PROC_I(inode);
2010	pid = ei->pid;
2011	spin_lock(lock: &pid->lock);
2012	hlist_add_head_rcu(n: &ei->sibling_inodes, h: &pid->inodes);
2013	spin_unlock(lock: &pid->lock);
2014
2015	return inode;
2016	}
2017
2018	int pid_getattr(struct mnt_idmap idmap, const* struct path *path,
2019	struct kstat stat, u32 request_mask, unsigned* int query_flags)
2020	{
2021	struct inode *inode = d_inode(dentry: path->dentry);
2022	struct proc_fs_info *fs_info = proc_sb_info(sb: inode->i_sb);
2023	struct task_struct *task;
2024
2025	generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
2026
2027	stat->uid = GLOBAL_ROOT_UID;
2028	stat->gid = GLOBAL_ROOT_GID;
2029	rcu_read_lock();
2030	task = pid_task(proc_pid(inode), PIDTYPE_PID);
2031	if (task) {
2032	if (!has_pid_permissions(fs_info, task, HIDEPID_INVISIBLE)) {
2033	rcu_read_unlock();
2034	/*
2035	* This doesn't prevent learning whether PID exists,
2036	* it only makes getattr() consistent with readdir().
2037	*/
2038	return -ENOENT;
2039	}
2040	task_dump_owner(task, inode->i_mode, &stat->uid, &stat->gid);
2041	}
2042	rcu_read_unlock();
2043	return `0`;
2044	}
2045
2046	/ dentry stuff /
2047
2048	/*
2049	* Set <pid>/... inode ownership (can change due to setuid(), etc.)
2050	*/
2051	void pid_update_inode(struct task_struct task, struct* inode *inode)
2052	{
2053	task_dump_owner(task, mode: inode->i_mode, ruid: &inode->i_uid, rgid: &inode->i_gid);
2054
2055	inode->i_mode &= ~(S_ISUID \| S_ISGID);
2056	security_task_to_inode(p: task, inode);
2057	}
2058
2059	/*
2060	* Rewrite the inode's ownerships here because the owning task may have
2061	* performed a setuid(), etc.
2062	*
2063	*/
2064	static int pid_revalidate(struct inode dir, const* struct qstr *name,
2065	struct dentry dentry, unsigned* int flags)
2066	{
2067	struct inode *inode;
2068	struct task_struct *task;
2069	int ret = `0`;
2070
2071	rcu_read_lock();
2072	inode = d_inode_rcu(dentry);
2073	if (!inode)
2074	goto out;
2075	task = pid_task(pid: proc_pid(inode), PIDTYPE_PID);
2076
2077	if (task) {
2078	pid_update_inode(task, inode);
2079	ret = `1`;
2080	}
2081	out:
2082	rcu_read_unlock();
2083	return ret;
2084	}
2085
2086	static inline bool proc_inode_is_dead(struct inode *inode)
2087	{
2088	return !proc_pid(inode)->tasks[PIDTYPE_PID].first;
2089	}
2090
/*
 * ->d_delete() for pid dentries.
 *
 * Is the task we represent dead?
 * If so, then don't put the dentry on the lru list,
 * kill it immediately.
 */
int pid_delete_dentry(const struct dentry *dentry)
{
	return proc_inode_is_dead(d_inode(dentry));
}
2099
2100	const struct dentry_operations pid_dentry_operations =
2101	{
2102	.d_revalidate = pid_revalidate,
2103	.d_delete = pid_delete_dentry,
2104	};
2105
2106	/ Lookups /
2107
2108	/*
2109	* Fill a directory entry.
2110	*
2111	* If possible create the dcache entry and derive our inode number and
2112	* file type from dcache entry.
2113	*
2114	* Since all of the proc inode numbers are dynamically generated, the inode
2115	* numbers do not exist until the inode is cache. This means creating
2116	* the dcache entry in readdir is necessary to keep the inode numbers
2117	* reported by readdir in sync with the inode numbers reported
2118	* by stat.
2119	*/
2120	bool proc_fill_cache(struct file file, struct* dir_context *ctx,
2121	const char name, unsigned* int len,
2122	instantiate_t instantiate, struct task_struct task, const* void *ptr)
2123	{
2124	struct dentry child, dir = file->f_path.dentry;
2125	struct qstr qname = QSTR_INIT(name, len);
2126	struct inode *inode;
2127	unsigned type = DT_UNKNOWN;
2128	ino_t ino = `1`;
2129
2130	child = try_lookup_noperm(&qname, dir);
2131	if (!child) {
2132	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
2133	child = d_alloc_parallel(dir, &qname, &wq);
2134	if (IS_ERR(ptr: child))
2135	goto end_instantiate;
2136	if (d_in_lookup(dentry: child)) {
2137	struct dentry *res;
2138	res = instantiate(child, task, ptr);
2139	d_lookup_done(dentry: child);
2140	if (unlikely(res)) {
2141	dput(child);
2142	child = res;
2143	if (IS_ERR(ptr: child))
2144	goto end_instantiate;
2145	}
2146	}
2147	}
2148	inode = d_inode(dentry: child);
2149	ino = inode->i_ino;
2150	type = inode->i_mode >> `12`;
2151	dput(child);
2152	end_instantiate:
2153	return dir_emit(ctx, name, namelen: len, ino, type);
2154	}
2155
2156	/*
2157	* dname_to_vma_addr - maps a dentry name into two unsigned longs
2158	* which represent vma start and end addresses.
2159	*/
2160	static int dname_to_vma_addr(struct dentry *dentry,
2161	unsigned long start, unsigned* long *end)
2162	{
2163	const char *str = dentry->d_name.name;
2164	unsigned long long sval, eval;
2165	unsigned int len;
2166
2167	if (str[`0`] == `'0'` && str[`1`] != `'-'`)
2168	return -EINVAL;
2169	len = _parse_integer(s: str, base: `16`, res: &sval);
2170	if (len & KSTRTOX_OVERFLOW)
2171	return -EINVAL;
2172	if (sval != (unsigned long)sval)
2173	return -EINVAL;
2174	str += len;
2175
2176	if (*str != `'-'`)
2177	return -EINVAL;
2178	str++;
2179
2180	if (str[`0`] == `'0'` && str[`1`])
2181	return -EINVAL;
2182	len = _parse_integer(s: str, base: `16`, res: &eval);
2183	if (len & KSTRTOX_OVERFLOW)
2184	return -EINVAL;
2185	if (eval != (unsigned long)eval)
2186	return -EINVAL;
2187	str += len;
2188
2189	if (*str != `'\0'`)
2190	return -EINVAL;
2191
2192	*start = sval;
2193	*end = eval;
2194
2195	return `0`;
2196	}
2197
2198	static int map_files_d_revalidate(struct inode dir, const* struct qstr *name,
2199	struct dentry dentry, unsigned* int flags)
2200	{
2201	unsigned long vm_start, vm_end;
2202	bool exact_vma_exists = false;
2203	struct mm_struct *mm = NULL;
2204	struct task_struct *task;
2205	struct inode *inode;
2206	int status = `0`;
2207
2208	if (flags & LOOKUP_RCU)
2209	return -ECHILD;
2210
2211	inode = d_inode(dentry);
2212	task = get_proc_task(inode);
2213	if (!task)
2214	goto out_notask;
2215
2216	mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
2217	if (IS_ERR(ptr: mm))
2218	goto out;
2219
2220	if (!dname_to_vma_addr(dentry, start: &vm_start, end: &vm_end)) {
2221	status = mmap_read_lock_killable(mm);
2222	if (!status) {
2223	exact_vma_exists = !!find_exact_vma(mm, vm_start,
2224	vm_end);
2225	mmap_read_unlock(mm);
2226	}
2227	}
2228
2229	mmput(mm);
2230
2231	if (exact_vma_exists) {
2232	task_dump_owner(task, mode: `0`, ruid: &inode->i_uid, rgid: &inode->i_gid);
2233
2234	security_task_to_inode(p: task, inode);
2235	status = `1`;
2236	}
2237
2238	out:
2239	put_task_struct(t: task);
2240
2241	out_notask:
2242	return status;
2243	}
2244
2245	static const struct dentry_operations tid_map_files_dentry_operations = {
2246	.d_revalidate = map_files_d_revalidate,
2247	.d_delete = pid_delete_dentry,
2248	};
2249
2250	static int map_files_get_link(struct dentry dentry, struct* path *path)
2251	{
2252	unsigned long vm_start, vm_end;
2253	struct vm_area_struct *vma;
2254	struct task_struct *task;
2255	struct mm_struct *mm;
2256	int rc;
2257
2258	rc = -ENOENT;
2259	task = get_proc_task(inode: d_inode(dentry));
2260	if (!task)
2261	goto out;
2262
2263	mm = get_task_mm(task);
2264	put_task_struct(t: task);
2265	if (!mm)
2266	goto out;
2267
2268	rc = dname_to_vma_addr(dentry, start: &vm_start, end: &vm_end);
2269	if (rc)
2270	goto out_mmput;
2271
2272	rc = mmap_read_lock_killable(mm);
2273	if (rc)
2274	goto out_mmput;
2275
2276	rc = -ENOENT;
2277	vma = find_exact_vma(mm, vm_start, vm_end);
2278	if (vma && vma->vm_file) {
2279	path = file_user_path(f: vma->vm_file);
2280	path_get(path);
2281	rc = `0`;
2282	}
2283	mmap_read_unlock(mm);
2284
2285	out_mmput:
2286	mmput(mm);
2287	out:
2288	return rc;
2289	}
2290
2291	struct map_files_info {
2292	unsigned long start;
2293	unsigned long end;
2294	fmode_t mode;
2295	};
2296
2297	/*
2298	* Only allow CAP_SYS_ADMIN and CAP_CHECKPOINT_RESTORE to follow the links, due
2299	* to concerns about how the symlinks may be used to bypass permissions on
2300	* ancestor directories in the path to the file in question.
2301	*/
2302	static const char *
2303	proc_map_files_get_link(struct dentry *dentry,
2304	struct inode *inode,
2305	struct delayed_call *done)
2306	{
2307	if (!checkpoint_restore_ns_capable(ns: &init_user_ns))
2308	return ERR_PTR(error: -EPERM);
2309
2310	return proc_pid_get_link(dentry, inode, done);
2311	}
2312
2313	/*
2314	* Identical to proc_pid_link_inode_operations except for get_link()
2315	*/
2316	static const struct inode_operations proc_map_files_link_inode_operations = {
2317	.readlink = proc_pid_readlink,
2318	.get_link = proc_map_files_get_link,
2319	.setattr = proc_setattr,
2320	};
2321
2322	static struct dentry *
2323	proc_map_files_instantiate(struct dentry *dentry,
2324	struct task_struct task, const* void *ptr)
2325	{
2326	fmode_t mode = (fmode_t)(unsigned long)ptr;
2327	struct proc_inode *ei;
2328	struct inode *inode;
2329
2330	inode = proc_pid_make_inode(sb: dentry->d_sb, task, S_IFLNK \|
2331	((mode & FMODE_READ ) ? S_IRUSR : `0`) \|
2332	((mode & FMODE_WRITE) ? S_IWUSR : `0`));
2333	if (!inode)
2334	return ERR_PTR(error: -ENOENT);
2335
2336	ei = PROC_I(inode);
2337	ei->op.proc_get_link = map_files_get_link;
2338
2339	inode->i_op = &proc_map_files_link_inode_operations;
2340	inode->i_size = `64`;
2341
2342	return proc_splice_unmountable(inode, dentry,
2343	d_ops: &tid_map_files_dentry_operations);
2344	}
2345
2346	static struct dentry proc_map_files_lookup(struct* inode *dir,
2347	struct dentry dentry, unsigned* int flags)
2348	{
2349	unsigned long vm_start, vm_end;
2350	struct vm_area_struct *vma;
2351	struct task_struct *task;
2352	struct dentry *result;
2353	struct mm_struct *mm;
2354
2355	result = ERR_PTR(error: -ENOENT);
2356	task = get_proc_task(inode: dir);
2357	if (!task)
2358	goto out;
2359
2360	result = ERR_PTR(error: -EACCES);
2361	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
2362	goto out_put_task;
2363
2364	result = ERR_PTR(error: -ENOENT);
2365	if (dname_to_vma_addr(dentry, start: &vm_start, end: &vm_end))
2366	goto out_put_task;
2367
2368	mm = get_task_mm(task);
2369	if (!mm)
2370	goto out_put_task;
2371
2372	result = ERR_PTR(error: -EINTR);
2373	if (mmap_read_lock_killable(mm))
2374	goto out_put_mm;
2375
2376	result = ERR_PTR(error: -ENOENT);
2377	vma = find_exact_vma(mm, vm_start, vm_end);
2378	if (!vma)
2379	goto out_no_vma;
2380
2381	if (vma->vm_file)
2382	result = proc_map_files_instantiate(dentry, task,
2383	ptr: (void )(unsigned* long)vma->vm_file->f_mode);
2384
2385	out_no_vma:
2386	mmap_read_unlock(mm);
2387	out_put_mm:
2388	mmput(mm);
2389	out_put_task:
2390	put_task_struct(t: task);
2391	out:
2392	return result;
2393	}
2394
2395	static const struct inode_operations proc_map_files_inode_operations = {
2396	.lookup = proc_map_files_lookup,
2397	.permission = proc_fd_permission,
2398	.setattr = proc_setattr,
2399	};
2400
2401	static int
2402	proc_map_files_readdir(struct file file, struct* dir_context *ctx)
2403	{
2404	struct vm_area_struct *vma;
2405	struct task_struct *task;
2406	struct mm_struct *mm;
2407	unsigned long nr_files, pos, i;
2408	GENRADIX(struct map_files_info) fa;
2409	struct map_files_info *p;
2410	int ret;
2411	struct vma_iterator vmi;
2412
2413	genradix_init(&fa);
2414
2415	ret = -ENOENT;
2416	task = get_proc_task(inode: file_inode(f: file));
2417	if (!task)
2418	goto out;
2419
2420	ret = -EACCES;
2421	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
2422	goto out_put_task;
2423
2424	ret = `0`;
2425	if (!dir_emit_dots(file, ctx))
2426	goto out_put_task;
2427
2428	mm = get_task_mm(task);
2429	if (!mm)
2430	goto out_put_task;
2431
2432	ret = mmap_read_lock_killable(mm);
2433	if (ret) {
2434	mmput(mm);
2435	goto out_put_task;
2436	}
2437
2438	nr_files = `0`;
2439
2440	/*
2441	* We need two passes here:
2442	*
2443	* 1) Collect vmas of mapped files with mmap_lock taken
2444	* 2) Release mmap_lock and instantiate entries
2445	*
2446	* otherwise we get lockdep complained, since filldir()
2447	* routine might require mmap_lock taken in might_fault().
2448	*/
2449
2450	pos = `2`;
2451	vma_iter_init(vmi: &vmi, mm, addr: `0`);
2452	for_each_vma(vmi, vma) {
2453	if (!vma->vm_file)
2454	continue;
2455	if (++pos <= ctx->pos)
2456	continue;
2457
2458	p = genradix_ptr_alloc(&fa, nr_files++, GFP_KERNEL);
2459	if (!p) {
2460	ret = -ENOMEM;
2461	mmap_read_unlock(mm);
2462	mmput(mm);
2463	goto out_put_task;
2464	}
2465
2466	p->start = vma->vm_start;
2467	p->end = vma->vm_end;
2468	p->mode = vma->vm_file->f_mode;
2469	}
2470	mmap_read_unlock(mm);
2471	mmput(mm);
2472
2473	for (i = `0`; i < nr_files; i++) {
2474	char buf[`4` * sizeof(long) + `2`]; / max: %lx-%lx\0 /
2475	unsigned int len;
2476
2477	p = genradix_ptr(&fa, i);
2478	len = snprintf(buf, size: sizeof(buf), fmt: "%lx-%lx", p->start, p->end);
2479	if (!proc_fill_cache(file, ctx,
2480	name: buf, len,
2481	instantiate: proc_map_files_instantiate,
2482	task,
2483	ptr: (void )(unsigned* long)p->mode))
2484	break;
2485	ctx->pos++;
2486	}
2487
2488	out_put_task:
2489	put_task_struct(t: task);
2490	out:
2491	genradix_free(&fa);
2492	return ret;
2493	}
2494
2495	static const struct file_operations proc_map_files_operations = {
2496	.read = generic_read_dir,
2497	.iterate_shared = proc_map_files_readdir,
2498	.llseek = generic_file_llseek,
2499	};
2500
2501	#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS)
/* Per-open seq_file state for the "timers" file. */
struct timers_private {
	struct pid		*pid;	/* set at open time from proc_pid(inode) */
	struct task_struct	*task;	/* held between timers_start() and timers_stop() */
	struct pid_namespace	*ns;	/* set at open time from proc_pid_ns(inode->i_sb) */
};
2507
2508	static void timers_start(struct* seq_file m, loff_t pos)
2509	{
2510	struct timers_private *tp = m->private;
2511
2512	tp->task = get_pid_task(pid: tp->pid, PIDTYPE_PID);
2513	if (!tp->task)
2514	return ERR_PTR(error: -ESRCH);
2515
2516	rcu_read_lock();
2517	return seq_hlist_start_rcu(head: &tp->task->signal->posix_timers, pos: *pos);
2518	}
2519
2520	static void timers_next(struct* seq_file m, void* v, loff_t pos)
2521	{
2522	struct timers_private *tp = m->private;
2523
2524	return seq_hlist_next_rcu(v, head: &tp->task->signal->posix_timers, ppos: pos);
2525	}
2526
2527	static void timers_stop(struct seq_file m, void* *v)
2528	{
2529	struct timers_private *tp = m->private;
2530
2531	if (tp->task) {
2532	put_task_struct(t: tp->task);
2533	tp->task = NULL;
2534	rcu_read_unlock();
2535	}
2536	}
2537
2538	static int show_timer(struct seq_file m, void* *v)
2539	{
2540	static const char * const nstr[] = {
2541	[SIGEV_SIGNAL] = "signal",
2542	[SIGEV_NONE] = "none",
2543	[SIGEV_THREAD] = "thread",
2544	};
2545
2546	struct k_itimer timer = hlist_entry((struct* hlist_node )v, struct* k_itimer, list);
2547	struct timers_private *tp = m->private;
2548	int notify = timer->it_sigev_notify;
2549
2550	guard(spinlock_irq)(l: &timer->it_lock);
2551	if (!posixtimer_valid(timer))
2552	return `0`;
2553
2554	seq_printf(m, fmt: "ID: %d\n", timer->it_id);
2555	seq_printf(m, fmt: "signal: %d/%px\n", timer->sigq.info.si_signo,
2556	timer->sigq.info.si_value.sival_ptr);
2557	seq_printf(m, fmt: "notify: %s/%s.%d\n", nstr[notify & ~SIGEV_THREAD_ID],
2558	(notify & SIGEV_THREAD_ID) ? "tid" : "pid",
2559	pid_nr_ns(pid: timer->it_pid, ns: tp->ns));
2560	seq_printf(m, fmt: "ClockID: %d\n", timer->it_clock);
2561
2562	return `0`;
2563	}
2564
2565	static const struct seq_operations proc_timers_seq_ops = {
2566	.start = timers_start,
2567	.next = timers_next,
2568	.stop = timers_stop,
2569	.show = show_timer,
2570	};
2571
2572	static int proc_timers_open(struct inode inode, struct* file *file)
2573	{
2574	struct timers_private *tp;
2575
2576	tp = __seq_open_private(file, &proc_timers_seq_ops,
2577	sizeof(struct timers_private));
2578	if (!tp)
2579	return -ENOMEM;
2580
2581	tp->pid = proc_pid(inode);
2582	tp->ns = proc_pid_ns(sb: inode->i_sb);
2583	return `0`;
2584	}
2585
2586	static const struct file_operations proc_timers_operations = {
2587	.open = proc_timers_open,
2588	.read = seq_read,
2589	.llseek = seq_lseek,
2590	.release = seq_release_private,
2591	};
2592	#endif
2593
2594	static ssize_t timerslack_ns_write(struct file file, const* char __user *buf,
2595	size_t count, loff_t *offset)
2596	{
2597	struct inode *inode = file_inode(f: file);
2598	struct task_struct *p;
2599	u64 slack_ns;
2600	int err;
2601
2602	err = kstrtoull_from_user(s: buf, count, base: `10`, res: &slack_ns);
2603	if (err < `0`)
2604	return err;
2605
2606	p = get_proc_task(inode);
2607	if (!p)
2608	return -ESRCH;
2609
2610	if (p != current) {
2611	rcu_read_lock();
2612	if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
2613	rcu_read_unlock();
2614	count = -EPERM;
2615	goto out;
2616	}
2617	rcu_read_unlock();
2618
2619	err = security_task_setscheduler(p);
2620	if (err) {
2621	count = err;
2622	goto out;
2623	}
2624	}
2625
2626	task_lock(p);
2627	if (rt_or_dl_task_policy(tsk: p))
2628	slack_ns = `0`;
2629	else if (slack_ns == `0`)
2630	slack_ns = p->default_timer_slack_ns;
2631	p->timer_slack_ns = slack_ns;
2632	task_unlock(p);
2633
2634	out:
2635	put_task_struct(t: p);
2636
2637	return count;
2638	}
2639
2640	static int timerslack_ns_show(struct seq_file m, void* *v)
2641	{
2642	struct inode *inode = m->private;
2643	struct task_struct *p;
2644	int err = `0`;
2645
2646	p = get_proc_task(inode);
2647	if (!p)
2648	return -ESRCH;
2649
2650	if (p != current) {
2651	rcu_read_lock();
2652	if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
2653	rcu_read_unlock();
2654	err = -EPERM;
2655	goto out;
2656	}
2657	rcu_read_unlock();
2658
2659	err = security_task_getscheduler(p);
2660	if (err)
2661	goto out;
2662	}
2663
2664	task_lock(p);
2665	seq_printf(m, fmt: "%llu\n", p->timer_slack_ns);
2666	task_unlock(p);
2667
2668	out:
2669	put_task_struct(t: p);
2670
2671	return err;
2672	}
2673
2674	static int timerslack_ns_open(struct inode inode, struct* file *filp)
2675	{
2676	return single_open(filp, timerslack_ns_show, inode);
2677	}
2678
2679	static const struct file_operations proc_pid_set_timerslack_ns_operations = {
2680	.open = timerslack_ns_open,
2681	.read = seq_read,
2682	.write = timerslack_ns_write,
2683	.llseek = seq_lseek,
2684	.release = single_release,
2685	};
2686
2687	static struct dentry proc_pident_instantiate(struct* dentry *dentry,
2688	struct task_struct task, const* void *ptr)
2689	{
2690	const struct pid_entry *p = ptr;
2691	struct inode *inode;
2692	struct proc_inode *ei;
2693
2694	inode = proc_pid_make_inode(sb: dentry->d_sb, task, mode: p->mode);
2695	if (!inode)
2696	return ERR_PTR(error: -ENOENT);
2697
2698	ei = PROC_I(inode);
2699	if (S_ISDIR(inode->i_mode))
2700	set_nlink(inode, nlink: `2`); / Use getattr to fix if necessary /
2701	if (p->iop)
2702	inode->i_op = p->iop;
2703	if (p->fop)
2704	inode->i_fop = p->fop;
2705	ei->op = p->op;
2706	pid_update_inode(task, inode);
2707	return d_splice_alias_ops(inode, dentry, &pid_dentry_operations);
2708	}
2709
2710	static struct dentry proc_pident_lookup(struct* inode *dir,
2711	struct dentry *dentry,
2712	const struct pid_entry *p,
2713	const struct pid_entry *end)
2714	{
2715	struct task_struct *task = get_proc_task(inode: dir);
2716	struct dentry *res = ERR_PTR(error: -ENOENT);
2717
2718	if (!task)
2719	goto out_no_task;
2720
2721	/*
2722	* Yes, it does not scale. And it should not. Don't add
2723	* new entries into /proc/<tgid>/ without very good reasons.
2724	*/
2725	for (; p < end; p++) {
2726	if (p->len != dentry->d_name.len)
2727	continue;
2728	if (!memcmp(p: dentry->d_name.name, q: p->name, size: p->len)) {
2729	res = proc_pident_instantiate(dentry, task, ptr: p);
2730	break;
2731	}
2732	}
2733	put_task_struct(t: task);
2734	out_no_task:
2735	return res;
2736	}
2737
2738	static int proc_pident_readdir(struct file file, struct* dir_context *ctx,
2739	const struct pid_entry ents, unsigned* int nents)
2740	{
2741	struct task_struct *task = get_proc_task(inode: file_inode(f: file));
2742	const struct pid_entry *p;
2743
2744	if (!task)
2745	return -ENOENT;
2746
2747	if (!dir_emit_dots(file, ctx))
2748	goto out;
2749
2750	if (ctx->pos >= nents + `2`)
2751	goto out;
2752
2753	for (p = ents + (ctx->pos - `2`); p < ents + nents; p++) {
2754	if (!proc_fill_cache(file, ctx, name: p->name, len: p->len,
2755	instantiate: proc_pident_instantiate, task, ptr: p))
2756	break;
2757	ctx->pos++;
2758	}
2759	out:
2760	put_task_struct(t: task);
2761	return `0`;
2762	}
2763
2764	#ifdef CONFIG_SECURITY
2765	static int proc_pid_attr_open(struct inode inode, struct* file *file)
2766	{
2767	file->private_data = NULL;
2768	__mem_open(inode, file, PTRACE_MODE_READ_FSCREDS);
2769	return `0`;
2770	}
2771
2772	static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
2773	size_t count, loff_t *ppos)
2774	{
2775	struct inode * inode = file_inode(f: file);
2776	char *p = NULL;
2777	ssize_t length;
2778	struct task_struct *task = get_proc_task(inode);
2779
2780	if (!task)
2781	return -ESRCH;
2782
2783	length = security_getprocattr(p: task, lsmid: PROC_I(inode)->op.lsmid,
2784	name: file->f_path.dentry->d_name.name,
2785	value: &p);
2786	put_task_struct(t: task);
2787	if (length > `0`)
2788	length = simple_read_from_buffer(to: buf, count, ppos, from: p, available: length);
2789	kfree(objp: p);
2790	return length;
2791	}
2792
2793	static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
2794	size_t count, loff_t *ppos)
2795	{
2796	struct inode * inode = file_inode(f: file);
2797	struct task_struct *task;
2798	void *page;
2799	int rv;
2800
2801	/ A task may only write when it was the opener. /
2802	if (file->private_data != current->mm)
2803	return -EPERM;
2804
2805	rcu_read_lock();
2806	task = pid_task(pid: proc_pid(inode), PIDTYPE_PID);
2807	if (!task) {
2808	rcu_read_unlock();
2809	return -ESRCH;
2810	}
2811	/ A task may only write its own attributes. /
2812	if (current != task) {
2813	rcu_read_unlock();
2814	return -EACCES;
2815	}
2816	/ Prevent changes to overridden credentials. /
2817	if (current_cred() != current_real_cred()) {
2818	rcu_read_unlock();
2819	return -EBUSY;
2820	}
2821	rcu_read_unlock();
2822
2823	if (count > PAGE_SIZE)
2824	count = PAGE_SIZE;
2825
2826	/ No partial writes. /
2827	if (*ppos != `0`)
2828	return -EINVAL;
2829
2830	page = memdup_user(buf, count);
2831	if (IS_ERR(ptr: page)) {
2832	rv = PTR_ERR(ptr: page);
2833	goto out;
2834	}
2835
2836	/ Guard against adverse ptrace interaction /
2837	rv = mutex_lock_interruptible(&current->signal->cred_guard_mutex);
2838	if (rv < `0`)
2839	goto out_free;
2840
2841	rv = security_setprocattr(lsmid: PROC_I(inode)->op.lsmid,
2842	name: file->f_path.dentry->d_name.name, value: page,
2843	size: count);
2844	mutex_unlock(lock: &current->signal->cred_guard_mutex);
2845	out_free:
2846	kfree(objp: page);
2847	out:
2848	return rv;
2849	}
2850
2851	static const struct file_operations proc_pid_attr_operations = {
2852	.open = proc_pid_attr_open,
2853	.read = proc_pid_attr_read,
2854	.write = proc_pid_attr_write,
2855	.llseek = generic_file_llseek,
2856	.release = mem_release,
2857	};
2858
/*
 * LSM_DIR_OPS(LSM) - generate the directory plumbing for attr/<LSM>/.
 *
 * Expects a pid_entry array named <LSM>_attr_dir_stuff to be in scope and
 * emits, all built on proc_pident_readdir()/proc_pident_lookup():
 *   proc_<LSM>_attr_dir_iterate()   readdir over the array
 *   proc_<LSM>_attr_dir_ops         file_operations
 *   proc_<LSM>_attr_dir_lookup()    lookup in the array
 *   proc_<LSM>_attr_dir_inode_ops   inode_operations
 * The expansion ends without a semicolon; the user supplies it.
 */
#define LSM_DIR_OPS(LSM) \
static int proc_##LSM##_attr_dir_iterate(struct file *filp, \
			     struct dir_context *ctx) \
{ \
	return proc_pident_readdir(filp, ctx, \
				   LSM##_attr_dir_stuff, \
				   ARRAY_SIZE(LSM##_attr_dir_stuff)); \
} \
\
static const struct file_operations proc_##LSM##_attr_dir_ops = { \
	.read		= generic_read_dir, \
	.iterate_shared	= proc_##LSM##_attr_dir_iterate, \
	.llseek		= default_llseek, \
}; \
\
static struct dentry *proc_##LSM##_attr_dir_lookup(struct inode *dir, \
				struct dentry *dentry, unsigned int flags) \
{ \
	return proc_pident_lookup(dir, dentry, \
				  LSM##_attr_dir_stuff, \
			LSM##_attr_dir_stuff + ARRAY_SIZE(LSM##_attr_dir_stuff)); \
} \
\
static const struct inode_operations proc_##LSM##_attr_dir_inode_ops = { \
	.lookup		= proc_##LSM##_attr_dir_lookup, \
	.getattr	= pid_getattr, \
	.setattr	= proc_setattr, \
}
2887
2888	#ifdef CONFIG_SECURITY_SMACK
2889	static const struct pid_entry smack_attr_dir_stuff[] = {
2890	ATTR(LSM_ID_SMACK, "current", `0666`),
2891	};
2892	LSM_DIR_OPS(smack);
2893	#endif
2894
2895	#ifdef CONFIG_SECURITY_APPARMOR
2896	static const struct pid_entry apparmor_attr_dir_stuff[] = {
2897	ATTR(LSM_ID_APPARMOR, "current", `0666`),
2898	ATTR(LSM_ID_APPARMOR, "prev", `0444`),
2899	ATTR(LSM_ID_APPARMOR, "exec", `0666`),
2900	};
2901	LSM_DIR_OPS(apparmor);
2902	#endif
2903
2904	static const struct pid_entry attr_dir_stuff[] = {
2905	ATTR(LSM_ID_UNDEF, "current", `0666`),
2906	ATTR(LSM_ID_UNDEF, "prev", `0444`),
2907	ATTR(LSM_ID_UNDEF, "exec", `0666`),
2908	ATTR(LSM_ID_UNDEF, "fscreate", `0666`),
2909	ATTR(LSM_ID_UNDEF, "keycreate", `0666`),
2910	ATTR(LSM_ID_UNDEF, "sockcreate", `0666`),
2911	#ifdef CONFIG_SECURITY_SMACK
2912	DIR("smack", `0555`,
2913	proc_smack_attr_dir_inode_ops, proc_smack_attr_dir_ops),
2914	#endif
2915	#ifdef CONFIG_SECURITY_APPARMOR
2916	DIR("apparmor", `0555`,
2917	proc_apparmor_attr_dir_inode_ops, proc_apparmor_attr_dir_ops),
2918	#endif
2919	};
2920
2921	static int proc_attr_dir_readdir(struct file file, struct* dir_context *ctx)
2922	{
2923	return proc_pident_readdir(file, ctx,
2924	ents: attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
2925	}
2926
2927	static const struct file_operations proc_attr_dir_operations = {
2928	.read = generic_read_dir,
2929	.iterate_shared = proc_attr_dir_readdir,
2930	.llseek = generic_file_llseek,
2931	};
2932
2933	static struct dentry proc_attr_dir_lookup(struct* inode *dir,
2934	struct dentry dentry, unsigned* int flags)
2935	{
2936	return proc_pident_lookup(dir, dentry,
2937	p: attr_dir_stuff,
2938	end: attr_dir_stuff + ARRAY_SIZE(attr_dir_stuff));
2939	}
2940
2941	static const struct inode_operations proc_attr_dir_inode_operations = {
2942	.lookup = proc_attr_dir_lookup,
2943	.getattr = pid_getattr,
2944	.setattr = proc_setattr,
2945	};
2946
2947	#endif
2948
2949	#ifdef CONFIG_ELF_CORE
2950	static ssize_t proc_coredump_filter_read(struct file file, char* __user *buf,
2951	size_t count, loff_t *ppos)
2952	{
2953	struct task_struct *task = get_proc_task(inode: file_inode(f: file));
2954	struct mm_struct *mm;
2955	char buffer[PROC_NUMBUF];
2956	size_t len;
2957	int ret;
2958
2959	if (!task)
2960	return -ESRCH;
2961
2962	ret = `0`;
2963	mm = get_task_mm(task);
2964	if (mm) {
2965	unsigned long flags = __mm_flags_get_dumpable(mm);
2966
2967	len = snprintf(buf: buffer, size: sizeof(buffer), fmt: "%08lx\n",
2968	((flags & MMF_DUMP_FILTER_MASK) >>
2969	MMF_DUMP_FILTER_SHIFT));
2970	mmput(mm);
2971	ret = simple_read_from_buffer(to: buf, count, ppos, from: buffer, available: len);
2972	}
2973
2974	put_task_struct(t: task);
2975
2976	return ret;
2977	}
2978
2979	static ssize_t proc_coredump_filter_write(struct file *file,
2980	const char __user *buf,
2981	size_t count,
2982	loff_t *ppos)
2983	{
2984	struct task_struct *task;
2985	struct mm_struct *mm;
2986	unsigned int val;
2987	int ret;
2988	int i;
2989	unsigned long mask;
2990
2991	ret = kstrtouint_from_user(s: buf, count, base: `0`, res: &val);
2992	if (ret < `0`)
2993	return ret;
2994
2995	ret = -ESRCH;
2996	task = get_proc_task(inode: file_inode(f: file));
2997	if (!task)
2998	goto out_no_task;
2999
3000	mm = get_task_mm(task);
3001	if (!mm)
3002	goto out_no_mm;
3003	ret = `0`;
3004
3005	for (i = `0`, mask = `1`; i < MMF_DUMP_FILTER_BITS; i++, mask <<= `1`) {
3006	if (val & mask)
3007	mm_flags_set(flag: i + MMF_DUMP_FILTER_SHIFT, mm);
3008	else
3009	mm_flags_clear(flag: i + MMF_DUMP_FILTER_SHIFT, mm);
3010	}
3011
3012	mmput(mm);
3013	out_no_mm:
3014	put_task_struct(t: task);
3015	out_no_task:
3016	if (ret < `0`)
3017	return ret;
3018	return count;
3019	}
3020
3021	static const struct file_operations proc_coredump_filter_operations = {
3022	.read = proc_coredump_filter_read,
3023	.write = proc_coredump_filter_write,
3024	.llseek = generic_file_llseek,
3025	};
3026	#endif
3027
3028	#ifdef CONFIG_TASK_IO_ACCOUNTING
3029	static int do_io_accounting(struct task_struct task, struct* seq_file m, int* whole)
3030	{
3031	struct task_io_accounting acct;
3032	int result;
3033
3034	result = down_read_killable(sem: &task->signal->exec_update_lock);
3035	if (result)
3036	return result;
3037
3038	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) {
3039	result = -EACCES;
3040	goto out_unlock;
3041	}
3042
3043	if (whole) {
3044	struct signal_struct *sig = task->signal;
3045	struct task_struct *t;
3046
3047	guard(rcu)();
3048	scoped_seqlock_read (&sig->stats_lock, ss_lock_irqsave) {
3049	acct = sig->ioac;
3050	__for_each_thread(sig, t)
3051	task_io_accounting_add(dst: &acct, src: &t->ioac);
3052
3053	}
3054	} else {
3055	acct = task->ioac;
3056	}
3057
3058	seq_printf(m,
3059	fmt: "rchar: %llu\n"
3060	"wchar: %llu\n"
3061	"syscr: %llu\n"
3062	"syscw: %llu\n"
3063	"read_bytes: %llu\n"
3064	"write_bytes: %llu\n"
3065	"cancelled_write_bytes: %llu\n",
3066	(unsigned long long)acct.rchar,
3067	(unsigned long long)acct.wchar,
3068	(unsigned long long)acct.syscr,
3069	(unsigned long long)acct.syscw,
3070	(unsigned long long)acct.read_bytes,
3071	(unsigned long long)acct.write_bytes,
3072	(unsigned long long)acct.cancelled_write_bytes);
3073	result = `0`;
3074
3075	out_unlock:
3076	up_read(sem: &task->signal->exec_update_lock);
3077	return result;
3078	}
3079
/* Show routine for the per-thread "io" file: counters of @task only. */
static int proc_tid_io_accounting(struct seq_file *m, struct pid_namespace *ns,
				  struct pid *pid, struct task_struct *task)
{
	return do_io_accounting(task, m, 0);
}
3085
/* Show routine for the per-process "io" file: counters summed over the group. */
static int proc_tgid_io_accounting(struct seq_file *m, struct pid_namespace *ns,
				   struct pid *pid, struct task_struct *task)
{
	return do_io_accounting(task, m, 1);
}
3091	#endif /* CONFIG_TASK_IO_ACCOUNTING */
3092
3093	#ifdef CONFIG_USER_NS
3094	static int proc_id_map_open(struct inode inode, struct* file *file,
3095	const struct seq_operations *seq_ops)
3096	{
3097	struct user_namespace *ns = NULL;
3098	struct task_struct *task;
3099	struct seq_file *seq;
3100	int ret = -EINVAL;
3101
3102	task = get_proc_task(inode);
3103	if (task) {
3104	rcu_read_lock();
3105	ns = get_user_ns(task_cred_xxx(task, user_ns));
3106	rcu_read_unlock();
3107	put_task_struct(t: task);
3108	}
3109	if (!ns)
3110	goto err;
3111
3112	ret = seq_open(file, seq_ops);
3113	if (ret)
3114	goto err_put_ns;
3115
3116	seq = file->private_data;
3117	seq->private = ns;
3118
3119	return `0`;
3120	err_put_ns:
3121	put_user_ns(ns);
3122	err:
3123	return ret;
3124	}
3125
3126	static int proc_id_map_release(struct inode inode, struct* file *file)
3127	{
3128	struct seq_file *seq = file->private_data;
3129	struct user_namespace *ns = seq->private;
3130	put_user_ns(ns);
3131	return seq_release(inode, file);
3132	}
3133
3134	static int proc_uid_map_open(struct inode inode, struct* file *file)
3135	{
3136	return proc_id_map_open(inode, file, seq_ops: &proc_uid_seq_operations);
3137	}
3138
3139	static int proc_gid_map_open(struct inode inode, struct* file *file)
3140	{
3141	return proc_id_map_open(inode, file, seq_ops: &proc_gid_seq_operations);
3142	}
3143
3144	static int proc_projid_map_open(struct inode inode, struct* file *file)
3145	{
3146	return proc_id_map_open(inode, file, seq_ops: &proc_projid_seq_operations);
3147	}
3148
3149	static const struct file_operations proc_uid_map_operations = {
3150	.open = proc_uid_map_open,
3151	.write = proc_uid_map_write,
3152	.read = seq_read,
3153	.llseek = seq_lseek,
3154	.release = proc_id_map_release,
3155	};
3156
3157	static const struct file_operations proc_gid_map_operations = {
3158	.open = proc_gid_map_open,
3159	.write = proc_gid_map_write,
3160	.read = seq_read,
3161	.llseek = seq_lseek,
3162	.release = proc_id_map_release,
3163	};
3164
3165	static const struct file_operations proc_projid_map_operations = {
3166	.open = proc_projid_map_open,
3167	.write = proc_projid_map_write,
3168	.read = seq_read,
3169	.llseek = seq_lseek,
3170	.release = proc_id_map_release,
3171	};
3172
3173	static int proc_setgroups_open(struct inode inode, struct* file *file)
3174	{
3175	struct user_namespace *ns = NULL;
3176	struct task_struct *task;
3177	int ret;
3178
3179	ret = -ESRCH;
3180	task = get_proc_task(inode);
3181	if (task) {
3182	rcu_read_lock();
3183	ns = get_user_ns(task_cred_xxx(task, user_ns));
3184	rcu_read_unlock();
3185	put_task_struct(t: task);
3186	}
3187	if (!ns)
3188	goto err;
3189
3190	if (file->f_mode & FMODE_WRITE) {
3191	ret = -EACCES;
3192	if (!ns_capable(ns, CAP_SYS_ADMIN))
3193	goto err_put_ns;
3194	}
3195
3196	ret = single_open(file, &proc_setgroups_show, ns);
3197	if (ret)
3198	goto err_put_ns;
3199
3200	return `0`;
3201	err_put_ns:
3202	put_user_ns(ns);
3203	err:
3204	return ret;
3205	}
3206
3207	static int proc_setgroups_release(struct inode inode, struct* file *file)
3208	{
3209	struct seq_file *seq = file->private_data;
3210	struct user_namespace *ns = seq->private;
3211	int ret = single_release(inode, file);
3212	put_user_ns(ns);
3213	return ret;
3214	}
3215
3216	static const struct file_operations proc_setgroups_operations = {
3217	.open = proc_setgroups_open,
3218	.write = proc_setgroups_write,
3219	.read = seq_read,
3220	.llseek = seq_lseek,
3221	.release = proc_setgroups_release,
3222	};
3223	#endif /* CONFIG_USER_NS */
3224
3225	static int proc_pid_personality(struct seq_file m, struct* pid_namespace *ns,
3226	struct pid pid, struct* task_struct *task)
3227	{
3228	int err = lock_trace(task);
3229	if (!err) {
3230	seq_printf(m, fmt: "%08x\n", task->personality);
3231	unlock_trace(task);
3232	}
3233	return err;
3234	}
3235
3236	#ifdef CONFIG_LIVEPATCH
3237	static int proc_pid_patch_state(struct seq_file m, struct* pid_namespace *ns,
3238	struct pid pid, struct* task_struct *task)
3239	{
3240	seq_printf(m, "%d\n", task->patch_state);
3241	return `0`;
3242	}
3243	#endif /* CONFIG_LIVEPATCH */
3244
3245	#ifdef CONFIG_KSM
3246	static int proc_pid_ksm_merging_pages(struct seq_file m, struct* pid_namespace *ns,
3247	struct pid pid, struct* task_struct *task)
3248	{
3249	struct mm_struct *mm;
3250
3251	mm = get_task_mm(task);
3252	if (mm) {
3253	seq_printf(m, fmt: "%lu\n", mm->ksm_merging_pages);
3254	mmput(mm);
3255	}
3256
3257	return `0`;
3258	}
3259	static int proc_pid_ksm_stat(struct seq_file m, struct* pid_namespace *ns,
3260	struct pid pid, struct* task_struct *task)
3261	{
3262	struct mm_struct *mm;
3263	int ret = `0`;
3264
3265	mm = get_task_mm(task);
3266	if (mm) {
3267	seq_printf(m, fmt: "ksm_rmap_items %lu\n", mm->ksm_rmap_items);
3268	seq_printf(m, fmt: "ksm_zero_pages %ld\n", mm_ksm_zero_pages(mm));
3269	seq_printf(m, fmt: "ksm_merging_pages %lu\n", mm->ksm_merging_pages);
3270	seq_printf(m, fmt: "ksm_process_profit %ld\n", ksm_process_profit(mm));
3271	seq_printf(m, fmt: "ksm_merge_any: %s\n",
3272	mm_flags_test(MMF_VM_MERGE_ANY, mm) ? "yes" : "no");
3273	ret = mmap_read_lock_killable(mm);
3274	if (ret) {
3275	mmput(mm);
3276	return ret;
3277	}
3278	seq_printf(m, fmt: "ksm_mergeable: %s\n",
3279	ksm_process_mergeable(mm) ? "yes" : "no");
3280	mmap_read_unlock(mm);
3281	mmput(mm);
3282	}
3283
3284	return `0`;
3285	}
3286	#endif /* CONFIG_KSM */
3287
3288	#ifdef CONFIG_KSTACK_ERASE_METRICS
3289	static int proc_stack_depth(struct seq_file m, struct* pid_namespace *ns,
3290	struct pid pid, struct* task_struct *task)
3291	{
3292	unsigned long prev_depth = THREAD_SIZE -
3293	(task->prev_lowest_stack & (THREAD_SIZE - `1`));
3294	unsigned long depth = THREAD_SIZE -
3295	(task->lowest_stack & (THREAD_SIZE - `1`));
3296
3297	seq_printf(m, "previous stack depth: %lu\nstack depth: %lu\n",
3298	prev_depth, depth);
3299	return `0`;
3300	}
3301	#endif /* CONFIG_KSTACK_ERASE_METRICS */
3302
3303	/*
3304	* Thread groups
3305	*/
3306	static const struct file_operations proc_task_operations;
3307	static const struct inode_operations proc_task_inode_operations;
3308
3309	static const struct pid_entry tgid_base_stuff[] = {
3310	DIR("task", S_IRUGO\|S_IXUGO, proc_task_inode_operations, proc_task_operations),
3311	DIR("fd", S_IRUSR\|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
3312	DIR("map_files", S_IRUSR\|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations),
3313	DIR("fdinfo", S_IRUGO\|S_IXUGO, proc_fdinfo_inode_operations, proc_fdinfo_operations),
3314	DIR("ns", S_IRUSR\|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
3315	#ifdef CONFIG_NET
3316	DIR("net", S_IRUGO\|S_IXUGO, proc_net_inode_operations, proc_net_operations),
3317	#endif
3318	REG("environ", S_IRUSR, proc_environ_operations),
3319	REG("auxv", S_IRUSR, proc_auxv_operations),
3320	ONE("status", S_IRUGO, proc_pid_status),
3321	ONE("personality", S_IRUSR, proc_pid_personality),
3322	ONE("limits", S_IRUGO, proc_pid_limits),
3323	REG("sched", S_IRUGO\|S_IWUSR, proc_pid_sched_operations),
3324	#ifdef CONFIG_SCHED_AUTOGROUP
3325	REG("autogroup", S_IRUGO\|S_IWUSR, proc_pid_sched_autogroup_operations),
3326	#endif
3327	#ifdef CONFIG_TIME_NS
3328	REG("timens_offsets", S_IRUGO\|S_IWUSR, proc_timens_offsets_operations),
3329	#endif
3330	REG("comm", S_IRUGO\|S_IWUSR, proc_pid_set_comm_operations),
3331	#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
3332	ONE("syscall", S_IRUSR, proc_pid_syscall),
3333	#endif
3334	REG("cmdline", S_IRUGO, proc_pid_cmdline_ops),
3335	ONE("stat", S_IRUGO, proc_tgid_stat),
3336	ONE("statm", S_IRUGO, proc_pid_statm),
3337	REG("maps", S_IRUGO, proc_pid_maps_operations),
3338	#ifdef CONFIG_NUMA
3339	REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations),
3340	#endif
3341	REG("mem", S_IRUSR\|S_IWUSR, proc_mem_operations),
3342	LNK("cwd", proc_cwd_link),
3343	LNK("root", proc_root_link),
3344	LNK("exe", proc_exe_link),
3345	REG("mounts", S_IRUGO, proc_mounts_operations),
3346	REG("mountinfo", S_IRUGO, proc_mountinfo_operations),
3347	REG("mountstats", S_IRUSR, proc_mountstats_operations),
3348	#ifdef CONFIG_PROC_PAGE_MONITOR
3349	REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
3350	REG("smaps", S_IRUGO, proc_pid_smaps_operations),
3351	REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
3352	REG("pagemap", S_IRUSR, proc_pagemap_operations),
3353	#endif
3354	#ifdef CONFIG_SECURITY
3355	DIR("attr", S_IRUGO\|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
3356	#endif
3357	#ifdef CONFIG_KALLSYMS
3358	ONE("wchan", S_IRUGO, proc_pid_wchan),
3359	#endif
3360	#ifdef CONFIG_STACKTRACE
3361	ONE("stack", S_IRUSR, proc_pid_stack),
3362	#endif
3363	#ifdef CONFIG_SCHED_INFO
3364	ONE("schedstat", S_IRUGO, proc_pid_schedstat),
3365	#endif
3366	#ifdef CONFIG_LATENCYTOP
3367	REG("latency", S_IRUGO, proc_lstats_operations),
3368	#endif
3369	#ifdef CONFIG_PROC_PID_CPUSET
3370	ONE("cpuset", S_IRUGO, proc_cpuset_show),
3371	#endif
3372	#ifdef CONFIG_CGROUPS
3373	ONE("cgroup", S_IRUGO, proc_cgroup_show),
3374	#endif
3375	#ifdef CONFIG_PROC_CPU_RESCTRL
3376	ONE("cpu_resctrl_groups", S_IRUGO, proc_resctrl_show),
3377	#endif
3378	ONE("oom_score", S_IRUGO, proc_oom_score),
3379	REG("oom_adj", S_IRUGO\|S_IWUSR, proc_oom_adj_operations),
3380	REG("oom_score_adj", S_IRUGO\|S_IWUSR, proc_oom_score_adj_operations),
3381	#ifdef CONFIG_AUDIT
3382	REG("loginuid", S_IWUSR\|S_IRUGO, proc_loginuid_operations),
3383	REG("sessionid", S_IRUGO, proc_sessionid_operations),
3384	#endif
3385	#ifdef CONFIG_FAULT_INJECTION
3386	REG("make-it-fail", S_IRUGO\|S_IWUSR, proc_fault_inject_operations),
3387	REG("fail-nth", `0644`, proc_fail_nth_operations),
3388	#endif
3389	#ifdef CONFIG_ELF_CORE
3390	REG("coredump_filter", S_IRUGO\|S_IWUSR, proc_coredump_filter_operations),
3391	#endif
3392	#ifdef CONFIG_TASK_IO_ACCOUNTING
3393	ONE("io", S_IRUSR, proc_tgid_io_accounting),
3394	#endif
3395	#ifdef CONFIG_USER_NS
3396	REG("uid_map", S_IRUGO\|S_IWUSR, proc_uid_map_operations),
3397	REG("gid_map", S_IRUGO\|S_IWUSR, proc_gid_map_operations),
3398	REG("projid_map", S_IRUGO\|S_IWUSR, proc_projid_map_operations),
3399	REG("setgroups", S_IRUGO\|S_IWUSR, proc_setgroups_operations),
3400	#endif
3401	#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS)
3402	REG("timers", S_IRUGO, proc_timers_operations),
3403	#endif
3404	REG("timerslack_ns", S_IRUGO\|S_IWUGO, proc_pid_set_timerslack_ns_operations),
3405	#ifdef CONFIG_LIVEPATCH
3406	ONE("patch_state", S_IRUSR, proc_pid_patch_state),
3407	#endif
3408	#ifdef CONFIG_KSTACK_ERASE_METRICS
3409	ONE("stack_depth", S_IRUGO, proc_stack_depth),
3410	#endif
3411	#ifdef CONFIG_PROC_PID_ARCH_STATUS
3412	ONE("arch_status", S_IRUGO, proc_pid_arch_status),
3413	#endif
3414	#ifdef CONFIG_SECCOMP_CACHE_DEBUG
3415	ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache),
3416	#endif
3417	#ifdef CONFIG_KSM
3418	ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages),
3419	ONE("ksm_stat", S_IRUSR, proc_pid_ksm_stat),
3420	#endif
3421	};
3422
3423	static int proc_tgid_base_readdir(struct file file, struct* dir_context *ctx)
3424	{
3425	return proc_pident_readdir(file, ctx,
3426	ents: tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
3427	}
3428
3429	static const struct file_operations proc_tgid_base_operations = {
3430	.read = generic_read_dir,
3431	.iterate_shared = proc_tgid_base_readdir,
3432	.llseek = generic_file_llseek,
3433	};
3434
3435	struct pid tgid_pidfd_to_pid(const* struct file *file)
3436	{
3437	if (file->f_op != &proc_tgid_base_operations)
3438	return ERR_PTR(error: -EBADF);
3439
3440	return proc_pid(inode: file_inode(f: file));
3441	}
3442
3443	static struct dentry proc_tgid_base_lookup(struct* inode dir, struct* dentry dentry, unsigned* int flags)
3444	{
3445	return proc_pident_lookup(dir, dentry,
3446	p: tgid_base_stuff,
3447	end: tgid_base_stuff + ARRAY_SIZE(tgid_base_stuff));
3448	}
3449
3450	static const struct inode_operations proc_tgid_base_inode_operations = {
3451	.lookup = proc_tgid_base_lookup,
3452	.getattr = pid_getattr,
3453	.setattr = proc_setattr,
3454	.permission = proc_pid_permission,
3455	};
3456
3457	/**
3458	* proc_flush_pid - Remove dcache entries for @pid from the /proc dcache.
3459	* @pid: pid that should be flushed.
3460	*
3461	* This function walks a list of inodes (that belong to any proc
3462	* filesystem) that are attached to the pid and flushes them from
3463	* the dentry cache.
3464	*
3465	* It is safe and reasonable to cache /proc entries for a task until
3466	* that task exits. After that they just clog up the dcache with
3467	* useless entries, possibly causing useful dcache entries to be
3468	* flushed instead. This routine is provided to flush those useless
3469	* dcache entries when a process is reaped.
3470	*
3471	* NOTE: This routine is just an optimization so it does not guarantee
3472	* that no dcache entries will exist after a process is reaped
3473	* it just makes it very unlikely that any will persist.
3474	*/
3475
3476	void proc_flush_pid(struct pid *pid)
3477	{
3478	proc_invalidate_siblings_dcache(inodes: &pid->inodes, lock: &pid->lock);
3479	}
3480
3481	static struct dentry proc_pid_instantiate(struct* dentry * dentry,
3482	struct task_struct task, const* void *ptr)
3483	{
3484	struct inode *inode;
3485
3486	inode = proc_pid_make_base_inode(sb: dentry->d_sb, task,
3487	S_IFDIR \| S_IRUGO \| S_IXUGO);
3488	if (!inode)
3489	return ERR_PTR(error: -ENOENT);
3490
3491	inode->i_op = &proc_tgid_base_inode_operations;
3492	inode->i_fop = &proc_tgid_base_operations;
3493	inode->i_flags\|=S_IMMUTABLE;
3494
3495	set_nlink(inode, nlink: nlink_tgid);
3496	pid_update_inode(task, inode);
3497
3498	return d_splice_alias_ops(inode, dentry, &pid_dentry_operations);
3499	}
3500
3501	struct dentry proc_pid_lookup(struct* dentry dentry, unsigned* int flags)
3502	{
3503	struct task_struct *task;
3504	unsigned tgid;
3505	struct proc_fs_info *fs_info;
3506	struct pid_namespace *ns;
3507	struct dentry *result = ERR_PTR(error: -ENOENT);
3508
3509	tgid = name_to_int(qstr: &dentry->d_name);
3510	if (tgid == ~`0U`)
3511	goto out;
3512
3513	fs_info = proc_sb_info(sb: dentry->d_sb);
3514	ns = fs_info->pid_ns;
3515	rcu_read_lock();
3516	task = find_task_by_pid_ns(nr: tgid, ns);
3517	if (task)
3518	get_task_struct(t: task);
3519	rcu_read_unlock();
3520	if (!task)
3521	goto out;
3522
3523	/ Limit procfs to only ptraceable tasks /
3524	if (fs_info->hide_pid == HIDEPID_NOT_PTRACEABLE) {
3525	if (!has_pid_permissions(fs_info, task, hide_pid_min: HIDEPID_NO_ACCESS))
3526	goto out_put_task;
3527	}
3528
3529	result = proc_pid_instantiate(dentry, task, NULL);
3530	out_put_task:
3531	put_task_struct(t: task);
3532	out:
3533	return result;
3534	}
3535
3536	/*
3537	* Find the first task with tgid >= tgid
3538	*
3539	*/
3540	struct tgid_iter {
3541	unsigned int tgid;
3542	struct task_struct *task;
3543	};
3544	static struct tgid_iter next_tgid(struct pid_namespace ns, struct* tgid_iter iter)
3545	{
3546	struct pid *pid;
3547
3548	if (iter.task)
3549	put_task_struct(t: iter.task);
3550	rcu_read_lock();
3551	retry:
3552	iter.task = NULL;
3553	pid = find_ge_pid(nr: iter.tgid, ns);
3554	if (pid) {
3555	iter.tgid = pid_nr_ns(pid, ns);
3556	iter.task = pid_task(pid, PIDTYPE_TGID);
3557	if (!iter.task) {
3558	iter.tgid += `1`;
3559	goto retry;
3560	}
3561	get_task_struct(t: iter.task);
3562	}
3563	rcu_read_unlock();
3564	return iter;
3565	}
3566
3567	#define TGID_OFFSET (FIRST_PROCESS_ENTRY + 2)
3568
3569	/ for the /proc/ directory itself, after non-process stuff has been done /
3570	int proc_pid_readdir(struct file file, struct* dir_context *ctx)
3571	{
3572	struct tgid_iter iter;
3573	struct proc_fs_info *fs_info = proc_sb_info(sb: file_inode(f: file)->i_sb);
3574	struct pid_namespace *ns = proc_pid_ns(sb: file_inode(f: file)->i_sb);
3575	loff_t pos = ctx->pos;
3576
3577	if (pos >= PID_MAX_LIMIT + TGID_OFFSET)
3578	return `0`;
3579
3580	if (pos == TGID_OFFSET - `2`) {
3581	if (!dir_emit(ctx, name: "self", namelen: `4`, ino: self_inum, DT_LNK))
3582	return `0`;
3583	ctx->pos = pos = pos + `1`;
3584	}
3585	if (pos == TGID_OFFSET - `1`) {
3586	if (!dir_emit(ctx, name: "thread-self", namelen: `11`, ino: thread_self_inum, DT_LNK))
3587	return `0`;
3588	ctx->pos = pos = pos + `1`;
3589	}
3590	iter.tgid = pos - TGID_OFFSET;
3591	iter.task = NULL;
3592	for (iter = next_tgid(ns, iter);
3593	iter.task;
3594	iter.tgid += `1`, iter = next_tgid(ns, iter)) {
3595	char name[`10` + `1`];
3596	unsigned int len;
3597
3598	cond_resched();
3599	if (!has_pid_permissions(fs_info, task: iter.task, hide_pid_min: HIDEPID_INVISIBLE))
3600	continue;
3601
3602	len = snprintf(buf: name, size: sizeof(name), fmt: "%u", iter.tgid);
3603	ctx->pos = iter.tgid + TGID_OFFSET;
3604	if (!proc_fill_cache(file, ctx, name, len,
3605	instantiate: proc_pid_instantiate, task: iter.task, NULL)) {
3606	put_task_struct(t: iter.task);
3607	return `0`;
3608	}
3609	}
3610	ctx->pos = PID_MAX_LIMIT + TGID_OFFSET;
3611	return `0`;
3612	}
3613
3614	/*
3615	* proc_tid_comm_permission is a special permission function exclusively
3616	* used for the node /proc/<pid>/task/<tid>/comm.
3617	* It bypasses generic permission checks in the case where a task of the same
3618	* task group attempts to access the node.
3619	* The rationale behind this is that glibc and bionic access this node for
3620	* cross thread naming (pthread_set/getname_np(!self)). However, if
3621	* PR_SET_DUMPABLE gets set to 0 this node among others becomes uid=0 gid=0,
3622	* which locks out the cross thread naming implementation.
3623	* This function makes sure that the node is always accessible for members of
3624	* same thread group.
3625	*/
3626	static int proc_tid_comm_permission(struct mnt_idmap *idmap,
3627	struct inode inode, int* mask)
3628	{
3629	bool is_same_tgroup;
3630	struct task_struct *task;
3631
3632	task = get_proc_task(inode);
3633	if (!task)
3634	return -ESRCH;
3635	is_same_tgroup = same_thread_group(current, p2: task);
3636	put_task_struct(t: task);
3637
3638	if (likely(is_same_tgroup && !(mask & MAY_EXEC))) {
3639	/ This file (/proc/<pid>/task/<tid>/comm) can always be*
3640	* read or written by the members of the corresponding
3641	* thread group.
3642	*/
3643	return `0`;
3644	}
3645
3646	return generic_permission(&nop_mnt_idmap, inode, mask);
3647	}
3648
3649	static const struct inode_operations proc_tid_comm_inode_operations = {
3650	.setattr = proc_setattr,
3651	.permission = proc_tid_comm_permission,
3652	};
3653
3654	/*
3655	* Tasks
3656	*/
3657	static const struct pid_entry tid_base_stuff[] = {
3658	DIR("fd", S_IRUSR\|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
3659	DIR("fdinfo", S_IRUGO\|S_IXUGO, proc_fdinfo_inode_operations, proc_fdinfo_operations),
3660	DIR("ns", S_IRUSR\|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
3661	#ifdef CONFIG_NET
3662	DIR("net", S_IRUGO\|S_IXUGO, proc_net_inode_operations, proc_net_operations),
3663	#endif
3664	REG("environ", S_IRUSR, proc_environ_operations),
3665	REG("auxv", S_IRUSR, proc_auxv_operations),
3666	ONE("status", S_IRUGO, proc_pid_status),
3667	ONE("personality", S_IRUSR, proc_pid_personality),
3668	ONE("limits", S_IRUGO, proc_pid_limits),
3669	REG("sched", S_IRUGO\|S_IWUSR, proc_pid_sched_operations),
3670	NOD("comm", S_IFREG\|S_IRUGO\|S_IWUSR,
3671	&proc_tid_comm_inode_operations,
3672	&proc_pid_set_comm_operations, {}),
3673	#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
3674	ONE("syscall", S_IRUSR, proc_pid_syscall),
3675	#endif
3676	REG("cmdline", S_IRUGO, proc_pid_cmdline_ops),
3677	ONE("stat", S_IRUGO, proc_tid_stat),
3678	ONE("statm", S_IRUGO, proc_pid_statm),
3679	REG("maps", S_IRUGO, proc_pid_maps_operations),
3680	#ifdef CONFIG_PROC_CHILDREN
3681	REG("children", S_IRUGO, proc_tid_children_operations),
3682	#endif
3683	#ifdef CONFIG_NUMA
3684	REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations),
3685	#endif
3686	REG("mem", S_IRUSR\|S_IWUSR, proc_mem_operations),
3687	LNK("cwd", proc_cwd_link),
3688	LNK("root", proc_root_link),
3689	LNK("exe", proc_exe_link),
3690	REG("mounts", S_IRUGO, proc_mounts_operations),
3691	REG("mountinfo", S_IRUGO, proc_mountinfo_operations),
3692	#ifdef CONFIG_PROC_PAGE_MONITOR
3693	REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
3694	REG("smaps", S_IRUGO, proc_pid_smaps_operations),
3695	REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
3696	REG("pagemap", S_IRUSR, proc_pagemap_operations),
3697	#endif
3698	#ifdef CONFIG_SECURITY
3699	DIR("attr", S_IRUGO\|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
3700	#endif
3701	#ifdef CONFIG_KALLSYMS
3702	ONE("wchan", S_IRUGO, proc_pid_wchan),
3703	#endif
3704	#ifdef CONFIG_STACKTRACE
3705	ONE("stack", S_IRUSR, proc_pid_stack),
3706	#endif
3707	#ifdef CONFIG_SCHED_INFO
3708	ONE("schedstat", S_IRUGO, proc_pid_schedstat),
3709	#endif
3710	#ifdef CONFIG_LATENCYTOP
3711	REG("latency", S_IRUGO, proc_lstats_operations),
3712	#endif
3713	#ifdef CONFIG_PROC_PID_CPUSET
3714	ONE("cpuset", S_IRUGO, proc_cpuset_show),
3715	#endif
3716	#ifdef CONFIG_CGROUPS
3717	ONE("cgroup", S_IRUGO, proc_cgroup_show),
3718	#endif
3719	#ifdef CONFIG_PROC_CPU_RESCTRL
3720	ONE("cpu_resctrl_groups", S_IRUGO, proc_resctrl_show),
3721	#endif
3722	ONE("oom_score", S_IRUGO, proc_oom_score),
3723	REG("oom_adj", S_IRUGO\|S_IWUSR, proc_oom_adj_operations),
3724	REG("oom_score_adj", S_IRUGO\|S_IWUSR, proc_oom_score_adj_operations),
3725	#ifdef CONFIG_AUDIT
3726	REG("loginuid", S_IWUSR\|S_IRUGO, proc_loginuid_operations),
3727	REG("sessionid", S_IRUGO, proc_sessionid_operations),
3728	#endif
3729	#ifdef CONFIG_FAULT_INJECTION
3730	REG("make-it-fail", S_IRUGO\|S_IWUSR, proc_fault_inject_operations),
3731	REG("fail-nth", `0644`, proc_fail_nth_operations),
3732	#endif
3733	#ifdef CONFIG_TASK_IO_ACCOUNTING
3734	ONE("io", S_IRUSR, proc_tid_io_accounting),
3735	#endif
3736	#ifdef CONFIG_USER_NS
3737	REG("uid_map", S_IRUGO\|S_IWUSR, proc_uid_map_operations),
3738	REG("gid_map", S_IRUGO\|S_IWUSR, proc_gid_map_operations),
3739	REG("projid_map", S_IRUGO\|S_IWUSR, proc_projid_map_operations),
3740	REG("setgroups", S_IRUGO\|S_IWUSR, proc_setgroups_operations),
3741	#endif
3742	#ifdef CONFIG_LIVEPATCH
3743	ONE("patch_state", S_IRUSR, proc_pid_patch_state),
3744	#endif
3745	#ifdef CONFIG_PROC_PID_ARCH_STATUS
3746	ONE("arch_status", S_IRUGO, proc_pid_arch_status),
3747	#endif
3748	#ifdef CONFIG_SECCOMP_CACHE_DEBUG
3749	ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache),
3750	#endif
3751	#ifdef CONFIG_KSM
3752	ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages),
3753	ONE("ksm_stat", S_IRUSR, proc_pid_ksm_stat),
3754	#endif
3755	};
3756
3757	static int proc_tid_base_readdir(struct file file, struct* dir_context *ctx)
3758	{
3759	return proc_pident_readdir(file, ctx,
3760	ents: tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
3761	}
3762
3763	static struct dentry proc_tid_base_lookup(struct* inode dir, struct* dentry dentry, unsigned* int flags)
3764	{
3765	return proc_pident_lookup(dir, dentry,
3766	p: tid_base_stuff,
3767	end: tid_base_stuff + ARRAY_SIZE(tid_base_stuff));
3768	}
3769
3770	static const struct file_operations proc_tid_base_operations = {
3771	.read = generic_read_dir,
3772	.iterate_shared = proc_tid_base_readdir,
3773	.llseek = generic_file_llseek,
3774	};
3775
3776	static const struct inode_operations proc_tid_base_inode_operations = {
3777	.lookup = proc_tid_base_lookup,
3778	.getattr = pid_getattr,
3779	.setattr = proc_setattr,
3780	};
3781
3782	static struct dentry proc_task_instantiate(struct* dentry *dentry,
3783	struct task_struct task, const* void *ptr)
3784	{
3785	struct inode *inode;
3786	inode = proc_pid_make_base_inode(sb: dentry->d_sb, task,
3787	S_IFDIR \| S_IRUGO \| S_IXUGO);
3788	if (!inode)
3789	return ERR_PTR(error: -ENOENT);
3790
3791	inode->i_op = &proc_tid_base_inode_operations;
3792	inode->i_fop = &proc_tid_base_operations;
3793	inode->i_flags \|= S_IMMUTABLE;
3794
3795	set_nlink(inode, nlink: nlink_tid);
3796	pid_update_inode(task, inode);
3797
3798	return d_splice_alias_ops(inode, dentry, &pid_dentry_operations);
3799	}
3800
3801	static struct dentry proc_task_lookup(struct* inode dir, struct* dentry * dentry, unsigned int flags)
3802	{
3803	struct task_struct *task;
3804	struct task_struct *leader = get_proc_task(inode: dir);
3805	unsigned tid;
3806	struct proc_fs_info *fs_info;
3807	struct pid_namespace *ns;
3808	struct dentry *result = ERR_PTR(error: -ENOENT);
3809
3810	if (!leader)
3811	goto out_no_task;
3812
3813	tid = name_to_int(qstr: &dentry->d_name);
3814	if (tid == ~`0U`)
3815	goto out;
3816
3817	fs_info = proc_sb_info(sb: dentry->d_sb);
3818	ns = fs_info->pid_ns;
3819	rcu_read_lock();
3820	task = find_task_by_pid_ns(nr: tid, ns);
3821	if (task)
3822	get_task_struct(t: task);
3823	rcu_read_unlock();
3824	if (!task)
3825	goto out;
3826	if (!same_thread_group(p1: leader, p2: task))
3827	goto out_drop_task;
3828
3829	result = proc_task_instantiate(dentry, task, NULL);
3830	out_drop_task:
3831	put_task_struct(t: task);
3832	out:
3833	put_task_struct(t: leader);
3834	out_no_task:
3835	return result;
3836	}
3837
3838	/*
3839	* Find the first tid of a thread group to return to user space.
3840	*
3841	* Usually this is just the thread group leader, but if the users
3842	* buffer was too small or there was a seek into the middle of the
3843	* directory we have more work todo.
3844	*
3845	* In the case of a short read we start with find_task_by_pid.
3846	*
3847	* In the case of a seek we start with the leader and walk nr
3848	* threads past it.
3849	*/
3850	static struct task_struct first_tid(struct* pid pid, int* tid, loff_t f_pos,
3851	struct pid_namespace *ns)
3852	{
3853	struct task_struct pos, task;
3854	unsigned long nr = f_pos;
3855
3856	if (nr != f_pos) / 32bit overflow? /
3857	return NULL;
3858
3859	rcu_read_lock();
3860	task = pid_task(pid, PIDTYPE_PID);
3861	if (!task)
3862	goto fail;
3863
3864	/ Attempt to start with the tid of a thread /
3865	if (tid && nr) {
3866	pos = find_task_by_pid_ns(nr: tid, ns);
3867	if (pos && same_thread_group(p1: pos, p2: task))
3868	goto found;
3869	}
3870
3871	/ If nr exceeds the number of threads there is nothing todo /
3872	if (nr >= get_nr_threads(task))
3873	goto fail;
3874
3875	/ If we haven't found our starting place yet start*
3876	* with the leader and walk nr threads forward.
3877	*/
3878	for_each_thread(task, pos) {
3879	if (!nr--)
3880	goto found;
3881	}
3882	fail:
3883	pos = NULL;
3884	goto out;
3885	found:
3886	get_task_struct(t: pos);
3887	out:
3888	rcu_read_unlock();
3889	return pos;
3890	}
3891
3892	/*
3893	* Find the next thread in the thread list.
3894	* Return NULL if there is an error or no next thread.
3895	*
3896	* The reference to the input task_struct is released.
3897	*/
3898	static struct task_struct next_tid(struct* task_struct *start)
3899	{
3900	struct task_struct *pos = NULL;
3901	rcu_read_lock();
3902	if (pid_alive(p: start)) {
3903	pos = __next_thread(p: start);
3904	if (pos)
3905	get_task_struct(t: pos);
3906	}
3907	rcu_read_unlock();
3908	put_task_struct(t: start);
3909	return pos;
3910	}
3911
3912	/ for the /proc/TGID/task/ directories /
3913	static int proc_task_readdir(struct file file, struct* dir_context *ctx)
3914	{
3915	struct inode *inode = file_inode(f: file);
3916	struct task_struct *task;
3917	struct pid_namespace *ns;
3918	int tid;
3919
3920	if (proc_inode_is_dead(inode))
3921	return -ENOENT;
3922
3923	if (!dir_emit_dots(file, ctx))
3924	return `0`;
3925
3926	/ We cache the tgid value that the last readdir call couldn't*
3927	* return and lseek resets it to 0.
3928	*/
3929	ns = proc_pid_ns(sb: inode->i_sb);
3930	tid = (int)(intptr_t)file->private_data;
3931	file->private_data = NULL;
3932	for (task = first_tid(pid: proc_pid(inode), tid, f_pos: ctx->pos - `2`, ns);
3933	task;
3934	task = next_tid(start: task), ctx->pos++) {
3935	char name[`10` + `1`];
3936	unsigned int len;
3937
3938	tid = task_pid_nr_ns(tsk: task, ns);
3939	if (!tid)
3940	continue; / The task has just exited. /
3941	len = snprintf(buf: name, size: sizeof(name), fmt: "%d", tid);
3942	if (!proc_fill_cache(file, ctx, name, len,
3943	instantiate: proc_task_instantiate, task, NULL)) {
3944	/ returning this tgid failed, save it as the first*
3945	* pid for the next readir call */
3946	file->private_data = (void *)(intptr_t)tid;
3947	put_task_struct(t: task);
3948	break;
3949	}
3950	}
3951
3952	return `0`;
3953	}
3954
3955	static int proc_task_getattr(struct mnt_idmap *idmap,
3956	const struct path path, struct* kstat *stat,
3957	u32 request_mask, unsigned int query_flags)
3958	{
3959	struct inode *inode = d_inode(dentry: path->dentry);
3960	struct task_struct *p = get_proc_task(inode);
3961	generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
3962
3963	if (p) {
3964	stat->nlink += get_nr_threads(task: p);
3965	put_task_struct(t: p);
3966	}
3967
3968	return `0`;
3969	}
3970
3971	/*
3972	* proc_task_readdir() set @file->private_data to a positive integer
3973	* value, so casting that to u64 is safe. generic_llseek_cookie() will
3974	* set @cookie to 0, so casting to an int is safe. The WARN_ON_ONCE() is
3975	* here to catch any unexpected change in behavior either in
3976	* proc_task_readdir() or generic_llseek_cookie().
3977	*/
3978	static loff_t proc_dir_llseek(struct file file, loff_t offset, int* whence)
3979	{
3980	u64 cookie = (u64)(intptr_t)file->private_data;
3981	loff_t off;
3982
3983	off = generic_llseek_cookie(file, offset, whence, cookie: &cookie);
3984	WARN_ON_ONCE(cookie > INT_MAX);
3985	file->private_data = (void )(intptr_t)cookie; /* serialized by f_pos_lock /
3986	return off;
3987	}
3988
3989	static const struct inode_operations proc_task_inode_operations = {
3990	.lookup = proc_task_lookup,
3991	.getattr = proc_task_getattr,
3992	.setattr = proc_setattr,
3993	.permission = proc_pid_permission,
3994	};
3995
3996	static const struct file_operations proc_task_operations = {
3997	.read = generic_read_dir,
3998	.iterate_shared = proc_task_readdir,
3999	.llseek = proc_dir_llseek,
4000	};
4001
4002	void __init set_proc_pid_nlink(void)
4003	{
4004	nlink_tid = pid_entry_nlink(entries: tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
4005	nlink_tgid = pid_entry_nlink(entries: tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
4006	}
4007

source code of linux/fs/proc/base.c