1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * linux/fs/file.c |
4 | * |
5 | * Copyright (C) 1998-1999, Stephen Tweedie and Bill Hawes |
6 | * |
7 | * Manage the dynamic fd arrays in the process files_struct. |
8 | */ |
9 | |
10 | #include <linux/syscalls.h> |
11 | #include <linux/export.h> |
12 | #include <linux/fs.h> |
13 | #include <linux/kernel.h> |
14 | #include <linux/mm.h> |
15 | #include <linux/sched/signal.h> |
16 | #include <linux/slab.h> |
17 | #include <linux/file.h> |
18 | #include <linux/fdtable.h> |
19 | #include <linux/bitops.h> |
20 | #include <linux/spinlock.h> |
21 | #include <linux/rcupdate.h> |
22 | #include <linux/close_range.h> |
23 | #include <linux/file_ref.h> |
24 | #include <net/sock.h> |
25 | #include <linux/init_task.h> |
26 | |
27 | #include "internal.h" |
28 | |
29 | static noinline bool __file_ref_put_badval(file_ref_t *ref, unsigned long cnt) |
30 | { |
31 | /* |
32 | * If the reference count was already in the dead zone, then this |
33 | * put() operation is imbalanced. Warn, put the reference count back to |
34 | * DEAD and tell the caller to not deconstruct the object. |
35 | */ |
36 | if (WARN_ONCE(cnt >= FILE_REF_RELEASED, "imbalanced put on file reference count")) { |
37 | atomic_long_set(v: &ref->refcnt, FILE_REF_DEAD); |
38 | return false; |
39 | } |
40 | |
41 | /* |
42 | * This is a put() operation on a saturated refcount. Restore the |
43 | * mean saturation value and tell the caller to not deconstruct the |
44 | * object. |
45 | */ |
46 | if (cnt > FILE_REF_MAXREF) |
47 | atomic_long_set(v: &ref->refcnt, FILE_REF_SATURATED); |
48 | return false; |
49 | } |
50 | |
51 | /** |
52 | * __file_ref_put - Slowpath of file_ref_put() |
53 | * @ref: Pointer to the reference count |
54 | * @cnt: Current reference count |
55 | * |
56 | * Invoked when the reference count is outside of the valid zone. |
57 | * |
58 | * Return: |
59 | * True if this was the last reference with no future references |
60 | * possible. This signals the caller that it can safely schedule the |
61 | * object, which is protected by the reference counter, for |
62 | * deconstruction. |
63 | * |
64 | * False if there are still active references or the put() raced |
65 | * with a concurrent get()/put() pair. Caller is not allowed to |
66 | * deconstruct the protected object. |
67 | */ |
68 | bool __file_ref_put(file_ref_t *ref, unsigned long cnt) |
69 | { |
70 | /* Did this drop the last reference? */ |
71 | if (likely(cnt == FILE_REF_NOREF)) { |
72 | /* |
73 | * Carefully try to set the reference count to FILE_REF_DEAD. |
74 | * |
75 | * This can fail if a concurrent get() operation has |
76 | * elevated it again or the corresponding put() even marked |
77 | * it dead already. Both are valid situations and do not |
78 | * require a retry. If this fails the caller is not |
79 | * allowed to deconstruct the object. |
80 | */ |
81 | if (!atomic_long_try_cmpxchg_release(v: &ref->refcnt, old: &cnt, FILE_REF_DEAD)) |
82 | return false; |
83 | |
84 | /* |
85 | * The caller can safely schedule the object for |
86 | * deconstruction. Provide acquire ordering. |
87 | */ |
88 | smp_acquire__after_ctrl_dep(); |
89 | return true; |
90 | } |
91 | |
92 | return __file_ref_put_badval(ref, cnt); |
93 | } |
94 | EXPORT_SYMBOL_GPL(__file_ref_put); |
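The slowpath above only runs when a put sees a count outside the valid zone. A minimal sketch of the fast-path caller contract, under the assumption of a hypothetical object (`struct foo`, `foo_put()` are placeholders, not part of this file):

```c
#include <linux/file_ref.h>
#include <linux/slab.h>

/* Hypothetical object protected by a file_ref_t counter. */
struct foo {
	file_ref_t ref;
	/* ... payload ... */
};

static void foo_put(struct foo *p)
{
	/*
	 * file_ref_put() returns true only for the final put; on the
	 * slowpath it ends up in __file_ref_put() above, which decides
	 * whether the object may really be torn down.
	 */
	if (file_ref_put(&p->ref))
		kfree(p);
}
```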
95 | |
96 | unsigned int sysctl_nr_open __read_mostly = 1024*1024; |
97 | unsigned int sysctl_nr_open_min = BITS_PER_LONG; |
98 | /* our min() is unusable in constant expressions ;-/ */ |
99 | #define __const_min(x, y) ((x) < (y) ? (x) : (y)) |
100 | unsigned int sysctl_nr_open_max = |
101 | __const_min(INT_MAX, ~(size_t)0/sizeof(void *)) & -BITS_PER_LONG; |
102 | |
103 | static void __free_fdtable(struct fdtable *fdt) |
104 | { |
105 | kvfree(addr: fdt->fd); |
106 | kvfree(addr: fdt->open_fds); |
107 | kfree(objp: fdt); |
108 | } |
109 | |
110 | static void free_fdtable_rcu(struct rcu_head *rcu) |
111 | { |
112 | __free_fdtable(container_of(rcu, struct fdtable, rcu)); |
113 | } |
114 | |
115 | #define BITBIT_NR(nr) BITS_TO_LONGS(BITS_TO_LONGS(nr)) |
116 | #define BITBIT_SIZE(nr) (BITBIT_NR(nr) * sizeof(long)) |
117 | |
118 | #define fdt_words(fdt) ((fdt)->max_fds / BITS_PER_LONG) // words in ->open_fds |
119 | /* |
120 | * Copy 'copy_words' words of the fd bitmaps from the old table to the new |
121 | * table and clear the extra space, if any. This does not copy the file |
122 | * pointers. Called with the files spinlock held for write. |
123 | */ |
124 | static inline void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt, |
125 | unsigned int copy_words) |
126 | { |
127 | unsigned int nwords = fdt_words(nfdt); |
128 | |
129 | bitmap_copy_and_extend(to: nfdt->open_fds, from: ofdt->open_fds, |
130 | count: copy_words * BITS_PER_LONG, size: nwords * BITS_PER_LONG); |
131 | bitmap_copy_and_extend(to: nfdt->close_on_exec, from: ofdt->close_on_exec, |
132 | count: copy_words * BITS_PER_LONG, size: nwords * BITS_PER_LONG); |
133 | bitmap_copy_and_extend(to: nfdt->full_fds_bits, from: ofdt->full_fds_bits, |
134 | count: copy_words, size: nwords); |
135 | } |
136 | |
137 | /* |
138 | * Copy all file descriptors from the old table to the new, expanded table and |
139 | * clear the extra space. Called with the files spinlock held for write. |
140 | */ |
141 | static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) |
142 | { |
143 | size_t cpy, set; |
144 | |
145 | BUG_ON(nfdt->max_fds < ofdt->max_fds); |
146 | |
147 | cpy = ofdt->max_fds * sizeof(struct file *); |
148 | set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *); |
149 | memcpy(nfdt->fd, ofdt->fd, cpy); |
150 | memset((char *)nfdt->fd + cpy, 0, set); |
151 | |
152 | copy_fd_bitmaps(nfdt, ofdt, fdt_words(ofdt)); |
153 | } |
154 | |
155 | /* |
156 | * Note how the fdtable bitmap allocations very much have to be a multiple of |
157 | * BITS_PER_LONG. This is not only because we walk those things in chunks of |
158 | * 'unsigned long' in some places, but simply because that is how the Linux |
159 | * kernel bitmaps are defined to work: they are not "bits in an array of bytes", |
160 | * they are very much "bits in an array of unsigned long". |
161 | */ |
162 | static struct fdtable *alloc_fdtable(unsigned int slots_wanted) |
163 | { |
164 | struct fdtable *fdt; |
165 | unsigned int nr; |
166 | void *data; |
167 | |
168 | /* |
169 | * Figure out how many fds we actually want to support in this fdtable. |
170 | * Allocation steps are keyed to the size of the fdarray, since it |
171 | * grows far faster than any of the other dynamic data. We try to fit |
172 | * the fdarray into comfortable page-tuned chunks: starting at 1024B |
173 | * and growing in powers of two from there on. Since we are only called |
174 | * with slots_wanted > BITS_PER_LONG (embedded instance in files->fdtab |
175 | * already gives BITS_PER_LONG slots), the above boils down to |
176 | * 1. use the smallest power of two large enough to give us that many |
177 | * slots. |
178 | * 2. on 32bit skip 64 and 128 - the minimal capacity we want there is |
179 | * 256 slots (i.e. 1Kb fd array). |
180 | * 3. on 64bit don't skip anything, 1Kb fd array means 128 slots there |
181 | * and we are never going to be asked for 64 or less. |
182 | */ |
183 | if (IS_ENABLED(CONFIG_32BIT) && slots_wanted < 256) |
184 | nr = 256; |
185 | else |
186 | nr = roundup_pow_of_two(slots_wanted); |
187 | /* |
188 | * Note that this can drive nr *below* what we had passed if sysctl_nr_open |
189 | * had been set lower between the check in expand_files() and here. |
190 | * |
191 | * We make sure that nr remains a multiple of BITS_PER_LONG - otherwise |
192 | * bitmaps handling below becomes unpleasant, to put it mildly... |
193 | */ |
194 | if (unlikely(nr > sysctl_nr_open)) { |
195 | nr = round_down(sysctl_nr_open, BITS_PER_LONG); |
196 | if (nr < slots_wanted) |
197 | return ERR_PTR(error: -EMFILE); |
198 | } |
199 | |
200 | fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL_ACCOUNT); |
201 | if (!fdt) |
202 | goto out; |
203 | fdt->max_fds = nr; |
204 | data = kvmalloc_array(nr, sizeof(struct file *), GFP_KERNEL_ACCOUNT); |
205 | if (!data) |
206 | goto out_fdt; |
207 | fdt->fd = data; |
208 | |
209 | data = kvmalloc(max_t(size_t, |
210 | 2 * nr / BITS_PER_BYTE + BITBIT_SIZE(nr), L1_CACHE_BYTES), |
211 | GFP_KERNEL_ACCOUNT); |
212 | if (!data) |
213 | goto out_arr; |
214 | fdt->open_fds = data; |
215 | data += nr / BITS_PER_BYTE; |
216 | fdt->close_on_exec = data; |
217 | data += nr / BITS_PER_BYTE; |
218 | fdt->full_fds_bits = data; |
219 | |
220 | return fdt; |
221 | |
222 | out_arr: |
223 | kvfree(addr: fdt->fd); |
224 | out_fdt: |
225 | kfree(objp: fdt); |
226 | out: |
227 | return ERR_PTR(error: -ENOMEM); |
228 | } |
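To make the sizing rules above concrete, here is a small worked example (the numbers are illustrative, assuming a 64-bit kernel with BITS_PER_LONG == 64 and L1_CACHE_BYTES == 64):

```c
/*
 * slots_wanted = 300
 *   nr = roundup_pow_of_two(300)            = 512 slots
 *   fd array        = 512 * sizeof(void *)  = 4096 bytes (one page)
 *   open_fds        = 512 / 8               =   64 bytes
 *   close_on_exec   = 512 / 8               =   64 bytes
 *   full_fds_bits   = BITBIT_SIZE(512)      =    8 bytes
 *   bitmap chunk    = max(64 + 64 + 8, L1_CACHE_BYTES) = 136 bytes,
 *   carved up so that open_fds, close_on_exec and full_fds_bits are
 *   consecutive regions of a single kvmalloc() allocation.
 */
```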
229 | |
230 | /* |
231 | * Expand the file descriptor table. |
232 | * This function will allocate a new fdtable and both fd array and fdset, of |
233 | * the given size. |
234 | * Return <0 error code on error; 0 on successful completion. |
235 | * The files->file_lock should be held on entry, and will be held on exit. |
236 | */ |
237 | static int expand_fdtable(struct files_struct *files, unsigned int nr) |
238 | __releases(files->file_lock) |
239 | __acquires(files->file_lock) |
240 | { |
241 | struct fdtable *new_fdt, *cur_fdt; |
242 | |
243 | spin_unlock(lock: &files->file_lock); |
244 | new_fdt = alloc_fdtable(slots_wanted: nr + 1); |
245 | |
246 | /* make sure all fd_install() callers have seen resize_in_progress |
247 | * or have finished their rcu_read_lock_sched() section. |
248 | */ |
249 | if (atomic_read(v: &files->count) > 1) |
250 | synchronize_rcu(); |
251 | |
252 | spin_lock(lock: &files->file_lock); |
253 | if (IS_ERR(ptr: new_fdt)) |
254 | return PTR_ERR(ptr: new_fdt); |
255 | cur_fdt = files_fdtable(files); |
256 | BUG_ON(nr < cur_fdt->max_fds); |
257 | copy_fdtable(nfdt: new_fdt, ofdt: cur_fdt); |
258 | rcu_assign_pointer(files->fdt, new_fdt); |
259 | if (cur_fdt != &files->fdtab) |
260 | call_rcu(head: &cur_fdt->rcu, func: free_fdtable_rcu); |
261 | /* coupled with smp_rmb() in fd_install() */ |
262 | smp_wmb(); |
263 | return 0; |
264 | } |
265 | |
266 | /* |
267 | * Expand files. |
268 | * This function will expand the file structures, if the requested size exceeds |
269 | * the current capacity and there is room for expansion. |
270 | * Return <0 error code on error; 0 on success. |
271 | * The files->file_lock should be held on entry, and will be held on exit. |
272 | */ |
273 | static int expand_files(struct files_struct *files, unsigned int nr) |
274 | __releases(files->file_lock) |
275 | __acquires(files->file_lock) |
276 | { |
277 | struct fdtable *fdt; |
278 | int error; |
279 | |
280 | repeat: |
281 | fdt = files_fdtable(files); |
282 | |
283 | /* Do we need to expand? */ |
284 | if (nr < fdt->max_fds) |
285 | return 0; |
286 | |
287 | if (unlikely(files->resize_in_progress)) { |
288 | spin_unlock(lock: &files->file_lock); |
289 | wait_event(files->resize_wait, !files->resize_in_progress); |
290 | spin_lock(lock: &files->file_lock); |
291 | goto repeat; |
292 | } |
293 | |
294 | /* Can we expand? */ |
295 | if (unlikely(nr >= sysctl_nr_open)) |
296 | return -EMFILE; |
297 | |
298 | /* All good, so we try */ |
299 | files->resize_in_progress = true; |
300 | error = expand_fdtable(files, nr); |
301 | files->resize_in_progress = false; |
302 | |
303 | wake_up_all(&files->resize_wait); |
304 | return error; |
305 | } |
306 | |
307 | static inline void __set_close_on_exec(unsigned int fd, struct fdtable *fdt, |
308 | bool set) |
309 | { |
310 | if (set) { |
311 | __set_bit(fd, fdt->close_on_exec); |
312 | } else { |
313 | if (test_bit(fd, fdt->close_on_exec)) |
314 | __clear_bit(fd, fdt->close_on_exec); |
315 | } |
316 | } |
317 | |
318 | static inline void __set_open_fd(unsigned int fd, struct fdtable *fdt, bool set) |
319 | { |
320 | __set_bit(fd, fdt->open_fds); |
321 | __set_close_on_exec(fd, fdt, set); |
322 | fd /= BITS_PER_LONG; |
323 | if (!~fdt->open_fds[fd]) |
324 | __set_bit(fd, fdt->full_fds_bits); |
325 | } |
326 | |
327 | static inline void __clear_open_fd(unsigned int fd, struct fdtable *fdt) |
328 | { |
329 | __clear_bit(fd, fdt->open_fds); |
330 | fd /= BITS_PER_LONG; |
331 | if (test_bit(fd, fdt->full_fds_bits)) |
332 | __clear_bit(fd, fdt->full_fds_bits); |
333 | } |
334 | |
335 | static inline bool fd_is_open(unsigned int fd, const struct fdtable *fdt) |
336 | { |
337 | return test_bit(fd, fdt->open_fds); |
338 | } |
339 | |
340 | /* |
341 | * Note that a sane fdtable size always has to be a multiple of |
342 | * BITS_PER_LONG, since we have bitmaps that are sized by this. |
343 | * |
344 | * punch_hole is optional - when close_range() is asked to unshare |
345 | * and close, we don't need to copy descriptors in that range, so |
346 | * a smaller cloned descriptor table might suffice if the last |
347 | * currently opened descriptor falls into that range. |
348 | */ |
349 | static unsigned int sane_fdtable_size(struct fdtable *fdt, struct fd_range *punch_hole) |
350 | { |
351 | unsigned int last = find_last_bit(addr: fdt->open_fds, size: fdt->max_fds); |
352 | |
353 | if (last == fdt->max_fds) |
354 | return NR_OPEN_DEFAULT; |
355 | if (punch_hole && punch_hole->to >= last && punch_hole->from <= last) { |
356 | last = find_last_bit(addr: fdt->open_fds, size: punch_hole->from); |
357 | if (last == punch_hole->from) |
358 | return NR_OPEN_DEFAULT; |
359 | } |
360 | return ALIGN(last + 1, BITS_PER_LONG); |
361 | } |
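A worked example of the hole-punching logic above (illustrative numbers, assuming BITS_PER_LONG == 64):

```c
/*
 * Example: the old table has max_fds == 256, the highest open fd is 200,
 * and close_range(100, 255, CLOSE_RANGE_UNSHARE) asked for punch_hole =
 * {.from = 100, .to = 255}. Since 100 <= 200 <= 255, the first
 * find_last_bit() result (200) lies inside the hole, so we search again
 * below bit 100. If the highest open fd under 100 is 37, the clone only
 * needs ALIGN(38, 64) == 64 slots instead of ALIGN(201, 64) == 256.
 */
```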
362 | |
363 | /* |
364 | * Allocate a new descriptor table and copy contents from the passed in |
365 | * instance. Returns a pointer to cloned table on success, ERR_PTR() |
366 | * on failure. For 'punch_hole' see sane_fdtable_size(). |
367 | */ |
368 | struct files_struct *dup_fd(struct files_struct *oldf, struct fd_range *punch_hole) |
369 | { |
370 | struct files_struct *newf; |
371 | struct file **old_fds, **new_fds; |
372 | unsigned int open_files, i; |
373 | struct fdtable *old_fdt, *new_fdt; |
374 | |
375 | newf = kmem_cache_alloc(files_cachep, GFP_KERNEL); |
376 | if (!newf) |
377 | return ERR_PTR(error: -ENOMEM); |
378 | |
379 | atomic_set(v: &newf->count, i: 1); |
380 | |
381 | spin_lock_init(&newf->file_lock); |
382 | newf->resize_in_progress = false; |
383 | init_waitqueue_head(&newf->resize_wait); |
384 | newf->next_fd = 0; |
385 | new_fdt = &newf->fdtab; |
386 | new_fdt->max_fds = NR_OPEN_DEFAULT; |
387 | new_fdt->close_on_exec = newf->close_on_exec_init; |
388 | new_fdt->open_fds = newf->open_fds_init; |
389 | new_fdt->full_fds_bits = newf->full_fds_bits_init; |
390 | new_fdt->fd = &newf->fd_array[0]; |
391 | |
392 | spin_lock(lock: &oldf->file_lock); |
393 | old_fdt = files_fdtable(oldf); |
394 | open_files = sane_fdtable_size(fdt: old_fdt, punch_hole); |
395 | |
396 | /* |
397 | * Check whether we need to allocate a larger fd array and fd set. |
398 | */ |
399 | while (unlikely(open_files > new_fdt->max_fds)) { |
400 | spin_unlock(lock: &oldf->file_lock); |
401 | |
402 | if (new_fdt != &newf->fdtab) |
403 | __free_fdtable(fdt: new_fdt); |
404 | |
405 | new_fdt = alloc_fdtable(slots_wanted: open_files); |
406 | if (IS_ERR(ptr: new_fdt)) { |
407 | kmem_cache_free(s: files_cachep, objp: newf); |
408 | return ERR_CAST(ptr: new_fdt); |
409 | } |
410 | |
411 | /* |
412 | * Reacquire the oldf lock and a fresh pointer to its fd table: |
413 | * it may have grown a new, bigger fd table in the meantime, so |
414 | * we need the latest pointer. |
415 | */ |
416 | spin_lock(lock: &oldf->file_lock); |
417 | old_fdt = files_fdtable(oldf); |
418 | open_files = sane_fdtable_size(fdt: old_fdt, punch_hole); |
419 | } |
420 | |
421 | copy_fd_bitmaps(nfdt: new_fdt, ofdt: old_fdt, copy_words: open_files / BITS_PER_LONG); |
422 | |
423 | old_fds = old_fdt->fd; |
424 | new_fds = new_fdt->fd; |
425 | |
426 | /* |
427 | * We may be racing against fd allocation from other threads using this |
428 | * files_struct, despite holding ->file_lock. |
429 | * |
430 | * alloc_fd() might have already claimed a slot, while fd_install() |
431 | * did not populate it yet. Note the latter operates locklessly, so |
432 | * the file can show up as we are walking the array below. |
433 | * |
434 | * At the same time we know no files will disappear as all other |
435 | * operations take the lock. |
436 | * |
437 | * Instead of trying to placate userspace racing with itself, we |
438 | * ref the file if we see it and mark the fd slot as unused otherwise. |
439 | */ |
440 | for (i = open_files; i != 0; i--) { |
441 | struct file *f = rcu_dereference_raw(*old_fds++); |
442 | if (f) { |
443 | get_file(f); |
444 | } else { |
445 | __clear_open_fd(fd: open_files - i, fdt: new_fdt); |
446 | } |
447 | rcu_assign_pointer(*new_fds++, f); |
448 | } |
449 | spin_unlock(lock: &oldf->file_lock); |
450 | |
451 | /* clear the remainder */ |
452 | memset(new_fds, 0, (new_fdt->max_fds - open_files) * sizeof(struct file *)); |
453 | |
454 | rcu_assign_pointer(newf->fdt, new_fdt); |
455 | |
456 | return newf; |
457 | } |
458 | |
459 | static struct fdtable *close_files(struct files_struct * files) |
460 | { |
461 | /* |
462 | * It is safe to dereference the fd table without RCU or |
463 | * ->file_lock because this is the last reference to the |
464 | * files structure. |
465 | */ |
466 | struct fdtable *fdt = rcu_dereference_raw(files->fdt); |
467 | unsigned int i, j = 0; |
468 | |
469 | for (;;) { |
470 | unsigned long set; |
471 | i = j * BITS_PER_LONG; |
472 | if (i >= fdt->max_fds) |
473 | break; |
474 | set = fdt->open_fds[j++]; |
475 | while (set) { |
476 | if (set & 1) { |
477 | struct file *file = fdt->fd[i]; |
478 | if (file) { |
479 | filp_close(file, id: files); |
480 | cond_resched(); |
481 | } |
482 | } |
483 | i++; |
484 | set >>= 1; |
485 | } |
486 | } |
487 | |
488 | return fdt; |
489 | } |
490 | |
491 | void put_files_struct(struct files_struct *files) |
492 | { |
493 | if (atomic_dec_and_test(v: &files->count)) { |
494 | struct fdtable *fdt = close_files(files); |
495 | |
496 | /* free the arrays if they are not embedded */ |
497 | if (fdt != &files->fdtab) |
498 | __free_fdtable(fdt); |
499 | kmem_cache_free(s: files_cachep, objp: files); |
500 | } |
501 | } |
502 | |
503 | void exit_files(struct task_struct *tsk) |
504 | { |
505 | struct files_struct * files = tsk->files; |
506 | |
507 | if (files) { |
508 | task_lock(p: tsk); |
509 | tsk->files = NULL; |
510 | task_unlock(p: tsk); |
511 | put_files_struct(files); |
512 | } |
513 | } |
514 | |
515 | struct files_struct init_files = { |
516 | .count = ATOMIC_INIT(1), |
517 | .fdt = &init_files.fdtab, |
518 | .fdtab = { |
519 | .max_fds = NR_OPEN_DEFAULT, |
520 | .fd = &init_files.fd_array[0], |
521 | .close_on_exec = init_files.close_on_exec_init, |
522 | .open_fds = init_files.open_fds_init, |
523 | .full_fds_bits = init_files.full_fds_bits_init, |
524 | }, |
525 | .file_lock = __SPIN_LOCK_UNLOCKED(init_files.file_lock), |
526 | .resize_wait = __WAIT_QUEUE_HEAD_INITIALIZER(init_files.resize_wait), |
527 | }; |
528 | |
529 | static unsigned int find_next_fd(struct fdtable *fdt, unsigned int start) |
530 | { |
531 | unsigned int maxfd = fdt->max_fds; /* always multiple of BITS_PER_LONG */ |
532 | unsigned int maxbit = maxfd / BITS_PER_LONG; |
533 | unsigned int bitbit = start / BITS_PER_LONG; |
534 | unsigned int bit; |
535 | |
536 | /* |
537 | * Try to avoid looking at the second level bitmap |
538 | */ |
539 | bit = find_next_zero_bit(addr: &fdt->open_fds[bitbit], BITS_PER_LONG, |
540 | offset: start & (BITS_PER_LONG - 1)); |
541 | if (bit < BITS_PER_LONG) |
542 | return bit + bitbit * BITS_PER_LONG; |
543 | |
544 | bitbit = find_next_zero_bit(addr: fdt->full_fds_bits, size: maxbit, offset: bitbit) * BITS_PER_LONG; |
545 | if (bitbit >= maxfd) |
546 | return maxfd; |
547 | if (bitbit > start) |
548 | start = bitbit; |
549 | return find_next_zero_bit(addr: fdt->open_fds, size: maxfd, offset: start); |
550 | } |
551 | |
552 | /* |
553 | * allocate a file descriptor, mark it busy. |
554 | */ |
555 | static int alloc_fd(unsigned start, unsigned end, unsigned flags) |
556 | { |
557 | struct files_struct *files = current->files; |
558 | unsigned int fd; |
559 | int error; |
560 | struct fdtable *fdt; |
561 | |
562 | spin_lock(lock: &files->file_lock); |
563 | repeat: |
564 | fdt = files_fdtable(files); |
565 | fd = start; |
566 | if (fd < files->next_fd) |
567 | fd = files->next_fd; |
568 | |
569 | if (likely(fd < fdt->max_fds)) |
570 | fd = find_next_fd(fdt, start: fd); |
571 | |
572 | /* |
573 | * N.B. For clone tasks sharing a files structure, this test |
574 | * will limit the total number of files that can be opened. |
575 | */ |
576 | error = -EMFILE; |
577 | if (unlikely(fd >= end)) |
578 | goto out; |
579 | |
580 | if (unlikely(fd >= fdt->max_fds)) { |
581 | error = expand_files(files, nr: fd); |
582 | if (error < 0) |
583 | goto out; |
584 | |
585 | goto repeat; |
586 | } |
587 | |
588 | if (start <= files->next_fd) |
589 | files->next_fd = fd + 1; |
590 | |
591 | __set_open_fd(fd, fdt, set: flags & O_CLOEXEC); |
592 | error = fd; |
593 | VFS_BUG_ON(rcu_access_pointer(fdt->fd[fd]) != NULL); |
594 | |
595 | out: |
596 | spin_unlock(lock: &files->file_lock); |
597 | return error; |
598 | } |
599 | |
600 | int __get_unused_fd_flags(unsigned flags, unsigned long nofile) |
601 | { |
602 | return alloc_fd(start: 0, end: nofile, flags); |
603 | } |
604 | |
605 | int get_unused_fd_flags(unsigned flags) |
606 | { |
607 | return __get_unused_fd_flags(flags, nofile: rlimit(RLIMIT_NOFILE)); |
608 | } |
609 | EXPORT_SYMBOL(get_unused_fd_flags); |
610 | |
611 | static void __put_unused_fd(struct files_struct *files, unsigned int fd) |
612 | { |
613 | struct fdtable *fdt = files_fdtable(files); |
614 | __clear_open_fd(fd, fdt); |
615 | if (fd < files->next_fd) |
616 | files->next_fd = fd; |
617 | } |
618 | |
619 | void put_unused_fd(unsigned int fd) |
620 | { |
621 | struct files_struct *files = current->files; |
622 | spin_lock(lock: &files->file_lock); |
623 | __put_unused_fd(files, fd); |
624 | spin_unlock(lock: &files->file_lock); |
625 | } |
626 | |
627 | EXPORT_SYMBOL(put_unused_fd); |
628 | |
629 | /** |
630 | * fd_install - install a file pointer in the fd array |
631 | * @fd: file descriptor to install the file in |
632 | * @file: the file to install |
633 | * |
634 | * This consumes the "file" refcount, so callers should treat it |
635 | * as if they had called fput(file). |
636 | */ |
637 | void fd_install(unsigned int fd, struct file *file) |
638 | { |
639 | struct files_struct *files = current->files; |
640 | struct fdtable *fdt; |
641 | |
642 | if (WARN_ON_ONCE(unlikely(file->f_mode & FMODE_BACKING))) |
643 | return; |
644 | |
645 | rcu_read_lock_sched(); |
646 | |
647 | if (unlikely(files->resize_in_progress)) { |
648 | rcu_read_unlock_sched(); |
649 | spin_lock(lock: &files->file_lock); |
650 | fdt = files_fdtable(files); |
651 | VFS_BUG_ON(rcu_access_pointer(fdt->fd[fd]) != NULL); |
652 | rcu_assign_pointer(fdt->fd[fd], file); |
653 | spin_unlock(lock: &files->file_lock); |
654 | return; |
655 | } |
656 | /* coupled with smp_wmb() in expand_fdtable() */ |
657 | smp_rmb(); |
658 | fdt = rcu_dereference_sched(files->fdt); |
659 | VFS_BUG_ON(rcu_access_pointer(fdt->fd[fd]) != NULL); |
660 | rcu_assign_pointer(fdt->fd[fd], file); |
661 | rcu_read_unlock_sched(); |
662 | } |
663 | |
664 | EXPORT_SYMBOL(fd_install); |
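Because fd_install() consumes the file reference, callers usually reserve the descriptor first, create the file, and only publish it once nothing can fail anymore. A minimal sketch of that pattern (example_create_fd() and the "[example]" name are placeholders; anon_inode_getfile() is just one common way to obtain a file here):

```c
#include <linux/anon_inodes.h>
#include <linux/err.h>
#include <linux/fcntl.h>
#include <linux/file.h>
#include <linux/fs.h>

static int example_create_fd(void *priv, const struct file_operations *fops)
{
	struct file *file;
	int fd;

	/* Reserve a descriptor; on failure nothing needs to be undone. */
	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0)
		return fd;

	file = anon_inode_getfile("[example]", fops, priv, O_RDWR);
	if (IS_ERR(file)) {
		put_unused_fd(fd);	/* release the reserved slot */
		return PTR_ERR(file);
	}

	/* Publish: fd_install() consumes the file reference. */
	fd_install(fd, file);
	return fd;
}
```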
665 | |
666 | /** |
667 | * file_close_fd_locked - return file associated with fd |
668 | * @files: file struct to retrieve file from |
669 | * @fd: file descriptor to retrieve file for |
670 | * |
671 | * Doesn't take a separate reference count. |
672 | * |
673 | * Context: files_lock must be held. |
674 | * |
675 | * Returns: The file associated with @fd (NULL if @fd is not open) |
676 | */ |
677 | struct file *file_close_fd_locked(struct files_struct *files, unsigned fd) |
678 | { |
679 | struct fdtable *fdt = files_fdtable(files); |
680 | struct file *file; |
681 | |
682 | lockdep_assert_held(&files->file_lock); |
683 | |
684 | if (fd >= fdt->max_fds) |
685 | return NULL; |
686 | |
687 | fd = array_index_nospec(fd, fdt->max_fds); |
688 | file = rcu_dereference_raw(fdt->fd[fd]); |
689 | if (file) { |
690 | rcu_assign_pointer(fdt->fd[fd], NULL); |
691 | __put_unused_fd(files, fd); |
692 | } |
693 | return file; |
694 | } |
695 | |
696 | int close_fd(unsigned fd) |
697 | { |
698 | struct files_struct *files = current->files; |
699 | struct file *file; |
700 | |
701 | spin_lock(lock: &files->file_lock); |
702 | file = file_close_fd_locked(files, fd); |
703 | spin_unlock(lock: &files->file_lock); |
704 | if (!file) |
705 | return -EBADF; |
706 | |
707 | return filp_close(file, id: files); |
708 | } |
709 | EXPORT_SYMBOL(close_fd); |
710 | |
711 | /** |
712 | * last_fd - return last valid index into fd table |
713 | * @fdt: File descriptor table. |
714 | * |
715 | * Context: Either rcu read lock or files_lock must be held. |
716 | * |
717 | * Returns: Last valid index into fdtable. |
718 | */ |
719 | static inline unsigned last_fd(struct fdtable *fdt) |
720 | { |
721 | return fdt->max_fds - 1; |
722 | } |
723 | |
724 | static inline void __range_cloexec(struct files_struct *cur_fds, |
725 | unsigned int fd, unsigned int max_fd) |
726 | { |
727 | struct fdtable *fdt; |
728 | |
729 | /* make sure we're using the correct maximum value */ |
730 | spin_lock(lock: &cur_fds->file_lock); |
731 | fdt = files_fdtable(cur_fds); |
732 | max_fd = min(last_fd(fdt), max_fd); |
733 | if (fd <= max_fd) |
734 | bitmap_set(map: fdt->close_on_exec, start: fd, nbits: max_fd - fd + 1); |
735 | spin_unlock(lock: &cur_fds->file_lock); |
736 | } |
737 | |
738 | static inline void __range_close(struct files_struct *files, unsigned int fd, |
739 | unsigned int max_fd) |
740 | { |
741 | struct file *file; |
742 | unsigned n; |
743 | |
744 | spin_lock(lock: &files->file_lock); |
745 | n = last_fd(files_fdtable(files)); |
746 | max_fd = min(max_fd, n); |
747 | |
748 | for (; fd <= max_fd; fd++) { |
749 | file = file_close_fd_locked(files, fd); |
750 | if (file) { |
751 | spin_unlock(lock: &files->file_lock); |
752 | filp_close(file, id: files); |
753 | cond_resched(); |
754 | spin_lock(lock: &files->file_lock); |
755 | } else if (need_resched()) { |
756 | spin_unlock(lock: &files->file_lock); |
757 | cond_resched(); |
758 | spin_lock(lock: &files->file_lock); |
759 | } |
760 | } |
761 | spin_unlock(lock: &files->file_lock); |
762 | } |
763 | |
764 | /** |
765 | * sys_close_range() - Close all file descriptors in a given range. |
766 | * |
767 | * @fd: starting file descriptor to close |
768 | * @max_fd: last file descriptor to close |
769 | * @flags: CLOSE_RANGE flags. |
770 | * |
771 | * This closes a range of file descriptors. All file descriptors |
772 | * from @fd up to and including @max_fd are closed. |
773 | * Currently, errors from closing a given file descriptor are ignored. |
774 | */ |
775 | SYSCALL_DEFINE3(close_range, unsigned int, fd, unsigned int, max_fd, |
776 | unsigned int, flags) |
777 | { |
778 | struct task_struct *me = current; |
779 | struct files_struct *cur_fds = me->files, *fds = NULL; |
780 | |
781 | if (flags & ~(CLOSE_RANGE_UNSHARE | CLOSE_RANGE_CLOEXEC)) |
782 | return -EINVAL; |
783 | |
784 | if (fd > max_fd) |
785 | return -EINVAL; |
786 | |
787 | if ((flags & CLOSE_RANGE_UNSHARE) && atomic_read(v: &cur_fds->count) > 1) { |
788 | struct fd_range range = {fd, max_fd}, *punch_hole = ⦥ |
789 | |
790 | /* |
791 | * If the caller requested all fds to be made cloexec we always |
792 | * copy all of the file descriptors since they still want to |
793 | * use them. |
794 | */ |
795 | if (flags & CLOSE_RANGE_CLOEXEC) |
796 | punch_hole = NULL; |
797 | |
798 | fds = dup_fd(oldf: cur_fds, punch_hole); |
799 | if (IS_ERR(ptr: fds)) |
800 | return PTR_ERR(ptr: fds); |
801 | /* |
802 | * We used to share our file descriptor table, and have now |
803 | * created a private one, make sure we're using it below. |
804 | */ |
805 | swap(cur_fds, fds); |
806 | } |
807 | |
808 | if (flags & CLOSE_RANGE_CLOEXEC) |
809 | __range_cloexec(cur_fds, fd, max_fd); |
810 | else |
811 | __range_close(files: cur_fds, fd, max_fd); |
812 | |
813 | if (fds) { |
814 | /* |
815 | * We're done closing the files we were supposed to. Time to install |
816 | * the new file descriptor table and drop the old one. |
817 | */ |
818 | task_lock(p: me); |
819 | me->files = cur_fds; |
820 | task_unlock(p: me); |
821 | put_files_struct(files: fds); |
822 | } |
823 | |
824 | return 0; |
825 | } |
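For reference, a hedged userspace sketch of how this syscall is typically invoked (assuming headers new enough to define SYS_close_range; glibc >= 2.34 also provides a close_range() wrapper):

```c
#define _GNU_SOURCE
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	/*
	 * Close every descriptor from 3 upwards, e.g. before exec'ing a
	 * helper; stdin/stdout/stderr (0-2) are left alone. Passing
	 * CLOSE_RANGE_CLOEXEC instead of 0 would mark them close-on-exec
	 * rather than closing them now.
	 */
	if (syscall(SYS_close_range, 3, ~0U, 0) < 0)
		return 1;
	return 0;
}
```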
826 | |
827 | /** |
828 | * file_close_fd - return file associated with fd |
829 | * @fd: file descriptor to retrieve file for |
830 | * |
831 | * Doesn't take a separate reference count. |
832 | * |
833 | * Returns: The file associated with @fd (NULL if @fd is not open) |
834 | */ |
835 | struct file *file_close_fd(unsigned int fd) |
836 | { |
837 | struct files_struct *files = current->files; |
838 | struct file *file; |
839 | |
840 | spin_lock(lock: &files->file_lock); |
841 | file = file_close_fd_locked(files, fd); |
842 | spin_unlock(lock: &files->file_lock); |
843 | |
844 | return file; |
845 | } |
846 | |
847 | void do_close_on_exec(struct files_struct *files) |
848 | { |
849 | unsigned i; |
850 | struct fdtable *fdt; |
851 | |
852 | /* exec unshares first */ |
853 | spin_lock(lock: &files->file_lock); |
854 | for (i = 0; ; i++) { |
855 | unsigned long set; |
856 | unsigned fd = i * BITS_PER_LONG; |
857 | fdt = files_fdtable(files); |
858 | if (fd >= fdt->max_fds) |
859 | break; |
860 | set = fdt->close_on_exec[i]; |
861 | if (!set) |
862 | continue; |
863 | fdt->close_on_exec[i] = 0; |
864 | for ( ; set ; fd++, set >>= 1) { |
865 | struct file *file; |
866 | if (!(set & 1)) |
867 | continue; |
868 | file = fdt->fd[fd]; |
869 | if (!file) |
870 | continue; |
871 | rcu_assign_pointer(fdt->fd[fd], NULL); |
872 | __put_unused_fd(files, fd); |
873 | spin_unlock(lock: &files->file_lock); |
874 | filp_close(file, id: files); |
875 | cond_resched(); |
876 | spin_lock(lock: &files->file_lock); |
877 | } |
878 | |
879 | } |
880 | spin_unlock(lock: &files->file_lock); |
881 | } |
882 | |
883 | static struct file *__get_file_rcu(struct file __rcu **f) |
884 | { |
885 | struct file __rcu *file; |
886 | struct file __rcu *file_reloaded; |
887 | struct file __rcu *file_reloaded_cmp; |
888 | |
889 | file = rcu_dereference_raw(*f); |
890 | if (!file) |
891 | return NULL; |
892 | |
893 | if (unlikely(!file_ref_get(&file->f_ref))) |
894 | return ERR_PTR(error: -EAGAIN); |
895 | |
896 | file_reloaded = rcu_dereference_raw(*f); |
897 | |
898 | /* |
899 | * Ensure that all accesses have a dependency on the load from |
900 | * rcu_dereference_raw() above so we get correct ordering |
901 | * between reuse/allocation and the pointer check below. |
902 | */ |
903 | file_reloaded_cmp = file_reloaded; |
904 | OPTIMIZER_HIDE_VAR(file_reloaded_cmp); |
905 | |
906 | /* |
907 | * file_ref_get() above provided a full memory barrier when we |
908 | * acquired a reference. |
909 | * |
910 | * This is paired with the write barrier from assigning to the |
911 | * __rcu protected file pointer so that if that pointer still |
912 | * matches the current file, we know we have successfully |
913 | * acquired a reference to the right file. |
914 | * |
915 | * If the pointers don't match the file has been reallocated by |
916 | * SLAB_TYPESAFE_BY_RCU. |
917 | */ |
918 | if (file == file_reloaded_cmp) |
919 | return file_reloaded; |
920 | |
921 | fput(file); |
922 | return ERR_PTR(error: -EAGAIN); |
923 | } |
924 | |
925 | /** |
926 | * get_file_rcu - try to get a reference to a file under rcu |
927 | * @f: the file to get a reference on |
928 | * |
929 | * This function tries to get a reference on @f carefully verifying that |
930 | * @f hasn't been reused. |
931 | * |
932 | * This function should rarely have to be used and only by users who |
933 | * understand the implications of SLAB_TYPESAFE_BY_RCU. Try to avoid it. |
934 | * |
935 | * Return: Returns @f with the reference count increased or NULL. |
936 | */ |
937 | struct file *get_file_rcu(struct file __rcu **f) |
938 | { |
939 | for (;;) { |
940 | struct file __rcu *file; |
941 | |
942 | file = __get_file_rcu(f); |
943 | if (!IS_ERR(ptr: file)) |
944 | return file; |
945 | } |
946 | } |
947 | EXPORT_SYMBOL_GPL(get_file_rcu); |
948 | |
949 | /** |
950 | * get_file_active - try to get a reference to a file |
951 | * @f: the file to get a reference on |
952 | * |
953 | * In contrast to get_file_rcu(), the pointer itself isn't part of the |
954 | * reference counting. |
955 | * |
956 | * This function should rarely have to be used and only by users who |
957 | * understand the implications of SLAB_TYPESAFE_BY_RCU. Try to avoid it. |
958 | * |
959 | * Return: Returns @f with the reference count increased or NULL. |
960 | */ |
961 | struct file *get_file_active(struct file **f) |
962 | { |
963 | struct file __rcu *file; |
964 | |
965 | rcu_read_lock(); |
966 | file = __get_file_rcu(f); |
967 | rcu_read_unlock(); |
968 | if (IS_ERR(ptr: file)) |
969 | file = NULL; |
970 | return file; |
971 | } |
972 | EXPORT_SYMBOL_GPL(get_file_active); |
973 | |
974 | static inline struct file *__fget_files_rcu(struct files_struct *files, |
975 | unsigned int fd, fmode_t mask) |
976 | { |
977 | for (;;) { |
978 | struct file *file; |
979 | struct fdtable *fdt = rcu_dereference_raw(files->fdt); |
980 | struct file __rcu **fdentry; |
981 | unsigned long nospec_mask; |
982 | |
983 | /* Mask is a 0 for invalid fd's, ~0 for valid ones */ |
984 | nospec_mask = array_index_mask_nospec(fd, fdt->max_fds); |
985 | |
986 | /* |
987 | * fdentry points to the 'fd' offset, or fdt->fd[0]. |
988 | * Loading from fdt->fd[0] is always safe, because the |
989 | * array always exists. |
990 | */ |
991 | fdentry = fdt->fd + (fd & nospec_mask); |
992 | |
993 | /* Do the load, then mask any invalid result */ |
994 | file = rcu_dereference_raw(*fdentry); |
995 | file = (void *)(nospec_mask & (unsigned long)file); |
996 | if (unlikely(!file)) |
997 | return NULL; |
998 | |
999 | /* |
1000 | * Ok, we have a file pointer that was valid at |
1001 | * some point, but it might have become stale since. |
1002 | * |
1003 | * We need to confirm it by incrementing the refcount |
1004 | * and then check the lookup again. |
1005 | * |
1006 | * file_ref_get() gives us a full memory barrier. We |
1007 | * only really need an 'acquire' one to protect the |
1008 | * loads below, but we don't have that. |
1009 | */ |
1010 | if (unlikely(!file_ref_get(&file->f_ref))) |
1011 | continue; |
1012 | |
1013 | /* |
1014 | * Such a race can take two forms: |
1015 | * |
1016 | * (a) the file ref already went down to zero and the |
1017 | * file hasn't been reused yet or the file count |
1018 | * isn't zero but the file has already been reused. |
1019 | * |
1020 | * (b) the file table entry has changed under us. |
1021 | * Note that we don't need to re-check the 'fdt->fd' |
1022 | * pointer having changed, because it always goes |
1023 | * hand-in-hand with 'fdt'. |
1024 | * |
1025 | * If so, we need to put our ref and try again. |
1026 | */ |
1027 | if (unlikely(file != rcu_dereference_raw(*fdentry)) || |
1028 | unlikely(rcu_dereference_raw(files->fdt) != fdt)) { |
1029 | fput(file); |
1030 | continue; |
1031 | } |
1032 | |
1033 | /* |
1034 | * This isn't the file we're looking for or we're not |
1035 | * allowed to get a reference to it. |
1036 | */ |
1037 | if (unlikely(file->f_mode & mask)) { |
1038 | fput(file); |
1039 | return NULL; |
1040 | } |
1041 | |
1042 | /* |
1043 | * Ok, we have a ref to the file, and checked that it |
1044 | * still exists. |
1045 | */ |
1046 | return file; |
1047 | } |
1048 | } |
1049 | |
1050 | static struct file *__fget_files(struct files_struct *files, unsigned int fd, |
1051 | fmode_t mask) |
1052 | { |
1053 | struct file *file; |
1054 | |
1055 | rcu_read_lock(); |
1056 | file = __fget_files_rcu(files, fd, mask); |
1057 | rcu_read_unlock(); |
1058 | |
1059 | return file; |
1060 | } |
1061 | |
1062 | static inline struct file *__fget(unsigned int fd, fmode_t mask) |
1063 | { |
1064 | return __fget_files(current->files, fd, mask); |
1065 | } |
1066 | |
1067 | struct file *fget(unsigned int fd) |
1068 | { |
1069 | return __fget(fd, FMODE_PATH); |
1070 | } |
1071 | EXPORT_SYMBOL(fget); |
1072 | |
1073 | struct file *fget_raw(unsigned int fd) |
1074 | { |
1075 | return __fget(fd, mask: 0); |
1076 | } |
1077 | EXPORT_SYMBOL(fget_raw); |
1078 | |
1079 | struct file *fget_task(struct task_struct *task, unsigned int fd) |
1080 | { |
1081 | struct file *file = NULL; |
1082 | |
1083 | task_lock(p: task); |
1084 | if (task->files) |
1085 | file = __fget_files(files: task->files, fd, mask: 0); |
1086 | task_unlock(p: task); |
1087 | |
1088 | return file; |
1089 | } |
1090 | |
1091 | struct file *fget_task_next(struct task_struct *task, unsigned int *ret_fd) |
1092 | { |
1093 | /* Must be called with rcu_read_lock held */ |
1094 | struct files_struct *files; |
1095 | unsigned int fd = *ret_fd; |
1096 | struct file *file = NULL; |
1097 | |
1098 | task_lock(p: task); |
1099 | files = task->files; |
1100 | if (files) { |
1101 | rcu_read_lock(); |
1102 | for (; fd < files_fdtable(files)->max_fds; fd++) { |
1103 | file = __fget_files_rcu(files, fd, mask: 0); |
1104 | if (file) |
1105 | break; |
1106 | } |
1107 | rcu_read_unlock(); |
1108 | } |
1109 | task_unlock(p: task); |
1110 | *ret_fd = fd; |
1111 | return file; |
1112 | } |
1113 | EXPORT_SYMBOL(fget_task_next); |
1114 | |
1115 | /* |
1116 | * Lightweight file lookup - no refcnt increment if fd table isn't shared. |
1117 | * |
1118 | * You can use this instead of fget if you satisfy all of the following |
1119 | * conditions: |
1120 | * 1) You must call fput_light before exiting the syscall and returning control |
1121 | * to userspace (i.e. you cannot remember the returned struct file * after |
1122 | * returning to userspace). |
1123 | * 2) You must not call filp_close on the returned struct file * in between |
1124 | * calls to fget_light and fput_light. |
1125 | * 3) You must not clone the current task in between the calls to fget_light |
1126 | * and fput_light. |
1127 | * |
1128 | * The fput_needed flag returned by fget_light should be passed to the |
1129 | * corresponding fput_light. |
1130 | * |
1131 | * (As an exception to rule 2, you can call filp_close between fget_light and |
1132 | * fput_light provided that you capture a real refcount with get_file before |
1133 | * the call to filp_close, and ensure that this real refcount is fput *after* |
1134 | * the fput_light call.) |
1135 | * |
1136 | * See also the documentation in rust/kernel/file.rs. |
1137 | */ |
1138 | static inline struct fd __fget_light(unsigned int fd, fmode_t mask) |
1139 | { |
1140 | struct files_struct *files = current->files; |
1141 | struct file *file; |
1142 | |
1143 | /* |
1144 | * If another thread is concurrently calling close_fd() followed |
1145 | * by put_files_struct(), we must not observe the old table |
1146 | * entry combined with the new refcount - otherwise we could |
1147 | * return a file that is concurrently being freed. |
1148 | * |
1149 | * atomic_read_acquire() pairs with atomic_dec_and_test() in |
1150 | * put_files_struct(). |
1151 | */ |
1152 | if (likely(atomic_read_acquire(&files->count) == 1)) { |
1153 | file = files_lookup_fd_raw(files, fd); |
1154 | if (!file || unlikely(file->f_mode & mask)) |
1155 | return EMPTY_FD; |
1156 | return BORROWED_FD(file); |
1157 | } else { |
1158 | file = __fget_files(files, fd, mask); |
1159 | if (!file) |
1160 | return EMPTY_FD; |
1161 | return CLONED_FD(file); |
1162 | } |
1163 | } |
1164 | struct fd fdget(unsigned int fd) |
1165 | { |
1166 | return __fget_light(fd, FMODE_PATH); |
1167 | } |
1168 | EXPORT_SYMBOL(fdget); |
1169 | |
1170 | struct fd fdget_raw(unsigned int fd) |
1171 | { |
1172 | return __fget_light(fd, mask: 0); |
1173 | } |
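A minimal sketch of how a syscall-style helper consumes such a borrowed reference (example_op_on_fd() is a placeholder and vfs_fsync() stands in for "some operation on the file"; fd_empty(), fd_file() and fdput() are the struct fd accessors used in this tree):

```c
#include <linux/file.h>
#include <linux/fs.h>

static long example_op_on_fd(unsigned int fd)
{
	struct fd f = fdget(fd);
	long ret;

	if (fd_empty(f))
		return -EBADF;

	/* Use the file only within this window; do not stash the pointer. */
	ret = vfs_fsync(fd_file(f), 0);

	fdput(f);
	return ret;
}
```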
1174 | |
1175 | /* |
1176 | * Try to avoid f_pos locking. We only need it if the |
1177 | * file is marked for FMODE_ATOMIC_POS, and it can be |
1178 | * accessed multiple ways. |
1179 | * |
1180 | * Always do it for directories, because pidfd_getfd() |
1181 | * can make a file accessible even if it otherwise would |
1182 | * not be, and for directories this is a correctness |
1183 | * issue, not a "POSIX requirement". |
1184 | */ |
1185 | static inline bool file_needs_f_pos_lock(struct file *file) |
1186 | { |
1187 | if (!(file->f_mode & FMODE_ATOMIC_POS)) |
1188 | return false; |
1189 | if (__file_ref_read_raw(ref: &file->f_ref) != FILE_REF_ONEREF) |
1190 | return true; |
1191 | if (file->f_op->iterate_shared) |
1192 | return true; |
1193 | return false; |
1194 | } |
1195 | |
1196 | bool file_seek_cur_needs_f_lock(struct file *file) |
1197 | { |
1198 | if (!(file->f_mode & FMODE_ATOMIC_POS) && !file->f_op->iterate_shared) |
1199 | return false; |
1200 | |
1201 | VFS_WARN_ON_ONCE((file_count(file) > 1) && |
1202 | !mutex_is_locked(&file->f_pos_lock)); |
1203 | return true; |
1204 | } |
1205 | |
1206 | struct fd fdget_pos(unsigned int fd) |
1207 | { |
1208 | struct fd f = fdget(fd); |
1209 | struct file *file = fd_file(f); |
1210 | |
1211 | if (likely(file) && file_needs_f_pos_lock(file)) { |
1212 | f.word |= FDPUT_POS_UNLOCK; |
1213 | mutex_lock(&file->f_pos_lock); |
1214 | } |
1215 | return f; |
1216 | } |
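The f_pos variant pairs with fdput_pos(), which drops the lock taken above when FDPUT_POS_UNLOCK was set. A minimal sketch of the lseek-style pattern (example_llseek() is a placeholder, not kernel API):

```c
#include <linux/file.h>
#include <linux/fs.h>

static loff_t example_llseek(unsigned int fd, loff_t offset, int whence)
{
	struct fd f = fdget_pos(fd);
	loff_t ret = -EBADF;

	if (fd_empty(f))
		return ret;

	/* f->f_pos is stable here if file_needs_f_pos_lock() said so. */
	ret = vfs_llseek(fd_file(f), offset, whence);

	fdput_pos(f);
	return ret;
}
```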
1217 | |
1218 | void __f_unlock_pos(struct file *f) |
1219 | { |
1220 | mutex_unlock(lock: &f->f_pos_lock); |
1221 | } |
1222 | |
1223 | /* |
1224 | * We only lock f_pos if we have threads or if the file might be |
1225 | * shared with another process. In both cases we'll have an elevated |
1226 | * file count (done either by fdget() or by fork()). |
1227 | */ |
1228 | |
1229 | void set_close_on_exec(unsigned int fd, int flag) |
1230 | { |
1231 | struct files_struct *files = current->files; |
1232 | spin_lock(lock: &files->file_lock); |
1233 | __set_close_on_exec(fd, files_fdtable(files), set: flag); |
1234 | spin_unlock(lock: &files->file_lock); |
1235 | } |
1236 | |
1237 | bool get_close_on_exec(unsigned int fd) |
1238 | { |
1239 | bool res; |
1240 | rcu_read_lock(); |
1241 | res = close_on_exec(fd, current->files); |
1242 | rcu_read_unlock(); |
1243 | return res; |
1244 | } |
1245 | |
1246 | static int do_dup2(struct files_struct *files, |
1247 | struct file *file, unsigned fd, unsigned flags) |
1248 | __releases(&files->file_lock) |
1249 | { |
1250 | struct file *tofree; |
1251 | struct fdtable *fdt; |
1252 | |
1253 | /* |
1254 | * dup2() is expected to close the file installed in the target fd slot |
1255 | * (if any). However, userspace hand-picking a fd may be racing against |
1256 | * its own threads which happened to allocate it in open() et al but did |
1257 | * not populate it yet. |
1258 | * |
1259 | * Broadly speaking we may be racing against the following: |
1260 | * fd = get_unused_fd_flags(); // fd slot reserved, ->fd[fd] == NULL |
1261 | * file = hard_work_goes_here(); |
1262 | * fd_install(fd, file); // only now ->fd[fd] == file |
1263 | * |
1264 | * It is an invariant that a successfully allocated fd has a NULL entry |
1265 | * in the array until the matching fd_install(). |
1266 | * |
1267 | * If we fit the window, we have the fd to populate, yet no target file |
1268 | * to close. Trying to ignore it and install our new file would violate |
1269 | * the invariant and make fd_install() overwrite our file. |
1270 | * |
1271 | * Things can be done(tm) to handle this. However, the issue does not |
1272 | * concern legitimate programs and we only need to make sure the kernel |
1273 | * does not trip over it. |
1274 | * |
1275 | * The simplest way out is to return an error if we find ourselves here. |
1276 | * |
1277 | * POSIX is silent on the issue, so we return -EBUSY. |
1278 | */ |
1279 | fdt = files_fdtable(files); |
1280 | fd = array_index_nospec(fd, fdt->max_fds); |
1281 | tofree = rcu_dereference_raw(fdt->fd[fd]); |
1282 | if (!tofree && fd_is_open(fd, fdt)) |
1283 | goto Ebusy; |
1284 | get_file(f: file); |
1285 | rcu_assign_pointer(fdt->fd[fd], file); |
1286 | __set_open_fd(fd, fdt, set: flags & O_CLOEXEC); |
1287 | spin_unlock(lock: &files->file_lock); |
1288 | |
1289 | if (tofree) |
1290 | filp_close(tofree, id: files); |
1291 | |
1292 | return fd; |
1293 | |
1294 | Ebusy: |
1295 | spin_unlock(lock: &files->file_lock); |
1296 | return -EBUSY; |
1297 | } |
1298 | |
1299 | int replace_fd(unsigned fd, struct file *file, unsigned flags) |
1300 | { |
1301 | int err; |
1302 | struct files_struct *files = current->files; |
1303 | |
1304 | if (!file) |
1305 | return close_fd(fd); |
1306 | |
1307 | if (fd >= rlimit(RLIMIT_NOFILE)) |
1308 | return -EBADF; |
1309 | |
1310 | spin_lock(lock: &files->file_lock); |
1311 | err = expand_files(files, nr: fd); |
1312 | if (unlikely(err < 0)) |
1313 | goto out_unlock; |
1314 | return do_dup2(files, file, fd, flags); |
1315 | |
1316 | out_unlock: |
1317 | spin_unlock(lock: &files->file_lock); |
1318 | return err; |
1319 | } |
1320 | |
1321 | /** |
1322 | * receive_fd() - Install received file into file descriptor table |
1323 | * @file: struct file that was received from another process |
1324 | * @ufd: __user pointer to write new fd number to |
1325 | * @o_flags: the O_* flags to apply to the new fd entry |
1326 | * |
1327 | * Installs a received file into the file descriptor table, with appropriate |
1328 | * checks and count updates. Optionally writes the fd number to userspace, if |
1329 | * @ufd is non-NULL. |
1330 | * |
1331 | * This helper handles its own reference counting of the incoming |
1332 | * struct file. |
1333 | * |
1334 | * Returns newly install fd or -ve on error. |
1335 | */ |
1336 | int receive_fd(struct file *file, int __user *ufd, unsigned int o_flags) |
1337 | { |
1338 | int new_fd; |
1339 | int error; |
1340 | |
1341 | error = security_file_receive(file); |
1342 | if (error) |
1343 | return error; |
1344 | |
1345 | new_fd = get_unused_fd_flags(o_flags); |
1346 | if (new_fd < 0) |
1347 | return new_fd; |
1348 | |
1349 | if (ufd) { |
1350 | error = put_user(new_fd, ufd); |
1351 | if (error) { |
1352 | put_unused_fd(new_fd); |
1353 | return error; |
1354 | } |
1355 | } |
1356 | |
1357 | fd_install(new_fd, get_file(f: file)); |
1358 | __receive_sock(file); |
1359 | return new_fd; |
1360 | } |
1361 | EXPORT_SYMBOL_GPL(receive_fd); |
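A hedged sketch of a receiver-side caller handing a file into the current task's descriptor table (example_deliver_file() is a placeholder; the surrounding channel that produced 'file' is assumed):

```c
#include <linux/fcntl.h>
#include <linux/file.h>

/*
 * 'file' is a struct file the caller holds a reference on (for instance
 * obtained via fget() on the sending side). receive_fd() takes its own
 * reference, so the caller's reference remains the caller's to drop.
 */
static int example_deliver_file(struct file *file)
{
	int fd;

	fd = receive_fd(file, NULL, O_CLOEXEC);
	if (fd < 0)
		return fd;	/* LSM veto, fd table full, ... */

	/* fd now refers to 'file' in the current task's fd table. */
	return fd;
}
```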
1362 | |
1363 | int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags) |
1364 | { |
1365 | int error; |
1366 | |
1367 | error = security_file_receive(file); |
1368 | if (error) |
1369 | return error; |
1370 | error = replace_fd(fd: new_fd, file, flags: o_flags); |
1371 | if (error) |
1372 | return error; |
1373 | __receive_sock(file); |
1374 | return new_fd; |
1375 | } |
1376 | |
1377 | static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags) |
1378 | { |
1379 | int err = -EBADF; |
1380 | struct file *file; |
1381 | struct files_struct *files = current->files; |
1382 | |
1383 | if ((flags & ~O_CLOEXEC) != 0) |
1384 | return -EINVAL; |
1385 | |
1386 | if (unlikely(oldfd == newfd)) |
1387 | return -EINVAL; |
1388 | |
1389 | if (newfd >= rlimit(RLIMIT_NOFILE)) |
1390 | return -EBADF; |
1391 | |
1392 | spin_lock(lock: &files->file_lock); |
1393 | err = expand_files(files, nr: newfd); |
1394 | file = files_lookup_fd_locked(files, fd: oldfd); |
1395 | if (unlikely(!file)) |
1396 | goto Ebadf; |
1397 | if (unlikely(err < 0)) { |
1398 | if (err == -EMFILE) |
1399 | goto Ebadf; |
1400 | goto out_unlock; |
1401 | } |
1402 | return do_dup2(files, file, fd: newfd, flags); |
1403 | |
1404 | Ebadf: |
1405 | err = -EBADF; |
1406 | out_unlock: |
1407 | spin_unlock(lock: &files->file_lock); |
1408 | return err; |
1409 | } |
1410 | |
1411 | SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) |
1412 | { |
1413 | return ksys_dup3(oldfd, newfd, flags); |
1414 | } |
1415 | |
1416 | SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) |
1417 | { |
1418 | if (unlikely(newfd == oldfd)) { /* corner case */ |
1419 | struct files_struct *files = current->files; |
1420 | struct file *f; |
1421 | int retval = oldfd; |
1422 | |
1423 | rcu_read_lock(); |
1424 | f = __fget_files_rcu(files, fd: oldfd, mask: 0); |
1425 | if (!f) |
1426 | retval = -EBADF; |
1427 | rcu_read_unlock(); |
1428 | if (f) |
1429 | fput(f); |
1430 | return retval; |
1431 | } |
1432 | return ksys_dup3(oldfd, newfd, flags: 0); |
1433 | } |
1434 | |
1435 | SYSCALL_DEFINE1(dup, unsigned int, fildes) |
1436 | { |
1437 | int ret = -EBADF; |
1438 | struct file *file = fget_raw(fildes); |
1439 | |
1440 | if (file) { |
1441 | ret = get_unused_fd_flags(0); |
1442 | if (ret >= 0) |
1443 | fd_install(ret, file); |
1444 | else |
1445 | fput(file); |
1446 | } |
1447 | return ret; |
1448 | } |
1449 | |
1450 | int f_dupfd(unsigned int from, struct file *file, unsigned flags) |
1451 | { |
1452 | unsigned long nofile = rlimit(RLIMIT_NOFILE); |
1453 | int err; |
1454 | if (from >= nofile) |
1455 | return -EINVAL; |
1456 | err = alloc_fd(start: from, end: nofile, flags); |
1457 | if (err >= 0) { |
1458 | get_file(f: file); |
1459 | fd_install(err, file); |
1460 | } |
1461 | return err; |
1462 | } |
1463 | |
1464 | int iterate_fd(struct files_struct *files, unsigned n, |
1465 | int (*f)(const void *, struct file *, unsigned), |
1466 | const void *p) |
1467 | { |
1468 | struct fdtable *fdt; |
1469 | int res = 0; |
1470 | if (!files) |
1471 | return 0; |
1472 | spin_lock(lock: &files->file_lock); |
1473 | for (fdt = files_fdtable(files); n < fdt->max_fds; n++) { |
1474 | struct file *file; |
1475 | file = rcu_dereference_check_fdtable(files, fdt->fd[n]); |
1476 | if (!file) |
1477 | continue; |
1478 | res = f(p, file, n); |
1479 | if (res) |
1480 | break; |
1481 | } |
1482 | spin_unlock(lock: &files->file_lock); |
1483 | return res; |
1484 | } |
1485 | EXPORT_SYMBOL(iterate_fd); |
1486 |