fs.h source code [linux/include/linux/fs.h]

/* SPDX-License-Identifier: GPL-2.0 */
2	#ifndef _LINUX_FS_H
3	#define _LINUX_FS_H
4
5	#include <linux/linkage.h>
6	#include <linux/wait_bit.h>
7	#include <linux/kdev_t.h>
8	#include <linux/dcache.h>
9	#include <linux/path.h>
10	#include <linux/stat.h>
11	#include <linux/cache.h>
12	#include <linux/list.h>
13	#include <linux/list_lru.h>
14	#include <linux/llist.h>
15	#include <linux/radix-tree.h>
16	#include <linux/xarray.h>
17	#include <linux/rbtree.h>
18	#include <linux/init.h>
19	#include <linux/pid.h>
20	#include <linux/bug.h>
21	#include <linux/mutex.h>
22	#include <linux/rwsem.h>
23	#include <linux/mm_types.h>
24	#include <linux/capability.h>
25	#include <linux/semaphore.h>
26	#include <linux/fcntl.h>
27	#include <linux/rculist_bl.h>
28	#include <linux/atomic.h>
29	#include <linux/shrinker.h>
30	#include <linux/migrate_mode.h>
31	#include <linux/uidgid.h>
32	#include <linux/lockdep.h>
33	#include <linux/percpu-rwsem.h>
34	#include <linux/workqueue.h>
35	#include <linux/delayed_call.h>
36	#include <linux/uuid.h>
37	#include <linux/errseq.h>
38	#include <linux/ioprio.h>
39	#include <linux/fs_types.h>
40	#include <linux/build_bug.h>
41	#include <linux/stddef.h>
42	#include <linux/mount.h>
43	#include <linux/cred.h>
44	#include <linux/mnt_idmapping.h>
45	#include <linux/slab.h>
46	#include <linux/maple_tree.h>
47	#include <linux/rw_hint.h>
48
49	#include <asm/byteorder.h>
50	#include <uapi/linux/fs.h>
51
52	struct backing_dev_info;
53	struct bdi_writeback;
54	struct bio;
55	struct io_comp_batch;
56	struct export_operations;
57	struct fiemap_extent_info;
58	struct hd_geometry;
59	struct iovec;
60	struct kiocb;
61	struct kobject;
62	struct pipe_inode_info;
63	struct poll_table_struct;
64	struct kstatfs;
65	struct vm_area_struct;
66	struct vfsmount;
67	struct cred;
68	struct swap_info_struct;
69	struct seq_file;
70	struct workqueue_struct;
71	struct iov_iter;
72	struct fscrypt_inode_info;
73	struct fscrypt_operations;
74	struct fsverity_info;
75	struct fsverity_operations;
76	struct fs_context;
77	struct fs_parameter_spec;
78	struct fileattr;
79	struct iomap_ops;
80
81	extern void __init inode_init(void);
82	extern void __init inode_init_early(void);
83	extern void __init files_init(void);
84	extern void __init files_maxfiles_init(void);
85
86	extern unsigned long get_max_files(void);
87	extern unsigned int sysctl_nr_open;
88
89	typedef __kernel_rwf_t rwf_t;
90
91	struct buffer_head;
92	typedef int (get_block_t)(struct inode *inode, sector_t iblock,
93	struct buffer_head bh_result, int* create);
94	typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
95	ssize_t bytes, void *private);
96
/* MAY_* permission-request bits; each value is a distinct single bit. */
#define MAY_EXEC		0x00000001
#define MAY_WRITE		0x00000002
#define MAY_READ		0x00000004
#define MAY_APPEND		0x00000008
#define MAY_ACCESS		0x00000010
#define MAY_OPEN		0x00000020
#define MAY_CHDIR		0x00000040
/* called from RCU mode, don't block */
#define MAY_NOT_BLOCK		0x00000080
106
/*
 * flags in file.f_mode.  Note that FMODE_READ and FMODE_WRITE must correspond
 * to O_WRONLY and O_RDWR via the strange trick in do_dentry_open()
 */

/* file is open for reading */
#define FMODE_READ		((__force fmode_t)0x1)
/* file is open for writing */
#define FMODE_WRITE		((__force fmode_t)0x2)
/* file is seekable */
#define FMODE_LSEEK		((__force fmode_t)0x4)
/* file can be accessed using pread */
#define FMODE_PREAD		((__force fmode_t)0x8)
/* file can be accessed using pwrite */
#define FMODE_PWRITE		((__force fmode_t)0x10)
/* File is opened for execution with sys_execve / sys_uselib */
#define FMODE_EXEC		((__force fmode_t)0x20)
/* File writes are restricted (block device specific) */
#define FMODE_WRITE_RESTRICTED	((__force fmode_t)0x40)
/* 32bit hashes as llseek() offset (for directories) */
#define FMODE_32BITHASH		((__force fmode_t)0x200)
/* 64bit hashes as llseek() offset (for directories) */
#define FMODE_64BITHASH		((__force fmode_t)0x400)

/*
 * Don't update ctime and mtime.
 *
 * Currently a special hack for the XFS open_by_handle ioctl, but we'll
 * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon.
 */
#define FMODE_NOCMTIME		((__force fmode_t)0x800)

/* Expect random access pattern */
#define FMODE_RANDOM		((__force fmode_t)0x1000)

/* File is huge (eg. /dev/mem): treat loff_t as unsigned */
#define FMODE_UNSIGNED_OFFSET	((__force fmode_t)0x2000)

/* File is opened with O_PATH; almost nothing can be done with it */
#define FMODE_PATH		((__force fmode_t)0x4000)

/* File needs atomic accesses to f_pos */
#define FMODE_ATOMIC_POS	((__force fmode_t)0x8000)
/* Write access to underlying fs */
#define FMODE_WRITER		((__force fmode_t)0x10000)
/* Has read method(s) */
#define FMODE_CAN_READ		((__force fmode_t)0x20000)
/* Has write method(s) */
#define FMODE_CAN_WRITE		((__force fmode_t)0x40000)

#define FMODE_OPENED		((__force fmode_t)0x80000)
#define FMODE_CREATED		((__force fmode_t)0x100000)

/* File is stream-like */
#define FMODE_STREAM		((__force fmode_t)0x200000)

/* File supports DIRECT IO */
#define FMODE_CAN_ODIRECT	((__force fmode_t)0x400000)

#define FMODE_NOREUSE		((__force fmode_t)0x800000)

/* File supports non-exclusive O_DIRECT writes from multiple threads */
#define FMODE_DIO_PARALLEL_WRITE	((__force fmode_t)0x1000000)

/* File is embedded in backing_file object */
#define FMODE_BACKING		((__force fmode_t)0x2000000)

/* File was opened by fanotify and shouldn't generate fanotify events */
#define FMODE_NONOTIFY		((__force fmode_t)0x4000000)

/* File is capable of returning -EAGAIN if I/O will block */
#define FMODE_NOWAIT		((__force fmode_t)0x8000000)

/* File represents mount that needs unmounting */
#define FMODE_NEED_UNMOUNT	((__force fmode_t)0x10000000)

/* File does not contribute to nr_files count */
#define FMODE_NOACCOUNT		((__force fmode_t)0x20000000)

/* File supports async buffered reads */
#define FMODE_BUF_RASYNC	((__force fmode_t)0x40000000)

/* File supports async nowait buffered writes */
#define FMODE_BUF_WASYNC	((__force fmode_t)0x80000000)
191
/*
 * Attribute flags.  These should be or-ed together to figure out what
 * has been changed!
 *
 * Note that bit 10 is not assigned in this list.
 */
#define ATTR_MODE	(1 << 0)
#define ATTR_UID	(1 << 1)
#define ATTR_GID	(1 << 2)
#define ATTR_SIZE	(1 << 3)
#define ATTR_ATIME	(1 << 4)
#define ATTR_MTIME	(1 << 5)
#define ATTR_CTIME	(1 << 6)
#define ATTR_ATIME_SET	(1 << 7)
#define ATTR_MTIME_SET	(1 << 8)
#define ATTR_FORCE	(1 << 9) /* Not a change, but a change it */
#define ATTR_KILL_SUID	(1 << 11)
#define ATTR_KILL_SGID	(1 << 12)
#define ATTR_FILE	(1 << 13)
#define ATTR_KILL_PRIV	(1 << 14)
#define ATTR_OPEN	(1 << 15) /* Truncating from open(O_TRUNC) */
#define ATTR_TIMES_SET	(1 << 16)
#define ATTR_TOUCH	(1 << 17)
213
/*
 * Whiteout is represented by a char device.  The following constants define the
 * mode and device number to use.
 */
#define WHITEOUT_MODE 0
#define WHITEOUT_DEV 0
220
221	/*
222	* This is the Inode Attributes structure, used for notify_change(). It
223	* uses the above definitions as flags, to know which values have changed.
224	* Also, in this manner, a Filesystem can look at only the values it cares
225	* about. Basically, these are the attributes that the VFS layer can
226	* request to change from the FS layer.
227	*
228	* Derek Atkins <warlord@MIT.EDU> 94-10-20
229	*/
230	struct iattr {
231	unsigned int ia_valid;
232	umode_t ia_mode;
233	/*
234	* The two anonymous unions wrap structures with the same member.
235	*
236	* Filesystems raising FS_ALLOW_IDMAP need to use ia_vfs{g,u}id which
237	* are a dedicated type requiring the filesystem to use the dedicated
238	* helpers. Other filesystem can continue to use ia_{g,u}id until they
239	* have been ported.
240	*
241	* They always contain the same value. In other words FS_ALLOW_IDMAP
242	* pass down the same value on idmapped mounts as they would on regular
243	* mounts.
244	*/
245	union {
246	kuid_t ia_uid;
247	vfsuid_t ia_vfsuid;
248	};
249	union {
250	kgid_t ia_gid;
251	vfsgid_t ia_vfsgid;
252	};
253	loff_t ia_size;
254	struct timespec64 ia_atime;
255	struct timespec64 ia_mtime;
256	struct timespec64 ia_ctime;
257
258	/*
259	* Not an attribute, but an auxiliary info for filesystems wanting to
260	* implement an ftruncate() like method. NOTE: filesystem should
261	* check for (ia_valid & ATTR_FILE), and not for (ia_file != NULL).
262	*/
263	struct file *ia_file;
264	};
265
266	/*
267	* Includes for diskquotas.
268	*/
269	#include <linux/quota.h>
270
/*
 * Maximum number of layers of fs stack.  Needs to be limited to
 * prevent kernel stack overflow
 */
#define FILESYSTEM_MAX_STACK_DEPTH 2
276
/**
 * enum positive_aop_returns - aop return codes with specific semantics
 *
 * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has
 *			    completed, that the page is still locked, and
 *			    should be considered active.  The VM uses this hint
 *			    to return the page to the active list -- it won't
 *			    be a candidate for writeback again in the near
 *			    future.  Other callers must be careful to unlock
 *			    the page if they get this return.  Returned by
 *			    writepage();
 *
 * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has
 *			unlocked it and the page might have been truncated.
 *			The caller should back up to acquiring a new page and
 *			trying again.  The aop will be taking reasonable
 *			precautions not to livelock.  If the caller held a page
 *			reference, it should drop it before retrying.  Returned
 *			by read_folio().
 *
 * address_space_operation functions return these large constants to indicate
 * special semantics to the caller.  These are much larger than the bytes in a
 * page to allow for functions that return the number of bytes operated on in a
 * given page.
 */

enum positive_aop_returns {
	AOP_WRITEPAGE_ACTIVATE	= 0x80000,
	AOP_TRUNCATED_PAGE	= 0x80001,
};
307
/*
 * oh the beauties of C type declarations.
 */
struct page;
struct address_space;
struct writeback_control;
struct readahead_control;
315
/* Match RWF_* bits to IOCB bits */
#define IOCB_HIPRI		(__force int) RWF_HIPRI
#define IOCB_DSYNC		(__force int) RWF_DSYNC
#define IOCB_SYNC		(__force int) RWF_SYNC
#define IOCB_NOWAIT		(__force int) RWF_NOWAIT
#define IOCB_APPEND		(__force int) RWF_APPEND

/* non-RWF related bits - start at 16 */
#define IOCB_EVENTFD		(1 << 16)
#define IOCB_DIRECT		(1 << 17)
#define IOCB_WRITE		(1 << 18)
/* iocb->ki_waitq is valid */
#define IOCB_WAITQ		(1 << 19)
#define IOCB_NOIO		(1 << 20)
/* can use bio alloc cache */
#define IOCB_ALLOC_CACHE	(1 << 21)
/*
 * IOCB_DIO_CALLER_COMP can be set by the iocb owner, to indicate that the
 * iocb completion can be passed back to the owner for execution from a safe
 * context rather than needing to be punted through a workqueue. If this
 * flag is set, the bio completion handling may set iocb->dio_complete to a
 * handler function and iocb->private to context information for that handler.
 * The issuer should call the handler with that context information from task
 * context to complete the processing of the iocb. Note that while this
 * provides a task context for the dio_complete() callback, it should only be
 * used on the completion side for non-IO generating completions. It's fine to
 * call blocking functions from this callback, but they should not wait for
 * unrelated IO (like cache flushing, new IO generation, etc).
 */
#define IOCB_DIO_CALLER_COMP	(1 << 22)
/* kiocb is a read or write operation submitted by fs/aio.c. */
#define IOCB_AIO_RW		(1 << 23)
348
/*
 * for use in trace events
 *
 * Expands to a comma-separated list of { flag, "name" } initialisers.
 */
#define TRACE_IOCB_STRINGS \
	{ IOCB_HIPRI,		"HIPRI" }, \
	{ IOCB_DSYNC,		"DSYNC" }, \
	{ IOCB_SYNC,		"SYNC" }, \
	{ IOCB_NOWAIT,		"NOWAIT" }, \
	{ IOCB_APPEND,		"APPEND" }, \
	{ IOCB_EVENTFD,		"EVENTFD"}, \
	{ IOCB_DIRECT,		"DIRECT" }, \
	{ IOCB_WRITE,		"WRITE" }, \
	{ IOCB_WAITQ,		"WAITQ" }, \
	{ IOCB_NOIO,		"NOIO" }, \
	{ IOCB_ALLOC_CACHE,	"ALLOC_CACHE" }, \
	{ IOCB_DIO_CALLER_COMP,	"CALLER_COMP" }
363
364	struct kiocb {
365	struct file *ki_filp;
366	loff_t ki_pos;
367	void (ki_complete)(struct* kiocb iocb, long* ret);
368	void *private;
369	int ki_flags;
370	u16 ki_ioprio; / See linux/ioprio.h /
371	union {
372	/*
373	* Only used for async buffered reads, where it denotes the
374	* page waitqueue associated with completing the read. Valid
375	* IFF IOCB_WAITQ is set.
376	*/
377	struct wait_page_queue *ki_waitq;
378	/*
379	* Can be used for O_DIRECT IO, where the completion handling
380	* is punted back to the issuer of the IO. May only be set
381	* if IOCB_DIO_CALLER_COMP is set by the issuer, and the issuer
382	* must then check for presence of this handler when ki_complete
383	* is invoked. The data passed in to this handler must be
384	* assigned to ->private when dio_complete is assigned.
385	*/
386	ssize_t (dio_complete)(void* *data);
387	};
388	};
389
390	static inline bool is_sync_kiocb(struct kiocb *kiocb)
391	{
392	return kiocb->ki_complete == NULL;
393	}
394
395	struct address_space_operations {
396	int (writepage)(struct* page page, struct* writeback_control *wbc);
397	int (read_folio)(struct* file , struct* folio *);
398
399	/ Write back some dirty pages from this mapping. /
400	int (writepages)(struct* address_space , struct* writeback_control *);
401
402	/ Mark a folio dirty. Return true if this dirtied it /
403	bool (dirty_folio)(struct* address_space , struct* folio *);
404
405	void (readahead)(struct* readahead_control *);
406
407	int (write_begin)(struct* file , struct* address_space *mapping,
408	loff_t pos, unsigned len,
409	struct page *pagep, void* **fsdata);
410	int (write_end)(struct* file , struct* address_space *mapping,
411	loff_t pos, unsigned len, unsigned copied,
412	struct page page, void* *fsdata);
413
414	/ Unfortunately this kludge is needed for FIBMAP. Don't use it /
415	sector_t (bmap)(struct* address_space *, sector_t);
416	void (invalidate_folio) (struct* folio *, size_t offset, size_t len);
417	bool (release_folio)(struct* folio *, gfp_t);
418	void (free_folio)(struct* folio *folio);
419	ssize_t (direct_IO)(struct* kiocb , struct* iov_iter *iter);
420	/*
421	* migrate the contents of a folio to the specified target. If
422	* migrate_mode is MIGRATE_ASYNC, it must not block.
423	*/
424	int (migrate_folio)(struct* address_space , struct* folio *dst,
425	struct folio src, enum* migrate_mode);
426	int (launder_folio)(struct* folio *);
427	bool (is_partially_uptodate) (struct* folio *, size_t from,
428	size_t count);
429	void (is_dirty_writeback) (struct* folio , bool dirty, bool *wb);
430	int (error_remove_folio)(struct* address_space , struct* folio *);
431
432	/ swapfile support /
433	int (swap_activate)(struct* swap_info_struct sis, struct* file *file,
434	sector_t *span);
435	void (swap_deactivate)(struct* file *file);
436	int (swap_rw)(struct* kiocb iocb, struct* iov_iter *iter);
437	};
438
439	extern const struct address_space_operations empty_aops;
440
441	/**
442	* struct address_space - Contents of a cacheable, mappable object.
443	* @host: Owner, either the inode or the block_device.
444	* @i_pages: Cached pages.
445	* @invalidate_lock: Guards coherency between page cache contents and
446	* file offset->disk block mappings in the filesystem during invalidates.
447	* It is also used to block modification of page cache contents through
448	* memory mappings.
449	* @gfp_mask: Memory allocation flags to use for allocating pages.
450	* @i_mmap_writable: Number of VM_SHARED, VM_MAYWRITE mappings.
451	* @nr_thps: Number of THPs in the pagecache (non-shmem only).
452	* @i_mmap: Tree of private and shared mappings.
453	* @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable.
454	* @nrpages: Number of page entries, protected by the i_pages lock.
455	* @writeback_index: Writeback starts here.
456	* @a_ops: Methods.
457	* @flags: Error bits and flags (AS_*).
458	* @wb_err: The most recent error which has occurred.
459	* @i_private_lock: For use by the owner of the address_space.
460	* @i_private_list: For use by the owner of the address_space.
461	* @i_private_data: For use by the owner of the address_space.
462	*/
463	struct address_space {
464	struct inode *host;
465	struct xarray i_pages;
466	struct rw_semaphore invalidate_lock;
467	gfp_t gfp_mask;
468	atomic_t i_mmap_writable;
469	#ifdef CONFIG_READ_ONLY_THP_FOR_FS
470	/ number of thp, only for non-shmem files /
471	atomic_t nr_thps;
472	#endif
473	struct rb_root_cached i_mmap;
474	unsigned long nrpages;
475	pgoff_t writeback_index;
476	const struct address_space_operations *a_ops;
477	unsigned long flags;
478	errseq_t wb_err;
479	spinlock_t i_private_lock;
480	struct list_head i_private_list;
481	struct rw_semaphore i_mmap_rwsem;
482	void * i_private_data;
483	} __attribute__((aligned(sizeof(long)))) __randomize_layout;
484	/*
485	* On most architectures that alignment is already the case; but
486	* must be enforced here for CRIS, to let the least significant bit
487	* of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON.
488	*/
489
/* XArray tags, for tagging dirty and writeback pages in the pagecache. */
#define PAGECACHE_TAG_DIRTY	XA_MARK_0
#define PAGECACHE_TAG_WRITEBACK	XA_MARK_1
#define PAGECACHE_TAG_TOWRITE	XA_MARK_2
494
495	/*
496	* Returns true if any of the pages in the mapping are marked with the tag.
497	*/
498	static inline bool mapping_tagged(struct address_space *mapping, xa_mark_t tag)
499	{
500	return xa_marked(xa: &mapping->i_pages, mark: tag);
501	}
502
503	static inline void i_mmap_lock_write(struct address_space *mapping)
504	{
505	down_write(sem: &mapping->i_mmap_rwsem);
506	}
507
508	static inline int i_mmap_trylock_write(struct address_space *mapping)
509	{
510	return down_write_trylock(sem: &mapping->i_mmap_rwsem);
511	}
512
513	static inline void i_mmap_unlock_write(struct address_space *mapping)
514	{
515	up_write(sem: &mapping->i_mmap_rwsem);
516	}
517
518	static inline int i_mmap_trylock_read(struct address_space *mapping)
519	{
520	return down_read_trylock(sem: &mapping->i_mmap_rwsem);
521	}
522
523	static inline void i_mmap_lock_read(struct address_space *mapping)
524	{
525	down_read(sem: &mapping->i_mmap_rwsem);
526	}
527
528	static inline void i_mmap_unlock_read(struct address_space *mapping)
529	{
530	up_read(sem: &mapping->i_mmap_rwsem);
531	}
532
533	static inline void i_mmap_assert_locked(struct address_space *mapping)
534	{
535	lockdep_assert_held(&mapping->i_mmap_rwsem);
536	}
537
538	static inline void i_mmap_assert_write_locked(struct address_space *mapping)
539	{
540	lockdep_assert_held_write(&mapping->i_mmap_rwsem);
541	}
542
543	/*
544	* Might pages of this file be mapped into userspace?
545	*/
546	static inline int mapping_mapped(struct address_space *mapping)
547	{
548	return !RB_EMPTY_ROOT(&mapping->i_mmap.rb_root);
549	}
550
551	/*
552	* Might pages of this file have been modified in userspace?
553	* Note that i_mmap_writable counts all VM_SHARED, VM_MAYWRITE vmas: do_mmap
554	* marks vma as VM_SHARED if it is shared, and the file was opened for
555	* writing i.e. vma may be mprotected writable even if now readonly.
556	*
557	* If i_mmap_writable is negative, no new writable mappings are allowed. You
558	* can only deny writable mappings, if none exists right now.
559	*/
560	static inline int mapping_writably_mapped(struct address_space *mapping)
561	{
562	return atomic_read(v: &mapping->i_mmap_writable) > `0`;
563	}
564
565	static inline int mapping_map_writable(struct address_space *mapping)
566	{
567	return atomic_inc_unless_negative(v: &mapping->i_mmap_writable) ?
568	`0` : -EPERM;
569	}
570
571	static inline void mapping_unmap_writable(struct address_space *mapping)
572	{
573	atomic_dec(v: &mapping->i_mmap_writable);
574	}
575
576	static inline int mapping_deny_writable(struct address_space *mapping)
577	{
578	return atomic_dec_unless_positive(v: &mapping->i_mmap_writable) ?
579	`0` : -EBUSY;
580	}
581
582	static inline void mapping_allow_writable(struct address_space *mapping)
583	{
584	atomic_inc(v: &mapping->i_mmap_writable);
585	}
586
/*
 * Use sequence counter to get consistent i_size on 32-bit processors.
 *
 * In every other configuration i_size_ordered_init() expands to an empty
 * statement.
 */
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
#include <linux/seqlock.h>
#define __NEED_I_SIZE_ORDERED
#define i_size_ordered_init(inode) seqcount_init(&inode->i_size_seqcount)
#else
#define i_size_ordered_init(inode) do { } while (0)
#endif
597
struct posix_acl;
/* Sentinel pointer values; neither is a dereferenceable posix_acl. */
#define ACL_NOT_CACHED ((void *)(-1))
/*
 * ACL_DONT_CACHE is for stacked filesystems, that rely on underlying fs to
 * cache the ACL.  This also means that ->get_inode_acl() can be called in RCU
 * mode with the LOOKUP_RCU flag.
 */
#define ACL_DONT_CACHE ((void *)(-3))
606
/*
 * Build a sentinel value from @task: the task pointer advanced by one byte.
 * For a task pointer with a clear low bit the result has its low bit set,
 * which is what is_uncached_acl() tests for.
 */
static inline struct posix_acl *
uncached_acl_sentinel(struct task_struct *task)
{
	return (void *)task + 1;
}
612
/* True when the low bit of the @acl pointer value is set. */
static inline bool
is_uncached_acl(struct posix_acl *acl)
{
	return (long)acl & 1;
}
618
/* IOP_* flag bits; each value is a distinct single bit. */
#define IOP_FASTPERM	0x0001
#define IOP_LOOKUP	0x0002
#define IOP_NOFOLLOW	0x0004
#define IOP_XATTR	0x0008
#define IOP_DEFAULT_READLINK	0x0010
624
625	struct fsnotify_mark_connector;
626
627	/*
628	* Keep mostly read-only and often accessed (especially for
629	* the RCU path lookup and 'stat' data) fields at the beginning
630	* of the 'struct inode'
631	*/
632	struct inode {
633	umode_t i_mode;
634	unsigned short i_opflags;
635	kuid_t i_uid;
636	kgid_t i_gid;
637	unsigned int i_flags;
638
639	#ifdef CONFIG_FS_POSIX_ACL
640	struct posix_acl *i_acl;
641	struct posix_acl *i_default_acl;
642	#endif
643
644	const struct inode_operations *i_op;
645	struct super_block *i_sb;
646	struct address_space *i_mapping;
647
648	#ifdef CONFIG_SECURITY
649	void *i_security;
650	#endif
651
652	/ Stat data, not accessed from path walking /
653	unsigned long i_ino;
654	/*
655	* Filesystems may only read i_nlink directly. They shall use the
656	* following functions for modification:
657	*
658	* (set\|clear\|inc\|drop)_nlink
659	* inode_(inc\|dec)_link_count
660	*/
661	union {
662	const unsigned int i_nlink;
663	unsigned int __i_nlink;
664	};
665	dev_t i_rdev;
666	loff_t i_size;
667	struct timespec64 __i_atime;
668	struct timespec64 __i_mtime;
669	struct timespec64 __i_ctime; / use inode__ctime accessors! /*
670	spinlock_t i_lock; / i_blocks, i_bytes, maybe i_size /
671	unsigned short i_bytes;
672	u8 i_blkbits;
673	enum rw_hint i_write_hint;
674	blkcnt_t i_blocks;
675
676	#ifdef __NEED_I_SIZE_ORDERED
677	seqcount_t i_size_seqcount;
678	#endif
679
680	/ Misc /
681	unsigned long i_state;
682	struct rw_semaphore i_rwsem;
683
684	unsigned long dirtied_when; / jiffies of first dirtying /
685	unsigned long dirtied_time_when;
686
687	struct hlist_node i_hash;
688	struct list_head i_io_list; / backing dev IO list /
689	#ifdef CONFIG_CGROUP_WRITEBACK
690	struct bdi_writeback i_wb; /* the associated cgroup wb /
691
692	/ foreign inode detection, see wbc_detach_inode() /
693	int i_wb_frn_winner;
694	u16 i_wb_frn_avg_time;
695	u16 i_wb_frn_history;
696	#endif
697	struct list_head i_lru; / inode LRU list /
698	struct list_head i_sb_list;
699	struct list_head i_wb_list; / backing dev writeback list /
700	union {
701	struct hlist_head i_dentry;
702	struct rcu_head i_rcu;
703	};
704	atomic64_t i_version;
705	atomic64_t i_sequence; / see futex /
706	atomic_t i_count;
707	atomic_t i_dio_count;
708	atomic_t i_writecount;
709	#if defined(CONFIG_IMA) \|\| defined(CONFIG_FILE_LOCKING)
710	atomic_t i_readcount; / struct files open RO /
711	#endif
712	union {
713	const struct file_operations i_fop; /* former ->i_op->default_file_ops /
714	void (free_inode)(struct* inode *);
715	};
716	struct file_lock_context *i_flctx;
717	struct address_space i_data;
718	struct list_head i_devices;
719	union {
720	struct pipe_inode_info *i_pipe;
721	struct cdev *i_cdev;
722	char *i_link;
723	unsigned i_dir_seq;
724	};
725
726	__u32 i_generation;
727
728	#ifdef CONFIG_FSNOTIFY
729	__u32 i_fsnotify_mask; / all events this inode cares about /
730	struct fsnotify_mark_connector __rcu *i_fsnotify_marks;
731	#endif
732
733	#ifdef CONFIG_FS_ENCRYPTION
734	struct fscrypt_inode_info *i_crypt_info;
735	#endif
736
737	#ifdef CONFIG_FS_VERITY
738	struct fsverity_info *i_verity_info;
739	#endif
740
741	void i_private; /* fs or device private pointer /
742	} __randomize_layout;
743
/* Takes a timespec64 by value plus an inode; returns a timespec64 by value. */
struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode);
745
746	static inline unsigned int i_blocksize(const struct inode *node)
747	{
748	return (`1` << node->i_blkbits);
749	}
750
751	static inline int inode_unhashed(struct inode *inode)
752	{
753	return hlist_unhashed(h: &inode->i_hash);
754	}
755
756	/*
757	* __mark_inode_dirty expects inodes to be hashed. Since we don't
758	* want special inodes in the fileset inode space, we make them
759	* appear hashed, but do not put on any lists. hlist_del()
760	* will work fine and require no locking.
761	*/
762	static inline void inode_fake_hash(struct inode *inode)
763	{
764	hlist_add_fake(n: &inode->i_hash);
765	}
766
/*
 * inode->i_mutex nesting subclasses for the lock validator:
 *
 * 0: the object of the current VFS operation
 * 1: parent
 * 2: child/target
 * 3: xattr
 * 4: second non-directory
 * 5: second parent (when locking independent directories in rename)
 *
 * I_MUTEX_NONDIR2 is for certain operations (such as rename) which lock two
 * non-directories at once.
 *
 * The locking order between these classes is
 * parent[2] -> child -> grandchild -> normal -> xattr -> second non-directory
 */
enum inode_i_mutex_lock_class
{
	I_MUTEX_NORMAL,
	I_MUTEX_PARENT,
	I_MUTEX_CHILD,
	I_MUTEX_XATTR,
	I_MUTEX_NONDIR2,
	I_MUTEX_PARENT2,
};
792
793	static inline void inode_lock(struct inode *inode)
794	{
795	down_write(sem: &inode->i_rwsem);
796	}
797
798	static inline void inode_unlock(struct inode *inode)
799	{
800	up_write(sem: &inode->i_rwsem);
801	}
802
803	static inline void inode_lock_shared(struct inode *inode)
804	{
805	down_read(sem: &inode->i_rwsem);
806	}
807
808	static inline void inode_unlock_shared(struct inode *inode)
809	{
810	up_read(sem: &inode->i_rwsem);
811	}
812
813	static inline int inode_trylock(struct inode *inode)
814	{
815	return down_write_trylock(sem: &inode->i_rwsem);
816	}
817
818	static inline int inode_trylock_shared(struct inode *inode)
819	{
820	return down_read_trylock(sem: &inode->i_rwsem);
821	}
822
823	static inline int inode_is_locked(struct inode *inode)
824	{
825	return rwsem_is_locked(sem: &inode->i_rwsem);
826	}
827
828	static inline void inode_lock_nested(struct inode inode, unsigned* subclass)
829	{
830	down_write_nested(sem: &inode->i_rwsem, subclass);
831	}
832
833	static inline void inode_lock_shared_nested(struct inode inode, unsigned* subclass)
834	{
835	down_read_nested(sem: &inode->i_rwsem, subclass);
836	}
837
838	static inline void filemap_invalidate_lock(struct address_space *mapping)
839	{
840	down_write(sem: &mapping->invalidate_lock);
841	}
842
843	static inline void filemap_invalidate_unlock(struct address_space *mapping)
844	{
845	up_write(sem: &mapping->invalidate_lock);
846	}
847
848	static inline void filemap_invalidate_lock_shared(struct address_space *mapping)
849	{
850	down_read(sem: &mapping->invalidate_lock);
851	}
852
853	static inline int filemap_invalidate_trylock_shared(
854	struct address_space *mapping)
855	{
856	return down_read_trylock(sem: &mapping->invalidate_lock);
857	}
858
859	static inline void filemap_invalidate_unlock_shared(
860	struct address_space *mapping)
861	{
862	up_read(sem: &mapping->invalidate_lock);
863	}
864
/* Lock/unlock helpers operating on two objects; defined elsewhere. */
void lock_two_nondirectories(struct inode *, struct inode*);
void unlock_two_nondirectories(struct inode *, struct inode*);

void filemap_invalidate_lock_two(struct address_space *mapping1,
				 struct address_space *mapping2);
void filemap_invalidate_unlock_two(struct address_space *mapping1,
				   struct address_space *mapping2);
872
873
874	/*
875	* NOTE: in a 32bit arch with a preemptable kernel and
876	* an UP compile the i_size_read/write must be atomic
877	* with respect to the local cpu (unlike with preempt disabled),
878	* but they don't need to be atomic with respect to other cpus like in
879	* true SMP (so they need either to either locally disable irq around
880	* the read or for example on x86 they can be still implemented as a
881	* cmpxchg8b without the need of the lock prefix). For SMP compiles
882	* and 64bit archs it makes no difference if preempt is enabled or not.
883	*/
884	static inline loff_t i_size_read(const struct inode *inode)
885	{
886	#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
887	loff_t i_size;
888	unsigned int seq;
889
890	do {
891	seq = read_seqcount_begin(&inode->i_size_seqcount);
892	i_size = inode->i_size;
893	} while (read_seqcount_retry(&inode->i_size_seqcount, seq));
894	return i_size;
895	#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
896	loff_t i_size;
897
898	preempt_disable();
899	i_size = inode->i_size;
900	preempt_enable();
901	return i_size;
902	#else
903	/ Pairs with smp_store_release() in i_size_write() /
904	return smp_load_acquire(&inode->i_size);
905	#endif
906	}
907
908	/*
909	* NOTE: unlike i_size_read(), i_size_write() does need locking around it
910	* (normally i_mutex), otherwise on 32bit/SMP an update of i_size_seqcount
911	* can be lost, resulting in subsequent i_size_read() calls spinning forever.
912	*/
913	static inline void i_size_write(struct inode *inode, loff_t i_size)
914	{
915	#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
916	preempt_disable();
917	write_seqcount_begin(&inode->i_size_seqcount);
918	inode->i_size = i_size;
919	write_seqcount_end(&inode->i_size_seqcount);
920	preempt_enable();
921	#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
922	preempt_disable();
923	inode->i_size = i_size;
924	preempt_enable();
925	#else
926	/*
927	* Pairs with smp_load_acquire() in i_size_read() to ensure
928	* changes related to inode size (such as page contents) are
929	* visible before we see the changed inode size.
930	*/
931	smp_store_release(&inode->i_size, i_size);
932	#endif
933	}
934
935	static inline unsigned iminor(const struct inode *inode)
936	{
937	return MINOR(inode->i_rdev);
938	}
939
940	static inline unsigned imajor(const struct inode *inode)
941	{
942	return MAJOR(inode->i_rdev);
943	}
944
945	struct fown_struct {
946	rwlock_t lock; / protects pid, uid, euid fields /
947	struct pid pid; /* pid or -pgrp where SIGIO should be sent /
948	enum pid_type pid_type; / Kind of process group SIGIO should be sent to /
949	kuid_t uid, euid; / uid/euid of process setting the owner /
950	int signum; / posix.1b rt signal to be delivered on IO /
951	};
952
/**
 * struct file_ra_state - Track a file's readahead state.
 * @start: Where the most recent readahead started.
 * @size: Number of pages read in the most recent readahead.
 * @async_size: Number of pages that were/are not needed immediately
 *      and so were/are genuinely "ahead".  Start next readahead when
 *      the first of these pages is accessed.
 * @ra_pages: Maximum size of a readahead request, copied from the bdi.
 * @mmap_miss: How many mmap accesses missed in the page cache.
 * @prev_pos: The last byte in the most recent read request.
 *
 * When this structure is passed to ->readahead(), the "most recent"
 * readahead means the current readahead.
 */
struct file_ra_state {
	pgoff_t start;
	unsigned int size;
	unsigned int async_size;
	unsigned int ra_pages;
	unsigned int mmap_miss;
	loff_t prev_pos;
};
975
976	/*
977	* Check if @index falls in the readahead windows.
978	*/
979	static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
980	{
981	return (index >= ra->start &&
982	index < ra->start + ra->size);
983	}
984
985	/*
986	* f_{lock,count,pos_lock} members can be highly contended and share
987	* the same cacheline. f_{lock,mode} are very frequently used together
988	* and so share the same cacheline as well. The read-mostly
989	* f_{path,inode,op} are kept on a separate cacheline.
990	*/
991	struct file {
992	union {
993	/ fput() uses task work when closing and freeing file (default). /
994	struct callback_head f_task_work;
995	/ fput() must use workqueue (most kernel threads). /
996	struct llist_node f_llist;
997	unsigned int f_iocb_flags;
998	};
999
1000	/*
1001	* Protects f_ep, f_flags.
1002	* Must not be taken from IRQ context.
1003	*/
1004	spinlock_t f_lock;
1005	fmode_t f_mode;
1006	atomic_long_t f_count;
1007	struct mutex f_pos_lock;
1008	loff_t f_pos;
1009	unsigned int f_flags;
1010	struct fown_struct f_owner;
1011	const struct cred *f_cred;
1012	struct file_ra_state f_ra;
1013	struct path f_path;
1014	struct inode f_inode; /* cached value /
1015	const struct file_operations *f_op;
1016
1017	u64 f_version;
1018	#ifdef CONFIG_SECURITY
1019	void *f_security;
1020	#endif
1021	/ needed for tty driver, and maybe others /
1022	void *private_data;
1023
1024	#ifdef CONFIG_EPOLL
1025	/ Used by fs/eventpoll.c to link all the hooks to this file /
1026	struct hlist_head *f_ep;
1027	#endif /* #ifdef CONFIG_EPOLL */
1028	struct address_space *f_mapping;
1029	errseq_t f_wb_err;
1030	errseq_t f_sb_err; / for syncfs /
1031	} __randomize_layout
1032	__attribute__((aligned(`4`))); / lest something weird decides that 2 is OK /
1033
1034	struct file_handle {
1035	__u32 handle_bytes;
1036	int handle_type;
1037	/ file identifier /
1038	unsigned char f_handle[];
1039	};
1040
1041	static inline struct file get_file(struct* file *f)
1042	{
1043	atomic_long_inc(v: &f->f_count);
1044	return f;
1045	}
1046
1047	struct file get_file_rcu(struct* file __rcu **f);
1048	struct file get_file_active(struct* file **f);
1049
/* Current value of the reference count of file @x. */
#define file_count(x)	atomic_long_read(&(x)->f_count)

#define	MAX_NON_LFS	((1UL<<31) - 1)

/* Page cache limit. The filesystems should put that into their s_maxbytes
   limits, otherwise bad things can happen in VM. */
#if BITS_PER_LONG==32
#define MAX_LFS_FILESIZE	((loff_t)ULONG_MAX << PAGE_SHIFT)
#elif BITS_PER_LONG==64
#define MAX_LFS_FILESIZE	((loff_t)LLONG_MAX)
#endif
1061
/* legacy typedef, should eventually be removed */
typedef void *fl_owner_t;

struct file_lock;
struct file_lease;

/* The following constant reflects the upper bound of the file/locking space */
#ifndef OFFSET_MAX
#define OFFSET_MAX	type_max(loff_t)
#define OFFT_OFFSET_MAX	type_max(off_t)
#endif
1073
extern void send_sigio(struct fown_struct *fown, int fd, int band);
1075
1076	static inline struct inode file_inode(const* struct file *f)
1077	{
1078	return f->f_inode;
1079	}
1080
1081	/*
1082	* file_dentry() is a relic from the days that overlayfs was using files with a
1083	* "fake" path, meaning, f_path on overlayfs and f_inode on underlying fs.
1084	* In those days, file_dentry() was needed to get the underlying fs dentry that
1085	* matches f_inode.
1086	* Files with "fake" path should not exist nowadays, so use an assertion to make
1087	* sure that file_dentry() was not papering over filesystem bugs.
1088	*/
1089	static inline struct dentry file_dentry(const* struct file *file)
1090	{
1091	struct dentry *dentry = file->f_path.dentry;
1092
1093	WARN_ON_ONCE(d_inode(dentry) != file_inode(file));
1094	return dentry;
1095	}
1096
1097	struct fasync_struct {
1098	rwlock_t fa_lock;
1099	int magic;
1100	int fa_fd;
1101	struct fasync_struct fa_next; /* singly linked list /
1102	struct file *fa_file;
1103	struct rcu_head fa_rcu;
1104	};
1105
#define FASYNC_MAGIC 0x4601

/* SMP safe fasync helpers: */
extern int fasync_helper(int, struct file *, int, struct fasync_struct **);
extern struct fasync_struct *fasync_insert_entry(int, struct file *, struct fasync_struct **, struct fasync_struct *);
extern int fasync_remove_entry(struct file *, struct fasync_struct **);
extern struct fasync_struct *fasync_alloc(void);
extern void fasync_free(struct fasync_struct *);

/* can be called from interrupts */
extern void kill_fasync(struct fasync_struct **, int, int);
1117
1118	extern void __f_setown(struct file filp, struct* pid , enum* pid_type, int force);
1119	extern int f_setown(struct file filp, int* who, int force);
1120	extern void f_delown(struct file *filp);
1121	extern pid_t f_getown(struct file *filp);
1122	extern int send_sigurg(struct fown_struct *fown);
1123
/*
 * sb->s_flags.  Note that these mirror the equivalent MS_* flags where
 * represented in both.
 */
#define SB_RDONLY       BIT(0)	/* Mount read-only */
#define SB_NOSUID       BIT(1)	/* Ignore suid and sgid bits */
#define SB_NODEV        BIT(2)	/* Disallow access to device special files */
#define SB_NOEXEC       BIT(3)	/* Disallow program execution */
#define SB_SYNCHRONOUS  BIT(4)	/* Writes are synced at once */
#define SB_MANDLOCK     BIT(6)	/* Allow mandatory locks on an FS */
#define SB_DIRSYNC      BIT(7)	/* Directory modifications are synchronous */
#define SB_NOATIME      BIT(10)	/* Do not update access times. */
#define SB_NODIRATIME   BIT(11)	/* Do not update directory access times */
#define SB_SILENT       BIT(15)
#define SB_POSIXACL     BIT(16)	/* Supports POSIX ACLs */
#define SB_INLINECRYPT  BIT(17)	/* Use blk-crypto for encrypted files */
#define SB_KERNMOUNT    BIT(22)	/* this is a kern_mount call */
#define SB_I_VERSION    BIT(23)	/* Update inode I_version field */
#define SB_LAZYTIME     BIT(25)	/* Update the on-disk [acm]times lazily */

/* These sb flags are internal to the kernel */
#define SB_DEAD         BIT(21)
#define SB_DYING        BIT(24)
#define SB_SUBMOUNT     BIT(26)
#define SB_FORCE        BIT(27)
#define SB_NOSEC        BIT(28)
#define SB_BORN         BIT(29)
#define SB_ACTIVE       BIT(30)
#define SB_NOUSER       BIT(31)
1153
/* These flags relate to encoding and casefolding */
#define SB_ENC_STRICT_MODE_FL	(1 << 0)

/*
 * Non-zero when the strict-mode bit is set in @sb->s_encoding_flags.
 * The argument is parenthesized so that expressions such as &var can be
 * passed safely.
 */
#define sb_has_strict_encoding(sb) \
	((sb)->s_encoding_flags & SB_ENC_STRICT_MODE_FL)
1159
/*
 *	Umount options
 */

#define MNT_FORCE	0x00000001	/* Attempt to forcibly umount */
#define MNT_DETACH	0x00000002	/* Just detach from the tree */
#define MNT_EXPIRE	0x00000004	/* Mark for expiry */
#define UMOUNT_NOFOLLOW	0x00000008	/* Don't follow symlink on umount */
#define UMOUNT_UNUSED	0x80000000	/* Flag guaranteed to be unused */
1169
/* sb->s_iflags */
#define SB_I_CGROUPWB	0x00000001	/* cgroup-aware writeback enabled */
#define SB_I_NOEXEC	0x00000002	/* Ignore executables on this fs */
#define SB_I_NODEV	0x00000004	/* Ignore devices on this fs */
#define SB_I_STABLE_WRITES 0x00000008	/* don't modify blks until WB is done */

/* sb->s_iflags to limit user namespace mounts */
#define SB_I_USERNS_VISIBLE		0x00000010 /* fstype already mounted */
#define SB_I_IMA_UNVERIFIABLE_SIGNATURE	0x00000020
#define SB_I_UNTRUSTED_MOUNTER		0x00000040
#define SB_I_EVM_UNSUPPORTED		0x00000080

#define SB_I_SKIP_SYNC	0x00000100	/* Skip superblock at global sync */
#define SB_I_PERSB_BDI	0x00000200	/* has a per-sb bdi */
#define SB_I_TS_EXPIRY_WARNED 0x00000400 /* warned about timestamp range expiry */
#define SB_I_RETIRED	0x00000800	/* superblock shouldn't be reused */
#define SB_I_NOUMASK	0x00001000	/* VFS does not apply umask */
1187
/* Possible states of 'frozen' field */
enum {
	SB_UNFROZEN = 0,		/* FS is unfrozen */
	SB_FREEZE_WRITE	= 1,		/* Writes, dir ops, ioctls frozen */
	SB_FREEZE_PAGEFAULT = 2,	/* Page faults stopped as well */
	SB_FREEZE_FS = 3,		/* For internal FS use (e.g. to stop
					 * internal threads if needed) */
	SB_FREEZE_COMPLETE = 4,		/* ->freeze_fs finished successfully */
};

/* Number of freeze levels that have an rw_sem slot (WRITE..FS). */
#define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1)
1199
1200	struct sb_writers {
1201	unsigned short frozen; / Is sb frozen? /
1202	int freeze_kcount; / How many kernel freeze requests? /
1203	int freeze_ucount; / How many userspace freeze requests? /
1204	struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS];
1205	};
1206
1207	struct super_block {
1208	struct list_head s_list; / Keep this first /
1209	dev_t s_dev; / search index; _not_ kdev_t /
1210	unsigned char s_blocksize_bits;
1211	unsigned long s_blocksize;
1212	loff_t s_maxbytes; / Max file size /
1213	struct file_system_type *s_type;
1214	const struct super_operations *s_op;
1215	const struct dquot_operations *dq_op;
1216	const struct quotactl_ops *s_qcop;
1217	const struct export_operations *s_export_op;
1218	unsigned long s_flags;
1219	unsigned long s_iflags; / internal SB_I_* flags /
1220	unsigned long s_magic;
1221	struct dentry *s_root;
1222	struct rw_semaphore s_umount;
1223	int s_count;
1224	atomic_t s_active;
1225	#ifdef CONFIG_SECURITY
1226	void *s_security;
1227	#endif
1228	const struct xattr_handler * const *s_xattr;
1229	#ifdef CONFIG_FS_ENCRYPTION
1230	const struct fscrypt_operations *s_cop;
1231	struct fscrypt_keyring s_master_keys; /* master crypto keys in use /
1232	#endif
1233	#ifdef CONFIG_FS_VERITY
1234	const struct fsverity_operations *s_vop;
1235	#endif
1236	#if IS_ENABLED(CONFIG_UNICODE)
1237	struct unicode_map *s_encoding;
1238	__u16 s_encoding_flags;
1239	#endif
1240	struct hlist_bl_head s_roots; / alternate root dentries for NFS /
1241	struct list_head s_mounts; / list of mounts; _not_ for fs use /
1242	struct block_device s_bdev; /* can go away once we use an accessor for @s_bdev_file /
1243	struct file *s_bdev_file;
1244	struct backing_dev_info *s_bdi;
1245	struct mtd_info *s_mtd;
1246	struct hlist_node s_instances;
1247	unsigned int s_quota_types; / Bitmask of supported quota types /
1248	struct quota_info s_dquot; / Diskquota specific options /
1249
1250	struct sb_writers s_writers;
1251
1252	/*
1253	* Keep s_fs_info, s_time_gran, s_fsnotify_mask, and
1254	* s_fsnotify_marks together for cache efficiency. They are frequently
1255	* accessed and rarely modified.
1256	*/
1257	void s_fs_info; /* Filesystem private info /
1258
1259	/ Granularity of c/m/atime in ns (cannot be worse than a second) /
1260	u32 s_time_gran;
1261	/ Time limits for c/m/atime in seconds /
1262	time64_t s_time_min;
1263	time64_t s_time_max;
1264	#ifdef CONFIG_FSNOTIFY
1265	__u32 s_fsnotify_mask;
1266	struct fsnotify_mark_connector __rcu *s_fsnotify_marks;
1267	#endif
1268
1269	/*
1270	* q: why are s_id and s_sysfs_name not the same? both are human
1271	* readable strings that identify the filesystem
1272	* a: s_id is allowed to change at runtime; it's used in log messages,
1273	* and we want to when a device starts out as single device (s_id is dev
1274	* name) but then a device is hot added and we have to switch to
1275	* identifying it by UUID
1276	* but s_sysfs_name is a handle for programmatic access, and can't
1277	* change at runtime
1278	*/
1279	char s_id[`32`]; / Informational name /
1280	uuid_t s_uuid; / UUID /
1281	u8 s_uuid_len; / Default 16, possibly smaller for weird filesystems /
1282
1283	/ if set, fs shows up under sysfs at /sys/fs/$FSTYP/s_sysfs_name /
1284	char s_sysfs_name[UUID_STRING_LEN + `1`];
1285
1286	unsigned int s_max_links;
1287
1288	/*
1289	* The next field is for VFS only. No filesystems have any business
1290	* even looking at it. You had been warned.
1291	*/
1292	struct mutex s_vfs_rename_mutex; / Kludge /
1293
1294	/*
1295	* Filesystem subtype. If non-empty the filesystem type field
1296	* in /proc/mounts will be "type.subtype"
1297	*/
1298	const char *s_subtype;
1299
1300	const struct dentry_operations s_d_op; /* default d_op for dentries /
1301
1302	struct shrinker s_shrink; /* per-sb shrinker handle /
1303
1304	/ Number of inodes with nlink == 0 but still referenced /
1305	atomic_long_t s_remove_count;
1306
1307	/*
1308	* Number of inode/mount/sb objects that are being watched, note that
1309	* inodes objects are currently double-accounted.
1310	*/
1311	atomic_long_t s_fsnotify_connectors;
1312
1313	/ Read-only state of the superblock is being changed /
1314	int s_readonly_remount;
1315
1316	/ per-sb errseq_t for reporting writeback errors via syncfs /
1317	errseq_t s_wb_err;
1318
1319	/ AIO completions deferred from interrupt context /
1320	struct workqueue_struct *s_dio_done_wq;
1321	struct hlist_head s_pins;
1322
1323	/*
1324	* Owning user namespace and default context in which to
1325	* interpret filesystem uids, gids, quotas, device nodes,
1326	* xattrs and security labels.
1327	*/
1328	struct user_namespace *s_user_ns;
1329
1330	/*
1331	* The list_lru structure is essentially just a pointer to a table
1332	* of per-node lru lists, each of which has its own spinlock.
1333	* There is no need to put them into separate cachelines.
1334	*/
1335	struct list_lru s_dentry_lru;
1336	struct list_lru s_inode_lru;
1337	struct rcu_head rcu;
1338	struct work_struct destroy_work;
1339
1340	struct mutex s_sync_lock; / sync serialisation lock /
1341
1342	/*
1343	* Indicates how deep in a filesystem stack this SB is
1344	*/
1345	int s_stack_depth;
1346
1347	/ s_inode_list_lock protects s_inodes /
1348	spinlock_t s_inode_list_lock ____cacheline_aligned_in_smp;
1349	struct list_head s_inodes; / all inodes /
1350
1351	spinlock_t s_inode_wblist_lock;
1352	struct list_head s_inodes_wb; / writeback inodes /
1353	} __randomize_layout;
1354
1355	static inline struct user_namespace i_user_ns(const* struct inode *inode)
1356	{
1357	return inode->i_sb->s_user_ns;
1358	}
1359
1360	/ Helper functions so that in most cases filesystems will*
1361	* not need to deal directly with kuid_t and kgid_t and can
1362	* instead deal with the raw numeric values that are stored
1363	* in the filesystem.
1364	*/
1365	static inline uid_t i_uid_read(const struct inode *inode)
1366	{
1367	return from_kuid(to: i_user_ns(inode), uid: inode->i_uid);
1368	}
1369
1370	static inline gid_t i_gid_read(const struct inode *inode)
1371	{
1372	return from_kgid(to: i_user_ns(inode), gid: inode->i_gid);
1373	}
1374
1375	static inline void i_uid_write(struct inode *inode, uid_t uid)
1376	{
1377	inode->i_uid = make_kuid(from: i_user_ns(inode), uid);
1378	}
1379
1380	static inline void i_gid_write(struct inode *inode, gid_t gid)
1381	{
1382	inode->i_gid = make_kgid(from: i_user_ns(inode), gid);
1383	}
1384
1385	/**
1386	* i_uid_into_vfsuid - map an inode's i_uid down according to an idmapping
1387	* @idmap: idmap of the mount the inode was found from
1388	* @inode: inode to map
1389	*
1390	* Return: whe inode's i_uid mapped down according to @idmap.
1391	* If the inode's i_uid has no mapping INVALID_VFSUID is returned.
1392	*/
1393	static inline vfsuid_t i_uid_into_vfsuid(struct mnt_idmap *idmap,
1394	const struct inode *inode)
1395	{
1396	return make_vfsuid(idmap, fs_userns: i_user_ns(inode), kuid: inode->i_uid);
1397	}
1398
1399	/**
1400	* i_uid_needs_update - check whether inode's i_uid needs to be updated
1401	* @idmap: idmap of the mount the inode was found from
1402	* @attr: the new attributes of @inode
1403	* @inode: the inode to update
1404	*
1405	* Check whether the $inode's i_uid field needs to be updated taking idmapped
1406	* mounts into account if the filesystem supports it.
1407	*
1408	* Return: true if @inode's i_uid field needs to be updated, false if not.
1409	*/
1410	static inline bool i_uid_needs_update(struct mnt_idmap *idmap,
1411	const struct iattr *attr,
1412	const struct inode *inode)
1413	{
1414	return ((attr->ia_valid & ATTR_UID) &&
1415	!vfsuid_eq(left: attr->ia_vfsuid,
1416	right: i_uid_into_vfsuid(idmap, inode)));
1417	}
1418
1419	/**
1420	* i_uid_update - update @inode's i_uid field
1421	* @idmap: idmap of the mount the inode was found from
1422	* @attr: the new attributes of @inode
1423	* @inode: the inode to update
1424	*
1425	* Safely update @inode's i_uid field translating the vfsuid of any idmapped
1426	* mount into the filesystem kuid.
1427	*/
1428	static inline void i_uid_update(struct mnt_idmap *idmap,
1429	const struct iattr *attr,
1430	struct inode *inode)
1431	{
1432	if (attr->ia_valid & ATTR_UID)
1433	inode->i_uid = from_vfsuid(idmap, fs_userns: i_user_ns(inode),
1434	vfsuid: attr->ia_vfsuid);
1435	}
1436
1437	/**
1438	* i_gid_into_vfsgid - map an inode's i_gid down according to an idmapping
1439	* @idmap: idmap of the mount the inode was found from
1440	* @inode: inode to map
1441	*
1442	* Return: the inode's i_gid mapped down according to @idmap.
1443	* If the inode's i_gid has no mapping INVALID_VFSGID is returned.
1444	*/
1445	static inline vfsgid_t i_gid_into_vfsgid(struct mnt_idmap *idmap,
1446	const struct inode *inode)
1447	{
1448	return make_vfsgid(idmap, fs_userns: i_user_ns(inode), kgid: inode->i_gid);
1449	}
1450
1451	/**
1452	* i_gid_needs_update - check whether inode's i_gid needs to be updated
1453	* @idmap: idmap of the mount the inode was found from
1454	* @attr: the new attributes of @inode
1455	* @inode: the inode to update
1456	*
1457	* Check whether the $inode's i_gid field needs to be updated taking idmapped
1458	* mounts into account if the filesystem supports it.
1459	*
1460	* Return: true if @inode's i_gid field needs to be updated, false if not.
1461	*/
1462	static inline bool i_gid_needs_update(struct mnt_idmap *idmap,
1463	const struct iattr *attr,
1464	const struct inode *inode)
1465	{
1466	return ((attr->ia_valid & ATTR_GID) &&
1467	!vfsgid_eq(left: attr->ia_vfsgid,
1468	right: i_gid_into_vfsgid(idmap, inode)));
1469	}
1470
1471	/**
1472	* i_gid_update - update @inode's i_gid field
1473	* @idmap: idmap of the mount the inode was found from
1474	* @attr: the new attributes of @inode
1475	* @inode: the inode to update
1476	*
1477	* Safely update @inode's i_gid field translating the vfsgid of any idmapped
1478	* mount into the filesystem kgid.
1479	*/
1480	static inline void i_gid_update(struct mnt_idmap *idmap,
1481	const struct iattr *attr,
1482	struct inode *inode)
1483	{
1484	if (attr->ia_valid & ATTR_GID)
1485	inode->i_gid = from_vfsgid(idmap, fs_userns: i_user_ns(inode),
1486	vfsgid: attr->ia_vfsgid);
1487	}
1488
1489	/**
1490	* inode_fsuid_set - initialize inode's i_uid field with callers fsuid
1491	* @inode: inode to initialize
1492	* @idmap: idmap of the mount the inode was found from
1493	*
1494	* Initialize the i_uid field of @inode. If the inode was found/created via
1495	* an idmapped mount map the caller's fsuid according to @idmap.
1496	*/
1497	static inline void inode_fsuid_set(struct inode *inode,
1498	struct mnt_idmap *idmap)
1499	{
1500	inode->i_uid = mapped_fsuid(idmap, fs_userns: i_user_ns(inode));
1501	}
1502
1503	/**
1504	* inode_fsgid_set - initialize inode's i_gid field with callers fsgid
1505	* @inode: inode to initialize
1506	* @idmap: idmap of the mount the inode was found from
1507	*
1508	* Initialize the i_gid field of @inode. If the inode was found/created via
1509	* an idmapped mount map the caller's fsgid according to @idmap.
1510	*/
1511	static inline void inode_fsgid_set(struct inode *inode,
1512	struct mnt_idmap *idmap)
1513	{
1514	inode->i_gid = mapped_fsgid(idmap, fs_userns: i_user_ns(inode));
1515	}
1516
1517	/**
1518	* fsuidgid_has_mapping() - check whether caller's fsuid/fsgid is mapped
1519	* @sb: the superblock we want a mapping in
1520	* @idmap: idmap of the relevant mount
1521	*
1522	* Check whether the caller's fsuid and fsgid have a valid mapping in the
1523	* s_user_ns of the superblock @sb. If the caller is on an idmapped mount map
1524	* the caller's fsuid and fsgid according to the @idmap first.
1525	*
1526	* Return: true if fsuid and fsgid is mapped, false if not.
1527	*/
1528	static inline bool fsuidgid_has_mapping(struct super_block *sb,
1529	struct mnt_idmap *idmap)
1530	{
1531	struct user_namespace *fs_userns = sb->s_user_ns;
1532	kuid_t kuid;
1533	kgid_t kgid;
1534
1535	kuid = mapped_fsuid(idmap, fs_userns);
1536	if (!uid_valid(uid: kuid))
1537	return false;
1538	kgid = mapped_fsgid(idmap, fs_userns);
1539	if (!gid_valid(gid: kgid))
1540	return false;
1541	return kuid_has_mapping(ns: fs_userns, uid: kuid) &&
1542	kgid_has_mapping(ns: fs_userns, gid: kgid);
1543	}
1544
/*
 * Timestamp helpers defined outside this header.
 * NOTE(review): going by the names, current_time() yields a "now" value for
 * @inode and inode_set_ctime_current() also stores it as the ctime - confirm
 * against the definitions.
 */
struct timespec64 current_time(struct inode *inode);
struct timespec64 inode_set_ctime_current(struct inode *inode);
1547
1548	static inline time64_t inode_get_atime_sec(const struct inode *inode)
1549	{
1550	return inode->__i_atime.tv_sec;
1551	}
1552
1553	static inline long inode_get_atime_nsec(const struct inode *inode)
1554	{
1555	return inode->__i_atime.tv_nsec;
1556	}
1557
1558	static inline struct timespec64 inode_get_atime(const struct inode *inode)
1559	{
1560	return inode->__i_atime;
1561	}
1562
1563	static inline struct timespec64 inode_set_atime_to_ts(struct inode *inode,
1564	struct timespec64 ts)
1565	{
1566	inode->__i_atime = ts;
1567	return ts;
1568	}
1569
1570	static inline struct timespec64 inode_set_atime(struct inode *inode,
1571	time64_t sec, long nsec)
1572	{
1573	struct timespec64 ts = { .tv_sec = sec,
1574	.tv_nsec = nsec };
1575	return inode_set_atime_to_ts(inode, ts);
1576	}
1577
1578	static inline time64_t inode_get_mtime_sec(const struct inode *inode)
1579	{
1580	return inode->__i_mtime.tv_sec;
1581	}
1582
1583	static inline long inode_get_mtime_nsec(const struct inode *inode)
1584	{
1585	return inode->__i_mtime.tv_nsec;
1586	}
1587
1588	static inline struct timespec64 inode_get_mtime(const struct inode *inode)
1589	{
1590	return inode->__i_mtime;
1591	}
1592
1593	static inline struct timespec64 inode_set_mtime_to_ts(struct inode *inode,
1594	struct timespec64 ts)
1595	{
1596	inode->__i_mtime = ts;
1597	return ts;
1598	}
1599
1600	static inline struct timespec64 inode_set_mtime(struct inode *inode,
1601	time64_t sec, long nsec)
1602	{
1603	struct timespec64 ts = { .tv_sec = sec,
1604	.tv_nsec = nsec };
1605	return inode_set_mtime_to_ts(inode, ts);
1606	}
1607
1608	static inline time64_t inode_get_ctime_sec(const struct inode *inode)
1609	{
1610	return inode->__i_ctime.tv_sec;
1611	}
1612
1613	static inline long inode_get_ctime_nsec(const struct inode *inode)
1614	{
1615	return inode->__i_ctime.tv_nsec;
1616	}
1617
1618	static inline struct timespec64 inode_get_ctime(const struct inode *inode)
1619	{
1620	return inode->__i_ctime;
1621	}
1622
1623	static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode,
1624	struct timespec64 ts)
1625	{
1626	inode->__i_ctime = ts;
1627	return ts;
1628	}
1629
1630	/**
1631	* inode_set_ctime - set the ctime in the inode
1632	* @inode: inode in which to set the ctime
1633	* @sec: tv_sec value to set
1634	* @nsec: tv_nsec value to set
1635	*
1636	* Set the ctime in @inode to { @sec, @nsec }
1637	*/
1638	static inline struct timespec64 inode_set_ctime(struct inode *inode,
1639	time64_t sec, long nsec)
1640	{
1641	struct timespec64 ts = { .tv_sec = sec,
1642	.tv_nsec = nsec };
1643
1644	return inode_set_ctime_to_ts(inode, ts);
1645	}
1646
/*
 * Defined outside this header.
 * NOTE(review): presumably initializes all of @inode's timestamps to one
 * value and returns it - confirm against the definition.
 */
struct timespec64 simple_inode_init_ts(struct inode *inode);
1648
1649	/*
1650	* Snapshotting support.
1651	*/
1652
1653	/*
1654	* These are internal functions, please use sb_start_{write,pagefault,intwrite}
1655	* instead.
1656	*/
1657	static inline void __sb_end_write(struct super_block sb, int* level)
1658	{
1659	percpu_up_read(sem: sb->s_writers.rw_sem + level-`1`);
1660	}
1661
1662	static inline void __sb_start_write(struct super_block sb, int* level)
1663	{
1664	percpu_down_read(sem: sb->s_writers.rw_sem + level - `1`);
1665	}
1666
1667	static inline bool __sb_start_write_trylock(struct super_block sb, int* level)
1668	{
1669	return percpu_down_read_trylock(sem: sb->s_writers.rw_sem + level - `1`);
1670	}
1671
/*
 * Forward to percpu_rwsem_acquire()/percpu_rwsem_release() for the rw_sem
 * slot of freeze level @lev (slot index is @lev - 1), passing 1 and the
 * caller's instruction pointer.
 */
#define __sb_writers_acquired(sb, lev)	\
	percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
#define __sb_writers_release(sb, lev)	\
	percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
1676
1677	/**
1678	* __sb_write_started - check if sb freeze level is held
1679	* @sb: the super we write to
1680	* @level: the freeze level
1681	*
1682	* * > 0 - sb freeze level is held
1683	* * 0 - sb freeze level is not held
1684	* * < 0 - !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN
1685	*/
1686	static inline int __sb_write_started(const struct super_block sb, int* level)
1687	{
1688	return lockdep_is_held_type(sb->s_writers.rw_sem + level - `1`, `1`);
1689	}
1690
1691	/**
1692	* sb_write_started - check if SB_FREEZE_WRITE is held
1693	* @sb: the super we write to
1694	*
1695	* May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN.
1696	*/
1697	static inline bool sb_write_started(const struct super_block *sb)
1698	{
1699	return __sb_write_started(sb, level: SB_FREEZE_WRITE);
1700	}
1701
1702	/**
1703	* sb_write_not_started - check if SB_FREEZE_WRITE is not held
1704	* @sb: the super we write to
1705	*
1706	* May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN.
1707	*/
1708	static inline bool sb_write_not_started(const struct super_block *sb)
1709	{
1710	return __sb_write_started(sb, level: SB_FREEZE_WRITE) <= `0`;
1711	}
1712
1713	/**
1714	* file_write_started - check if SB_FREEZE_WRITE is held
1715	* @file: the file we write to
1716	*
1717	* May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN.
1718	* May be false positive with !S_ISREG, because file_start_write() has
1719	* no effect on !S_ISREG.
1720	*/
1721	static inline bool file_write_started(const struct file *file)
1722	{
1723	if (!S_ISREG(file_inode(file)->i_mode))
1724	return true;
1725	return sb_write_started(sb: file_inode(f: file)->i_sb);
1726	}
1727
1728	/**
1729	* file_write_not_started - check if SB_FREEZE_WRITE is not held
1730	* @file: the file we write to
1731	*
1732	* May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN.
1733	* May be false positive with !S_ISREG, because file_start_write() has
1734	* no effect on !S_ISREG.
1735	*/
1736	static inline bool file_write_not_started(const struct file *file)
1737	{
1738	if (!S_ISREG(file_inode(file)->i_mode))
1739	return true;
1740	return sb_write_not_started(sb: file_inode(f: file)->i_sb);
1741	}
1742
1743	/**
1744	* sb_end_write - drop write access to a superblock
1745	* @sb: the super we wrote to
1746	*
1747	* Decrement number of writers to the filesystem. Wake up possible waiters
1748	* wanting to freeze the filesystem.
1749	*/
1750	static inline void sb_end_write(struct super_block *sb)
1751	{
1752	__sb_end_write(sb, level: SB_FREEZE_WRITE);
1753	}
1754
1755	/**
1756	* sb_end_pagefault - drop write access to a superblock from a page fault
1757	* @sb: the super we wrote to
1758	*
1759	* Decrement number of processes handling write page fault to the filesystem.
1760	* Wake up possible waiters wanting to freeze the filesystem.
1761	*/
1762	static inline void sb_end_pagefault(struct super_block *sb)
1763	{
1764	__sb_end_write(sb, level: SB_FREEZE_PAGEFAULT);
1765	}
1766
1767	/**
1768	* sb_end_intwrite - drop write access to a superblock for internal fs purposes
1769	* @sb: the super we wrote to
1770	*
1771	* Decrement fs-internal number of writers to the filesystem. Wake up possible
1772	* waiters wanting to freeze the filesystem.
1773	*/
1774	static inline void sb_end_intwrite(struct super_block *sb)
1775	{
1776	__sb_end_write(sb, level: SB_FREEZE_FS);
1777	}
1778
1779	/**
1780	* sb_start_write - get write access to a superblock
1781	* @sb: the super we write to
1782	*
1783	* When a process wants to write data or metadata to a file system (i.e. dirty
1784	* a page or an inode), it should embed the operation in a sb_start_write() -
1785	* sb_end_write() pair to get exclusion against file system freezing. This
1786	* function increments number of writers preventing freezing. If the file
1787	* system is already frozen, the function waits until the file system is
1788	* thawed.
1789	*
1790	* Since freeze protection behaves as a lock, users have to preserve
1791	* ordering of freeze protection and other filesystem locks. Generally,
1792	* freeze protection should be the outermost lock. In particular, we have:
1793	*
1794	* sb_start_write
1795	* -> i_mutex (write path, truncate, directory ops, ...)
1796	* -> s_umount (freeze_super, thaw_super)
1797	*/
1798	static inline void sb_start_write(struct super_block *sb)
1799	{
1800	__sb_start_write(sb, level: SB_FREEZE_WRITE);
1801	}
1802
1803	static inline bool sb_start_write_trylock(struct super_block *sb)
1804	{
1805	return __sb_start_write_trylock(sb, level: SB_FREEZE_WRITE);
1806	}
1807
1808	/**
1809	* sb_start_pagefault - get write access to a superblock from a page fault
1810	* @sb: the super we write to
1811	*
1812	* When a process starts handling write page fault, it should embed the
1813	* operation into sb_start_pagefault() - sb_end_pagefault() pair to get
1814	* exclusion against file system freezing. This is needed since the page fault
1815	* is going to dirty a page. This function increments number of running page
1816	* faults preventing freezing. If the file system is already frozen, the
1817	* function waits until the file system is thawed.
1818	*
1819	* Since page fault freeze protection behaves as a lock, users have to preserve
1820	* ordering of freeze protection and other filesystem locks. It is advised to
1821	* put sb_start_pagefault() close to mmap_lock in lock ordering. Page fault
1822	* handling code implies lock dependency:
1823	*
1824	* mmap_lock
1825	* -> sb_start_pagefault
1826	*/
1827	static inline void sb_start_pagefault(struct super_block *sb)
1828	{
1829	__sb_start_write(sb, level: SB_FREEZE_PAGEFAULT);
1830	}
1831
1832	/**
1833	* sb_start_intwrite - get write access to a superblock for internal fs purposes
1834	* @sb: the super we write to
1835	*
1836	* This is the third level of protection against filesystem freezing. It is
1837	* free for use by a filesystem. The only requirement is that it must rank
1838	* below sb_start_pagefault.
1839	*
1840	* For example filesystem can call sb_start_intwrite() when starting a
1841	* transaction which somewhat eases handling of freezing for internal sources
1842	* of filesystem changes (internal fs threads, discarding preallocation on file
1843	* close, etc.).
1844	*/
1845	static inline void sb_start_intwrite(struct super_block *sb)
1846	{
1847	__sb_start_write(sb, level: SB_FREEZE_FS);
1848	}
1849
1850	static inline bool sb_start_intwrite_trylock(struct super_block *sb)
1851	{
1852	return __sb_start_write_trylock(sb, level: SB_FREEZE_FS);
1853	}
1854
/*
 * Defined outside this header.
 * NOTE(review): going by the name, reports whether the caller owns @inode
 * (as seen through @idmap) or holds an overriding capability - confirm
 * against the definition.
 */
bool inode_owner_or_capable(struct mnt_idmap *idmap,
			    const struct inode *inode);
1857
1858	/*
1859	* VFS helper functions..
1860	*/
1861	int vfs_create(struct mnt_idmap , struct* inode *,
1862	struct dentry *, umode_t, bool);
1863	int vfs_mkdir(struct mnt_idmap , struct* inode *,
1864	struct dentry *, umode_t);
1865	int vfs_mknod(struct mnt_idmap , struct* inode , struct* dentry *,
1866	umode_t, dev_t);
1867	int vfs_symlink(struct mnt_idmap , struct* inode *,
1868	struct dentry , const* char *);
1869	int vfs_link(struct dentry , struct* mnt_idmap , struct* inode *,
1870	struct dentry , struct* inode **);
1871	int vfs_rmdir(struct mnt_idmap , struct* inode , struct* dentry *);
1872	int vfs_unlink(struct mnt_idmap , struct* inode , struct* dentry *,
1873	struct inode **);
1874
/**
 * struct renamedata - contains all information required for renaming
 * @old_mnt_idmap:     idmap of the old mount the inode was found from
 * @old_dir:           parent of source
 * @old_dentry:        source
 * @new_mnt_idmap:     idmap of the new mount the inode was found from
 * @new_dir:           parent of destination
 * @new_dentry:        destination
 * @delegated_inode:   returns an inode needing a delegation break
 * @flags:             rename flags
 */
struct renamedata {
	struct mnt_idmap *old_mnt_idmap;
	struct inode *old_dir;
	struct dentry *old_dentry;
	struct mnt_idmap *new_mnt_idmap;
	struct inode *new_dir;
	struct dentry *new_dentry;
	struct inode **delegated_inode;
	unsigned int flags;
} __randomize_layout;
1896
1897	int vfs_rename(struct renamedata *);
1898
1899	static inline int vfs_whiteout(struct mnt_idmap *idmap,
1900	struct inode dir, struct* dentry *dentry)
1901	{
1902	return vfs_mknod(idmap, dir, dentry, S_IFCHR \| WHITEOUT_MODE,
1903	WHITEOUT_DEV);
1904	}
1905
1906	struct file kernel_tmpfile_open(struct* mnt_idmap *idmap,
1907	const struct path *parentpath,
1908	umode_t mode, int open_flag,
1909	const struct cred *cred);
1910	struct file kernel_file_open(const* struct path path, int* flags,
1911	struct inode inode, const* struct cred *cred);
1912
1913	int vfs_mkobj(struct dentry *, umode_t,
1914	int (f)(struct* dentry , umode_t, void* *),
1915	void *);
1916
1917	int vfs_fchown(struct file *file, uid_t user, gid_t group);
1918	int vfs_fchmod(struct file *file, umode_t mode);
1919	int vfs_utimes(const struct path path, struct* timespec64 *times);
1920
1921	extern long vfs_ioctl(struct file file, unsigned* int cmd, unsigned long arg);
1922
1923	#ifdef CONFIG_COMPAT
1924	extern long compat_ptr_ioctl(struct file file, unsigned* int cmd,
1925	unsigned long arg);
1926	#else
1927	#define compat_ptr_ioctl NULL
1928	#endif
1929
1930	/*
1931	* VFS file helper functions.
1932	*/
1933	void inode_init_owner(struct mnt_idmap idmap, struct* inode *inode,
1934	const struct inode *dir, umode_t mode);
1935	extern bool may_open_dev(const struct path *path);
1936	umode_t mode_strip_sgid(struct mnt_idmap *idmap,
1937	const struct inode *dir, umode_t mode);
1938
1939	/*
1940	* This is the "filldir" function type, used by readdir() to let
1941	* the kernel specify what kind of dirent layout it wants to have.
1942	* This allows the kernel to read directories into kernel space or
1943	* to have different dirent layouts depending on the binary type.
1944	* Return 'true' to keep going and 'false' if there are no more entries.
1945	*/
1946	struct dir_context;
1947	typedef bool (filldir_t)(struct* dir_context , const* char , int*, loff_t, u64,
1948	unsigned);
1949
1950	struct dir_context {
1951	filldir_t actor;
1952	loff_t pos;
1953	};
1954
/*
 * These flags let !MMU mmap() govern direct device mapping vs immediate
 * copying more easily for MAP_PRIVATE, especially for ROM filesystems.
 *
 * NOMMU_MAP_COPY:	Copy can be mapped (MAP_PRIVATE)
 * NOMMU_MAP_DIRECT:	Can be mapped directly (MAP_SHARED)
 * NOMMU_MAP_READ:	Can be mapped for reading
 * NOMMU_MAP_WRITE:	Can be mapped for writing
 * NOMMU_MAP_EXEC:	Can be mapped for execution
 */
#define NOMMU_MAP_COPY		0x00000001
#define NOMMU_MAP_DIRECT	0x00000008
#define NOMMU_MAP_READ		VM_MAYREAD
#define NOMMU_MAP_WRITE		VM_MAYWRITE
#define NOMMU_MAP_EXEC		VM_MAYEXEC

#define NOMMU_VMFLAGS \
	(NOMMU_MAP_READ | NOMMU_MAP_WRITE | NOMMU_MAP_EXEC)
1973
/*
 * These flags control the behavior of the remap_file_range function pointer.
 * If it is called with len == 0 that means "remap to end of source file".
 * See Documentation/filesystems/vfs.rst for more details about this call.
 *
 * REMAP_FILE_DEDUP: only remap if contents identical (i.e. deduplicate)
 * REMAP_FILE_CAN_SHORTEN: caller can handle a shortened request
 */
#define REMAP_FILE_DEDUP		(1 << 0)
#define REMAP_FILE_CAN_SHORTEN		(1 << 1)

/*
 * These flags signal that the caller is ok with altering various aspects of
 * the behavior of the remap operation.  The changes must be made by the
 * implementation; the vfs remap helper functions can take advantage of them.
 * Flags in this category exist to preserve the quirky behavior of the hoisted
 * btrfs clone/dedupe ioctls.
 */
#define REMAP_FILE_ADVISORY		(REMAP_FILE_CAN_SHORTEN)

/*
 * These flags control the behavior of vfs_copy_file_range().
 * They are not available to the user via syscall.
 *
 * COPY_FILE_SPLICE: call splice direct instead of fs clone/copy ops
 */
#define COPY_FILE_SPLICE		(1 << 0)
2001
2002	struct iov_iter;
2003	struct io_uring_cmd;
2004	struct offset_ctx;
2005
2006	struct file_operations {
2007	struct module *owner;
2008	loff_t (llseek) (struct* file , loff_t, int*);
2009	ssize_t (read) (struct* file , char* __user , size_t, loff_t );
2010	ssize_t (write) (struct* file , const* char __user , size_t, loff_t );
2011	ssize_t (read_iter) (struct* kiocb , struct* iov_iter *);
2012	ssize_t (write_iter) (struct* kiocb , struct* iov_iter *);
2013	int (iopoll)(struct* kiocb kiocb, struct* io_comp_batch *,
2014	unsigned int flags);
2015	int (iterate_shared) (struct* file , struct* dir_context *);
2016	__poll_t (poll) (struct* file , struct* poll_table_struct *);
2017	long (unlocked_ioctl) (struct* file , unsigned* int, unsigned long);
2018	long (compat_ioctl) (struct* file , unsigned* int, unsigned long);
2019	int (mmap) (struct* file , struct* vm_area_struct *);
2020	unsigned long mmap_supported_flags;
2021	int (open) (struct* inode , struct* file *);
2022	int (flush) (struct* file *, fl_owner_t id);
2023	int (release) (struct* inode , struct* file *);
2024	int (fsync) (struct* file , loff_t, loff_t, int* datasync);
2025	int (fasync) (int, struct* file , int*);
2026	int (lock) (struct* file , int, struct* file_lock *);
2027	unsigned long (get_unmapped_area)(struct* file , unsigned* long, unsigned long, unsigned long, unsigned long);
2028	int (check_flags)(int*);
2029	int (flock) (struct* file , int, struct* file_lock *);
2030	ssize_t (splice_write)(struct* pipe_inode_info , struct* file , loff_t , size_t, unsigned int);
2031	ssize_t (splice_read)(struct* file , loff_t , struct pipe_inode_info , size_t, unsigned* int);
2032	void (splice_eof)(struct* file *file);
2033	int (setlease)(struct* file , int, struct* file_lease *, void* **);
2034	long (fallocate)(struct* file file, int* mode, loff_t offset,
2035	loff_t len);
2036	void (show_fdinfo)(struct* seq_file m, struct* file *f);
2037	#ifndef CONFIG_MMU
2038	unsigned (mmap_capabilities)(struct* file *);
2039	#endif
2040	ssize_t (copy_file_range)(struct* file , loff_t, struct* file *,
2041	loff_t, size_t, unsigned int);
2042	loff_t (remap_file_range)(struct* file *file_in, loff_t pos_in,
2043	struct file *file_out, loff_t pos_out,
2044	loff_t len, unsigned int remap_flags);
2045	int (fadvise)(struct* file , loff_t, loff_t, int*);
2046	int (uring_cmd)(struct* io_uring_cmd ioucmd, unsigned* int issue_flags);
2047	int (uring_cmd_iopoll)(struct* io_uring_cmd , struct* io_comp_batch *,
2048	unsigned int poll_flags);
2049	} __randomize_layout;
2050
/* Wrap a directory iterator that needs exclusive inode access */
int wrap_directory_iterator(struct file *, struct dir_context *,
			    int (*) (struct file *, struct dir_context *));
/* Define shared_<x>(), which forwards to wrap_directory_iterator() with x. */
#define WRAP_DIR_ITER(x) \
	static int shared_##x(struct file *file , struct dir_context *ctx) \
	{ return wrap_directory_iterator(file, ctx, x); }
2057
2058	struct inode_operations {
2059	struct dentry * (lookup) (struct* inode ,struct* dentry , unsigned* int);
2060	const char * (get_link) (struct* dentry , struct* inode , struct* delayed_call *);
2061	int (permission) (struct* mnt_idmap , struct* inode , int*);
2062	struct posix_acl * (get_inode_acl)(struct* inode , int*, bool);
2063
2064	int (readlink) (struct* dentry , char* __user ,int*);
2065
2066	int (create) (struct* mnt_idmap , struct* inode ,struct* dentry *,
2067	umode_t, bool);
2068	int (link) (struct* dentry ,struct* inode ,struct* dentry *);
2069	int (unlink) (struct* inode ,struct* dentry *);
2070	int (symlink) (struct* mnt_idmap , struct* inode ,struct* dentry *,
2071	const char *);
2072	int (mkdir) (struct* mnt_idmap , struct* inode ,struct* dentry *,
2073	umode_t);
2074	int (rmdir) (struct* inode ,struct* dentry *);
2075	int (mknod) (struct* mnt_idmap , struct* inode ,struct* dentry *,
2076	umode_t,dev_t);
2077	int (rename) (struct* mnt_idmap , struct* inode , struct* dentry *,
2078	struct inode , struct* dentry , unsigned* int);
2079	int (setattr) (struct* mnt_idmap , struct* dentry , struct* iattr *);
2080	int (getattr) (struct* mnt_idmap , const* struct path *,
2081	struct kstat , u32, unsigned* int);
2082	ssize_t (listxattr) (struct* dentry , char* *, size_t);
2083	int (fiemap)(struct* inode , struct* fiemap_extent_info *, u64 start,
2084	u64 len);
2085	int (update_time)(struct* inode , int*);
2086	int (atomic_open)(struct* inode , struct* dentry *,
2087	struct file , unsigned* open_flag,
2088	umode_t create_mode);
2089	int (tmpfile) (struct* mnt_idmap , struct* inode *,
2090	struct file *, umode_t);
2091	struct posix_acl (get_acl)(struct mnt_idmap , struct* dentry *,
2092	int);
2093	int (set_acl)(struct* mnt_idmap , struct* dentry *,
2094	struct posix_acl , int*);
2095	int (fileattr_set)(struct* mnt_idmap *idmap,
2096	struct dentry dentry, struct* fileattr *fa);
2097	int (fileattr_get)(struct* dentry dentry, struct* fileattr *fa);
2098	struct offset_ctx (get_offset_ctx)(struct inode *inode);
2099	} ____cacheline_aligned;
2100
2101	static inline ssize_t call_read_iter(struct file file, struct* kiocb *kio,
2102	struct iov_iter *iter)
2103	{
2104	return file->f_op->read_iter(kio, iter);
2105	}
2106
2107	static inline ssize_t call_write_iter(struct file file, struct* kiocb *kio,
2108	struct iov_iter *iter)
2109	{
2110	return file->f_op->write_iter(kio, iter);
2111	}
2112
2113	static inline int call_mmap(struct file file, struct* vm_area_struct *vma)
2114	{
2115	return file->f_op->mmap(file, vma);
2116	}
2117
2118	extern ssize_t vfs_read(struct file , char* __user , size_t, loff_t );
2119	extern ssize_t vfs_write(struct file , const* char __user , size_t, loff_t );
2120	extern ssize_t vfs_copy_file_range(struct file , loff_t , struct* file *,
2121	loff_t, size_t, unsigned int);
2122	int __generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
2123	struct file *file_out, loff_t pos_out,
2124	loff_t len, unsigned* int remap_flags,
2125	const struct iomap_ops *dax_read_ops);
2126	int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
2127	struct file *file_out, loff_t pos_out,
2128	loff_t count, unsigned* int remap_flags);
2129	extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
2130	struct file *file_out, loff_t pos_out,
2131	loff_t len, unsigned int remap_flags);
2132	extern int vfs_dedupe_file_range(struct file *file,
2133	struct file_dedupe_range *same);
2134	extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
2135	struct file *dst_file, loff_t dst_pos,
2136	loff_t len, unsigned int remap_flags);
2137
/**
 * enum freeze_holder - holder of the freeze
 * @FREEZE_HOLDER_KERNEL: kernel wants to freeze or thaw filesystem
 * @FREEZE_HOLDER_USERSPACE: userspace wants to freeze or thaw filesystem
 * @FREEZE_MAY_NEST: whether nesting freeze and thaw requests is allowed
 *
 * Indicate who the owner of the freeze or thaw request is and whether
 * the freeze needs to be exclusive or can nest.
 * Without @FREEZE_MAY_NEST, multiple freeze and thaw requests from the
 * same holder aren't allowed. It is however allowed to hold a single
 * @FREEZE_HOLDER_USERSPACE and a single @FREEZE_HOLDER_KERNEL freeze at
 * the same time. This is relied upon by some filesystems during online
 * repair or similar.
 */
enum freeze_holder {
	FREEZE_HOLDER_KERNEL	= (1U << 0),
	FREEZE_HOLDER_USERSPACE	= (1U << 1),
	FREEZE_MAY_NEST		= (1U << 2),
};
2157
/* Table of per-superblock method pointers. */
struct super_operations {
	struct inode *(*alloc_inode)(struct super_block *sb);
	void (*destroy_inode)(struct inode *);
	void (*free_inode)(struct inode *);

	void (*dirty_inode) (struct inode *, int flags);
	int (*write_inode) (struct inode *, struct writeback_control *wbc);
	int (*drop_inode) (struct inode *);
	void (*evict_inode) (struct inode *);
	void (*put_super) (struct super_block *);
	int (*sync_fs)(struct super_block *sb, int wait);
	int (*freeze_super) (struct super_block *, enum freeze_holder who);
	int (*freeze_fs) (struct super_block *);
	int (*thaw_super) (struct super_block *, enum freeze_holder who);
	int (*unfreeze_fs) (struct super_block *);
	int (*statfs) (struct dentry *, struct kstatfs *);
	int (*remount_fs) (struct super_block *, int *, char *);
	void (*umount_begin) (struct super_block *);

	int (*show_options)(struct seq_file *, struct dentry *);
	int (*show_devname)(struct seq_file *, struct dentry *);
	int (*show_path)(struct seq_file *, struct dentry *);
	int (*show_stats)(struct seq_file *, struct dentry *);
#ifdef CONFIG_QUOTA
	ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
	ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
	struct dquot __rcu **(*get_dquots)(struct inode *);
#endif
	long (*nr_cached_objects)(struct super_block *,
				  struct shrink_control *);
	long (*free_cached_objects)(struct super_block *,
				    struct shrink_control *);
	void (*shutdown)(struct super_block *sb);
};
2192
/*
 * Inode flags - they have no relation to superblock flags now
 */
#define S_SYNC		(1 << 0)  /* Writes are synced at once */
#define S_NOATIME	(1 << 1)  /* Do not update access times */
#define S_APPEND	(1 << 2)  /* Append-only file */
#define S_IMMUTABLE	(1 << 3)  /* Immutable file */
#define S_DEAD		(1 << 4)  /* removed, but still open directory */
#define S_NOQUOTA	(1 << 5)  /* Inode is not counted to quota */
#define S_DIRSYNC	(1 << 6)  /* Directory modifications are synchronous */
#define S_NOCMTIME	(1 << 7)  /* Do not update file c/mtime */
#define S_SWAPFILE	(1 << 8)  /* Do not truncate: swapon got its bmaps */
#define S_PRIVATE	(1 << 9)  /* Inode is fs-internal */
#define S_IMA		(1 << 10) /* Inode has an associated IMA struct */
#define S_AUTOMOUNT	(1 << 11) /* Automount/referral quasi-directory */
#define S_NOSEC		(1 << 12) /* no suid or xattr security attributes */
#ifdef CONFIG_FS_DAX
#define S_DAX		(1 << 13) /* Direct Access, avoiding the page cache */
#else
#define S_DAX		0	  /* Make all the DAX code disappear */
#endif
#define S_ENCRYPTED	(1 << 14) /* Encrypted file (using fs/crypto/) */
#define S_CASEFOLD	(1 << 15) /* Casefolded file */
#define S_VERITY	(1 << 16) /* Verity file (using fs/verity/) */
#define S_KERNEL_FILE	(1 << 17) /* File is in use by the kernel (eg. fs/cachefiles) */
2218
2219	/*
2220	* Note that nosuid etc flags are inode-specific: setting some file-system
2221	* flags just means all the inodes inherit those flags by default. It might be
2222	* possible to override it selectively if you really wanted to with some
2223	* ioctl() that is not currently implemented.
2224	*
2225	* Exception: SB_RDONLY is always applied to the entire file system.
2226	*
2227	* Unfortunately, it is possible to change a filesystems flags with it mounted
2228	* with files in use. This means that all of the inodes will not have their
2229	* i_flags updated. Hence, i_flags no longer inherit the superblock mount
2230	* flags, so these have to be checked separately. -- rmk@arm.uk.linux.org
2231	*/
2232	#define __IS_FLG(inode, flg) ((inode)->i_sb->s_flags & (flg))
2233
2234	static inline bool sb_rdonly(const struct super_block sb) { return* sb->s_flags & SB_RDONLY; }
2235	#define IS_RDONLY(inode) sb_rdonly((inode)->i_sb)
2236	#define IS_SYNC(inode) (__IS_FLG(inode, SB_SYNCHRONOUS) \|\| \
2237	((inode)->i_flags & S_SYNC))
2238	#define IS_DIRSYNC(inode) (__IS_FLG(inode, SB_SYNCHRONOUS\|SB_DIRSYNC) \|\| \
2239	((inode)->i_flags & (S_SYNC\|S_DIRSYNC)))
2240	#define IS_MANDLOCK(inode) __IS_FLG(inode, SB_MANDLOCK)
2241	#define IS_NOATIME(inode) __IS_FLG(inode, SB_RDONLY\|SB_NOATIME)
2242	#define IS_I_VERSION(inode) __IS_FLG(inode, SB_I_VERSION)
2243
2244	#define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA)
2245	#define IS_APPEND(inode) ((inode)->i_flags & S_APPEND)
2246	#define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE)
2247
2248	#ifdef CONFIG_FS_POSIX_ACL
2249	#define IS_POSIXACL(inode) __IS_FLG(inode, SB_POSIXACL)
2250	#else
2251	#define IS_POSIXACL(inode) 0
2252	#endif
2253
2254	#define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD)
2255	#define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME)
2256	#define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE)
2257	#define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE)
2258	#define IS_IMA(inode) ((inode)->i_flags & S_IMA)
2259	#define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT)
2260	#define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC)
2261	#define IS_DAX(inode) ((inode)->i_flags & S_DAX)
2262	#define IS_ENCRYPTED(inode) ((inode)->i_flags & S_ENCRYPTED)
2263	#define IS_CASEFOLDED(inode) ((inode)->i_flags & S_CASEFOLD)
2264	#define IS_VERITY(inode) ((inode)->i_flags & S_VERITY)
2265
2266	#define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \
2267	(inode)->i_rdev == WHITEOUT_DEV)
2268
2269	static inline bool HAS_UNMAPPED_ID(struct mnt_idmap *idmap,
2270	struct inode *inode)
2271	{
2272	return !vfsuid_valid(uid: i_uid_into_vfsuid(idmap, inode)) \|\|
2273	!vfsgid_valid(gid: i_gid_into_vfsgid(idmap, inode));
2274	}
2275
2276	static inline void init_sync_kiocb(struct kiocb kiocb, struct* file *filp)
2277	{
2278	kiocb = (struct* kiocb) {
2279	.ki_filp = filp,
2280	.ki_flags = filp->f_iocb_flags,
2281	.ki_ioprio = get_current_ioprio(),
2282	};
2283	}
2284
2285	static inline void kiocb_clone(struct kiocb kiocb, struct* kiocb *kiocb_src,
2286	struct file *filp)
2287	{
2288	kiocb = (struct* kiocb) {
2289	.ki_filp = filp,
2290	.ki_flags = kiocb_src->ki_flags,
2291	.ki_ioprio = kiocb_src->ki_ioprio,
2292	.ki_pos = kiocb_src->ki_pos,
2293	};
2294	}
2295
/*
 * Inode state bits.  Protected by inode->i_lock
 *
 * Four bits determine the dirty state of the inode: I_DIRTY_SYNC,
 * I_DIRTY_DATASYNC, I_DIRTY_PAGES, and I_DIRTY_TIME.
 *
 * Four bits define the lifetime of an inode.  Initially, inodes are I_NEW,
 * until that flag is cleared.  I_WILL_FREE, I_FREEING and I_CLEAR are set at
 * various stages of removing an inode.
 *
 * Two bits are used for locking and completion notification, I_NEW and I_SYNC.
 *
 * I_DIRTY_SYNC		Inode is dirty, but doesn't have to be written on
 *			fdatasync() (unless I_DIRTY_DATASYNC is also set).
 *			Timestamp updates are the usual cause.
 * I_DIRTY_DATASYNC	Data-related inode changes pending.  We keep track of
 *			these changes separately from I_DIRTY_SYNC so that we
 *			don't have to write inode on fdatasync() when only
 *			e.g. the timestamps have changed.
 * I_DIRTY_PAGES	Inode has dirty pages.  Inode itself may be clean.
 * I_DIRTY_TIME		The inode itself has dirty timestamps, and the
 *			lazytime mount option is enabled.  We keep track of this
 *			separately from I_DIRTY_SYNC in order to implement
 *			lazytime.  This gets cleared if I_DIRTY_INODE
 *			(I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. But
 *			I_DIRTY_TIME can still be set if I_DIRTY_SYNC is already
 *			in place because writeback might already be in progress
 *			and we don't want to lose the time update
 * I_NEW		Serves as both a mutex and completion notification.
 *			New inodes set I_NEW.  If two processes both create
 *			the same inode, one of them will release its inode and
 *			wait for I_NEW to be released before returning.
 *			Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can
 *			also cause waiting on I_NEW, without I_NEW actually
 *			being set.  find_inode() uses this to prevent returning
 *			nearly-dead inodes.
 * I_WILL_FREE		Must be set when calling write_inode_now() if i_count
 *			is zero.  I_FREEING must be set when I_WILL_FREE is
 *			cleared.
 * I_FREEING		Set when inode is about to be freed but still has dirty
 *			pages or buffers attached or the inode itself is still
 *			dirty.
 * I_CLEAR		Added by clear_inode().  In this state the inode is
 *			clean and can be destroyed.  Inode keeps I_FREEING.
 *
 *			Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are
 *			prohibited for many purposes.  iget() must wait for
 *			the inode to be completely released, then create it
 *			anew.  Other functions will just ignore such inodes,
 *			if appropriate.  I_NEW is used for waiting.
 *
 * I_SYNC		Writeback of inode is running.  The bit is set during
 *			data writeback, and cleared with a wakeup on the bit
 *			address once it is done.  The bit is also used to pin
 *			the inode in memory for flusher thread.
 *
 * I_REFERENCED		Marks the inode as recently referenced on the LRU list.
 *
 * I_DIO_WAKEUP		Never set.  Only used as a key for wait_on_bit().
 *
 * I_WB_SWITCH		Cgroup bdi_writeback switching in progress.  Used to
 *			synchronize competing switching instances and to tell
 *			wb stat updates to grab the i_pages lock.  See
 *			inode_switch_wbs_work_fn() for details.
 *
 * I_OVL_INUSE		Used by overlayfs to get exclusive ownership on upper
 *			and work dirs among overlayfs mounts.
 *
 * I_CREATING		New object's inode in the middle of setting up.
 *
 * I_DONTCACHE		Evict inode as soon as it is not used anymore.
 *
 * I_SYNC_QUEUED	Inode is queued in b_io or b_more_io writeback lists.
 *			Used to detect that mark_inode_dirty() should not move
 *			inode between dirty lists.
 *
 * I_PINNING_NETFS_WB	Inode is pinning an fscache object for writeback.
 *
 * Q: What is the difference between I_WILL_FREE and I_FREEING?
 */
#define I_DIRTY_SYNC		(1 << 0)
#define I_DIRTY_DATASYNC	(1 << 1)
#define I_DIRTY_PAGES		(1 << 2)
#define __I_NEW			3
#define I_NEW			(1 << __I_NEW)
#define I_WILL_FREE		(1 << 4)
#define I_FREEING		(1 << 5)
#define I_CLEAR			(1 << 6)
#define __I_SYNC		7
#define I_SYNC			(1 << __I_SYNC)
#define I_REFERENCED		(1 << 8)
#define __I_DIO_WAKEUP		9
#define I_DIO_WAKEUP		(1 << __I_DIO_WAKEUP)
#define I_LINKABLE		(1 << 10)
#define I_DIRTY_TIME		(1 << 11)
#define I_WB_SWITCH		(1 << 13)
#define I_OVL_INUSE		(1 << 14)
#define I_CREATING		(1 << 15)
#define I_DONTCACHE		(1 << 16)
#define I_SYNC_QUEUED		(1 << 17)
#define I_PINNING_NETFS_WB	(1 << 18)

#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
#define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
#define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME)
2401
2402	extern void __mark_inode_dirty(struct inode , int*);
2403	static inline void mark_inode_dirty(struct inode *inode)
2404	{
2405	__mark_inode_dirty(inode, I_DIRTY);
2406	}
2407
2408	static inline void mark_inode_dirty_sync(struct inode *inode)
2409	{
2410	__mark_inode_dirty(inode, I_DIRTY_SYNC);
2411	}
2412
2413	/*
2414	* Returns true if the given inode itself only has dirty timestamps (its pages
2415	* may still be dirty) and isn't currently being allocated or freed.
2416	* Filesystems should call this if when writing an inode when lazytime is
2417	* enabled, they want to opportunistically write the timestamps of other inodes
2418	* located very nearby on-disk, e.g. in the same inode block. This returns true
2419	* if the given inode is in need of such an opportunistic update. Requires
2420	* i_lock, or at least later re-checking under i_lock.
2421	*/
2422	static inline bool inode_is_dirtytime_only(struct inode *inode)
2423	{
2424	return (inode->i_state & (I_DIRTY_TIME \| I_NEW \|
2425	I_FREEING \| I_WILL_FREE)) == I_DIRTY_TIME;
2426	}
2427
extern void inc_nlink(struct inode *inode);
extern void drop_nlink(struct inode *inode);
extern void clear_nlink(struct inode *inode);
extern void set_nlink(struct inode *inode, unsigned int nlink);
2432
/* Call inc_nlink() on @inode, then mark the inode dirty. */
static inline void inode_inc_link_count(struct inode *inode)
{
	inc_nlink(inode);
	mark_inode_dirty(inode);
}
2438
/* Call drop_nlink() on @inode, then mark the inode dirty. */
static inline void inode_dec_link_count(struct inode *inode)
{
	drop_nlink(inode);
	mark_inode_dirty(inode);
}
2444
/* Single-bit values, so they can be OR-ed together into one flags word. */
enum file_time_flags {
	S_ATIME = 1,
	S_MTIME = 2,
	S_CTIME = 4,
	S_VERSION = 8,
};
2451
2452	extern bool atime_needs_update(const struct path , struct* inode *);
2453	extern void touch_atime(const struct path *);
2454	int inode_update_time(struct inode inode, int* flags);
2455
2456	static inline void file_accessed(struct file *file)
2457	{
2458	if (!(file->f_flags & O_NOATIME))
2459	touch_atime(&file->f_path);
2460	}
2461
extern int file_modified(struct file *file);
int kiocb_modified(struct kiocb *iocb);

int sync_inode_metadata(struct inode *inode, int wait);
2466
2467	struct file_system_type {
2468	const char *name;
2469	int fs_flags;
2470	#define FS_REQUIRES_DEV 1
2471	#define FS_BINARY_MOUNTDATA 2
2472	#define FS_HAS_SUBTYPE 4
2473	#define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */
2474	#define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */
2475	#define FS_ALLOW_IDMAP 32 /* FS has been updated to handle vfs idmappings. */
2476	#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
2477	int (init_fs_context)(struct* fs_context *);
2478	const struct fs_parameter_spec *parameters;
2479	struct dentry (mount) (struct file_system_type , int*,
2480	const char , void* *);
2481	void (kill_sb) (struct* super_block *);
2482	struct module *owner;
2483	struct file_system_type * next;
2484	struct hlist_head fs_supers;
2485
2486	struct lock_class_key s_lock_key;
2487	struct lock_class_key s_umount_key;
2488	struct lock_class_key s_vfs_rename_key;
2489	struct lock_class_key s_writers_key[SB_FREEZE_LEVELS];
2490
2491	struct lock_class_key i_lock_key;
2492	struct lock_class_key i_mutex_key;
2493	struct lock_class_key invalidate_lock_key;
2494	struct lock_class_key i_mutex_dir_key;
2495	};
2496
2497	#define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME)
2498
2499	extern struct dentry mount_bdev(struct* file_system_type *fs_type,
2500	int flags, const char dev_name, void* *data,
2501	int (fill_super)(struct* super_block , void* , int*));
2502	extern struct dentry mount_single(struct* file_system_type *fs_type,
2503	int flags, void *data,
2504	int (fill_super)(struct* super_block , void* , int*));
2505	extern struct dentry mount_nodev(struct* file_system_type *fs_type,
2506	int flags, void *data,
2507	int (fill_super)(struct* super_block , void* , int*));
2508	extern struct dentry mount_subtree(struct* vfsmount mnt, const* char *path);
2509	void retire_super(struct super_block *sb);
2510	void generic_shutdown_super(struct super_block *sb);
2511	void kill_block_super(struct super_block *sb);
2512	void kill_anon_super(struct super_block *sb);
2513	void kill_litter_super(struct super_block *sb);
2514	void deactivate_super(struct super_block *sb);
2515	void deactivate_locked_super(struct super_block *sb);
2516	int set_anon_super(struct super_block s, void* *data);
2517	int set_anon_super_fc(struct super_block s, struct* fs_context *fc);
2518	int get_anon_bdev(dev_t *);
2519	void free_anon_bdev(dev_t);
2520	struct super_block sget_fc(struct* fs_context *fc,
2521	int (test)(struct* super_block , struct* fs_context *),
2522	int (set)(struct* super_block , struct* fs_context *));
2523	struct super_block sget(struct* file_system_type *type,
2524	int (test)(struct* super_block ,void* *),
2525	int (set)(struct* super_block ,void* *),
2526	int flags, void *data);
2527	struct super_block sget_dev(struct* fs_context *fc, dev_t dev);
2528
/* Alas, no aliases. Too much hassle with bringing module.h everywhere */
#define fops_get(fops) \
	(((fops) && try_module_get((fops)->owner) ? (fops) : NULL))
#define fops_put(fops) \
	do { if (fops) module_put((fops)->owner); } while(0)
/*
 * This one is to be used ONLY from ->open() instances.
 * fops must be non-NULL, pinned down *and* module dependencies
 * should be sufficient to pin the caller down as well.
 */
#define replace_fops(f, fops) \
	do {	\
		struct file *__file = (f); \
		fops_put(__file->f_op); \
		BUG_ON(!(__file->f_op = (fops))); \
	} while(0)
2545
2546	extern int register_filesystem(struct file_system_type *);
2547	extern int unregister_filesystem(struct file_system_type *);
2548	extern int vfs_statfs(const struct path , struct* kstatfs *);
2549	extern int user_statfs(const char __user , struct* kstatfs *);
2550	extern int fd_statfs(int, struct kstatfs *);
2551	int freeze_super(struct super_block super, enum* freeze_holder who);
2552	int thaw_super(struct super_block super, enum* freeze_holder who);
2553	extern __printf(`2`, `3`)
2554	int super_setup_bdi_name(struct super_block sb, char* *fmt, ...);
2555	extern int super_setup_bdi(struct super_block *sb);
2556
2557	static inline void super_set_uuid(struct super_block sb, const* u8 uuid, unsigned* len)
2558	{
2559	if (WARN_ON(len > sizeof(sb->s_uuid)))
2560	len = sizeof(sb->s_uuid);
2561	sb->s_uuid_len = len;
2562	memcpy(&sb->s_uuid, uuid, len);
2563	}
2564
2565	/ set sb sysfs name based on sb->s_bdev /
2566	static inline void super_set_sysfs_name_bdev(struct super_block *sb)
2567	{
2568	snprintf(buf: sb->s_sysfs_name, size: sizeof(sb->s_sysfs_name), fmt: "%pg", sb->s_bdev);
2569	}
2570
2571	/ set sb sysfs name based on sb->s_uuid /
2572	static inline void super_set_sysfs_name_uuid(struct super_block *sb)
2573	{
2574	WARN_ON(sb->s_uuid_len != sizeof(sb->s_uuid));
2575	snprintf(buf: sb->s_sysfs_name, size: sizeof(sb->s_sysfs_name), fmt: "%pU", sb->s_uuid.b);
2576	}
2577
2578	/ set sb sysfs name based on sb->s_id /
2579	static inline void super_set_sysfs_name_id(struct super_block *sb)
2580	{
2581	strscpy(sb->s_sysfs_name, sb->s_id, sizeof(sb->s_sysfs_name));
2582	}
2583
2584	/ try to use something standard before you use this /
2585	__printf(`2`, `3`)
2586	static inline void super_set_sysfs_name_generic(struct super_block sb, const* char *fmt, ...)
2587	{
2588	va_list args;
2589
2590	va_start(args, fmt);
2591	vsnprintf(buf: sb->s_sysfs_name, size: sizeof(sb->s_sysfs_name), fmt, args);
2592	va_end(args);
2593	}
2594
extern int current_umask(void);

extern void ihold(struct inode * inode);
extern void iput(struct inode *);
int inode_update_timestamps(struct inode *inode, int flags);
int generic_update_time(struct inode *, int);

/* /sys/fs */
extern struct kobject *fs_kobj;

/* INT_MAX with the bits outside PAGE_MASK cleared. */
#define MAX_RW_COUNT (INT_MAX & PAGE_MASK)
2606
2607	/ fs/open.c /
2608	struct audit_names;
2609	struct filename {
2610	const char name; /* pointer to actual string /
2611	const __user char uptr; /* original userland pointer /
2612	atomic_t refcnt;
2613	struct audit_names *aname;
2614	const char iname[];
2615	};
2616	static_assert(offsetof(struct filename, iname) % sizeof(long) == `0`);
2617
2618	static inline struct mnt_idmap file_mnt_idmap(const* struct file *file)
2619	{
2620	return mnt_idmap(mnt: file->f_path.mnt);
2621	}
2622
2623	/**
2624	* is_idmapped_mnt - check whether a mount is mapped
2625	* @mnt: the mount to check
2626	*
2627	* If @mnt has an non @nop_mnt_idmap attached to it then @mnt is mapped.
2628	*
2629	* Return: true if mount is mapped, false if not.
2630	*/
2631	static inline bool is_idmapped_mnt(const struct vfsmount *mnt)
2632	{
2633	return mnt_idmap(mnt) != &nop_mnt_idmap;
2634	}
2635
2636	extern long vfs_truncate(const struct path *, loff_t);
2637	int do_truncate(struct mnt_idmap , struct* dentry *, loff_t start,
2638	unsigned int time_attrs, struct file *filp);
2639	extern int vfs_fallocate(struct file file, int* mode, loff_t offset,
2640	loff_t len);
2641	extern long do_sys_open(int dfd, const char __user filename, int* flags,
2642	umode_t mode);
2643	extern struct file file_open_name(struct* filename , int*, umode_t);
2644	extern struct file filp_open(const* char , int*, umode_t);
2645	extern struct file file_open_root(const* struct path *,
2646	const char , int*, umode_t);
2647	static inline struct file file_open_root_mnt(struct* vfsmount *mnt,
2648	const char name, int* flags, umode_t mode)
2649	{
2650	return file_open_root(&(struct path){.mnt = mnt, .dentry = mnt->mnt_root},
2651	name, flags, mode);
2652	}
2653	struct file dentry_open(const* struct path path, int* flags,
2654	const struct cred *creds);
2655	struct file dentry_create(const* struct path path, int* flags, umode_t mode,
2656	const struct cred *cred);
2657	struct path backing_file_user_path(struct* file *f);
2658
2659	/*
2660	* When mmapping a file on a stackable filesystem (e.g., overlayfs), the file
2661	* stored in ->vm_file is a backing file whose f_inode is on the underlying
2662	* filesystem. When the mapped file path and inode number are displayed to
2663	* user (e.g. via /proc/<pid>/maps), these helpers should be used to get the
2664	* path and inode number to display to the user, which is the path of the fd
2665	* that user has requested to map and the inode number that would be returned
2666	* by fstat() on that same fd.
2667	*/
2668	/ Get the path to display in /proc/<pid>/maps /
2669	static inline const struct path file_user_path(struct* file *f)
2670	{
2671	if (unlikely(f->f_mode & FMODE_BACKING))
2672	return backing_file_user_path(f);
2673	return &f->f_path;
2674	}
2675	/ Get the inode whose inode number to display in /proc/<pid>/maps /
2676	static inline const struct inode file_user_inode(struct* file *f)
2677	{
2678	if (unlikely(f->f_mode & FMODE_BACKING))
2679	return d_inode(dentry: backing_file_user_path(f)->dentry);
2680	return file_inode(f);
2681	}
2682
2683	static inline struct file file_clone_open(struct* file *file)
2684	{
2685	return dentry_open(path: &file->f_path, flags: file->f_flags, creds: file->f_cred);
2686	}
2687	extern int filp_close(struct file *, fl_owner_t id);
2688
2689	extern struct filename getname_flags(const* char __user , int, int* *);
2690	extern struct filename getname_uflags(const* char __user , int*);
2691	extern struct filename getname(const* char __user *);
2692	extern struct filename getname_kernel(const* char *);
2693	extern void putname(struct filename *name);
2694
extern int finish_open(struct file *file, struct dentry *dentry,
			int (*open)(struct inode *, struct file *));
extern int finish_no_open(struct file *file, struct dentry *dentry);
2698
2699	/ Helper for the simple case when original dentry is used /
2700	static inline int finish_open_simple(struct file file, int* error)
2701	{
2702	if (error)
2703	return error;
2704
2705	return finish_open(file, dentry: file->f_path.dentry, NULL);
2706	}
2707
2708	/ fs/dcache.c /
2709	extern void __init vfs_caches_init_early(void);
2710	extern void __init vfs_caches_init(void);
2711
2712	extern struct kmem_cache *names_cachep;
2713
2714	#define __getname() kmem_cache_alloc(names_cachep, GFP_KERNEL)
2715	#define __putname(name) kmem_cache_free(names_cachep, (void *)(name))
2716
2717	extern struct super_block *blockdev_superblock;
2718	static inline bool sb_is_blkdev_sb(struct super_block *sb)
2719	{
2720	return IS_ENABLED(CONFIG_BLOCK) && sb == blockdev_superblock;
2721	}
2722
void emergency_thaw_all(void);
int sync_filesystem(struct super_block *sb);
extern const struct file_operations def_blk_fops;
extern const struct file_operations def_chr_fops;
2727
/* fs/char_dev.c */
#define CHRDEV_MAJOR_MAX 512
/* Marks the bottom of the first segment of free char majors */
#define CHRDEV_MAJOR_DYN_END 234
/* Marks the top and bottom of the second segment of free char majors */
#define CHRDEV_MAJOR_DYN_EXT_START 511
#define CHRDEV_MAJOR_DYN_EXT_END 384
2735
2736	extern int alloc_chrdev_region(dev_t , unsigned, unsigned, const* char *);
2737	extern int register_chrdev_region(dev_t, unsigned, const char *);
2738	extern int __register_chrdev(unsigned int major, unsigned int baseminor,
2739	unsigned int count, const char *name,
2740	const struct file_operations *fops);
2741	extern void __unregister_chrdev(unsigned int major, unsigned int baseminor,
2742	unsigned int count, const char *name);
2743	extern void unregister_chrdev_region(dev_t, unsigned);
2744	extern void chrdev_show(struct seq_file *,off_t);
2745
/*
 * Register a character device for @major covering minors 0..255
 * (baseminor 0, count 256) via __register_chrdev().
 */
static inline int register_chrdev(unsigned int major, const char *name,
				  const struct file_operations *fops)
{
	return __register_chrdev(major, 0, 256, name, fops);
}
2751
/*
 * Counterpart of register_chrdev(): unregister minors 0..255 of @major
 * via __unregister_chrdev().
 */
static inline void unregister_chrdev(unsigned int major, const char *name)
{
	__unregister_chrdev(major, 0, 256, name);
}
2756
2757	extern void init_special_inode(struct inode *, umode_t, dev_t);
2758
2759	/ Invalid inode operations -- fs/bad_inode.c /
2760	extern void make_bad_inode(struct inode *);
2761	extern bool is_bad_inode(struct inode *);
2762
2763	extern int __must_check file_fdatawait_range(struct file *file, loff_t lstart,
2764	loff_t lend);
2765	extern int __must_check file_check_and_advance_wb_err(struct file *file);
2766	extern int __must_check file_write_and_wait_range(struct file *file,
2767	loff_t start, loff_t end);
2768
/* file_write_and_wait_range() over the whole file: offsets 0 through LLONG_MAX. */
static inline int file_write_and_wait(struct file *file)
{
	return file_write_and_wait_range(file, 0, LLONG_MAX);
}
2773
2774	extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end,
2775	int datasync);
2776	extern int vfs_fsync(struct file file, int* datasync);
2777
2778	extern int sync_file_range(struct file *file, loff_t offset, loff_t nbytes,
2779	unsigned int flags);
2780
2781	static inline bool iocb_is_dsync(const struct kiocb *iocb)
2782	{
2783	return (iocb->ki_flags & IOCB_DSYNC) \|\|
2784	IS_SYNC(iocb->ki_filp->f_mapping->host);
2785	}
2786
2787	/*
2788	* Sync the bytes written if this was a synchronous write. Expect ki_pos
2789	* to already be updated for the write, and will return either the amount
2790	* of bytes passed in, or an error if syncing the file failed.
2791	*/
2792	static inline ssize_t generic_write_sync(struct kiocb *iocb, ssize_t count)
2793	{
2794	if (iocb_is_dsync(iocb)) {
2795	int ret = vfs_fsync_range(file: iocb->ki_filp,
2796	start: iocb->ki_pos - count, end: iocb->ki_pos - `1`,
2797	datasync: (iocb->ki_flags & IOCB_SYNC) ? `0` : `1`);
2798	if (ret)
2799	return ret;
2800	}
2801
2802	return count;
2803	}
2804
2805	extern void emergency_sync(void);
2806	extern void emergency_remount(void);
2807
2808	#ifdef CONFIG_BLOCK
2809	extern int bmap(struct inode inode, sector_t block);
2810	#else
2811	static inline int bmap(struct inode inode, sector_t block)
2812	{
2813	return -EINVAL;
2814	}
2815	#endif
2816
int notify_change(struct mnt_idmap *, struct dentry *,
		  struct iattr *, struct inode **);
int inode_permission(struct mnt_idmap *, struct inode *, int);
int generic_permission(struct mnt_idmap *, struct inode *, int);
/* inode_permission() on @file's inode, using the idmap of @file's mount. */
static inline int file_permission(struct file *file, int mask)
{
	return inode_permission(file_mnt_idmap(file),
				file_inode(file), mask);
}
2826	static inline int path_permission(const struct path path, int* mask)
2827	{
2828	return inode_permission(mnt_idmap(mnt: path->mnt),
2829	d_inode(dentry: path->dentry), mask);
2830	}
int __check_sticky(struct mnt_idmap *idmap, struct inode *dir,
		   struct inode *inode);
2833
2834	static inline bool execute_ok(struct inode *inode)
2835	{
2836	return (inode->i_mode & S_IXUGO) \|\| S_ISDIR(inode->i_mode);
2837	}
2838
2839	static inline bool inode_wrong_type(const struct inode *inode, umode_t mode)
2840	{
2841	return (inode->i_mode ^ mode) & S_IFMT;
2842	}
2843
2844	/**
2845	* file_start_write - get write access to a superblock for regular file io
2846	* @file: the file we want to write to
2847	*
2848	* This is a variant of sb_start_write() which is a noop on non-regualr file.
2849	* Should be matched with a call to file_end_write().
2850	*/
2851	static inline void file_start_write(struct file *file)
2852	{
2853	if (!S_ISREG(file_inode(file)->i_mode))
2854	return;
2855	sb_start_write(sb: file_inode(f: file)->i_sb);
2856	}
2857
2858	static inline bool file_start_write_trylock(struct file *file)
2859	{
2860	if (!S_ISREG(file_inode(file)->i_mode))
2861	return true;
2862	return sb_start_write_trylock(sb: file_inode(f: file)->i_sb);
2863	}
2864
2865	/**
2866	* file_end_write - drop write access to a superblock of a regular file
2867	* @file: the file we wrote to
2868	*
2869	* Should be matched with a call to file_start_write().
2870	*/
2871	static inline void file_end_write(struct file *file)
2872	{
2873	if (!S_ISREG(file_inode(file)->i_mode))
2874	return;
2875	sb_end_write(sb: file_inode(f: file)->i_sb);
2876	}
2877
2878	/**
2879	* kiocb_start_write - get write access to a superblock for async file io
2880	* @iocb: the io context we want to submit the write with
2881	*
2882	* This is a variant of sb_start_write() for async io submission.
2883	* Should be matched with a call to kiocb_end_write().
2884	*/
2885	static inline void kiocb_start_write(struct kiocb *iocb)
2886	{
2887	struct inode *inode = file_inode(f: iocb->ki_filp);
2888
2889	sb_start_write(sb: inode->i_sb);
2890	/*
2891	* Fool lockdep by telling it the lock got released so that it
2892	* doesn't complain about the held lock when we return to userspace.
2893	*/
2894	__sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
2895	}
2896
2897	/**
2898	* kiocb_end_write - drop write access to a superblock after async file io
2899	* @iocb: the io context we sumbitted the write with
2900	*
2901	* Should be matched with a call to kiocb_start_write().
2902	*/
2903	static inline void kiocb_end_write(struct kiocb *iocb)
2904	{
2905	struct inode *inode = file_inode(f: iocb->ki_filp);
2906
2907	/*
2908	* Tell lockdep we inherited freeze protection from submission thread.
2909	*/
2910	__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
2911	sb_end_write(sb: inode->i_sb);
2912	}
2913
2914	/*
2915	* This is used for regular files where some users -- especially the
2916	* currently executed binary in a process, previously handled via
2917	* VM_DENYWRITE -- cannot handle concurrent write (and maybe mmap
2918	* read-write shared) accesses.
2919	*
2920	* get_write_access() gets write permission for a file.
2921	* put_write_access() releases this write permission.
2922	* deny_write_access() denies write access to a file.
2923	* allow_write_access() re-enables write access to a file.
2924	*
2925	* The i_writecount field of an inode can have the following values:
2926	* 0: no write access, no denied write access
2927	* < 0: (-i_writecount) users that denied write access to the file.
2928	* > 0: (i_writecount) users that have write access to the file.
2929	*
2930	* Normally we operate on that counter with atomic_{inc,dec} and it's safe
2931	* except for the cases where we don't hold i_writecount yet. Then we need to
2932	* use {get,deny}_write_access() - these functions check the sign and refuse
2933	* to do the change if sign is wrong.
2934	*/
2935	static inline int get_write_access(struct inode *inode)
2936	{
2937	return atomic_inc_unless_negative(v: &inode->i_writecount) ? `0` : -ETXTBSY;
2938	}
2939	static inline int deny_write_access(struct file *file)
2940	{
2941	struct inode *inode = file_inode(f: file);
2942	return atomic_dec_unless_positive(v: &inode->i_writecount) ? `0` : -ETXTBSY;
2943	}
2944	static inline void put_write_access(struct inode * inode)
2945	{
2946	atomic_dec(v: &inode->i_writecount);
2947	}
2948	static inline void allow_write_access(struct file *file)
2949	{
2950	if (file)
2951	atomic_inc(v: &file_inode(f: file)->i_writecount);
2952	}
2953	static inline bool inode_is_open_for_write(const struct inode *inode)
2954	{
2955	return atomic_read(v: &inode->i_writecount) > `0`;
2956	}
2957
/*
 * i_readcount is only maintained when CONFIG_IMA or CONFIG_FILE_LOCKING
 * is enabled; otherwise both helpers are empty stubs.
 */
#if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING)
static inline void i_readcount_dec(struct inode *inode)
{
	/* Dropping the count below zero is a bug. */
	BUG_ON(atomic_dec_return(&inode->i_readcount) < 0);
}
static inline void i_readcount_inc(struct inode *inode)
{
	atomic_inc(&inode->i_readcount);
}
#else
static inline void i_readcount_dec(struct inode *inode)
{
	return;
}
static inline void i_readcount_inc(struct inode *inode)
{
	return;
}
#endif
2977	extern int do_pipe_flags(int , int*);
2978
2979	extern ssize_t kernel_read(struct file , void* , size_t, loff_t );
2980	ssize_t __kernel_read(struct file file, void* buf, size_t count, loff_t pos);
2981	extern ssize_t kernel_write(struct file , const* void , size_t, loff_t );
2982	extern ssize_t __kernel_write(struct file , const* void , size_t, loff_t );
2983	extern struct file * open_exec(const char *);
2984
/* fs/dcache.c -- generic fs support functions */
extern bool is_subdir(struct dentry *, struct dentry *);
extern bool path_is_under(const struct path *, const struct path *);

extern char *file_path(struct file *, char *, int);
2990
/**
 * is_dot_dotdot - returns true only if @name is "." or ".."
 * @name: file name to check
 * @len: length of file name, in bytes
 *
 * @name is only read up to @len bytes; a zero @len yields false.
 */
static inline bool is_dot_dotdot(const char *name, size_t len)
{
	return len && unlikely(name[0] == '.') &&
		(len == 1 || (len == 2 && name[1] == '.'));
}
3001
3002	#include <linux/err.h>
3003
3004	/ needed for stackable file system support /
3005	extern loff_t default_llseek(struct file file, loff_t offset, int* whence);
3006
3007	extern loff_t vfs_llseek(struct file file, loff_t offset, int* whence);
3008
3009	extern int inode_init_always(struct super_block , struct* inode *);
3010	extern void inode_init_once(struct inode *);
3011	extern void address_space_init_once(struct address_space *mapping);
3012	extern struct inode * igrab(struct inode *);
3013	extern ino_t iunique(struct super_block *, ino_t);
3014	extern int inode_needs_sync(struct inode *inode);
3015	extern int generic_delete_inode(struct inode *inode);
3016	static inline int generic_drop_inode(struct inode *inode)
3017	{
3018	return !inode->i_nlink \|\| inode_unhashed(inode);
3019	}
extern void d_mark_dontcache(struct inode *inode);

extern struct inode *ilookup5_nowait(struct super_block *sb,
		unsigned long hashval, int (*test)(struct inode *, void *),
		void *data);
extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
		int (*test)(struct inode *, void *), void *data);
extern struct inode *ilookup(struct super_block *sb, unsigned long ino);

extern struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
		int (*test)(struct inode *, void *),
		int (*set)(struct inode *, void *),
		void *data);
extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *);
extern struct inode * iget_locked(struct super_block *, unsigned long);
extern struct inode *find_inode_nowait(struct super_block *,
				       unsigned long,
				       int (*match)(struct inode *,
						    unsigned long, void *),
				       void *data);
extern struct inode *find_inode_rcu(struct super_block *, unsigned long,
				    int (*)(struct inode *, void *), void *);
extern struct inode *find_inode_by_ino_rcu(struct super_block *, unsigned long);
extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *);
extern int insert_inode_locked(struct inode *);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void lockdep_annotate_inode_mutex_key(struct inode *inode);
#else
/* Empty stub when CONFIG_DEBUG_LOCK_ALLOC is off. */
static inline void lockdep_annotate_inode_mutex_key(struct inode *inode)
{
}
#endif
void unlock_new_inode(struct inode *);
void discard_new_inode(struct inode *);
unsigned int get_next_ino(void);
void evict_inodes(struct super_block *sb);
void dump_mapping(const struct address_space *);
3055
3056	/*
3057	* Userspace may rely on the inode number being non-zero. For example, glibc
3058	* simply ignores files with zero i_ino in unlink() and other places.
3059	*
3060	* As an additional complication, if userspace was compiled with
3061	* _FILE_OFFSET_BITS=32 on a 64-bit kernel we'll only end up reading out the
3062	* lower 32 bits, so we need to check that those aren't zero explicitly. With
3063	* _FILE_OFFSET_BITS=64, this may cause some harmless false-negatives, but
3064	* better safe than sorry.
3065	*/
3066	static inline bool is_zero_ino(ino_t ino)
3067	{
3068	return (u32)ino == `0`;
3069	}
3070
extern void __iget(struct inode * inode);
extern void iget_failed(struct inode *);
extern void clear_inode(struct inode *);
extern void __destroy_inode(struct inode *);
extern struct inode *new_inode_pseudo(struct super_block *sb);
extern struct inode *new_inode(struct super_block *sb);
extern void free_inode_nonrcu(struct inode *inode);
extern int setattr_should_drop_suidgid(struct mnt_idmap *, struct inode *);
extern int file_remove_privs_flags(struct file *file, unsigned int flags);
extern int file_remove_privs(struct file *);
int setattr_should_drop_sgid(struct mnt_idmap *idmap,
			     const struct inode *inode);
3083
3084	/*
3085	* This must be used for allocating filesystems specific inodes to set
3086	* up the inode reclaim context correctly.
3087	*/
3088	static inline void *
3089	alloc_inode_sb(struct super_block sb, struct* kmem_cache *cache, gfp_t gfp)
3090	{
3091	return kmem_cache_alloc_lru(s: cache, lru: &sb->s_inode_lru, gfpflags: gfp);
3092	}
3093
3094	extern void __insert_inode_hash(struct inode , unsigned* long hashval);
3095	static inline void insert_inode_hash(struct inode *inode)
3096	{
3097	__insert_inode_hash(inode, hashval: inode->i_ino);
3098	}
3099
3100	extern void __remove_inode_hash(struct inode *);
3101	static inline void remove_inode_hash(struct inode *inode)
3102	{
3103	if (!inode_unhashed(inode) && !hlist_fake(h: &inode->i_hash))
3104	__remove_inode_hash(inode);
3105	}
3106
3107	extern void inode_sb_list_add(struct inode *inode);
3108	extern void inode_add_lru(struct inode *inode);
3109
3110	extern int sb_set_blocksize(struct super_block , int*);
3111	extern int sb_min_blocksize(struct super_block , int*);
3112
3113	extern int generic_file_mmap(struct file , struct* vm_area_struct *);
3114	extern int generic_file_readonly_mmap(struct file , struct* vm_area_struct *);
3115	extern ssize_t generic_write_checks(struct kiocb , struct* iov_iter *);
3116	int generic_write_checks_count(struct kiocb iocb, loff_t count);
3117	extern int generic_write_check_limits(struct file *file, loff_t pos,
3118	loff_t *count);
3119	extern int generic_file_rw_checks(struct file file_in, struct* file *file_out);
3120	ssize_t filemap_read(struct kiocb iocb, struct* iov_iter *to,
3121	ssize_t already_read);
3122	extern ssize_t generic_file_read_iter(struct kiocb , struct* iov_iter *);
3123	extern ssize_t __generic_file_write_iter(struct kiocb , struct* iov_iter *);
3124	extern ssize_t generic_file_write_iter(struct kiocb , struct* iov_iter *);
3125	extern ssize_t generic_file_direct_write(struct kiocb , struct* iov_iter *);
3126	ssize_t generic_perform_write(struct kiocb , struct* iov_iter *);
3127	ssize_t direct_write_fallback(struct kiocb iocb, struct* iov_iter *iter,
3128	ssize_t direct_written, ssize_t buffered_written);
3129
3130	ssize_t vfs_iter_read(struct file file, struct* iov_iter iter, loff_t ppos,
3131	rwf_t flags);
3132	ssize_t vfs_iter_write(struct file file, struct* iov_iter iter, loff_t ppos,
3133	rwf_t flags);
3134	ssize_t vfs_iocb_iter_read(struct file file, struct* kiocb *iocb,
3135	struct iov_iter *iter);
3136	ssize_t vfs_iocb_iter_write(struct file file, struct* kiocb *iocb,
3137	struct iov_iter *iter);
3138
3139	/ fs/splice.c /
3140	ssize_t filemap_splice_read(struct file in, loff_t ppos,
3141	struct pipe_inode_info *pipe,
3142	size_t len, unsigned int flags);
3143	ssize_t copy_splice_read(struct file in, loff_t ppos,
3144	struct pipe_inode_info *pipe,
3145	size_t len, unsigned int flags);
3146	extern ssize_t iter_file_splice_write(struct pipe_inode_info *,
3147	struct file , loff_t , size_t, unsigned int);
3148
3149
3150	extern void
3151	file_ra_state_init(struct file_ra_state ra, struct* address_space *mapping);
3152	extern loff_t noop_llseek(struct file file, loff_t offset, int* whence);
3153	#define no_llseek NULL
3154	extern loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize);
3155	extern loff_t generic_file_llseek(struct file file, loff_t offset, int* whence);
3156	extern loff_t generic_file_llseek_size(struct file *file, loff_t offset,
3157	int whence, loff_t maxsize, loff_t eof);
3158	extern loff_t fixed_size_llseek(struct file *file, loff_t offset,
3159	int whence, loff_t size);
3160	extern loff_t no_seek_end_llseek_size(struct file , loff_t, int*, loff_t);
3161	extern loff_t no_seek_end_llseek(struct file , loff_t, int*);
3162	int rw_verify_area(int, struct file , const* loff_t *, size_t);
3163	extern int generic_file_open(struct inode * inode, struct file * filp);
3164	extern int nonseekable_open(struct inode * inode, struct file * filp);
3165	extern int stream_open(struct inode * inode, struct file * filp);
3166
#ifdef CONFIG_BLOCK
typedef void (dio_submit_t)(struct bio *bio, struct inode *inode,
			    loff_t file_offset);

enum {
	/* need locking between buffered and direct access */
	DIO_LOCKING	= 0x01,

	/* filesystem does not support filling holes */
	DIO_SKIP_HOLES	= 0x02,
};

ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
			     struct block_device *bdev, struct iov_iter *iter,
			     get_block_t get_block,
			     dio_iodone_t end_io,
			     int flags);

/*
 * Wrapper around __blockdev_direct_IO() that targets the block device of
 * @inode's superblock, passes no end_io callback, and sets both
 * DIO_LOCKING and DIO_SKIP_HOLES.
 */
static inline ssize_t blockdev_direct_IO(struct kiocb *iocb,
					 struct inode *inode,
					 struct iov_iter *iter,
					 get_block_t get_block)
{
	return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
			get_block, NULL, DIO_LOCKING | DIO_SKIP_HOLES);
}
#endif
3194
3195	void inode_dio_wait(struct inode *inode);
3196
3197	/**
3198	* inode_dio_begin - signal start of a direct I/O requests
3199	* @inode: inode the direct I/O happens on
3200	*
3201	* This is called once we've finished processing a direct I/O request,
3202	* and is used to wake up callers waiting for direct I/O to be quiesced.
3203	*/
3204	static inline void inode_dio_begin(struct inode *inode)
3205	{
3206	atomic_inc(v: &inode->i_dio_count);
3207	}
3208
3209	/**
3210	* inode_dio_end - signal finish of a direct I/O requests
3211	* @inode: inode the direct I/O happens on
3212	*
3213	* This is called once we've finished processing a direct I/O request,
3214	* and is used to wake up callers waiting for direct I/O to be quiesced.
3215	*/
3216	static inline void inode_dio_end(struct inode *inode)
3217	{
3218	if (atomic_dec_and_test(v: &inode->i_dio_count))
3219	wake_up_bit(word: &inode->i_state, __I_DIO_WAKEUP);
3220	}
3221
extern void inode_set_flags(struct inode *inode, unsigned int flags,
			    unsigned int mask);

extern const struct file_operations generic_ro_fops;

/* True for mode @m describing a char device, block device, FIFO or socket. */
#define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
3228
3229	extern int readlink_copy(char __user , int, const* char *);
3230	extern int page_readlink(struct dentry , char* __user , int*);
3231	extern const char page_get_link(struct* dentry , struct* inode *,
3232	struct delayed_call *);
3233	extern void page_put_link(void *);
3234	extern int page_symlink(struct inode inode, const* char symname, int* len);
3235	extern const struct inode_operations page_symlink_inode_operations;
3236	extern void kfree_link(void *);
3237	void generic_fillattr(struct mnt_idmap , u32, struct* inode , struct* kstat *);
3238	void generic_fill_statx_attr(struct inode inode, struct* kstat *stat);
3239	extern int vfs_getattr_nosec(const struct path , struct* kstat , u32, unsigned* int);
3240	extern int vfs_getattr(const struct path , struct* kstat , u32, unsigned* int);
3241	void __inode_add_bytes(struct inode *inode, loff_t bytes);
3242	void inode_add_bytes(struct inode *inode, loff_t bytes);
3243	void __inode_sub_bytes(struct inode *inode, loff_t bytes);
3244	void inode_sub_bytes(struct inode *inode, loff_t bytes);
3245	static inline loff_t __inode_get_bytes(struct inode *inode)
3246	{
3247	return (((loff_t)inode->i_blocks) << `9`) + inode->i_bytes;
3248	}
3249	loff_t inode_get_bytes(struct inode *inode);
3250	void inode_set_bytes(struct inode *inode, loff_t bytes);
3251	const char simple_get_link(struct* dentry , struct* inode *,
3252	struct delayed_call *);
3253	extern const struct inode_operations simple_symlink_inode_operations;
3254
3255	extern int iterate_dir(struct file , struct* dir_context *);
3256
3257	int vfs_fstatat(int dfd, const char __user filename, struct* kstat *stat,
3258	int flags);
3259	int vfs_fstat(int fd, struct kstat *stat);
3260
3261	static inline int vfs_stat(const char __user filename, struct* kstat *stat)
3262	{
3263	return vfs_fstatat(AT_FDCWD, filename, stat, flags: `0`);
3264	}
3265	static inline int vfs_lstat(const char __user name, struct* kstat *stat)
3266	{
3267	return vfs_fstatat(AT_FDCWD, filename: name, stat, AT_SYMLINK_NOFOLLOW);
3268	}
3269
3270	extern const char vfs_get_link(struct* dentry , struct* delayed_call *);
3271	extern int vfs_readlink(struct dentry , char* __user , int*);
3272
3273	extern struct file_system_type get_filesystem(struct* file_system_type *fs);
3274	extern void put_filesystem(struct file_system_type *fs);
3275	extern struct file_system_type get_fs_type(const* char *name);
3276	extern void drop_super(struct super_block *sb);
3277	extern void drop_super_exclusive(struct super_block *sb);
3278	extern void iterate_supers(void ()(struct* super_block , void* ), void* *);
3279	extern void iterate_supers_type(struct file_system_type *,
3280	void ()(struct* super_block , void* ), void* *);
3281
3282	extern int dcache_dir_open(struct inode , struct* file *);
3283	extern int dcache_dir_close(struct inode , struct* file *);
3284	extern loff_t dcache_dir_lseek(struct file , loff_t, int*);
3285	extern int dcache_readdir(struct file , struct* dir_context *);
3286	extern int simple_setattr(struct mnt_idmap , struct* dentry *,
3287	struct iattr *);
3288	extern int simple_getattr(struct mnt_idmap , const* struct path *,
3289	struct kstat , u32, unsigned* int);
3290	extern int simple_statfs(struct dentry , struct* kstatfs *);
3291	extern int simple_open(struct inode inode, struct* file *file);
3292	extern int simple_link(struct dentry , struct* inode , struct* dentry *);
3293	extern int simple_unlink(struct inode , struct* dentry *);
3294	extern int simple_rmdir(struct inode , struct* dentry *);
3295	void simple_rename_timestamp(struct inode old_dir, struct* dentry *old_dentry,
3296	struct inode new_dir, struct* dentry *new_dentry);
3297	extern int simple_rename_exchange(struct inode old_dir, struct* dentry *old_dentry,
3298	struct inode new_dir, struct* dentry *new_dentry);
3299	extern int simple_rename(struct mnt_idmap , struct* inode *,
3300	struct dentry , struct* inode , struct* dentry *,
3301	unsigned int);
3302	extern void simple_recursive_removal(struct dentry *,
3303	void (callback)(struct* dentry *));
3304	extern int noop_fsync(struct file , loff_t, loff_t, int*);
3305	extern ssize_t noop_direct_IO(struct kiocb iocb, struct* iov_iter *iter);
3306	extern int simple_empty(struct dentry *);
3307	extern int simple_write_begin(struct file file, struct* address_space *mapping,
3308	loff_t pos, unsigned len,
3309	struct page *pagep, void* **fsdata);
3310	extern const struct address_space_operations ram_aops;
3311	extern int always_delete_dentry(const struct dentry *);
3312	extern struct inode alloc_anon_inode(struct* super_block *);
3313	extern int simple_nosetlease(struct file , int, struct* file_lease *, void* **);
3314	extern const struct dentry_operations simple_dentry_operations;
3315
3316	extern struct dentry simple_lookup(struct* inode , struct* dentry , unsigned* int flags);
3317	extern ssize_t generic_read_dir(struct file , char* __user , size_t, loff_t );
3318	extern const struct file_operations simple_dir_operations;
3319	extern const struct inode_operations simple_dir_inode_operations;
3320	extern void make_empty_dir_inode(struct inode *inode);
3321	extern bool is_empty_dir_inode(struct inode *inode);
3322	struct tree_descr { const char name; const* struct file_operations ops; int* mode; };
3323	struct dentry d_alloc_name(struct* dentry , const* char *);
3324	extern int simple_fill_super(struct super_block , unsigned* long,
3325	const struct tree_descr *);
3326	extern int simple_pin_fs(struct file_system_type , struct* vfsmount *mount, int* *count);
3327	extern void simple_release_fs(struct vfsmount *mount, int* *count);
3328
3329	extern ssize_t simple_read_from_buffer(void __user *to, size_t count,
3330	loff_t ppos, const* void *from, size_t available);
3331	extern ssize_t simple_write_to_buffer(void to, size_t available, loff_t ppos,
3332	const void __user *from, size_t count);
3333
3334	struct offset_ctx {
3335	struct maple_tree mt;
3336	unsigned long next_offset;
3337	};
3338
3339	void simple_offset_init(struct offset_ctx *octx);
3340	int simple_offset_add(struct offset_ctx octx, struct* dentry *dentry);
3341	void simple_offset_remove(struct offset_ctx octx, struct* dentry *dentry);
3342	int simple_offset_empty(struct dentry *dentry);
3343	int simple_offset_rename_exchange(struct inode *old_dir,
3344	struct dentry *old_dentry,
3345	struct inode *new_dir,
3346	struct dentry *new_dentry);
3347	void simple_offset_destroy(struct offset_ctx *octx);
3348
3349	extern const struct file_operations simple_offset_dir_operations;
3350
3351	extern int __generic_file_fsync(struct file , loff_t, loff_t, int*);
3352	extern int generic_file_fsync(struct file , loff_t, loff_t, int*);
3353
3354	extern int generic_check_addressable(unsigned, u64);
3355
3356	extern void generic_set_sb_d_ops(struct super_block *sb);
3357
3358	static inline bool sb_has_encoding(const struct super_block *sb)
3359	{
3360	#if IS_ENABLED(CONFIG_UNICODE)
3361	return !!sb->s_encoding;
3362	#else
3363	return false;
3364	#endif
3365	}
3366
3367	int may_setattr(struct mnt_idmap idmap, struct* inode *inode,
3368	unsigned int ia_valid);
3369	int setattr_prepare(struct mnt_idmap , struct* dentry , struct* iattr *);
3370	extern int inode_newsize_ok(const struct inode *, loff_t offset);
3371	void setattr_copy(struct mnt_idmap , struct* inode *inode,
3372	const struct iattr *attr);
3373
3374	extern int file_update_time(struct file *file);
3375
3376	static inline bool vma_is_dax(const struct vm_area_struct *vma)
3377	{
3378	return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
3379	}
3380
3381	static inline bool vma_is_fsdax(struct vm_area_struct *vma)
3382	{
3383	struct inode *inode;
3384
3385	if (!IS_ENABLED(CONFIG_FS_DAX) \|\| !vma->vm_file)
3386	return false;
3387	if (!vma_is_dax(vma))
3388	return false;
3389	inode = file_inode(f: vma->vm_file);
3390	if (S_ISCHR(inode->i_mode))
3391	return false; / device-dax /
3392	return true;
3393	}
3394
3395	static inline int iocb_flags(struct file *file)
3396	{
3397	int res = `0`;
3398	if (file->f_flags & O_APPEND)
3399	res \|= IOCB_APPEND;
3400	if (file->f_flags & O_DIRECT)
3401	res \|= IOCB_DIRECT;
3402	if (file->f_flags & O_DSYNC)
3403	res \|= IOCB_DSYNC;
3404	if (file->f_flags & __O_SYNC)
3405	res \|= IOCB_SYNC;
3406	return res;
3407	}
3408
3409	static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags)
3410	{
3411	int kiocb_flags = `0`;
3412
3413	/ make sure there's no overlap between RWF and private IOCB flags /
3414	BUILD_BUG_ON((__force int) RWF_SUPPORTED & IOCB_EVENTFD);
3415
3416	if (!flags)
3417	return `0`;
3418	if (unlikely(flags & ~RWF_SUPPORTED))
3419	return -EOPNOTSUPP;
3420	if (unlikely((flags & RWF_APPEND) && (flags & RWF_NOAPPEND)))
3421	return -EINVAL;
3422
3423	if (flags & RWF_NOWAIT) {
3424	if (!(ki->ki_filp->f_mode & FMODE_NOWAIT))
3425	return -EOPNOTSUPP;
3426	kiocb_flags \|= IOCB_NOIO;
3427	}
3428	kiocb_flags \|= (__force int) (flags & RWF_SUPPORTED);
3429	if (flags & RWF_SYNC)
3430	kiocb_flags \|= IOCB_DSYNC;
3431
3432	if ((flags & RWF_NOAPPEND) && (ki->ki_flags & IOCB_APPEND)) {
3433	if (IS_APPEND(file_inode(ki->ki_filp)))
3434	return -EPERM;
3435	ki->ki_flags &= ~IOCB_APPEND;
3436	}
3437
3438	ki->ki_flags \|= kiocb_flags;
3439	return `0`;
3440	}
3441
3442	static inline ino_t parent_ino(struct dentry *dentry)
3443	{
3444	ino_t res;
3445
3446	/*
3447	* Don't strictly need d_lock here? If the parent ino could change
3448	* then surely we'd have a deeper race in the caller?
3449	*/
3450	spin_lock(lock: &dentry->d_lock);
3451	res = dentry->d_parent->d_inode->i_ino;
3452	spin_unlock(lock: &dentry->d_lock);
3453	return res;
3454	}
3455
/* Transaction based IO helpers */
3457
/*
 * An argresp is stored in an allocated page and holds the
 * size of the argument or response, along with its content
 */
struct simple_transaction_argresp {
	ssize_t size;	/* size of the argument or response */
	char data[];	/* the content itself, as a flexible array member */
};

/* Bytes left for data[] in one page once the struct header is accounted for. */
#define SIMPLE_TRANSACTION_LIMIT (PAGE_SIZE - sizeof(struct simple_transaction_argresp))
3468
3469	char simple_transaction_get(struct* file file, const* char __user *buf,
3470	size_t size);
3471	ssize_t simple_transaction_read(struct file file, char* __user *buf,
3472	size_t size, loff_t *pos);
3473	int simple_transaction_release(struct inode inode, struct* file *file);
3474
3475	void simple_transaction_set(struct file *file, size_t n);
3476
/*
 * simple attribute files
 *
 * These attributes behave similarly to those in sysfs:
 *
 * Writing to an attribute immediately sets a value, an open file can be
 * written to multiple times.
 *
 * Reading from an attribute creates a buffer from the value that might get
 * read with multiple read calls. When the attribute has been read
 * completely, no further read calls are possible until the file is opened
 * again.
 *
 * All attributes contain a text representation of a numeric value
 * that are accessed with the get() and set() functions.
 *
 * DEFINE_SIMPLE_ATTRIBUTE_XSIGNED() expands to a static <__fops>_open()
 * function plus a static const struct file_operations named <__fops>.
 * The open function passes @__fmt through __simple_attr_check_format()
 * with a 0ull argument so the compiler can check the format string, then
 * calls simple_attr_open(). @__is_signed selects simple_attr_write_signed
 * over simple_attr_write as the .write handler.
 */
#define DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, __is_signed) \
static int __fops ## _open(struct inode *inode, struct file *file)	\
{									\
	__simple_attr_check_format(__fmt, 0ull);			\
	return simple_attr_open(inode, file, __get, __set, __fmt);	\
}									\
static const struct file_operations __fops = {				\
	.owner	 = THIS_MODULE,						\
	.open	 = __fops ## _open,					\
	.release = simple_attr_release,					\
	.read	 = simple_attr_read,					\
	.write	 = (__is_signed) ? simple_attr_write_signed : simple_attr_write, \
	.llseek	 = generic_file_llseek,					\
}
3507
/* Unsigned variant: expands DEFINE_SIMPLE_ATTRIBUTE_XSIGNED with __is_signed == false. */
#define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt)		\
	DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, false)

/* Signed variant: expands DEFINE_SIMPLE_ATTRIBUTE_XSIGNED with __is_signed == true. */
#define DEFINE_SIMPLE_ATTRIBUTE_SIGNED(__fops, __get, __set, __fmt)	\
	DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, true)
3513
3514	static inline __printf(`1`, `2`)
3515	void __simple_attr_check_format(const char *fmt, ...)
3516	{
3517	/ don't do anything, just let the compiler check the arguments; /
3518	}
3519
3520	int simple_attr_open(struct inode inode, struct* file *file,
3521	int (get)(void* , u64 ), int (set)(void* *, u64),
3522	const char *fmt);
3523	int simple_attr_release(struct inode inode, struct* file *file);
3524	ssize_t simple_attr_read(struct file file, char* __user *buf,
3525	size_t len, loff_t *ppos);
3526	ssize_t simple_attr_write(struct file file, const* char __user *buf,
3527	size_t len, loff_t *ppos);
3528	ssize_t simple_attr_write_signed(struct file file, const* char __user *buf,
3529	size_t len, loff_t *ppos);
3530
3531	struct ctl_table;
3532	int __init list_bdev_fs_names(char *buf, size_t size);
3533
/* Plain-int views of the FMODE_EXEC / FMODE_NONOTIFY mode bits. */
#define __FMODE_EXEC		((__force int) FMODE_EXEC)
#define __FMODE_NONOTIFY	((__force int) FMODE_NONOTIFY)
3536
/*
 * ACC_MODE - look up a value by the access-mode bits of open flags @x.
 * The string literal is a four-entry table indexed by (x & O_ACCMODE),
 * holding the octal values 4, 2, 6 and 6 for indices 0 through 3.
 */
#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])

/*
 * OPEN_FMODE - build an fmode_t from open flags @flag: the access-mode
 * bits of (flag + 1) combined with the __FMODE_NONOTIFY bit of @flag.
 * The argument is parenthesized so compound expressions expand safely.
 */
#define OPEN_FMODE(flag) ((__force fmode_t)((((flag) + 1) & O_ACCMODE) | \
					    ((flag) & __FMODE_NONOTIFY)))
3540
3541	static inline bool is_sxid(umode_t mode)
3542	{
3543	return mode & (S_ISUID \| S_ISGID);
3544	}
3545
3546	static inline int check_sticky(struct mnt_idmap *idmap,
3547	struct inode dir, struct* inode *inode)
3548	{
3549	if (!(dir->i_mode & S_ISVTX))
3550	return `0`;
3551
3552	return __check_sticky(idmap, dir, inode);
3553	}
3554
3555	static inline void inode_has_no_xattr(struct inode *inode)
3556	{
3557	if (!is_sxid(mode: inode->i_mode) && (inode->i_sb->s_flags & SB_NOSEC))
3558	inode->i_flags \|= S_NOSEC;
3559	}
3560
3561	static inline bool is_root_inode(struct inode *inode)
3562	{
3563	return inode == inode->i_sb->s_root->d_inode;
3564	}
3565
3566	static inline bool dir_emit(struct dir_context *ctx,
3567	const char name, int* namelen,
3568	u64 ino, unsigned type)
3569	{
3570	return ctx->actor(ctx, name, namelen, ctx->pos, ino, type);
3571	}
3572	static inline bool dir_emit_dot(struct file file, struct* dir_context *ctx)
3573	{
3574	return ctx->actor(ctx, ".", `1`, ctx->pos,
3575	file->f_path.dentry->d_inode->i_ino, DT_DIR);
3576	}
3577	static inline bool dir_emit_dotdot(struct file file, struct* dir_context *ctx)
3578	{
3579	return ctx->actor(ctx, "..", `2`, ctx->pos,
3580	parent_ino(dentry: file->f_path.dentry), DT_DIR);
3581	}
3582	static inline bool dir_emit_dots(struct file file, struct* dir_context *ctx)
3583	{
3584	if (ctx->pos == `0`) {
3585	if (!dir_emit_dot(file, ctx))
3586	return false;
3587	ctx->pos = `1`;
3588	}
3589	if (ctx->pos == `1`) {
3590	if (!dir_emit_dotdot(file, ctx))
3591	return false;
3592	ctx->pos = `2`;
3593	}
3594	return true;
3595	}
3596	static inline bool dir_relax(struct inode *inode)
3597	{
3598	inode_unlock(inode);
3599	inode_lock(inode);
3600	return !IS_DEADDIR(inode);
3601	}
3602
3603	static inline bool dir_relax_shared(struct inode *inode)
3604	{
3605	inode_unlock_shared(inode);
3606	inode_lock_shared(inode);
3607	return !IS_DEADDIR(inode);
3608	}
3609
3610	extern bool path_noexec(const struct path *path);
3611	extern void inode_nohighmem(struct inode *inode);
3612
3613	/ mm/fadvise.c /
3614	extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len,
3615	int advice);
3616	extern int generic_fadvise(struct file *file, loff_t offset, loff_t len,
3617	int advice);
3618
3619	#endif /* _LINUX_FS_H */
3620

source code of linux/include/linux/fs.h