inode.c source code [linux/fs/inode.c]

1	// SPDX-License-Identifier: GPL-2.0-only
2	/*
3	* (C) 1997 Linus Torvalds
4	* (C) 1999 Andrea Arcangeli <andrea@suse.de> (dynamic inode allocation)
5	*/
6	#include <linux/export.h>
7	#include <linux/fs.h>
8	#include <linux/filelock.h>
9	#include <linux/mm.h>
10	#include <linux/backing-dev.h>
11	#include <linux/hash.h>
12	#include <linux/swap.h>
13	#include <linux/security.h>
14	#include <linux/cdev.h>
15	#include <linux/memblock.h>
16	#include <linux/fsnotify.h>
17	#include <linux/mount.h>
18	#include <linux/posix_acl.h>
19	#include <linux/buffer_head.h> /* for inode_has_buffers */
20	#include <linux/ratelimit.h>
21	#include <linux/list_lru.h>
22	#include <linux/iversion.h>
23	#include <linux/rw_hint.h>
24	#include <linux/seq_file.h>
25	#include <linux/debugfs.h>
26	#include <trace/events/writeback.h>
27	#define CREATE_TRACE_POINTS
28	#include <trace/events/timestamp.h>
29
30	#include "internal.h"
31
32	/*
33	* Inode locking rules:
34	*
35	* inode->i_lock protects:
36	* inode->i_state, inode->i_hash, __iget(), inode->i_io_list
37	* Inode LRU list locks protect:
38	* inode->i_sb->s_inode_lru, inode->i_lru
39	* inode->i_sb->s_inode_list_lock protects:
40	* inode->i_sb->s_inodes, inode->i_sb_list
41	* bdi->wb.list_lock protects:
42	* bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_io_list
43	* inode_hash_lock protects:
44	* inode_hashtable, inode->i_hash
45	*
46	* Lock ordering:
47	*
48	* inode->i_sb->s_inode_list_lock
49	* inode->i_lock
50	* Inode LRU list locks
51	*
52	* bdi->wb.list_lock
53	* inode->i_lock
54	*
55	* inode_hash_lock
56	* inode->i_sb->s_inode_list_lock
57	* inode->i_lock
58	*
59	* iunique_lock
60	* inode_hash_lock
61	*/
62
63	static unsigned int i_hash_mask __ro_after_init;
64	static unsigned int i_hash_shift __ro_after_init;
65	static struct hlist_head *inode_hashtable __ro_after_init;
66	static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
67
68	/*
69	* Empty aops. Can be used for the cases where the user does not
70	* define any of the address_space operations.
71	*/
72	const struct address_space_operations empty_aops = {
73	};
74	EXPORT_SYMBOL(empty_aops);
75
76	static DEFINE_PER_CPU(unsigned long, nr_inodes);
77	static DEFINE_PER_CPU(unsigned long, nr_unused);
78
79	static struct kmem_cache *inode_cachep __ro_after_init;
80
81	static long get_nr_inodes(void)
82	{
83	int i;
84	long sum = `0`;
85	for_each_possible_cpu(i)
86	sum += per_cpu(nr_inodes, i);
87	return sum < `0` ? `0` : sum;
88	}
89
90	static inline long get_nr_inodes_unused(void)
91	{
92	int i;
93	long sum = `0`;
94	for_each_possible_cpu(i)
95	sum += per_cpu(nr_unused, i);
96	return sum < `0` ? `0` : sum;
97	}
98
/*
 * Estimate the number of in-use inodes as (all inodes - unused inodes).
 *
 * Returns the difference, clamped so that it is never negative.
 */
long get_nr_dirty_inodes(void)
{
	/* not actually dirty inodes, but a wild approximation */
	long nr_dirty = get_nr_inodes() - get_nr_inodes_unused();
	return nr_dirty > 0 ? nr_dirty : 0;
}
105
#ifdef CONFIG_DEBUG_FS
/* Per-CPU multigrain-timestamp statistics, exported through debugfs. */
static DEFINE_PER_CPU(long, mg_ctime_updates);
static DEFINE_PER_CPU(long, mg_fine_stamps);
static DEFINE_PER_CPU(long, mg_ctime_swaps);

/* Sum mg_ctime_updates over all possible CPUs (racy read, by design). */
static unsigned long get_mg_ctime_updates(void)
{
	unsigned long sum = 0;
	int i;

	for_each_possible_cpu(i)
		sum += data_race(per_cpu(mg_ctime_updates, i));
	return sum;
}

/* Sum mg_fine_stamps over all possible CPUs (racy read, by design). */
static unsigned long get_mg_fine_stamps(void)
{
	unsigned long sum = 0;
	int i;

	for_each_possible_cpu(i)
		sum += data_race(per_cpu(mg_fine_stamps, i));
	return sum;
}

/* Sum mg_ctime_swaps over all possible CPUs (racy read, by design). */
static unsigned long get_mg_ctime_swaps(void)
{
	unsigned long sum = 0;
	int i;

	for_each_possible_cpu(i)
		sum += data_race(per_cpu(mg_ctime_swaps, i));
	return sum;
}

#define mgtime_counter_inc(__var)	this_cpu_inc(__var)

/*
 * seq_file show callback: emit one line holding, in order, the ctime
 * update count, ctime swap count, fine-grained stamp count and the
 * timekeeping floor swap count.  Always returns 0.
 */
static int mgts_show(struct seq_file *s, void *p)
{
	unsigned long ctime_updates = get_mg_ctime_updates();
	unsigned long ctime_swaps = get_mg_ctime_swaps();
	unsigned long fine_stamps = get_mg_fine_stamps();
	unsigned long floor_swaps = timekeeping_get_mg_floor_swaps();

	seq_printf(s, "%lu %lu %lu %lu\n",
		   ctime_updates, ctime_swaps, fine_stamps, floor_swaps);
	return 0;
}

DEFINE_SHOW_ATTRIBUTE(mgts);

/* Register the "multigrain_timestamps" debugfs file at late init. */
static int __init mg_debugfs_init(void)
{
	debugfs_create_file("multigrain_timestamps", S_IFREG | S_IRUGO, NULL, NULL, &mgts_fops);
	return 0;
}
late_initcall(mg_debugfs_init);

#else /* ! CONFIG_DEBUG_FS */

/* Without debugfs the counters do not exist; counting is a no-op. */
#define mgtime_counter_inc(__var)	do { } while (0)

#endif /* CONFIG_DEBUG_FS */
169
/*
 * Handle nr_inode sysctl
 */
#ifdef CONFIG_SYSCTL
/*
 * Statistics gathering..
 */
static struct inodes_stat_t inodes_stat;

/*
 * sysctl handler shared by "inode-nr" and "inode-state": refresh the
 * nr_inodes and nr_unused fields of inodes_stat from the per-CPU counters,
 * then let proc_doulongvec_minmax() do the actual transfer.
 */
static int proc_nr_inodes(const struct ctl_table *table, int write, void *buffer,
			  size_t *lenp, loff_t *ppos)
{
	inodes_stat.nr_inodes = get_nr_inodes();
	inodes_stat.nr_unused = get_nr_inodes_unused();
	return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}

/* Both entries are read-only (0444) views onto the same inodes_stat. */
static const struct ctl_table inodes_sysctls[] = {
	{
		.procname	= "inode-nr",
		.data		= &inodes_stat,
		.maxlen		= 2*sizeof(long),
		.mode		= 0444,
		.proc_handler	= proc_nr_inodes,
	},
	{
		.procname	= "inode-state",
		.data		= &inodes_stat,
		.maxlen		= 7*sizeof(long),
		.mode		= 0444,
		.proc_handler	= proc_nr_inodes,
	},
};

/* Register the table above under the "fs" sysctl directory. */
static int __init init_fs_inode_sysctls(void)
{
	register_sysctl_init("fs", inodes_sysctls);
	return 0;
}
early_initcall(init_fs_inode_sysctls);
#endif
211
212	static int no_open(struct inode inode, struct* file *file)
213	{
214	return -ENXIO;
215	}
216
217	/**
218	* inode_init_always_gfp - perform inode structure initialisation
219	* @sb: superblock inode belongs to
220	* @inode: inode to initialise
221	* @gfp: allocation flags
222	*
223	* These are initializations that need to be done on every inode
224	* allocation as the fields are not initialised by slab allocation.
225	* If there are additional allocations required @gfp is used.
226	*/
227	int inode_init_always_gfp(struct super_block sb, struct* inode *inode, gfp_t gfp)
228	{
229	static const struct inode_operations empty_iops;
230	static const struct file_operations no_open_fops = {.open = no_open};
231	struct address_space *const mapping = &inode->i_data;
232
233	inode->i_sb = sb;
234	inode->i_blkbits = sb->s_blocksize_bits;
235	inode->i_flags = `0`;
236	inode->i_state = `0`;
237	atomic64_set(v: &inode->i_sequence, i: `0`);
238	atomic_set(v: &inode->i_count, i: `1`);
239	inode->i_op = &empty_iops;
240	inode->i_fop = &no_open_fops;
241	inode->i_ino = `0`;
242	inode->__i_nlink = `1`;
243	inode->i_opflags = `0`;
244	if (sb->s_xattr)
245	inode->i_opflags \|= IOP_XATTR;
246	if (sb->s_type->fs_flags & FS_MGTIME)
247	inode->i_opflags \|= IOP_MGTIME;
248	i_uid_write(inode, uid: `0`);
249	i_gid_write(inode, gid: `0`);
250	atomic_set(v: &inode->i_writecount, i: `0`);
251	inode->i_size = `0`;
252	inode->i_write_hint = WRITE_LIFE_NOT_SET;
253	inode->i_blocks = `0`;
254	inode->i_bytes = `0`;
255	inode->i_generation = `0`;
256	inode->i_pipe = NULL;
257	inode->i_cdev = NULL;
258	inode->i_link = NULL;
259	inode->i_dir_seq = `0`;
260	inode->i_rdev = `0`;
261	inode->dirtied_when = `0`;
262
263	#ifdef CONFIG_CGROUP_WRITEBACK
264	inode->i_wb_frn_winner = `0`;
265	inode->i_wb_frn_avg_time = `0`;
266	inode->i_wb_frn_history = `0`;
267	#endif
268
269	spin_lock_init(&inode->i_lock);
270	lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);
271
272	init_rwsem(&inode->i_rwsem);
273	lockdep_set_class(&inode->i_rwsem, &sb->s_type->i_mutex_key);
274
275	atomic_set(v: &inode->i_dio_count, i: `0`);
276
277	mapping->a_ops = &empty_aops;
278	mapping->host = inode;
279	mapping->flags = `0`;
280	mapping->wb_err = `0`;
281	atomic_set(v: &mapping->i_mmap_writable, i: `0`);
282	#ifdef CONFIG_READ_ONLY_THP_FOR_FS
283	atomic_set(v: &mapping->nr_thps, i: `0`);
284	#endif
285	mapping_set_gfp_mask(m: mapping, GFP_HIGHUSER_MOVABLE);
286	mapping->i_private_data = NULL;
287	mapping->writeback_index = `0`;
288	init_rwsem(&mapping->invalidate_lock);
289	lockdep_set_class_and_name(&mapping->invalidate_lock,
290	&sb->s_type->invalidate_lock_key,
291	"mapping.invalidate_lock");
292	if (sb->s_iflags & SB_I_STABLE_WRITES)
293	mapping_set_stable_writes(mapping);
294	inode->i_private = NULL;
295	inode->i_mapping = mapping;
296	INIT_HLIST_HEAD(&inode->i_dentry); / buggered by rcu freeing /
297	#ifdef CONFIG_FS_POSIX_ACL
298	inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
299	#endif
300
301	#ifdef CONFIG_FSNOTIFY
302	inode->i_fsnotify_mask = `0`;
303	#endif
304	inode->i_flctx = NULL;
305
306	if (unlikely(security_inode_alloc(inode, gfp)))
307	return -ENOMEM;
308
309	this_cpu_inc(nr_inodes);
310
311	return `0`;
312	}
313	EXPORT_SYMBOL(inode_init_always_gfp);
314
315	void free_inode_nonrcu(struct inode *inode)
316	{
317	kmem_cache_free(s: inode_cachep, objp: inode);
318	}
319	EXPORT_SYMBOL(free_inode_nonrcu);
320
321	static void i_callback(struct rcu_head *head)
322	{
323	struct inode inode = container_of(head, struct* inode, i_rcu);
324	if (inode->free_inode)
325	inode->free_inode(inode);
326	else
327	free_inode_nonrcu(inode);
328	}
329
330	/**
331	* alloc_inode - obtain an inode
332	* @sb: superblock
333	*
334	* Allocates a new inode for given superblock.
335	* Inode wont be chained in superblock s_inodes list
336	* This means :
337	* - fs can't be unmount
338	* - quotas, fsnotify, writeback can't work
339	*/
340	struct inode alloc_inode(struct* super_block *sb)
341	{
342	const struct super_operations *ops = sb->s_op;
343	struct inode *inode;
344
345	if (ops->alloc_inode)
346	inode = ops->alloc_inode(sb);
347	else
348	inode = alloc_inode_sb(sb, inode_cachep, GFP_KERNEL);
349
350	if (!inode)
351	return NULL;
352
353	if (unlikely(inode_init_always(sb, inode))) {
354	if (ops->destroy_inode) {
355	ops->destroy_inode(inode);
356	if (!ops->free_inode)
357	return NULL;
358	}
359	inode->free_inode = ops->free_inode;
360	i_callback(head: &inode->i_rcu);
361	return NULL;
362	}
363
364	return inode;
365	}
366
367	void __destroy_inode(struct inode *inode)
368	{
369	BUG_ON(inode_has_buffers(inode));
370	inode_detach_wb(inode);
371	security_inode_free(inode);
372	fsnotify_inode_delete(inode);
373	locks_free_lock_context(inode);
374	if (!inode->i_nlink) {
375	WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == `0`);
376	atomic_long_dec(v: &inode->i_sb->s_remove_count);
377	}
378
379	#ifdef CONFIG_FS_POSIX_ACL
380	if (inode->i_acl && !is_uncached_acl(acl: inode->i_acl))
381	posix_acl_release(acl: inode->i_acl);
382	if (inode->i_default_acl && !is_uncached_acl(acl: inode->i_default_acl))
383	posix_acl_release(acl: inode->i_default_acl);
384	#endif
385	this_cpu_dec(nr_inodes);
386	}
387	EXPORT_SYMBOL(__destroy_inode);
388
389	static void destroy_inode(struct inode *inode)
390	{
391	const struct super_operations *ops = inode->i_sb->s_op;
392
393	BUG_ON(!list_empty(&inode->i_lru));
394	__destroy_inode(inode);
395	if (ops->destroy_inode) {
396	ops->destroy_inode(inode);
397	if (!ops->free_inode)
398	return;
399	}
400	inode->free_inode = ops->free_inode;
401	call_rcu(head: &inode->i_rcu, func: i_callback);
402	}
403
404	/**
405	* drop_nlink - directly drop an inode's link count
406	* @inode: inode
407	*
408	* This is a low-level filesystem helper to replace any
409	* direct filesystem manipulation of i_nlink. In cases
410	* where we are attempting to track writes to the
411	* filesystem, a decrement to zero means an imminent
412	* write when the file is truncated and actually unlinked
413	* on the filesystem.
414	*/
415	void drop_nlink(struct inode *inode)
416	{
417	WARN_ON(inode->i_nlink == `0`);
418	inode->__i_nlink--;
419	if (!inode->i_nlink)
420	atomic_long_inc(v: &inode->i_sb->s_remove_count);
421	}
422	EXPORT_SYMBOL(drop_nlink);
423
424	/**
425	* clear_nlink - directly zero an inode's link count
426	* @inode: inode
427	*
428	* This is a low-level filesystem helper to replace any
429	* direct filesystem manipulation of i_nlink. See
430	* drop_nlink() for why we care about i_nlink hitting zero.
431	*/
432	void clear_nlink(struct inode *inode)
433	{
434	if (inode->i_nlink) {
435	inode->__i_nlink = `0`;
436	atomic_long_inc(v: &inode->i_sb->s_remove_count);
437	}
438	}
439	EXPORT_SYMBOL(clear_nlink);
440
441	/**
442	* set_nlink - directly set an inode's link count
443	* @inode: inode
444	* @nlink: new nlink (should be non-zero)
445	*
446	* This is a low-level filesystem helper to replace any
447	* direct filesystem manipulation of i_nlink.
448	*/
449	void set_nlink(struct inode inode, unsigned* int nlink)
450	{
451	if (!nlink) {
452	clear_nlink(inode);
453	} else {
454	/ Yes, some filesystems do change nlink from zero to one /
455	if (inode->i_nlink == `0`)
456	atomic_long_dec(v: &inode->i_sb->s_remove_count);
457
458	inode->__i_nlink = nlink;
459	}
460	}
461	EXPORT_SYMBOL(set_nlink);
462
463	/**
464	* inc_nlink - directly increment an inode's link count
465	* @inode: inode
466	*
467	* This is a low-level filesystem helper to replace any
468	* direct filesystem manipulation of i_nlink. Currently,
469	* it is only here for parity with dec_nlink().
470	*/
471	void inc_nlink(struct inode *inode)
472	{
473	if (unlikely(inode->i_nlink == `0`)) {
474	WARN_ON(!(inode->i_state & I_LINKABLE));
475	atomic_long_dec(v: &inode->i_sb->s_remove_count);
476	}
477
478	inode->__i_nlink++;
479	}
480	EXPORT_SYMBOL(inc_nlink);
481
482	static void __address_space_init_once(struct address_space *mapping)
483	{
484	xa_init_flags(xa: &mapping->i_pages, XA_FLAGS_LOCK_IRQ \| XA_FLAGS_ACCOUNT);
485	init_rwsem(&mapping->i_mmap_rwsem);
486	INIT_LIST_HEAD(list: &mapping->i_private_list);
487	spin_lock_init(&mapping->i_private_lock);
488	mapping->i_mmap = RB_ROOT_CACHED;
489	}
490
491	void address_space_init_once(struct address_space *mapping)
492	{
493	memset(mapping, `0`, sizeof(*mapping));
494	__address_space_init_once(mapping);
495	}
496	EXPORT_SYMBOL(address_space_init_once);
497
498	/*
499	* These are initializations that only need to be done
500	* once, because the fields are idempotent across use
501	* of the inode, so let the slab aware of that.
502	*/
503	void inode_init_once(struct inode *inode)
504	{
505	memset(inode, `0`, sizeof(*inode));
506	INIT_HLIST_NODE(h: &inode->i_hash);
507	INIT_LIST_HEAD(list: &inode->i_devices);
508	INIT_LIST_HEAD(list: &inode->i_io_list);
509	INIT_LIST_HEAD(list: &inode->i_wb_list);
510	INIT_LIST_HEAD(list: &inode->i_lru);
511	INIT_LIST_HEAD(list: &inode->i_sb_list);
512	__address_space_init_once(mapping: &inode->i_data);
513	i_size_ordered_init(inode);
514	}
515	EXPORT_SYMBOL(inode_init_once);
516
/* void-pointer adapter around inode_init_once(); @foo is the inode. */
static void init_once(void *foo)
{
	struct inode *inode = (struct inode *) foo;

	inode_init_once(inode);
}
523
524	/*
525	* get additional reference to inode; caller must already hold one.
526	*/
527	void ihold(struct inode *inode)
528	{
529	WARN_ON(atomic_inc_return(&inode->i_count) < `2`);
530	}
531	EXPORT_SYMBOL(ihold);
532
533	static void __inode_add_lru(struct inode *inode, bool rotate)
534	{
535	if (inode->i_state & (I_DIRTY_ALL \| I_SYNC \| I_FREEING \| I_WILL_FREE))
536	return;
537	if (atomic_read(v: &inode->i_count))
538	return;
539	if (!(inode->i_sb->s_flags & SB_ACTIVE))
540	return;
541	if (!mapping_shrinkable(mapping: &inode->i_data))
542	return;
543
544	if (list_lru_add_obj(lru: &inode->i_sb->s_inode_lru, item: &inode->i_lru))
545	this_cpu_inc(nr_unused);
546	else if (rotate)
547	inode->i_state \|= I_REFERENCED;
548	}
549
550	struct wait_queue_head inode_bit_waitqueue(struct* wait_bit_queue_entry *wqe,
551	struct inode *inode, u32 bit)
552	{
553	void *bit_address;
554
555	bit_address = inode_state_wait_address(inode, bit);
556	init_wait_var_entry(wbq_entry: wqe, var: bit_address, flags: `0`);
557	return __var_waitqueue(p: bit_address);
558	}
559	EXPORT_SYMBOL(inode_bit_waitqueue);
560
561	/*
562	* Add inode to LRU if needed (inode is unused and clean).
563	*
564	* Needs inode->i_lock held.
565	*/
566	void inode_add_lru(struct inode *inode)
567	{
568	__inode_add_lru(inode, rotate: false);
569	}
570
571	static void inode_lru_list_del(struct inode *inode)
572	{
573	if (list_lru_del_obj(lru: &inode->i_sb->s_inode_lru, item: &inode->i_lru))
574	this_cpu_dec(nr_unused);
575	}
576
577	static void inode_pin_lru_isolating(struct inode *inode)
578	{
579	lockdep_assert_held(&inode->i_lock);
580	WARN_ON(inode->i_state & (I_LRU_ISOLATING \| I_FREEING \| I_WILL_FREE));
581	inode->i_state \|= I_LRU_ISOLATING;
582	}
583
584	static void inode_unpin_lru_isolating(struct inode *inode)
585	{
586	spin_lock(lock: &inode->i_lock);
587	WARN_ON(!(inode->i_state & I_LRU_ISOLATING));
588	inode->i_state &= ~I_LRU_ISOLATING;
589	/ Called with inode->i_lock which ensures memory ordering. /
590	inode_wake_up_bit(inode, __I_LRU_ISOLATING);
591	spin_unlock(lock: &inode->i_lock);
592	}
593
594	static void inode_wait_for_lru_isolating(struct inode *inode)
595	{
596	struct wait_bit_queue_entry wqe;
597	struct wait_queue_head *wq_head;
598
599	lockdep_assert_held(&inode->i_lock);
600	if (!(inode->i_state & I_LRU_ISOLATING))
601	return;
602
603	wq_head = inode_bit_waitqueue(&wqe, inode, __I_LRU_ISOLATING);
604	for (;;) {
605	prepare_to_wait_event(wq_head, wq_entry: &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
606	/*
607	* Checking I_LRU_ISOLATING with inode->i_lock guarantees
608	* memory ordering.
609	*/
610	if (!(inode->i_state & I_LRU_ISOLATING))
611	break;
612	spin_unlock(lock: &inode->i_lock);
613	schedule();
614	spin_lock(lock: &inode->i_lock);
615	}
616	finish_wait(wq_head, wq_entry: &wqe.wq_entry);
617	WARN_ON(inode->i_state & I_LRU_ISOLATING);
618	}
619
620	/**
621	* inode_sb_list_add - add inode to the superblock list of inodes
622	* @inode: inode to add
623	*/
624	void inode_sb_list_add(struct inode *inode)
625	{
626	struct super_block *sb = inode->i_sb;
627
628	spin_lock(lock: &sb->s_inode_list_lock);
629	list_add(new: &inode->i_sb_list, head: &sb->s_inodes);
630	spin_unlock(lock: &sb->s_inode_list_lock);
631	}
632	EXPORT_SYMBOL_GPL(inode_sb_list_add);
633
634	static inline void inode_sb_list_del(struct inode *inode)
635	{
636	struct super_block *sb = inode->i_sb;
637
638	if (!list_empty(head: &inode->i_sb_list)) {
639	spin_lock(lock: &sb->s_inode_list_lock);
640	list_del_init(entry: &inode->i_sb_list);
641	spin_unlock(lock: &sb->s_inode_list_lock);
642	}
643	}
644
645	static unsigned long hash(struct super_block sb, unsigned* long hashval)
646	{
647	unsigned long tmp;
648
649	tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
650	L1_CACHE_BYTES;
651	tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> i_hash_shift);
652	return tmp & i_hash_mask;
653	}
654
655	/**
656	* __insert_inode_hash - hash an inode
657	* @inode: unhashed inode
658	* @hashval: unsigned long value used to locate this object in the
659	* inode_hashtable.
660	*
661	* Add an inode to the inode hash for this superblock.
662	*/
663	void __insert_inode_hash(struct inode inode, unsigned* long hashval)
664	{
665	struct hlist_head *b = inode_hashtable + hash(sb: inode->i_sb, hashval);
666
667	spin_lock(lock: &inode_hash_lock);
668	spin_lock(lock: &inode->i_lock);
669	hlist_add_head_rcu(n: &inode->i_hash, h: b);
670	spin_unlock(lock: &inode->i_lock);
671	spin_unlock(lock: &inode_hash_lock);
672	}
673	EXPORT_SYMBOL(__insert_inode_hash);
674
675	/**
676	* __remove_inode_hash - remove an inode from the hash
677	* @inode: inode to unhash
678	*
679	* Remove an inode from the superblock.
680	*/
681	void __remove_inode_hash(struct inode *inode)
682	{
683	spin_lock(lock: &inode_hash_lock);
684	spin_lock(lock: &inode->i_lock);
685	hlist_del_init_rcu(n: &inode->i_hash);
686	spin_unlock(lock: &inode->i_lock);
687	spin_unlock(lock: &inode_hash_lock);
688	}
689	EXPORT_SYMBOL(__remove_inode_hash);
690
691	void dump_mapping(const struct address_space *mapping)
692	{
693	struct inode *host;
694	const struct address_space_operations *a_ops;
695	struct hlist_node *dentry_first;
696	struct dentry *dentry_ptr;
697	struct dentry dentry;
698	char fname[`64`] = {};
699	unsigned long ino;
700
701	/*
702	* If mapping is an invalid pointer, we don't want to crash
703	* accessing it, so probe everything depending on it carefully.
704	*/
705	if (get_kernel_nofault(host, &mapping->host) \|\|
706	get_kernel_nofault(a_ops, &mapping->a_ops)) {
707	pr_warn("invalid mapping:%px\n", mapping);
708	return;
709	}
710
711	if (!host) {
712	pr_warn("aops:%ps\n", a_ops);
713	return;
714	}
715
716	if (get_kernel_nofault(dentry_first, &host->i_dentry.first) \|\|
717	get_kernel_nofault(ino, &host->i_ino)) {
718	pr_warn("aops:%ps invalid inode:%px\n", a_ops, host);
719	return;
720	}
721
722	if (!dentry_first) {
723	pr_warn("aops:%ps ino:%lx\n", a_ops, ino);
724	return;
725	}
726
727	dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias);
728	if (get_kernel_nofault(dentry, dentry_ptr) \|\|
729	!dentry.d_parent \|\| !dentry.d_name.name) {
730	pr_warn("aops:%ps ino:%lx invalid dentry:%px\n",
731	a_ops, ino, dentry_ptr);
732	return;
733	}
734
735	if (strncpy_from_kernel_nofault(dst: fname, unsafe_addr: dentry.d_name.name, count: `63`) < `0`)
736	strscpy(fname, "<invalid>");
737	/*
738	* Even if strncpy_from_kernel_nofault() succeeded,
739	* the fname could be unreliable
740	*/
741	pr_warn("aops:%ps ino:%lx dentry name(?):\"%s\"\n",
742	a_ops, ino, fname);
743	}
744
745	void clear_inode(struct inode *inode)
746	{
747	/*
748	* We have to cycle the i_pages lock here because reclaim can be in the
749	* process of removing the last page (in __filemap_remove_folio())
750	* and we must not free the mapping under it.
751	*/
752	xa_lock_irq(&inode->i_data.i_pages);
753	BUG_ON(inode->i_data.nrpages);
754	/*
755	* Almost always, mapping_empty(&inode->i_data) here; but there are
756	* two known and long-standing ways in which nodes may get left behind
757	* (when deep radix-tree node allocation failed partway; or when THP
758	* collapse_file() failed). Until those two known cases are cleaned up,
759	* or a cleanup function is called here, do not BUG_ON(!mapping_empty),
760	* nor even WARN_ON(!mapping_empty).
761	*/
762	xa_unlock_irq(&inode->i_data.i_pages);
763	BUG_ON(!list_empty(&inode->i_data.i_private_list));
764	BUG_ON(!(inode->i_state & I_FREEING));
765	BUG_ON(inode->i_state & I_CLEAR);
766	BUG_ON(!list_empty(&inode->i_wb_list));
767	/ don't need i_lock here, no concurrent mods to i_state /
768	inode->i_state = I_FREEING \| I_CLEAR;
769	}
770	EXPORT_SYMBOL(clear_inode);
771
772	/*
773	* Free the inode passed in, removing it from the lists it is still connected
774	* to. We remove any pages still attached to the inode and wait for any IO that
775	* is still in progress before finally destroying the inode.
776	*
777	* An inode must already be marked I_FREEING so that we avoid the inode being
778	* moved back onto lists if we race with other code that manipulates the lists
779	* (e.g. writeback_single_inode). The caller is responsible for setting this.
780	*
781	* An inode must already be removed from the LRU list before being evicted from
782	* the cache. This should occur atomically with setting the I_FREEING state
783	* flag, so no inodes here should ever be on the LRU when being evicted.
784	*/
785	static void evict(struct inode *inode)
786	{
787	const struct super_operations *op = inode->i_sb->s_op;
788
789	BUG_ON(!(inode->i_state & I_FREEING));
790	BUG_ON(!list_empty(&inode->i_lru));
791
792	if (!list_empty(head: &inode->i_io_list))
793	inode_io_list_del(inode);
794
795	inode_sb_list_del(inode);
796
797	spin_lock(lock: &inode->i_lock);
798	inode_wait_for_lru_isolating(inode);
799
800	/*
801	* Wait for flusher thread to be done with the inode so that filesystem
802	* does not start destroying it while writeback is still running. Since
803	* the inode has I_FREEING set, flusher thread won't start new work on
804	* the inode. We just have to wait for running writeback to finish.
805	*/
806	inode_wait_for_writeback(inode);
807	spin_unlock(lock: &inode->i_lock);
808
809	if (op->evict_inode) {
810	op->evict_inode(inode);
811	} else {
812	truncate_inode_pages_final(&inode->i_data);
813	clear_inode(inode);
814	}
815	if (S_ISCHR(inode->i_mode) && inode->i_cdev)
816	cd_forget(inode);
817
818	remove_inode_hash(inode);
819
820	/*
821	* Wake up waiters in __wait_on_freeing_inode().
822	*
823	* It is an invariant that any thread we need to wake up is already
824	* accounted for before remove_inode_hash() acquires ->i_lock -- both
825	* sides take the lock and sleep is aborted if the inode is found
826	* unhashed. Thus either the sleeper wins and goes off CPU, or removal
827	* wins and the sleeper aborts after testing with the lock.
828	*
829	* This also means we don't need any fences for the call below.
830	*/
831	inode_wake_up_bit(inode, __I_NEW);
832	BUG_ON(inode->i_state != (I_FREEING \| I_CLEAR));
833
834	destroy_inode(inode);
835	}
836
837	/*
838	* dispose_list - dispose of the contents of a local list
839	* @head: the head of the list to free
840	*
841	* Dispose-list gets a local list with local inodes in it, so it doesn't
842	* need to worry about list corruption and SMP locks.
843	*/
844	static void dispose_list(struct list_head *head)
845	{
846	while (!list_empty(head)) {
847	struct inode *inode;
848
849	inode = list_first_entry(head, struct inode, i_lru);
850	list_del_init(entry: &inode->i_lru);
851
852	evict(inode);
853	cond_resched();
854	}
855	}
856
857	/**
858	* evict_inodes - evict all evictable inodes for a superblock
859	* @sb: superblock to operate on
860	*
861	* Make sure that no inodes with zero refcount are retained. This is
862	* called by superblock shutdown after having SB_ACTIVE flag removed,
863	* so any inode reaching zero refcount during or after that call will
864	* be immediately evicted.
865	*/
866	void evict_inodes(struct super_block *sb)
867	{
868	struct inode inode, next;
869	LIST_HEAD(dispose);
870
871	again:
872	spin_lock(lock: &sb->s_inode_list_lock);
873	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
874	if (atomic_read(v: &inode->i_count))
875	continue;
876
877	spin_lock(lock: &inode->i_lock);
878	if (atomic_read(v: &inode->i_count)) {
879	spin_unlock(lock: &inode->i_lock);
880	continue;
881	}
882	if (inode->i_state & (I_NEW \| I_FREEING \| I_WILL_FREE)) {
883	spin_unlock(lock: &inode->i_lock);
884	continue;
885	}
886
887	inode->i_state \|= I_FREEING;
888	inode_lru_list_del(inode);
889	spin_unlock(lock: &inode->i_lock);
890	list_add(new: &inode->i_lru, head: &dispose);
891
892	/*
893	* We can have a ton of inodes to evict at unmount time given
894	* enough memory, check to see if we need to go to sleep for a
895	* bit so we don't livelock.
896	*/
897	if (need_resched()) {
898	spin_unlock(lock: &sb->s_inode_list_lock);
899	cond_resched();
900	dispose_list(head: &dispose);
901	goto again;
902	}
903	}
904	spin_unlock(lock: &sb->s_inode_list_lock);
905
906	dispose_list(head: &dispose);
907	}
908	EXPORT_SYMBOL_GPL(evict_inodes);
909
910	/*
911	* Isolate the inode from the LRU in preparation for freeing it.
912	*
913	* If the inode has the I_REFERENCED flag set, then it means that it has been
914	* used recently - the flag is set in iput_final(). When we encounter such an
915	* inode, clear the flag and move it to the back of the LRU so it gets another
916	* pass through the LRU before it gets reclaimed. This is necessary because of
917	* the fact we are doing lazy LRU updates to minimise lock contention so the
918	* LRU does not have strict ordering. Hence we don't want to reclaim inodes
919	* with this flag set because they are the inodes that are out of order.
920	*/
921	static enum lru_status inode_lru_isolate(struct list_head *item,
922	struct list_lru_one lru, void* *arg)
923	{
924	struct list_head *freeable = arg;
925	struct inode inode = container_of(item, struct* inode, i_lru);
926
927	/*
928	* We are inverting the lru lock/inode->i_lock here, so use a
929	* trylock. If we fail to get the lock, just skip it.
930	*/
931	if (!spin_trylock(lock: &inode->i_lock))
932	return LRU_SKIP;
933
934	/*
935	* Inodes can get referenced, redirtied, or repopulated while
936	* they're already on the LRU, and this can make them
937	* unreclaimable for a while. Remove them lazily here; iput,
938	* sync, or the last page cache deletion will requeue them.
939	*/
940	if (atomic_read(v: &inode->i_count) \|\|
941	(inode->i_state & ~I_REFERENCED) \|\|
942	!mapping_shrinkable(mapping: &inode->i_data)) {
943	list_lru_isolate(list: lru, item: &inode->i_lru);
944	spin_unlock(lock: &inode->i_lock);
945	this_cpu_dec(nr_unused);
946	return LRU_REMOVED;
947	}
948
949	/ Recently referenced inodes get one more pass /
950	if (inode->i_state & I_REFERENCED) {
951	inode->i_state &= ~I_REFERENCED;
952	spin_unlock(lock: &inode->i_lock);
953	return LRU_ROTATE;
954	}
955
956	/*
957	* On highmem systems, mapping_shrinkable() permits dropping
958	* page cache in order to free up struct inodes: lowmem might
959	* be under pressure before the cache inside the highmem zone.
960	*/
961	if (inode_has_buffers(inode) \|\| !mapping_empty(mapping: &inode->i_data)) {
962	inode_pin_lru_isolating(inode);
963	spin_unlock(lock: &inode->i_lock);
964	spin_unlock(lock: &lru->lock);
965	if (remove_inode_buffers(inode)) {
966	unsigned long reap;
967	reap = invalidate_mapping_pages(mapping: &inode->i_data, start: `0`, end: -`1`);
968	if (current_is_kswapd())
969	__count_vm_events(item: KSWAPD_INODESTEAL, delta: reap);
970	else
971	__count_vm_events(item: PGINODESTEAL, delta: reap);
972	mm_account_reclaimed_pages(pages: reap);
973	}
974	inode_unpin_lru_isolating(inode);
975	return LRU_RETRY;
976	}
977
978	WARN_ON(inode->i_state & I_NEW);
979	inode->i_state \|= I_FREEING;
980	list_lru_isolate_move(list: lru, item: &inode->i_lru, head: freeable);
981	spin_unlock(lock: &inode->i_lock);
982
983	this_cpu_dec(nr_unused);
984	return LRU_REMOVED;
985	}
986
987	/*
988	* Walk the superblock inode LRU for freeable inodes and attempt to free them.
989	* This is called from the superblock shrinker function with a number of inodes
990	* to trim from the LRU. Inodes to be freed are moved to a temporary list and
991	* then are freed outside inode_lock by dispose_list().
992	*/
993	long prune_icache_sb(struct super_block sb, struct* shrink_control *sc)
994	{
995	LIST_HEAD(freeable);
996	long freed;
997
998	freed = list_lru_shrink_walk(lru: &sb->s_inode_lru, sc,
999	isolate: inode_lru_isolate, cb_arg: &freeable);
1000	dispose_list(head: &freeable);
1001	return freed;
1002	}
1003
1004	static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_locked);
1005	/*
1006	* Called with the inode lock held.
1007	*/
1008	static struct inode find_inode(struct* super_block *sb,
1009	struct hlist_head *head,
1010	int (test)(struct* inode , void* *),
1011	void *data, bool is_inode_hash_locked)
1012	{
1013	struct inode *inode = NULL;
1014
1015	if (is_inode_hash_locked)
1016	lockdep_assert_held(&inode_hash_lock);
1017	else
1018	lockdep_assert_not_held(&inode_hash_lock);
1019
1020	rcu_read_lock();
1021	repeat:
1022	hlist_for_each_entry_rcu(inode, head, i_hash) {
1023	if (inode->i_sb != sb)
1024	continue;
1025	if (!test(inode, data))
1026	continue;
1027	spin_lock(lock: &inode->i_lock);
1028	if (inode->i_state & (I_FREEING\|I_WILL_FREE)) {
1029	__wait_on_freeing_inode(inode, is_inode_hash_locked);
1030	goto repeat;
1031	}
1032	if (unlikely(inode->i_state & I_CREATING)) {
1033	spin_unlock(lock: &inode->i_lock);
1034	rcu_read_unlock();
1035	return ERR_PTR(error: -ESTALE);
1036	}
1037	__iget(inode);
1038	spin_unlock(lock: &inode->i_lock);
1039	rcu_read_unlock();
1040	return inode;
1041	}
1042	rcu_read_unlock();
1043	return NULL;
1044	}
1045
1046	/*
1047	* find_inode_fast is the fast path version of find_inode, see the comment at
1048	* iget_locked for details.
1049	*/
1050	static struct inode find_inode_fast(struct* super_block *sb,
1051	struct hlist_head head, unsigned* long ino,
1052	bool is_inode_hash_locked)
1053	{
1054	struct inode *inode = NULL;
1055
1056	if (is_inode_hash_locked)
1057	lockdep_assert_held(&inode_hash_lock);
1058	else
1059	lockdep_assert_not_held(&inode_hash_lock);
1060
1061	rcu_read_lock();
1062	repeat:
1063	hlist_for_each_entry_rcu(inode, head, i_hash) {
1064	if (inode->i_ino != ino)
1065	continue;
1066	if (inode->i_sb != sb)
1067	continue;
1068	spin_lock(lock: &inode->i_lock);
1069	if (inode->i_state & (I_FREEING\|I_WILL_FREE)) {
1070	__wait_on_freeing_inode(inode, is_inode_hash_locked);
1071	goto repeat;
1072	}
1073	if (unlikely(inode->i_state & I_CREATING)) {
1074	spin_unlock(lock: &inode->i_lock);
1075	rcu_read_unlock();
1076	return ERR_PTR(error: -ESTALE);
1077	}
1078	__iget(inode);
1079	spin_unlock(lock: &inode->i_lock);
1080	rcu_read_unlock();
1081	return inode;
1082	}
1083	rcu_read_unlock();
1084	return NULL;
1085	}
1086
1087	/*
1088	* Each cpu owns a range of LAST_INO_BATCH numbers.
1089	* 'shared_last_ino' is dirtied only once out of LAST_INO_BATCH allocations,
1090	* to renew the exhausted range.
1091	*
1092	* This does not significantly increase overflow rate because every CPU can
1093	* consume at most LAST_INO_BATCH-1 unused inode numbers. So there is
1094	* NR_CPUS*(LAST_INO_BATCH-1) wastage. At 4096 and 1024, this is ~0.1% of the
1095	* 2^32 range, and is a worst-case. Even a 50% wastage would only increase
1096	* overflow rate by 2x, which does not seem too significant.
1097	*
1098	* On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
1099	* error if st_ino won't fit in target struct field. Use 32bit counter
1100	* here to attempt to avoid that.
1101	*/
1102	#define LAST_INO_BATCH 1024
1103	static DEFINE_PER_CPU(unsigned int, last_ino);
1104
1105	unsigned int get_next_ino(void)
1106	{
1107	unsigned int *p = &get_cpu_var(last_ino);
1108	unsigned int res = *p;
1109
1110	#ifdef CONFIG_SMP
1111	if (unlikely((res & (LAST_INO_BATCH-`1`)) == `0`)) {
1112	static atomic_t shared_last_ino;
1113	int next = atomic_add_return(LAST_INO_BATCH, v: &shared_last_ino);
1114
1115	res = next - LAST_INO_BATCH;
1116	}
1117	#endif
1118
1119	res++;
1120	/ get_next_ino should not provide a 0 inode number /
1121	if (unlikely(!res))
1122	res++;
1123	*p = res;
1124	put_cpu_var(last_ino);
1125	return res;
1126	}
1127	EXPORT_SYMBOL(get_next_ino);
1128
/**
 * new_inode - obtain an inode
 * @sb: superblock
 *
 * Allocates a new inode for given superblock. The default gfp_mask
 * for allocations related to inode->i_mapping is GFP_HIGHUSER_MOVABLE.
 * If HIGHMEM pages are unsuitable or it is known that pages allocated
 * for the page cache are not reclaimable or migratable,
 * mapping_set_gfp_mask() must be called with suitable flags on the
 * newly created inode's mapping
 *
 * Returns the new inode, already added to the superblock inode list, or
 * NULL if the allocation failed.
 */
struct inode *new_inode(struct super_block *sb)
{
	struct inode *inode;

	inode = alloc_inode(sb);
	if (inode)
		inode_sb_list_add(inode);
	return inode;
}
EXPORT_SYMBOL(new_inode);
1151
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
 * Switch the lockdep class of a directory's i_rwsem to the filesystem
 * type's directory key. Inodes that are not directories are left alone.
 */
void lockdep_annotate_inode_mutex_key(struct inode *inode)
{
	if (S_ISDIR(inode->i_mode)) {
		struct file_system_type *type = inode->i_sb->s_type;

		/* Set new key only if filesystem hasn't already changed it */
		if (lockdep_match_class(&inode->i_rwsem, &type->i_mutex_key)) {
			/*
			 * Nobody may be holding i_rwsem at this point: it is
			 * re-initialised before its class is switched.
			 */
			init_rwsem(&inode->i_rwsem);
			lockdep_set_class(&inode->i_rwsem,
					  &type->i_mutex_dir_key);
		}
	}
}
EXPORT_SYMBOL(lockdep_annotate_inode_mutex_key);
#endif
1172
1173	/**
1174	* unlock_new_inode - clear the I_NEW state and wake up any waiters
1175	* @inode: new inode to unlock
1176	*
1177	* Called when the inode is fully initialised to clear the new state of the
1178	* inode and wake up anyone waiting for the inode to finish initialisation.
1179	*/
1180	void unlock_new_inode(struct inode *inode)
1181	{
1182	lockdep_annotate_inode_mutex_key(inode);
1183	spin_lock(lock: &inode->i_lock);
1184	WARN_ON(!(inode->i_state & I_NEW));
1185	inode->i_state &= ~I_NEW & ~I_CREATING;
1186	/*
1187	* Pairs with the barrier in prepare_to_wait_event() to make sure
1188	* ___wait_var_event() either sees the bit cleared or
1189	* waitqueue_active() check in wake_up_var() sees the waiter.
1190	*/
1191	smp_mb();
1192	inode_wake_up_bit(inode, __I_NEW);
1193	spin_unlock(lock: &inode->i_lock);
1194	}
1195	EXPORT_SYMBOL(unlock_new_inode);
1196
1197	void discard_new_inode(struct inode *inode)
1198	{
1199	lockdep_annotate_inode_mutex_key(inode);
1200	spin_lock(lock: &inode->i_lock);
1201	WARN_ON(!(inode->i_state & I_NEW));
1202	inode->i_state &= ~I_NEW;
1203	/*
1204	* Pairs with the barrier in prepare_to_wait_event() to make sure
1205	* ___wait_var_event() either sees the bit cleared or
1206	* waitqueue_active() check in wake_up_var() sees the waiter.
1207	*/
1208	smp_mb();
1209	inode_wake_up_bit(inode, __I_NEW);
1210	spin_unlock(lock: &inode->i_lock);
1211	iput(inode);
1212	}
1213	EXPORT_SYMBOL(discard_new_inode);
1214
1215	/**
1216	* lock_two_nondirectories - take two i_mutexes on non-directory objects
1217	*
1218	* Lock any non-NULL argument. Passed objects must not be directories.
1219	* Zero, one or two objects may be locked by this function.
1220	*
1221	* @inode1: first inode to lock
1222	* @inode2: second inode to lock
1223	*/
1224	void lock_two_nondirectories(struct inode inode1, struct* inode *inode2)
1225	{
1226	if (inode1)
1227	WARN_ON_ONCE(S_ISDIR(inode1->i_mode));
1228	if (inode2)
1229	WARN_ON_ONCE(S_ISDIR(inode2->i_mode));
1230	if (inode1 > inode2)
1231	swap(inode1, inode2);
1232	if (inode1)
1233	inode_lock(inode: inode1);
1234	if (inode2 && inode2 != inode1)
1235	inode_lock_nested(inode: inode2, subclass: I_MUTEX_NONDIR2);
1236	}
1237	EXPORT_SYMBOL(lock_two_nondirectories);
1238
1239	/**
1240	* unlock_two_nondirectories - release locks from lock_two_nondirectories()
1241	* @inode1: first inode to unlock
1242	* @inode2: second inode to unlock
1243	*/
1244	void unlock_two_nondirectories(struct inode inode1, struct* inode *inode2)
1245	{
1246	if (inode1) {
1247	WARN_ON_ONCE(S_ISDIR(inode1->i_mode));
1248	inode_unlock(inode: inode1);
1249	}
1250	if (inode2 && inode2 != inode1) {
1251	WARN_ON_ONCE(S_ISDIR(inode2->i_mode));
1252	inode_unlock(inode: inode2);
1253	}
1254	}
1255	EXPORT_SYMBOL(unlock_two_nondirectories);
1256
1257	/**
1258	* inode_insert5 - obtain an inode from a mounted file system
1259	* @inode: pre-allocated inode to use for insert to cache
1260	* @hashval: hash value (usually inode number) to get
1261	* @test: callback used for comparisons between inodes
1262	* @set: callback used to initialize a new struct inode
1263	* @data: opaque data pointer to pass to @test and @set
1264	*
1265	* Search for the inode specified by @hashval and @data in the inode cache,
1266	* and if present return it with an increased reference count. This is a
1267	* variant of iget5_locked() that doesn't allocate an inode.
1268	*
1269	* If the inode is not present in the cache, insert the pre-allocated inode and
1270	* return it locked, hashed, and with the I_NEW flag set. The file system gets
1271	* to fill it in before unlocking it via unlock_new_inode().
1272	*
1273	* Note that both @test and @set are called with the inode_hash_lock held, so
1274	* they can't sleep.
1275	*/
1276	struct inode inode_insert5(struct* inode inode, unsigned* long hashval,
1277	int (test)(struct* inode , void* *),
1278	int (set)(struct* inode , void* ), void* *data)
1279	{
1280	struct hlist_head *head = inode_hashtable + hash(sb: inode->i_sb, hashval);
1281	struct inode *old;
1282
1283	again:
1284	spin_lock(lock: &inode_hash_lock);
1285	old = find_inode(sb: inode->i_sb, head, test, data, is_inode_hash_locked: true);
1286	if (unlikely(old)) {
1287	/*
1288	* Uhhuh, somebody else created the same inode under us.
1289	* Use the old inode instead of the preallocated one.
1290	*/
1291	spin_unlock(lock: &inode_hash_lock);
1292	if (IS_ERR(ptr: old))
1293	return NULL;
1294	wait_on_inode(inode: old);
1295	if (unlikely(inode_unhashed(old))) {
1296	iput(old);
1297	goto again;
1298	}
1299	return old;
1300	}
1301
1302	if (set && unlikely(set(inode, data))) {
1303	spin_unlock(lock: &inode_hash_lock);
1304	return NULL;
1305	}
1306
1307	/*
1308	* Return the locked inode with I_NEW set, the
1309	* caller is responsible for filling in the contents
1310	*/
1311	spin_lock(lock: &inode->i_lock);
1312	inode->i_state \|= I_NEW;
1313	hlist_add_head_rcu(n: &inode->i_hash, h: head);
1314	spin_unlock(lock: &inode->i_lock);
1315
1316	spin_unlock(lock: &inode_hash_lock);
1317
1318	/*
1319	* Add inode to the sb list if it's not already. It has I_NEW at this
1320	* point, so it should be safe to test i_sb_list locklessly.
1321	*/
1322	if (list_empty(head: &inode->i_sb_list))
1323	inode_sb_list_add(inode);
1324
1325	return inode;
1326	}
1327	EXPORT_SYMBOL(inode_insert5);
1328
/**
 * iget5_locked - obtain an inode from a mounted file system
 * @sb:		super block of file system
 * @hashval:	hash value (usually inode number) to get
 * @test:	callback used for comparisons between inodes
 * @set:	callback used to initialize a new struct inode
 * @data:	opaque data pointer to pass to @test and @set
 *
 * Search for the inode specified by @hashval and @data in the inode cache,
 * and if present return it with an increased reference count. This is a
 * generalized version of iget_locked() for file systems where the inode
 * number is not sufficient for unique identification of an inode.
 *
 * If the inode is not present in the cache, allocate and insert a new inode
 * and return it locked, hashed, and with the I_NEW flag set. The file system
 * gets to fill it in before unlocking it via unlock_new_inode().
 *
 * Note that both @test and @set are called with the inode_hash_lock held, so
 * they can't sleep.
 */
struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
		int (*test)(struct inode *, void *),
		int (*set)(struct inode *, void *), void *data)
{
	struct inode *inode = ilookup5(sb, hashval, test, data);

	if (!inode) {
		struct inode *new = alloc_inode(sb);

		if (new) {
			inode = inode_insert5(new, hashval, test, set, data);
			/* Not inserted (lost a race or failed): free ours. */
			if (unlikely(inode != new))
				destroy_inode(new);
		}
	}
	return inode;
}
EXPORT_SYMBOL(iget5_locked);
1367
1368	/**
1369	* iget5_locked_rcu - obtain an inode from a mounted file system
1370	* @sb: super block of file system
1371	* @hashval: hash value (usually inode number) to get
1372	* @test: callback used for comparisons between inodes
1373	* @set: callback used to initialize a new struct inode
1374	* @data: opaque data pointer to pass to @test and @set
1375	*
1376	* This is equivalent to iget5_locked, except the @test callback must
1377	* tolerate the inode not being stable, including being mid-teardown.
1378	*/
1379	struct inode iget5_locked_rcu(struct* super_block sb, unsigned* long hashval,
1380	int (test)(struct* inode , void* *),
1381	int (set)(struct* inode , void* ), void* *data)
1382	{
1383	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1384	struct inode inode, new;
1385
1386	again:
1387	inode = find_inode(sb, head, test, data, is_inode_hash_locked: false);
1388	if (inode) {
1389	if (IS_ERR(ptr: inode))
1390	return NULL;
1391	wait_on_inode(inode);
1392	if (unlikely(inode_unhashed(inode))) {
1393	iput(inode);
1394	goto again;
1395	}
1396	return inode;
1397	}
1398
1399	new = alloc_inode(sb);
1400	if (new) {
1401	inode = inode_insert5(new, hashval, test, set, data);
1402	if (unlikely(inode != new))
1403	destroy_inode(inode: new);
1404	}
1405	return inode;
1406	}
1407	EXPORT_SYMBOL_GPL(iget5_locked_rcu);
1408
1409	/**
1410	* iget_locked - obtain an inode from a mounted file system
1411	* @sb: super block of file system
1412	* @ino: inode number to get
1413	*
1414	* Search for the inode specified by @ino in the inode cache and if present
1415	* return it with an increased reference count. This is for file systems
1416	* where the inode number is sufficient for unique identification of an inode.
1417	*
1418	* If the inode is not in cache, allocate a new inode and return it locked,
1419	* hashed, and with the I_NEW flag set. The file system gets to fill it in
1420	* before unlocking it via unlock_new_inode().
1421	*/
1422	struct inode iget_locked(struct* super_block sb, unsigned* long ino)
1423	{
1424	struct hlist_head *head = inode_hashtable + hash(sb, hashval: ino);
1425	struct inode *inode;
1426	again:
1427	inode = find_inode_fast(sb, head, ino, is_inode_hash_locked: false);
1428	if (inode) {
1429	if (IS_ERR(ptr: inode))
1430	return NULL;
1431	wait_on_inode(inode);
1432	if (unlikely(inode_unhashed(inode))) {
1433	iput(inode);
1434	goto again;
1435	}
1436	return inode;
1437	}
1438
1439	inode = alloc_inode(sb);
1440	if (inode) {
1441	struct inode *old;
1442
1443	spin_lock(lock: &inode_hash_lock);
1444	/ We released the lock, so.. /
1445	old = find_inode_fast(sb, head, ino, is_inode_hash_locked: true);
1446	if (!old) {
1447	inode->i_ino = ino;
1448	spin_lock(lock: &inode->i_lock);
1449	inode->i_state = I_NEW;
1450	hlist_add_head_rcu(n: &inode->i_hash, h: head);
1451	spin_unlock(lock: &inode->i_lock);
1452	spin_unlock(lock: &inode_hash_lock);
1453	inode_sb_list_add(inode);
1454
1455	/ Return the locked inode with I_NEW set, the*
1456	* caller is responsible for filling in the contents
1457	*/
1458	return inode;
1459	}
1460
1461	/*
1462	* Uhhuh, somebody else created the same inode under
1463	* us. Use the old inode instead of the one we just
1464	* allocated.
1465	*/
1466	spin_unlock(lock: &inode_hash_lock);
1467	destroy_inode(inode);
1468	if (IS_ERR(ptr: old))
1469	return NULL;
1470	inode = old;
1471	wait_on_inode(inode);
1472	if (unlikely(inode_unhashed(inode))) {
1473	iput(inode);
1474	goto again;
1475	}
1476	}
1477	return inode;
1478	}
1479	EXPORT_SYMBOL(iget_locked);
1480
1481	/*
1482	* search the inode cache for a matching inode number.
1483	* If we find one, then the inode number we are trying to
1484	* allocate is not unique and so we should not use it.
1485	*
1486	* Returns 1 if the inode number is unique, 0 if it is not.
1487	*/
1488	static int test_inode_iunique(struct super_block sb, unsigned* long ino)
1489	{
1490	struct hlist_head *b = inode_hashtable + hash(sb, hashval: ino);
1491	struct inode *inode;
1492
1493	hlist_for_each_entry_rcu(inode, b, i_hash) {
1494	if (inode->i_ino == ino && inode->i_sb == sb)
1495	return `0`;
1496	}
1497	return `1`;
1498	}
1499
1500	/**
1501	* iunique - get a unique inode number
1502	* @sb: superblock
1503	* @max_reserved: highest reserved inode number
1504	*
1505	* Obtain an inode number that is unique on the system for a given
1506	* superblock. This is used by file systems that have no natural
1507	* permanent inode numbering system. An inode number is returned that
1508	* is higher than the reserved limit but unique.
1509	*
1510	* BUGS:
1511	* With a large number of inodes live on the file system this function
1512	* currently becomes quite slow.
1513	*/
1514	ino_t iunique(struct super_block *sb, ino_t max_reserved)
1515	{
1516	/*
1517	* On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
1518	* error if st_ino won't fit in target struct field. Use 32bit counter
1519	* here to attempt to avoid that.
1520	*/
1521	static DEFINE_SPINLOCK(iunique_lock);
1522	static unsigned int counter;
1523	ino_t res;
1524
1525	rcu_read_lock();
1526	spin_lock(lock: &iunique_lock);
1527	do {
1528	if (counter <= max_reserved)
1529	counter = max_reserved + `1`;
1530	res = counter++;
1531	} while (!test_inode_iunique(sb, ino: res));
1532	spin_unlock(lock: &iunique_lock);
1533	rcu_read_unlock();
1534
1535	return res;
1536	}
1537	EXPORT_SYMBOL(iunique);
1538
1539	struct inode igrab(struct* inode *inode)
1540	{
1541	spin_lock(lock: &inode->i_lock);
1542	if (!(inode->i_state & (I_FREEING\|I_WILL_FREE))) {
1543	__iget(inode);
1544	spin_unlock(lock: &inode->i_lock);
1545	} else {
1546	spin_unlock(lock: &inode->i_lock);
1547	/*
1548	* Handle the case where s_op->clear_inode is not been
1549	* called yet, and somebody is calling igrab
1550	* while the inode is getting freed.
1551	*/
1552	inode = NULL;
1553	}
1554	return inode;
1555	}
1556	EXPORT_SYMBOL(igrab);
1557
1558	/**
1559	* ilookup5_nowait - search for an inode in the inode cache
1560	* @sb: super block of file system to search
1561	* @hashval: hash value (usually inode number) to search for
1562	* @test: callback used for comparisons between inodes
1563	* @data: opaque data pointer to pass to @test
1564	*
1565	* Search for the inode specified by @hashval and @data in the inode cache.
1566	* If the inode is in the cache, the inode is returned with an incremented
1567	* reference count.
1568	*
1569	* Note: I_NEW is not waited upon so you have to be very careful what you do
1570	* with the returned inode. You probably should be using ilookup5() instead.
1571	*
1572	* Note2: @test is called with the inode_hash_lock held, so can't sleep.
1573	*/
1574	struct inode ilookup5_nowait(struct* super_block sb, unsigned* long hashval,
1575	int (test)(struct* inode , void* ), void* *data)
1576	{
1577	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1578	struct inode *inode;
1579
1580	spin_lock(lock: &inode_hash_lock);
1581	inode = find_inode(sb, head, test, data, is_inode_hash_locked: true);
1582	spin_unlock(lock: &inode_hash_lock);
1583
1584	return IS_ERR(ptr: inode) ? NULL : inode;
1585	}
1586	EXPORT_SYMBOL(ilookup5_nowait);
1587
/**
 * ilookup5 - search for an inode in the inode cache
 * @sb:		super block of file system to search
 * @hashval:	hash value (usually inode number) to search for
 * @test:	callback used for comparisons between inodes
 * @data:	opaque data pointer to pass to @test
 *
 * Search for the inode specified by @hashval and @data in the inode cache,
 * and if the inode is in the cache, return the inode with an incremented
 * reference count.  Waits on I_NEW before returning the inode.
 *
 * This is a generalized version of ilookup() for file systems where the
 * inode number is not sufficient for unique identification of an inode.
 *
 * Note: @test is called with the inode_hash_lock held, so can't sleep.
 */
struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
		int (*test)(struct inode *, void *), void *data)
{
	struct inode *inode;
again:
	inode = ilookup5_nowait(sb, hashval, test, data);
	if (inode) {
		wait_on_inode(inode);
		/* It got unhashed while we waited: drop it and search again. */
		if (unlikely(inode_unhashed(inode))) {
			iput(inode);
			goto again;
		}
	}
	return inode;
}
EXPORT_SYMBOL(ilookup5);
1621
1622	/**
1623	* ilookup - search for an inode in the inode cache
1624	* @sb: super block of file system to search
1625	* @ino: inode number to search for
1626	*
1627	* Search for the inode @ino in the inode cache, and if the inode is in the
1628	* cache, the inode is returned with an incremented reference count.
1629	*/
1630	struct inode ilookup(struct* super_block sb, unsigned* long ino)
1631	{
1632	struct hlist_head *head = inode_hashtable + hash(sb, hashval: ino);
1633	struct inode *inode;
1634	again:
1635	inode = find_inode_fast(sb, head, ino, is_inode_hash_locked: false);
1636
1637	if (inode) {
1638	if (IS_ERR(ptr: inode))
1639	return NULL;
1640	wait_on_inode(inode);
1641	if (unlikely(inode_unhashed(inode))) {
1642	iput(inode);
1643	goto again;
1644	}
1645	}
1646	return inode;
1647	}
1648	EXPORT_SYMBOL(ilookup);
1649
1650	/**
1651	* find_inode_nowait - find an inode in the inode cache
1652	* @sb: super block of file system to search
1653	* @hashval: hash value (usually inode number) to search for
1654	* @match: callback used for comparisons between inodes
1655	* @data: opaque data pointer to pass to @match
1656	*
1657	* Search for the inode specified by @hashval and @data in the inode
1658	* cache, where the helper function @match will return 0 if the inode
1659	* does not match, 1 if the inode does match, and -1 if the search
1660	* should be stopped. The @match function must be responsible for
1661	* taking the i_lock spin_lock and checking i_state for an inode being
1662	* freed or being initialized, and incrementing the reference count
1663	* before returning 1. It also must not sleep, since it is called with
1664	* the inode_hash_lock spinlock held.
1665	*
1666	* This is a even more generalized version of ilookup5() when the
1667	* function must never block --- find_inode() can block in
1668	* __wait_on_freeing_inode() --- or when the caller can not increment
1669	* the reference count because the resulting iput() might cause an
1670	* inode eviction. The tradeoff is that the @match funtion must be
1671	* very carefully implemented.
1672	*/
1673	struct inode find_inode_nowait(struct* super_block *sb,
1674	unsigned long hashval,
1675	int (match)(struct* inode , unsigned* long,
1676	void *),
1677	void *data)
1678	{
1679	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1680	struct inode inode, ret_inode = NULL;
1681	int mval;
1682
1683	spin_lock(lock: &inode_hash_lock);
1684	hlist_for_each_entry(inode, head, i_hash) {
1685	if (inode->i_sb != sb)
1686	continue;
1687	mval = match(inode, hashval, data);
1688	if (mval == `0`)
1689	continue;
1690	if (mval == `1`)
1691	ret_inode = inode;
1692	goto out;
1693	}
1694	out:
1695	spin_unlock(lock: &inode_hash_lock);
1696	return ret_inode;
1697	}
1698	EXPORT_SYMBOL(find_inode_nowait);
1699
1700	/**
1701	* find_inode_rcu - find an inode in the inode cache
1702	* @sb: Super block of file system to search
1703	* @hashval: Key to hash
1704	* @test: Function to test match on an inode
1705	* @data: Data for test function
1706	*
1707	* Search for the inode specified by @hashval and @data in the inode cache,
1708	* where the helper function @test will return 0 if the inode does not match
1709	* and 1 if it does. The @test function must be responsible for taking the
1710	* i_lock spin_lock and checking i_state for an inode being freed or being
1711	* initialized.
1712	*
1713	* If successful, this will return the inode for which the @test function
1714	* returned 1 and NULL otherwise.
1715	*
1716	* The @test function is not permitted to take a ref on any inode presented.
1717	* It is also not permitted to sleep.
1718	*
1719	* The caller must hold the RCU read lock.
1720	*/
1721	struct inode find_inode_rcu(struct* super_block sb, unsigned* long hashval,
1722	int (test)(struct* inode , void* ), void* *data)
1723	{
1724	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1725	struct inode *inode;
1726
1727	RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
1728	"suspicious find_inode_rcu() usage");
1729
1730	hlist_for_each_entry_rcu(inode, head, i_hash) {
1731	if (inode->i_sb == sb &&
1732	!(READ_ONCE(inode->i_state) & (I_FREEING \| I_WILL_FREE)) &&
1733	test(inode, data))
1734	return inode;
1735	}
1736	return NULL;
1737	}
1738	EXPORT_SYMBOL(find_inode_rcu);
1739
1740	/**
1741	* find_inode_by_ino_rcu - Find an inode in the inode cache
1742	* @sb: Super block of file system to search
1743	* @ino: The inode number to match
1744	*
1745	* Search for the inode specified by @hashval and @data in the inode cache,
1746	* where the helper function @test will return 0 if the inode does not match
1747	* and 1 if it does. The @test function must be responsible for taking the
1748	* i_lock spin_lock and checking i_state for an inode being freed or being
1749	* initialized.
1750	*
1751	* If successful, this will return the inode for which the @test function
1752	* returned 1 and NULL otherwise.
1753	*
1754	* The @test function is not permitted to take a ref on any inode presented.
1755	* It is also not permitted to sleep.
1756	*
1757	* The caller must hold the RCU read lock.
1758	*/
1759	struct inode find_inode_by_ino_rcu(struct* super_block *sb,
1760	unsigned long ino)
1761	{
1762	struct hlist_head *head = inode_hashtable + hash(sb, hashval: ino);
1763	struct inode *inode;
1764
1765	RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
1766	"suspicious find_inode_by_ino_rcu() usage");
1767
1768	hlist_for_each_entry_rcu(inode, head, i_hash) {
1769	if (inode->i_ino == ino &&
1770	inode->i_sb == sb &&
1771	!(READ_ONCE(inode->i_state) & (I_FREEING \| I_WILL_FREE)))
1772	return inode;
1773	}
1774	return NULL;
1775	}
1776	EXPORT_SYMBOL(find_inode_by_ino_rcu);
1777
1778	int insert_inode_locked(struct inode *inode)
1779	{
1780	struct super_block *sb = inode->i_sb;
1781	ino_t ino = inode->i_ino;
1782	struct hlist_head *head = inode_hashtable + hash(sb, hashval: ino);
1783
1784	while (`1`) {
1785	struct inode *old = NULL;
1786	spin_lock(lock: &inode_hash_lock);
1787	hlist_for_each_entry(old, head, i_hash) {
1788	if (old->i_ino != ino)
1789	continue;
1790	if (old->i_sb != sb)
1791	continue;
1792	spin_lock(lock: &old->i_lock);
1793	if (old->i_state & (I_FREEING\|I_WILL_FREE)) {
1794	spin_unlock(lock: &old->i_lock);
1795	continue;
1796	}
1797	break;
1798	}
1799	if (likely(!old)) {
1800	spin_lock(lock: &inode->i_lock);
1801	inode->i_state \|= I_NEW \| I_CREATING;
1802	hlist_add_head_rcu(n: &inode->i_hash, h: head);
1803	spin_unlock(lock: &inode->i_lock);
1804	spin_unlock(lock: &inode_hash_lock);
1805	return `0`;
1806	}
1807	if (unlikely(old->i_state & I_CREATING)) {
1808	spin_unlock(lock: &old->i_lock);
1809	spin_unlock(lock: &inode_hash_lock);
1810	return -EBUSY;
1811	}
1812	__iget(inode: old);
1813	spin_unlock(lock: &old->i_lock);
1814	spin_unlock(lock: &inode_hash_lock);
1815	wait_on_inode(inode: old);
1816	if (unlikely(!inode_unhashed(old))) {
1817	iput(old);
1818	return -EBUSY;
1819	}
1820	iput(old);
1821	}
1822	}
1823	EXPORT_SYMBOL(insert_inode_locked);
1824
1825	int insert_inode_locked4(struct inode inode, unsigned* long hashval,
1826	int (test)(struct* inode , void* ), void* *data)
1827	{
1828	struct inode *old;
1829
1830	inode->i_state \|= I_CREATING;
1831	old = inode_insert5(inode, hashval, test, NULL, data);
1832
1833	if (old != inode) {
1834	iput(old);
1835	return -EBUSY;
1836	}
1837	return `0`;
1838	}
1839	EXPORT_SYMBOL(insert_inode_locked4);
1840
1841
/* Unconditionally returns 1; @inode is not examined. */
int generic_delete_inode(struct inode *inode)
{
	return 1;
}
EXPORT_SYMBOL(generic_delete_inode);
1847
1848	/*
1849	* Called when we're dropping the last reference
1850	* to an inode.
1851	*
1852	* Call the FS "drop_inode()" function, defaulting to
1853	* the legacy UNIX filesystem behaviour. If it tells
1854	* us to evict inode, do so. Otherwise, retain inode
1855	* in cache if fs is alive, sync and evict if fs is
1856	* shutting down.
1857	*/
1858	static void iput_final(struct inode *inode)
1859	{
1860	struct super_block *sb = inode->i_sb;
1861	const struct super_operations *op = inode->i_sb->s_op;
1862	unsigned long state;
1863	int drop;
1864
1865	WARN_ON(inode->i_state & I_NEW);
1866
1867	if (op->drop_inode)
1868	drop = op->drop_inode(inode);
1869	else
1870	drop = generic_drop_inode(inode);
1871
1872	if (!drop &&
1873	!(inode->i_state & I_DONTCACHE) &&
1874	(sb->s_flags & SB_ACTIVE)) {
1875	__inode_add_lru(inode, rotate: true);
1876	spin_unlock(lock: &inode->i_lock);
1877	return;
1878	}
1879
1880	state = inode->i_state;
1881	if (!drop) {
1882	WRITE_ONCE(inode->i_state, state \| I_WILL_FREE);
1883	spin_unlock(lock: &inode->i_lock);
1884
1885	write_inode_now(inode, sync: `1`);
1886
1887	spin_lock(lock: &inode->i_lock);
1888	state = inode->i_state;
1889	WARN_ON(state & I_NEW);
1890	state &= ~I_WILL_FREE;
1891	}
1892
1893	WRITE_ONCE(inode->i_state, state \| I_FREEING);
1894	if (!list_empty(head: &inode->i_lru))
1895	inode_lru_list_del(inode);
1896	spin_unlock(lock: &inode->i_lock);
1897
1898	evict(inode);
1899	}
1900
1901	/**
1902	* iput - put an inode
1903	* @inode: inode to put
1904	*
1905	* Puts an inode, dropping its usage count. If the inode use count hits
1906	* zero, the inode is then freed and may also be destroyed.
1907	*
1908	* Consequently, iput() can sleep.
1909	*/
1910	void iput(struct inode *inode)
1911	{
1912	if (!inode)
1913	return;
1914	BUG_ON(inode->i_state & I_CLEAR);
1915	retry:
1916	if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) {
1917	if (inode->i_nlink && (inode->i_state & I_DIRTY_TIME)) {
1918	atomic_inc(v: &inode->i_count);
1919	spin_unlock(lock: &inode->i_lock);
1920	trace_writeback_lazytime_iput(inode);
1921	mark_inode_dirty_sync(inode);
1922	goto retry;
1923	}
1924	iput_final(inode);
1925	}
1926	}
1927	EXPORT_SYMBOL(iput);
1928
#ifdef CONFIG_BLOCK
/**
 *	bmap	- find a block number in a file
 *	@inode:  inode owning the block number being requested
 *	@block: pointer containing the block to find
 *
 *	Replaces the value in ``*block`` with the block number on the device
 *	that corresponds to the requested block number in the file.
 *	That is, asked for block 4 of inode 1 the function will replace the
 *	4 in ``*block``, with disk block relative to the disk start that holds that
 *	block of the file.
 *
 *	Returns -EINVAL in case of error, 0 otherwise. If mapping falls into a
 *	hole, returns 0 and ``*block`` is also set to 0.
 */
int bmap(struct inode *inode, sector_t *block)
{
	/* The only error case: the mapping has no ->bmap operation. */
	if (!inode->i_mapping->a_ops->bmap)
		return -EINVAL;

	*block = inode->i_mapping->a_ops->bmap(inode->i_mapping, *block);
	return 0;
}
EXPORT_SYMBOL(bmap);
#endif
1954
1955	/*
1956	* With relative atime, only update atime if the previous atime is
1957	* earlier than or equal to either the ctime or mtime,
1958	* or if at least a day has passed since the last atime update.
1959	*/
1960	static bool relatime_need_update(struct vfsmount mnt, struct* inode *inode,
1961	struct timespec64 now)
1962	{
1963	struct timespec64 atime, mtime, ctime;
1964
1965	if (!(mnt->mnt_flags & MNT_RELATIME))
1966	return true;
1967	/*
1968	* Is mtime younger than or equal to atime? If yes, update atime:
1969	*/
1970	atime = inode_get_atime(inode);
1971	mtime = inode_get_mtime(inode);
1972	if (timespec64_compare(lhs: &mtime, rhs: &atime) >= `0`)
1973	return true;
1974	/*
1975	* Is ctime younger than or equal to atime? If yes, update atime:
1976	*/
1977	ctime = inode_get_ctime(inode);
1978	if (timespec64_compare(lhs: &ctime, rhs: &atime) >= `0`)
1979	return true;
1980
1981	/*
1982	* Is the previous atime value older than a day? If yes,
1983	* update atime:
1984	*/
1985	if ((long)(now.tv_sec - atime.tv_sec) >= `24``60``60`)
1986	return true;
1987	/*
1988	* Good, we can skip the atime update:
1989	*/
1990	return false;
1991	}
1992
1993	/**
1994	* inode_update_timestamps - update the timestamps on the inode
1995	* @inode: inode to be updated
1996	* @flags: S_* flags that needed to be updated
1997	*
1998	* The update_time function is called when an inode's timestamps need to be
1999	* updated for a read or write operation. This function handles updating the
2000	* actual timestamps. It's up to the caller to ensure that the inode is marked
2001	* dirty appropriately.
2002	*
2003	* In the case where any of S_MTIME, S_CTIME, or S_VERSION need to be updated,
2004	* attempt to update all three of them. S_ATIME updates can be handled
2005	* independently of the rest.
2006	*
2007	* Returns a set of S_* flags indicating which values changed.
2008	*/
2009	int inode_update_timestamps(struct inode inode, int* flags)
2010	{
2011	int updated = `0`;
2012	struct timespec64 now;
2013
2014	if (flags & (S_MTIME\|S_CTIME\|S_VERSION)) {
2015	struct timespec64 ctime = inode_get_ctime(inode);
2016	struct timespec64 mtime = inode_get_mtime(inode);
2017
2018	now = inode_set_ctime_current(inode);
2019	if (!timespec64_equal(a: &now, b: &ctime))
2020	updated \|= S_CTIME;
2021	if (!timespec64_equal(a: &now, b: &mtime)) {
2022	inode_set_mtime_to_ts(inode, ts: now);
2023	updated \|= S_MTIME;
2024	}
2025	if (IS_I_VERSION(inode) && inode_maybe_inc_iversion(inode, force: updated))
2026	updated \|= S_VERSION;
2027	} else {
2028	now = current_time(inode);
2029	}
2030
2031	if (flags & S_ATIME) {
2032	struct timespec64 atime = inode_get_atime(inode);
2033
2034	if (!timespec64_equal(a: &now, b: &atime)) {
2035	inode_set_atime_to_ts(inode, ts: now);
2036	updated \|= S_ATIME;
2037	}
2038	}
2039	return updated;
2040	}
2041	EXPORT_SYMBOL(inode_update_timestamps);
2042
2043	/**
2044	* generic_update_time - update the timestamps on the inode
2045	* @inode: inode to be updated
2046	* @flags: S_* flags that needed to be updated
2047	*
2048	* The update_time function is called when an inode's timestamps need to be
2049	* updated for a read or write operation. In the case where any of S_MTIME, S_CTIME,
2050	* or S_VERSION need to be updated we attempt to update all three of them. S_ATIME
2051	* updates can be handled done independently of the rest.
2052	*
2053	* Returns a S_* mask indicating which fields were updated.
2054	*/
2055	int generic_update_time(struct inode inode, int* flags)
2056	{
2057	int updated = inode_update_timestamps(inode, flags);
2058	int dirty_flags = `0`;
2059
2060	if (updated & (S_ATIME\|S_MTIME\|S_CTIME))
2061	dirty_flags = inode->i_sb->s_flags & SB_LAZYTIME ? I_DIRTY_TIME : I_DIRTY_SYNC;
2062	if (updated & S_VERSION)
2063	dirty_flags \|= I_DIRTY_SYNC;
2064	__mark_inode_dirty(inode, dirty_flags);
2065	return updated;
2066	}
2067	EXPORT_SYMBOL(generic_update_time);
2068
2069	/*
2070	* This does the actual work of updating an inodes time or version. Must have
2071	* had called mnt_want_write() before calling this.
2072	*/
2073	int inode_update_time(struct inode inode, int* flags)
2074	{
2075	if (inode->i_op->update_time)
2076	return inode->i_op->update_time(inode, flags);
2077	generic_update_time(inode, flags);
2078	return `0`;
2079	}
2080	EXPORT_SYMBOL(inode_update_time);
2081
2082	/**
2083	* atime_needs_update - update the access time
2084	* @path: the &struct path to update
2085	* @inode: inode to update
2086	*
2087	* Update the accessed time on an inode and mark it for writeback.
2088	* This function automatically handles read only file systems and media,
2089	* as well as the "noatime" flag and inode specific "noatime" markers.
2090	*/
2091	bool atime_needs_update(const struct path path, struct* inode *inode)
2092	{
2093	struct vfsmount *mnt = path->mnt;
2094	struct timespec64 now, atime;
2095
2096	if (inode->i_flags & S_NOATIME)
2097	return false;
2098
2099	/ Atime updates will likely cause i_uid and i_gid to be written*
2100	* back improprely if their true value is unknown to the vfs.
2101	*/
2102	if (HAS_UNMAPPED_ID(idmap: mnt_idmap(mnt), inode))
2103	return false;
2104
2105	if (IS_NOATIME(inode))
2106	return false;
2107	if ((inode->i_sb->s_flags & SB_NODIRATIME) && S_ISDIR(inode->i_mode))
2108	return false;
2109
2110	if (mnt->mnt_flags & MNT_NOATIME)
2111	return false;
2112	if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
2113	return false;
2114
2115	now = current_time(inode);
2116
2117	if (!relatime_need_update(mnt, inode, now))
2118	return false;
2119
2120	atime = inode_get_atime(inode);
2121	if (timespec64_equal(a: &atime, b: &now))
2122	return false;
2123
2124	return true;
2125	}
2126
2127	void touch_atime(const struct path *path)
2128	{
2129	struct vfsmount *mnt = path->mnt;
2130	struct inode *inode = d_inode(dentry: path->dentry);
2131
2132	if (!atime_needs_update(path, inode))
2133	return;
2134
2135	if (!sb_start_write_trylock(sb: inode->i_sb))
2136	return;
2137
2138	if (mnt_get_write_access(mnt) != `0`)
2139	goto skip_update;
2140	/*
2141	* File systems can error out when updating inodes if they need to
2142	* allocate new space to modify an inode (such is the case for
2143	* Btrfs), but since we touch atime while walking down the path we
2144	* really don't care if we failed to update the atime of the file,
2145	* so just ignore the return value.
2146	* We may also fail on filesystems that have the ability to make parts
2147	* of the fs read only, e.g. subvolumes in Btrfs.
2148	*/
2149	inode_update_time(inode, S_ATIME);
2150	mnt_put_write_access(mnt);
2151	skip_update:
2152	sb_end_write(sb: inode->i_sb);
2153	}
2154	EXPORT_SYMBOL(touch_atime);
2155
2156	/*
2157	* Return mask of changes for notify_change() that need to be done as a
2158	* response to write or truncate. Return 0 if nothing has to be changed.
2159	* Negative value on error (change should be denied).
2160	*/
2161	int dentry_needs_remove_privs(struct mnt_idmap *idmap,
2162	struct dentry *dentry)
2163	{
2164	struct inode *inode = d_inode(dentry);
2165	int mask = `0`;
2166	int ret;
2167
2168	if (IS_NOSEC(inode))
2169	return `0`;
2170
2171	mask = setattr_should_drop_suidgid(idmap, inode);
2172	ret = security_inode_need_killpriv(dentry);
2173	if (ret < `0`)
2174	return ret;
2175	if (ret)
2176	mask \|= ATTR_KILL_PRIV;
2177	return mask;
2178	}
2179
2180	static int __remove_privs(struct mnt_idmap *idmap,
2181	struct dentry dentry, int* kill)
2182	{
2183	struct iattr newattrs;
2184
2185	newattrs.ia_valid = ATTR_FORCE \| kill;
2186	/*
2187	* Note we call this on write, so notify_change will not
2188	* encounter any conflicting delegations:
2189	*/
2190	return notify_change(idmap, dentry, &newattrs, NULL);
2191	}
2192
2193	int file_remove_privs_flags(struct file file, unsigned* int flags)
2194	{
2195	struct dentry *dentry = file_dentry(file);
2196	struct inode *inode = file_inode(f: file);
2197	int error = `0`;
2198	int kill;
2199
2200	if (IS_NOSEC(inode) \|\| !S_ISREG(inode->i_mode))
2201	return `0`;
2202
2203	kill = dentry_needs_remove_privs(idmap: file_mnt_idmap(file), dentry);
2204	if (kill < `0`)
2205	return kill;
2206
2207	if (kill) {
2208	if (flags & IOCB_NOWAIT)
2209	return -EAGAIN;
2210
2211	error = __remove_privs(idmap: file_mnt_idmap(file), dentry, kill);
2212	}
2213
2214	if (!error)
2215	inode_has_no_xattr(inode);
2216	return error;
2217	}
2218	EXPORT_SYMBOL_GPL(file_remove_privs_flags);
2219
/**
 * file_remove_privs - remove special file privileges (suid, capabilities)
 * @file: file to remove privileges from
 *
 * When file is modified by a write or truncation ensure that special
 * file privileges are removed. This is file_remove_privs_flags() with no
 * flags set.
 *
 * Return: 0 on success, negative errno on failure.
 */
int file_remove_privs(struct file *file)
{
	return file_remove_privs_flags(file, 0);
}
EXPORT_SYMBOL(file_remove_privs);
2234
2235	/**
2236	* current_time - Return FS time (possibly fine-grained)
2237	* @inode: inode.
2238	*
2239	* Return the current time truncated to the time granularity supported by
2240	* the fs, as suitable for a ctime/mtime change. If the ctime is flagged
2241	* as having been QUERIED, get a fine-grained timestamp, but don't update
2242	* the floor.
2243	*
2244	* For a multigrain inode, this is effectively an estimate of the timestamp
2245	* that a file would receive. An actual update must go through
2246	* inode_set_ctime_current().
2247	*/
2248	struct timespec64 current_time(struct inode *inode)
2249	{
2250	struct timespec64 now;
2251	u32 cns;
2252
2253	ktime_get_coarse_real_ts64_mg(ts: &now);
2254
2255	if (!is_mgtime(inode))
2256	goto out;
2257
2258	/ If nothing has queried it, then coarse time is fine /
2259	cns = smp_load_acquire(&inode->i_ctime_nsec);
2260	if (cns & I_CTIME_QUERIED) {
2261	/*
2262	* If there is no apparent change, then get a fine-grained
2263	* timestamp.
2264	*/
2265	if (now.tv_nsec == (cns & ~I_CTIME_QUERIED))
2266	ktime_get_real_ts64(tv: &now);
2267	}
2268	out:
2269	return timestamp_truncate(t: now, inode);
2270	}
2271	EXPORT_SYMBOL(current_time);
2272
2273	static int inode_needs_update_time(struct inode *inode)
2274	{
2275	struct timespec64 now, ts;
2276	int sync_it = `0`;
2277
2278	/ First try to exhaust all avenues to not sync /
2279	if (IS_NOCMTIME(inode))
2280	return `0`;
2281
2282	now = current_time(inode);
2283
2284	ts = inode_get_mtime(inode);
2285	if (!timespec64_equal(a: &ts, b: &now))
2286	sync_it \|= S_MTIME;
2287
2288	ts = inode_get_ctime(inode);
2289	if (!timespec64_equal(a: &ts, b: &now))
2290	sync_it \|= S_CTIME;
2291
2292	if (IS_I_VERSION(inode) && inode_iversion_need_inc(inode))
2293	sync_it \|= S_VERSION;
2294
2295	return sync_it;
2296	}
2297
/*
 * Update the inode times of @file according to the S_* mask in @sync_mode.
 * The update is only attempted when write access to the mount can be taken;
 * otherwise 0 is returned without touching the inode.
 */
static int __file_update_time(struct file *file, int sync_mode)
{
	int ret = 0;
	struct inode *inode = file_inode(file);

	/* try to update time settings */
	if (!mnt_get_write_access_file(file)) {
		ret = inode_update_time(inode, sync_mode);
		mnt_put_write_access_file(file);
	}

	return ret;
}
2311
/**
 * file_update_time - update mtime and ctime time
 * @file: file accessed
 *
 * Update the mtime and ctime members of an inode and mark the inode for
 * writeback. Note that this function is meant exclusively for usage in
 * the file write path of filesystems, and filesystems may choose to
 * explicitly ignore updates via this function with the _NOCMTIME inode
 * flag, e.g. for network filesystem where these timestamps are handled
 * by the server. This can return an error for file systems who need to
 * allocate space in order to update an inode.
 *
 * Return: 0 on success, negative errno on failure.
 */
int file_update_time(struct file *file)
{
	int ret;
	struct inode *inode = file_inode(file);

	/* A non-positive result means there is nothing to update. */
	ret = inode_needs_update_time(inode);
	if (ret <= 0)
		return ret;

	return __file_update_time(file, ret);
}
EXPORT_SYMBOL(file_update_time);
2338
2339	/**
2340	* file_modified_flags - handle mandated vfs changes when modifying a file
2341	* @file: file that was modified
2342	* @flags: kiocb flags
2343	*
2344	* When file has been modified ensure that special
2345	* file privileges are removed and time settings are updated.
2346	*
2347	* If IOCB_NOWAIT is set, special file privileges will not be removed and
2348	* time settings will not be updated. It will return -EAGAIN.
2349	*
2350	* Context: Caller must hold the file's inode lock.
2351	*
2352	* Return: 0 on success, negative errno on failure.
2353	*/
2354	static int file_modified_flags(struct file file, int* flags)
2355	{
2356	int ret;
2357	struct inode *inode = file_inode(f: file);
2358
2359	/*
2360	* Clear the security bits if the process is not being run by root.
2361	* This keeps people from modifying setuid and setgid binaries.
2362	*/
2363	ret = file_remove_privs_flags(file, flags);
2364	if (ret)
2365	return ret;
2366
2367	if (unlikely(file->f_mode & FMODE_NOCMTIME))
2368	return `0`;
2369
2370	ret = inode_needs_update_time(inode);
2371	if (ret <= `0`)
2372	return ret;
2373	if (flags & IOCB_NOWAIT)
2374	return -EAGAIN;
2375
2376	return __file_update_time(file, sync_mode: ret);
2377	}
2378
/**
 * file_modified - handle mandated vfs changes when modifying a file
 * @file: file that was modified
 *
 * When file has been modified ensure that special
 * file privileges are removed and time settings are updated.
 * This is file_modified_flags() with no kiocb flags set.
 *
 * Context: Caller must hold the file's inode lock.
 *
 * Return: 0 on success, negative errno on failure.
 */
int file_modified(struct file *file)
{
	return file_modified_flags(file, 0);
}
EXPORT_SYMBOL(file_modified);
2395
2396	/**
2397	* kiocb_modified - handle mandated vfs changes when modifying a file
2398	* @iocb: iocb that was modified
2399	*
2400	* When file has been modified ensure that special
2401	* file privileges are removed and time settings are updated.
2402	*
2403	* Context: Caller must hold the file's inode lock.
2404	*
2405	* Return: 0 on success, negative errno on failure.
2406	*/
2407	int kiocb_modified(struct kiocb *iocb)
2408	{
2409	return file_modified_flags(file: iocb->ki_filp, flags: iocb->ki_flags);
2410	}
2411	EXPORT_SYMBOL_GPL(kiocb_modified);
2412
2413	int inode_needs_sync(struct inode *inode)
2414	{
2415	if (IS_SYNC(inode))
2416	return `1`;
2417	if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode))
2418	return `1`;
2419	return `0`;
2420	}
2421	EXPORT_SYMBOL(inode_needs_sync);
2422
2423	/*
2424	* If we try to find an inode in the inode hash while it is being
2425	* deleted, we have to wait until the filesystem completes its
2426	* deletion before reporting that it isn't found. This function waits
2427	* until the deletion _might_ have completed. Callers are responsible
2428	* to recheck inode state.
2429	*
2430	* It doesn't matter if I_NEW is not set initially, a call to
2431	* wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list
2432	* will DTRT.
2433	*/
2434	static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_locked)
2435	{
2436	struct wait_bit_queue_entry wqe;
2437	struct wait_queue_head *wq_head;
2438
2439	/*
2440	* Handle racing against evict(), see that routine for more details.
2441	*/
2442	if (unlikely(inode_unhashed(inode))) {
2443	WARN_ON(is_inode_hash_locked);
2444	spin_unlock(lock: &inode->i_lock);
2445	return;
2446	}
2447
2448	wq_head = inode_bit_waitqueue(&wqe, inode, __I_NEW);
2449	prepare_to_wait_event(wq_head, wq_entry: &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
2450	spin_unlock(lock: &inode->i_lock);
2451	rcu_read_unlock();
2452	if (is_inode_hash_locked)
2453	spin_unlock(lock: &inode_hash_lock);
2454	schedule();
2455	finish_wait(wq_head, wq_entry: &wqe.wq_entry);
2456	if (is_inode_hash_locked)
2457	spin_lock(lock: &inode_hash_lock);
2458	rcu_read_lock();
2459	}
2460
2461	static __initdata unsigned long ihash_entries;
2462	static int __init set_ihash_entries(char *str)
2463	{
2464	if (!str)
2465	return `0`;
2466	ihash_entries = simple_strtoul(str, &str, `0`);
2467	return `1`;
2468	}
2469	__setup("ihash_entries=", set_ihash_entries);
2470
2471	/*
2472	* Initialize the waitqueues and inode hash table.
2473	*/
2474	void __init inode_init_early(void)
2475	{
2476	/ If hashes are distributed across NUMA nodes, defer*
2477	* hash allocation until vmalloc space is available.
2478	*/
2479	if (hashdist)
2480	return;
2481
2482	inode_hashtable =
2483	alloc_large_system_hash(tablename: "Inode-cache",
2484	bucketsize: sizeof(struct hlist_head),
2485	numentries: ihash_entries,
2486	scale: `14`,
2487	HASH_EARLY \| HASH_ZERO,
2488	hash_shift: &i_hash_shift,
2489	hash_mask: &i_hash_mask,
2490	low_limit: `0`,
2491	high_limit: `0`);
2492	}
2493
2494	void __init inode_init(void)
2495	{
2496	/ inode slab cache /
2497	inode_cachep = kmem_cache_create("inode_cache",
2498	sizeof(struct inode),
2499	`0`,
2500	(SLAB_RECLAIM_ACCOUNT\|SLAB_PANIC\|
2501	SLAB_ACCOUNT),
2502	init_once);
2503
2504	/ Hash may have been set up in inode_init_early /
2505	if (!hashdist)
2506	return;
2507
2508	inode_hashtable =
2509	alloc_large_system_hash(tablename: "Inode-cache",
2510	bucketsize: sizeof(struct hlist_head),
2511	numentries: ihash_entries,
2512	scale: `14`,
2513	HASH_ZERO,
2514	hash_shift: &i_hash_shift,
2515	hash_mask: &i_hash_mask,
2516	low_limit: `0`,
2517	high_limit: `0`);
2518	}
2519
2520	void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
2521	{
2522	inode->i_mode = mode;
2523	if (S_ISCHR(mode)) {
2524	inode->i_fop = &def_chr_fops;
2525	inode->i_rdev = rdev;
2526	} else if (S_ISBLK(mode)) {
2527	if (IS_ENABLED(CONFIG_BLOCK))
2528	inode->i_fop = &def_blk_fops;
2529	inode->i_rdev = rdev;
2530	} else if (S_ISFIFO(mode))
2531	inode->i_fop = &pipefifo_fops;
2532	else if (S_ISSOCK(mode))
2533	; / leave it no_open_fops /
2534	else
2535	printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
2536	" inode %s:%lu\n", mode, inode->i_sb->s_id,
2537	inode->i_ino);
2538	}
2539	EXPORT_SYMBOL(init_special_inode);
2540
2541	/**
2542	* inode_init_owner - Init uid,gid,mode for new inode according to posix standards
2543	* @idmap: idmap of the mount the inode was created from
2544	* @inode: New inode
2545	* @dir: Directory inode
2546	* @mode: mode of the new inode
2547	*
2548	* If the inode has been created through an idmapped mount the idmap of
2549	* the vfsmount must be passed through @idmap. This function will then take
2550	* care to map the inode according to @idmap before checking permissions
2551	* and initializing i_uid and i_gid. On non-idmapped mounts or if permission
2552	* checking is to be performed on the raw inode simply pass @nop_mnt_idmap.
2553	*/
2554	void inode_init_owner(struct mnt_idmap idmap, struct* inode *inode,
2555	const struct inode *dir, umode_t mode)
2556	{
2557	inode_fsuid_set(inode, idmap);
2558	if (dir && dir->i_mode & S_ISGID) {
2559	inode->i_gid = dir->i_gid;
2560
2561	/ Directories are special, and always inherit S_ISGID /
2562	if (S_ISDIR(mode))
2563	mode \|= S_ISGID;
2564	} else
2565	inode_fsgid_set(inode, idmap);
2566	inode->i_mode = mode;
2567	}
2568	EXPORT_SYMBOL(inode_init_owner);
2569
2570	/**
2571	* inode_owner_or_capable - check current task permissions to inode
2572	* @idmap: idmap of the mount the inode was found from
2573	* @inode: inode being checked
2574	*
2575	* Return true if current either has CAP_FOWNER in a namespace with the
2576	* inode owner uid mapped, or owns the file.
2577	*
2578	* If the inode has been found through an idmapped mount the idmap of
2579	* the vfsmount must be passed through @idmap. This function will then take
2580	* care to map the inode according to @idmap before checking permissions.
2581	* On non-idmapped mounts or if permission checking is to be performed on the
2582	* raw inode simply pass @nop_mnt_idmap.
2583	*/
2584	bool inode_owner_or_capable(struct mnt_idmap *idmap,
2585	const struct inode *inode)
2586	{
2587	vfsuid_t vfsuid;
2588	struct user_namespace *ns;
2589
2590	vfsuid = i_uid_into_vfsuid(idmap, inode);
2591	if (vfsuid_eq_kuid(vfsuid, current_fsuid()))
2592	return true;
2593
2594	ns = current_user_ns();
2595	if (vfsuid_has_mapping(userns: ns, vfsuid) && ns_capable(ns, CAP_FOWNER))
2596	return true;
2597	return false;
2598	}
2599	EXPORT_SYMBOL(inode_owner_or_capable);
2600
2601	/*
2602	* Direct i/o helper functions
2603	*/
2604	bool inode_dio_finished(const struct inode *inode)
2605	{
2606	return atomic_read(v: &inode->i_dio_count) == `0`;
2607	}
2608	EXPORT_SYMBOL(inode_dio_finished);
2609
2610	/**
2611	* inode_dio_wait - wait for outstanding DIO requests to finish
2612	* @inode: inode to wait for
2613	*
2614	* Waits for all pending direct I/O requests to finish so that we can
2615	* proceed with a truncate or equivalent operation.
2616	*
2617	* Must be called under a lock that serializes taking new references
2618	* to i_dio_count, usually by inode->i_mutex.
2619	*/
2620	void inode_dio_wait(struct inode *inode)
2621	{
2622	wait_var_event(&inode->i_dio_count, inode_dio_finished(inode));
2623	}
2624	EXPORT_SYMBOL(inode_dio_wait);
2625
2626	void inode_dio_wait_interruptible(struct inode *inode)
2627	{
2628	wait_var_event_interruptible(&inode->i_dio_count,
2629	inode_dio_finished(inode));
2630	}
2631	EXPORT_SYMBOL(inode_dio_wait_interruptible);
2632
2633	/*
2634	* inode_set_flags - atomically set some inode flags
2635	*
2636	* Note: the caller should be holding i_mutex, or else be sure that
2637	* they have exclusive access to the inode structure (i.e., while the
2638	* inode is being instantiated). The reason for the cmpxchg() loop
2639	* --- which wouldn't be necessary if all code paths which modify
2640	* i_flags actually followed this rule, is that there is at least one
2641	* code path which doesn't today so we use cmpxchg() out of an abundance
2642	* of caution.
2643	*
2644	* In the long run, i_mutex is overkill, and we should probably look
2645	* at using the i_lock spinlock to protect i_flags, and then make sure
2646	* it is so documented in include/linux/fs.h and that all code follows
2647	* the locking convention!!
2648	*/
2649	void inode_set_flags(struct inode inode, unsigned* int flags,
2650	unsigned int mask)
2651	{
2652	WARN_ON_ONCE(flags & ~mask);
2653	set_mask_bits(&inode->i_flags, mask, flags);
2654	}
2655	EXPORT_SYMBOL(inode_set_flags);
2656
2657	void inode_nohighmem(struct inode *inode)
2658	{
2659	mapping_set_gfp_mask(m: inode->i_mapping, GFP_USER);
2660	}
2661	EXPORT_SYMBOL(inode_nohighmem);
2662
2663	struct timespec64 inode_set_ctime_to_ts(struct inode inode, struct* timespec64 ts)
2664	{
2665	trace_inode_set_ctime_to_ts(inode, ctime: &ts);
2666	set_normalized_timespec64(ts: &ts, sec: ts.tv_sec, nsec: ts.tv_nsec);
2667	inode->i_ctime_sec = ts.tv_sec;
2668	inode->i_ctime_nsec = ts.tv_nsec;
2669	return ts;
2670	}
2671	EXPORT_SYMBOL(inode_set_ctime_to_ts);
2672
2673	/**
2674	* timestamp_truncate - Truncate timespec to a granularity
2675	* @t: Timespec
2676	* @inode: inode being updated
2677	*
2678	* Truncate a timespec to the granularity supported by the fs
2679	* containing the inode. Always rounds down. gran must
2680	* not be 0 nor greater than a second (NSEC_PER_SEC, or 10^9 ns).
2681	*/
2682	struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode)
2683	{
2684	struct super_block *sb = inode->i_sb;
2685	unsigned int gran = sb->s_time_gran;
2686
2687	t.tv_sec = clamp(t.tv_sec, sb->s_time_min, sb->s_time_max);
2688	if (unlikely(t.tv_sec == sb->s_time_max \|\| t.tv_sec == sb->s_time_min))
2689	t.tv_nsec = `0`;
2690
2691	/ Avoid division in the common cases 1 ns and 1 s. /
2692	if (gran == `1`)
2693	; / nothing /
2694	else if (gran == NSEC_PER_SEC)
2695	t.tv_nsec = `0`;
2696	else if (gran > `1` && gran < NSEC_PER_SEC)
2697	t.tv_nsec -= t.tv_nsec % gran;
2698	else
2699	WARN(`1`, "invalid file time granularity: %u", gran);
2700	return t;
2701	}
2702	EXPORT_SYMBOL(timestamp_truncate);
2703
2704	/**
2705	* inode_set_ctime_current - set the ctime to current_time
2706	* @inode: inode
2707	*
2708	* Set the inode's ctime to the current value for the inode. Returns the
2709	* current value that was assigned. If this is not a multigrain inode, then we
2710	* set it to the later of the coarse time and floor value.
2711	*
2712	* If it is multigrain, then we first see if the coarse-grained timestamp is
2713	* distinct from what is already there. If so, then use that. Otherwise, get a
2714	* fine-grained timestamp.
2715	*
2716	* After that, try to swap the new value into i_ctime_nsec. Accept the
2717	* resulting ctime, regardless of the outcome of the swap. If it has
2718	* already been replaced, then that timestamp is later than the earlier
2719	* unacceptable one, and is thus acceptable.
2720	*/
2721	struct timespec64 inode_set_ctime_current(struct inode *inode)
2722	{
2723	struct timespec64 now;
2724	u32 cns, cur;
2725
2726	ktime_get_coarse_real_ts64_mg(ts: &now);
2727	now = timestamp_truncate(now, inode);
2728
2729	/ Just return that if this is not a multigrain fs /
2730	if (!is_mgtime(inode)) {
2731	inode_set_ctime_to_ts(inode, now);
2732	goto out;
2733	}
2734
2735	/*
2736	* A fine-grained time is only needed if someone has queried
2737	* for timestamps, and the current coarse grained time isn't
2738	* later than what's already there.
2739	*/
2740	cns = smp_load_acquire(&inode->i_ctime_nsec);
2741	if (cns & I_CTIME_QUERIED) {
2742	struct timespec64 ctime = { .tv_sec = inode->i_ctime_sec,
2743	.tv_nsec = cns & ~I_CTIME_QUERIED };
2744
2745	if (timespec64_compare(lhs: &now, rhs: &ctime) <= `0`) {
2746	ktime_get_real_ts64_mg(ts: &now);
2747	now = timestamp_truncate(now, inode);
2748	mgtime_counter_inc(mg_fine_stamps);
2749	}
2750	}
2751	mgtime_counter_inc(mg_ctime_updates);
2752
2753	/ No need to cmpxchg if it's exactly the same /
2754	if (cns == now.tv_nsec && inode->i_ctime_sec == now.tv_sec) {
2755	trace_ctime_xchg_skip(inode, ctime: &now);
2756	goto out;
2757	}
2758	cur = cns;
2759	retry:
2760	/ Try to swap the nsec value into place. /
2761	if (try_cmpxchg(&inode->i_ctime_nsec, &cur, now.tv_nsec)) {
2762	/ If swap occurred, then we're (mostly) done /
2763	inode->i_ctime_sec = now.tv_sec;
2764	trace_ctime_ns_xchg(inode, old: cns, new: now.tv_nsec, cur);
2765	mgtime_counter_inc(mg_ctime_swaps);
2766	} else {
2767	/*
2768	* Was the change due to someone marking the old ctime QUERIED?
2769	* If so then retry the swap. This can only happen once since
2770	* the only way to clear I_CTIME_QUERIED is to stamp the inode
2771	* with a new ctime.
2772	*/
2773	if (!(cns & I_CTIME_QUERIED) && (cns \| I_CTIME_QUERIED) == cur) {
2774	cns = cur;
2775	goto retry;
2776	}
2777	/ Otherwise, keep the existing ctime /
2778	now.tv_sec = inode->i_ctime_sec;
2779	now.tv_nsec = cur & ~I_CTIME_QUERIED;
2780	}
2781	out:
2782	return now;
2783	}
2784	EXPORT_SYMBOL(inode_set_ctime_current);
2785
2786	/**
2787	* inode_set_ctime_deleg - try to update the ctime on a delegated inode
2788	* @inode: inode to update
2789	* @update: timespec64 to set the ctime
2790	*
2791	* Attempt to atomically update the ctime on behalf of a delegation holder.
2792	*
2793	* The nfs server can call back the holder of a delegation to get updated
2794	* inode attributes, including the mtime. When updating the mtime, update
2795	* the ctime to a value at least equal to that.
2796	*
2797	* This can race with concurrent updates to the inode, in which
2798	* case the update is skipped.
2799	*
2800	* Note that this works even when multigrain timestamps are not enabled,
2801	* so it is used in either case.
2802	*/
2803	struct timespec64 inode_set_ctime_deleg(struct inode inode, struct* timespec64 update)
2804	{
2805	struct timespec64 now, cur_ts;
2806	u32 cur, old;
2807
2808	/ pairs with try_cmpxchg below /
2809	cur = smp_load_acquire(&inode->i_ctime_nsec);
2810	cur_ts.tv_nsec = cur & ~I_CTIME_QUERIED;
2811	cur_ts.tv_sec = inode->i_ctime_sec;
2812
2813	/ If the update is older than the existing value, skip it. /
2814	if (timespec64_compare(lhs: &update, rhs: &cur_ts) <= `0`)
2815	return cur_ts;
2816
2817	ktime_get_coarse_real_ts64_mg(ts: &now);
2818
2819	/ Clamp the update to "now" if it's in the future /
2820	if (timespec64_compare(lhs: &update, rhs: &now) > `0`)
2821	update = now;
2822
2823	update = timestamp_truncate(update, inode);
2824
2825	/ No need to update if the values are already the same /
2826	if (timespec64_equal(a: &update, b: &cur_ts))
2827	return cur_ts;
2828
2829	/*
2830	* Try to swap the nsec value into place. If it fails, that means
2831	* it raced with an update due to a write or similar activity. That
2832	* stamp takes precedence, so just skip the update.
2833	*/
2834	retry:
2835	old = cur;
2836	if (try_cmpxchg(&inode->i_ctime_nsec, &cur, update.tv_nsec)) {
2837	inode->i_ctime_sec = update.tv_sec;
2838	mgtime_counter_inc(mg_ctime_swaps);
2839	return update;
2840	}
2841
2842	/*
2843	* Was the change due to another task marking the old ctime QUERIED?
2844	*
2845	* If so, then retry the swap. This can only happen once since
2846	* the only way to clear I_CTIME_QUERIED is to stamp the inode
2847	* with a new ctime.
2848	*/
2849	if (!(old & I_CTIME_QUERIED) && (cur == (old \| I_CTIME_QUERIED)))
2850	goto retry;
2851
2852	/ Otherwise, it was a new timestamp. /
2853	cur_ts.tv_sec = inode->i_ctime_sec;
2854	cur_ts.tv_nsec = cur & ~I_CTIME_QUERIED;
2855	return cur_ts;
2856	}
2857	EXPORT_SYMBOL(inode_set_ctime_deleg);
2858
2859	/**
2860	* in_group_or_capable - check whether caller is CAP_FSETID privileged
2861	* @idmap: idmap of the mount @inode was found from
2862	* @inode: inode to check
2863	* @vfsgid: the new/current vfsgid of @inode
2864	*
2865	* Check whether @vfsgid is in the caller's group list or if the caller is
2866	* privileged with CAP_FSETID over @inode. This can be used to determine
2867	* whether the setgid bit can be kept or must be dropped.
2868	*
2869	* Return: true if the caller is sufficiently privileged, false if not.
2870	*/
2871	bool in_group_or_capable(struct mnt_idmap *idmap,
2872	const struct inode *inode, vfsgid_t vfsgid)
2873	{
2874	if (vfsgid_in_group_p(vfsgid))
2875	return true;
2876	if (capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID))
2877	return true;
2878	return false;
2879	}
2880	EXPORT_SYMBOL(in_group_or_capable);
2881
2882	/**
2883	* mode_strip_sgid - handle the sgid bit for non-directories
2884	* @idmap: idmap of the mount the inode was created from
2885	* @dir: parent directory inode
2886	* @mode: mode of the file to be created in @dir
2887	*
2888	* If the @mode of the new file has both the S_ISGID and S_IXGRP bit
2889	* raised and @dir has the S_ISGID bit raised ensure that the caller is
2890	* either in the group of the parent directory or they have CAP_FSETID
2891	* in their user namespace and are privileged over the parent directory.
2892	* In all other cases, strip the S_ISGID bit from @mode.
2893	*
2894	* Return: the new mode to use for the file
2895	*/
2896	umode_t mode_strip_sgid(struct mnt_idmap *idmap,
2897	const struct inode *dir, umode_t mode)
2898	{
2899	if ((mode & (S_ISGID \| S_IXGRP)) != (S_ISGID \| S_IXGRP))
2900	return mode;
2901	if (S_ISDIR(mode) \|\| !dir \|\| !(dir->i_mode & S_ISGID))
2902	return mode;
2903	if (in_group_or_capable(idmap, dir, i_gid_into_vfsgid(idmap, inode: dir)))
2904	return mode;
2905	return mode & ~S_ISGID;
2906	}
2907	EXPORT_SYMBOL(mode_strip_sgid);
2908
#ifdef CONFIG_DEBUG_VFS
/*
 * Dump an inode.
 *
 * TODO: add a proper inode dumping routine, this is a stub to get debug off the
 * ground.
 *
 * Currently only logs @reason and the inode's address at warning level.
 */
void dump_inode(struct inode *inode, const char *reason)
{
	/* Terminate the message with a newline so the log line is complete. */
	pr_warn("%s encountered for inode %px\n", reason, inode);
}

EXPORT_SYMBOL(dump_inode);
#endif
2923

source code of linux/fs/inode.c