xfs_inode.c source code [linux/fs/xfs/xfs_inode.c]

1	// SPDX-License-Identifier: GPL-2.0
2	/*
3	* Copyright (c) 2000-2006 Silicon Graphics, Inc.
4	* All Rights Reserved.
5	*/
6	#include <linux/iversion.h>
7
8	#include "xfs.h"
9	#include "xfs_fs.h"
10	#include "xfs_shared.h"
11	#include "xfs_format.h"
12	#include "xfs_log_format.h"
13	#include "xfs_trans_resv.h"
14	#include "xfs_mount.h"
15	#include "xfs_defer.h"
16	#include "xfs_inode.h"
17	#include "xfs_dir2.h"
18	#include "xfs_attr.h"
19	#include "xfs_bit.h"
20	#include "xfs_trans_space.h"
21	#include "xfs_trans.h"
22	#include "xfs_buf_item.h"
23	#include "xfs_inode_item.h"
24	#include "xfs_iunlink_item.h"
25	#include "xfs_ialloc.h"
26	#include "xfs_bmap.h"
27	#include "xfs_bmap_util.h"
28	#include "xfs_errortag.h"
29	#include "xfs_error.h"
30	#include "xfs_quota.h"
31	#include "xfs_filestream.h"
32	#include "xfs_trace.h"
33	#include "xfs_icache.h"
34	#include "xfs_symlink.h"
35	#include "xfs_trans_priv.h"
36	#include "xfs_log.h"
37	#include "xfs_bmap_btree.h"
38	#include "xfs_reflink.h"
39	#include "xfs_ag.h"
40	#include "xfs_log_priv.h"
41	#include "xfs_health.h"
42	#include "xfs_pnfs.h"
43	#include "xfs_parent.h"
44	#include "xfs_xattr.h"
45	#include "xfs_inode_util.h"
46	#include "xfs_metafile.h"
47
/*
 * NOTE(review): presumably the kmem cache that in-core struct xfs_inode
 * objects are allocated from; it is neither initialised nor used in the
 * functions below, so confirm against the code that sets it up.
 */
struct kmem_cache *xfs_inode_cache;
49
50	/*
51	* These two are wrapper routines around the xfs_ilock() routine used to
52	* centralize some grungy code. They are used in places that wish to lock the
53	* inode solely for reading the extents. The reason these places can't just
54	* call xfs_ilock(ip, XFS_ILOCK_SHARED) is that the inode lock also guards to
55	* bringing in of the extents from disk for a file in b-tree format. If the
56	* inode is in b-tree format, then we need to lock the inode exclusively until
57	* the extents are read in. Locking it exclusively all the time would limit
58	* our parallelism unnecessarily, though. What we do instead is check to see
59	* if the extents have been read in yet, and only lock the inode exclusively
60	* if they have not.
61	*
62	* The functions return a value which should be given to the corresponding
63	* xfs_iunlock() call.
64	*/
65	uint
66	xfs_ilock_data_map_shared(
67	struct xfs_inode *ip)
68	{
69	uint lock_mode = XFS_ILOCK_SHARED;
70
71	if (xfs_need_iread_extents(&ip->i_df))
72	lock_mode = XFS_ILOCK_EXCL;
73	xfs_ilock(ip, lock_mode);
74	return lock_mode;
75	}
76
77	uint
78	xfs_ilock_attr_map_shared(
79	struct xfs_inode *ip)
80	{
81	uint lock_mode = XFS_ILOCK_SHARED;
82
83	if (xfs_inode_has_attr_fork(ip) && xfs_need_iread_extents(&ip->i_af))
84	lock_mode = XFS_ILOCK_EXCL;
85	xfs_ilock(ip, lock_mode);
86	return lock_mode;
87	}
88
89	/*
90	* You can't set both SHARED and EXCL for the same lock,
91	* and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_MMAPLOCK_SHARED,
92	* XFS_MMAPLOCK_EXCL, XFS_ILOCK_SHARED, XFS_ILOCK_EXCL are valid values
93	* to set in lock_flags.
94	*/
95	static inline void
96	xfs_lock_flags_assert(
97	uint lock_flags)
98	{
99	ASSERT((lock_flags & (XFS_IOLOCK_SHARED \| XFS_IOLOCK_EXCL)) !=
100	(XFS_IOLOCK_SHARED \| XFS_IOLOCK_EXCL));
101	ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED \| XFS_MMAPLOCK_EXCL)) !=
102	(XFS_MMAPLOCK_SHARED \| XFS_MMAPLOCK_EXCL));
103	ASSERT((lock_flags & (XFS_ILOCK_SHARED \| XFS_ILOCK_EXCL)) !=
104	(XFS_ILOCK_SHARED \| XFS_ILOCK_EXCL));
105	ASSERT((lock_flags & ~(XFS_LOCK_MASK \| XFS_LOCK_SUBCLASS_MASK)) == `0`);
106	ASSERT(lock_flags != `0`);
107	}
108
109	/*
110	* In addition to i_rwsem in the VFS inode, the xfs inode contains 2
111	* multi-reader locks: invalidate_lock and the i_lock. This routine allows
112	* various combinations of the locks to be obtained.
113	*
114	* The 3 locks should always be ordered so that the IO lock is obtained first,
115	* the mmap lock second and the ilock last in order to prevent deadlock.
116	*
117	* Basic locking order:
118	*
119	* i_rwsem -> invalidate_lock -> page_lock -> i_ilock
120	*
121	* mmap_lock locking order:
122	*
123	* i_rwsem -> page lock -> mmap_lock
124	* mmap_lock -> invalidate_lock -> page_lock
125	*
126	* The difference in mmap_lock locking order mean that we cannot hold the
127	* invalidate_lock over syscall based read(2)/write(2) based IO. These IO paths
128	* can fault in pages during copy in/out (for buffered IO) or require the
129	* mmap_lock in get_user_pages() to map the user pages into the kernel address
130	* space for direct IO. Similarly the i_rwsem cannot be taken inside a page
131	* fault because page faults already hold the mmap_lock.
132	*
133	* Hence to serialise fully against both syscall and mmap based IO, we need to
134	* take both the i_rwsem and the invalidate_lock. These locks should only be
135	* both taken in places where we need to invalidate the page cache in a race
136	* free manner (e.g. truncate, hole punch and other extent manipulation
137	* functions).
138	*/
139	void
140	xfs_ilock(
141	xfs_inode_t *ip,
142	uint lock_flags)
143	{
144	trace_xfs_ilock(ip, lock_flags, _RET_IP_);
145
146	xfs_lock_flags_assert(lock_flags);
147
148	if (lock_flags & XFS_IOLOCK_EXCL) {
149	down_write_nested(sem: &VFS_I(ip)->i_rwsem,
150	XFS_IOLOCK_DEP(lock_flags));
151	} else if (lock_flags & XFS_IOLOCK_SHARED) {
152	down_read_nested(sem: &VFS_I(ip)->i_rwsem,
153	XFS_IOLOCK_DEP(lock_flags));
154	}
155
156	if (lock_flags & XFS_MMAPLOCK_EXCL) {
157	down_write_nested(sem: &VFS_I(ip)->i_mapping->invalidate_lock,
158	XFS_MMAPLOCK_DEP(lock_flags));
159	} else if (lock_flags & XFS_MMAPLOCK_SHARED) {
160	down_read_nested(sem: &VFS_I(ip)->i_mapping->invalidate_lock,
161	XFS_MMAPLOCK_DEP(lock_flags));
162	}
163
164	if (lock_flags & XFS_ILOCK_EXCL)
165	down_write_nested(sem: &ip->i_lock, XFS_ILOCK_DEP(lock_flags));
166	else if (lock_flags & XFS_ILOCK_SHARED)
167	down_read_nested(sem: &ip->i_lock, XFS_ILOCK_DEP(lock_flags));
168	}
169
170	/*
171	* This is just like xfs_ilock(), except that the caller
172	* is guaranteed not to sleep. It returns 1 if it gets
173	* the requested locks and 0 otherwise. If the IO lock is
174	* obtained but the inode lock cannot be, then the IO lock
175	* is dropped before returning.
176	*
177	* ip -- the inode being locked
178	* lock_flags -- this parameter indicates the inode's locks to be
179	* to be locked. See the comment for xfs_ilock() for a list
180	* of valid values.
181	*/
182	int
183	xfs_ilock_nowait(
184	xfs_inode_t *ip,
185	uint lock_flags)
186	{
187	trace_xfs_ilock_nowait(ip, lock_flags, _RET_IP_);
188
189	xfs_lock_flags_assert(lock_flags);
190
191	if (lock_flags & XFS_IOLOCK_EXCL) {
192	if (!down_write_trylock(sem: &VFS_I(ip)->i_rwsem))
193	goto out;
194	} else if (lock_flags & XFS_IOLOCK_SHARED) {
195	if (!down_read_trylock(sem: &VFS_I(ip)->i_rwsem))
196	goto out;
197	}
198
199	if (lock_flags & XFS_MMAPLOCK_EXCL) {
200	if (!down_write_trylock(sem: &VFS_I(ip)->i_mapping->invalidate_lock))
201	goto out_undo_iolock;
202	} else if (lock_flags & XFS_MMAPLOCK_SHARED) {
203	if (!down_read_trylock(sem: &VFS_I(ip)->i_mapping->invalidate_lock))
204	goto out_undo_iolock;
205	}
206
207	if (lock_flags & XFS_ILOCK_EXCL) {
208	if (!down_write_trylock(sem: &ip->i_lock))
209	goto out_undo_mmaplock;
210	} else if (lock_flags & XFS_ILOCK_SHARED) {
211	if (!down_read_trylock(sem: &ip->i_lock))
212	goto out_undo_mmaplock;
213	}
214	return `1`;
215
216	out_undo_mmaplock:
217	if (lock_flags & XFS_MMAPLOCK_EXCL)
218	up_write(sem: &VFS_I(ip)->i_mapping->invalidate_lock);
219	else if (lock_flags & XFS_MMAPLOCK_SHARED)
220	up_read(sem: &VFS_I(ip)->i_mapping->invalidate_lock);
221	out_undo_iolock:
222	if (lock_flags & XFS_IOLOCK_EXCL)
223	up_write(sem: &VFS_I(ip)->i_rwsem);
224	else if (lock_flags & XFS_IOLOCK_SHARED)
225	up_read(sem: &VFS_I(ip)->i_rwsem);
226	out:
227	return `0`;
228	}
229
230	/*
231	* xfs_iunlock() is used to drop the inode locks acquired with
232	* xfs_ilock() and xfs_ilock_nowait(). The caller must pass
233	* in the flags given to xfs_ilock() or xfs_ilock_nowait() so
234	* that we know which locks to drop.
235	*
236	* ip -- the inode being unlocked
237	* lock_flags -- this parameter indicates the inode's locks to be
238	* to be unlocked. See the comment for xfs_ilock() for a list
239	* of valid values for this parameter.
240	*
241	*/
242	void
243	xfs_iunlock(
244	xfs_inode_t *ip,
245	uint lock_flags)
246	{
247	xfs_lock_flags_assert(lock_flags);
248
249	if (lock_flags & XFS_IOLOCK_EXCL)
250	up_write(sem: &VFS_I(ip)->i_rwsem);
251	else if (lock_flags & XFS_IOLOCK_SHARED)
252	up_read(sem: &VFS_I(ip)->i_rwsem);
253
254	if (lock_flags & XFS_MMAPLOCK_EXCL)
255	up_write(sem: &VFS_I(ip)->i_mapping->invalidate_lock);
256	else if (lock_flags & XFS_MMAPLOCK_SHARED)
257	up_read(sem: &VFS_I(ip)->i_mapping->invalidate_lock);
258
259	if (lock_flags & XFS_ILOCK_EXCL)
260	up_write(sem: &ip->i_lock);
261	else if (lock_flags & XFS_ILOCK_SHARED)
262	up_read(sem: &ip->i_lock);
263
264	trace_xfs_iunlock(ip, lock_flags, _RET_IP_);
265	}
266
267	/*
268	* give up write locks. the i/o lock cannot be held nested
269	* if it is being demoted.
270	*/
271	void
272	xfs_ilock_demote(
273	xfs_inode_t *ip,
274	uint lock_flags)
275	{
276	ASSERT(lock_flags & (XFS_IOLOCK_EXCL\|XFS_MMAPLOCK_EXCL\|XFS_ILOCK_EXCL));
277	ASSERT((lock_flags &
278	~(XFS_IOLOCK_EXCL\|XFS_MMAPLOCK_EXCL\|XFS_ILOCK_EXCL)) == `0`);
279
280	if (lock_flags & XFS_ILOCK_EXCL)
281	downgrade_write(sem: &ip->i_lock);
282	if (lock_flags & XFS_MMAPLOCK_EXCL)
283	downgrade_write(sem: &VFS_I(ip)->i_mapping->invalidate_lock);
284	if (lock_flags & XFS_IOLOCK_EXCL)
285	downgrade_write(sem: &VFS_I(ip)->i_rwsem);
286
287	trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_);
288	}
289
290	void
291	xfs_assert_ilocked(
292	struct xfs_inode *ip,
293	uint lock_flags)
294	{
295	/*
296	* Sometimes we assert the ILOCK is held exclusively, but we're in
297	* a workqueue, so lockdep doesn't know we're the owner.
298	*/
299	if (lock_flags & XFS_ILOCK_SHARED)
300	rwsem_assert_held(sem: &ip->i_lock);
301	else if (lock_flags & XFS_ILOCK_EXCL)
302	rwsem_assert_held_write_nolockdep(sem: &ip->i_lock);
303
304	if (lock_flags & XFS_MMAPLOCK_SHARED)
305	rwsem_assert_held(sem: &VFS_I(ip)->i_mapping->invalidate_lock);
306	else if (lock_flags & XFS_MMAPLOCK_EXCL)
307	rwsem_assert_held_write(sem: &VFS_I(ip)->i_mapping->invalidate_lock);
308
309	if (lock_flags & XFS_IOLOCK_SHARED)
310	rwsem_assert_held(sem: &VFS_I(ip)->i_rwsem);
311	else if (lock_flags & XFS_IOLOCK_EXCL)
312	rwsem_assert_held_write(sem: &VFS_I(ip)->i_rwsem);
313	}
314
/*
 * xfs_lockdep_subclass_ok() is only used in an ASSERT, so is only called when
 * DEBUG or XFS_WARN is set.  And MAX_LOCKDEP_SUBCLASSES is then only defined
 * when CONFIG_LOCKDEP is set.  Hence the complex define below to avoid build
 * errors and warnings.
 *
 * Returns true when @subclass is below MAX_LOCKDEP_SUBCLASSES; in all other
 * configurations the check compiles away to a constant true.
 */
#if (defined(DEBUG) || defined(XFS_WARN)) && defined(CONFIG_LOCKDEP)
static bool
xfs_lockdep_subclass_ok(
	int subclass)
{
	return subclass < MAX_LOCKDEP_SUBCLASSES;
}
#else
#define xfs_lockdep_subclass_ok(subclass)	(true)
#endif
331
332	/*
333	* Bump the subclass so xfs_lock_inodes() acquires each lock with a different
334	* value. This can be called for any type of inode lock combination, including
335	* parent locking. Care must be taken to ensure we don't overrun the subclass
336	* storage fields in the class mask we build.
337	*/
338	static inline uint
339	xfs_lock_inumorder(
340	uint lock_mode,
341	uint subclass)
342	{
343	uint class = `0`;
344
345	ASSERT(!(lock_mode & XFS_ILOCK_PARENT));
346	ASSERT(xfs_lockdep_subclass_ok(subclass));
347
348	if (lock_mode & (XFS_IOLOCK_SHARED\|XFS_IOLOCK_EXCL)) {
349	ASSERT(subclass <= XFS_IOLOCK_MAX_SUBCLASS);
350	class += subclass << XFS_IOLOCK_SHIFT;
351	}
352
353	if (lock_mode & (XFS_MMAPLOCK_SHARED\|XFS_MMAPLOCK_EXCL)) {
354	ASSERT(subclass <= XFS_MMAPLOCK_MAX_SUBCLASS);
355	class += subclass << XFS_MMAPLOCK_SHIFT;
356	}
357
358	if (lock_mode & (XFS_ILOCK_SHARED\|XFS_ILOCK_EXCL)) {
359	ASSERT(subclass <= XFS_ILOCK_MAX_SUBCLASS);
360	class += subclass << XFS_ILOCK_SHIFT;
361	}
362
363	return (lock_mode & ~XFS_LOCK_SUBCLASS_MASK) \| class;
364	}
365
366	/*
367	* The following routine will lock n inodes in exclusive mode. We assume the
368	* caller calls us with the inodes in i_ino order.
369	*
370	* We need to detect deadlock where an inode that we lock is in the AIL and we
371	* start waiting for another inode that is locked by a thread in a long running
372	* transaction (such as truncate). This can result in deadlock since the long
373	* running trans might need to wait for the inode we just locked in order to
374	* push the tail and free space in the log.
375	*
376	* xfs_lock_inodes() can only be used to lock one type of lock at a time -
377	* the iolock, the mmaplock or the ilock, but not more than one at a time. If we
378	* lock more than one at a time, lockdep will report false positives saying we
379	* have violated locking orders.
380	*/
381	void
382	xfs_lock_inodes(
383	struct xfs_inode **ips,
384	int inodes,
385	uint lock_mode)
386	{
387	int attempts = `0`;
388	uint i;
389	int j;
390	bool try_lock;
391	struct xfs_log_item *lp;
392
393	/*
394	* Currently supports between 2 and 5 inodes with exclusive locking. We
395	* support an arbitrary depth of locking here, but absolute limits on
396	* inodes depend on the type of locking and the limits placed by
397	* lockdep annotations in xfs_lock_inumorder. These are all checked by
398	* the asserts.
399	*/
400	ASSERT(ips && inodes >= `2` && inodes <= `5`);
401	ASSERT(lock_mode & (XFS_IOLOCK_EXCL \| XFS_MMAPLOCK_EXCL \|
402	XFS_ILOCK_EXCL));
403	ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED \| XFS_MMAPLOCK_SHARED \|
404	XFS_ILOCK_SHARED)));
405	ASSERT(!(lock_mode & XFS_MMAPLOCK_EXCL) \|\|
406	inodes <= XFS_MMAPLOCK_MAX_SUBCLASS + `1`);
407	ASSERT(!(lock_mode & XFS_ILOCK_EXCL) \|\|
408	inodes <= XFS_ILOCK_MAX_SUBCLASS + `1`);
409
410	if (lock_mode & XFS_IOLOCK_EXCL) {
411	ASSERT(!(lock_mode & (XFS_MMAPLOCK_EXCL \| XFS_ILOCK_EXCL)));
412	} else if (lock_mode & XFS_MMAPLOCK_EXCL)
413	ASSERT(!(lock_mode & XFS_ILOCK_EXCL));
414
415	again:
416	try_lock = false;
417	i = `0`;
418	for (; i < inodes; i++) {
419	ASSERT(ips[i]);
420
421	if (i && (ips[i] == ips[i - `1`])) / Already locked /
422	continue;
423
424	/*
425	* If try_lock is not set yet, make sure all locked inodes are
426	* not in the AIL. If any are, set try_lock to be used later.
427	*/
428	if (!try_lock) {
429	for (j = (i - `1`); j >= `0` && !try_lock; j--) {
430	lp = &ips[j]->i_itemp->ili_item;
431	if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags))
432	try_lock = true;
433	}
434	}
435
436	/*
437	* If any of the previous locks we have locked is in the AIL,
438	* we must TRY to get the second and subsequent locks. If
439	* we can't get any, we must release all we have
440	* and try again.
441	*/
442	if (!try_lock) {
443	xfs_ilock(ip: ips[i], lock_flags: xfs_lock_inumorder(lock_mode, subclass: i));
444	continue;
445	}
446
447	/ try_lock means we have an inode locked that is in the AIL. /
448	ASSERT(i != `0`);
449	if (xfs_ilock_nowait(ip: ips[i], lock_flags: xfs_lock_inumorder(lock_mode, subclass: i)))
450	continue;
451
452	/*
453	* Unlock all previous guys and try again. xfs_iunlock will try
454	* to push the tail if the inode is in the AIL.
455	*/
456	attempts++;
457	for (j = i - `1`; j >= `0`; j--) {
458	/*
459	* Check to see if we've already unlocked this one. Not
460	* the first one going back, and the inode ptr is the
461	* same.
462	*/
463	if (j != (i - `1`) && ips[j] == ips[j + `1`])
464	continue;
465
466	xfs_iunlock(ip: ips[j], lock_flags: lock_mode);
467	}
468
469	if ((attempts % `5`) == `0`) {
470	delay(ticks: `1`); / Don't just spin the CPU /
471	}
472	goto again;
473	}
474	}
475
476	/*
477	* xfs_lock_two_inodes() can only be used to lock ilock. The iolock and
478	* mmaplock must be double-locked separately since we use i_rwsem and
479	* invalidate_lock for that. We now support taking one lock EXCL and the
480	* other SHARED.
481	*/
482	void
483	xfs_lock_two_inodes(
484	struct xfs_inode *ip0,
485	uint ip0_mode,
486	struct xfs_inode *ip1,
487	uint ip1_mode)
488	{
489	int attempts = `0`;
490	struct xfs_log_item *lp;
491
492	ASSERT(hweight32(ip0_mode) == `1`);
493	ASSERT(hweight32(ip1_mode) == `1`);
494	ASSERT(!(ip0_mode & (XFS_IOLOCK_SHARED\|XFS_IOLOCK_EXCL)));
495	ASSERT(!(ip1_mode & (XFS_IOLOCK_SHARED\|XFS_IOLOCK_EXCL)));
496	ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED\|XFS_MMAPLOCK_EXCL)));
497	ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED\|XFS_MMAPLOCK_EXCL)));
498	ASSERT(ip0->i_ino != ip1->i_ino);
499
500	if (ip0->i_ino > ip1->i_ino) {
501	swap(ip0, ip1);
502	swap(ip0_mode, ip1_mode);
503	}
504
505	again:
506	xfs_ilock(ip: ip0, lock_flags: xfs_lock_inumorder(lock_mode: ip0_mode, subclass: `0`));
507
508	/*
509	* If the first lock we have locked is in the AIL, we must TRY to get
510	* the second lock. If we can't get it, we must release the first one
511	* and try again.
512	*/
513	lp = &ip0->i_itemp->ili_item;
514	if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags)) {
515	if (!xfs_ilock_nowait(ip: ip1, lock_flags: xfs_lock_inumorder(lock_mode: ip1_mode, subclass: `1`))) {
516	xfs_iunlock(ip: ip0, lock_flags: ip0_mode);
517	if ((++attempts % `5`) == `0`)
518	delay(ticks: `1`); / Don't just spin the CPU /
519	goto again;
520	}
521	} else {
522	xfs_ilock(ip: ip1, lock_flags: xfs_lock_inumorder(lock_mode: ip1_mode, subclass: `1`));
523	}
524	}
525
526	/*
527	* Lookups up an inode from "name". If ci_name is not NULL, then a CI match
528	* is allowed, otherwise it has to be an exact match. If a CI match is found,
529	* ci_name->name will point to a the actual name (caller must free) or
530	* will be set to NULL if an exact match is found.
531	*/
532	int
533	xfs_lookup(
534	struct xfs_inode *dp,
535	const struct xfs_name *name,
536	struct xfs_inode **ipp,
537	struct xfs_name *ci_name)
538	{
539	xfs_ino_t inum;
540	int error;
541
542	trace_xfs_lookup(dp, xfs_lookup: name);
543
544	if (xfs_is_shutdown(mp: dp->i_mount))
545	return -EIO;
546	if (xfs_ifork_zapped(dp, XFS_DATA_FORK))
547	return -EIO;
548
549	error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
550	if (error)
551	goto out_unlock;
552
553	error = xfs_iget(mp: dp->i_mount, NULL, ino: inum, flags: `0`, lock_flags: `0`, ipp);
554	if (error)
555	goto out_free_name;
556
557	/*
558	* Fail if a directory entry in the regular directory tree points to
559	* a metadata file.
560	*/
561	if (XFS_IS_CORRUPT(dp->i_mount, xfs_is_metadir_inode(*ipp))) {
562	xfs_fs_mark_sick(dp->i_mount, XFS_SICK_FS_METADIR);
563	error = -EFSCORRUPTED;
564	goto out_irele;
565	}
566
567	return `0`;
568
569	out_irele:
570	xfs_irele(ip: *ipp);
571	out_free_name:
572	if (ci_name)
573	kfree(objp: ci_name->name);
574	out_unlock:
575	*ipp = NULL;
576	return error;
577	}
578
579	/*
580	* Initialise a newly allocated inode and return the in-core inode to the
581	* caller locked exclusively.
582	*
583	* Caller is responsible for unlocking the inode manually upon return
584	*/
585	int
586	xfs_icreate(
587	struct xfs_trans *tp,
588	xfs_ino_t ino,
589	const struct xfs_icreate_args *args,
590	struct xfs_inode **ipp)
591	{
592	struct xfs_mount *mp = tp->t_mountp;
593	struct xfs_inode *ip = NULL;
594	int error;
595
596	/*
597	* Get the in-core inode with the lock held exclusively to prevent
598	* others from looking at until we're done.
599	*/
600	error = xfs_iget(mp, tp, ino, XFS_IGET_CREATE, XFS_ILOCK_EXCL, ipp: &ip);
601	if (error)
602	return error;
603
604	ASSERT(ip != NULL);
605	xfs_trans_ijoin(tp, ip, `0`);
606	xfs_inode_init(tp, args, ip);
607
608	/ now that we have an i_mode we can setup the inode structure /
609	xfs_setup_inode(ip);
610
611	*ipp = ip;
612	return `0`;
613	}
614
615	/ Return dquots for the ids that will be assigned to a new file. /
616	int
617	xfs_icreate_dqalloc(
618	const struct xfs_icreate_args *args,
619	struct xfs_dquot **udqpp,
620	struct xfs_dquot **gdqpp,
621	struct xfs_dquot **pdqpp)
622	{
623	struct inode *dir = VFS_I(ip: args->pip);
624	kuid_t uid = GLOBAL_ROOT_UID;
625	kgid_t gid = GLOBAL_ROOT_GID;
626	prid_t prid = `0`;
627	unsigned int flags = XFS_QMOPT_QUOTALL;
628
629	if (args->idmap) {
630	/*
631	* The uid/gid computation code must match what the VFS uses to
632	* assign i_[ug]id. INHERIT adjusts the gid computation for
633	* setgid/grpid systems.
634	*/
635	uid = mapped_fsuid(idmap: args->idmap, fs_userns: i_user_ns(inode: dir));
636	gid = mapped_fsgid(idmap: args->idmap, fs_userns: i_user_ns(inode: dir));
637	prid = xfs_get_initial_prid(args->pip);
638	flags \|= XFS_QMOPT_INHERIT;
639	}
640
641	udqpp = gdqpp = *pdqpp = NULL;
642
643	return xfs_qm_vop_dqalloc(args->pip, uid, gid, prid, flags, udqpp,
644	gdqpp, pdqpp);
645	}
646
647	int
648	xfs_create(
649	const struct xfs_icreate_args *args,
650	struct xfs_name *name,
651	struct xfs_inode **ipp)
652	{
653	struct xfs_inode *dp = args->pip;
654	struct xfs_dir_update du = {
655	.dp = dp,
656	.name = name,
657	};
658	struct xfs_mount *mp = dp->i_mount;
659	struct xfs_trans *tp = NULL;
660	struct xfs_dquot *udqp;
661	struct xfs_dquot *gdqp;
662	struct xfs_dquot *pdqp;
663	struct xfs_trans_res *tres;
664	xfs_ino_t ino;
665	bool unlock_dp_on_error = false;
666	bool is_dir = S_ISDIR(args->mode);
667	uint resblks;
668	int error;
669
670	trace_xfs_create(dp, xfs_create: name);
671
672	if (xfs_is_shutdown(mp))
673	return -EIO;
674	if (xfs_ifork_zapped(dp, XFS_DATA_FORK))
675	return -EIO;
676
677	/ Make sure that we have allocated dquot(s) on disk. /
678	error = xfs_icreate_dqalloc(args, udqpp: &udqp, gdqpp: &gdqp, pdqpp: &pdqp);
679	if (error)
680	return error;
681
682	if (is_dir) {
683	resblks = xfs_mkdir_space_res(mp, name->len);
684	tres = &M_RES(mp)->tr_mkdir;
685	} else {
686	resblks = xfs_create_space_res(mp, name->len);
687	tres = &M_RES(mp)->tr_create;
688	}
689
690	error = xfs_parent_start(mp, &du.ppargs);
691	if (error)
692	goto out_release_dquots;
693
694	/*
695	* Initially assume that the file does not exist and
696	* reserve the resources for that case. If that is not
697	* the case we'll drop the one we have and get a more
698	* appropriate transaction later.
699	*/
700	error = xfs_trans_alloc_icreate(mp, resv: tres, udqp, gdqp, pdqp, dblocks: resblks,
701	tpp: &tp);
702	if (error == -ENOSPC) {
703	/ flush outstanding delalloc blocks and retry /
704	xfs_flush_inodes(mp);
705	error = xfs_trans_alloc_icreate(mp, resv: tres, udqp, gdqp, pdqp,
706	dblocks: resblks, tpp: &tp);
707	}
708	if (error)
709	goto out_parent;
710
711	xfs_ilock(ip: dp, XFS_ILOCK_EXCL \| XFS_ILOCK_PARENT);
712	unlock_dp_on_error = true;
713
714	/*
715	* A newly created regular or special file just has one directory
716	* entry pointing to them, but a directory also the "." entry
717	* pointing to itself.
718	*/
719	error = xfs_dialloc(&tp, args, &ino);
720	if (!error)
721	error = xfs_icreate(tp, ino, args, ipp: &du.ip);
722	if (error)
723	goto out_trans_cancel;
724
725	/*
726	* Now we join the directory inode to the transaction. We do not do it
727	* earlier because xfs_dialloc might commit the previous transaction
728	* (and release all the locks). An error from here on will result in
729	* the transaction cancel unlocking dp so don't do it explicitly in the
730	* error path.
731	*/
732	xfs_trans_ijoin(tp, dp, `0`);
733
734	error = xfs_dir_create_child(tp, resblks, &du);
735	if (error)
736	goto out_trans_cancel;
737
738	/*
739	* If this is a synchronous mount, make sure that the
740	* create transaction goes to disk before returning to
741	* the user.
742	*/
743	if (xfs_has_wsync(mp) \|\| xfs_has_dirsync(mp))
744	xfs_trans_set_sync(tp);
745
746	/*
747	* Attach the dquot(s) to the inodes and modify them incore.
748	* These ids of the inode couldn't have changed since the new
749	* inode has been locked ever since it was created.
750	*/
751	xfs_qm_vop_create_dqattach(tp, du.ip, udqp, gdqp, pdqp);
752
753	error = xfs_trans_commit(tp);
754	if (error)
755	goto out_release_inode;
756
757	xfs_qm_dqrele(udqp);
758	xfs_qm_dqrele(gdqp);
759	xfs_qm_dqrele(pdqp);
760
761	*ipp = du.ip;
762	xfs_iunlock(ip: du.ip, XFS_ILOCK_EXCL);
763	xfs_iunlock(ip: dp, XFS_ILOCK_EXCL);
764	xfs_parent_finish(mp, du.ppargs);
765	return `0`;
766
767	out_trans_cancel:
768	xfs_trans_cancel(tp);
769	out_release_inode:
770	/*
771	* Wait until after the current transaction is aborted to finish the
772	* setup of the inode and release the inode. This prevents recursive
773	* transactions and deadlocks from xfs_inactive.
774	*/
775	if (du.ip) {
776	xfs_iunlock(ip: du.ip, XFS_ILOCK_EXCL);
777	xfs_finish_inode_setup(ip: du.ip);
778	xfs_irele(ip: du.ip);
779	}
780	out_parent:
781	xfs_parent_finish(mp, du.ppargs);
782	out_release_dquots:
783	xfs_qm_dqrele(udqp);
784	xfs_qm_dqrele(gdqp);
785	xfs_qm_dqrele(pdqp);
786
787	if (unlock_dp_on_error)
788	xfs_iunlock(ip: dp, XFS_ILOCK_EXCL);
789	return error;
790	}
791
792	int
793	xfs_create_tmpfile(
794	const struct xfs_icreate_args *args,
795	struct xfs_inode **ipp)
796	{
797	struct xfs_inode *dp = args->pip;
798	struct xfs_mount *mp = dp->i_mount;
799	struct xfs_inode *ip = NULL;
800	struct xfs_trans *tp = NULL;
801	struct xfs_dquot *udqp;
802	struct xfs_dquot *gdqp;
803	struct xfs_dquot *pdqp;
804	struct xfs_trans_res *tres;
805	xfs_ino_t ino;
806	uint resblks;
807	int error;
808
809	ASSERT(args->flags & XFS_ICREATE_TMPFILE);
810
811	if (xfs_is_shutdown(mp))
812	return -EIO;
813
814	/ Make sure that we have allocated dquot(s) on disk. /
815	error = xfs_icreate_dqalloc(args, udqpp: &udqp, gdqpp: &gdqp, pdqpp: &pdqp);
816	if (error)
817	return error;
818
819	resblks = XFS_IALLOC_SPACE_RES(mp);
820	tres = &M_RES(mp)->tr_create_tmpfile;
821
822	error = xfs_trans_alloc_icreate(mp, resv: tres, udqp, gdqp, pdqp, dblocks: resblks,
823	tpp: &tp);
824	if (error)
825	goto out_release_dquots;
826
827	error = xfs_dialloc(&tp, args, &ino);
828	if (!error)
829	error = xfs_icreate(tp, ino, args, ipp: &ip);
830	if (error)
831	goto out_trans_cancel;
832
833	if (xfs_has_wsync(mp))
834	xfs_trans_set_sync(tp);
835
836	/*
837	* Attach the dquot(s) to the inodes and modify them incore.
838	* These ids of the inode couldn't have changed since the new
839	* inode has been locked ever since it was created.
840	*/
841	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
842
843	error = xfs_iunlink(tp, ip);
844	if (error)
845	goto out_trans_cancel;
846
847	error = xfs_trans_commit(tp);
848	if (error)
849	goto out_release_inode;
850
851	xfs_qm_dqrele(udqp);
852	xfs_qm_dqrele(gdqp);
853	xfs_qm_dqrele(pdqp);
854
855	*ipp = ip;
856	xfs_iunlock(ip, XFS_ILOCK_EXCL);
857	return `0`;
858
859	out_trans_cancel:
860	xfs_trans_cancel(tp);
861	out_release_inode:
862	/*
863	* Wait until after the current transaction is aborted to finish the
864	* setup of the inode and release the inode. This prevents recursive
865	* transactions and deadlocks from xfs_inactive.
866	*/
867	if (ip) {
868	xfs_iunlock(ip, XFS_ILOCK_EXCL);
869	xfs_finish_inode_setup(ip);
870	xfs_irele(ip);
871	}
872	out_release_dquots:
873	xfs_qm_dqrele(udqp);
874	xfs_qm_dqrele(gdqp);
875	xfs_qm_dqrele(pdqp);
876
877	return error;
878	}
879
880	int
881	xfs_link(
882	struct xfs_inode *tdp,
883	struct xfs_inode *sip,
884	struct xfs_name *target_name)
885	{
886	struct xfs_dir_update du = {
887	.dp = tdp,
888	.name = target_name,
889	.ip = sip,
890	};
891	struct xfs_mount *mp = tdp->i_mount;
892	struct xfs_trans *tp;
893	int error, nospace_error = `0`;
894	int resblks;
895
896	trace_xfs_link(dp: tdp, xfs_link: target_name);
897
898	ASSERT(!S_ISDIR(VFS_I(sip)->i_mode));
899
900	if (xfs_is_shutdown(mp))
901	return -EIO;
902	if (xfs_ifork_zapped(tdp, XFS_DATA_FORK))
903	return -EIO;
904
905	error = xfs_qm_dqattach(sip);
906	if (error)
907	goto std_return;
908
909	error = xfs_qm_dqattach(tdp);
910	if (error)
911	goto std_return;
912
913	error = xfs_parent_start(mp, &du.ppargs);
914	if (error)
915	goto std_return;
916
917	resblks = xfs_link_space_res(mp, target_name->len);
918	error = xfs_trans_alloc_dir(dp: tdp, resv: &M_RES(mp)->tr_link, ip: sip, dblocks: &resblks,
919	tpp: &tp, nospace_error: &nospace_error);
920	if (error)
921	goto out_parent;
922
923	/*
924	* We don't allow reservationless or quotaless hardlinking when parent
925	* pointers are enabled because we can't back out if the xattrs must
926	* grow.
927	*/
928	if (du.ppargs && nospace_error) {
929	error = nospace_error;
930	goto error_return;
931	}
932
933	/*
934	* If we are using project inheritance, we only allow hard link
935	* creation in our tree when the project IDs are the same; else
936	* the tree quota mechanism could be circumvented.
937	*/
938	if (unlikely((tdp->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
939	tdp->i_projid != sip->i_projid)) {
940	/*
941	* Project quota setup skips special files which can
942	* leave inodes in a PROJINHERIT directory without a
943	* project ID set. We need to allow links to be made
944	* to these "project-less" inodes because userspace
945	* expects them to succeed after project ID setup,
946	* but everything else should be rejected.
947	*/
948	if (!special_file(VFS_I(sip)->i_mode) \|\|
949	sip->i_projid != `0`) {
950	error = -EXDEV;
951	goto error_return;
952	}
953	}
954
955	error = xfs_dir_add_child(tp, resblks, &du);
956	if (error)
957	goto error_return;
958
959	/*
960	* If this is a synchronous mount, make sure that the
961	* link transaction goes to disk before returning to
962	* the user.
963	*/
964	if (xfs_has_wsync(mp) \|\| xfs_has_dirsync(mp))
965	xfs_trans_set_sync(tp);
966
967	error = xfs_trans_commit(tp);
968	xfs_iunlock(ip: tdp, XFS_ILOCK_EXCL);
969	xfs_iunlock(ip: sip, XFS_ILOCK_EXCL);
970	xfs_parent_finish(mp, du.ppargs);
971	return error;
972
973	error_return:
974	xfs_trans_cancel(tp);
975	xfs_iunlock(ip: tdp, XFS_ILOCK_EXCL);
976	xfs_iunlock(ip: sip, XFS_ILOCK_EXCL);
977	out_parent:
978	xfs_parent_finish(mp, du.ppargs);
979	std_return:
980	if (error == -ENOSPC && nospace_error)
981	error = nospace_error;
982	return error;
983	}
984
985	/ Clear the reflink flag and the cowblocks tag if possible. /
986	static void
987	xfs_itruncate_clear_reflink_flags(
988	struct xfs_inode *ip)
989	{
990	struct xfs_ifork *dfork;
991	struct xfs_ifork *cfork;
992
993	if (!xfs_is_reflink_inode(ip))
994	return;
995	dfork = xfs_ifork_ptr(ip, XFS_DATA_FORK);
996	cfork = xfs_ifork_ptr(ip, XFS_COW_FORK);
997	if (dfork->if_bytes == `0` && cfork->if_bytes == `0`)
998	ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
999	if (cfork->if_bytes == `0`)
1000	xfs_inode_clear_cowblocks_tag(ip);
1001	}
1002
1003	/*
1004	* Free up the underlying blocks past new_size. The new size must be smaller
1005	* than the current size. This routine can be used both for the attribute and
1006	* data fork, and does not modify the inode size, which is left to the caller.
1007	*
1008	* The transaction passed to this routine must have made a permanent log
1009	* reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the
1010	* given transaction and start new ones, so make sure everything involved in
1011	* the transaction is tidy before calling here. Some transaction will be
1012	* returned to the caller to be committed. The incoming transaction must
1013	* already include the inode, and both inode locks must be held exclusively.
1014	* The inode must also be "held" within the transaction. On return the inode
1015	* will be "held" within the returned transaction. This routine does NOT
1016	* require any disk space to be reserved for it within the transaction.
1017	*
1018	* If we get an error, we must return with the inode locked and linked into the
1019	* current transaction. This keeps things simple for the higher level code,
1020	* because it always knows that the inode is locked and held in the transaction
1021	* that returns to it whether errors occur or not. We don't mark the inode
1022	* dirty on error so that transactions can be easily aborted if possible.
1023	*/
1024	int
1025	xfs_itruncate_extents_flags(
1026	struct xfs_trans **tpp,
1027	struct xfs_inode *ip,
1028	int whichfork,
1029	xfs_fsize_t new_size,
1030	int flags)
1031	{
1032	struct xfs_mount *mp = ip->i_mount;
1033	struct xfs_trans tp = tpp;
1034	xfs_fileoff_t first_unmap_block;
1035	int error = `0`;
1036
1037	xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
1038	if (atomic_read(v: &VFS_I(ip)->i_count))
1039	xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL);
1040	ASSERT(new_size <= XFS_ISIZE(ip));
1041	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
1042	ASSERT(ip->i_itemp != NULL);
1043	ASSERT(ip->i_itemp->ili_lock_flags == `0`);
1044	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
1045
1046	trace_xfs_itruncate_extents_start(ip, new_size);
1047
1048	flags \|= xfs_bmapi_aflag(whichfork);
1049
1050	/*
1051	* Since it is possible for space to become allocated beyond
1052	* the end of the file (in a crash where the space is allocated
1053	* but the inode size is not yet updated), simply remove any
1054	* blocks which show up between the new EOF and the maximum
1055	* possible file size.
1056	*
1057	* We have to free all the blocks to the bmbt maximum offset, even if
1058	* the page cache can't scale that far.
1059	*/
1060	first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
1061	if (!xfs_verify_fileoff(mp, first_unmap_block)) {
1062	WARN_ON_ONCE(first_unmap_block > XFS_MAX_FILEOFF);
1063	return `0`;
1064	}
1065
1066	error = xfs_bunmapi_range(&tp, ip, flags, first_unmap_block,
1067	XFS_MAX_FILEOFF);
1068	if (error)
1069	goto out;
1070
1071	if (whichfork == XFS_DATA_FORK) {
1072	/ Remove all pending CoW reservations. /
1073	error = xfs_reflink_cancel_cow_blocks(ip, &tp,
1074	first_unmap_block, XFS_MAX_FILEOFF, true);
1075	if (error)
1076	goto out;
1077
1078	xfs_itruncate_clear_reflink_flags(ip);
1079	}
1080
1081	/*
1082	* Always re-log the inode so that our permanent transaction can keep
1083	* on rolling it forward in the log.
1084	*/
1085	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1086
1087	trace_xfs_itruncate_extents_end(ip, new_size);
1088
1089	out:
1090	*tpp = tp;
1091	return error;
1092	}
1093
1094	/*
1095	* Mark all the buffers attached to this directory stale. In theory we should
1096	* never be freeing a directory with any blocks at all, but this covers the
1097	* case where we've recovered a directory swap with a "temporary" directory
1098	* created by online repair and now need to dump it.
1099	*/
1100	STATIC void
1101	xfs_inactive_dir(
1102	struct xfs_inode *dp)
1103	{
1104	struct xfs_iext_cursor icur;
1105	struct xfs_bmbt_irec got;
1106	struct xfs_mount *mp = dp->i_mount;
1107	struct xfs_da_geometry *geo = mp->m_dir_geo;
1108	struct xfs_ifork *ifp = xfs_ifork_ptr(dp, XFS_DATA_FORK);
1109	xfs_fileoff_t off;
1110
1111	/*
1112	* Invalidate each directory block. All directory blocks are of
1113	* fsbcount length and alignment, so we only need to walk those same
1114	* offsets. We hold the only reference to this inode, so we must wait
1115	* for the buffer locks.
1116	*/
1117	for_each_xfs_iext(ifp, &icur, &got) {
1118	for (off = round_up(got.br_startoff, geo->fsbcount);
1119	off < got.br_startoff + got.br_blockcount;
1120	off += geo->fsbcount) {
1121	struct xfs_buf *bp = NULL;
1122	xfs_fsblock_t fsbno;
1123	int error;
1124
1125	fsbno = (off - got.br_startoff) + got.br_startblock;
1126	error = xfs_buf_incore(mp->m_ddev_targp,
1127	XFS_FSB_TO_DADDR(mp, fsbno),
1128	XFS_FSB_TO_BB(mp, geo->fsbcount),
1129	XBF_LIVESCAN, &bp);
1130	if (error)
1131	continue;
1132
1133	xfs_buf_stale(bp);
1134	xfs_buf_relse(bp);
1135	}
1136	}
1137	}
1138
1139	/*
1140	* xfs_inactive_truncate
1141	*
1142	* Called to perform a truncate when an inode becomes unlinked.
1143	*/
1144	STATIC int
1145	xfs_inactive_truncate(
1146	struct xfs_inode *ip)
1147	{
1148	struct xfs_mount *mp = ip->i_mount;
1149	struct xfs_trans *tp;
1150	int error;
1151
1152	error = xfs_trans_alloc(mp, resp: &M_RES(mp)->tr_itruncate, blocks: `0`, rtextents: `0`, flags: `0`, tpp: &tp);
1153	if (error) {
1154	ASSERT(xfs_is_shutdown(mp));
1155	return error;
1156	}
1157	xfs_ilock(ip, XFS_ILOCK_EXCL);
1158	xfs_trans_ijoin(tp, ip, `0`);
1159
1160	/*
1161	* Log the inode size first to prevent stale data exposure in the event
1162	* of a system crash before the truncate completes. See the related
1163	* comment in xfs_vn_setattr_size() for details.
1164	*/
1165	ip->i_disk_size = `0`;
1166	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1167
1168	error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, `0`);
1169	if (error)
1170	goto error_trans_cancel;
1171
1172	ASSERT(ip->i_df.if_nextents == `0`);
1173
1174	error = xfs_trans_commit(tp);
1175	if (error)
1176	goto error_unlock;
1177
1178	xfs_iunlock(ip, XFS_ILOCK_EXCL);
1179	return `0`;
1180
1181	error_trans_cancel:
1182	xfs_trans_cancel(tp);
1183	error_unlock:
1184	xfs_iunlock(ip, XFS_ILOCK_EXCL);
1185	return error;
1186	}
1187
1188	/*
1189	* xfs_inactive_ifree()
1190	*
1191	* Perform the inode free when an inode is unlinked.
1192	*/
1193	STATIC int
1194	xfs_inactive_ifree(
1195	struct xfs_inode *ip)
1196	{
1197	struct xfs_mount *mp = ip->i_mount;
1198	struct xfs_trans *tp;
1199	int error;
1200
1201	/*
1202	* We try to use a per-AG reservation for any block needed by the finobt
1203	* tree, but as the finobt feature predates the per-AG reservation
1204	* support a degraded file system might not have enough space for the
1205	* reservation at mount time. In that case try to dip into the reserved
1206	* pool and pray.
1207	*
1208	* Send a warning if the reservation does happen to fail, as the inode
1209	* now remains allocated and sits on the unlinked list until the fs is
1210	* repaired.
1211	*/
1212	if (unlikely(mp->m_finobt_nores)) {
1213	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree,
1214	XFS_IFREE_SPACE_RES(mp), `0`, XFS_TRANS_RESERVE,
1215	&tp);
1216	} else {
1217	error = xfs_trans_alloc(mp, resp: &M_RES(mp)->tr_ifree, blocks: `0`, rtextents: `0`, flags: `0`, tpp: &tp);
1218	}
1219	if (error) {
1220	if (error == -ENOSPC) {
1221	xfs_warn_ratelimited(mp,
1222	"Failed to remove inode(s) from unlinked list. "
1223	"Please free space, unmount and run xfs_repair.");
1224	} else {
1225	ASSERT(xfs_is_shutdown(mp));
1226	}
1227	return error;
1228	}
1229
1230	/*
1231	* We do not hold the inode locked across the entire rolling transaction
1232	* here. We only need to hold it for the first transaction that
1233	* xfs_ifree() builds, which may mark the inode XFS_ISTALE if the
1234	* underlying cluster buffer is freed. Relogging an XFS_ISTALE inode
1235	* here breaks the relationship between cluster buffer invalidation and
1236	* stale inode invalidation on cluster buffer item journal commit
1237	* completion, and can result in leaving dirty stale inodes hanging
1238	* around in memory.
1239	*
1240	* We have no need for serialising this inode operation against other
1241	* operations - we freed the inode and hence reallocation is required
1242	* and that will serialise on reallocating the space the deferops need
1243	* to free. Hence we can unlock the inode on the first commit of
1244	* the transaction rather than roll it right through the deferops. This
1245	* avoids relogging the XFS_ISTALE inode.
1246	*
1247	* We check that xfs_ifree() hasn't grown an internal transaction roll
1248	* by asserting that the inode is still locked when it returns.
1249	*/
1250	xfs_ilock(ip, XFS_ILOCK_EXCL);
1251	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1252
1253	error = xfs_ifree(tp, ip);
1254	xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
1255	if (error) {
1256	/*
1257	* If we fail to free the inode, shut down. The cancel
1258	* might do that, we need to make sure. Otherwise the
1259	* inode might be lost for a long time or forever.
1260	*/
1261	if (!xfs_is_shutdown(mp)) {
1262	xfs_notice(mp, "%s: xfs_ifree returned error %d",
1263	__func__, error);
1264	xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
1265	}
1266	xfs_trans_cancel(tp);
1267	return error;
1268	}
1269
1270	/*
1271	* Credit the quota account(s). The inode is gone.
1272	*/
1273	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -`1`);
1274
1275	return xfs_trans_commit(tp);
1276	}
1277
1278	/*
1279	* Returns true if we need to update the on-disk metadata before we can free
1280	* the memory used by this inode. Updates include freeing post-eof
1281	* preallocations; freeing COW staging extents; and marking the inode free in
1282	* the inobt if it is on the unlinked list.
1283	*/
1284	bool
1285	xfs_inode_needs_inactive(
1286	struct xfs_inode *ip)
1287	{
1288	struct xfs_mount *mp = ip->i_mount;
1289	struct xfs_ifork *cow_ifp = xfs_ifork_ptr(ip, XFS_COW_FORK);
1290
1291	/*
1292	* If the inode is already free, then there can be nothing
1293	* to clean up here.
1294	*/
1295	if (VFS_I(ip)->i_mode == `0`)
1296	return false;
1297
1298	/*
1299	* If this is a read-only mount, don't do this (would generate I/O)
1300	* unless we're in log recovery and cleaning the iunlinked list.
1301	*/
1302	if (xfs_is_readonly(mp) && !xlog_recovery_needed(log: mp->m_log))
1303	return false;
1304
1305	/ If the log isn't running, push inodes straight to reclaim. /
1306	if (xfs_is_shutdown(mp) \|\| xfs_has_norecovery(mp))
1307	return false;
1308
1309	/ Metadata inodes require explicit resource cleanup. /
1310	if (xfs_is_internal_inode(ip))
1311	return false;
1312
1313	/ Want to clean out the cow blocks if there are any. /
1314	if (cow_ifp && cow_ifp->if_bytes > `0`)
1315	return true;
1316
1317	/ Unlinked files must be freed. /
1318	if (VFS_I(ip)->i_nlink == `0`)
1319	return true;
1320
1321	/*
1322	* This file isn't being freed, so check if there are post-eof blocks
1323	* to free.
1324	*
1325	* Note: don't bother with iolock here since lockdep complains about
1326	* acquiring it in reclaim context. We have the only reference to the
1327	* inode at this point anyways.
1328	*/
1329	return xfs_can_free_eofblocks(ip);
1330	}
1331
1332	/*
1333	* Save health status somewhere, if we're dumping an inode with uncorrected
1334	* errors and online repair isn't running.
1335	*/
1336	static inline void
1337	xfs_inactive_health(
1338	struct xfs_inode *ip)
1339	{
1340	struct xfs_mount *mp = ip->i_mount;
1341	struct xfs_perag *pag;
1342	unsigned int sick;
1343	unsigned int checked;
1344
1345	xfs_inode_measure_sickness(ip, &sick, &checked);
1346	if (!sick)
1347	return;
1348
1349	trace_xfs_inode_unfixed_corruption(ip, flags: sick);
1350
1351	if (sick & XFS_SICK_INO_FORGET)
1352	return;
1353
1354	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
1355	if (!pag) {
1356	/ There had better still be a perag structure! /
1357	ASSERT(`0`);
1358	return;
1359	}
1360
1361	xfs_ag_mark_sick(pag, XFS_SICK_AG_INODES);
1362	xfs_perag_put(pag);
1363	}
1364
1365	/*
1366	* xfs_inactive
1367	*
1368	* This is called when the vnode reference count for the vnode
1369	* goes to zero. If the file has been unlinked, then it must
1370	* now be truncated. Also, we clear all of the read-ahead state
1371	* kept for the inode here since the file is now closed.
1372	*/
1373	int
1374	xfs_inactive(
1375	xfs_inode_t *ip)
1376	{
1377	struct xfs_mount *mp;
1378	int error = `0`;
1379	int truncate = `0`;
1380
1381	/*
1382	* If the inode is already free, then there can be nothing
1383	* to clean up here.
1384	*/
1385	if (VFS_I(ip)->i_mode == `0`) {
1386	ASSERT(ip->i_df.if_broot_bytes == `0`);
1387	goto out;
1388	}
1389
1390	mp = ip->i_mount;
1391	ASSERT(!xfs_iflags_test(ip, XFS_IRECOVERY));
1392
1393	xfs_inactive_health(ip);
1394
1395	/*
1396	* If this is a read-only mount, don't do this (would generate I/O)
1397	* unless we're in log recovery and cleaning the iunlinked list.
1398	*/
1399	if (xfs_is_readonly(mp) && !xlog_recovery_needed(log: mp->m_log))
1400	goto out;
1401
1402	/ Metadata inodes require explicit resource cleanup. /
1403	if (xfs_is_internal_inode(ip))
1404	goto out;
1405
1406	/ Try to clean out the cow blocks if there are any. /
1407	if (xfs_inode_has_cow_data(ip)) {
1408	error = xfs_reflink_cancel_cow_range(ip, `0`, NULLFILEOFF, true);
1409	if (error)
1410	goto out;
1411	}
1412
1413	if (VFS_I(ip)->i_nlink != `0`) {
1414	/*
1415	* Note: don't bother with iolock here since lockdep complains
1416	* about acquiring it in reclaim context. We have the only
1417	* reference to the inode at this point anyways.
1418	*/
1419	if (xfs_can_free_eofblocks(ip))
1420	error = xfs_free_eofblocks(ip);
1421
1422	goto out;
1423	}
1424
1425	if (S_ISREG(VFS_I(ip)->i_mode) &&
1426	(ip->i_disk_size != `0` \|\| XFS_ISIZE(ip) != `0` \|\|
1427	xfs_inode_has_filedata(ip)))
1428	truncate = `1`;
1429
1430	if (xfs_iflags_test(ip, XFS_IQUOTAUNCHECKED)) {
1431	/*
1432	* If this inode is being inactivated during a quotacheck and
1433	* has not yet been scanned by quotacheck, we /must/ remove
1434	* the dquots from the inode before inactivation changes the
1435	* block and inode counts. Most probably this is a result of
1436	* reloading the incore iunlinked list to purge unrecovered
1437	* unlinked inodes.
1438	*/
1439	xfs_qm_dqdetach(ip);
1440	} else {
1441	error = xfs_qm_dqattach(ip);
1442	if (error)
1443	goto out;
1444	}
1445
1446	if (S_ISDIR(VFS_I(ip)->i_mode) && ip->i_df.if_nextents > `0`) {
1447	xfs_inactive_dir(dp: ip);
1448	truncate = `1`;
1449	}
1450
1451	if (S_ISLNK(VFS_I(ip)->i_mode))
1452	error = xfs_inactive_symlink(ip);
1453	else if (truncate)
1454	error = xfs_inactive_truncate(ip);
1455	if (error)
1456	goto out;
1457
1458	/*
1459	* If there are attributes associated with the file then blow them away
1460	* now. The code calls a routine that recursively deconstructs the
1461	* attribute fork. If also blows away the in-core attribute fork.
1462	*/
1463	if (xfs_inode_has_attr_fork(ip)) {
1464	error = xfs_attr_inactive(ip);
1465	if (error)
1466	goto out;
1467	}
1468
1469	ASSERT(ip->i_forkoff == `0`);
1470
1471	/*
1472	* Free the inode.
1473	*/
1474	error = xfs_inactive_ifree(ip);
1475
1476	out:
1477	/*
1478	* We're done making metadata updates for this inode, so we can release
1479	* the attached dquots.
1480	*/
1481	xfs_qm_dqdetach(ip);
1482	return error;
1483	}
1484
1485	/*
1486	* Find an inode on the unlinked list. This does not take references to the
1487	* inode as we have existence guarantees by holding the AGI buffer lock and that
1488	* only unlinked, referenced inodes can be on the unlinked inode list. If we
1489	* don't find the inode in cache, then let the caller handle the situation.
1490	*/
1491	struct xfs_inode *
1492	xfs_iunlink_lookup(
1493	struct xfs_perag *pag,
1494	xfs_agino_t agino)
1495	{
1496	struct xfs_inode *ip;
1497
1498	rcu_read_lock();
1499	ip = radix_tree_lookup(&pag->pag_ici_root, agino);
1500	if (!ip) {
1501	/ Caller can handle inode not being in memory. /
1502	rcu_read_unlock();
1503	return NULL;
1504	}
1505
1506	/*
1507	* Inode in RCU freeing limbo should not happen. Warn about this and
1508	* let the caller handle the failure.
1509	*/
1510	if (WARN_ON_ONCE(!ip->i_ino)) {
1511	rcu_read_unlock();
1512	return NULL;
1513	}
1514	ASSERT(!xfs_iflags_test(ip, XFS_IRECLAIMABLE \| XFS_IRECLAIM));
1515	rcu_read_unlock();
1516	return ip;
1517	}
1518
1519	/*
1520	* Load the inode @next_agino into the cache and set its prev_unlinked pointer
1521	* to @prev_agino. Caller must hold the AGI to synchronize with other changes
1522	* to the unlinked list.
1523	*/
1524	int
1525	xfs_iunlink_reload_next(
1526	struct xfs_trans *tp,
1527	struct xfs_buf *agibp,
1528	xfs_agino_t prev_agino,
1529	xfs_agino_t next_agino)
1530	{
1531	struct xfs_perag *pag = agibp->b_pag;
1532	struct xfs_mount *mp = pag_mount(pag);
1533	struct xfs_inode *next_ip = NULL;
1534	int error;
1535
1536	ASSERT(next_agino != NULLAGINO);
1537
1538	#ifdef DEBUG
1539	rcu_read_lock();
1540	next_ip = radix_tree_lookup(&pag->pag_ici_root, next_agino);
1541	ASSERT(next_ip == NULL);
1542	rcu_read_unlock();
1543	#endif
1544
1545	xfs_info_ratelimited(mp,
1546	"Found unrecovered unlinked inode 0x%x in AG 0x%x. Initiating recovery.",
1547	next_agino, pag_agno(pag));
1548
1549	/*
1550	* Use an untrusted lookup just to be cautious in case the AGI has been
1551	* corrupted and now points at a free inode. That shouldn't happen,
1552	* but we'd rather shut down now since we're already running in a weird
1553	* situation.
1554	*/
1555	error = xfs_iget(mp, tp, ino: xfs_agino_to_ino(pag, next_agino),
1556	XFS_IGET_UNTRUSTED, lock_flags: `0`, ipp: &next_ip);
1557	if (error) {
1558	xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
1559	return error;
1560	}
1561
1562	/ If this is not an unlinked inode, something is very wrong. /
1563	if (VFS_I(ip: next_ip)->i_nlink != `0`) {
1564	xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
1565	error = -EFSCORRUPTED;
1566	goto rele;
1567	}
1568
1569	next_ip->i_prev_unlinked = prev_agino;
1570	trace_xfs_iunlink_reload_next(ip: next_ip);
1571	rele:
1572	ASSERT(!(VFS_I(next_ip)->i_state & I_DONTCACHE));
1573	if (xfs_is_quotacheck_running(mp) && next_ip)
1574	xfs_iflags_set(ip: next_ip, XFS_IQUOTAUNCHECKED);
1575	xfs_irele(ip: next_ip);
1576	return error;
1577	}
1578
1579	/*
1580	* Look up the inode number specified and if it is not already marked XFS_ISTALE
1581	* mark it stale. We should only find clean inodes in this lookup that aren't
1582	* already stale.
1583	*/
1584	static void
1585	xfs_ifree_mark_inode_stale(
1586	struct xfs_perag *pag,
1587	struct xfs_inode *free_ip,
1588	xfs_ino_t inum)
1589	{
1590	struct xfs_mount *mp = pag_mount(pag);
1591	struct xfs_inode_log_item *iip;
1592	struct xfs_inode *ip;
1593
1594	retry:
1595	rcu_read_lock();
1596	ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, inum));
1597
1598	/ Inode not in memory, nothing to do /
1599	if (!ip) {
1600	rcu_read_unlock();
1601	return;
1602	}
1603
1604	/*
1605	* because this is an RCU protected lookup, we could find a recently
1606	* freed or even reallocated inode during the lookup. We need to check
1607	* under the i_flags_lock for a valid inode here. Skip it if it is not
1608	* valid, the wrong inode or stale.
1609	*/
1610	spin_lock(lock: &ip->i_flags_lock);
1611	if (ip->i_ino != inum \|\| __xfs_iflags_test(ip, XFS_ISTALE))
1612	goto out_iflags_unlock;
1613
1614	/*
1615	* Don't try to lock/unlock the current inode, but we _cannot_ skip the
1616	* other inodes that we did not find in the list attached to the buffer
1617	* and are not already marked stale. If we can't lock it, back off and
1618	* retry.
1619	*/
1620	if (ip != free_ip) {
1621	if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
1622	spin_unlock(lock: &ip->i_flags_lock);
1623	rcu_read_unlock();
1624	delay(ticks: `1`);
1625	goto retry;
1626	}
1627	}
1628	ip->i_flags \|= XFS_ISTALE;
1629
1630	/*
1631	* If the inode is flushing, it is already attached to the buffer. All
1632	* we needed to do here is mark the inode stale so buffer IO completion
1633	* will remove it from the AIL.
1634	*/
1635	iip = ip->i_itemp;
1636	if (__xfs_iflags_test(ip, XFS_IFLUSHING)) {
1637	ASSERT(!list_empty(&iip->ili_item.li_bio_list));
1638	ASSERT(iip->ili_last_fields);
1639	goto out_iunlock;
1640	}
1641
1642	/*
1643	* Inodes not attached to the buffer can be released immediately.
1644	* Everything else has to go through xfs_iflush_abort() on journal
1645	* commit as the flock synchronises removal of the inode from the
1646	* cluster buffer against inode reclaim.
1647	*/
1648	if (!iip \|\| list_empty(head: &iip->ili_item.li_bio_list))
1649	goto out_iunlock;
1650
1651	__xfs_iflags_set(ip, XFS_IFLUSHING);
1652	spin_unlock(lock: &ip->i_flags_lock);
1653	rcu_read_unlock();
1654
1655	/ we have a dirty inode in memory that has not yet been flushed. /
1656	spin_lock(lock: &iip->ili_lock);
1657	iip->ili_last_fields = iip->ili_fields;
1658	iip->ili_fields = `0`;
1659	iip->ili_fsync_fields = `0`;
1660	spin_unlock(lock: &iip->ili_lock);
1661	ASSERT(iip->ili_last_fields);
1662
1663	if (ip != free_ip)
1664	xfs_iunlock(ip, XFS_ILOCK_EXCL);
1665	return;
1666
1667	out_iunlock:
1668	if (ip != free_ip)
1669	xfs_iunlock(ip, XFS_ILOCK_EXCL);
1670	out_iflags_unlock:
1671	spin_unlock(lock: &ip->i_flags_lock);
1672	rcu_read_unlock();
1673	}
1674
1675	/*
1676	* A big issue when freeing the inode cluster is that we _cannot_ skip any
1677	* inodes that are in memory - they all must be marked stale and attached to
1678	* the cluster buffer.
1679	*/
1680	static int
1681	xfs_ifree_cluster(
1682	struct xfs_trans *tp,
1683	struct xfs_perag *pag,
1684	struct xfs_inode *free_ip,
1685	struct xfs_icluster *xic)
1686	{
1687	struct xfs_mount *mp = free_ip->i_mount;
1688	struct xfs_ino_geometry *igeo = M_IGEO(mp);
1689	struct xfs_buf *bp;
1690	xfs_daddr_t blkno;
1691	xfs_ino_t inum = xic->first_ino;
1692	int nbufs;
1693	int i, j;
1694	int ioffset;
1695	int error;
1696
1697	nbufs = igeo->ialloc_blks / igeo->blocks_per_cluster;
1698
1699	for (j = `0`; j < nbufs; j++, inum += igeo->inodes_per_cluster) {
1700	/*
1701	* The allocation bitmap tells us which inodes of the chunk were
1702	* physically allocated. Skip the cluster if an inode falls into
1703	* a sparse region.
1704	*/
1705	ioffset = inum - xic->first_ino;
1706	if ((xic->alloc & XFS_INOBT_MASK(ioffset)) == `0`) {
1707	ASSERT(ioffset % igeo->inodes_per_cluster == `0`);
1708	continue;
1709	}
1710
1711	blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
1712	XFS_INO_TO_AGBNO(mp, inum));
1713
1714	/*
1715	* We obtain and lock the backing buffer first in the process
1716	* here to ensure dirty inodes attached to the buffer remain in
1717	* the flushing state while we mark them stale.
1718	*
1719	* If we scan the in-memory inodes first, then buffer IO can
1720	* complete before we get a lock on it, and hence we may fail
1721	* to mark all the active inodes on the buffer stale.
1722	*/
1723	error = xfs_trans_get_buf(tp, target: mp->m_ddev_targp, blkno,
1724	numblks: mp->m_bsize * igeo->blocks_per_cluster, flags: `0`, bpp: &bp);
1725	if (error)
1726	return error;
1727
1728	/*
1729	* This buffer may not have been correctly initialised as we
1730	* didn't read it from disk. That's not important because we are
1731	* only using to mark the buffer as stale in the log, and to
1732	* attach stale cached inodes on it.
1733	*
1734	* For the inode that triggered the cluster freeing, this
1735	* attachment may occur in xfs_inode_item_precommit() after we
1736	* have marked this buffer stale. If this buffer was not in
1737	* memory before xfs_ifree_cluster() started, it will not be
1738	* marked XBF_DONE and this will cause problems later in
1739	* xfs_inode_item_precommit() when we trip over a (stale, !done)
1740	* buffer to attached to the transaction.
1741	*
1742	* Hence we have to mark the buffer as XFS_DONE here. This is
1743	* safe because we are also marking the buffer as XBF_STALE and
1744	* XFS_BLI_STALE. That means it will never be dispatched for
1745	* IO and it won't be unlocked until the cluster freeing has
1746	* been committed to the journal and the buffer unpinned. If it
1747	* is written, we want to know about it, and we want it to
1748	* fail. We can acheive this by adding a write verifier to the
1749	* buffer.
1750	*/
1751	bp->b_flags \|= XBF_DONE;
1752	bp->b_ops = &xfs_inode_buf_ops;
1753
1754	/*
1755	* Now we need to set all the cached clean inodes as XFS_ISTALE,
1756	* too. This requires lookups, and will skip inodes that we've
1757	* already marked XFS_ISTALE.
1758	*/
1759	for (i = `0`; i < igeo->inodes_per_cluster; i++)
1760	xfs_ifree_mark_inode_stale(pag, free_ip, inum: inum + i);
1761
1762	xfs_trans_stale_inode_buf(tp, bp);
1763	xfs_trans_binval(tp, bp);
1764	}
1765	return `0`;
1766	}
1767
1768	/*
1769	* This is called to return an inode to the inode free list. The inode should
1770	* already be truncated to 0 length and have no pages associated with it. This
1771	* routine also assumes that the inode is already a part of the transaction.
1772	*
1773	* The on-disk copy of the inode will have been added to the list of unlinked
1774	* inodes in the AGI. We need to remove the inode from that list atomically with
1775	* respect to freeing it here.
1776	*/
1777	int
1778	xfs_ifree(
1779	struct xfs_trans *tp,
1780	struct xfs_inode *ip)
1781	{
1782	struct xfs_mount *mp = ip->i_mount;
1783	struct xfs_perag *pag;
1784	struct xfs_icluster xic = { `0` };
1785	struct xfs_inode_log_item *iip = ip->i_itemp;
1786	int error;
1787
1788	xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
1789	ASSERT(VFS_I(ip)->i_nlink == `0`);
1790	ASSERT(ip->i_df.if_nextents == `0`);
1791	ASSERT(ip->i_disk_size == `0` \|\| !S_ISREG(VFS_I(ip)->i_mode));
1792	ASSERT(ip->i_nblocks == `0`);
1793
1794	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
1795
1796	error = xfs_inode_uninit(tp, pag, ip, &xic);
1797	if (error)
1798	goto out;
1799
1800	if (xfs_iflags_test(ip, XFS_IPRESERVE_DM_FIELDS))
1801	xfs_iflags_clear(ip, XFS_IPRESERVE_DM_FIELDS);
1802
1803	/ Don't attempt to replay owner changes for a deleted inode /
1804	spin_lock(lock: &iip->ili_lock);
1805	iip->ili_fields &= ~(XFS_ILOG_AOWNER \| XFS_ILOG_DOWNER);
1806	spin_unlock(lock: &iip->ili_lock);
1807
1808	if (xic.deleted)
1809	error = xfs_ifree_cluster(tp, pag, free_ip: ip, xic: &xic);
1810	out:
1811	xfs_perag_put(pag);
1812	return error;
1813	}
1814
1815	/*
1816	* This is called to unpin an inode. The caller must have the inode locked
1817	* in at least shared mode so that the buffer cannot be subsequently pinned
1818	* once someone is waiting for it to be unpinned.
1819	*/
1820	static void
1821	xfs_iunpin(
1822	struct xfs_inode *ip)
1823	{
1824	xfs_assert_ilocked(ip, XFS_ILOCK_EXCL \| XFS_ILOCK_SHARED);
1825
1826	trace_xfs_inode_unpin_nowait(ip, _RET_IP_);
1827
1828	/ Give the log a push to start the unpinning I/O /
1829	xfs_log_force_seq(ip->i_mount, ip->i_itemp->ili_commit_seq, `0`, NULL);
1830
1831	}
1832
1833	static void
1834	__xfs_iunpin_wait(
1835	struct xfs_inode *ip)
1836	{
1837	wait_queue_head_t *wq = bit_waitqueue(word: &ip->i_flags, __XFS_IPINNED_BIT);
1838	DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IPINNED_BIT);
1839
1840	xfs_iunpin(ip);
1841
1842	do {
1843	prepare_to_wait(wq_head: wq, wq_entry: &wait.wq_entry, TASK_UNINTERRUPTIBLE);
1844	if (xfs_ipincount(ip))
1845	io_schedule();
1846	} while (xfs_ipincount(ip));
1847	finish_wait(wq_head: wq, wq_entry: &wait.wq_entry);
1848	}
1849
/*
 * Wait for the inode to become unpinned; does nothing when the pin count is
 * already zero.
 */
void
xfs_iunpin_wait(
	struct xfs_inode	*ip)
{
	if (!xfs_ipincount(ip))
		return;

	__xfs_iunpin_wait(ip);
}
1857
1858	/*
1859	* Removing an inode from the namespace involves removing the directory entry
1860	* and dropping the link count on the inode. Removing the directory entry can
1861	* result in locking an AGF (directory blocks were freed) and removing a link
1862	* count can result in placing the inode on an unlinked list which results in
1863	* locking an AGI.
1864	*
1865	* The big problem here is that we have an ordering constraint on AGF and AGI
1866	* locking - inode allocation locks the AGI, then can allocate a new extent for
1867	* new inodes, locking the AGF after the AGI. Similarly, freeing the inode
1868	* removes the inode from the unlinked list, requiring that we lock the AGI
1869	* first, and then freeing the inode can result in an inode chunk being freed
1870	* and hence freeing disk space requiring that we lock an AGF.
1871	*
1872	* Hence the ordering that is imposed by other parts of the code is AGI before
1873	* AGF. This means we cannot remove the directory entry before we drop the inode
1874	* reference count and put it on the unlinked list as this results in a lock
1875	* order of AGF then AGI, and this can deadlock against inode allocation and
1876	* freeing. Therefore we must drop the link counts before we remove the
1877	* directory entry.
1878	*
1879	* This is still safe from a transactional point of view - it is not until we
1880	* get to xfs_defer_finish() that we have the possibility of multiple
1881	* transactions in this operation. Hence as long as we remove the directory
1882	* entry and drop the link count in the first transaction of the remove
1883	* operation, there are no transactional constraints on the ordering here.
1884	*/
1885	int
1886	xfs_remove(
1887	struct xfs_inode *dp,
1888	struct xfs_name *name,
1889	struct xfs_inode *ip)
1890	{
1891	struct xfs_dir_update du = {
1892	.dp = dp,
1893	.name = name,
1894	.ip = ip,
1895	};
1896	struct xfs_mount *mp = dp->i_mount;
1897	struct xfs_trans *tp = NULL;
1898	int is_dir = S_ISDIR(VFS_I(ip)->i_mode);
1899	int dontcare;
1900	int error = `0`;
1901	uint resblks;
1902
1903	trace_xfs_remove(dp, xfs_remove: name);
1904
1905	if (xfs_is_shutdown(mp))
1906	return -EIO;
1907	if (xfs_ifork_zapped(dp, XFS_DATA_FORK))
1908	return -EIO;
1909
1910	error = xfs_qm_dqattach(dp);
1911	if (error)
1912	goto std_return;
1913
1914	error = xfs_qm_dqattach(ip);
1915	if (error)
1916	goto std_return;
1917
1918	error = xfs_parent_start(mp, &du.ppargs);
1919	if (error)
1920	goto std_return;
1921
1922	/*
1923	* We try to get the real space reservation first, allowing for
1924	* directory btree deletion(s) implying possible bmap insert(s). If we
1925	* can't get the space reservation then we use 0 instead, and avoid the
1926	* bmap btree insert(s) in the directory code by, if the bmap insert
1927	* tries to happen, instead trimming the LAST block from the directory.
1928	*
1929	* Ignore EDQUOT and ENOSPC being returned via nospace_error because
1930	* the directory code can handle a reservationless update and we don't
1931	* want to prevent a user from trying to free space by deleting things.
1932	*/
1933	resblks = xfs_remove_space_res(mp, name->len);
1934	error = xfs_trans_alloc_dir(dp, resv: &M_RES(mp)->tr_remove, ip, dblocks: &resblks,
1935	tpp: &tp, nospace_error: &dontcare);
1936	if (error) {
1937	ASSERT(error != -ENOSPC);
1938	goto out_parent;
1939	}
1940
1941	error = xfs_dir_remove_child(tp, resblks, &du);
1942	if (error)
1943	goto out_trans_cancel;
1944
1945	/*
1946	* If this is a synchronous mount, make sure that the
1947	* remove transaction goes to disk before returning to
1948	* the user.
1949	*/
1950	if (xfs_has_wsync(mp) \|\| xfs_has_dirsync(mp))
1951	xfs_trans_set_sync(tp);
1952
1953	error = xfs_trans_commit(tp);
1954	if (error)
1955	goto out_unlock;
1956
1957	if (is_dir && xfs_inode_is_filestream(ip))
1958	xfs_filestream_deassociate(ip);
1959
1960	xfs_iunlock(ip, XFS_ILOCK_EXCL);
1961	xfs_iunlock(ip: dp, XFS_ILOCK_EXCL);
1962	xfs_parent_finish(mp, du.ppargs);
1963	return `0`;
1964
1965	out_trans_cancel:
1966	xfs_trans_cancel(tp);
1967	out_unlock:
1968	xfs_iunlock(ip, XFS_ILOCK_EXCL);
1969	xfs_iunlock(ip: dp, XFS_ILOCK_EXCL);
1970	out_parent:
1971	xfs_parent_finish(mp, du.ppargs);
1972	std_return:
1973	return error;
1974	}
1975
1976	static inline void
1977	xfs_iunlock_rename(
1978	struct xfs_inode **i_tab,
1979	int num_inodes)
1980	{
1981	int i;
1982
1983	for (i = num_inodes - `1`; i >= `0`; i--) {
1984	/ Skip duplicate inodes if src and target dps are the same /
1985	if (!i_tab[i] \|\| (i > `0` && i_tab[i] == i_tab[i - `1`]))
1986	continue;
1987	xfs_iunlock(ip: i_tab[i], XFS_ILOCK_EXCL);
1988	}
1989	}
1990
1991	/*
1992	* Enter all inodes for a rename transaction into a sorted array.
1993	*/
1994	#define __XFS_SORT_INODES 5
1995	STATIC void
1996	xfs_sort_for_rename(
1997	struct xfs_inode dp1, /* in: old (source) directory inode /
1998	struct xfs_inode dp2, /* in: new (target) directory inode /
1999	struct xfs_inode ip1, /* in: inode of old entry /
2000	struct xfs_inode ip2, /* in: inode of new entry /
2001	struct xfs_inode wip, /* in: whiteout inode /
2002	struct xfs_inode *i_tab,/* out: sorted array of inodes /
2003	int num_inodes) /* in/out: inodes in array /
2004	{
2005	int i;
2006
2007	ASSERT(*num_inodes == __XFS_SORT_INODES);
2008	memset(i_tab, `0`, num_inodes sizeof(struct xfs_inode *));
2009
2010	/*
2011	* i_tab contains a list of pointers to inodes. We initialize
2012	* the table here & we'll sort it. We will then use it to
2013	* order the acquisition of the inode locks.
2014	*
2015	* Note that the table may contain duplicates. e.g., dp1 == dp2.
2016	*/
2017	i = `0`;
2018	i_tab[i++] = dp1;
2019	i_tab[i++] = dp2;
2020	i_tab[i++] = ip1;
2021	if (ip2)
2022	i_tab[i++] = ip2;
2023	if (wip)
2024	i_tab[i++] = wip;
2025	*num_inodes = i;
2026
2027	xfs_sort_inodes(i_tab, num_inodes: *num_inodes);
2028	}
2029
2030	void
2031	xfs_sort_inodes(
2032	struct xfs_inode **i_tab,
2033	unsigned int num_inodes)
2034	{
2035	int i, j;
2036
2037	ASSERT(num_inodes <= __XFS_SORT_INODES);
2038
2039	/*
2040	* Sort the elements via bubble sort. (Remember, there are at
2041	* most 5 elements to sort, so this is adequate.)
2042	*/
2043	for (i = `0`; i < num_inodes; i++) {
2044	for (j = `1`; j < num_inodes; j++) {
2045	if (i_tab[j]->i_ino < i_tab[j-`1`]->i_ino)
2046	swap(i_tab[j], i_tab[j - `1`]);
2047	}
2048	}
2049	}
2050
2051	/*
2052	* xfs_rename_alloc_whiteout()
2053	*
2054	* Return a referenced, unlinked, unlocked inode that can be used as a
2055	* whiteout in a rename transaction. We use a tmpfile inode here so that if we
2056	* crash between allocating the inode and linking it into the rename transaction
2057	* recovery will free the inode and we won't leak it.
2058	*/
2059	static int
2060	xfs_rename_alloc_whiteout(
2061	struct mnt_idmap *idmap,
2062	struct xfs_name *src_name,
2063	struct xfs_inode *dp,
2064	struct xfs_inode **wip)
2065	{
2066	struct xfs_icreate_args args = {
2067	.idmap = idmap,
2068	.pip = dp,
2069	.mode = S_IFCHR \| WHITEOUT_MODE,
2070	.flags = XFS_ICREATE_TMPFILE,
2071	};
2072	struct xfs_inode *tmpfile;
2073	struct qstr name;
2074	int error;
2075
2076	error = xfs_create_tmpfile(iargs: &args, ipp: &tmpfile);
2077	if (error)
2078	return error;
2079
2080	name.name = src_name->name;
2081	name.len = src_name->len;
2082	error = xfs_inode_init_security(inode: VFS_I(ip: tmpfile), dir: VFS_I(ip: dp), qstr: &name);
2083	if (error) {
2084	xfs_finish_inode_setup(ip: tmpfile);
2085	xfs_irele(ip: tmpfile);
2086	return error;
2087	}
2088
2089	/*
2090	* Prepare the tmpfile inode as if it were created through the VFS.
2091	* Complete the inode setup and flag it as linkable. nlink is already
2092	* zero, so we can skip the drop_nlink.
2093	*/
2094	xfs_setup_iops(ip: tmpfile);
2095	xfs_finish_inode_setup(ip: tmpfile);
2096	VFS_I(ip: tmpfile)->i_state \|= I_LINKABLE;
2097
2098	*wip = tmpfile;
2099	return `0`;
2100	}
2101
2102	/*
2103	* xfs_rename
2104	*/
2105	int
2106	xfs_rename(
2107	struct mnt_idmap *idmap,
2108	struct xfs_inode *src_dp,
2109	struct xfs_name *src_name,
2110	struct xfs_inode *src_ip,
2111	struct xfs_inode *target_dp,
2112	struct xfs_name *target_name,
2113	struct xfs_inode *target_ip,
2114	unsigned int flags)
2115	{
2116	struct xfs_dir_update du_src = {
2117	.dp = src_dp,
2118	.name = src_name,
2119	.ip = src_ip,
2120	};
2121	struct xfs_dir_update du_tgt = {
2122	.dp = target_dp,
2123	.name = target_name,
2124	.ip = target_ip,
2125	};
2126	struct xfs_dir_update du_wip = { };
2127	struct xfs_mount *mp = src_dp->i_mount;
2128	struct xfs_trans *tp;
2129	struct xfs_inode *inodes[__XFS_SORT_INODES];
2130	int i;
2131	int num_inodes = __XFS_SORT_INODES;
2132	bool new_parent = (src_dp != target_dp);
2133	bool src_is_directory = S_ISDIR(VFS_I(src_ip)->i_mode);
2134	int spaceres;
2135	bool retried = false;
2136	int error, nospace_error = `0`;
2137
2138	trace_xfs_rename(src_dp, target_dp, src_name, target_name);
2139
2140	if ((flags & RENAME_EXCHANGE) && !target_ip)
2141	return -EINVAL;
2142
2143	/*
2144	* If we are doing a whiteout operation, allocate the whiteout inode
2145	* we will be placing at the target and ensure the type is set
2146	* appropriately.
2147	*/
2148	if (flags & RENAME_WHITEOUT) {
2149	error = xfs_rename_alloc_whiteout(idmap, src_name, dp: target_dp,
2150	wip: &du_wip.ip);
2151	if (error)
2152	return error;
2153
2154	/ setup target dirent info as whiteout /
2155	src_name->type = XFS_DIR3_FT_CHRDEV;
2156	}
2157
2158	xfs_sort_for_rename(dp1: src_dp, dp2: target_dp, ip1: src_ip, ip2: target_ip, wip: du_wip.ip,
2159	i_tab: inodes, num_inodes: &num_inodes);
2160
2161	error = xfs_parent_start(mp, &du_src.ppargs);
2162	if (error)
2163	goto out_release_wip;
2164
2165	if (du_wip.ip) {
2166	error = xfs_parent_start(mp, &du_wip.ppargs);
2167	if (error)
2168	goto out_src_ppargs;
2169	}
2170
2171	if (target_ip) {
2172	error = xfs_parent_start(mp, &du_tgt.ppargs);
2173	if (error)
2174	goto out_wip_ppargs;
2175	}
2176
2177	retry:
2178	nospace_error = `0`;
2179	spaceres = xfs_rename_space_res(mp, src_name->len, target_ip != NULL,
2180	target_name->len, du_wip.ip != NULL);
2181	error = xfs_trans_alloc(mp, resp: &M_RES(mp)->tr_rename, blocks: spaceres, rtextents: `0`, flags: `0`, tpp: &tp);
2182	if (error == -ENOSPC) {
2183	nospace_error = error;
2184	spaceres = `0`;
2185	error = xfs_trans_alloc(mp, resp: &M_RES(mp)->tr_rename, blocks: `0`, rtextents: `0`, flags: `0`,
2186	tpp: &tp);
2187	}
2188	if (error)
2189	goto out_tgt_ppargs;
2190
2191	/*
2192	* We don't allow reservationless renaming when parent pointers are
2193	* enabled because we can't back out if the xattrs must grow.
2194	*/
2195	if (du_src.ppargs && nospace_error) {
2196	error = nospace_error;
2197	xfs_trans_cancel(tp);
2198	goto out_tgt_ppargs;
2199	}
2200
2201	/*
2202	* Attach the dquots to the inodes
2203	*/
2204	error = xfs_qm_vop_rename_dqattach(inodes);
2205	if (error) {
2206	xfs_trans_cancel(tp);
2207	goto out_tgt_ppargs;
2208	}
2209
2210	/*
2211	* Lock all the participating inodes. Depending upon whether
2212	* the target_name exists in the target directory, and
2213	* whether the target directory is the same as the source
2214	* directory, we can lock from 2 to 5 inodes.
2215	*/
2216	xfs_lock_inodes(ips: inodes, inodes: num_inodes, XFS_ILOCK_EXCL);
2217
2218	/*
2219	* Join all the inodes to the transaction.
2220	*/
2221	xfs_trans_ijoin(tp, src_dp, `0`);
2222	if (new_parent)
2223	xfs_trans_ijoin(tp, target_dp, `0`);
2224	xfs_trans_ijoin(tp, src_ip, `0`);
2225	if (target_ip)
2226	xfs_trans_ijoin(tp, target_ip, `0`);
2227	if (du_wip.ip)
2228	xfs_trans_ijoin(tp, du_wip.ip, `0`);
2229
2230	/*
2231	* If we are using project inheritance, we only allow renames
2232	* into our tree when the project IDs are the same; else the
2233	* tree quota mechanism would be circumvented.
2234	*/
2235	if (unlikely((target_dp->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
2236	target_dp->i_projid != src_ip->i_projid)) {
2237	error = -EXDEV;
2238	goto out_trans_cancel;
2239	}
2240
2241	/ RENAME_EXCHANGE is unique from here on. /
2242	if (flags & RENAME_EXCHANGE) {
2243	error = xfs_dir_exchange_children(tp, &du_src, &du_tgt,
2244	spaceres);
2245	if (error)
2246	goto out_trans_cancel;
2247	goto out_commit;
2248	}
2249
2250	/*
2251	* Try to reserve quota to handle an expansion of the target directory.
2252	* We'll allow the rename to continue in reservationless mode if we hit
2253	* a space usage constraint. If we trigger reservationless mode, save
2254	* the errno if there isn't any free space in the target directory.
2255	*/
2256	if (spaceres != `0`) {
2257	error = xfs_trans_reserve_quota_nblks(tp, ip: target_dp, dblocks: spaceres,
2258	rblocks: `0`, force: false);
2259	if (error == -EDQUOT \|\| error == -ENOSPC) {
2260	if (!retried) {
2261	xfs_trans_cancel(tp);
2262	xfs_iunlock_rename(i_tab: inodes, num_inodes);
2263	xfs_blockgc_free_quota(ip: target_dp, iwalk_flags: `0`);
2264	retried = true;
2265	goto retry;
2266	}
2267
2268	nospace_error = error;
2269	spaceres = `0`;
2270	error = `0`;
2271	}
2272	if (error)
2273	goto out_trans_cancel;
2274	}
2275
2276	/*
2277	* We don't allow quotaless renaming when parent pointers are enabled
2278	* because we can't back out if the xattrs must grow.
2279	*/
2280	if (du_src.ppargs && nospace_error) {
2281	error = nospace_error;
2282	goto out_trans_cancel;
2283	}
2284
2285	/*
2286	* Lock the AGI buffers we need to handle bumping the nlink of the
2287	* whiteout inode off the unlinked list and to handle dropping the
2288	* nlink of the target inode. Per locking order rules, do this in
2289	* increasing AG order and before directory block allocation tries to
2290	* grab AGFs because we grab AGIs before AGFs.
2291	*
2292	* The (vfs) caller must ensure that if src is a directory then
2293	* target_ip is either null or an empty directory.
2294	*/
2295	for (i = `0`; i < num_inodes && inodes[i] != NULL; i++) {
2296	if (inodes[i] == du_wip.ip \|\|
2297	(inodes[i] == target_ip &&
2298	(VFS_I(ip: target_ip)->i_nlink == `1` \|\| src_is_directory))) {
2299	struct xfs_perag *pag;
2300	struct xfs_buf *bp;
2301
2302	pag = xfs_perag_get(mp,
2303	XFS_INO_TO_AGNO(mp, inodes[i]->i_ino));
2304	error = xfs_read_agi(pag, tp, `0`, &bp);
2305	xfs_perag_put(pag);
2306	if (error)
2307	goto out_trans_cancel;
2308	}
2309	}
2310
2311	error = xfs_dir_rename_children(tp, &du_src, &du_tgt, spaceres,
2312	&du_wip);
2313	if (error)
2314	goto out_trans_cancel;
2315
2316	if (du_wip.ip) {
2317	/*
2318	* Now we have a real link, clear the "I'm a tmpfile" state
2319	* flag from the inode so it doesn't accidentally get misused in
2320	* future.
2321	*/
2322	VFS_I(ip: du_wip.ip)->i_state &= ~I_LINKABLE;
2323	}
2324
2325	out_commit:
2326	/*
2327	* If this is a synchronous mount, make sure that the rename
2328	* transaction goes to disk before returning to the user.
2329	*/
2330	if (xfs_has_wsync(tp->t_mountp) \|\| xfs_has_dirsync(tp->t_mountp))
2331	xfs_trans_set_sync(tp);
2332
2333	error = xfs_trans_commit(tp);
2334	nospace_error = `0`;
2335	goto out_unlock;
2336
2337	out_trans_cancel:
2338	xfs_trans_cancel(tp);
2339	out_unlock:
2340	xfs_iunlock_rename(i_tab: inodes, num_inodes);
2341	out_tgt_ppargs:
2342	xfs_parent_finish(mp, du_tgt.ppargs);
2343	out_wip_ppargs:
2344	xfs_parent_finish(mp, du_wip.ppargs);
2345	out_src_ppargs:
2346	xfs_parent_finish(mp, du_src.ppargs);
2347	out_release_wip:
2348	if (du_wip.ip)
2349	xfs_irele(ip: du_wip.ip);
2350	if (error == -ENOSPC && nospace_error)
2351	error = nospace_error;
2352	return error;
2353	}
2354
2355	static int
2356	xfs_iflush(
2357	struct xfs_inode *ip,
2358	struct xfs_buf *bp)
2359	{
2360	struct xfs_inode_log_item *iip = ip->i_itemp;
2361	struct xfs_dinode *dip;
2362	struct xfs_mount *mp = ip->i_mount;
2363	int error;
2364
2365	xfs_assert_ilocked(ip, XFS_ILOCK_EXCL \| XFS_ILOCK_SHARED);
2366	ASSERT(xfs_iflags_test(ip, XFS_IFLUSHING));
2367	ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_BTREE \|\|
2368	ip->i_df.if_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
2369	ASSERT(iip->ili_item.li_buf == bp);
2370
2371	dip = xfs_buf_offset(bp, offset: ip->i_imap.im_boffset);
2372
2373	/*
2374	* We don't flush the inode if any of the following checks fail, but we
2375	* do still update the log item and attach to the backing buffer as if
2376	* the flush happened. This is a formality to facilitate predictable
2377	* error handling as the caller will shutdown and fail the buffer.
2378	*/
2379	error = -EFSCORRUPTED;
2380	if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
2381	mp, XFS_ERRTAG_IFLUSH_1)) {
2382	xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
2383	"%s: Bad inode %llu magic number 0x%x, ptr "PTR_FMT,
2384	__func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
2385	goto flush_out;
2386	}
2387	if (ip->i_df.if_format == XFS_DINODE_FMT_META_BTREE) {
2388	if (!S_ISREG(VFS_I(ip)->i_mode) \|\|
2389	!(ip->i_diflags2 & XFS_DIFLAG2_METADATA)) {
2390	xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
2391	"%s: Bad %s meta btree inode %Lu, ptr "PTR_FMT,
2392	__func__, xfs_metafile_type_str(ip->i_metatype),
2393	ip->i_ino, ip);
2394	goto flush_out;
2395	}
2396	} else if (S_ISREG(VFS_I(ip)->i_mode)) {
2397	if (XFS_TEST_ERROR(
2398	ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
2399	ip->i_df.if_format != XFS_DINODE_FMT_BTREE,
2400	mp, XFS_ERRTAG_IFLUSH_3)) {
2401	xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
2402	"%s: Bad regular inode %llu, ptr "PTR_FMT,
2403	__func__, ip->i_ino, ip);
2404	goto flush_out;
2405	}
2406	} else if (S_ISDIR(VFS_I(ip)->i_mode)) {
2407	if (XFS_TEST_ERROR(
2408	ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
2409	ip->i_df.if_format != XFS_DINODE_FMT_BTREE &&
2410	ip->i_df.if_format != XFS_DINODE_FMT_LOCAL,
2411	mp, XFS_ERRTAG_IFLUSH_4)) {
2412	xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
2413	"%s: Bad directory inode %llu, ptr "PTR_FMT,
2414	__func__, ip->i_ino, ip);
2415	goto flush_out;
2416	}
2417	}
2418	if (XFS_TEST_ERROR(ip->i_df.if_nextents + xfs_ifork_nextents(&ip->i_af) >
2419	ip->i_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) {
2420	xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
2421	"%s: detected corrupt incore inode %llu, "
2422	"total extents = %llu nblocks = %lld, ptr "PTR_FMT,
2423	__func__, ip->i_ino,
2424	ip->i_df.if_nextents + xfs_ifork_nextents(&ip->i_af),
2425	ip->i_nblocks, ip);
2426	goto flush_out;
2427	}
2428	if (XFS_TEST_ERROR(ip->i_forkoff > mp->m_sb.sb_inodesize,
2429	mp, XFS_ERRTAG_IFLUSH_6)) {
2430	xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
2431	"%s: bad inode %llu, forkoff 0x%x, ptr "PTR_FMT,
2432	__func__, ip->i_ino, ip->i_forkoff, ip);
2433	goto flush_out;
2434	}
2435
2436	if (xfs_inode_has_attr_fork(ip) &&
2437	ip->i_af.if_format == XFS_DINODE_FMT_META_BTREE) {
2438	xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
2439	"%s: meta btree in inode %Lu attr fork, ptr "PTR_FMT,
2440	__func__, ip->i_ino, ip);
2441	goto flush_out;
2442	}
2443
2444	/*
2445	* Inode item log recovery for v2 inodes are dependent on the flushiter
2446	* count for correct sequencing. We bump the flush iteration count so
2447	* we can detect flushes which postdate a log record during recovery.
2448	* This is redundant as we now log every change and hence this can't
2449	* happen but we need to still do it to ensure backwards compatibility
2450	* with old kernels that predate logging all inode changes.
2451	*/
2452	if (!xfs_has_v3inodes(mp))
2453	ip->i_flushiter++;
2454
2455	/*
2456	* If there are inline format data / attr forks attached to this inode,
2457	* make sure they are not corrupt.
2458	*/
2459	if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL &&
2460	xfs_ifork_verify_local_data(ip))
2461	goto flush_out;
2462	if (xfs_inode_has_attr_fork(ip) &&
2463	ip->i_af.if_format == XFS_DINODE_FMT_LOCAL &&
2464	xfs_ifork_verify_local_attr(ip))
2465	goto flush_out;
2466
2467	/*
2468	* Copy the dirty parts of the inode into the on-disk inode. We always
2469	* copy out the core of the inode, because if the inode is dirty at all
2470	* the core must be.
2471	*/
2472	xfs_inode_to_disk(ip, dip, iip->ili_item.li_lsn);
2473
2474	/ Wrap, we never let the log put out DI_MAX_FLUSH /
2475	if (!xfs_has_v3inodes(mp)) {
2476	if (ip->i_flushiter == DI_MAX_FLUSH)
2477	ip->i_flushiter = `0`;
2478	}
2479
2480	xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
2481	if (xfs_inode_has_attr_fork(ip))
2482	xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
2483
2484	/*
2485	* We've recorded everything logged in the inode, so we'd like to clear
2486	* the ili_fields bits so we don't log and flush things unnecessarily.
2487	* However, we can't stop logging all this information until the data
2488	* we've copied into the disk buffer is written to disk. If we did we
2489	* might overwrite the copy of the inode in the log with all the data
2490	* after re-logging only part of it, and in the face of a crash we
2491	* wouldn't have all the data we need to recover.
2492	*
2493	* What we do is move the bits to the ili_last_fields field. When
2494	* logging the inode, these bits are moved back to the ili_fields field.
2495	* In the xfs_buf_inode_iodone() routine we clear ili_last_fields, since
2496	* we know that the information those bits represent is permanently on
2497	* disk. As long as the flush completes before the inode is logged
2498	* again, then both ili_fields and ili_last_fields will be cleared.
2499	*/
2500	error = `0`;
2501	flush_out:
2502	spin_lock(lock: &iip->ili_lock);
2503	iip->ili_last_fields = iip->ili_fields;
2504	iip->ili_fields = `0`;
2505	iip->ili_fsync_fields = `0`;
2506	set_bit(XFS_LI_FLUSHING, addr: &iip->ili_item.li_flags);
2507	spin_unlock(lock: &iip->ili_lock);
2508
2509	/*
2510	* Store the current LSN of the inode so that we can tell whether the
2511	* item has moved in the AIL from xfs_buf_inode_iodone().
2512	*/
2513	xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
2514	&iip->ili_item.li_lsn);
2515
2516	/ generate the checksum. /
2517	xfs_dinode_calc_crc(mp, dip);
2518	if (error)
2519	xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE);
2520	return error;
2521	}
2522
2523	/*
2524	* Non-blocking flush of dirty inode metadata into the backing buffer.
2525	*
2526	* The caller must have a reference to the inode and hold the cluster buffer
2527	* locked. The function will walk across all the inodes on the cluster buffer it
2528	* can find and lock without blocking, and flush them to the cluster buffer.
2529	*
2530	* On successful flushing of at least one inode, the caller must write out the
2531	* buffer and release it. If no inodes are flushed, -EAGAIN will be returned and
2532	* the caller needs to release the buffer. On failure, the filesystem will be
2533	* shut down, the buffer will have been unlocked and released, and EFSCORRUPTED
2534	* will be returned.
2535	*/
2536	int
2537	xfs_iflush_cluster(
2538	struct xfs_buf *bp)
2539	{
2540	struct xfs_mount *mp = bp->b_mount;
2541	struct xfs_log_item lip, n;
2542	struct xfs_inode *ip;
2543	struct xfs_inode_log_item *iip;
2544	int clcount = `0`;
2545	int error = `0`;
2546
2547	/*
2548	* We must use the safe variant here as on shutdown xfs_iflush_abort()
2549	* will remove itself from the list.
2550	*/
2551	list_for_each_entry_safe(lip, n, &bp->b_li_list, li_bio_list) {
2552	iip = (struct xfs_inode_log_item *)lip;
2553	ip = iip->ili_inode;
2554
2555	/*
2556	* Quick and dirty check to avoid locks if possible.
2557	*/
2558	if (__xfs_iflags_test(ip, XFS_IRECLAIM \| XFS_IFLUSHING))
2559	continue;
2560	if (xfs_ipincount(ip))
2561	continue;
2562
2563	/*
2564	* The inode is still attached to the buffer, which means it is
2565	* dirty but reclaim might try to grab it. Check carefully for
2566	* that, and grab the ilock while still holding the i_flags_lock
2567	* to guarantee reclaim will not be able to reclaim this inode
2568	* once we drop the i_flags_lock.
2569	*/
2570	spin_lock(lock: &ip->i_flags_lock);
2571	ASSERT(!__xfs_iflags_test(ip, XFS_ISTALE));
2572	if (__xfs_iflags_test(ip, XFS_IRECLAIM \| XFS_IFLUSHING)) {
2573	spin_unlock(lock: &ip->i_flags_lock);
2574	continue;
2575	}
2576
2577	/*
2578	* ILOCK will pin the inode against reclaim and prevent
2579	* concurrent transactions modifying the inode while we are
2580	* flushing the inode. If we get the lock, set the flushing
2581	* state before we drop the i_flags_lock.
2582	*/
2583	if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
2584	spin_unlock(lock: &ip->i_flags_lock);
2585	continue;
2586	}
2587	__xfs_iflags_set(ip, XFS_IFLUSHING);
2588	spin_unlock(lock: &ip->i_flags_lock);
2589
2590	/*
2591	* Abort flushing this inode if we are shut down because the
2592	* inode may not currently be in the AIL. This can occur when
2593	* log I/O failure unpins the inode without inserting into the
2594	* AIL, leaving a dirty/unpinned inode attached to the buffer
2595	* that otherwise looks like it should be flushed.
2596	*/
2597	if (xlog_is_shutdown(log: mp->m_log)) {
2598	xfs_iunpin_wait(ip);
2599	xfs_iflush_abort(ip);
2600	xfs_iunlock(ip, XFS_ILOCK_SHARED);
2601	error = -EIO;
2602	continue;
2603	}
2604
2605	/ don't block waiting on a log force to unpin dirty inodes /
2606	if (xfs_ipincount(ip)) {
2607	xfs_iflags_clear(ip, XFS_IFLUSHING);
2608	xfs_iunlock(ip, XFS_ILOCK_SHARED);
2609	continue;
2610	}
2611
2612	if (!xfs_inode_clean(ip))
2613	error = xfs_iflush(ip, bp);
2614	else
2615	xfs_iflags_clear(ip, XFS_IFLUSHING);
2616	xfs_iunlock(ip, XFS_ILOCK_SHARED);
2617	if (error)
2618	break;
2619	clcount++;
2620	}
2621
2622	if (error) {
2623	/*
2624	* Shutdown first so we kill the log before we release this
2625	* buffer. If it is an INODE_ALLOC buffer and pins the tail
2626	* of the log, failing it before the _log_ is shut down can
2627	* result in the log tail being moved forward in the journal
2628	* on disk because log writes can still be taking place. Hence
2629	* unpinning the tail will allow the ICREATE intent to be
2630	* removed from the log an recovery will fail with uninitialised
2631	* inode cluster buffers.
2632	*/
2633	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
2634	bp->b_flags \|= XBF_ASYNC;
2635	xfs_buf_ioend_fail(bp);
2636	return error;
2637	}
2638
2639	if (!clcount)
2640	return -EAGAIN;
2641
2642	XFS_STATS_INC(mp, xs_icluster_flushcnt);
2643	XFS_STATS_ADD(mp, xs_icluster_flushinode, clcount);
2644	return `0`;
2645
2646	}
2647
2648	/ Release an inode. /
2649	void
2650	xfs_irele(
2651	struct xfs_inode *ip)
2652	{
2653	trace_xfs_irele(ip, _RET_IP_);
2654	iput(VFS_I(ip));
2655	}
2656
2657	/*
2658	* Ensure all commited transactions touching the inode are written to the log.
2659	*/
2660	int
2661	xfs_log_force_inode(
2662	struct xfs_inode *ip)
2663	{
2664	xfs_csn_t seq = `0`;
2665
2666	xfs_ilock(ip, XFS_ILOCK_SHARED);
2667	if (xfs_ipincount(ip))
2668	seq = ip->i_itemp->ili_commit_seq;
2669	xfs_iunlock(ip, XFS_ILOCK_SHARED);
2670
2671	if (!seq)
2672	return `0`;
2673	return xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC, NULL);
2674	}
2675
2676	/*
2677	* Grab the exclusive iolock for a data copy from src to dest, making sure to
2678	* abide vfs locking order (lowest pointer value goes first) and breaking the
2679	* layout leases before proceeding. The loop is needed because we cannot call
2680	* the blocking break_layout() with the iolocks held, and therefore have to
2681	* back out both locks.
2682	*/
2683	static int
2684	xfs_iolock_two_inodes_and_break_layout(
2685	struct inode *src,
2686	struct inode *dest)
2687	{
2688	int error;
2689
2690	if (src > dest)
2691	swap(src, dest);
2692
2693	retry:
2694	/ Wait to break both inodes' layouts before we start locking. /
2695	error = break_layout(inode: src, wait: true);
2696	if (error)
2697	return error;
2698	if (src != dest) {
2699	error = break_layout(inode: dest, wait: true);
2700	if (error)
2701	return error;
2702	}
2703
2704	/ Lock one inode and make sure nobody got in and leased it. /
2705	inode_lock(inode: src);
2706	error = break_layout(inode: src, wait: false);
2707	if (error) {
2708	inode_unlock(inode: src);
2709	if (error == -EWOULDBLOCK)
2710	goto retry;
2711	return error;
2712	}
2713
2714	if (src == dest)
2715	return `0`;
2716
2717	/ Lock the other inode and make sure nobody got in and leased it. /
2718	inode_lock_nested(inode: dest, subclass: I_MUTEX_NONDIR2);
2719	error = break_layout(inode: dest, wait: false);
2720	if (error) {
2721	inode_unlock(inode: src);
2722	inode_unlock(inode: dest);
2723	if (error == -EWOULDBLOCK)
2724	goto retry;
2725	return error;
2726	}
2727
2728	return `0`;
2729	}
2730
2731	static int
2732	xfs_mmaplock_two_inodes_and_break_dax_layout(
2733	struct xfs_inode *ip1,
2734	struct xfs_inode *ip2)
2735	{
2736	int error;
2737
2738	if (ip1->i_ino > ip2->i_ino)
2739	swap(ip1, ip2);
2740
2741	again:
2742	/ Lock the first inode /
2743	xfs_ilock(ip: ip1, XFS_MMAPLOCK_EXCL);
2744	error = xfs_break_dax_layouts(inode: VFS_I(ip: ip1));
2745	if (error) {
2746	xfs_iunlock(ip: ip1, XFS_MMAPLOCK_EXCL);
2747	return error;
2748	}
2749
2750	if (ip1 == ip2)
2751	return `0`;
2752
2753	/ Nested lock the second inode /
2754	xfs_ilock(ip: ip2, lock_flags: xfs_lock_inumorder(XFS_MMAPLOCK_EXCL, subclass: `1`));
2755	/*
2756	* We cannot use xfs_break_dax_layouts() directly here because it may
2757	* need to unlock & lock the XFS_MMAPLOCK_EXCL which is not suitable
2758	* for this nested lock case.
2759	*/
2760	error = dax_break_layout(inode: VFS_I(ip: ip2), start: `0`, end: -`1`, NULL);
2761	if (error) {
2762	xfs_iunlock(ip: ip2, XFS_MMAPLOCK_EXCL);
2763	xfs_iunlock(ip: ip1, XFS_MMAPLOCK_EXCL);
2764	goto again;
2765	}
2766
2767	return `0`;
2768	}
2769
2770	/*
2771	* Lock two inodes so that userspace cannot initiate I/O via file syscalls or
2772	* mmap activity.
2773	*/
2774	int
2775	xfs_ilock2_io_mmap(
2776	struct xfs_inode *ip1,
2777	struct xfs_inode *ip2)
2778	{
2779	int ret;
2780
2781	ret = xfs_iolock_two_inodes_and_break_layout(src: VFS_I(ip: ip1), dest: VFS_I(ip: ip2));
2782	if (ret)
2783	return ret;
2784
2785	if (IS_DAX(VFS_I(ip1)) && IS_DAX(VFS_I(ip2))) {
2786	ret = xfs_mmaplock_two_inodes_and_break_dax_layout(ip1, ip2);
2787	if (ret) {
2788	inode_unlock(inode: VFS_I(ip: ip2));
2789	if (ip1 != ip2)
2790	inode_unlock(inode: VFS_I(ip: ip1));
2791	return ret;
2792	}
2793	} else
2794	filemap_invalidate_lock_two(mapping1: VFS_I(ip: ip1)->i_mapping,
2795	mapping2: VFS_I(ip: ip2)->i_mapping);
2796
2797	return `0`;
2798	}
2799
2800	/ Unlock both inodes to allow IO and mmap activity. /
2801	void
2802	xfs_iunlock2_io_mmap(
2803	struct xfs_inode *ip1,
2804	struct xfs_inode *ip2)
2805	{
2806	if (IS_DAX(VFS_I(ip1)) && IS_DAX(VFS_I(ip2))) {
2807	xfs_iunlock(ip: ip2, XFS_MMAPLOCK_EXCL);
2808	if (ip1 != ip2)
2809	xfs_iunlock(ip: ip1, XFS_MMAPLOCK_EXCL);
2810	} else
2811	filemap_invalidate_unlock_two(mapping1: VFS_I(ip: ip1)->i_mapping,
2812	mapping2: VFS_I(ip: ip2)->i_mapping);
2813
2814	inode_unlock(inode: VFS_I(ip: ip2));
2815	if (ip1 != ip2)
2816	inode_unlock(inode: VFS_I(ip: ip1));
2817	}
2818
2819	/ Drop the MMAPLOCK and the IOLOCK after a remap completes. /
2820	void
2821	xfs_iunlock2_remapping(
2822	struct xfs_inode *ip1,
2823	struct xfs_inode *ip2)
2824	{
2825	xfs_iflags_clear(ip: ip1, XFS_IREMAPPING);
2826
2827	if (ip1 != ip2)
2828	xfs_iunlock(ip: ip1, XFS_MMAPLOCK_SHARED);
2829	xfs_iunlock(ip: ip2, XFS_MMAPLOCK_EXCL);
2830
2831	if (ip1 != ip2)
2832	inode_unlock_shared(inode: VFS_I(ip: ip1));
2833	inode_unlock(inode: VFS_I(ip: ip2));
2834	}
2835
2836	/*
2837	* Reload the incore inode list for this inode. Caller should ensure that
2838	* the link count cannot change, either by taking ILOCK_SHARED or otherwise
2839	* preventing other threads from executing.
2840	*/
2841	int
2842	xfs_inode_reload_unlinked_bucket(
2843	struct xfs_trans *tp,
2844	struct xfs_inode *ip)
2845	{
2846	struct xfs_mount *mp = tp->t_mountp;
2847	struct xfs_buf *agibp;
2848	struct xfs_agi *agi;
2849	struct xfs_perag *pag;
2850	xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
2851	xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
2852	xfs_agino_t prev_agino, next_agino;
2853	unsigned int bucket;
2854	bool foundit = false;
2855	int error;
2856
2857	/ Grab the first inode in the list /
2858	pag = xfs_perag_get(mp, agno);
2859	error = xfs_ialloc_read_agi(pag, tp, `0`, &agibp);
2860	xfs_perag_put(pag);
2861	if (error)
2862	return error;
2863
2864	/*
2865	* We've taken ILOCK_SHARED and the AGI buffer lock to stabilize the
2866	* incore unlinked list pointers for this inode. Check once more to
2867	* see if we raced with anyone else to reload the unlinked list.
2868	*/
2869	if (!xfs_inode_unlinked_incomplete(ip)) {
2870	foundit = true;
2871	goto out_agibp;
2872	}
2873
2874	bucket = agino % XFS_AGI_UNLINKED_BUCKETS;
2875	agi = agibp->b_addr;
2876
2877	trace_xfs_inode_reload_unlinked_bucket(ip);
2878
2879	xfs_info_ratelimited(mp,
2880	"Found unrecovered unlinked inode 0x%x in AG 0x%x. Initiating list recovery.",
2881	agino, agno);
2882
2883	prev_agino = NULLAGINO;
2884	next_agino = be32_to_cpu(agi->agi_unlinked[bucket]);
2885	while (next_agino != NULLAGINO) {
2886	struct xfs_inode *next_ip = NULL;
2887
2888	/ Found this caller's inode, set its backlink. /
2889	if (next_agino == agino) {
2890	next_ip = ip;
2891	next_ip->i_prev_unlinked = prev_agino;
2892	foundit = true;
2893	goto next_inode;
2894	}
2895
2896	/ Try in-memory lookup first. /
2897	next_ip = xfs_iunlink_lookup(pag, next_agino);
2898	if (next_ip)
2899	goto next_inode;
2900
2901	/ Inode not in memory, try reloading it. /
2902	error = xfs_iunlink_reload_next(tp, agibp, prev_agino,
2903	next_agino);
2904	if (error)
2905	break;
2906
2907	/ Grab the reloaded inode. /
2908	next_ip = xfs_iunlink_lookup(pag, next_agino);
2909	if (!next_ip) {
2910	/ No incore inode at all? We reloaded it... /
2911	ASSERT(next_ip != NULL);
2912	error = -EFSCORRUPTED;
2913	break;
2914	}
2915
2916	next_inode:
2917	prev_agino = next_agino;
2918	next_agino = next_ip->i_next_unlinked;
2919	}
2920
2921	out_agibp:
2922	xfs_trans_brelse(tp, agibp);
2923	/ Should have found this inode somewhere in the iunlinked bucket. /
2924	if (!error && !foundit)
2925	error = -EFSCORRUPTED;
2926	return error;
2927	}
2928
2929	/ Decide if this inode is missing its unlinked list and reload it. /
2930	int
2931	xfs_inode_reload_unlinked(
2932	struct xfs_inode *ip)
2933	{
2934	struct xfs_trans *tp;
2935	int error;
2936
2937	error = xfs_trans_alloc_empty(mp: ip->i_mount, tpp: &tp);
2938	if (error)
2939	return error;
2940
2941	xfs_ilock(ip, XFS_ILOCK_SHARED);
2942	if (xfs_inode_unlinked_incomplete(ip))
2943	error = xfs_inode_reload_unlinked_bucket(tp, ip);
2944	xfs_iunlock(ip, XFS_ILOCK_SHARED);
2945	xfs_trans_cancel(tp);
2946
2947	return error;
2948	}
2949
2950	/ Has this inode fork been zapped by repair? /
2951	bool
2952	xfs_ifork_zapped(
2953	const struct xfs_inode *ip,
2954	int whichfork)
2955	{
2956	unsigned int datamask = `0`;
2957
2958	switch (whichfork) {
2959	case XFS_DATA_FORK:
2960	switch (ip->i_vnode.i_mode & S_IFMT) {
2961	case S_IFDIR:
2962	datamask = XFS_SICK_INO_DIR_ZAPPED;
2963	break;
2964	case S_IFLNK:
2965	datamask = XFS_SICK_INO_SYMLINK_ZAPPED;
2966	break;
2967	}
2968	return ip->i_sick & (XFS_SICK_INO_BMBTD_ZAPPED \| datamask);
2969	case XFS_ATTR_FORK:
2970	return ip->i_sick & XFS_SICK_INO_BMBTA_ZAPPED;
2971	default:
2972	return false;
2973	}
2974	}
2975
2976	/ Compute the number of data and realtime blocks used by a file. /
2977	void
2978	xfs_inode_count_blocks(
2979	struct xfs_trans *tp,
2980	struct xfs_inode *ip,
2981	xfs_filblks_t *dblocks,
2982	xfs_filblks_t *rblocks)
2983	{
2984	struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
2985
2986	*rblocks = `0`;
2987	if (XFS_IS_REALTIME_INODE(ip))
2988	xfs_bmap_count_leaves(ifp, rblocks);
2989	dblocks = ip->i_nblocks - rblocks;
2990	}
2991
2992	static void
2993	xfs_wait_dax_page(
2994	struct inode *inode)
2995	{
2996	struct xfs_inode *ip = XFS_I(inode);
2997
2998	xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
2999	schedule();
3000	xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
3001	}
3002
3003	int
3004	xfs_break_dax_layouts(
3005	struct inode *inode)
3006	{
3007	xfs_assert_ilocked(ip: XFS_I(inode), XFS_MMAPLOCK_EXCL);
3008
3009	return dax_break_layout_inode(inode, cb: xfs_wait_dax_page);
3010	}
3011
3012	int
3013	xfs_break_layouts(
3014	struct inode *inode,
3015	uint *iolock,
3016	enum layout_break_reason reason)
3017	{
3018	bool retry;
3019	int error;
3020
3021	xfs_assert_ilocked(ip: XFS_I(inode), XFS_IOLOCK_SHARED \| XFS_IOLOCK_EXCL);
3022
3023	do {
3024	retry = false;
3025	switch (reason) {
3026	case BREAK_UNMAP:
3027	error = xfs_break_dax_layouts(inode);
3028	if (error)
3029	break;
3030	fallthrough;
3031	case BREAK_WRITE:
3032	error = xfs_break_leased_layouts(inode, iolock, did_unlock: &retry);
3033	break;
3034	default:
3035	WARN_ON_ONCE(`1`);
3036	error = -EINVAL;
3037	}
3038	} while (error == `0` && retry);
3039
3040	return error;
3041	}
3042
3043	/ Returns the size of fundamental allocation unit for a file, in bytes. /
3044	unsigned int
3045	xfs_inode_alloc_unitsize(
3046	struct xfs_inode *ip)
3047	{
3048	unsigned int blocks = `1`;
3049
3050	if (XFS_IS_REALTIME_INODE(ip))
3051	blocks = ip->i_mount->m_sb.sb_rextsize;
3052
3053	return XFS_FSB_TO_B(ip->i_mount, blocks);
3054	}
3055
3056	/ Should we always be using copy on write for file writes? /
3057	bool
3058	xfs_is_always_cow_inode(
3059	const struct xfs_inode *ip)
3060	{
3061	return xfs_is_zoned_inode(ip) \|\|
3062	(ip->i_mount->m_always_cow && xfs_has_reflink(mp: ip->i_mount));
3063	}
3064

Provided by KDAB

Definitions

xfs_inode_cache
xfs_ilock_data_map_shared
xfs_ilock_attr_map_shared
xfs_lock_flags_assert
xfs_ilock
xfs_ilock_nowait
xfs_iunlock
xfs_ilock_demote
xfs_assert_ilocked
xfs_lockdep_subclass_ok
xfs_lock_inumorder
xfs_lock_inodes
xfs_lock_two_inodes
xfs_lookup
xfs_icreate
xfs_icreate_dqalloc
xfs_create
xfs_create_tmpfile
xfs_link
xfs_itruncate_clear_reflink_flags
xfs_itruncate_extents_flags
xfs_inactive_dir
xfs_inactive_truncate
xfs_inactive_ifree
xfs_inode_needs_inactive
xfs_inactive_health
xfs_inactive
xfs_iunlink_lookup
xfs_iunlink_reload_next
xfs_ifree_mark_inode_stale
xfs_ifree_cluster
xfs_ifree
xfs_iunpin
__xfs_iunpin_wait
xfs_iunpin_wait
xfs_remove
xfs_iunlock_rename
xfs_sort_for_rename
xfs_sort_inodes
xfs_rename_alloc_whiteout
xfs_rename
xfs_iflush
xfs_iflush_cluster
xfs_irele
xfs_log_force_inode
xfs_iolock_two_inodes_and_break_layout
xfs_mmaplock_two_inodes_and_break_dax_layout
xfs_ilock2_io_mmap
xfs_iunlock2_io_mmap
xfs_iunlock2_remapping
xfs_inode_reload_unlinked_bucket
xfs_inode_reload_unlinked
xfs_ifork_zapped
xfs_inode_count_blocks
xfs_wait_dax_page
xfs_break_dax_layouts
xfs_break_layouts
xfs_inode_alloc_unitsize

Improve your Profiling and Debugging skills

Find out more

Definitions

source code of linux/fs/xfs/xfs_inode.c