xfs_iwalk.c source code [linux/fs/xfs/xfs_iwalk.c]

1	// SPDX-License-Identifier: GPL-2.0-or-later
2	/*
3	* Copyright (C) 2019 Oracle. All Rights Reserved.
4	* Author: Darrick J. Wong <darrick.wong@oracle.com>
5	*/
6	#include "xfs.h"
7	#include "xfs_fs.h"
8	#include "xfs_shared.h"
9	#include "xfs_format.h"
10	#include "xfs_log_format.h"
11	#include "xfs_trans_resv.h"
12	#include "xfs_mount.h"
13	#include "xfs_inode.h"
14	#include "xfs_btree.h"
15	#include "xfs_ialloc.h"
16	#include "xfs_ialloc_btree.h"
17	#include "xfs_iwalk.h"
18	#include "xfs_error.h"
19	#include "xfs_trace.h"
20	#include "xfs_icache.h"
21	#include "xfs_health.h"
22	#include "xfs_trans.h"
23	#include "xfs_pwork.h"
24	#include "xfs_ag.h"
25	#include "xfs_bit.h"
26
27	/*
28	* Walking Inodes in the Filesystem
29	* ================================
30	*
31	* This iterator function walks a subset of filesystem inodes in increasing
32	* order from @startino until there are no more inodes. For each allocated
33	* inode it finds, it calls a walk function with the relevant inode number and
34	* a pointer to caller-provided data. The walk function can return the usual
35	* negative error code to stop the iteration; 0 to continue the iteration; or
36	* -ECANCELED to stop the iteration. This return value is returned to the
37	* caller.
38	*
39	* Internally, we allow the walk function to do anything, which means that we
40	* cannot maintain the inobt cursor or our lock on the AGI buffer. We
41	* therefore cache the inobt records in kernel memory and only call the walk
42	* function when our memory buffer is full. @nr_recs is the number of records
43	* that we've cached, and @sz_recs is the size of our cache.
44	*
45	* It is the responsibility of the walk function to ensure it accesses
46	* allocated inodes, as the inobt records may be stale by the time they are
47	* acted upon.
48	*/
49
50	struct xfs_iwalk_ag {
51	/ parallel work control data; will be null if single threaded /
52	struct xfs_pwork pwork;
53
54	struct xfs_mount *mp;
55	struct xfs_trans *tp;
56	struct xfs_perag *pag;
57
58	/ Where do we start the traversal? /
59	xfs_ino_t startino;
60
61	/ What was the last inode number we saw when iterating the inobt? /
62	xfs_ino_t lastino;
63
64	/ Array of inobt records we cache. /
65	struct xfs_inobt_rec_incore *recs;
66
67	/ Number of entries allocated for the @recs array. /
68	unsigned int sz_recs;
69
70	/ Number of entries in the @recs array that are in use. /
71	unsigned int nr_recs;
72
73	/ Inode walk function and data pointer. /
74	xfs_iwalk_fn iwalk_fn;
75	xfs_inobt_walk_fn inobt_walk_fn;
76	void *data;
77
78	/*
79	* Make it look like the inodes up to startino are free so that
80	* bulkstat can start its inode iteration at the correct place without
81	* needing to special case everywhere.
82	*/
83	unsigned int trim_start:`1`;
84
85	/ Skip empty inobt records? /
86	unsigned int skip_empty:`1`;
87
88	/ Drop the (hopefully empty) transaction when calling iwalk_fn. /
89	unsigned int drop_trans:`1`;
90	};
91
92	/*
93	* Loop over all clusters in a chunk for a given incore inode allocation btree
94	* record. Do a readahead if there are any allocated inodes in that cluster.
95	*/
96	STATIC void
97	xfs_iwalk_ichunk_ra(
98	struct xfs_mount *mp,
99	struct xfs_perag *pag,
100	struct xfs_inobt_rec_incore *irec)
101	{
102	struct xfs_ino_geometry *igeo = M_IGEO(mp);
103	xfs_agnumber_t agno = pag->pag_agno;
104	xfs_agblock_t agbno;
105	struct blk_plug plug;
106	int i; / inode chunk index /
107
108	agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino);
109
110	blk_start_plug(&plug);
111	for (i = `0`; i < XFS_INODES_PER_CHUNK; i += igeo->inodes_per_cluster) {
112	xfs_inofree_t imask;
113
114	imask = xfs_inobt_maskn(i, igeo->inodes_per_cluster);
115	if (imask & ~irec->ir_free) {
116	xfs_buf_readahead(mp->m_ddev_targp,
117	XFS_AGB_TO_DADDR(mp, agno, agbno),
118	igeo->blocks_per_cluster * mp->m_bsize,
119	&xfs_inode_buf_ops);
120	}
121	agbno += igeo->blocks_per_cluster;
122	}
123	blk_finish_plug(&plug);
124	}
125
126	/*
127	* Set the bits in @irec's free mask that correspond to the inodes before
128	* @agino so that we skip them. This is how we restart an inode walk that was
129	* interrupted in the middle of an inode record.
130	*/
131	STATIC void
132	xfs_iwalk_adjust_start(
133	xfs_agino_t agino, / starting inode of chunk /
134	struct xfs_inobt_rec_incore irec) /* btree record /
135	{
136	int idx; / index into inode chunk /
137
138	idx = agino - irec->ir_startino;
139
140	irec->ir_free \|= xfs_inobt_maskn(`0`, idx);
141	irec->ir_freecount = hweight64(irec->ir_free);
142	}
143
144	/ Allocate memory for a walk. /
145	STATIC int
146	xfs_iwalk_alloc(
147	struct xfs_iwalk_ag *iwag)
148	{
149	size_t size;
150
151	ASSERT(iwag->recs == NULL);
152	iwag->nr_recs = `0`;
153
154	/ Allocate a prefetch buffer for inobt records. /
155	size = iwag->sz_recs * sizeof(struct xfs_inobt_rec_incore);
156	iwag->recs = kmalloc(size, GFP_KERNEL \| __GFP_RETRY_MAYFAIL);
157	if (iwag->recs == NULL)
158	return -ENOMEM;
159
160	return `0`;
161	}
162
163	/ Free memory we allocated for a walk. /
164	STATIC void
165	xfs_iwalk_free(
166	struct xfs_iwalk_ag *iwag)
167	{
168	kfree(objp: iwag->recs);
169	iwag->recs = NULL;
170	}
171
172	/ For each inuse inode in each cached inobt record, call our function. /
173	STATIC int
174	xfs_iwalk_ag_recs(
175	struct xfs_iwalk_ag *iwag)
176	{
177	struct xfs_mount *mp = iwag->mp;
178	struct xfs_trans *tp = iwag->tp;
179	struct xfs_perag *pag = iwag->pag;
180	xfs_ino_t ino;
181	unsigned int i, j;
182	int error;
183
184	for (i = `0`; i < iwag->nr_recs; i++) {
185	struct xfs_inobt_rec_incore *irec = &iwag->recs[i];
186
187	trace_xfs_iwalk_ag_rec(mp, pag->pag_agno, irec);
188
189	if (xfs_pwork_want_abort(pwork: &iwag->pwork))
190	return `0`;
191
192	if (iwag->inobt_walk_fn) {
193	error = iwag->inobt_walk_fn(mp, tp, pag->pag_agno, irec,
194	iwag->data);
195	if (error)
196	return error;
197	}
198
199	if (!iwag->iwalk_fn)
200	continue;
201
202	for (j = `0`; j < XFS_INODES_PER_CHUNK; j++) {
203	if (xfs_pwork_want_abort(&iwag->pwork))
204	return `0`;
205
206	/ Skip if this inode is free /
207	if (XFS_INOBT_MASK(j) & irec->ir_free)
208	continue;
209
210	/ Otherwise call our function. /
211	ino = XFS_AGINO_TO_INO(mp, pag->pag_agno,
212	irec->ir_startino + j);
213	error = iwag->iwalk_fn(mp, tp, ino, iwag->data);
214	if (error)
215	return error;
216	}
217	}
218
219	return `0`;
220	}
221
222	/ Delete cursor and let go of AGI. /
223	static inline void
224	xfs_iwalk_del_inobt(
225	struct xfs_trans *tp,
226	struct xfs_btree_cur **curpp,
227	struct xfs_buf **agi_bpp,
228	int error)
229	{
230	if (*curpp) {
231	xfs_btree_del_cursor(*curpp, error);
232	*curpp = NULL;
233	}
234	if (*agi_bpp) {
235	xfs_trans_brelse(tp, *agi_bpp);
236	*agi_bpp = NULL;
237	}
238	}
239
240	/*
241	* Set ourselves up for walking inobt records starting from a given point in
242	* the filesystem.
243	*
244	* If caller passed in a nonzero start inode number, load the record from the
245	* inobt and make the record look like all the inodes before agino are free so
246	* that we skip them, and then move the cursor to the next inobt record. This
247	* is how we support starting an iwalk in the middle of an inode chunk.
248	*
249	* If the caller passed in a start number of zero, move the cursor to the first
250	* inobt record.
251	*
252	* The caller is responsible for cleaning up the cursor and buffer pointer
253	* regardless of the error status.
254	*/
255	STATIC int
256	xfs_iwalk_ag_start(
257	struct xfs_iwalk_ag *iwag,
258	xfs_agino_t agino,
259	struct xfs_btree_cur **curpp,
260	struct xfs_buf **agi_bpp,
261	int *has_more)
262	{
263	struct xfs_mount *mp = iwag->mp;
264	struct xfs_trans *tp = iwag->tp;
265	struct xfs_perag *pag = iwag->pag;
266	struct xfs_inobt_rec_incore *irec;
267	int error;
268
269	/ Set up a fresh cursor and empty the inobt cache. /
270	iwag->nr_recs = `0`;
271	error = xfs_ialloc_read_agi(pag, tp, agi_bpp);
272	if (error)
273	return error;
274	curpp = xfs_inobt_init_cursor(pag, tp, agi_bpp);
275
276	/ Starting at the beginning of the AG? That's easy! /
277	if (agino == `0`)
278	return xfs_inobt_lookup(*curpp, `0`, XFS_LOOKUP_GE, has_more);
279
280	/*
281	* Otherwise, we have to grab the inobt record where we left off, stuff
282	* the record into our cache, and then see if there are more records.
283	* We require a lookup cache of at least two elements so that the
284	* caller doesn't have to deal with tearing down the cursor to walk the
285	* records.
286	*/
287	error = xfs_inobt_lookup(*curpp, agino, XFS_LOOKUP_LE, has_more);
288	if (error)
289	return error;
290
291	/*
292	* If the LE lookup at @agino yields no records, jump ahead to the
293	* inobt cursor increment to see if there are more records to process.
294	*/
295	if (!*has_more)
296	goto out_advance;
297
298	/ Get the record, should always work /
299	irec = &iwag->recs[iwag->nr_recs];
300	error = xfs_inobt_get_rec(*curpp, irec, has_more);
301	if (error)
302	return error;
303	if (XFS_IS_CORRUPT(mp, *has_more != `1`)) {
304	xfs_btree_mark_sick(*curpp);
305	return -EFSCORRUPTED;
306	}
307
308	iwag->lastino = XFS_AGINO_TO_INO(mp, pag->pag_agno,
309	irec->ir_startino + XFS_INODES_PER_CHUNK - `1`);
310
311	/*
312	* If the LE lookup yielded an inobt record before the cursor position,
313	* skip it and see if there's another one after it.
314	*/
315	if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino)
316	goto out_advance;
317
318	/*
319	* If agino fell in the middle of the inode record, make it look like
320	* the inodes up to agino are free so that we don't return them again.
321	*/
322	if (iwag->trim_start)
323	xfs_iwalk_adjust_start(agino, irec);
324
325	/*
326	* The prefetch calculation is supposed to give us a large enough inobt
327	* record cache that grab_ichunk can stage a partial first record and
328	* the loop body can cache a record without having to check for cache
329	* space until after it reads an inobt record.
330	*/
331	iwag->nr_recs++;
332	ASSERT(iwag->nr_recs < iwag->sz_recs);
333
334	out_advance:
335	return xfs_btree_increment(*curpp, `0`, has_more);
336	}
337
338	/*
339	* The inobt record cache is full, so preserve the inobt cursor state and
340	* run callbacks on the cached inobt records. When we're done, restore the
341	* cursor state to wherever the cursor would have been had the cache not been
342	* full (and therefore we could've just incremented the cursor) if *@has_more
343	* is true. On exit, *@has_more will indicate whether or not the caller should
344	* try for more inode records.
345	*/
346	STATIC int
347	xfs_iwalk_run_callbacks(
348	struct xfs_iwalk_ag *iwag,
349	struct xfs_btree_cur **curpp,
350	struct xfs_buf **agi_bpp,
351	int *has_more)
352	{
353	struct xfs_mount *mp = iwag->mp;
354	struct xfs_inobt_rec_incore *irec;
355	xfs_agino_t next_agino;
356	int error;
357
358	next_agino = XFS_INO_TO_AGINO(mp, iwag->lastino) + `1`;
359
360	ASSERT(iwag->nr_recs > `0`);
361
362	/ Delete cursor but remember the last record we cached... /
363	xfs_iwalk_del_inobt(tp: iwag->tp, curpp, agi_bpp, error: `0`);
364	irec = &iwag->recs[iwag->nr_recs - `1`];
365	ASSERT(next_agino >= irec->ir_startino + XFS_INODES_PER_CHUNK);
366
367	if (iwag->drop_trans) {
368	xfs_trans_cancel(iwag->tp);
369	iwag->tp = NULL;
370	}
371
372	error = xfs_iwalk_ag_recs(iwag);
373	if (error)
374	return error;
375
376	/ ...empty the cache... /
377	iwag->nr_recs = `0`;
378
379	if (!has_more)
380	return `0`;
381
382	if (iwag->drop_trans) {
383	error = xfs_trans_alloc_empty(mp, tpp: &iwag->tp);
384	if (error)
385	return error;
386	}
387
388	/ ...and recreate the cursor just past where we left off. /
389	error = xfs_ialloc_read_agi(iwag->pag, iwag->tp, agi_bpp);
390	if (error)
391	return error;
392	curpp = xfs_inobt_init_cursor(iwag->pag, iwag->tp, agi_bpp);
393	return xfs_inobt_lookup(*curpp, next_agino, XFS_LOOKUP_GE, has_more);
394	}
395
396	/ Walk all inodes in a single AG, from @iwag->startino to the end of the AG. /
397	STATIC int
398	xfs_iwalk_ag(
399	struct xfs_iwalk_ag *iwag)
400	{
401	struct xfs_mount *mp = iwag->mp;
402	struct xfs_perag *pag = iwag->pag;
403	struct xfs_buf *agi_bp = NULL;
404	struct xfs_btree_cur *cur = NULL;
405	xfs_agino_t agino;
406	int has_more;
407	int error = `0`;
408
409	/ Set up our cursor at the right place in the inode btree. /
410	ASSERT(pag->pag_agno == XFS_INO_TO_AGNO(mp, iwag->startino));
411	agino = XFS_INO_TO_AGINO(mp, iwag->startino);
412	error = xfs_iwalk_ag_start(iwag, agino, &cur, &agi_bp, &has_more);
413
414	while (!error && has_more) {
415	struct xfs_inobt_rec_incore *irec;
416	xfs_ino_t rec_fsino;
417
418	cond_resched();
419	if (xfs_pwork_want_abort(pwork: &iwag->pwork))
420	goto out;
421
422	/ Fetch the inobt record. /
423	irec = &iwag->recs[iwag->nr_recs];
424	error = xfs_inobt_get_rec(cur, irec, &has_more);
425	if (error \|\| !has_more)
426	break;
427
428	/ Make sure that we always move forward. /
429	rec_fsino = XFS_AGINO_TO_INO(mp, pag->pag_agno, irec->ir_startino);
430	if (iwag->lastino != NULLFSINO &&
431	XFS_IS_CORRUPT(mp, iwag->lastino >= rec_fsino)) {
432	xfs_btree_mark_sick(cur);
433	error = -EFSCORRUPTED;
434	goto out;
435	}
436	iwag->lastino = rec_fsino + XFS_INODES_PER_CHUNK - `1`;
437
438	/ No allocated inodes in this chunk; skip it. /
439	if (iwag->skip_empty && irec->ir_freecount == irec->ir_count) {
440	error = xfs_btree_increment(cur, `0`, &has_more);
441	if (error)
442	break;
443	continue;
444	}
445
446	/*
447	* Start readahead for this inode chunk in anticipation of
448	* walking the inodes.
449	*/
450	if (iwag->iwalk_fn)
451	xfs_iwalk_ichunk_ra(mp, pag, irec);
452
453	/*
454	* If there's space in the buffer for more records, increment
455	* the btree cursor and grab more.
456	*/
457	if (++iwag->nr_recs < iwag->sz_recs) {
458	error = xfs_btree_increment(cur, `0`, &has_more);
459	if (error \|\| !has_more)
460	break;
461	continue;
462	}
463
464	/*
465	* Otherwise, we need to save cursor state and run the callback
466	* function on the cached records. The run_callbacks function
467	* is supposed to return a cursor pointing to the record where
468	* we would be if we had been able to increment like above.
469	*/
470	ASSERT(has_more);
471	error = xfs_iwalk_run_callbacks(iwag, curpp: &cur, agi_bpp: &agi_bp, has_more: &has_more);
472	}
473
474	if (iwag->nr_recs == `0` \|\| error)
475	goto out;
476
477	/ Walk the unprocessed records in the cache. /
478	error = xfs_iwalk_run_callbacks(iwag, curpp: &cur, agi_bpp: &agi_bp, has_more: &has_more);
479
480	out:
481	xfs_iwalk_del_inobt(tp: iwag->tp, curpp: &cur, agi_bpp: &agi_bp, error);
482	return error;
483	}
484
/*
 * We experimentally determined that the reduction in ioctl call overhead
 * diminishes when userspace asks for more than 2048 inodes, so we'll cap
 * prefetch at this point.  The value is unsigned so that it can be compared
 * directly with the unsigned inode counts passed to xfs_iwalk_prefetch().
 */
#define IWALK_MAX_INODE_PREFETCH	(2048U)
491
492	/*
493	* Given the number of inodes to prefetch, set the number of inobt records that
494	* we cache in memory, which controls the number of inodes we try to read
495	* ahead. Set the maximum if @inodes == 0.
496	*/
497	static inline unsigned int
498	xfs_iwalk_prefetch(
499	unsigned int inodes)
500	{
501	unsigned int inobt_records;
502
503	/*
504	* If the caller didn't tell us the number of inodes they wanted,
505	* assume the maximum prefetch possible for best performance.
506	* Otherwise, cap prefetch at that maximum so that we don't start an
507	* absurd amount of prefetch.
508	*/
509	if (inodes == `0`)
510	inodes = IWALK_MAX_INODE_PREFETCH;
511	inodes = min(inodes, IWALK_MAX_INODE_PREFETCH);
512
513	/ Round the inode count up to a full chunk. /
514	inodes = round_up(inodes, XFS_INODES_PER_CHUNK);
515
516	/*
517	* In order to convert the number of inodes to prefetch into an
518	* estimate of the number of inobt records to cache, we require a
519	* conversion factor that reflects our expectations of the average
520	* loading factor of an inode chunk. Based on data gathered, most
521	* (but not all) filesystems manage to keep the inode chunks totally
522	* full, so we'll underestimate slightly so that our readahead will
523	* still deliver the performance we want on aging filesystems:
524	*
525	* inobt = inodes / (INODES_PER_CHUNK * (4 / 5));
526	*
527	* The funny math is to avoid integer division.
528	*/
529	inobt_records = (inodes * `5`) / (`4` * XFS_INODES_PER_CHUNK);
530
531	/*
532	* Allocate enough space to prefetch at least two inobt records so that
533	* we can cache both the record where the iwalk started and the next
534	* record. This simplifies the AG inode walk loop setup code.
535	*/
536	return max(inobt_records, `2U`);
537	}
538
539	/*
540	* Walk all inodes in the filesystem starting from @startino. The @iwalk_fn
541	* will be called for each allocated inode, being passed the inode's number and
542	* @data. @max_prefetch controls how many inobt records' worth of inodes we
543	* try to readahead.
544	*/
545	int
546	xfs_iwalk(
547	struct xfs_mount *mp,
548	struct xfs_trans *tp,
549	xfs_ino_t startino,
550	unsigned int flags,
551	xfs_iwalk_fn iwalk_fn,
552	unsigned int inode_records,
553	void *data)
554	{
555	struct xfs_iwalk_ag iwag = {
556	.mp = mp,
557	.tp = tp,
558	.iwalk_fn = iwalk_fn,
559	.data = data,
560	.startino = startino,
561	.sz_recs = xfs_iwalk_prefetch(inode_records),
562	.trim_start = `1`,
563	.skip_empty = `1`,
564	.pwork = XFS_PWORK_SINGLE_THREADED,
565	.lastino = NULLFSINO,
566	};
567	struct xfs_perag *pag;
568	xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino);
569	int error;
570
571	ASSERT(agno < mp->m_sb.sb_agcount);
572	ASSERT(!(flags & ~XFS_IWALK_FLAGS_ALL));
573
574	error = xfs_iwalk_alloc(iwag: &iwag);
575	if (error)
576	return error;
577
578	for_each_perag_from(mp, agno, pag) {
579	iwag.pag = pag;
580	error = xfs_iwalk_ag(&iwag);
581	if (error)
582	break;
583	iwag.startino = XFS_AGINO_TO_INO(mp, agno + `1`, `0`);
584	if (flags & XFS_INOBT_WALK_SAME_AG)
585	break;
586	iwag.pag = NULL;
587	}
588
589	if (iwag.pag)
590	xfs_perag_rele(pag);
591	xfs_iwalk_free(iwag: &iwag);
592	return error;
593	}
594
595	/ Run per-thread iwalk work. /
596	static int
597	xfs_iwalk_ag_work(
598	struct xfs_mount *mp,
599	struct xfs_pwork *pwork)
600	{
601	struct xfs_iwalk_ag *iwag;
602	int error = `0`;
603
604	iwag = container_of(pwork, struct xfs_iwalk_ag, pwork);
605	if (xfs_pwork_want_abort(pwork))
606	goto out;
607
608	error = xfs_iwalk_alloc(iwag);
609	if (error)
610	goto out;
611	/*
612	* Grab an empty transaction so that we can use its recursive buffer
613	* locking abilities to detect cycles in the inobt without deadlocking.
614	*/
615	error = xfs_trans_alloc_empty(mp, tpp: &iwag->tp);
616	if (error)
617	goto out;
618	iwag->drop_trans = `1`;
619
620	error = xfs_iwalk_ag(iwag);
621	if (iwag->tp)
622	xfs_trans_cancel(iwag->tp);
623	xfs_iwalk_free(iwag);
624	out:
625	xfs_perag_put(iwag->pag);
626	kfree(objp: iwag);
627	return error;
628	}
629
630	/*
631	* Walk all the inodes in the filesystem using multiple threads to process each
632	* AG.
633	*/
634	int
635	xfs_iwalk_threaded(
636	struct xfs_mount *mp,
637	xfs_ino_t startino,
638	unsigned int flags,
639	xfs_iwalk_fn iwalk_fn,
640	unsigned int inode_records,
641	bool polled,
642	void *data)
643	{
644	struct xfs_pwork_ctl pctl;
645	struct xfs_perag *pag;
646	xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino);
647	int error;
648
649	ASSERT(agno < mp->m_sb.sb_agcount);
650	ASSERT(!(flags & ~XFS_IWALK_FLAGS_ALL));
651
652	error = xfs_pwork_init(mp, pctl: &pctl, work_fn: xfs_iwalk_ag_work, tag: "xfs_iwalk");
653	if (error)
654	return error;
655
656	for_each_perag_from(mp, agno, pag) {
657	struct xfs_iwalk_ag *iwag;
658
659	if (xfs_pwork_ctl_want_abort(&pctl))
660	break;
661
662	iwag = kzalloc(sizeof(struct xfs_iwalk_ag),
663	GFP_KERNEL \| __GFP_NOFAIL);
664	iwag->mp = mp;
665
666	/*
667	* perag is being handed off to async work, so take a passive
668	* reference for the async work to release.
669	*/
670	iwag->pag = xfs_perag_hold(pag);
671	iwag->iwalk_fn = iwalk_fn;
672	iwag->data = data;
673	iwag->startino = startino;
674	iwag->sz_recs = xfs_iwalk_prefetch(inode_records);
675	iwag->lastino = NULLFSINO;
676	xfs_pwork_queue(&pctl, &iwag->pwork);
677	startino = XFS_AGINO_TO_INO(mp, pag->pag_agno + `1`, `0`);
678	if (flags & XFS_INOBT_WALK_SAME_AG)
679	break;
680	}
681	if (pag)
682	xfs_perag_rele(pag);
683	if (polled)
684	xfs_pwork_poll(pctl: &pctl);
685	return xfs_pwork_destroy(pctl: &pctl);
686	}
687
/*
 * Upper bound on the number of inobt records a caller may have cached: as
 * many incore records as fit in one page.  This mirrors the existing
 * inumbers prefetching behavior.  The inobt walk itself does nothing with
 * the records, so a fairly high limit is fine.
 */
#define MAX_INOBT_WALK_PREFETCH	(PAGE_SIZE / sizeof(struct xfs_inobt_rec_incore))
696
697	/*
698	* Given the number of records that the user wanted, set the number of inobt
699	* records that we buffer in memory. Set the maximum if @inobt_records == 0.
700	*/
701	static inline unsigned int
702	xfs_inobt_walk_prefetch(
703	unsigned int inobt_records)
704	{
705	/*
706	* If the caller didn't tell us the number of inobt records they
707	* wanted, assume the maximum prefetch possible for best performance.
708	*/
709	if (inobt_records == `0`)
710	inobt_records = MAX_INOBT_WALK_PREFETCH;
711
712	/*
713	* Allocate enough space to prefetch at least two inobt records so that
714	* we can cache both the record where the iwalk started and the next
715	* record. This simplifies the AG inode walk loop setup code.
716	*/
717	inobt_records = max(inobt_records, `2U`);
718
719	/*
720	* Cap prefetch at that maximum so that we don't use an absurd amount
721	* of memory.
722	*/
723	return min_t(unsigned int, inobt_records, MAX_INOBT_WALK_PREFETCH);
724	}
725
726	/*
727	* Walk all inode btree records in the filesystem starting from @startino. The
728	* @inobt_walk_fn will be called for each btree record, being passed the incore
729	* record and @data. @max_prefetch controls how many inobt records we try to
730	* cache ahead of time.
731	*/
732	int
733	xfs_inobt_walk(
734	struct xfs_mount *mp,
735	struct xfs_trans *tp,
736	xfs_ino_t startino,
737	unsigned int flags,
738	xfs_inobt_walk_fn inobt_walk_fn,
739	unsigned int inobt_records,
740	void *data)
741	{
742	struct xfs_iwalk_ag iwag = {
743	.mp = mp,
744	.tp = tp,
745	.inobt_walk_fn = inobt_walk_fn,
746	.data = data,
747	.startino = startino,
748	.sz_recs = xfs_inobt_walk_prefetch(inobt_records),
749	.pwork = XFS_PWORK_SINGLE_THREADED,
750	.lastino = NULLFSINO,
751	};
752	struct xfs_perag *pag;
753	xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino);
754	int error;
755
756	ASSERT(agno < mp->m_sb.sb_agcount);
757	ASSERT(!(flags & ~XFS_INOBT_WALK_FLAGS_ALL));
758
759	error = xfs_iwalk_alloc(iwag: &iwag);
760	if (error)
761	return error;
762
763	for_each_perag_from(mp, agno, pag) {
764	iwag.pag = pag;
765	error = xfs_iwalk_ag(&iwag);
766	if (error)
767	break;
768	iwag.startino = XFS_AGINO_TO_INO(mp, pag->pag_agno + `1`, `0`);
769	if (flags & XFS_INOBT_WALK_SAME_AG)
770	break;
771	iwag.pag = NULL;
772	}
773
774	if (iwag.pag)
775	xfs_perag_rele(pag);
776	xfs_iwalk_free(iwag: &iwag);
777	return error;
778	}
779

source code of linux/fs/xfs/xfs_iwalk.c