1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * Copyright (c) 2021-2024 Oracle. All Rights Reserved. |
4 | * Author: Darrick J. Wong <djwong@kernel.org> |
5 | */ |
6 | #include "xfs.h" |
7 | #include "xfs_fs.h" |
8 | #include "xfs_shared.h" |
9 | #include "xfs_format.h" |
10 | #include "xfs_trans_resv.h" |
11 | #include "xfs_mount.h" |
12 | #include "xfs_log_format.h" |
13 | #include "xfs_trans.h" |
14 | #include "xfs_inode.h" |
15 | #include "xfs_btree.h" |
16 | #include "xfs_ialloc.h" |
17 | #include "xfs_ialloc_btree.h" |
18 | #include "xfs_ag.h" |
19 | #include "xfs_error.h" |
20 | #include "xfs_bit.h" |
21 | #include "xfs_icache.h" |
22 | #include "scrub/scrub.h" |
23 | #include "scrub/iscan.h" |
24 | #include "scrub/common.h" |
25 | #include "scrub/trace.h" |
26 | |
27 | /* |
28 | * Live File Scan |
29 | * ============== |
30 | * |
31 | * Live file scans walk every inode in a live filesystem. This is more or |
32 | * less like a regular iwalk, except that when we're advancing the scan cursor, |
33 | * we must ensure that inodes cannot be added or deleted anywhere between the |
34 | * old cursor value and the new cursor value. If we're advancing the cursor |
35 | * by one inode, the caller must hold that inode; if we're finding the next |
36 | * inode to scan, we must grab the AGI and hold it until we've updated the |
37 | * scan cursor. |
38 | * |
39 | * Callers are expected to use this code to scan all files in the filesystem to |
40 | * construct a new metadata index of some kind. The scan races against other |
41 | * live updates, which means there must be a provision to update the new index |
 * when updates are made to inodes that have already been scanned.  The iscan
43 | * can be used in live update hook code to stop the scan and protect this data |
44 | * structure. |
45 | * |
46 | * To keep the new index up to date with other metadata updates being made to |
47 | * the live filesystem, it is assumed that the caller will add hooks as needed |
48 | * to be notified when a metadata update occurs. The inode scanner must tell |
 * the hook code when an inode has been visited with xchk_iscan_mark_visited.
50 | * Hook functions can use xchk_iscan_want_live_update to decide if the |
51 | * scanner's observations must be updated. |
52 | */ |
53 | |
54 | /* |
55 | * If the inobt record @rec covers @iscan->skip_ino, mark the inode free so |
56 | * that the scan ignores that inode. |
57 | */ |
58 | STATIC void |
59 | xchk_iscan_mask_skipino( |
60 | struct xchk_iscan *iscan, |
61 | struct xfs_perag *pag, |
62 | struct xfs_inobt_rec_incore *rec, |
63 | xfs_agino_t lastrecino) |
64 | { |
65 | struct xfs_scrub *sc = iscan->sc; |
66 | struct xfs_mount *mp = sc->mp; |
67 | xfs_agnumber_t skip_agno = XFS_INO_TO_AGNO(mp, iscan->skip_ino); |
68 | xfs_agnumber_t skip_agino = XFS_INO_TO_AGINO(mp, iscan->skip_ino); |
69 | |
70 | if (pag->pag_agno != skip_agno) |
71 | return; |
72 | if (skip_agino < rec->ir_startino) |
73 | return; |
74 | if (skip_agino > lastrecino) |
75 | return; |
76 | |
77 | rec->ir_free |= xfs_inobt_maskn(skip_agino - rec->ir_startino, 1); |
78 | } |
79 | |
80 | /* |
81 | * Set *cursor to the next allocated inode after whatever it's set to now. |
82 | * If there are no more inodes in this AG, cursor is set to NULLAGINO. |
83 | */ |
STATIC int
xchk_iscan_find_next(
	struct xchk_iscan	*iscan,
	struct xfs_buf		*agi_bp,
	struct xfs_perag	*pag,
	xfs_inofree_t		*allocmaskp,
	xfs_agino_t		*cursor,
	uint8_t			*nr_inodesp)
{
	struct xfs_scrub	*sc = iscan->sc;
	struct xfs_inobt_rec_incore	rec;
	struct xfs_btree_cur	*cur;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_trans	*tp = sc->tp;
	xfs_agnumber_t		agno = pag->pag_agno;
	xfs_agino_t		lastino = NULLAGINO;	/* last agino of previous record */
	xfs_agino_t		first, last;
	xfs_agino_t		agino = *cursor;
	int			has_rec;
	int			error;

	/* If the cursor is beyond the end of this AG, move to the next one. */
	xfs_agino_range(mp, agno, &first, &last);
	if (agino > last) {
		*cursor = NULLAGINO;
		return 0;
	}

	/*
	 * Look up the inode chunk for the current cursor position.  If there
	 * is no chunk here, we want the next one.
	 */
	cur = xfs_inobt_init_cursor(pag, tp, agi_bp);
	error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &has_rec);
	if (!error && !has_rec)
		error = xfs_btree_increment(cur, 0, &has_rec);
	for (; !error; error = xfs_btree_increment(cur, 0, &has_rec)) {
		xfs_inofree_t	allocmask;

		/*
		 * If we've run out of inobt records in this AG, move the
		 * cursor on to the next AG and exit.  The caller can try
		 * again with the next AG.
		 */
		if (!has_rec) {
			*cursor = NULLAGINO;
			break;
		}

		error = xfs_inobt_get_rec(cur, &rec, &has_rec);
		if (error)
			break;
		if (!has_rec) {
			/* The increment said there was a record, but it's gone. */
			error = -EFSCORRUPTED;
			break;
		}

		/* Make sure that we always move forward. */
		if (lastino != NULLAGINO &&
		    XFS_IS_CORRUPT(mp, lastino >= rec.ir_startino)) {
			error = -EFSCORRUPTED;
			break;
		}
		/* Remember the highest inode covered by this record. */
		lastino = rec.ir_startino + XFS_INODES_PER_CHUNK - 1;

		/*
		 * If this record only covers inodes that come before the
		 * cursor, advance to the next record.
		 */
		if (rec.ir_startino + XFS_INODES_PER_CHUNK <= agino)
			continue;

		/* Pretend the inode the caller wants skipped is free. */
		if (iscan->skip_ino)
			xchk_iscan_mask_skipino(iscan, pag, &rec, lastino);

		/*
		 * If the incoming lookup put us in the middle of an inobt
		 * record, mark it and the previous inodes "free" so that the
		 * search for allocated inodes will start at the cursor.
		 * We don't care about ir_freecount here.
		 */
		if (agino >= rec.ir_startino)
			rec.ir_free |= xfs_inobt_maskn(0,
					agino + 1 - rec.ir_startino);

		/*
		 * If there are allocated inodes in this chunk, find them
		 * and update the scan cursor.
		 */
		allocmask = ~rec.ir_free;
		if (hweight64(allocmask) > 0) {
			int	next = xfs_lowbit64(allocmask);

			ASSERT(next >= 0);
			*cursor = rec.ir_startino + next;
			/* Shift so that bit 0 of *allocmaskp means *cursor. */
			*allocmaskp = allocmask >> next;
			*nr_inodesp = XFS_INODES_PER_CHUNK - next;
			break;
		}
	}

	xfs_btree_del_cursor(cur, error);
	return error;
}
188 | |
189 | /* |
190 | * Advance both the scan and the visited cursors. |
191 | * |
192 | * The inumber address space for a given filesystem is sparse, which means that |
193 | * the scan cursor can jump a long ways in a single iter() call. There are no |
194 | * inodes in these sparse areas, so we must move the visited cursor forward at |
195 | * the same time so that the scan user can receive live updates for inodes that |
196 | * may get created once we release the AGI buffer. |
197 | */ |
198 | static inline void |
199 | xchk_iscan_move_cursor( |
200 | struct xchk_iscan *iscan, |
201 | xfs_agnumber_t agno, |
202 | xfs_agino_t agino) |
203 | { |
204 | struct xfs_scrub *sc = iscan->sc; |
205 | struct xfs_mount *mp = sc->mp; |
206 | xfs_ino_t cursor, visited; |
207 | |
208 | BUILD_BUG_ON(XFS_MAXINUMBER == NULLFSINO); |
209 | |
210 | /* |
211 | * Special-case ino == 0 here so that we never set visited_ino to |
212 | * NULLFSINO when wrapping around EOFS, for that will let through all |
213 | * live updates. |
214 | */ |
215 | cursor = XFS_AGINO_TO_INO(mp, agno, agino); |
216 | if (cursor == 0) |
217 | visited = XFS_MAXINUMBER; |
218 | else |
219 | visited = cursor - 1; |
220 | |
221 | mutex_lock(&iscan->lock); |
222 | iscan->cursor_ino = cursor; |
223 | iscan->__visited_ino = visited; |
224 | trace_xchk_iscan_move_cursor(iscan); |
225 | mutex_unlock(&iscan->lock); |
226 | } |
227 | |
228 | /* |
229 | * Prepare to return agno/agino to the iscan caller by moving the lastino |
230 | * cursor to the previous inode. Do this while we still hold the AGI so that |
231 | * no other threads can create or delete inodes in this AG. |
232 | */ |
233 | static inline void |
234 | xchk_iscan_finish( |
235 | struct xchk_iscan *iscan) |
236 | { |
237 | mutex_lock(&iscan->lock); |
238 | iscan->cursor_ino = NULLFSINO; |
239 | |
240 | /* All live updates will be applied from now on */ |
241 | iscan->__visited_ino = NULLFSINO; |
242 | |
243 | mutex_unlock(&iscan->lock); |
244 | } |
245 | |
246 | /* |
247 | * Advance ino to the next inode that the inobt thinks is allocated, being |
248 | * careful to jump to the next AG if we've reached the right end of this AG's |
249 | * inode btree. Advancing ino effectively means that we've pushed the inode |
250 | * scan forward, so set the iscan cursor to (ino - 1) so that our live update |
251 | * predicates will track inode allocations in that part of the inode number |
252 | * key space once we release the AGI buffer. |
253 | * |
254 | * Returns 1 if there's a new inode to examine, 0 if we've run out of inodes, |
255 | * -ECANCELED if the live scan aborted, or the usual negative errno. |
256 | */ |
STATIC int
xchk_iscan_advance(
	struct xchk_iscan	*iscan,
	struct xfs_perag	**pagp,
	struct xfs_buf		**agi_bpp,
	xfs_inofree_t		*allocmaskp,
	uint8_t			*nr_inodesp)
{
	struct xfs_scrub	*sc = iscan->sc;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_buf		*agi_bp;
	struct xfs_perag	*pag;
	xfs_agnumber_t		agno;
	xfs_agino_t		agino;
	int			ret;

	/* The visited cursor must never get ahead of the scan cursor. */
	ASSERT(iscan->cursor_ino >= iscan->__visited_ino);

	do {
		if (xchk_iscan_aborted(iscan))
			return -ECANCELED;

		agno = XFS_INO_TO_AGNO(mp, iscan->cursor_ino);
		pag = xfs_perag_get(mp, agno);
		if (!pag)
			return -ECANCELED;

		/* Hold the AGI so nobody can create or free inodes here. */
		ret = xfs_ialloc_read_agi(pag, sc->tp, &agi_bp);
		if (ret)
			goto out_pag;

		agino = XFS_INO_TO_AGINO(mp, iscan->cursor_ino);
		ret = xchk_iscan_find_next(iscan, agi_bp, pag, allocmaskp,
				&agino, nr_inodesp);
		if (ret)
			goto out_buf;

		if (agino != NULLAGINO) {
			/*
			 * Found the next inode in this AG, so return it along
			 * with the AGI buffer and the perag structure to
			 * ensure it cannot go away.
			 */
			xchk_iscan_move_cursor(iscan, agno, agino);
			*agi_bpp = agi_bp;
			*pagp = pag;
			return 1;
		}

		/*
		 * Did not find any more inodes in this AG, move on to the next
		 * AG.
		 */
		agno = (agno + 1) % mp->m_sb.sb_agcount;
		xchk_iscan_move_cursor(iscan, agno, 0);
		xfs_trans_brelse(sc->tp, agi_bp);
		xfs_perag_put(pag);

		trace_xchk_iscan_advance_ag(iscan);
	} while (iscan->cursor_ino != iscan->scan_start_ino);

	/* Wrapped all the way back to where we started, so we're done. */
	xchk_iscan_finish(iscan);
	return 0;

out_buf:
	xfs_trans_brelse(sc->tp, agi_bp);
out_pag:
	xfs_perag_put(pag);
	return ret;
}
327 | |
328 | /* |
329 | * Grabbing the inode failed, so we need to back up the scan and ask the caller |
330 | * to try to _advance the scan again. Returns -EBUSY if we've run out of retry |
331 | * opportunities, -ECANCELED if the process has a fatal signal pending, or |
332 | * -EAGAIN if we should try again. |
333 | */ |
334 | STATIC int |
335 | xchk_iscan_iget_retry( |
336 | struct xchk_iscan *iscan, |
337 | bool wait) |
338 | { |
339 | ASSERT(iscan->cursor_ino == iscan->__visited_ino + 1); |
340 | |
341 | if (!iscan->iget_timeout || |
342 | time_is_before_jiffies(iscan->__iget_deadline)) |
343 | return -EBUSY; |
344 | |
345 | if (wait) { |
346 | unsigned long relax; |
347 | |
348 | /* |
349 | * Sleep for a period of time to let the rest of the system |
350 | * catch up. If we return early, someone sent a kill signal to |
351 | * the calling process. |
352 | */ |
353 | relax = msecs_to_jiffies(iscan->iget_retry_delay); |
354 | trace_xchk_iscan_iget_retry_wait(iscan); |
355 | |
356 | if (schedule_timeout_killable(relax) || |
357 | xchk_iscan_aborted(iscan)) |
358 | return -ECANCELED; |
359 | } |
360 | |
361 | iscan->cursor_ino--; |
362 | return -EAGAIN; |
363 | } |
364 | |
365 | /* |
366 | * Grab an inode as part of an inode scan. While scanning this inode, the |
367 | * caller must ensure that no other threads can modify the inode until a call |
368 | * to xchk_iscan_visit succeeds. |
369 | * |
370 | * Returns the number of incore inodes grabbed; -EAGAIN if the caller should |
 * call xchk_iscan_advance again; -EBUSY if we couldn't grab an inode;
372 | * -ECANCELED if there's a fatal signal pending; or some other negative errno. |
373 | */ |
STATIC int
xchk_iscan_iget(
	struct xchk_iscan	*iscan,
	struct xfs_perag	*pag,
	struct xfs_buf		*agi_bp,
	xfs_inofree_t		allocmask,
	uint8_t			nr_inodes)
{
	struct xfs_scrub	*sc = iscan->sc;
	struct xfs_mount	*mp = sc->mp;
	xfs_ino_t		ino = iscan->cursor_ino;
	unsigned int		idx = 0;	/* next open slot in __inodes */
	unsigned int		i;
	int			error;

	ASSERT(iscan->__inodes[0] == NULL);

	/* Fill the first slot in the inode array. */
	error = xfs_iget(sc->mp, sc->tp, ino, XFS_IGET_NORETRY, 0,
			&iscan->__inodes[idx]);

	trace_xchk_iscan_iget(iscan, error);

	if (error == -ENOENT || error == -EAGAIN) {
		xfs_trans_brelse(sc->tp, agi_bp);
		xfs_perag_put(pag);

		/*
		 * It's possible that this inode has lost all of its links but
		 * hasn't yet been inactivated.  If we don't have a transaction
		 * or it's not writable, flush the inodegc workers and wait.
		 */
		xfs_inodegc_flush(mp);
		return xchk_iscan_iget_retry(iscan, true);
	}

	if (error == -EINVAL) {
		xfs_trans_brelse(sc->tp, agi_bp);
		xfs_perag_put(pag);

		/*
		 * We thought the inode was allocated, but the inode btree
		 * lookup failed, which means that it was freed since the last
		 * time we advanced the cursor.  Back up and try again.  This
		 * should never happen since we still hold the AGI buffer from
		 * the inobt check, but we need to be careful about infinite
		 * loops.
		 */
		return xchk_iscan_iget_retry(iscan, false);
	}

	if (error) {
		/* Any other iget error is fatal to the batch. */
		xfs_trans_brelse(sc->tp, agi_bp);
		xfs_perag_put(pag);
		return error;
	}
	idx++;
	ino++;
	allocmask >>= 1;

	/*
	 * Now that we've filled the first slot in __inodes, try to fill the
	 * rest of the batch with consecutively ordered inodes to reduce the
	 * number of _iter calls.  Make a bitmap of unallocated inodes from the
	 * zeroes in the inuse bitmap; these inodes will not be scanned, but
	 * the _want_live_update predicate will pass through all live updates.
	 *
	 * If we can't iget an allocated inode, stop and return what we have.
	 */
	mutex_lock(&iscan->lock);
	iscan->__batch_ino = ino - 1;
	iscan->__skipped_inomask = 0;
	mutex_unlock(&iscan->lock);

	for (i = 1; i < nr_inodes; i++, ino++, allocmask >>= 1) {
		if (!(allocmask & 1)) {
			/* Unallocated inode; record it as skipped. */
			ASSERT(!(iscan->__skipped_inomask & (1ULL << i)));

			mutex_lock(&iscan->lock);
			iscan->cursor_ino = ino;
			iscan->__skipped_inomask |= (1ULL << i);
			mutex_unlock(&iscan->lock);
			continue;
		}

		ASSERT(iscan->__inodes[idx] == NULL);

		error = xfs_iget(sc->mp, sc->tp, ino, XFS_IGET_NORETRY, 0,
				&iscan->__inodes[idx]);
		if (error)
			break;

		mutex_lock(&iscan->lock);
		iscan->cursor_ino = ino;
		mutex_unlock(&iscan->lock);
		idx++;
	}

	trace_xchk_iscan_iget_batch(sc->mp, iscan, nr_inodes, idx);
	xfs_trans_brelse(sc->tp, agi_bp);
	xfs_perag_put(pag);
	return idx;
}
476 | |
477 | /* |
478 | * Advance the visit cursor to reflect skipped inodes beyond whatever we |
479 | * scanned. |
480 | */ |
481 | STATIC void |
482 | xchk_iscan_finish_batch( |
483 | struct xchk_iscan *iscan) |
484 | { |
485 | xfs_ino_t highest_skipped; |
486 | |
487 | mutex_lock(&iscan->lock); |
488 | |
489 | if (iscan->__batch_ino != NULLFSINO) { |
490 | highest_skipped = iscan->__batch_ino + |
491 | xfs_highbit64(iscan->__skipped_inomask); |
492 | iscan->__visited_ino = max(iscan->__visited_ino, |
493 | highest_skipped); |
494 | |
495 | trace_xchk_iscan_skip(iscan); |
496 | } |
497 | |
498 | iscan->__batch_ino = NULLFSINO; |
499 | iscan->__skipped_inomask = 0; |
500 | |
501 | mutex_unlock(&iscan->lock); |
502 | } |
503 | |
504 | /* |
505 | * Advance the inode scan cursor to the next allocated inode and return up to |
506 | * 64 consecutive allocated inodes starting with the cursor position. |
507 | */ |
508 | STATIC int |
509 | xchk_iscan_iter_batch( |
510 | struct xchk_iscan *iscan) |
511 | { |
512 | struct xfs_scrub *sc = iscan->sc; |
513 | int ret; |
514 | |
515 | xchk_iscan_finish_batch(iscan); |
516 | |
517 | if (iscan->iget_timeout) |
518 | iscan->__iget_deadline = jiffies + |
519 | msecs_to_jiffies(iscan->iget_timeout); |
520 | |
521 | do { |
522 | struct xfs_buf *agi_bp = NULL; |
523 | struct xfs_perag *pag = NULL; |
524 | xfs_inofree_t allocmask = 0; |
525 | uint8_t nr_inodes = 0; |
526 | |
527 | ret = xchk_iscan_advance(iscan, &pag, &agi_bp, &allocmask, |
528 | &nr_inodes); |
529 | if (ret != 1) |
530 | return ret; |
531 | |
532 | if (xchk_iscan_aborted(iscan)) { |
533 | xfs_trans_brelse(sc->tp, agi_bp); |
534 | xfs_perag_put(pag); |
535 | ret = -ECANCELED; |
536 | break; |
537 | } |
538 | |
539 | ret = xchk_iscan_iget(iscan, pag, agi_bp, allocmask, nr_inodes); |
540 | } while (ret == -EAGAIN); |
541 | |
542 | return ret; |
543 | } |
544 | |
545 | /* |
546 | * Advance the inode scan cursor to the next allocated inode and return the |
547 | * incore inode structure associated with it. |
548 | * |
549 | * Returns 1 if there's a new inode to examine, 0 if we've run out of inodes, |
550 | * -ECANCELED if the live scan aborted, -EBUSY if the incore inode could not be |
551 | * grabbed, or the usual negative errno. |
552 | * |
553 | * If the function returns -EBUSY and the caller can handle skipping an inode, |
554 | * it may call this function again to continue the scan with the next allocated |
555 | * inode. |
556 | */ |
557 | int |
558 | xchk_iscan_iter( |
559 | struct xchk_iscan *iscan, |
560 | struct xfs_inode **ipp) |
561 | { |
562 | unsigned int i; |
563 | int error; |
564 | |
565 | /* Find a cached inode, or go get another batch. */ |
566 | for (i = 0; i < XFS_INODES_PER_CHUNK; i++) { |
567 | if (iscan->__inodes[i]) |
568 | goto foundit; |
569 | } |
570 | |
571 | error = xchk_iscan_iter_batch(iscan); |
572 | if (error <= 0) |
573 | return error; |
574 | |
575 | ASSERT(iscan->__inodes[0] != NULL); |
576 | i = 0; |
577 | |
578 | foundit: |
579 | /* Give the caller our reference. */ |
580 | *ipp = iscan->__inodes[i]; |
581 | iscan->__inodes[i] = NULL; |
582 | return 1; |
583 | } |
584 | |
585 | /* Clean up an xfs_iscan_iter call by dropping any inodes that we still hold. */ |
586 | void |
587 | xchk_iscan_iter_finish( |
588 | struct xchk_iscan *iscan) |
589 | { |
590 | struct xfs_scrub *sc = iscan->sc; |
591 | unsigned int i; |
592 | |
593 | for (i = 0; i < XFS_INODES_PER_CHUNK; i++) { |
594 | if (iscan->__inodes[i]) { |
595 | xchk_irele(sc, iscan->__inodes[i]); |
596 | iscan->__inodes[i] = NULL; |
597 | } |
598 | } |
599 | } |
600 | |
601 | /* Mark this inode scan finished and release resources. */ |
void
xchk_iscan_teardown(
	struct xchk_iscan	*iscan)
{
	/* Drop any incore inodes still held from the last batch. */
	xchk_iscan_iter_finish(iscan);
	/* Mark the scan finished before destroying the lock it uses. */
	xchk_iscan_finish(iscan);
	mutex_destroy(&iscan->lock);
}
610 | |
611 | /* Pick an AG from which to start a scan. */ |
612 | static inline xfs_ino_t |
613 | xchk_iscan_rotor( |
614 | struct xfs_mount *mp) |
615 | { |
616 | static atomic_t agi_rotor; |
617 | unsigned int r = atomic_inc_return(&agi_rotor) - 1; |
618 | |
619 | /* |
620 | * Rotoring *backwards* through the AGs, so we add one here before |
621 | * subtracting from the agcount to arrive at an AG number. |
622 | */ |
623 | r = (r % mp->m_sb.sb_agcount) + 1; |
624 | |
625 | return XFS_AGINO_TO_INO(mp, mp->m_sb.sb_agcount - r, 0); |
626 | } |
627 | |
628 | /* |
629 | * Set ourselves up to start an inode scan. If the @iget_timeout and |
630 | * @iget_retry_delay parameters are set, the scan will try to iget each inode |
631 | * for @iget_timeout milliseconds. If an iget call indicates that the inode is |
632 | * waiting to be inactivated, the CPU will relax for @iget_retry_delay |
633 | * milliseconds after pushing the inactivation workers. |
634 | */ |
635 | void |
636 | xchk_iscan_start( |
637 | struct xfs_scrub *sc, |
638 | unsigned int iget_timeout, |
639 | unsigned int iget_retry_delay, |
640 | struct xchk_iscan *iscan) |
641 | { |
642 | xfs_ino_t start_ino; |
643 | |
644 | start_ino = xchk_iscan_rotor(sc->mp); |
645 | |
646 | iscan->__batch_ino = NULLFSINO; |
647 | iscan->__skipped_inomask = 0; |
648 | |
649 | iscan->sc = sc; |
650 | clear_bit(XCHK_ISCAN_OPSTATE_ABORTED, &iscan->__opstate); |
651 | iscan->iget_timeout = iget_timeout; |
652 | iscan->iget_retry_delay = iget_retry_delay; |
653 | iscan->__visited_ino = start_ino; |
654 | iscan->cursor_ino = start_ino; |
655 | iscan->scan_start_ino = start_ino; |
656 | mutex_init(&iscan->lock); |
657 | memset(iscan->__inodes, 0, sizeof(iscan->__inodes)); |
658 | |
659 | trace_xchk_iscan_start(iscan, start_ino); |
660 | } |
661 | |
662 | /* |
663 | * Mark this inode as having been visited. Callers must hold a sufficiently |
664 | * exclusive lock on the inode to prevent concurrent modifications. |
665 | */ |
void
xchk_iscan_mark_visited(
	struct xchk_iscan	*iscan,
	struct xfs_inode	*ip)
{
	/* Serialize against live-update hooks reading the visited cursor. */
	mutex_lock(&iscan->lock);
	iscan->__visited_ino = ip->i_ino;
	trace_xchk_iscan_visit(iscan);
	mutex_unlock(&iscan->lock);
}
676 | |
677 | /* |
678 | * Did we skip this inode because it wasn't allocated when we loaded the batch? |
679 | * If so, it is newly allocated and will not be scanned. All live updates to |
680 | * this inode must be passed to the caller to maintain scan correctness. |
681 | */ |
682 | static inline bool |
683 | xchk_iscan_skipped( |
684 | const struct xchk_iscan *iscan, |
685 | xfs_ino_t ino) |
686 | { |
687 | if (iscan->__batch_ino == NULLFSINO) |
688 | return false; |
689 | if (ino < iscan->__batch_ino) |
690 | return false; |
691 | if (ino >= iscan->__batch_ino + XFS_INODES_PER_CHUNK) |
692 | return false; |
693 | |
694 | return iscan->__skipped_inomask & (1ULL << (ino - iscan->__batch_ino)); |
695 | } |
696 | |
697 | /* |
698 | * Do we need a live update for this inode? This is true if the scanner thread |
699 | * has visited this inode and the scan hasn't been aborted due to errors. |
700 | * Callers must hold a sufficiently exclusive lock on the inode to prevent |
701 | * scanners from reading any inode metadata. |
702 | */ |
bool
xchk_iscan_want_live_update(
	struct xchk_iscan	*iscan,
	xfs_ino_t		ino)
{
	bool			ret = false;

	/* An aborted scan cannot absorb updates, so drop them all. */
	if (xchk_iscan_aborted(iscan))
		return false;

	mutex_lock(&iscan->lock);

	trace_xchk_iscan_want_live_update(iscan, ino);

	/* Scan is finished, caller should receive all updates. */
	if (iscan->__visited_ino == NULLFSINO) {
		ret = true;
		goto unlock;
	}

	/*
	 * No inodes have been visited yet, so the visited cursor points at the
	 * start of the scan range.  The caller should not receive any updates.
	 */
	if (iscan->scan_start_ino == iscan->__visited_ino) {
		ret = false;
		goto unlock;
	}

	/*
	 * This inode was not allocated at the time of the iscan batch.
	 * The caller should receive all updates.
	 */
	if (xchk_iscan_skipped(iscan, ino)) {
		ret = true;
		goto unlock;
	}

	/*
	 * The visited cursor hasn't yet wrapped around the end of the FS.  If
	 * @ino is inside the starred range, the caller should receive updates:
	 *
	 * 0 ------------ S ************ V ------------ EOFS
	 */
	if (iscan->scan_start_ino <= iscan->__visited_ino) {
		if (ino >= iscan->scan_start_ino &&
		    ino <= iscan->__visited_ino)
			ret = true;

		goto unlock;
	}

	/*
	 * The visited cursor wrapped around the end of the FS.  If @ino is
	 * inside the starred range, the caller should receive updates:
	 *
	 * 0 ************ V ------------ S ************ EOFS
	 */
	if (ino >= iscan->scan_start_ino || ino <= iscan->__visited_ino)
		ret = true;

unlock:
	mutex_unlock(&iscan->lock);
	return ret;
}
768 | |