| 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
| 2 | /* |
| 3 | * Copyright (c) 2021-2024 Oracle. All Rights Reserved. |
| 4 | * Author: Darrick J. Wong <djwong@kernel.org> |
| 5 | */ |
| 6 | #include "xfs.h" |
| 7 | #include "xfs_fs.h" |
| 8 | #include "xfs_shared.h" |
| 9 | #include "xfs_format.h" |
| 10 | #include "xfs_trans_resv.h" |
| 11 | #include "xfs_mount.h" |
| 12 | #include "xfs_log_format.h" |
| 13 | #include "xfs_trans.h" |
| 14 | #include "xfs_inode.h" |
| 15 | #include "xfs_btree.h" |
| 16 | #include "xfs_ialloc.h" |
| 17 | #include "xfs_ialloc_btree.h" |
| 18 | #include "xfs_ag.h" |
| 19 | #include "xfs_error.h" |
| 20 | #include "xfs_bit.h" |
| 21 | #include "xfs_icache.h" |
| 22 | #include "scrub/scrub.h" |
| 23 | #include "scrub/iscan.h" |
| 24 | #include "scrub/common.h" |
| 25 | #include "scrub/trace.h" |
| 26 | |
| 27 | /* |
| 28 | * Live File Scan |
| 29 | * ============== |
| 30 | * |
| 31 | * Live file scans walk every inode in a live filesystem. This is more or |
| 32 | * less like a regular iwalk, except that when we're advancing the scan cursor, |
| 33 | * we must ensure that inodes cannot be added or deleted anywhere between the |
| 34 | * old cursor value and the new cursor value. If we're advancing the cursor |
| 35 | * by one inode, the caller must hold that inode; if we're finding the next |
| 36 | * inode to scan, we must grab the AGI and hold it until we've updated the |
| 37 | * scan cursor. |
| 38 | * |
| 39 | * Callers are expected to use this code to scan all files in the filesystem to |
| 40 | * construct a new metadata index of some kind. The scan races against other |
| 41 | * live updates, which means there must be a provision to update the new index |
| 42 | * when updates are made to inodes that have already been scanned. The iscan lock |
| 43 | * can be used in live update hook code to stop the scan and protect this data |
| 44 | * structure. |
| 45 | * |
| 46 | * To keep the new index up to date with other metadata updates being made to |
| 47 | * the live filesystem, it is assumed that the caller will add hooks as needed |
| 48 | * to be notified when a metadata update occurs. The inode scanner must tell |
| 49 | * the hook code when an inode has been visited with xchk_iscan_mark_visited. |
| 50 | * Hook functions can use xchk_iscan_want_live_update to decide if the |
| 51 | * scanner's observations must be updated. |
| 52 | */ |
| 53 | |
| 54 | /* |
| 55 | * If the inobt record @rec covers @iscan->skip_ino, mark the inode free so |
| 56 | * that the scan ignores that inode. |
| 57 | */ |
| 58 | STATIC void |
| 59 | xchk_iscan_mask_skipino( |
| 60 | struct xchk_iscan *iscan, |
| 61 | struct xfs_perag *pag, |
| 62 | struct xfs_inobt_rec_incore *rec, |
| 63 | xfs_agino_t lastrecino) |
| 64 | { |
| 65 | struct xfs_scrub *sc = iscan->sc; |
| 66 | struct xfs_mount *mp = sc->mp; |
| 67 | xfs_agnumber_t skip_agno = XFS_INO_TO_AGNO(mp, iscan->skip_ino); |
| 68 | xfs_agnumber_t skip_agino = XFS_INO_TO_AGINO(mp, iscan->skip_ino); |
| 69 | |
| 70 | if (pag_agno(pag) != skip_agno) |
| 71 | return; |
| 72 | if (skip_agino < rec->ir_startino) |
| 73 | return; |
| 74 | if (skip_agino > lastrecino) |
| 75 | return; |
| 76 | |
| 77 | rec->ir_free |= xfs_inobt_maskn(skip_agino - rec->ir_startino, 1); |
| 78 | } |
| 79 | |
| 80 | /* |
| 81 | * Set *cursor to the next allocated inode after whatever it's set to now. |
| 82 | * If there are no more inodes in this AG, cursor is set to NULLAGINO. |
| 83 | */ |
| 84 | STATIC int |
| 85 | xchk_iscan_find_next( |
| 86 | struct xchk_iscan *iscan, |
| 87 | struct xfs_buf *agi_bp, |
| 88 | struct xfs_perag *pag, |
| 89 | xfs_inofree_t *allocmaskp, |
| 90 | xfs_agino_t *cursor, |
| 91 | uint8_t *nr_inodesp) |
| 92 | { |
| 93 | struct xfs_scrub *sc = iscan->sc; |
| 94 | struct xfs_inobt_rec_incore rec; |
| 95 | struct xfs_btree_cur *cur; |
| 96 | struct xfs_mount *mp = sc->mp; |
| 97 | struct xfs_trans *tp = sc->tp; |
| 98 | xfs_agnumber_t agno = pag_agno(pag); |
| 99 | xfs_agino_t lastino = NULLAGINO; |
| 100 | xfs_agino_t first, last; |
| 101 | xfs_agino_t agino = *cursor; |
| 102 | int has_rec; |
| 103 | int error; |
| 104 | |
| 105 | /* If the cursor is beyond the end of this AG, move to the next one. */ |
| 106 | xfs_agino_range(mp, agno, &first, &last); |
| 107 | if (agino > last) { |
| 108 | *cursor = NULLAGINO; |
| 109 | return 0; |
| 110 | } |
| 111 | |
| 112 | /* |
| 113 | * Look up the inode chunk for the current cursor position. If there |
| 114 | * is no chunk here, we want the next one. |
| 115 | */ |
| 116 | cur = xfs_inobt_init_cursor(pag, tp, agi_bp); |
| 117 | error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &has_rec); |
| 118 | if (!error && !has_rec) |
| 119 | error = xfs_btree_increment(cur, 0, &has_rec); |
| 120 | for (; !error; error = xfs_btree_increment(cur, 0, &has_rec)) { |
| 121 | xfs_inofree_t allocmask; |
| 122 | |
| 123 | /* |
| 124 | * If we've run out of inobt records in this AG, move the |
| 125 | * cursor on to the next AG and exit. The caller can try |
| 126 | * again with the next AG. |
| 127 | */ |
| 128 | if (!has_rec) { |
| 129 | *cursor = NULLAGINO; |
| 130 | break; |
| 131 | } |
| 132 | |
| 133 | error = xfs_inobt_get_rec(cur, &rec, &has_rec); |
| 134 | if (error) |
| 135 | break; |
| 136 | if (!has_rec) { |
| 137 | error = -EFSCORRUPTED; |
| 138 | break; |
| 139 | } |
| 140 | |
| 141 | /* Make sure that we always move forward. */ |
| 142 | if (lastino != NULLAGINO && |
| 143 | XFS_IS_CORRUPT(mp, lastino >= rec.ir_startino)) { |
| 144 | error = -EFSCORRUPTED; |
| 145 | break; |
| 146 | } |
| 147 | lastino = rec.ir_startino + XFS_INODES_PER_CHUNK - 1; |
| 148 | |
| 149 | /* |
| 150 | * If this record only covers inodes that come before the |
| 151 | * cursor, advance to the next record. |
| 152 | */ |
| 153 | if (rec.ir_startino + XFS_INODES_PER_CHUNK <= agino) |
| 154 | continue; |
| 155 | |
| 156 | if (iscan->skip_ino) |
| 157 | xchk_iscan_mask_skipino(iscan, pag, &rec, lastino); |
| 158 | |
| 159 | /* |
| 160 | * If the incoming lookup put us in the middle of an inobt |
| 161 | * record, mark it and the previous inodes "free" so that the |
| 162 | * search for allocated inodes will start at the cursor. |
| 163 | * We don't care about ir_freecount here. |
| 164 | */ |
| 165 | if (agino >= rec.ir_startino) |
| 166 | rec.ir_free |= xfs_inobt_maskn(0, |
| 167 | agino + 1 - rec.ir_startino); |
| 168 | |
| 169 | /* |
| 170 | * If there are allocated inodes in this chunk, find them |
| 171 | * and update the scan cursor. |
| 172 | */ |
| 173 | allocmask = ~rec.ir_free; |
| 174 | if (hweight64(allocmask) > 0) { |
| 175 | int next = xfs_lowbit64(allocmask); |
| 176 | |
| 177 | ASSERT(next >= 0); |
| 178 | *cursor = rec.ir_startino + next; |
| 179 | *allocmaskp = allocmask >> next; |
| 180 | *nr_inodesp = XFS_INODES_PER_CHUNK - next; |
| 181 | break; |
| 182 | } |
| 183 | } |
| 184 | |
| 185 | xfs_btree_del_cursor(cur, error); |
| 186 | return error; |
| 187 | } |
| 188 | |
| 189 | /* |
| 190 | * Advance both the scan and the visited cursors. |
| 191 | * |
| 192 | * The inumber address space for a given filesystem is sparse, which means that |
| 193 | * the scan cursor can jump a long ways in a single iter() call. There are no |
| 194 | * inodes in these sparse areas, so we must move the visited cursor forward at |
| 195 | * the same time so that the scan user can receive live updates for inodes that |
| 196 | * may get created once we release the AGI buffer. |
| 197 | */ |
| 198 | static inline void |
| 199 | xchk_iscan_move_cursor( |
| 200 | struct xchk_iscan *iscan, |
| 201 | xfs_agnumber_t agno, |
| 202 | xfs_agino_t agino) |
| 203 | { |
| 204 | struct xfs_scrub *sc = iscan->sc; |
| 205 | struct xfs_mount *mp = sc->mp; |
| 206 | xfs_ino_t cursor, visited; |
| 207 | |
| 208 | BUILD_BUG_ON(XFS_MAXINUMBER == NULLFSINO); |
| 209 | |
| 210 | /* |
| 211 | * Special-case ino == 0 here so that we never set visited_ino to |
| 212 | * NULLFSINO when wrapping around EOFS, for that will let through all |
| 213 | * live updates. |
| 214 | */ |
| 215 | cursor = XFS_AGINO_TO_INO(mp, agno, agino); |
| 216 | if (cursor == 0) |
| 217 | visited = XFS_MAXINUMBER; |
| 218 | else |
| 219 | visited = cursor - 1; |
| 220 | |
| 221 | mutex_lock(&iscan->lock); |
| 222 | iscan->cursor_ino = cursor; |
| 223 | iscan->__visited_ino = visited; |
| 224 | trace_xchk_iscan_move_cursor(iscan); |
| 225 | mutex_unlock(&iscan->lock); |
| 226 | } |
| 227 | |
| 228 | /* |
| 229 | * Prepare to return agno/agino to the iscan caller by moving the lastino |
| 230 | * cursor to the previous inode. Do this while we still hold the AGI so that |
| 231 | * no other threads can create or delete inodes in this AG. |
| 232 | */ |
| 233 | static inline void |
| 234 | xchk_iscan_finish( |
| 235 | struct xchk_iscan *iscan) |
| 236 | { |
| 237 | mutex_lock(&iscan->lock); |
| 238 | iscan->cursor_ino = NULLFSINO; |
| 239 | |
| 240 | /* All live updates will be applied from now on */ |
| 241 | iscan->__visited_ino = NULLFSINO; |
| 242 | |
| 243 | mutex_unlock(&iscan->lock); |
| 244 | } |
| 245 | |
| 246 | /* Mark an inode scan finished before we actually scan anything. */ |
| 247 | void |
| 248 | xchk_iscan_finish_early( |
| 249 | struct xchk_iscan *iscan) |
| 250 | { |
| 251 | ASSERT(iscan->cursor_ino == iscan->scan_start_ino); |
| 252 | ASSERT(iscan->__visited_ino == iscan->scan_start_ino); |
| 253 | |
| 254 | xchk_iscan_finish(iscan); |
| 255 | } |
| 256 | |
| 257 | /* |
| 258 | * Grab the AGI to advance the inode scan. Returns 0 if *agi_bpp is now set, |
| 259 | * -ECANCELED if the live scan aborted, -EBUSY if the AGI could not be grabbed, |
| 260 | * or the usual negative errno. |
| 261 | */ |
| 262 | STATIC int |
| 263 | xchk_iscan_read_agi( |
| 264 | struct xchk_iscan *iscan, |
| 265 | struct xfs_perag *pag, |
| 266 | struct xfs_buf **agi_bpp) |
| 267 | { |
| 268 | struct xfs_scrub *sc = iscan->sc; |
| 269 | unsigned long relax; |
| 270 | int ret; |
| 271 | |
| 272 | if (!xchk_iscan_agi_needs_trylock(iscan)) |
| 273 | return xfs_ialloc_read_agi(pag, sc->tp, 0, agi_bpp); |
| 274 | |
| 275 | relax = msecs_to_jiffies(iscan->iget_retry_delay); |
| 276 | do { |
| 277 | ret = xfs_ialloc_read_agi(pag, sc->tp, XFS_IALLOC_FLAG_TRYLOCK, |
| 278 | agi_bpp); |
| 279 | if (ret != -EAGAIN) |
| 280 | return ret; |
| 281 | if (!iscan->iget_timeout || |
| 282 | time_is_before_jiffies(iscan->__iget_deadline)) |
| 283 | return -EBUSY; |
| 284 | |
| 285 | trace_xchk_iscan_agi_retry_wait(iscan); |
| 286 | } while (!schedule_timeout_killable(relax) && |
| 287 | !xchk_iscan_aborted(iscan)); |
| 288 | return -ECANCELED; |
| 289 | } |
| 290 | |
| 291 | /* |
| 292 | * Advance ino to the next inode that the inobt thinks is allocated, being |
| 293 | * careful to jump to the next AG if we've reached the right end of this AG's |
| 294 | * inode btree. Advancing ino effectively means that we've pushed the inode |
| 295 | * scan forward, so set the iscan cursor to (ino - 1) so that our live update |
| 296 | * predicates will track inode allocations in that part of the inode number |
| 297 | * key space once we release the AGI buffer. |
| 298 | * |
| 299 | * Returns 1 if there's a new inode to examine, 0 if we've run out of inodes, |
| 300 | * -ECANCELED if the live scan aborted, or the usual negative errno. |
| 301 | */ |
| 302 | STATIC int |
| 303 | xchk_iscan_advance( |
| 304 | struct xchk_iscan *iscan, |
| 305 | struct xfs_perag **pagp, |
| 306 | struct xfs_buf **agi_bpp, |
| 307 | xfs_inofree_t *allocmaskp, |
| 308 | uint8_t *nr_inodesp) |
| 309 | { |
| 310 | struct xfs_scrub *sc = iscan->sc; |
| 311 | struct xfs_mount *mp = sc->mp; |
| 312 | struct xfs_buf *agi_bp; |
| 313 | struct xfs_perag *pag; |
| 314 | xfs_agnumber_t agno; |
| 315 | xfs_agino_t agino; |
| 316 | int ret; |
| 317 | |
| 318 | ASSERT(iscan->cursor_ino >= iscan->__visited_ino); |
| 319 | |
| 320 | do { |
| 321 | if (xchk_iscan_aborted(iscan)) |
| 322 | return -ECANCELED; |
| 323 | |
| 324 | agno = XFS_INO_TO_AGNO(mp, iscan->cursor_ino); |
| 325 | pag = xfs_perag_get(mp, agno); |
| 326 | if (!pag) |
| 327 | return -ECANCELED; |
| 328 | |
| 329 | ret = xchk_iscan_read_agi(iscan, pag, &agi_bp); |
| 330 | if (ret) |
| 331 | goto out_pag; |
| 332 | |
| 333 | agino = XFS_INO_TO_AGINO(mp, iscan->cursor_ino); |
| 334 | ret = xchk_iscan_find_next(iscan, agi_bp, pag, allocmaskp, |
| 335 | &agino, nr_inodesp); |
| 336 | if (ret) |
| 337 | goto out_buf; |
| 338 | |
| 339 | if (agino != NULLAGINO) { |
| 340 | /* |
| 341 | * Found the next inode in this AG, so return it along |
| 342 | * with the AGI buffer and the perag structure to |
| 343 | * ensure it cannot go away. |
| 344 | */ |
| 345 | xchk_iscan_move_cursor(iscan, agno, agino); |
| 346 | *agi_bpp = agi_bp; |
| 347 | *pagp = pag; |
| 348 | return 1; |
| 349 | } |
| 350 | |
| 351 | /* |
| 352 | * Did not find any more inodes in this AG, move on to the next |
| 353 | * AG. |
| 354 | */ |
| 355 | agno = (agno + 1) % mp->m_sb.sb_agcount; |
| 356 | xchk_iscan_move_cursor(iscan, agno, 0); |
| 357 | xfs_trans_brelse(sc->tp, agi_bp); |
| 358 | xfs_perag_put(pag); |
| 359 | |
| 360 | trace_xchk_iscan_advance_ag(iscan); |
| 361 | } while (iscan->cursor_ino != iscan->scan_start_ino); |
| 362 | |
| 363 | xchk_iscan_finish(iscan); |
| 364 | return 0; |
| 365 | |
| 366 | out_buf: |
| 367 | xfs_trans_brelse(sc->tp, agi_bp); |
| 368 | out_pag: |
| 369 | xfs_perag_put(pag); |
| 370 | return ret; |
| 371 | } |
| 372 | |
| 373 | /* |
| 374 | * Grabbing the inode failed, so we need to back up the scan and ask the caller |
| 375 | * to try to _advance the scan again. Returns -EBUSY if we've run out of retry |
| 376 | * opportunities, -ECANCELED if the process has a fatal signal pending, or |
| 377 | * -EAGAIN if we should try again. |
| 378 | */ |
| 379 | STATIC int |
| 380 | xchk_iscan_iget_retry( |
| 381 | struct xchk_iscan *iscan, |
| 382 | bool wait) |
| 383 | { |
| 384 | ASSERT(iscan->cursor_ino == iscan->__visited_ino + 1); |
| 385 | |
| 386 | if (!iscan->iget_timeout || |
| 387 | time_is_before_jiffies(iscan->__iget_deadline)) |
| 388 | return -EBUSY; |
| 389 | |
| 390 | if (wait) { |
| 391 | unsigned long relax; |
| 392 | |
| 393 | /* |
| 394 | * Sleep for a period of time to let the rest of the system |
| 395 | * catch up. If we return early, someone sent a kill signal to |
| 396 | * the calling process. |
| 397 | */ |
| 398 | relax = msecs_to_jiffies(iscan->iget_retry_delay); |
| 399 | trace_xchk_iscan_iget_retry_wait(iscan); |
| 400 | |
| 401 | if (schedule_timeout_killable(relax) || |
| 402 | xchk_iscan_aborted(iscan)) |
| 403 | return -ECANCELED; |
| 404 | } |
| 405 | |
| 406 | iscan->cursor_ino--; |
| 407 | return -EAGAIN; |
| 408 | } |
| 409 | |
| 410 | /* |
| 411 | * For an inode scan, we hold the AGI and want to try to grab a batch of |
| 412 | * inodes. Holding the AGI prevents inodegc from clearing freed inodes, |
| 413 | * so we must use noretry here. For every inode after the first one in the |
| 414 | * batch, we don't want to wait, so we use noretry there too. Finally, use |
| 415 | * dontcache to avoid polluting the cache. |
| 416 | */ |
| 417 | #define ISCAN_IGET_FLAGS (XFS_IGET_NORETRY | XFS_IGET_DONTCACHE) |
| 418 | |
| 419 | /* |
| 420 | * Grab an inode as part of an inode scan. While scanning this inode, the |
| 421 | * caller must ensure that no other threads can modify the inode until a call |
| 422 | * to xchk_iscan_visit succeeds. |
| 423 | * |
| 424 | * Returns the number of incore inodes grabbed; -EAGAIN if the caller should |
| 425 | * call again xchk_iscan_advance; -EBUSY if we couldn't grab an inode; |
| 426 | * -ECANCELED if there's a fatal signal pending; or some other negative errno. |
| 427 | */ |
| 428 | STATIC int |
| 429 | xchk_iscan_iget( |
| 430 | struct xchk_iscan *iscan, |
| 431 | struct xfs_perag *pag, |
| 432 | struct xfs_buf *agi_bp, |
| 433 | xfs_inofree_t allocmask, |
| 434 | uint8_t nr_inodes) |
| 435 | { |
| 436 | struct xfs_scrub *sc = iscan->sc; |
| 437 | struct xfs_mount *mp = sc->mp; |
| 438 | xfs_ino_t ino = iscan->cursor_ino; |
| 439 | unsigned int idx = 0; |
| 440 | unsigned int i; |
| 441 | int error; |
| 442 | |
| 443 | ASSERT(iscan->__inodes[0] == NULL); |
| 444 | |
| 445 | /* Fill the first slot in the inode array. */ |
| 446 | error = xfs_iget(sc->mp, sc->tp, ino, ISCAN_IGET_FLAGS, 0, |
| 447 | &iscan->__inodes[idx]); |
| 448 | |
| 449 | trace_xchk_iscan_iget(iscan, error); |
| 450 | |
| 451 | if (error == -ENOENT || error == -EAGAIN) { |
| 452 | xfs_trans_brelse(sc->tp, agi_bp); |
| 453 | xfs_perag_put(pag); |
| 454 | |
| 455 | /* |
| 456 | * It's possible that this inode has lost all of its links but |
| 457 | * hasn't yet been inactivated. If we don't have a transaction |
| 458 | * or it's not writable, flush the inodegc workers and wait. |
| 459 | * If we have a non-empty transaction, we must not block on |
| 460 | * inodegc, which allocates its own transactions. |
| 461 | */ |
| 462 | if (sc->tp && !(sc->tp->t_flags & XFS_TRANS_NO_WRITECOUNT)) |
| 463 | xfs_inodegc_push(mp); |
| 464 | else |
| 465 | xfs_inodegc_flush(mp); |
| 466 | return xchk_iscan_iget_retry(iscan, true); |
| 467 | } |
| 468 | |
| 469 | if (error == -EINVAL) { |
| 470 | xfs_trans_brelse(sc->tp, agi_bp); |
| 471 | xfs_perag_put(pag); |
| 472 | |
| 473 | /* |
| 474 | * We thought the inode was allocated, but the inode btree |
| 475 | * lookup failed, which means that it was freed since the last |
| 476 | * time we advanced the cursor. Back up and try again. This |
| 477 | * should never happen since still hold the AGI buffer from the |
| 478 | * inobt check, but we need to be careful about infinite loops. |
| 479 | */ |
| 480 | return xchk_iscan_iget_retry(iscan, false); |
| 481 | } |
| 482 | |
| 483 | if (error) { |
| 484 | xfs_trans_brelse(sc->tp, agi_bp); |
| 485 | xfs_perag_put(pag); |
| 486 | return error; |
| 487 | } |
| 488 | idx++; |
| 489 | ino++; |
| 490 | allocmask >>= 1; |
| 491 | |
| 492 | /* |
| 493 | * Now that we've filled the first slot in __inodes, try to fill the |
| 494 | * rest of the batch with consecutively ordered inodes. to reduce the |
| 495 | * number of _iter calls. Make a bitmap of unallocated inodes from the |
| 496 | * zeroes in the inuse bitmap; these inodes will not be scanned, but |
| 497 | * the _want_live_update predicate will pass through all live updates. |
| 498 | * |
| 499 | * If we can't iget an allocated inode, stop and return what we have. |
| 500 | */ |
| 501 | mutex_lock(&iscan->lock); |
| 502 | iscan->__batch_ino = ino - 1; |
| 503 | iscan->__skipped_inomask = 0; |
| 504 | mutex_unlock(&iscan->lock); |
| 505 | |
| 506 | for (i = 1; i < nr_inodes; i++, ino++, allocmask >>= 1) { |
| 507 | if (!(allocmask & 1)) { |
| 508 | ASSERT(!(iscan->__skipped_inomask & (1ULL << i))); |
| 509 | |
| 510 | mutex_lock(&iscan->lock); |
| 511 | iscan->cursor_ino = ino; |
| 512 | iscan->__skipped_inomask |= (1ULL << i); |
| 513 | mutex_unlock(&iscan->lock); |
| 514 | continue; |
| 515 | } |
| 516 | |
| 517 | ASSERT(iscan->__inodes[idx] == NULL); |
| 518 | |
| 519 | error = xfs_iget(sc->mp, sc->tp, ino, ISCAN_IGET_FLAGS, 0, |
| 520 | &iscan->__inodes[idx]); |
| 521 | if (error) |
| 522 | break; |
| 523 | |
| 524 | mutex_lock(&iscan->lock); |
| 525 | iscan->cursor_ino = ino; |
| 526 | mutex_unlock(&iscan->lock); |
| 527 | idx++; |
| 528 | } |
| 529 | |
| 530 | trace_xchk_iscan_iget_batch(sc->mp, iscan, nr_inodes, idx); |
| 531 | xfs_trans_brelse(sc->tp, agi_bp); |
| 532 | xfs_perag_put(pag); |
| 533 | return idx; |
| 534 | } |
| 535 | |
| 536 | /* |
| 537 | * Advance the visit cursor to reflect skipped inodes beyond whatever we |
| 538 | * scanned. |
| 539 | */ |
| 540 | STATIC void |
| 541 | xchk_iscan_finish_batch( |
| 542 | struct xchk_iscan *iscan) |
| 543 | { |
| 544 | xfs_ino_t highest_skipped; |
| 545 | |
| 546 | mutex_lock(&iscan->lock); |
| 547 | |
| 548 | if (iscan->__batch_ino != NULLFSINO) { |
| 549 | highest_skipped = iscan->__batch_ino + |
| 550 | xfs_highbit64(iscan->__skipped_inomask); |
| 551 | iscan->__visited_ino = max(iscan->__visited_ino, |
| 552 | highest_skipped); |
| 553 | |
| 554 | trace_xchk_iscan_skip(iscan); |
| 555 | } |
| 556 | |
| 557 | iscan->__batch_ino = NULLFSINO; |
| 558 | iscan->__skipped_inomask = 0; |
| 559 | |
| 560 | mutex_unlock(&iscan->lock); |
| 561 | } |
| 562 | |
| 563 | /* |
| 564 | * Advance the inode scan cursor to the next allocated inode and return up to |
| 565 | * 64 consecutive allocated inodes starting with the cursor position. |
| 566 | */ |
| 567 | STATIC int |
| 568 | xchk_iscan_iter_batch( |
| 569 | struct xchk_iscan *iscan) |
| 570 | { |
| 571 | struct xfs_scrub *sc = iscan->sc; |
| 572 | int ret; |
| 573 | |
| 574 | xchk_iscan_finish_batch(iscan); |
| 575 | |
| 576 | if (iscan->iget_timeout) |
| 577 | iscan->__iget_deadline = jiffies + |
| 578 | msecs_to_jiffies(iscan->iget_timeout); |
| 579 | |
| 580 | do { |
| 581 | struct xfs_buf *agi_bp = NULL; |
| 582 | struct xfs_perag *pag = NULL; |
| 583 | xfs_inofree_t allocmask = 0; |
| 584 | uint8_t nr_inodes = 0; |
| 585 | |
| 586 | ret = xchk_iscan_advance(iscan, &pag, &agi_bp, &allocmask, |
| 587 | &nr_inodes); |
| 588 | if (ret != 1) |
| 589 | return ret; |
| 590 | |
| 591 | if (xchk_iscan_aborted(iscan)) { |
| 592 | xfs_trans_brelse(sc->tp, agi_bp); |
| 593 | xfs_perag_put(pag); |
| 594 | ret = -ECANCELED; |
| 595 | break; |
| 596 | } |
| 597 | |
| 598 | ret = xchk_iscan_iget(iscan, pag, agi_bp, allocmask, nr_inodes); |
| 599 | } while (ret == -EAGAIN); |
| 600 | |
| 601 | return ret; |
| 602 | } |
| 603 | |
| 604 | /* |
| 605 | * Advance the inode scan cursor to the next allocated inode and return the |
| 606 | * incore inode structure associated with it. |
| 607 | * |
| 608 | * Returns 1 if there's a new inode to examine, 0 if we've run out of inodes, |
| 609 | * -ECANCELED if the live scan aborted, -EBUSY if the incore inode could not be |
| 610 | * grabbed, or the usual negative errno. |
| 611 | * |
| 612 | * If the function returns -EBUSY and the caller can handle skipping an inode, |
| 613 | * it may call this function again to continue the scan with the next allocated |
| 614 | * inode. |
| 615 | */ |
| 616 | int |
| 617 | xchk_iscan_iter( |
| 618 | struct xchk_iscan *iscan, |
| 619 | struct xfs_inode **ipp) |
| 620 | { |
| 621 | unsigned int i; |
| 622 | int error; |
| 623 | |
| 624 | /* Find a cached inode, or go get another batch. */ |
| 625 | for (i = 0; i < XFS_INODES_PER_CHUNK; i++) { |
| 626 | if (iscan->__inodes[i]) |
| 627 | goto foundit; |
| 628 | } |
| 629 | |
| 630 | error = xchk_iscan_iter_batch(iscan); |
| 631 | if (error <= 0) |
| 632 | return error; |
| 633 | |
| 634 | ASSERT(iscan->__inodes[0] != NULL); |
| 635 | i = 0; |
| 636 | |
| 637 | foundit: |
| 638 | /* Give the caller our reference. */ |
| 639 | *ipp = iscan->__inodes[i]; |
| 640 | iscan->__inodes[i] = NULL; |
| 641 | return 1; |
| 642 | } |
| 643 | |
| 644 | /* Clean up an xfs_iscan_iter call by dropping any inodes that we still hold. */ |
| 645 | void |
| 646 | xchk_iscan_iter_finish( |
| 647 | struct xchk_iscan *iscan) |
| 648 | { |
| 649 | struct xfs_scrub *sc = iscan->sc; |
| 650 | unsigned int i; |
| 651 | |
| 652 | for (i = 0; i < XFS_INODES_PER_CHUNK; i++) { |
| 653 | if (iscan->__inodes[i]) { |
| 654 | xchk_irele(sc, iscan->__inodes[i]); |
| 655 | iscan->__inodes[i] = NULL; |
| 656 | } |
| 657 | } |
| 658 | } |
| 659 | |
| 660 | /* Mark this inode scan finished and release resources. */ |
| 661 | void |
| 662 | xchk_iscan_teardown( |
| 663 | struct xchk_iscan *iscan) |
| 664 | { |
| 665 | xchk_iscan_iter_finish(iscan); |
| 666 | xchk_iscan_finish(iscan); |
| 667 | mutex_destroy(&iscan->lock); |
| 668 | } |
| 669 | |
| 670 | /* Pick an AG from which to start a scan. */ |
| 671 | static inline xfs_ino_t |
| 672 | xchk_iscan_rotor( |
| 673 | struct xfs_mount *mp) |
| 674 | { |
| 675 | static atomic_t agi_rotor; |
| 676 | unsigned int r = atomic_inc_return(&agi_rotor) - 1; |
| 677 | |
| 678 | /* |
| 679 | * Rotoring *backwards* through the AGs, so we add one here before |
| 680 | * subtracting from the agcount to arrive at an AG number. |
| 681 | */ |
| 682 | r = (r % mp->m_sb.sb_agcount) + 1; |
| 683 | |
| 684 | return XFS_AGINO_TO_INO(mp, mp->m_sb.sb_agcount - r, 0); |
| 685 | } |
| 686 | |
| 687 | /* |
| 688 | * Set ourselves up to start an inode scan. If the @iget_timeout and |
| 689 | * @iget_retry_delay parameters are set, the scan will try to iget each inode |
| 690 | * for @iget_timeout milliseconds. If an iget call indicates that the inode is |
| 691 | * waiting to be inactivated, the CPU will relax for @iget_retry_delay |
| 692 | * milliseconds after pushing the inactivation workers. |
| 693 | */ |
| 694 | void |
| 695 | xchk_iscan_start( |
| 696 | struct xfs_scrub *sc, |
| 697 | unsigned int iget_timeout, |
| 698 | unsigned int iget_retry_delay, |
| 699 | struct xchk_iscan *iscan) |
| 700 | { |
| 701 | xfs_ino_t start_ino; |
| 702 | |
| 703 | start_ino = xchk_iscan_rotor(sc->mp); |
| 704 | |
| 705 | iscan->__batch_ino = NULLFSINO; |
| 706 | iscan->__skipped_inomask = 0; |
| 707 | |
| 708 | iscan->sc = sc; |
| 709 | clear_bit(XCHK_ISCAN_OPSTATE_ABORTED, &iscan->__opstate); |
| 710 | iscan->iget_timeout = iget_timeout; |
| 711 | iscan->iget_retry_delay = iget_retry_delay; |
| 712 | iscan->__visited_ino = start_ino; |
| 713 | iscan->cursor_ino = start_ino; |
| 714 | iscan->scan_start_ino = start_ino; |
| 715 | mutex_init(&iscan->lock); |
| 716 | memset(iscan->__inodes, 0, sizeof(iscan->__inodes)); |
| 717 | |
| 718 | trace_xchk_iscan_start(iscan, start_ino); |
| 719 | } |
| 720 | |
| 721 | /* |
| 722 | * Mark this inode as having been visited. Callers must hold a sufficiently |
| 723 | * exclusive lock on the inode to prevent concurrent modifications. |
| 724 | */ |
| 725 | void |
| 726 | xchk_iscan_mark_visited( |
| 727 | struct xchk_iscan *iscan, |
| 728 | struct xfs_inode *ip) |
| 729 | { |
| 730 | mutex_lock(&iscan->lock); |
| 731 | iscan->__visited_ino = ip->i_ino; |
| 732 | trace_xchk_iscan_visit(iscan); |
| 733 | mutex_unlock(&iscan->lock); |
| 734 | } |
| 735 | |
| 736 | /* |
| 737 | * Did we skip this inode because it wasn't allocated when we loaded the batch? |
| 738 | * If so, it is newly allocated and will not be scanned. All live updates to |
| 739 | * this inode must be passed to the caller to maintain scan correctness. |
| 740 | */ |
| 741 | static inline bool |
| 742 | xchk_iscan_skipped( |
| 743 | const struct xchk_iscan *iscan, |
| 744 | xfs_ino_t ino) |
| 745 | { |
| 746 | if (iscan->__batch_ino == NULLFSINO) |
| 747 | return false; |
| 748 | if (ino < iscan->__batch_ino) |
| 749 | return false; |
| 750 | if (ino >= iscan->__batch_ino + XFS_INODES_PER_CHUNK) |
| 751 | return false; |
| 752 | |
| 753 | return iscan->__skipped_inomask & (1ULL << (ino - iscan->__batch_ino)); |
| 754 | } |
| 755 | |
| 756 | /* |
| 757 | * Do we need a live update for this inode? This is true if the scanner thread |
| 758 | * has visited this inode and the scan hasn't been aborted due to errors. |
| 759 | * Callers must hold a sufficiently exclusive lock on the inode to prevent |
| 760 | * scanners from reading any inode metadata. |
| 761 | */ |
| 762 | bool |
| 763 | xchk_iscan_want_live_update( |
| 764 | struct xchk_iscan *iscan, |
| 765 | xfs_ino_t ino) |
| 766 | { |
| 767 | bool ret = false; |
| 768 | |
| 769 | if (xchk_iscan_aborted(iscan)) |
| 770 | return false; |
| 771 | |
| 772 | mutex_lock(&iscan->lock); |
| 773 | |
| 774 | trace_xchk_iscan_want_live_update(iscan, ino); |
| 775 | |
| 776 | /* Scan is finished, caller should receive all updates. */ |
| 777 | if (iscan->__visited_ino == NULLFSINO) { |
| 778 | ret = true; |
| 779 | goto unlock; |
| 780 | } |
| 781 | |
| 782 | /* |
| 783 | * No inodes have been visited yet, so the visited cursor points at the |
| 784 | * start of the scan range. The caller should not receive any updates. |
| 785 | */ |
| 786 | if (iscan->scan_start_ino == iscan->__visited_ino) { |
| 787 | ret = false; |
| 788 | goto unlock; |
| 789 | } |
| 790 | |
| 791 | /* |
| 792 | * This inode was not allocated at the time of the iscan batch. |
| 793 | * The caller should receive all updates. |
| 794 | */ |
| 795 | if (xchk_iscan_skipped(iscan, ino)) { |
| 796 | ret = true; |
| 797 | goto unlock; |
| 798 | } |
| 799 | |
| 800 | /* |
| 801 | * The visited cursor hasn't yet wrapped around the end of the FS. If |
| 802 | * @ino is inside the starred range, the caller should receive updates: |
| 803 | * |
| 804 | * 0 ------------ S ************ V ------------ EOFS |
| 805 | */ |
| 806 | if (iscan->scan_start_ino <= iscan->__visited_ino) { |
| 807 | if (ino >= iscan->scan_start_ino && |
| 808 | ino <= iscan->__visited_ino) |
| 809 | ret = true; |
| 810 | |
| 811 | goto unlock; |
| 812 | } |
| 813 | |
| 814 | /* |
| 815 | * The visited cursor wrapped around the end of the FS. If @ino is |
| 816 | * inside the starred range, the caller should receive updates: |
| 817 | * |
| 818 | * 0 ************ V ------------ S ************ EOFS |
| 819 | */ |
| 820 | if (ino >= iscan->scan_start_ino || ino <= iscan->__visited_ino) |
| 821 | ret = true; |
| 822 | |
| 823 | unlock: |
| 824 | mutex_unlock(&iscan->lock); |
| 825 | return ret; |
| 826 | } |
| 827 | |