| 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
| 2 | /* |
| 3 | * Copyright (C) 2022-2023 Oracle. All Rights Reserved. |
| 4 | * Author: Darrick J. Wong <djwong@kernel.org> |
| 5 | */ |
| 6 | #include "xfs.h" |
| 7 | #include "xfs_fs.h" |
| 8 | #include "xfs_shared.h" |
| 9 | #include "xfs_format.h" |
| 10 | #include "xfs_trans_resv.h" |
| 11 | #include "xfs_mount.h" |
| 12 | #include "xfs_btree.h" |
| 13 | #include "xfs_log_format.h" |
| 14 | #include "xfs_trans.h" |
| 15 | #include "xfs_sb.h" |
| 16 | #include "xfs_inode.h" |
| 17 | #include "xfs_alloc.h" |
| 18 | #include "xfs_alloc_btree.h" |
| 19 | #include "xfs_ialloc.h" |
| 20 | #include "xfs_ialloc_btree.h" |
| 21 | #include "xfs_rmap.h" |
| 22 | #include "xfs_rmap_btree.h" |
| 23 | #include "xfs_refcount.h" |
| 24 | #include "xfs_refcount_btree.h" |
| 25 | #include "xfs_extent_busy.h" |
| 26 | #include "xfs_ag.h" |
| 27 | #include "xfs_ag_resv.h" |
| 28 | #include "xfs_quota.h" |
| 29 | #include "xfs_qm.h" |
| 30 | #include "xfs_bmap.h" |
| 31 | #include "xfs_da_format.h" |
| 32 | #include "xfs_da_btree.h" |
| 33 | #include "xfs_attr.h" |
| 34 | #include "xfs_attr_remote.h" |
| 35 | #include "xfs_defer.h" |
| 36 | #include "xfs_metafile.h" |
| 37 | #include "xfs_rtgroup.h" |
| 38 | #include "xfs_rtrmap_btree.h" |
| 39 | #include "xfs_extfree_item.h" |
| 40 | #include "xfs_rmap_item.h" |
| 41 | #include "xfs_refcount_item.h" |
| 42 | #include "xfs_buf_item.h" |
| 43 | #include "xfs_bmap_item.h" |
| 44 | #include "xfs_bmap_btree.h" |
| 45 | #include "scrub/scrub.h" |
| 46 | #include "scrub/common.h" |
| 47 | #include "scrub/trace.h" |
| 48 | #include "scrub/repair.h" |
| 49 | #include "scrub/bitmap.h" |
| 50 | #include "scrub/agb_bitmap.h" |
| 51 | #include "scrub/fsb_bitmap.h" |
| 52 | #include "scrub/rtb_bitmap.h" |
| 53 | #include "scrub/reap.h" |
| 54 | |
| 55 | /* |
| 56 | * Disposal of Blocks from Old Metadata |
| 57 | * |
| 58 | * Now that we've constructed a new btree to replace the damaged one, we want |
| 59 | * to dispose of the blocks that (we think) the old btree was using. |
| 60 | * Previously, we used the rmapbt to collect the extents (bitmap) with the |
| 61 | * rmap owner corresponding to the tree we rebuilt, collected extents for any |
| 62 | * blocks with the same rmap owner that are owned by another data structure |
| 63 | * (sublist), and subtracted sublist from bitmap. In theory the extents |
| 64 | * remaining in bitmap are the old btree's blocks. |
| 65 | * |
| 66 | * Unfortunately, it's possible that the btree was crosslinked with other |
| 67 | * blocks on disk. The rmap data can tell us if there are multiple owners, so |
| 68 | * if the rmapbt says there is an owner of this block other than @oinfo, then |
| 69 | * the block is crosslinked. Remove the reverse mapping and continue. |
| 70 | * |
| 71 | * If there is one rmap record, we can free the block, which removes the |
| 72 | * reverse mapping but doesn't add the block to the free space. Our repair |
| 73 | * strategy is to hope the other metadata objects crosslinked on this block |
| 74 | * will be rebuilt (atop different blocks), thereby removing all the cross |
| 75 | * links. |
| 76 | * |
| 77 | * If there are no rmap records at all, we also free the block. If the btree |
| 78 | * being rebuilt lives in the free space (bnobt/cntbt/rmapbt) then there isn't |
| 79 | * supposed to be a rmap record and everything is ok. For other btrees there |
| 80 | * had to have been an rmap entry for the block to have ended up on @bitmap, |
| 81 | * so if it's gone now there's something wrong and the fs will shut down. |
| 82 | * |
| 83 | * Note: If there are multiple rmap records with only the same rmap owner as |
| 84 | * the btree we're trying to rebuild and the block is indeed owned by another |
| 85 | * data structure with the same rmap owner, then the block will be in sublist |
| 86 | * and therefore doesn't need disposal. If there are multiple rmap records |
| 87 | * with only the same rmap owner but the block is not owned by something with |
| 88 | * the same rmap owner, the block will be freed. |
| 89 | * |
| 90 | * The caller is responsible for locking the AG headers/inode for the entire |
| 91 | * rebuild operation so that nothing else can sneak in and change the incore |
| 92 | * state while we're not looking. We must also invalidate any buffers |
| 93 | * associated with @bitmap. |
| 94 | */ |
| 95 | |
| 96 | /* Information about reaping extents after a repair. */ |
| 97 | struct xreap_state { |
| 98 | struct xfs_scrub *sc; |
| 99 | |
| 100 | union { |
| 101 | struct { |
| 102 | /* |
| 103 | * For AG blocks, this is reverse mapping owner and |
| 104 | * metadata reservation type. |
| 105 | */ |
| 106 | const struct xfs_owner_info *oinfo; |
| 107 | enum xfs_ag_resv_type resv; |
| 108 | }; |
| 109 | struct { |
| 110 | /* For file blocks, this is the inode and fork. */ |
| 111 | struct xfs_inode *ip; |
| 112 | int whichfork; |
| 113 | }; |
| 114 | }; |
| 115 | |
| 116 | /* Number of invalidated buffers logged to the current transaction. */ |
| 117 | unsigned int nr_binval; |
| 118 | |
| 119 | /* Maximum number of buffers we can invalidate in a single tx. */ |
| 120 | unsigned int max_binval; |
| 121 | |
| 122 | /* Number of deferred reaps attached to the current transaction. */ |
| 123 | unsigned int nr_deferred; |
| 124 | |
| 125 | /* Maximum number of intents we can reap in a single transaction. */ |
| 126 | unsigned int max_deferred; |
| 127 | }; |
| 128 | |
| 129 | /* Put a block back on the AGFL. */ |
| 130 | STATIC int |
| 131 | xreap_put_freelist( |
| 132 | struct xfs_scrub *sc, |
| 133 | xfs_agblock_t agbno) |
| 134 | { |
| 135 | struct xfs_buf *agfl_bp; |
| 136 | int error; |
| 137 | |
| 138 | /* Make sure there's space on the freelist. */ |
| 139 | error = xrep_fix_freelist(sc, 0); |
| 140 | if (error) |
| 141 | return error; |
| 142 | |
| 143 | /* |
| 144 | * Since we're "freeing" a lost block onto the AGFL, we have to |
| 145 | * create an rmap for the block prior to merging it or else other |
| 146 | * parts will break. |
| 147 | */ |
| 148 | error = xfs_rmap_alloc(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno, 1, |
| 149 | &XFS_RMAP_OINFO_AG); |
| 150 | if (error) |
| 151 | return error; |
| 152 | |
| 153 | /* Put the block on the AGFL. */ |
| 154 | error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp); |
| 155 | if (error) |
| 156 | return error; |
| 157 | |
| 158 | error = xfs_alloc_put_freelist(sc->sa.pag, sc->tp, sc->sa.agf_bp, |
| 159 | agfl_bp, agbno, 0); |
| 160 | if (error) |
| 161 | return error; |
| 162 | xfs_extent_busy_insert(sc->tp, pag_group(sc->sa.pag), agbno, 1, |
| 163 | XFS_EXTENT_BUSY_SKIP_DISCARD); |
| 164 | |
| 165 | return 0; |
| 166 | } |
| 167 | |
| 168 | /* Are there any uncommitted reap operations? */ |
| 169 | static inline bool xreap_is_dirty(const struct xreap_state *rs) |
| 170 | { |
| 171 | return rs->nr_binval > 0 || rs->nr_deferred > 0; |
| 172 | } |
| 173 | |
| 174 | /* |
| 175 | * Decide if we need to roll the transaction to clear out the the log |
| 176 | * reservation that we allocated to buffer invalidations. |
| 177 | */ |
| 178 | static inline bool xreap_want_binval_roll(const struct xreap_state *rs) |
| 179 | { |
| 180 | return rs->nr_binval >= rs->max_binval; |
| 181 | } |
| 182 | |
| 183 | /* Reset the buffer invalidation count after rolling. */ |
| 184 | static inline void xreap_binval_reset(struct xreap_state *rs) |
| 185 | { |
| 186 | rs->nr_binval = 0; |
| 187 | } |
| 188 | |
| 189 | /* |
| 190 | * Bump the number of invalidated buffers, and return true if we can continue, |
| 191 | * or false if we need to roll the transaction. |
| 192 | */ |
| 193 | static inline bool xreap_inc_binval(struct xreap_state *rs) |
| 194 | { |
| 195 | rs->nr_binval++; |
| 196 | return rs->nr_binval < rs->max_binval; |
| 197 | } |
| 198 | |
| 199 | /* |
| 200 | * Decide if we want to finish the deferred ops that are attached to the scrub |
| 201 | * transaction. We don't want to queue huge chains of deferred ops because |
| 202 | * that can consume a lot of log space and kernel memory. Hence we trigger a |
| 203 | * xfs_defer_finish if there are too many deferred reap operations or we've run |
| 204 | * out of space for invalidations. |
| 205 | */ |
| 206 | static inline bool xreap_want_defer_finish(const struct xreap_state *rs) |
| 207 | { |
| 208 | return rs->nr_deferred >= rs->max_deferred; |
| 209 | } |
| 210 | |
| 211 | /* |
| 212 | * Reset the defer chain length and buffer invalidation count after finishing |
| 213 | * items. |
| 214 | */ |
| 215 | static inline void xreap_defer_finish_reset(struct xreap_state *rs) |
| 216 | { |
| 217 | rs->nr_deferred = 0; |
| 218 | rs->nr_binval = 0; |
| 219 | } |
| 220 | |
| 221 | /* |
| 222 | * Bump the number of deferred extent reaps. |
| 223 | */ |
| 224 | static inline void xreap_inc_defer(struct xreap_state *rs) |
| 225 | { |
| 226 | rs->nr_deferred++; |
| 227 | } |
| 228 | |
| 229 | /* Force the caller to finish a deferred item chain. */ |
| 230 | static inline void xreap_force_defer_finish(struct xreap_state *rs) |
| 231 | { |
| 232 | rs->nr_deferred = rs->max_deferred; |
| 233 | } |
| 234 | |
/*
 * Largest number of fsblocks that a buffer needing invalidation might span.
 * Remote xattr values are the largest buffers that we support, so their
 * maximum size is the answer.
 */
static inline unsigned int
xrep_binval_max_fsblocks(
	struct xfs_mount	*mp)
{
	return xfs_attr3_max_rmt_blocks(mp);
}
| 243 | |
| 244 | /* |
| 245 | * Compute the maximum length of a buffer cache scan (in units of sectors), |
| 246 | * given a quantity of fs blocks. |
| 247 | */ |
| 248 | xfs_daddr_t |
| 249 | xrep_bufscan_max_sectors( |
| 250 | struct xfs_mount *mp, |
| 251 | xfs_extlen_t fsblocks) |
| 252 | { |
| 253 | return XFS_FSB_TO_BB(mp, min_t(xfs_extlen_t, fsblocks, |
| 254 | xrep_binval_max_fsblocks(mp))); |
| 255 | } |
| 256 | |
| 257 | /* |
| 258 | * Return an incore buffer from a sector scan, or NULL if there are no buffers |
| 259 | * left to return. |
| 260 | */ |
| 261 | struct xfs_buf * |
| 262 | xrep_bufscan_advance( |
| 263 | struct xfs_mount *mp, |
| 264 | struct xrep_bufscan *scan) |
| 265 | { |
| 266 | scan->__sector_count += scan->daddr_step; |
| 267 | while (scan->__sector_count <= scan->max_sectors) { |
| 268 | struct xfs_buf *bp = NULL; |
| 269 | int error; |
| 270 | |
| 271 | error = xfs_buf_incore(mp->m_ddev_targp, scan->daddr, |
| 272 | scan->__sector_count, XBF_LIVESCAN, &bp); |
| 273 | if (!error) |
| 274 | return bp; |
| 275 | |
| 276 | scan->__sector_count += scan->daddr_step; |
| 277 | } |
| 278 | |
| 279 | return NULL; |
| 280 | } |
| 281 | |
| 282 | /* Try to invalidate the incore buffers for an extent that we're freeing. */ |
| 283 | STATIC void |
| 284 | xreap_agextent_binval( |
| 285 | struct xreap_state *rs, |
| 286 | xfs_agblock_t agbno, |
| 287 | xfs_extlen_t *aglenp) |
| 288 | { |
| 289 | struct xfs_scrub *sc = rs->sc; |
| 290 | struct xfs_perag *pag = sc->sa.pag; |
| 291 | struct xfs_mount *mp = sc->mp; |
| 292 | xfs_agblock_t agbno_next = agbno + *aglenp; |
| 293 | xfs_agblock_t bno = agbno; |
| 294 | |
| 295 | /* |
| 296 | * Avoid invalidating AG headers and post-EOFS blocks because we never |
| 297 | * own those. |
| 298 | */ |
| 299 | if (!xfs_verify_agbno(pag, agbno) || |
| 300 | !xfs_verify_agbno(pag, agbno_next - 1)) |
| 301 | return; |
| 302 | |
| 303 | /* |
| 304 | * If there are incore buffers for these blocks, invalidate them. We |
| 305 | * assume that the lack of any other known owners means that the buffer |
| 306 | * can be locked without risk of deadlocking. The buffer cache cannot |
| 307 | * detect aliasing, so employ nested loops to scan for incore buffers |
| 308 | * of any plausible size. |
| 309 | */ |
| 310 | while (bno < agbno_next) { |
| 311 | struct xrep_bufscan scan = { |
| 312 | .daddr = xfs_agbno_to_daddr(pag, bno), |
| 313 | .max_sectors = xrep_bufscan_max_sectors(mp, |
| 314 | agbno_next - bno), |
| 315 | .daddr_step = XFS_FSB_TO_BB(mp, 1), |
| 316 | }; |
| 317 | struct xfs_buf *bp; |
| 318 | |
| 319 | while ((bp = xrep_bufscan_advance(mp, scan: &scan)) != NULL) { |
| 320 | xfs_trans_bjoin(sc->tp, bp); |
| 321 | xfs_trans_binval(sc->tp, bp); |
| 322 | |
| 323 | /* |
| 324 | * Stop invalidating if we've hit the limit; we should |
| 325 | * still have enough reservation left to free however |
| 326 | * far we've gotten. |
| 327 | */ |
| 328 | if (!xreap_inc_binval(rs)) { |
| 329 | *aglenp -= agbno_next - bno; |
| 330 | goto out; |
| 331 | } |
| 332 | } |
| 333 | |
| 334 | bno++; |
| 335 | } |
| 336 | |
| 337 | out: |
| 338 | trace_xreap_agextent_binval(pag_group(sc->sa.pag), agbno, *aglenp); |
| 339 | } |
| 340 | |
| 341 | /* |
| 342 | * Figure out the longest run of blocks that we can dispose of with a single |
| 343 | * call. Cross-linked blocks should have their reverse mappings removed, but |
| 344 | * single-owner extents can be freed. AGFL blocks can only be put back one at |
| 345 | * a time. |
| 346 | */ |
| 347 | STATIC int |
| 348 | xreap_agextent_select( |
| 349 | struct xreap_state *rs, |
| 350 | xfs_agblock_t agbno, |
| 351 | xfs_agblock_t agbno_next, |
| 352 | bool *crosslinked, |
| 353 | xfs_extlen_t *aglenp) |
| 354 | { |
| 355 | struct xfs_scrub *sc = rs->sc; |
| 356 | struct xfs_btree_cur *cur; |
| 357 | xfs_agblock_t bno = agbno + 1; |
| 358 | xfs_extlen_t len = 1; |
| 359 | int error; |
| 360 | |
| 361 | /* |
| 362 | * Determine if there are any other rmap records covering the first |
| 363 | * block of this extent. If so, the block is crosslinked. |
| 364 | */ |
| 365 | cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp, |
| 366 | sc->sa.pag); |
| 367 | error = xfs_rmap_has_other_keys(cur, agbno, 1, rs->oinfo, |
| 368 | crosslinked); |
| 369 | if (error) |
| 370 | goto out_cur; |
| 371 | |
| 372 | /* AGFL blocks can only be deal with one at a time. */ |
| 373 | if (rs->resv == XFS_AG_RESV_AGFL) |
| 374 | goto out_found; |
| 375 | |
| 376 | /* |
| 377 | * Figure out how many of the subsequent blocks have the same crosslink |
| 378 | * status. |
| 379 | */ |
| 380 | while (bno < agbno_next) { |
| 381 | bool also_crosslinked; |
| 382 | |
| 383 | error = xfs_rmap_has_other_keys(cur, bno, 1, rs->oinfo, |
| 384 | &also_crosslinked); |
| 385 | if (error) |
| 386 | goto out_cur; |
| 387 | |
| 388 | if (*crosslinked != also_crosslinked) |
| 389 | break; |
| 390 | |
| 391 | len++; |
| 392 | bno++; |
| 393 | } |
| 394 | |
| 395 | out_found: |
| 396 | *aglenp = len; |
| 397 | trace_xreap_agextent_select(pag_group(sc->sa.pag), agbno, len, |
| 398 | *crosslinked); |
| 399 | out_cur: |
| 400 | xfs_btree_del_cursor(cur, error); |
| 401 | return error; |
| 402 | } |
| 403 | |
| 404 | /* |
| 405 | * Dispose of as much of the beginning of this AG extent as possible. The |
| 406 | * number of blocks disposed of will be returned in @aglenp. |
| 407 | */ |
| 408 | STATIC int |
| 409 | xreap_agextent_iter( |
| 410 | struct xreap_state *rs, |
| 411 | xfs_agblock_t agbno, |
| 412 | xfs_extlen_t *aglenp, |
| 413 | bool crosslinked) |
| 414 | { |
| 415 | struct xfs_scrub *sc = rs->sc; |
| 416 | xfs_fsblock_t fsbno; |
| 417 | int error = 0; |
| 418 | |
| 419 | ASSERT(rs->resv != XFS_AG_RESV_METAFILE); |
| 420 | |
| 421 | fsbno = xfs_agbno_to_fsb(sc->sa.pag, agbno); |
| 422 | |
| 423 | /* |
| 424 | * If there are other rmappings, this block is cross linked and must |
| 425 | * not be freed. Remove the reverse mapping and move on. Otherwise, |
| 426 | * we were the only owner of the block, so free the extent, which will |
| 427 | * also remove the rmap. |
| 428 | * |
| 429 | * XXX: XFS doesn't support detecting the case where a single block |
| 430 | * metadata structure is crosslinked with a multi-block structure |
| 431 | * because the buffer cache doesn't detect aliasing problems, so we |
| 432 | * can't fix 100% of crosslinking problems (yet). The verifiers will |
| 433 | * blow on writeout, the filesystem will shut down, and the admin gets |
| 434 | * to run xfs_repair. |
| 435 | */ |
| 436 | if (crosslinked) { |
| 437 | trace_xreap_dispose_unmap_extent(pag_group(sc->sa.pag), agbno, |
| 438 | *aglenp); |
| 439 | |
| 440 | if (rs->oinfo == &XFS_RMAP_OINFO_COW) { |
| 441 | /* |
| 442 | * t0: Unmapping CoW staging extents, remove the |
| 443 | * records from the refcountbt, which will remove the |
| 444 | * rmap record as well. |
| 445 | */ |
| 446 | xfs_refcount_free_cow_extent(sc->tp, false, fsbno, |
| 447 | *aglenp); |
| 448 | xreap_inc_defer(rs); |
| 449 | return 0; |
| 450 | } |
| 451 | |
| 452 | /* t1: unmap crosslinked metadata blocks */ |
| 453 | xfs_rmap_free_extent(sc->tp, false, fsbno, *aglenp, |
| 454 | rs->oinfo->oi_owner); |
| 455 | xreap_inc_defer(rs); |
| 456 | return 0; |
| 457 | } |
| 458 | |
| 459 | trace_xreap_dispose_free_extent(pag_group(sc->sa.pag), agbno, *aglenp); |
| 460 | |
| 461 | /* |
| 462 | * Invalidate as many buffers as we can, starting at agbno. If this |
| 463 | * function sets *aglenp to zero, the transaction is full of logged |
| 464 | * buffer invalidations, so we need to return early so that we can |
| 465 | * roll and retry. |
| 466 | */ |
| 467 | xreap_agextent_binval(rs, agbno, aglenp); |
| 468 | if (*aglenp == 0) { |
| 469 | ASSERT(xreap_want_binval_roll(rs)); |
| 470 | return 0; |
| 471 | } |
| 472 | |
| 473 | /* |
| 474 | * t2: To get rid of CoW staging extents, use deferred work items |
| 475 | * to remove the refcountbt records (which removes the rmap records) |
| 476 | * and free the extent. We're not worried about the system going down |
| 477 | * here because log recovery walks the refcount btree to clean out the |
| 478 | * CoW staging extents. |
| 479 | */ |
| 480 | if (rs->oinfo == &XFS_RMAP_OINFO_COW) { |
| 481 | ASSERT(rs->resv == XFS_AG_RESV_NONE); |
| 482 | |
| 483 | xfs_refcount_free_cow_extent(sc->tp, false, fsbno, *aglenp); |
| 484 | error = xfs_free_extent_later(sc->tp, fsbno, *aglenp, NULL, |
| 485 | rs->resv, XFS_FREE_EXTENT_SKIP_DISCARD); |
| 486 | if (error) |
| 487 | return error; |
| 488 | |
| 489 | xreap_inc_defer(rs); |
| 490 | return 0; |
| 491 | } |
| 492 | |
| 493 | /* t3: Put blocks back on the AGFL one at a time. */ |
| 494 | if (rs->resv == XFS_AG_RESV_AGFL) { |
| 495 | ASSERT(*aglenp == 1); |
| 496 | error = xreap_put_freelist(sc, agbno); |
| 497 | if (error) |
| 498 | return error; |
| 499 | |
| 500 | xreap_force_defer_finish(rs); |
| 501 | return 0; |
| 502 | } |
| 503 | |
| 504 | /* |
| 505 | * t4: Use deferred frees to get rid of the old btree blocks to try to |
| 506 | * minimize the window in which we could crash and lose the old blocks. |
| 507 | * Add a defer ops barrier every other extent to avoid stressing the |
| 508 | * system with large EFIs. |
| 509 | */ |
| 510 | error = xfs_free_extent_later(sc->tp, fsbno, *aglenp, rs->oinfo, |
| 511 | rs->resv, XFS_FREE_EXTENT_SKIP_DISCARD); |
| 512 | if (error) |
| 513 | return error; |
| 514 | |
| 515 | xreap_inc_defer(rs); |
| 516 | if (rs->nr_deferred % 2 == 0) |
| 517 | xfs_defer_add_barrier(sc->tp); |
| 518 | return 0; |
| 519 | } |
| 520 | |
| 521 | /* Configure the deferral and invalidation limits */ |
| 522 | static inline void |
| 523 | xreap_configure_limits( |
| 524 | struct xreap_state *rs, |
| 525 | unsigned int fixed_overhead, |
| 526 | unsigned int variable_overhead, |
| 527 | unsigned int per_intent, |
| 528 | unsigned int per_binval) |
| 529 | { |
| 530 | struct xfs_scrub *sc = rs->sc; |
| 531 | unsigned int res = sc->tp->t_log_res - fixed_overhead; |
| 532 | |
| 533 | /* Don't underflow the reservation */ |
| 534 | if (sc->tp->t_log_res < (fixed_overhead + variable_overhead)) { |
| 535 | ASSERT(sc->tp->t_log_res >= |
| 536 | (fixed_overhead + variable_overhead)); |
| 537 | xfs_force_shutdown(sc->mp, SHUTDOWN_CORRUPT_INCORE); |
| 538 | return; |
| 539 | } |
| 540 | |
| 541 | rs->max_deferred = per_intent ? res / variable_overhead : 0; |
| 542 | res -= rs->max_deferred * per_intent; |
| 543 | rs->max_binval = per_binval ? res / per_binval : 0; |
| 544 | } |
| 545 | |
| 546 | /* |
| 547 | * Compute the maximum number of intent items that reaping can attach to the |
| 548 | * scrub transaction given the worst case log overhead of the intent items |
| 549 | * needed to reap a single per-AG space extent. This is not for freeing CoW |
| 550 | * staging extents. |
| 551 | */ |
| 552 | STATIC void |
| 553 | xreap_configure_agextent_limits( |
| 554 | struct xreap_state *rs) |
| 555 | { |
| 556 | struct xfs_scrub *sc = rs->sc; |
| 557 | struct xfs_mount *mp = sc->mp; |
| 558 | |
| 559 | /* |
| 560 | * In the worst case, relogging an intent item causes both an intent |
| 561 | * item and a done item to be attached to a transaction for each extent |
| 562 | * that we'd like to process. |
| 563 | */ |
| 564 | const unsigned int efi = xfs_efi_log_space(1) + |
| 565 | xfs_efd_log_space(1); |
| 566 | const unsigned int rui = xfs_rui_log_space(1) + |
| 567 | xfs_rud_log_space(); |
| 568 | |
| 569 | /* |
| 570 | * Various things can happen when reaping non-CoW metadata blocks: |
| 571 | * |
| 572 | * t1: Unmapping crosslinked metadata blocks: deferred removal of rmap |
| 573 | * record. |
| 574 | * |
| 575 | * t3: Freeing to AGFL: roll and finish deferred items for every block. |
| 576 | * Limits here do not matter. |
| 577 | * |
| 578 | * t4: Freeing metadata blocks: deferred freeing of the space, which |
| 579 | * also removes the rmap record. |
| 580 | * |
| 581 | * For simplicity, we'll use the worst-case intents size to determine |
| 582 | * the maximum number of deferred extents before we have to finish the |
| 583 | * whole chain. If we're trying to reap a btree larger than this size, |
| 584 | * a crash midway through reaping can result in leaked blocks. |
| 585 | */ |
| 586 | const unsigned int t1 = rui; |
| 587 | const unsigned int t4 = rui + efi; |
| 588 | const unsigned int per_intent = max(t1, t4); |
| 589 | |
| 590 | /* |
| 591 | * For each transaction in a reap chain, we must be able to take one |
| 592 | * step in the defer item chain, which should only consist of EFI or |
| 593 | * RUI items. |
| 594 | */ |
| 595 | const unsigned int f1 = xfs_calc_finish_efi_reservation(mp, 1); |
| 596 | const unsigned int f2 = xfs_calc_finish_rui_reservation(mp, 1); |
| 597 | const unsigned int step_size = max(f1, f2); |
| 598 | |
| 599 | /* Largest buffer size (in fsblocks) that can be invalidated. */ |
| 600 | const unsigned int max_binval = xrep_binval_max_fsblocks(mp); |
| 601 | |
| 602 | /* Maximum overhead of invalidating one buffer. */ |
| 603 | const unsigned int per_binval = |
| 604 | xfs_buf_inval_log_space(1, XFS_B_TO_FSBT(mp, max_binval)); |
| 605 | |
| 606 | /* |
| 607 | * For each transaction in a reap chain, we can delete some number of |
| 608 | * extents and invalidate some number of blocks. We assume that btree |
| 609 | * blocks aren't usually contiguous; and that scrub likely pulled all |
| 610 | * the buffers into memory. From these assumptions, set the maximum |
| 611 | * number of deferrals we can queue before flushing the defer chain, |
| 612 | * and the number of invalidations we can queue before rolling to a |
| 613 | * clean transaction (and possibly relogging some of the deferrals) to |
| 614 | * the same quantity. |
| 615 | */ |
| 616 | const unsigned int variable_overhead = per_intent + per_binval; |
| 617 | |
| 618 | xreap_configure_limits(rs, fixed_overhead: step_size, variable_overhead, per_intent, |
| 619 | per_binval); |
| 620 | |
| 621 | trace_xreap_agextent_limits(sc->tp, per_binval, rs->max_binval, |
| 622 | step_size, per_intent, rs->max_deferred); |
| 623 | } |
| 624 | |
| 625 | /* |
| 626 | * Compute the maximum number of intent items that reaping can attach to the |
| 627 | * scrub transaction given the worst case log overhead of the intent items |
| 628 | * needed to reap a single CoW staging extent. This is not for freeing |
| 629 | * metadata blocks. |
| 630 | */ |
| 631 | STATIC void |
| 632 | xreap_configure_agcow_limits( |
| 633 | struct xreap_state *rs) |
| 634 | { |
| 635 | struct xfs_scrub *sc = rs->sc; |
| 636 | struct xfs_mount *mp = sc->mp; |
| 637 | |
| 638 | /* |
| 639 | * In the worst case, relogging an intent item causes both an intent |
| 640 | * item and a done item to be attached to a transaction for each extent |
| 641 | * that we'd like to process. |
| 642 | */ |
| 643 | const unsigned int efi = xfs_efi_log_space(1) + |
| 644 | xfs_efd_log_space(1); |
| 645 | const unsigned int rui = xfs_rui_log_space(1) + |
| 646 | xfs_rud_log_space(); |
| 647 | const unsigned int cui = xfs_cui_log_space(1) + |
| 648 | xfs_cud_log_space(); |
| 649 | |
| 650 | /* |
| 651 | * Various things can happen when reaping non-CoW metadata blocks: |
| 652 | * |
| 653 | * t0: Unmapping crosslinked CoW blocks: deferred removal of refcount |
| 654 | * record, which defers removal of rmap record |
| 655 | * |
| 656 | * t2: Freeing CoW blocks: deferred removal of refcount record, which |
| 657 | * defers removal of rmap record; and deferred removal of the space |
| 658 | * |
| 659 | * For simplicity, we'll use the worst-case intents size to determine |
| 660 | * the maximum number of deferred extents before we have to finish the |
| 661 | * whole chain. If we're trying to reap a btree larger than this size, |
| 662 | * a crash midway through reaping can result in leaked blocks. |
| 663 | */ |
| 664 | const unsigned int t0 = cui + rui; |
| 665 | const unsigned int t2 = cui + rui + efi; |
| 666 | const unsigned int per_intent = max(t0, t2); |
| 667 | |
| 668 | /* |
| 669 | * For each transaction in a reap chain, we must be able to take one |
| 670 | * step in the defer item chain, which should only consist of CUI, EFI, |
| 671 | * or RUI items. |
| 672 | */ |
| 673 | const unsigned int f1 = xfs_calc_finish_efi_reservation(mp, 1); |
| 674 | const unsigned int f2 = xfs_calc_finish_rui_reservation(mp, 1); |
| 675 | const unsigned int f3 = xfs_calc_finish_cui_reservation(mp, 1); |
| 676 | const unsigned int step_size = max3(f1, f2, f3); |
| 677 | |
| 678 | /* Largest buffer size (in fsblocks) that can be invalidated. */ |
| 679 | const unsigned int max_binval = xrep_binval_max_fsblocks(mp); |
| 680 | |
| 681 | /* Overhead of invalidating one buffer */ |
| 682 | const unsigned int per_binval = |
| 683 | xfs_buf_inval_log_space(1, XFS_B_TO_FSBT(mp, max_binval)); |
| 684 | |
| 685 | /* |
| 686 | * For each transaction in a reap chain, we can delete some number of |
| 687 | * extents and invalidate some number of blocks. We assume that CoW |
| 688 | * staging extents are usually more than 1 fsblock, and that there |
| 689 | * shouldn't be any buffers for those blocks. From the assumptions, |
| 690 | * set the number of deferrals to use as much of the reservation as |
| 691 | * it can, but leave space to invalidate 1/8th that number of buffers. |
| 692 | */ |
| 693 | const unsigned int variable_overhead = per_intent + |
| 694 | (per_binval / 8); |
| 695 | |
| 696 | xreap_configure_limits(rs, fixed_overhead: step_size, variable_overhead, per_intent, |
| 697 | per_binval); |
| 698 | |
| 699 | trace_xreap_agcow_limits(sc->tp, per_binval, rs->max_binval, step_size, |
| 700 | per_intent, rs->max_deferred); |
| 701 | } |
| 702 | |
| 703 | /* |
| 704 | * Break an AG metadata extent into sub-extents by fate (crosslinked, not |
| 705 | * crosslinked), and dispose of each sub-extent separately. |
| 706 | */ |
| 707 | STATIC int |
| 708 | xreap_agmeta_extent( |
| 709 | uint32_t agbno, |
| 710 | uint32_t len, |
| 711 | void *priv) |
| 712 | { |
| 713 | struct xreap_state *rs = priv; |
| 714 | struct xfs_scrub *sc = rs->sc; |
| 715 | xfs_agblock_t agbno_next = agbno + len; |
| 716 | int error = 0; |
| 717 | |
| 718 | ASSERT(len <= XFS_MAX_BMBT_EXTLEN); |
| 719 | ASSERT(sc->ip == NULL); |
| 720 | |
| 721 | while (agbno < agbno_next) { |
| 722 | xfs_extlen_t aglen; |
| 723 | bool crosslinked; |
| 724 | |
| 725 | error = xreap_agextent_select(rs, agbno, agbno_next, |
| 726 | &crosslinked, &aglen); |
| 727 | if (error) |
| 728 | return error; |
| 729 | |
| 730 | error = xreap_agextent_iter(rs, agbno, &aglen, crosslinked); |
| 731 | if (error) |
| 732 | return error; |
| 733 | |
| 734 | if (xreap_want_defer_finish(rs)) { |
| 735 | error = xrep_defer_finish(sc); |
| 736 | if (error) |
| 737 | return error; |
| 738 | xreap_defer_finish_reset(rs); |
| 739 | } else if (xreap_want_binval_roll(rs)) { |
| 740 | error = xrep_roll_ag_trans(sc); |
| 741 | if (error) |
| 742 | return error; |
| 743 | xreap_binval_reset(rs); |
| 744 | } |
| 745 | |
| 746 | agbno += aglen; |
| 747 | } |
| 748 | |
| 749 | return 0; |
| 750 | } |
| 751 | |
| 752 | /* Dispose of every block of every AG metadata extent in the bitmap. */ |
| 753 | int |
| 754 | xrep_reap_agblocks( |
| 755 | struct xfs_scrub *sc, |
| 756 | struct xagb_bitmap *bitmap, |
| 757 | const struct xfs_owner_info *oinfo, |
| 758 | enum xfs_ag_resv_type type) |
| 759 | { |
| 760 | struct xreap_state rs = { |
| 761 | .sc = sc, |
| 762 | .oinfo = oinfo, |
| 763 | .resv = type, |
| 764 | }; |
| 765 | int error; |
| 766 | |
| 767 | ASSERT(xfs_has_rmapbt(sc->mp)); |
| 768 | ASSERT(sc->ip == NULL); |
| 769 | |
| 770 | xreap_configure_agextent_limits(&rs); |
| 771 | error = xagb_bitmap_walk(bitmap, xreap_agmeta_extent, &rs); |
| 772 | if (error) |
| 773 | return error; |
| 774 | |
| 775 | if (xreap_is_dirty(&rs)) |
| 776 | return xrep_defer_finish(sc); |
| 777 | |
| 778 | return 0; |
| 779 | } |
| 780 | |
| 781 | /* |
| 782 | * Break a file metadata extent into sub-extents by fate (crosslinked, not |
| 783 | * crosslinked), and dispose of each sub-extent separately. The extent must |
| 784 | * not cross an AG boundary. |
| 785 | */ |
| 786 | STATIC int |
| 787 | xreap_fsmeta_extent( |
| 788 | uint64_t fsbno, |
| 789 | uint64_t len, |
| 790 | void *priv) |
| 791 | { |
| 792 | struct xreap_state *rs = priv; |
| 793 | struct xfs_scrub *sc = rs->sc; |
| 794 | xfs_agnumber_t agno = XFS_FSB_TO_AGNO(sc->mp, fsbno); |
| 795 | xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(sc->mp, fsbno); |
| 796 | xfs_agblock_t agbno_next = agbno + len; |
| 797 | int error = 0; |
| 798 | |
| 799 | ASSERT(len <= XFS_MAX_BMBT_EXTLEN); |
| 800 | ASSERT(sc->ip != NULL); |
| 801 | ASSERT(!sc->sa.pag); |
| 802 | |
| 803 | /* |
| 804 | * We're reaping blocks after repairing file metadata, which means that |
| 805 | * we have to init the xchk_ag structure ourselves. |
| 806 | */ |
| 807 | sc->sa.pag = xfs_perag_get(sc->mp, agno); |
| 808 | if (!sc->sa.pag) |
| 809 | return -EFSCORRUPTED; |
| 810 | |
| 811 | error = xfs_alloc_read_agf(sc->sa.pag, sc->tp, 0, &sc->sa.agf_bp); |
| 812 | if (error) |
| 813 | goto out_pag; |
| 814 | |
| 815 | while (agbno < agbno_next) { |
| 816 | xfs_extlen_t aglen; |
| 817 | bool crosslinked; |
| 818 | |
| 819 | error = xreap_agextent_select(rs, agbno, agbno_next, |
| 820 | &crosslinked, &aglen); |
| 821 | if (error) |
| 822 | goto out_agf; |
| 823 | |
| 824 | error = xreap_agextent_iter(rs, agbno, &aglen, crosslinked); |
| 825 | if (error) |
| 826 | goto out_agf; |
| 827 | |
| 828 | if (xreap_want_defer_finish(rs)) { |
| 829 | /* |
| 830 | * Holds the AGF buffer across the deferred chain |
| 831 | * processing. |
| 832 | */ |
| 833 | error = xrep_defer_finish(sc); |
| 834 | if (error) |
| 835 | goto out_agf; |
| 836 | xreap_defer_finish_reset(rs); |
| 837 | } else if (xreap_want_binval_roll(rs)) { |
| 838 | /* |
| 839 | * Hold the AGF buffer across the transaction roll so |
| 840 | * that we don't have to reattach it to the scrub |
| 841 | * context. |
| 842 | */ |
| 843 | xfs_trans_bhold(sc->tp, sc->sa.agf_bp); |
| 844 | error = xfs_trans_roll_inode(&sc->tp, sc->ip); |
| 845 | xfs_trans_bjoin(sc->tp, sc->sa.agf_bp); |
| 846 | if (error) |
| 847 | goto out_agf; |
| 848 | xreap_binval_reset(rs); |
| 849 | } |
| 850 | |
| 851 | agbno += aglen; |
| 852 | } |
| 853 | |
| 854 | out_agf: |
| 855 | xfs_trans_brelse(sc->tp, sc->sa.agf_bp); |
| 856 | sc->sa.agf_bp = NULL; |
| 857 | out_pag: |
| 858 | xfs_perag_put(sc->sa.pag); |
| 859 | sc->sa.pag = NULL; |
| 860 | return error; |
| 861 | } |
| 862 | |
| 863 | /* |
| 864 | * Dispose of every block of every fs metadata extent in the bitmap. |
| 865 | * Do not use this to dispose of the mappings in an ondisk inode fork. |
| 866 | */ |
| 867 | int |
| 868 | xrep_reap_fsblocks( |
| 869 | struct xfs_scrub *sc, |
| 870 | struct xfsb_bitmap *bitmap, |
| 871 | const struct xfs_owner_info *oinfo) |
| 872 | { |
| 873 | struct xreap_state rs = { |
| 874 | .sc = sc, |
| 875 | .oinfo = oinfo, |
| 876 | .resv = XFS_AG_RESV_NONE, |
| 877 | }; |
| 878 | int error; |
| 879 | |
| 880 | ASSERT(xfs_has_rmapbt(sc->mp)); |
| 881 | ASSERT(sc->ip != NULL); |
| 882 | |
| 883 | if (oinfo == &XFS_RMAP_OINFO_COW) |
| 884 | xreap_configure_agcow_limits(&rs); |
| 885 | else |
| 886 | xreap_configure_agextent_limits(&rs); |
| 887 | error = xfsb_bitmap_walk(bitmap, xreap_fsmeta_extent, &rs); |
| 888 | if (error) |
| 889 | return error; |
| 890 | |
| 891 | if (xreap_is_dirty(&rs)) |
| 892 | return xrep_defer_finish(sc); |
| 893 | |
| 894 | return 0; |
| 895 | } |
| 896 | |
| 897 | #ifdef CONFIG_XFS_RT |
| 898 | /* |
| 899 | * Figure out the longest run of blocks that we can dispose of with a single |
| 900 | * call. Cross-linked blocks should have their reverse mappings removed, but |
| 901 | * single-owner extents can be freed. Units are rt blocks, not rt extents. |
| 902 | */ |
| 903 | STATIC int |
| 904 | xreap_rgextent_select( |
| 905 | struct xreap_state *rs, |
| 906 | xfs_rgblock_t rgbno, |
| 907 | xfs_rgblock_t rgbno_next, |
| 908 | bool *crosslinked, |
| 909 | xfs_extlen_t *rglenp) |
| 910 | { |
| 911 | struct xfs_scrub *sc = rs->sc; |
| 912 | struct xfs_btree_cur *cur; |
| 913 | xfs_rgblock_t bno = rgbno + 1; |
| 914 | xfs_extlen_t len = 1; |
| 915 | int error; |
| 916 | |
| 917 | /* |
| 918 | * Determine if there are any other rmap records covering the first |
| 919 | * block of this extent. If so, the block is crosslinked. |
| 920 | */ |
| 921 | cur = xfs_rtrmapbt_init_cursor(sc->tp, sc->sr.rtg); |
| 922 | error = xfs_rmap_has_other_keys(cur, rgbno, 1, rs->oinfo, |
| 923 | crosslinked); |
| 924 | if (error) |
| 925 | goto out_cur; |
| 926 | |
| 927 | /* |
| 928 | * Figure out how many of the subsequent blocks have the same crosslink |
| 929 | * status. |
| 930 | */ |
| 931 | while (bno < rgbno_next) { |
| 932 | bool also_crosslinked; |
| 933 | |
| 934 | error = xfs_rmap_has_other_keys(cur, bno, 1, rs->oinfo, |
| 935 | &also_crosslinked); |
| 936 | if (error) |
| 937 | goto out_cur; |
| 938 | |
| 939 | if (*crosslinked != also_crosslinked) |
| 940 | break; |
| 941 | |
| 942 | len++; |
| 943 | bno++; |
| 944 | } |
| 945 | |
| 946 | *rglenp = len; |
| 947 | trace_xreap_agextent_select(rtg_group(sc->sr.rtg), rgbno, len, |
| 948 | *crosslinked); |
| 949 | out_cur: |
| 950 | xfs_btree_del_cursor(cur, error); |
| 951 | return error; |
| 952 | } |
| 953 | |
| 954 | /* |
| 955 | * Dispose of as much of the beginning of this rtgroup extent as possible. |
| 956 | * The number of blocks disposed of will be returned in @rglenp. |
| 957 | */ |
| 958 | STATIC int |
| 959 | xreap_rgextent_iter( |
| 960 | struct xreap_state *rs, |
| 961 | xfs_rgblock_t rgbno, |
| 962 | xfs_extlen_t *rglenp, |
| 963 | bool crosslinked) |
| 964 | { |
| 965 | struct xfs_scrub *sc = rs->sc; |
| 966 | xfs_rtblock_t rtbno; |
| 967 | int error; |
| 968 | |
| 969 | /* |
| 970 | * The only caller so far is CoW fork repair, so we only know how to |
| 971 | * unlink or free CoW staging extents. Here we don't have to worry |
| 972 | * about invalidating buffers! |
| 973 | */ |
| 974 | if (rs->oinfo != &XFS_RMAP_OINFO_COW) { |
| 975 | ASSERT(rs->oinfo == &XFS_RMAP_OINFO_COW); |
| 976 | return -EFSCORRUPTED; |
| 977 | } |
| 978 | ASSERT(rs->resv == XFS_AG_RESV_NONE); |
| 979 | |
| 980 | rtbno = xfs_rgbno_to_rtb(sc->sr.rtg, rgbno); |
| 981 | |
| 982 | /* |
| 983 | * t1: There are other rmappings; this block is cross linked and must |
| 984 | * not be freed. Remove the forward and reverse mapping and move on. |
| 985 | */ |
| 986 | if (crosslinked) { |
| 987 | trace_xreap_dispose_unmap_extent(rtg_group(sc->sr.rtg), rgbno, |
| 988 | *rglenp); |
| 989 | |
| 990 | xfs_refcount_free_cow_extent(sc->tp, true, rtbno, *rglenp); |
| 991 | xreap_inc_defer(rs); |
| 992 | return 0; |
| 993 | } |
| 994 | |
| 995 | trace_xreap_dispose_free_extent(rtg_group(sc->sr.rtg), rgbno, *rglenp); |
| 996 | |
| 997 | /* |
| 998 | * t2: The CoW staging extent is not crosslinked. Use deferred work |
| 999 | * to remove the refcountbt records (which removes the rmap records) |
| 1000 | * and free the extent. We're not worried about the system going down |
| 1001 | * here because log recovery walks the refcount btree to clean out the |
| 1002 | * CoW staging extents. |
| 1003 | */ |
| 1004 | xfs_refcount_free_cow_extent(sc->tp, true, rtbno, *rglenp); |
| 1005 | error = xfs_free_extent_later(sc->tp, rtbno, *rglenp, NULL, |
| 1006 | rs->resv, |
| 1007 | XFS_FREE_EXTENT_REALTIME | |
| 1008 | XFS_FREE_EXTENT_SKIP_DISCARD); |
| 1009 | if (error) |
| 1010 | return error; |
| 1011 | |
| 1012 | xreap_inc_defer(rs); |
| 1013 | return 0; |
| 1014 | } |
| 1015 | |
| 1016 | /* |
| 1017 | * Compute the maximum number of intent items that reaping can attach to the |
| 1018 | * scrub transaction given the worst case log overhead of the intent items |
| 1019 | * needed to reap a single CoW staging extent. This is not for freeing |
| 1020 | * metadata blocks. |
| 1021 | */ |
| 1022 | STATIC void |
| 1023 | xreap_configure_rgcow_limits( |
| 1024 | struct xreap_state *rs) |
| 1025 | { |
| 1026 | struct xfs_scrub *sc = rs->sc; |
| 1027 | struct xfs_mount *mp = sc->mp; |
| 1028 | |
| 1029 | /* |
| 1030 | * In the worst case, relogging an intent item causes both an intent |
| 1031 | * item and a done item to be attached to a transaction for each extent |
| 1032 | * that we'd like to process. |
| 1033 | */ |
| 1034 | const unsigned int efi = xfs_efi_log_space(1) + |
| 1035 | xfs_efd_log_space(1); |
| 1036 | const unsigned int rui = xfs_rui_log_space(1) + |
| 1037 | xfs_rud_log_space(); |
| 1038 | const unsigned int cui = xfs_cui_log_space(1) + |
| 1039 | xfs_cud_log_space(); |
| 1040 | |
| 1041 | /* |
| 1042 | * Various things can happen when reaping non-CoW metadata blocks: |
| 1043 | * |
| 1044 | * t1: Unmapping crosslinked CoW blocks: deferred removal of refcount |
| 1045 | * record, which defers removal of rmap record |
| 1046 | * |
| 1047 | * t2: Freeing CoW blocks: deferred removal of refcount record, which |
| 1048 | * defers removal of rmap record; and deferred removal of the space |
| 1049 | * |
| 1050 | * For simplicity, we'll use the worst-case intents size to determine |
| 1051 | * the maximum number of deferred extents before we have to finish the |
| 1052 | * whole chain. If we're trying to reap a btree larger than this size, |
| 1053 | * a crash midway through reaping can result in leaked blocks. |
| 1054 | */ |
| 1055 | const unsigned int t1 = cui + rui; |
| 1056 | const unsigned int t2 = cui + rui + efi; |
| 1057 | const unsigned int per_intent = max(t1, t2); |
| 1058 | |
| 1059 | /* |
| 1060 | * For each transaction in a reap chain, we must be able to take one |
| 1061 | * step in the defer item chain, which should only consist of CUI, EFI, |
| 1062 | * or RUI items. |
| 1063 | */ |
| 1064 | const unsigned int f1 = xfs_calc_finish_rt_efi_reservation(mp, 1); |
| 1065 | const unsigned int f2 = xfs_calc_finish_rt_rui_reservation(mp, 1); |
| 1066 | const unsigned int f3 = xfs_calc_finish_rt_cui_reservation(mp, 1); |
| 1067 | const unsigned int step_size = max3(f1, f2, f3); |
| 1068 | |
| 1069 | /* |
| 1070 | * The only buffer for the rt device is the rtgroup super, so we don't |
| 1071 | * need to save space for buffer invalidations. |
| 1072 | */ |
| 1073 | xreap_configure_limits(rs, fixed_overhead: step_size, variable_overhead: per_intent, per_intent, per_binval: 0); |
| 1074 | |
| 1075 | trace_xreap_rgcow_limits(sc->tp, 0, 0, step_size, per_intent, |
| 1076 | rs->max_deferred); |
| 1077 | } |
| 1078 | |
/*
 * The set of rtgroup lock flags (bitmap, rmap, and refcount) that
 * xreap_rtmeta_extent passes to xfs_rtgroup_lock and xfs_rtgroup_unlock
 * while it reaps blocks from a realtime group.
 */
#define XREAP_RTGLOCK_ALL	(XFS_RTGLOCK_BITMAP | \
				 XFS_RTGLOCK_RMAP | \
				 XFS_RTGLOCK_REFCOUNT)
| 1082 | |
| 1083 | /* |
| 1084 | * Break a rt file metadata extent into sub-extents by fate (crosslinked, not |
| 1085 | * crosslinked), and dispose of each sub-extent separately. The extent must |
| 1086 | * be aligned to a realtime extent. |
| 1087 | */ |
| 1088 | STATIC int |
| 1089 | xreap_rtmeta_extent( |
| 1090 | uint64_t rtbno, |
| 1091 | uint64_t len, |
| 1092 | void *priv) |
| 1093 | { |
| 1094 | struct xreap_state *rs = priv; |
| 1095 | struct xfs_scrub *sc = rs->sc; |
| 1096 | xfs_rgblock_t rgbno = xfs_rtb_to_rgbno(sc->mp, rtbno); |
| 1097 | xfs_rgblock_t rgbno_next = rgbno + len; |
| 1098 | int error = 0; |
| 1099 | |
| 1100 | ASSERT(sc->ip != NULL); |
| 1101 | ASSERT(!sc->sr.rtg); |
| 1102 | |
| 1103 | /* |
| 1104 | * We're reaping blocks after repairing file metadata, which means that |
| 1105 | * we have to init the xchk_ag structure ourselves. |
| 1106 | */ |
| 1107 | sc->sr.rtg = xfs_rtgroup_get(sc->mp, xfs_rtb_to_rgno(sc->mp, rtbno)); |
| 1108 | if (!sc->sr.rtg) |
| 1109 | return -EFSCORRUPTED; |
| 1110 | |
| 1111 | xfs_rtgroup_lock(sc->sr.rtg, XREAP_RTGLOCK_ALL); |
| 1112 | |
| 1113 | while (rgbno < rgbno_next) { |
| 1114 | xfs_extlen_t rglen; |
| 1115 | bool crosslinked; |
| 1116 | |
| 1117 | error = xreap_rgextent_select(rs, rgbno, rgbno_next, |
| 1118 | &crosslinked, &rglen); |
| 1119 | if (error) |
| 1120 | goto out_unlock; |
| 1121 | |
| 1122 | error = xreap_rgextent_iter(rs, rgbno, &rglen, crosslinked); |
| 1123 | if (error) |
| 1124 | goto out_unlock; |
| 1125 | |
| 1126 | if (xreap_want_defer_finish(rs)) { |
| 1127 | error = xfs_defer_finish(&sc->tp); |
| 1128 | if (error) |
| 1129 | goto out_unlock; |
| 1130 | xreap_defer_finish_reset(rs); |
| 1131 | } else if (xreap_want_binval_roll(rs)) { |
| 1132 | error = xfs_trans_roll_inode(&sc->tp, sc->ip); |
| 1133 | if (error) |
| 1134 | goto out_unlock; |
| 1135 | xreap_binval_reset(rs); |
| 1136 | } |
| 1137 | |
| 1138 | rgbno += rglen; |
| 1139 | } |
| 1140 | |
| 1141 | out_unlock: |
| 1142 | xfs_rtgroup_unlock(sc->sr.rtg, XREAP_RTGLOCK_ALL); |
| 1143 | xfs_rtgroup_put(sc->sr.rtg); |
| 1144 | sc->sr.rtg = NULL; |
| 1145 | return error; |
| 1146 | } |
| 1147 | |
| 1148 | /* |
| 1149 | * Dispose of every block of every rt metadata extent in the bitmap. |
| 1150 | * Do not use this to dispose of the mappings in an ondisk inode fork. |
| 1151 | */ |
| 1152 | int |
| 1153 | xrep_reap_rtblocks( |
| 1154 | struct xfs_scrub *sc, |
| 1155 | struct xrtb_bitmap *bitmap, |
| 1156 | const struct xfs_owner_info *oinfo) |
| 1157 | { |
| 1158 | struct xreap_state rs = { |
| 1159 | .sc = sc, |
| 1160 | .oinfo = oinfo, |
| 1161 | .resv = XFS_AG_RESV_NONE, |
| 1162 | }; |
| 1163 | int error; |
| 1164 | |
| 1165 | ASSERT(xfs_has_rmapbt(sc->mp)); |
| 1166 | ASSERT(sc->ip != NULL); |
| 1167 | ASSERT(oinfo == &XFS_RMAP_OINFO_COW); |
| 1168 | |
| 1169 | xreap_configure_rgcow_limits(&rs); |
| 1170 | error = xrtb_bitmap_walk(bitmap, xreap_rtmeta_extent, &rs); |
| 1171 | if (error) |
| 1172 | return error; |
| 1173 | |
| 1174 | if (xreap_is_dirty(&rs)) |
| 1175 | return xrep_defer_finish(sc); |
| 1176 | |
| 1177 | return 0; |
| 1178 | } |
| 1179 | #endif /* CONFIG_XFS_RT */ |
| 1180 | |
| 1181 | /* |
| 1182 | * Dispose of every block of an old metadata btree that used to be rooted in a |
| 1183 | * metadata directory file. |
| 1184 | */ |
| 1185 | int |
| 1186 | xrep_reap_metadir_fsblocks( |
| 1187 | struct xfs_scrub *sc, |
| 1188 | struct xfsb_bitmap *bitmap) |
| 1189 | { |
| 1190 | /* |
| 1191 | * Reap old metadir btree blocks with XFS_AG_RESV_NONE because the old |
| 1192 | * blocks are no longer mapped by the inode, and inode metadata space |
| 1193 | * reservations can only account freed space to the i_nblocks. |
| 1194 | */ |
| 1195 | struct xfs_owner_info oinfo; |
| 1196 | struct xreap_state rs = { |
| 1197 | .sc = sc, |
| 1198 | .oinfo = &oinfo, |
| 1199 | .resv = XFS_AG_RESV_NONE, |
| 1200 | }; |
| 1201 | int error; |
| 1202 | |
| 1203 | ASSERT(xfs_has_rmapbt(sc->mp)); |
| 1204 | ASSERT(sc->ip != NULL); |
| 1205 | ASSERT(xfs_is_metadir_inode(sc->ip)); |
| 1206 | |
| 1207 | xreap_configure_agextent_limits(&rs); |
| 1208 | xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, XFS_DATA_FORK); |
| 1209 | error = xfsb_bitmap_walk(bitmap, xreap_fsmeta_extent, &rs); |
| 1210 | if (error) |
| 1211 | return error; |
| 1212 | |
| 1213 | if (xreap_is_dirty(&rs)) { |
| 1214 | error = xrep_defer_finish(sc); |
| 1215 | if (error) |
| 1216 | return error; |
| 1217 | } |
| 1218 | |
| 1219 | return xrep_reset_metafile_resv(sc); |
| 1220 | } |
| 1221 | |
| 1222 | /* |
| 1223 | * Metadata files are not supposed to share blocks with anything else. |
| 1224 | * If blocks are shared, we remove the reverse mapping (thus reducing the |
| 1225 | * crosslink factor); if blocks are not shared, we also need to free them. |
| 1226 | * |
| 1227 | * This first step determines the longest subset of the passed-in imap |
| 1228 | * (starting at its beginning) that is either crosslinked or not crosslinked. |
| 1229 | * The blockcount will be adjust down as needed. |
| 1230 | */ |
| 1231 | STATIC int |
| 1232 | xreap_bmapi_select( |
| 1233 | struct xreap_state *rs, |
| 1234 | struct xfs_bmbt_irec *imap, |
| 1235 | bool *crosslinked) |
| 1236 | { |
| 1237 | struct xfs_owner_info oinfo; |
| 1238 | struct xfs_scrub *sc = rs->sc; |
| 1239 | struct xfs_btree_cur *cur; |
| 1240 | xfs_filblks_t len = 1; |
| 1241 | xfs_agblock_t bno; |
| 1242 | xfs_agblock_t agbno; |
| 1243 | xfs_agblock_t agbno_next; |
| 1244 | int error; |
| 1245 | |
| 1246 | agbno = XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock); |
| 1247 | agbno_next = agbno + imap->br_blockcount; |
| 1248 | |
| 1249 | cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp, |
| 1250 | sc->sa.pag); |
| 1251 | |
| 1252 | xfs_rmap_ino_owner(&oinfo, rs->ip->i_ino, rs->whichfork, |
| 1253 | imap->br_startoff); |
| 1254 | error = xfs_rmap_has_other_keys(cur, agbno, 1, &oinfo, crosslinked); |
| 1255 | if (error) |
| 1256 | goto out_cur; |
| 1257 | |
| 1258 | bno = agbno + 1; |
| 1259 | while (bno < agbno_next) { |
| 1260 | bool also_crosslinked; |
| 1261 | |
| 1262 | oinfo.oi_offset++; |
| 1263 | error = xfs_rmap_has_other_keys(cur, bno, 1, &oinfo, |
| 1264 | &also_crosslinked); |
| 1265 | if (error) |
| 1266 | goto out_cur; |
| 1267 | |
| 1268 | if (also_crosslinked != *crosslinked) |
| 1269 | break; |
| 1270 | |
| 1271 | len++; |
| 1272 | bno++; |
| 1273 | } |
| 1274 | |
| 1275 | imap->br_blockcount = len; |
| 1276 | trace_xreap_bmapi_select(pag_group(sc->sa.pag), agbno, len, |
| 1277 | *crosslinked); |
| 1278 | out_cur: |
| 1279 | xfs_btree_del_cursor(cur, error); |
| 1280 | return error; |
| 1281 | } |
| 1282 | |
| 1283 | /* |
| 1284 | * Decide if this buffer can be joined to a transaction. This is true for most |
| 1285 | * buffers, but there are two cases that we want to catch: large remote xattr |
| 1286 | * value buffers are not logged and can overflow the buffer log item dirty |
| 1287 | * bitmap size; and oversized cached buffers if things have really gone |
| 1288 | * haywire. |
| 1289 | */ |
| 1290 | static inline bool |
| 1291 | xreap_buf_loggable( |
| 1292 | const struct xfs_buf *bp) |
| 1293 | { |
| 1294 | int i; |
| 1295 | |
| 1296 | for (i = 0; i < bp->b_map_count; i++) { |
| 1297 | int chunks; |
| 1298 | int map_size; |
| 1299 | |
| 1300 | chunks = DIV_ROUND_UP(BBTOB(bp->b_maps[i].bm_len), |
| 1301 | XFS_BLF_CHUNK); |
| 1302 | map_size = DIV_ROUND_UP(chunks, NBWORD); |
| 1303 | if (map_size > XFS_BLF_DATAMAP_SIZE) |
| 1304 | return false; |
| 1305 | } |
| 1306 | |
| 1307 | return true; |
| 1308 | } |
| 1309 | |
/*
 * Invalidate any buffers for this file mapping.  The @imap blockcount may be
 * adjusted downward if we need to roll the transaction.
 *
 * The AG being worked on is taken from sc->sa.pag.  Returns 0 on success
 * (including when the mapping fails AG block verification and is skipped),
 * or a negative errno if reading the fork mappings fails or an unexpected
 * mapping is found.
 */
STATIC int
xreap_bmapi_binval(
	struct xreap_state	*rs,
	struct xfs_bmbt_irec	*imap)
{
	struct xfs_scrub	*sc = rs->sc;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_perag	*pag = sc->sa.pag;
	int			bmap_flags = xfs_bmapi_aflag(rs->whichfork);
	xfs_fileoff_t		off;
	xfs_fileoff_t		max_off;
	xfs_extlen_t		scan_blocks;
	xfs_agblock_t		bno;
	xfs_agblock_t		agbno;
	xfs_agblock_t		agbno_next;
	int			error;

	/*
	 * Avoid invalidating AG headers and post-EOFS blocks because we never
	 * own those.
	 */
	agbno = bno = XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock);
	agbno_next = agbno + imap->br_blockcount;
	if (!xfs_verify_agbno(pag, agbno) ||
	    !xfs_verify_agbno(pag, agbno_next - 1))
		return 0;

	/*
	 * Buffers for file blocks can span multiple contiguous mappings.  This
	 * means that for each block in the mapping, there could exist an
	 * xfs_buf indexed by that block with any length up to the maximum
	 * buffer size (remote xattr values) or to the next hole in the fork.
	 * To set up our binval scan, first we need to figure out the location
	 * of the next hole.
	 */
	off = imap->br_startoff + imap->br_blockcount;
	max_off = off + xfs_attr3_max_rmt_blocks(mp);
	while (off < max_off) {
		struct xfs_bmbt_irec	hmap;
		int			nhmaps = 1;

		error = xfs_bmapi_read(rs->ip, off, max_off - off, &hmap,
				&nhmaps, bmap_flags);
		if (error)
			return error;
		/* Exactly one mapping is expected, and never a delalloc one. */
		if (nhmaps != 1 || hmap.br_startblock == DELAYSTARTBLOCK) {
			ASSERT(0);
			return -EFSCORRUPTED;
		}

		/* Stop at the first mapping that isn't real space. */
		if (!xfs_bmap_is_real_extent(&hmap))
			break;

		off = hmap.br_startoff + hmap.br_blockcount;
	}
	/* Blocks from the start of @imap to where the scan above stopped. */
	scan_blocks = off - imap->br_startoff;

	trace_xreap_bmapi_binval_scan(sc, imap, scan_blocks);

	/*
	 * If there are incore buffers for these blocks, invalidate them.  If
	 * we can't (try)lock the buffer we assume it's owned by someone else
	 * and leave it alone.  The buffer cache cannot detect aliasing, so
	 * employ nested loops to detect incore buffers of any plausible size.
	 */
	while (bno < agbno_next) {
		struct xrep_bufscan	scan = {
			.daddr		= xfs_agbno_to_daddr(pag, bno),
			.max_sectors	= xrep_bufscan_max_sectors(mp,
								scan_blocks),
			.daddr_step	= XFS_FSB_TO_BB(mp, 1),
		};
		struct xfs_buf		*bp;

		while ((bp = xrep_bufscan_advance(mp, &scan)) != NULL) {
			/*
			 * Loggable buffers are invalidated through the
			 * transaction; anything else is marked stale and
			 * released directly.
			 */
			if (xreap_buf_loggable(bp)) {
				xfs_trans_bjoin(sc->tp, bp);
				xfs_trans_binval(sc->tp, bp);
			} else {
				xfs_buf_stale(bp);
				xfs_buf_relse(bp);
			}

			/*
			 * Stop invalidating if we've hit the limit; we should
			 * still have enough reservation left to free however
			 * far we've gotten.
			 *
			 * NOTE(review): this stores the number of blocks from
			 * bno to the end of the mapping, which looks like the
			 * part not yet scanned rather than the part already
			 * scanned (bno - agbno), and can never be zero here.
			 * Confirm that this is the intended value.
			 */
			if (!xreap_inc_binval(rs)) {
				imap->br_blockcount = agbno_next - bno;
				goto out;
			}
		}

		/*
		 * Move on to the next block; the distance left to the end
		 * of the scan range shrinks by one block as well.
		 */
		bno++;
		scan_blocks--;
	}

out:
	trace_xreap_bmapi_binval(pag_group(sc->sa.pag), agbno,
			imap->br_blockcount);
	return 0;
}
| 1417 | |
| 1418 | /* |
| 1419 | * Dispose of as much of the beginning of this file fork mapping as possible. |
| 1420 | * The number of blocks disposed of is returned in @imap->br_blockcount. |
| 1421 | */ |
| 1422 | STATIC int |
| 1423 | xrep_reap_bmapi_iter( |
| 1424 | struct xreap_state *rs, |
| 1425 | struct xfs_bmbt_irec *imap, |
| 1426 | bool crosslinked) |
| 1427 | { |
| 1428 | struct xfs_scrub *sc = rs->sc; |
| 1429 | int error; |
| 1430 | |
| 1431 | if (crosslinked) { |
| 1432 | /* |
| 1433 | * If there are other rmappings, this block is cross linked and |
| 1434 | * must not be freed. Remove the reverse mapping, leave the |
| 1435 | * buffer cache in its possibly confused state, and move on. |
| 1436 | * We don't want to risk discarding valid data buffers from |
| 1437 | * anybody else who thinks they own the block, even though that |
| 1438 | * runs the risk of stale buffer warnings in the future. |
| 1439 | */ |
| 1440 | trace_xreap_dispose_unmap_extent(pag_group(sc->sa.pag), |
| 1441 | XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock), |
| 1442 | imap->br_blockcount); |
| 1443 | |
| 1444 | /* |
| 1445 | * t0: Schedule removal of the mapping from the fork. We use |
| 1446 | * deferred log intents in this function to control the exact |
| 1447 | * sequence of metadata updates. |
| 1448 | */ |
| 1449 | xfs_bmap_unmap_extent(sc->tp, rs->ip, rs->whichfork, imap); |
| 1450 | xfs_trans_mod_dquot_byino(sc->tp, rs->ip, XFS_TRANS_DQ_BCOUNT, |
| 1451 | -(int64_t)imap->br_blockcount); |
| 1452 | xfs_rmap_unmap_extent(sc->tp, rs->ip, rs->whichfork, imap); |
| 1453 | return 0; |
| 1454 | } |
| 1455 | |
| 1456 | /* |
| 1457 | * If the block is not crosslinked, we can invalidate all the incore |
| 1458 | * buffers for the extent, and then free the extent. This is a bit of |
| 1459 | * a mess since we don't detect discontiguous buffers that are indexed |
| 1460 | * by a block starting before the first block of the extent but overlap |
| 1461 | * anyway. |
| 1462 | */ |
| 1463 | trace_xreap_dispose_free_extent(pag_group(sc->sa.pag), |
| 1464 | XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock), |
| 1465 | imap->br_blockcount); |
| 1466 | |
| 1467 | /* |
| 1468 | * Invalidate as many buffers as we can, starting at the beginning of |
| 1469 | * this mapping. If this function sets blockcount to zero, the |
| 1470 | * transaction is full of logged buffer invalidations, so we need to |
| 1471 | * return early so that we can roll and retry. |
| 1472 | */ |
| 1473 | error = xreap_bmapi_binval(rs, imap); |
| 1474 | if (error || imap->br_blockcount == 0) |
| 1475 | return error; |
| 1476 | |
| 1477 | /* |
| 1478 | * t1: Schedule removal of the mapping from the fork. We use deferred |
| 1479 | * work in this function to control the exact sequence of metadata |
| 1480 | * updates. |
| 1481 | */ |
| 1482 | xfs_bmap_unmap_extent(sc->tp, rs->ip, rs->whichfork, imap); |
| 1483 | xfs_trans_mod_dquot_byino(sc->tp, rs->ip, XFS_TRANS_DQ_BCOUNT, |
| 1484 | -(int64_t)imap->br_blockcount); |
| 1485 | return xfs_free_extent_later(sc->tp, imap->br_startblock, |
| 1486 | imap->br_blockcount, NULL, XFS_AG_RESV_NONE, |
| 1487 | XFS_FREE_EXTENT_SKIP_DISCARD); |
| 1488 | } |
| 1489 | |
| 1490 | /* Compute the maximum mapcount of a file buffer. */ |
| 1491 | static unsigned int |
| 1492 | xreap_bmapi_binval_mapcount( |
| 1493 | struct xfs_scrub *sc) |
| 1494 | { |
| 1495 | /* directory blocks can span multiple fsblocks and be discontiguous */ |
| 1496 | if (sc->sm->sm_type == XFS_SCRUB_TYPE_DIR) |
| 1497 | return sc->mp->m_dir_geo->fsbcount; |
| 1498 | |
| 1499 | /* all other file xattr/symlink blocks must be contiguous */ |
| 1500 | return 1; |
| 1501 | } |
| 1502 | |
| 1503 | /* Compute the maximum block size of a file buffer. */ |
| 1504 | static unsigned int |
| 1505 | xreap_bmapi_binval_blocksize( |
| 1506 | struct xfs_scrub *sc) |
| 1507 | { |
| 1508 | switch (sc->sm->sm_type) { |
| 1509 | case XFS_SCRUB_TYPE_DIR: |
| 1510 | return sc->mp->m_dir_geo->blksize; |
| 1511 | case XFS_SCRUB_TYPE_XATTR: |
| 1512 | case XFS_SCRUB_TYPE_PARENT: |
| 1513 | /* |
| 1514 | * The xattr structure itself consists of single fsblocks, but |
| 1515 | * there could be remote xattr blocks to invalidate. |
| 1516 | */ |
| 1517 | return XFS_XATTR_SIZE_MAX; |
| 1518 | } |
| 1519 | |
| 1520 | /* everything else is a single block */ |
| 1521 | return sc->mp->m_sb.sb_blocksize; |
| 1522 | } |
| 1523 | |
| 1524 | /* |
| 1525 | * Compute the maximum number of buffer invalidations that we can do while |
| 1526 | * reaping a single extent from a file fork. |
| 1527 | */ |
| 1528 | STATIC void |
| 1529 | xreap_configure_bmapi_limits( |
| 1530 | struct xreap_state *rs) |
| 1531 | { |
| 1532 | struct xfs_scrub *sc = rs->sc; |
| 1533 | struct xfs_mount *mp = sc->mp; |
| 1534 | |
| 1535 | /* overhead of invalidating a buffer */ |
| 1536 | const unsigned int per_binval = |
| 1537 | xfs_buf_inval_log_space(xreap_bmapi_binval_mapcount(sc), |
| 1538 | xreap_bmapi_binval_blocksize(sc)); |
| 1539 | |
| 1540 | /* |
| 1541 | * In the worst case, relogging an intent item causes both an intent |
| 1542 | * item and a done item to be attached to a transaction for each extent |
| 1543 | * that we'd like to process. |
| 1544 | */ |
| 1545 | const unsigned int efi = xfs_efi_log_space(1) + |
| 1546 | xfs_efd_log_space(1); |
| 1547 | const unsigned int rui = xfs_rui_log_space(1) + |
| 1548 | xfs_rud_log_space(); |
| 1549 | const unsigned int bui = xfs_bui_log_space(1) + |
| 1550 | xfs_bud_log_space(); |
| 1551 | |
| 1552 | /* |
| 1553 | * t1: Unmapping crosslinked file data blocks: one bmap deletion, |
| 1554 | * possibly an EFI for underfilled bmbt blocks, and an rmap deletion. |
| 1555 | * |
| 1556 | * t2: Freeing freeing file data blocks: one bmap deletion, possibly an |
| 1557 | * EFI for underfilled bmbt blocks, and another EFI for the space |
| 1558 | * itself. |
| 1559 | */ |
| 1560 | const unsigned int t1 = (bui + efi) + rui; |
| 1561 | const unsigned int t2 = (bui + efi) + efi; |
| 1562 | const unsigned int per_intent = max(t1, t2); |
| 1563 | |
| 1564 | /* |
| 1565 | * For each transaction in a reap chain, we must be able to take one |
| 1566 | * step in the defer item chain, which should only consist of CUI, EFI, |
| 1567 | * or RUI items. |
| 1568 | */ |
| 1569 | const unsigned int f1 = xfs_calc_finish_efi_reservation(mp, 1); |
| 1570 | const unsigned int f2 = xfs_calc_finish_rui_reservation(mp, 1); |
| 1571 | const unsigned int f3 = xfs_calc_finish_bui_reservation(mp, 1); |
| 1572 | const unsigned int step_size = max3(f1, f2, f3); |
| 1573 | |
| 1574 | /* |
| 1575 | * Each call to xreap_ifork_extent starts with a clean transaction and |
| 1576 | * operates on a single mapping by creating a chain of log intent items |
| 1577 | * for that mapping. We need to leave enough reservation in the |
| 1578 | * transaction to log btree buffer and inode updates for each step in |
| 1579 | * the chain, and to relog the log intents. |
| 1580 | */ |
| 1581 | const unsigned int per_extent_res = per_intent + step_size; |
| 1582 | |
| 1583 | xreap_configure_limits(rs, fixed_overhead: per_extent_res, variable_overhead: per_binval, per_intent: 0, per_binval); |
| 1584 | |
| 1585 | trace_xreap_bmapi_limits(sc->tp, per_binval, rs->max_binval, |
| 1586 | step_size, per_intent, 1); |
| 1587 | } |
| 1588 | |
| 1589 | /* |
| 1590 | * Dispose of as much of this file extent as we can. Upon successful return, |
| 1591 | * the imap will reflect the mapping that was removed from the fork. |
| 1592 | */ |
| 1593 | STATIC int |
| 1594 | xreap_ifork_extent( |
| 1595 | struct xreap_state *rs, |
| 1596 | struct xfs_bmbt_irec *imap) |
| 1597 | { |
| 1598 | struct xfs_scrub *sc = rs->sc; |
| 1599 | xfs_agnumber_t agno; |
| 1600 | bool crosslinked; |
| 1601 | int error; |
| 1602 | |
| 1603 | ASSERT(sc->sa.pag == NULL); |
| 1604 | |
| 1605 | trace_xreap_ifork_extent(sc, rs->ip, rs->whichfork, imap); |
| 1606 | |
| 1607 | agno = XFS_FSB_TO_AGNO(sc->mp, imap->br_startblock); |
| 1608 | sc->sa.pag = xfs_perag_get(sc->mp, agno); |
| 1609 | if (!sc->sa.pag) |
| 1610 | return -EFSCORRUPTED; |
| 1611 | |
| 1612 | error = xfs_alloc_read_agf(sc->sa.pag, sc->tp, 0, &sc->sa.agf_bp); |
| 1613 | if (error) |
| 1614 | goto out_pag; |
| 1615 | |
| 1616 | /* |
| 1617 | * Decide the fate of the blocks at the beginning of the mapping, then |
| 1618 | * update the mapping to use it with the unmap calls. |
| 1619 | */ |
| 1620 | error = xreap_bmapi_select(rs, imap, &crosslinked); |
| 1621 | if (error) |
| 1622 | goto out_agf; |
| 1623 | |
| 1624 | error = xrep_reap_bmapi_iter(rs, imap, crosslinked); |
| 1625 | if (error) |
| 1626 | goto out_agf; |
| 1627 | |
| 1628 | out_agf: |
| 1629 | xfs_trans_brelse(sc->tp, sc->sa.agf_bp); |
| 1630 | sc->sa.agf_bp = NULL; |
| 1631 | out_pag: |
| 1632 | xfs_perag_put(sc->sa.pag); |
| 1633 | sc->sa.pag = NULL; |
| 1634 | return error; |
| 1635 | } |
| 1636 | |
| 1637 | /* |
| 1638 | * Dispose of each block mapped to the given fork of the given file. Callers |
| 1639 | * must hold ILOCK_EXCL, and ip can only be sc->ip or sc->tempip. The fork |
| 1640 | * must not have any delalloc reservations. |
| 1641 | */ |
| 1642 | int |
| 1643 | xrep_reap_ifork( |
| 1644 | struct xfs_scrub *sc, |
| 1645 | struct xfs_inode *ip, |
| 1646 | int whichfork) |
| 1647 | { |
| 1648 | struct xreap_state rs = { |
| 1649 | .sc = sc, |
| 1650 | .ip = ip, |
| 1651 | .whichfork = whichfork, |
| 1652 | }; |
| 1653 | xfs_fileoff_t off = 0; |
| 1654 | int bmap_flags = xfs_bmapi_aflag(whichfork); |
| 1655 | int error; |
| 1656 | |
| 1657 | ASSERT(xfs_has_rmapbt(sc->mp)); |
| 1658 | ASSERT(ip == sc->ip || ip == sc->tempip); |
| 1659 | ASSERT(whichfork == XFS_ATTR_FORK || !XFS_IS_REALTIME_INODE(ip)); |
| 1660 | |
| 1661 | xreap_configure_bmapi_limits(&rs); |
| 1662 | while (off < XFS_MAX_FILEOFF) { |
| 1663 | struct xfs_bmbt_irec imap; |
| 1664 | int nimaps = 1; |
| 1665 | |
| 1666 | /* Read the next extent, skip past holes and delalloc. */ |
| 1667 | error = xfs_bmapi_read(ip, off, XFS_MAX_FILEOFF - off, &imap, |
| 1668 | &nimaps, bmap_flags); |
| 1669 | if (error) |
| 1670 | return error; |
| 1671 | if (nimaps != 1 || imap.br_startblock == DELAYSTARTBLOCK) { |
| 1672 | ASSERT(0); |
| 1673 | return -EFSCORRUPTED; |
| 1674 | } |
| 1675 | |
| 1676 | /* |
| 1677 | * If this is a real space mapping, reap as much of it as we |
| 1678 | * can in a single transaction. |
| 1679 | */ |
| 1680 | if (xfs_bmap_is_real_extent(&imap)) { |
| 1681 | error = xreap_ifork_extent(&rs, &imap); |
| 1682 | if (error) |
| 1683 | return error; |
| 1684 | |
| 1685 | error = xfs_defer_finish(&sc->tp); |
| 1686 | if (error) |
| 1687 | return error; |
| 1688 | xreap_defer_finish_reset(rs: &rs); |
| 1689 | } |
| 1690 | |
| 1691 | off = imap.br_startoff + imap.br_blockcount; |
| 1692 | } |
| 1693 | |
| 1694 | return 0; |
| 1695 | } |
| 1696 | |