1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * Copyright (C) 2018-2023 Oracle. All Rights Reserved. |
4 | * Author: Darrick J. Wong <djwong@kernel.org> |
5 | */ |
6 | #include "xfs.h" |
7 | #include "xfs_fs.h" |
8 | #include "xfs_shared.h" |
9 | #include "xfs_format.h" |
10 | #include "xfs_trans_resv.h" |
11 | #include "xfs_mount.h" |
12 | #include "xfs_defer.h" |
13 | #include "xfs_btree.h" |
14 | #include "xfs_btree_staging.h" |
15 | #include "xfs_inode.h" |
16 | #include "xfs_bit.h" |
17 | #include "xfs_log_format.h" |
18 | #include "xfs_trans.h" |
19 | #include "xfs_sb.h" |
20 | #include "xfs_alloc.h" |
21 | #include "xfs_ialloc.h" |
22 | #include "xfs_rmap.h" |
23 | #include "xfs_rmap_btree.h" |
24 | #include "xfs_refcount.h" |
25 | #include "xfs_refcount_btree.h" |
26 | #include "xfs_error.h" |
27 | #include "xfs_ag.h" |
28 | #include "xfs_health.h" |
29 | #include "scrub/xfs_scrub.h" |
30 | #include "scrub/scrub.h" |
31 | #include "scrub/common.h" |
32 | #include "scrub/btree.h" |
33 | #include "scrub/trace.h" |
34 | #include "scrub/repair.h" |
35 | #include "scrub/bitmap.h" |
36 | #include "scrub/agb_bitmap.h" |
37 | #include "scrub/xfile.h" |
38 | #include "scrub/xfarray.h" |
39 | #include "scrub/newbt.h" |
40 | #include "scrub/reap.h" |
41 | #include "scrub/rcbag.h" |
42 | |
43 | /* |
44 | * Rebuilding the Reference Count Btree |
45 | * ==================================== |
46 | * |
47 | * This algorithm is "borrowed" from xfs_repair. Imagine the rmap |
48 | * entries as rectangles representing extents of physical blocks, and |
49 | * that the rectangles can be laid down to allow them to overlap each |
50 | * other; then we know that we must emit a refcnt btree entry wherever |
51 | * the amount of overlap changes, i.e. the emission stimulus is |
52 | * level-triggered: |
53 | * |
54 | * - --- |
55 | * -- ----- ---- --- ------ |
56 | * -- ---- ----------- ---- --------- |
57 | * -------------------------------- ----------- |
58 | * ^ ^ ^^ ^^ ^ ^^ ^^^ ^^^^ ^ ^^ ^ ^ ^ |
59 | * 2 1 23 21 3 43 234 2123 1 01 2 3 0 |
60 | * |
61 | * For our purposes, a rmap is a tuple (startblock, len, fileoff, owner). |
62 | * |
63 | * Note that in the actual refcnt btree we don't store the refcount < 2 |
64 | * cases because the bnobt tells us which blocks are free; single-use |
65 | * blocks aren't recorded in the bnobt or the refcntbt. If the rmapbt |
66 | * supports storing multiple entries covering a given block we could |
67 | * theoretically dispense with the refcntbt and simply count rmaps, but |
68 | * that's inefficient in the (hot) write path, so we'll take the cost of |
69 | * the extra tree to save time. Also there's no guarantee that rmap |
70 | * will be enabled. |
71 | * |
72 | * Given an array of rmaps sorted by physical block number, a starting |
73 | * physical block (sp), a bag to hold rmaps that cover sp, and the next |
74 | * physical block where the level changes (np), we can reconstruct the |
75 | * refcount btree as follows: |
76 | * |
77 | * While there are still unprocessed rmaps in the array, |
78 | * - Set sp to the physical block (pblk) of the next unprocessed rmap. |
79 | * - Add to the bag all rmaps in the array where startblock == sp. |
80 | * - Set np to the physical block where the bag size will change. This |
81 | * is the minimum of (the pblk of the next unprocessed rmap) and |
82 | * (startblock + len of each rmap in the bag). |
83 | * - Record the bag size as old_bag_size. |
84 | * |
85 | * - While the bag isn't empty, |
86 | * - Remove from the bag all rmaps where startblock + len == np. |
87 | * - Add to the bag all rmaps in the array where startblock == np. |
88 | * - If the bag size isn't old_bag_size, store the refcount entry |
89 | * (sp, np - sp, bag_size) in the refcnt btree. |
90 | * - If the bag is empty, break out of the inner loop. |
91 | * - Set old_bag_size to the bag size |
92 | * - Set sp = np. |
93 | * - Set np to the physical block where the bag size will change. |
94 | * This is the minimum of (the pblk of the next unprocessed rmap) |
95 | * and (startblock + len of each rmap in the bag). |
96 | * |
97 | * Like all the other repairers, we make a list of all the refcount |
98 | * records we need, then reinitialize the refcount btree root and |
99 | * insert all the records. |
100 | */ |
101 | |
102 | struct xrep_refc { |
103 | /* refcount extents */ |
104 | struct xfarray *refcount_records; |
105 | |
106 | /* new refcountbt information */ |
107 | struct xrep_newbt new_btree; |
108 | |
109 | /* old refcountbt blocks */ |
110 | struct xagb_bitmap old_refcountbt_blocks; |
111 | |
112 | struct xfs_scrub *sc; |
113 | |
114 | /* get_records()'s position in the refcount record array. */ |
115 | xfarray_idx_t array_cur; |
116 | |
117 | /* # of refcountbt blocks */ |
118 | xfs_extlen_t btblocks; |
119 | }; |
120 | |
121 | /* Set us up to repair refcount btrees. */ |
122 | int |
123 | xrep_setup_ag_refcountbt( |
124 | struct xfs_scrub *sc) |
125 | { |
126 | char *descr; |
127 | int error; |
128 | |
129 | descr = xchk_xfile_ag_descr(sc, "rmap record bag" ); |
130 | error = xrep_setup_xfbtree(sc, descr); |
131 | kfree(descr); |
132 | return error; |
133 | } |
134 | |
135 | /* Check for any obvious conflicts with this shared/CoW staging extent. */ |
136 | STATIC int |
137 | xrep_refc_check_ext( |
138 | struct xfs_scrub *sc, |
139 | const struct xfs_refcount_irec *rec) |
140 | { |
141 | enum xbtree_recpacking outcome; |
142 | int error; |
143 | |
144 | if (xfs_refcount_check_irec(sc->sa.pag, rec) != NULL) |
145 | return -EFSCORRUPTED; |
146 | |
147 | /* Make sure this isn't free space. */ |
148 | error = xfs_alloc_has_records(sc->sa.bno_cur, rec->rc_startblock, |
149 | rec->rc_blockcount, &outcome); |
150 | if (error) |
151 | return error; |
152 | if (outcome != XBTREE_RECPACKING_EMPTY) |
153 | return -EFSCORRUPTED; |
154 | |
155 | /* Must not be an inode chunk. */ |
156 | error = xfs_ialloc_has_inodes_at_extent(sc->sa.ino_cur, |
157 | rec->rc_startblock, rec->rc_blockcount, &outcome); |
158 | if (error) |
159 | return error; |
160 | if (outcome != XBTREE_RECPACKING_EMPTY) |
161 | return -EFSCORRUPTED; |
162 | |
163 | return 0; |
164 | } |
165 | |
166 | /* Record a reference count extent. */ |
167 | STATIC int |
168 | xrep_refc_stash( |
169 | struct xrep_refc *rr, |
170 | enum xfs_refc_domain domain, |
171 | xfs_agblock_t agbno, |
172 | xfs_extlen_t len, |
173 | uint64_t refcount) |
174 | { |
175 | struct xfs_refcount_irec irec = { |
176 | .rc_startblock = agbno, |
177 | .rc_blockcount = len, |
178 | .rc_domain = domain, |
179 | }; |
180 | struct xfs_scrub *sc = rr->sc; |
181 | int error = 0; |
182 | |
183 | if (xchk_should_terminate(sc, &error)) |
184 | return error; |
185 | |
186 | irec.rc_refcount = min_t(uint64_t, MAXREFCOUNT, refcount); |
187 | |
188 | error = xrep_refc_check_ext(rr->sc, &irec); |
189 | if (error) |
190 | return error; |
191 | |
192 | trace_xrep_refc_found(sc->sa.pag, &irec); |
193 | |
194 | return xfarray_append(rr->refcount_records, &irec); |
195 | } |
196 | |
197 | /* Record a CoW staging extent. */ |
198 | STATIC int |
199 | xrep_refc_stash_cow( |
200 | struct xrep_refc *rr, |
201 | xfs_agblock_t agbno, |
202 | xfs_extlen_t len) |
203 | { |
204 | return xrep_refc_stash(rr, XFS_REFC_DOMAIN_COW, agbno, len, 1); |
205 | } |
206 | |
207 | /* Decide if an rmap could describe a shared extent. */ |
208 | static inline bool |
209 | xrep_refc_rmap_shareable( |
210 | struct xfs_mount *mp, |
211 | const struct xfs_rmap_irec *rmap) |
212 | { |
213 | /* AG metadata are never sharable */ |
214 | if (XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner)) |
215 | return false; |
216 | |
217 | /* Metadata in files are never shareable */ |
218 | if (xfs_internal_inum(mp, rmap->rm_owner)) |
219 | return false; |
220 | |
221 | /* Metadata and unwritten file blocks are not shareable. */ |
222 | if (rmap->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK | |
223 | XFS_RMAP_UNWRITTEN)) |
224 | return false; |
225 | |
226 | return true; |
227 | } |
228 | |
229 | /* |
230 | * Walk along the reverse mapping records until we find one that could describe |
231 | * a shared extent. |
232 | */ |
233 | STATIC int |
234 | xrep_refc_walk_rmaps( |
235 | struct xrep_refc *rr, |
236 | struct xfs_rmap_irec *rmap, |
237 | bool *have_rec) |
238 | { |
239 | struct xfs_btree_cur *cur = rr->sc->sa.rmap_cur; |
240 | struct xfs_mount *mp = cur->bc_mp; |
241 | int have_gt; |
242 | int error = 0; |
243 | |
244 | *have_rec = false; |
245 | |
246 | /* |
247 | * Loop through the remaining rmaps. Remember CoW staging |
248 | * extents and the refcountbt blocks from the old tree for later |
249 | * disposal. We can only share written data fork extents, so |
250 | * keep looping until we find an rmap for one. |
251 | */ |
252 | do { |
253 | if (xchk_should_terminate(rr->sc, &error)) |
254 | return error; |
255 | |
256 | error = xfs_btree_increment(cur, 0, &have_gt); |
257 | if (error) |
258 | return error; |
259 | if (!have_gt) |
260 | return 0; |
261 | |
262 | error = xfs_rmap_get_rec(cur, rmap, &have_gt); |
263 | if (error) |
264 | return error; |
265 | if (XFS_IS_CORRUPT(mp, !have_gt)) { |
266 | xfs_btree_mark_sick(cur); |
267 | return -EFSCORRUPTED; |
268 | } |
269 | |
270 | if (rmap->rm_owner == XFS_RMAP_OWN_COW) { |
271 | error = xrep_refc_stash_cow(rr, rmap->rm_startblock, |
272 | rmap->rm_blockcount); |
273 | if (error) |
274 | return error; |
275 | } else if (rmap->rm_owner == XFS_RMAP_OWN_REFC) { |
276 | /* refcountbt block, dump it when we're done. */ |
277 | rr->btblocks += rmap->rm_blockcount; |
278 | error = xagb_bitmap_set(&rr->old_refcountbt_blocks, |
279 | rmap->rm_startblock, |
280 | rmap->rm_blockcount); |
281 | if (error) |
282 | return error; |
283 | } |
284 | } while (!xrep_refc_rmap_shareable(mp, rmap)); |
285 | |
286 | *have_rec = true; |
287 | return 0; |
288 | } |
289 | |
290 | static inline uint32_t |
291 | xrep_refc_encode_startblock( |
292 | const struct xfs_refcount_irec *irec) |
293 | { |
294 | uint32_t start; |
295 | |
296 | start = irec->rc_startblock & ~XFS_REFC_COWFLAG; |
297 | if (irec->rc_domain == XFS_REFC_DOMAIN_COW) |
298 | start |= XFS_REFC_COWFLAG; |
299 | |
300 | return start; |
301 | } |
302 | |
303 | /* Sort in the same order as the ondisk records. */ |
304 | static int |
305 | xrep_refc_extent_cmp( |
306 | const void *a, |
307 | const void *b) |
308 | { |
309 | const struct xfs_refcount_irec *ap = a; |
310 | const struct xfs_refcount_irec *bp = b; |
311 | uint32_t sa, sb; |
312 | |
313 | sa = xrep_refc_encode_startblock(ap); |
314 | sb = xrep_refc_encode_startblock(bp); |
315 | |
316 | if (sa > sb) |
317 | return 1; |
318 | if (sa < sb) |
319 | return -1; |
320 | return 0; |
321 | } |
322 | |
323 | /* |
324 | * Sort the refcount extents by startblock or else the btree records will be in |
325 | * the wrong order. Make sure the records do not overlap in physical space. |
326 | */ |
327 | STATIC int |
328 | xrep_refc_sort_records( |
329 | struct xrep_refc *rr) |
330 | { |
331 | struct xfs_refcount_irec irec; |
332 | xfarray_idx_t cur; |
333 | enum xfs_refc_domain dom = XFS_REFC_DOMAIN_SHARED; |
334 | xfs_agblock_t next_agbno = 0; |
335 | int error; |
336 | |
337 | error = xfarray_sort(rr->refcount_records, xrep_refc_extent_cmp, |
338 | XFARRAY_SORT_KILLABLE); |
339 | if (error) |
340 | return error; |
341 | |
342 | foreach_xfarray_idx(rr->refcount_records, cur) { |
343 | if (xchk_should_terminate(rr->sc, &error)) |
344 | return error; |
345 | |
346 | error = xfarray_load(rr->refcount_records, cur, &irec); |
347 | if (error) |
348 | return error; |
349 | |
350 | if (dom == XFS_REFC_DOMAIN_SHARED && |
351 | irec.rc_domain == XFS_REFC_DOMAIN_COW) { |
352 | dom = irec.rc_domain; |
353 | next_agbno = 0; |
354 | } |
355 | |
356 | if (dom != irec.rc_domain) |
357 | return -EFSCORRUPTED; |
358 | if (irec.rc_startblock < next_agbno) |
359 | return -EFSCORRUPTED; |
360 | |
361 | next_agbno = irec.rc_startblock + irec.rc_blockcount; |
362 | } |
363 | |
364 | return error; |
365 | } |
366 | |
367 | /* |
368 | * Walk forward through the rmap btree to collect all rmaps starting at |
369 | * @bno in @rmap_bag. These represent the file(s) that share ownership of |
370 | * the current block. Upon return, the rmap cursor points to the last record |
371 | * satisfying the startblock constraint. |
372 | */ |
373 | static int |
374 | xrep_refc_push_rmaps_at( |
375 | struct xrep_refc *rr, |
376 | struct rcbag *rcstack, |
377 | xfs_agblock_t bno, |
378 | struct xfs_rmap_irec *rmap, |
379 | bool *have) |
380 | { |
381 | struct xfs_scrub *sc = rr->sc; |
382 | int have_gt; |
383 | int error; |
384 | |
385 | while (*have && rmap->rm_startblock == bno) { |
386 | error = rcbag_add(rcstack, rr->sc->tp, rmap); |
387 | if (error) |
388 | return error; |
389 | |
390 | error = xrep_refc_walk_rmaps(rr, rmap, have); |
391 | if (error) |
392 | return error; |
393 | } |
394 | |
395 | error = xfs_btree_decrement(sc->sa.rmap_cur, 0, &have_gt); |
396 | if (error) |
397 | return error; |
398 | if (XFS_IS_CORRUPT(sc->mp, !have_gt)) { |
399 | xfs_btree_mark_sick(sc->sa.rmap_cur); |
400 | return -EFSCORRUPTED; |
401 | } |
402 | |
403 | return 0; |
404 | } |
405 | |
406 | /* Iterate all the rmap records to generate reference count data. */ |
407 | STATIC int |
408 | xrep_refc_find_refcounts( |
409 | struct xrep_refc *rr) |
410 | { |
411 | struct xfs_scrub *sc = rr->sc; |
412 | struct rcbag *rcstack; |
413 | uint64_t old_stack_height; |
414 | xfs_agblock_t sbno; |
415 | xfs_agblock_t cbno; |
416 | xfs_agblock_t nbno; |
417 | bool have; |
418 | int error; |
419 | |
420 | xrep_ag_btcur_init(sc, &sc->sa); |
421 | |
422 | /* |
423 | * Set up a bag to store all the rmap records that we're tracking to |
424 | * generate a reference count record. If the size of the bag exceeds |
425 | * MAXREFCOUNT, we clamp rc_refcount. |
426 | */ |
427 | error = rcbag_init(sc->mp, sc->xmbtp, &rcstack); |
428 | if (error) |
429 | goto out_cur; |
430 | |
431 | /* Start the rmapbt cursor to the left of all records. */ |
432 | error = xfs_btree_goto_left_edge(sc->sa.rmap_cur); |
433 | if (error) |
434 | goto out_bag; |
435 | |
436 | /* Process reverse mappings into refcount data. */ |
437 | while (xfs_btree_has_more_records(sc->sa.rmap_cur)) { |
438 | struct xfs_rmap_irec rmap; |
439 | |
440 | /* Push all rmaps with pblk == sbno onto the stack */ |
441 | error = xrep_refc_walk_rmaps(rr, &rmap, &have); |
442 | if (error) |
443 | goto out_bag; |
444 | if (!have) |
445 | break; |
446 | sbno = cbno = rmap.rm_startblock; |
447 | error = xrep_refc_push_rmaps_at(rr, rcstack, sbno, &rmap, |
448 | &have); |
449 | if (error) |
450 | goto out_bag; |
451 | |
452 | /* Set nbno to the bno of the next refcount change */ |
453 | error = rcbag_next_edge(rcstack, sc->tp, &rmap, have, &nbno); |
454 | if (error) |
455 | goto out_bag; |
456 | |
457 | ASSERT(nbno > sbno); |
458 | old_stack_height = rcbag_count(rcstack); |
459 | |
460 | /* While stack isn't empty... */ |
461 | while (rcbag_count(rcstack) > 0) { |
462 | /* Pop all rmaps that end at nbno */ |
463 | error = rcbag_remove_ending_at(rcstack, sc->tp, nbno); |
464 | if (error) |
465 | goto out_bag; |
466 | |
467 | /* Push array items that start at nbno */ |
468 | error = xrep_refc_walk_rmaps(rr, &rmap, &have); |
469 | if (error) |
470 | goto out_bag; |
471 | if (have) { |
472 | error = xrep_refc_push_rmaps_at(rr, rcstack, |
473 | nbno, &rmap, &have); |
474 | if (error) |
475 | goto out_bag; |
476 | } |
477 | |
478 | /* Emit refcount if necessary */ |
479 | ASSERT(nbno > cbno); |
480 | if (rcbag_count(rcstack) != old_stack_height) { |
481 | if (old_stack_height > 1) { |
482 | error = xrep_refc_stash(rr, |
483 | XFS_REFC_DOMAIN_SHARED, |
484 | cbno, nbno - cbno, |
485 | old_stack_height); |
486 | if (error) |
487 | goto out_bag; |
488 | } |
489 | cbno = nbno; |
490 | } |
491 | |
492 | /* Stack empty, go find the next rmap */ |
493 | if (rcbag_count(rcstack) == 0) |
494 | break; |
495 | old_stack_height = rcbag_count(rcstack); |
496 | sbno = nbno; |
497 | |
498 | /* Set nbno to the bno of the next refcount change */ |
499 | error = rcbag_next_edge(rcstack, sc->tp, &rmap, have, |
500 | &nbno); |
501 | if (error) |
502 | goto out_bag; |
503 | |
504 | ASSERT(nbno > sbno); |
505 | } |
506 | } |
507 | |
508 | ASSERT(rcbag_count(rcstack) == 0); |
509 | out_bag: |
510 | rcbag_free(&rcstack); |
511 | out_cur: |
512 | xchk_ag_btcur_free(&sc->sa); |
513 | return error; |
514 | } |
515 | |
516 | /* Retrieve refcountbt data for bulk load. */ |
517 | STATIC int |
518 | xrep_refc_get_records( |
519 | struct xfs_btree_cur *cur, |
520 | unsigned int idx, |
521 | struct xfs_btree_block *block, |
522 | unsigned int nr_wanted, |
523 | void *priv) |
524 | { |
525 | struct xfs_refcount_irec *irec = &cur->bc_rec.rc; |
526 | struct xrep_refc *rr = priv; |
527 | union xfs_btree_rec *block_rec; |
528 | unsigned int loaded; |
529 | int error; |
530 | |
531 | for (loaded = 0; loaded < nr_wanted; loaded++, idx++) { |
532 | error = xfarray_load(rr->refcount_records, rr->array_cur++, |
533 | irec); |
534 | if (error) |
535 | return error; |
536 | |
537 | block_rec = xfs_btree_rec_addr(cur, idx, block); |
538 | cur->bc_ops->init_rec_from_cur(cur, block_rec); |
539 | } |
540 | |
541 | return loaded; |
542 | } |
543 | |
544 | /* Feed one of the new btree blocks to the bulk loader. */ |
545 | STATIC int |
546 | xrep_refc_claim_block( |
547 | struct xfs_btree_cur *cur, |
548 | union xfs_btree_ptr *ptr, |
549 | void *priv) |
550 | { |
551 | struct xrep_refc *rr = priv; |
552 | |
553 | return xrep_newbt_claim_block(cur, &rr->new_btree, ptr); |
554 | } |
555 | |
556 | /* Update the AGF counters. */ |
557 | STATIC int |
558 | xrep_refc_reset_counters( |
559 | struct xrep_refc *rr) |
560 | { |
561 | struct xfs_scrub *sc = rr->sc; |
562 | struct xfs_perag *pag = sc->sa.pag; |
563 | |
564 | /* |
565 | * After we commit the new btree to disk, it is possible that the |
566 | * process to reap the old btree blocks will race with the AIL trying |
567 | * to checkpoint the old btree blocks into the filesystem. If the new |
568 | * tree is shorter than the old one, the refcountbt write verifier will |
569 | * fail and the AIL will shut down the filesystem. |
570 | * |
571 | * To avoid this, save the old incore btree height values as the alt |
572 | * height values before re-initializing the perag info from the updated |
573 | * AGF to capture all the new values. |
574 | */ |
575 | pag->pagf_repair_refcount_level = pag->pagf_refcount_level; |
576 | |
577 | /* Reinitialize with the values we just logged. */ |
578 | return xrep_reinit_pagf(sc); |
579 | } |
580 | |
581 | /* |
582 | * Use the collected refcount information to stage a new refcount btree. If |
583 | * this is successful we'll return with the new btree root information logged |
584 | * to the repair transaction but not yet committed. |
585 | */ |
586 | STATIC int |
587 | xrep_refc_build_new_tree( |
588 | struct xrep_refc *rr) |
589 | { |
590 | struct xfs_scrub *sc = rr->sc; |
591 | struct xfs_btree_cur *refc_cur; |
592 | struct xfs_perag *pag = sc->sa.pag; |
593 | xfs_fsblock_t fsbno; |
594 | int error; |
595 | |
596 | error = xrep_refc_sort_records(rr); |
597 | if (error) |
598 | return error; |
599 | |
600 | /* |
601 | * Prepare to construct the new btree by reserving disk space for the |
602 | * new btree and setting up all the accounting information we'll need |
603 | * to root the new btree while it's under construction and before we |
604 | * attach it to the AG header. |
605 | */ |
606 | fsbno = XFS_AGB_TO_FSB(sc->mp, pag->pag_agno, xfs_refc_block(sc->mp)); |
607 | xrep_newbt_init_ag(&rr->new_btree, sc, &XFS_RMAP_OINFO_REFC, fsbno, |
608 | XFS_AG_RESV_METADATA); |
609 | rr->new_btree.bload.get_records = xrep_refc_get_records; |
610 | rr->new_btree.bload.claim_block = xrep_refc_claim_block; |
611 | |
612 | /* Compute how many blocks we'll need. */ |
613 | refc_cur = xfs_refcountbt_init_cursor(sc->mp, NULL, NULL, pag); |
614 | xfs_btree_stage_afakeroot(refc_cur, &rr->new_btree.afake); |
615 | error = xfs_btree_bload_compute_geometry(refc_cur, |
616 | &rr->new_btree.bload, |
617 | xfarray_length(rr->refcount_records)); |
618 | if (error) |
619 | goto err_cur; |
620 | |
621 | /* Last chance to abort before we start committing fixes. */ |
622 | if (xchk_should_terminate(sc, &error)) |
623 | goto err_cur; |
624 | |
625 | /* Reserve the space we'll need for the new btree. */ |
626 | error = xrep_newbt_alloc_blocks(&rr->new_btree, |
627 | rr->new_btree.bload.nr_blocks); |
628 | if (error) |
629 | goto err_cur; |
630 | |
631 | /* |
632 | * Due to btree slack factors, it's possible for a new btree to be one |
633 | * level taller than the old btree. Update the incore btree height so |
634 | * that we don't trip the verifiers when writing the new btree blocks |
635 | * to disk. |
636 | */ |
637 | pag->pagf_repair_refcount_level = rr->new_btree.bload.btree_height; |
638 | |
639 | /* Add all observed refcount records. */ |
640 | rr->array_cur = XFARRAY_CURSOR_INIT; |
641 | error = xfs_btree_bload(refc_cur, &rr->new_btree.bload, rr); |
642 | if (error) |
643 | goto err_level; |
644 | |
645 | /* |
646 | * Install the new btree in the AG header. After this point the old |
647 | * btree is no longer accessible and the new tree is live. |
648 | */ |
649 | xfs_refcountbt_commit_staged_btree(refc_cur, sc->tp, sc->sa.agf_bp); |
650 | xfs_btree_del_cursor(refc_cur, 0); |
651 | |
652 | /* Reset the AGF counters now that we've changed the btree shape. */ |
653 | error = xrep_refc_reset_counters(rr); |
654 | if (error) |
655 | goto err_newbt; |
656 | |
657 | /* Dispose of any unused blocks and the accounting information. */ |
658 | error = xrep_newbt_commit(&rr->new_btree); |
659 | if (error) |
660 | return error; |
661 | |
662 | return xrep_roll_ag_trans(sc); |
663 | |
664 | err_level: |
665 | pag->pagf_repair_refcount_level = 0; |
666 | err_cur: |
667 | xfs_btree_del_cursor(refc_cur, error); |
668 | err_newbt: |
669 | xrep_newbt_cancel(&rr->new_btree); |
670 | return error; |
671 | } |
672 | |
673 | /* |
674 | * Now that we've logged the roots of the new btrees, invalidate all of the |
675 | * old blocks and free them. |
676 | */ |
677 | STATIC int |
678 | xrep_refc_remove_old_tree( |
679 | struct xrep_refc *rr) |
680 | { |
681 | struct xfs_scrub *sc = rr->sc; |
682 | struct xfs_perag *pag = sc->sa.pag; |
683 | int error; |
684 | |
685 | /* Free the old refcountbt blocks if they're not in use. */ |
686 | error = xrep_reap_agblocks(sc, &rr->old_refcountbt_blocks, |
687 | &XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA); |
688 | if (error) |
689 | return error; |
690 | |
691 | /* |
692 | * Now that we've zapped all the old refcountbt blocks we can turn off |
693 | * the alternate height mechanism and reset the per-AG space |
694 | * reservations. |
695 | */ |
696 | pag->pagf_repair_refcount_level = 0; |
697 | sc->flags |= XREP_RESET_PERAG_RESV; |
698 | return 0; |
699 | } |
700 | |
701 | /* Rebuild the refcount btree. */ |
702 | int |
703 | xrep_refcountbt( |
704 | struct xfs_scrub *sc) |
705 | { |
706 | struct xrep_refc *rr; |
707 | struct xfs_mount *mp = sc->mp; |
708 | char *descr; |
709 | int error; |
710 | |
711 | /* We require the rmapbt to rebuild anything. */ |
712 | if (!xfs_has_rmapbt(mp)) |
713 | return -EOPNOTSUPP; |
714 | |
715 | rr = kzalloc(sizeof(struct xrep_refc), XCHK_GFP_FLAGS); |
716 | if (!rr) |
717 | return -ENOMEM; |
718 | rr->sc = sc; |
719 | |
720 | /* Set up enough storage to handle one refcount record per block. */ |
721 | descr = xchk_xfile_ag_descr(sc, "reference count records" ); |
722 | error = xfarray_create(descr, mp->m_sb.sb_agblocks, |
723 | sizeof(struct xfs_refcount_irec), |
724 | &rr->refcount_records); |
725 | kfree(descr); |
726 | if (error) |
727 | goto out_rr; |
728 | |
729 | /* Collect all reference counts. */ |
730 | xagb_bitmap_init(&rr->old_refcountbt_blocks); |
731 | error = xrep_refc_find_refcounts(rr); |
732 | if (error) |
733 | goto out_bitmap; |
734 | |
735 | /* Rebuild the refcount information. */ |
736 | error = xrep_refc_build_new_tree(rr); |
737 | if (error) |
738 | goto out_bitmap; |
739 | |
740 | /* Kill the old tree. */ |
741 | error = xrep_refc_remove_old_tree(rr); |
742 | if (error) |
743 | goto out_bitmap; |
744 | |
745 | out_bitmap: |
746 | xagb_bitmap_destroy(&rr->old_refcountbt_blocks); |
747 | xfarray_destroy(rr->refcount_records); |
748 | out_rr: |
749 | kfree(rr); |
750 | return error; |
751 | } |
752 | |