1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * Copyright (C) 2018-2023 Oracle. All Rights Reserved. |
4 | * Author: Darrick J. Wong <djwong@kernel.org> |
5 | */ |
6 | #include "xfs.h" |
7 | #include "xfs_fs.h" |
8 | #include "xfs_shared.h" |
9 | #include "xfs_format.h" |
10 | #include "xfs_trans_resv.h" |
11 | #include "xfs_mount.h" |
12 | #include "xfs_defer.h" |
13 | #include "xfs_btree.h" |
14 | #include "xfs_btree_staging.h" |
15 | #include "xfs_bit.h" |
16 | #include "xfs_log_format.h" |
17 | #include "xfs_trans.h" |
18 | #include "xfs_sb.h" |
19 | #include "xfs_inode.h" |
20 | #include "xfs_inode_fork.h" |
21 | #include "xfs_alloc.h" |
22 | #include "xfs_rtalloc.h" |
23 | #include "xfs_bmap.h" |
24 | #include "xfs_bmap_util.h" |
25 | #include "xfs_bmap_btree.h" |
26 | #include "xfs_rmap.h" |
27 | #include "xfs_rmap_btree.h" |
28 | #include "xfs_refcount.h" |
29 | #include "xfs_quota.h" |
30 | #include "xfs_ialloc.h" |
31 | #include "xfs_ag.h" |
32 | #include "xfs_reflink.h" |
33 | #include "scrub/xfs_scrub.h" |
34 | #include "scrub/scrub.h" |
35 | #include "scrub/common.h" |
36 | #include "scrub/btree.h" |
37 | #include "scrub/trace.h" |
38 | #include "scrub/repair.h" |
39 | #include "scrub/bitmap.h" |
40 | #include "scrub/fsb_bitmap.h" |
41 | #include "scrub/xfile.h" |
42 | #include "scrub/xfarray.h" |
43 | #include "scrub/newbt.h" |
44 | #include "scrub/reap.h" |
45 | |
46 | /* |
47 | * Inode Fork Block Mapping (BMBT) Repair |
48 | * ====================================== |
49 | * |
50 | * Gather all the rmap records for the inode and fork we're fixing, reset the |
51 | * incore fork, then recreate the btree. |
52 | */ |
53 | |
/*
 * Scan state for deciding whether the repaired inode should end up with the
 * REFLINK iflag set once the repair is over.
 */
enum reflink_scan_state {
	RLS_IRRELEVANT = -1,	/* not applicable to this file */
	RLS_UNKNOWN,		/* shared extent scans required */
	RLS_SET_IFLAG,		/* iflag must be set */
};
59 | |
/* Incore state for rebuilding one fork's block mapping structure. */
struct xrep_bmap {
	/* Old bmbt blocks */
	struct xfsb_bitmap	old_bmbt_blocks;

	/* New fork. */
	struct xrep_newbt	new_bmapbt;

	/* List of new bmap records. */
	struct xfarray		*bmap_records;

	struct xfs_scrub	*sc;

	/* How many blocks did we find allocated to this file? */
	xfs_rfsblock_t		nblocks;

	/* How many bmbt blocks did we find for this fork? */
	xfs_rfsblock_t		old_bmbt_block_count;

	/* get_records()'s position in the bmap record array. */
	xfarray_idx_t		array_cur;

	/* How many real (non-hole, non-delalloc) mappings do we have? */
	uint64_t		real_mappings;

	/* Which fork are we fixing? */
	int			whichfork;

	/* Should the REFLINK flag be set when the repair is over? */
	enum reflink_scan_state	reflink_scan;

	/* Do we allow unwritten extents? */
	bool			allow_unwritten;
};
93 | |
94 | /* Is this space extent shared? Flag the inode if it is. */ |
95 | STATIC int |
96 | xrep_bmap_discover_shared( |
97 | struct xrep_bmap *rb, |
98 | xfs_fsblock_t startblock, |
99 | xfs_filblks_t blockcount) |
100 | { |
101 | struct xfs_scrub *sc = rb->sc; |
102 | xfs_agblock_t agbno; |
103 | xfs_agblock_t fbno; |
104 | xfs_extlen_t flen; |
105 | int error; |
106 | |
107 | agbno = XFS_FSB_TO_AGBNO(sc->mp, startblock); |
108 | error = xfs_refcount_find_shared(sc->sa.refc_cur, agbno, blockcount, |
109 | &fbno, &flen, false); |
110 | if (error) |
111 | return error; |
112 | |
113 | if (fbno != NULLAGBLOCK) |
114 | rb->reflink_scan = RLS_SET_IFLAG; |
115 | |
116 | return 0; |
117 | } |
118 | |
/*
 * Remember this reverse-mapping as a series of bmap records.
 *
 * A single rmap record can cover more blocks than one ondisk bmbt record
 * (XFS_MAX_BMBT_EXTLEN), so split the mapping into as many pieces as needed
 * and append each piece to the staging record array.
 */
STATIC int
xrep_bmap_from_rmap(
	struct xrep_bmap	*rb,
	xfs_fileoff_t		startoff,
	xfs_fsblock_t		startblock,
	xfs_filblks_t		blockcount,
	bool			unwritten)
{
	struct xfs_bmbt_irec	irec = {
		.br_startoff = startoff,
		.br_startblock = startblock,
		.br_state = unwritten ? XFS_EXT_UNWRITTEN : XFS_EXT_NORM,
	};
	struct xfs_bmbt_rec	rbe;
	struct xfs_scrub	*sc = rb->sc;
	int			error = 0;

	/*
	 * If we're repairing the data fork of a non-reflinked regular file on
	 * a reflink filesystem, we need to figure out if this space extent is
	 * shared.
	 */
	if (rb->reflink_scan == RLS_UNKNOWN && !unwritten) {
		error = xrep_bmap_discover_shared(rb, startblock, blockcount);
		if (error)
			return error;
	}

	do {
		xfs_failaddr_t	fa;

		/* Clamp each piece to the maximum bmbt record length. */
		irec.br_blockcount = min_t(xfs_filblks_t, blockcount,
				XFS_MAX_BMBT_EXTLEN);

		fa = xfs_bmap_validate_extent(sc->ip, rb->whichfork, &irec);
		if (fa)
			return -EFSCORRUPTED;

		xfs_bmbt_disk_set_all(&rbe, &irec);

		trace_xrep_bmap_found(sc->ip, rb->whichfork, &irec);

		if (xchk_should_terminate(sc, &error))
			return error;

		error = xfarray_append(rb->bmap_records, &rbe);
		if (error)
			return error;

		rb->real_mappings++;

		/* Advance to the unprocessed remainder of the mapping. */
		irec.br_startblock += irec.br_blockcount;
		irec.br_startoff += irec.br_blockcount;
		blockcount -= irec.br_blockcount;
	} while (blockcount > 0);

	return 0;
}
178 | |
/*
 * Check for any obvious errors or conflicts in the file mapping.  Returns
 * -EFSCORRUPTED if the rmap record cannot describe space belonging to this
 * fork, or a negative errno if a cross-referencing lookup fails.
 */
STATIC int
xrep_bmap_check_fork_rmap(
	struct xrep_bmap	*rb,
	struct xfs_btree_cur	*cur,
	const struct xfs_rmap_irec *rec)
{
	struct xfs_scrub	*sc = rb->sc;
	enum xbtree_recpacking	outcome;
	int			error;

	/*
	 * Data extents for rt files are never stored on the data device, but
	 * everything else (xattrs, bmbt blocks) can be.
	 */
	if (XFS_IS_REALTIME_INODE(sc->ip) &&
	    !(rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)))
		return -EFSCORRUPTED;

	/* Check that this is within the AG. */
	if (!xfs_verify_agbext(cur->bc_ag.pag, rec->rm_startblock,
				rec->rm_blockcount))
		return -EFSCORRUPTED;

	/* Check the file offset range. */
	if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK) &&
	    !xfs_verify_fileext(sc->mp, rec->rm_offset, rec->rm_blockcount))
		return -EFSCORRUPTED;

	/* No contradictory flags. */
	if ((rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)) &&
	    (rec->rm_flags & XFS_RMAP_UNWRITTEN))
		return -EFSCORRUPTED;

	/* Make sure this isn't free space. */
	error = xfs_alloc_has_records(sc->sa.bno_cur, rec->rm_startblock,
			rec->rm_blockcount, &outcome);
	if (error)
		return error;
	if (outcome != XBTREE_RECPACKING_EMPTY)
		return -EFSCORRUPTED;

	/* Must not be an inode chunk. */
	error = xfs_ialloc_has_inodes_at_extent(sc->sa.ino_cur,
			rec->rm_startblock, rec->rm_blockcount, &outcome);
	if (error)
		return error;
	if (outcome != XBTREE_RECPACKING_EMPTY)
		return -EFSCORRUPTED;

	return 0;
}
231 | |
/*
 * Record extents that belong to this inode's fork.  This is the per-record
 * callback for the rmapbt query in xrep_bmap_scan_ag.
 */
STATIC int
xrep_bmap_walk_rmap(
	struct xfs_btree_cur	*cur,
	const struct xfs_rmap_irec *rec,
	void			*priv)
{
	struct xrep_bmap	*rb = priv;
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_fsblock_t		fsbno;
	int			error = 0;

	if (xchk_should_terminate(rb->sc, &error))
		return error;

	/* Skip rmaps belonging to other owners. */
	if (rec->rm_owner != rb->sc->ip->i_ino)
		return 0;

	error = xrep_bmap_check_fork_rmap(rb, cur, rec);
	if (error)
		return error;

	/*
	 * Record all blocks allocated to this file even if the extent isn't
	 * for the fork we're rebuilding so that we can reset di_nblocks later.
	 */
	rb->nblocks += rec->rm_blockcount;

	/* If this rmap isn't for the fork we want, we're done. */
	if (rb->whichfork == XFS_DATA_FORK &&
	    (rec->rm_flags & XFS_RMAP_ATTR_FORK))
		return 0;
	if (rb->whichfork == XFS_ATTR_FORK &&
	    !(rec->rm_flags & XFS_RMAP_ATTR_FORK))
		return 0;

	/* Reject unwritten extents if we don't allow those. */
	if ((rec->rm_flags & XFS_RMAP_UNWRITTEN) && !rb->allow_unwritten)
		return -EFSCORRUPTED;

	fsbno = XFS_AGB_TO_FSB(mp, cur->bc_ag.pag->pag_agno,
			rec->rm_startblock);

	/* Track old bmbt blocks separately so we can reap them afterwards. */
	if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK) {
		rb->old_bmbt_block_count += rec->rm_blockcount;
		return xfsb_bitmap_set(&rb->old_bmbt_blocks, fsbno,
				rec->rm_blockcount);
	}

	return xrep_bmap_from_rmap(rb, rec->rm_offset, fsbno,
			rec->rm_blockcount,
			rec->rm_flags & XFS_RMAP_UNWRITTEN);
}
285 | |
286 | /* |
287 | * Compare two block mapping records. We want to sort in order of increasing |
288 | * file offset. |
289 | */ |
290 | static int |
291 | xrep_bmap_extent_cmp( |
292 | const void *a, |
293 | const void *b) |
294 | { |
295 | const struct xfs_bmbt_rec *ba = a; |
296 | const struct xfs_bmbt_rec *bb = b; |
297 | xfs_fileoff_t ao = xfs_bmbt_disk_get_startoff(ba); |
298 | xfs_fileoff_t bo = xfs_bmbt_disk_get_startoff(bb); |
299 | |
300 | if (ao > bo) |
301 | return 1; |
302 | else if (ao < bo) |
303 | return -1; |
304 | return 0; |
305 | } |
306 | |
/*
 * Sort the bmap extents by fork offset or else the records will be in the
 * wrong order.  Ensure there are no overlaps in the file offset ranges.
 */
STATIC int
xrep_bmap_sort_records(
	struct xrep_bmap	*rb)
{
	struct xfs_bmbt_irec	irec;
	xfs_fileoff_t		next_off = 0;
	xfarray_idx_t		array_cur;
	int			error;

	error = xfarray_sort(rb->bmap_records, xrep_bmap_extent_cmp,
			XFARRAY_SORT_KILLABLE);
	if (error)
		return error;

	/*
	 * Walk the sorted records; each one must start at or after the end
	 * of the previous one, or the mappings overlap.
	 */
	foreach_xfarray_idx(rb->bmap_records, array_cur) {
		struct xfs_bmbt_rec	rec;

		if (xchk_should_terminate(rb->sc, &error))
			return error;

		error = xfarray_load(rb->bmap_records, array_cur, &rec);
		if (error)
			return error;

		xfs_bmbt_disk_get_all(&rec, &irec);

		/* Overlapping mappings mean the records are corrupt. */
		if (irec.br_startoff < next_off)
			return -EFSCORRUPTED;

		next_off = irec.br_startoff + irec.br_blockcount;
	}

	return 0;
}
345 | |
346 | /* Scan one AG for reverse mappings that we can turn into extent maps. */ |
347 | STATIC int |
348 | xrep_bmap_scan_ag( |
349 | struct xrep_bmap *rb, |
350 | struct xfs_perag *pag) |
351 | { |
352 | struct xfs_scrub *sc = rb->sc; |
353 | int error; |
354 | |
355 | error = xrep_ag_init(sc, pag, &sc->sa); |
356 | if (error) |
357 | return error; |
358 | |
359 | error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_bmap_walk_rmap, rb); |
360 | xchk_ag_free(sc, &sc->sa); |
361 | return error; |
362 | } |
363 | |
/*
 * Find the delalloc extents from the old incore extent tree.  Delalloc
 * reservations are identified by a null startblock, and are carried over
 * to the new fork via the staging record array.
 */
STATIC int
xrep_bmap_find_delalloc(
	struct xrep_bmap	*rb)
{
	struct xfs_bmbt_irec	irec;
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_rec	rbe;
	struct xfs_inode	*ip = rb->sc->ip;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, rb->whichfork);
	int			error = 0;

	/*
	 * Skip this scan if we don't expect to find delayed allocation
	 * reservations in this fork.
	 */
	if (rb->whichfork == XFS_ATTR_FORK || ip->i_delayed_blks == 0)
		return 0;

	for_each_xfs_iext(ifp, &icur, &irec) {
		/* Real mappings were already collected from the rmapbt. */
		if (!isnullstartblock(irec.br_startblock))
			continue;

		xfs_bmbt_disk_set_all(&rbe, &irec);

		trace_xrep_bmap_found(ip, rb->whichfork, &irec);

		if (xchk_should_terminate(rb->sc, &error))
			return error;

		error = xfarray_append(rb->bmap_records, &rbe);
		if (error)
			return error;
	}

	return 0;
}
401 | |
/*
 * Collect block mappings for this fork of this inode and decide if we have
 * enough space to rebuild.  Caller is responsible for cleaning up the list if
 * anything goes wrong.
 */
STATIC int
xrep_bmap_find_mappings(
	struct xrep_bmap	*rb)
{
	struct xfs_scrub	*sc = rb->sc;
	struct xfs_perag	*pag;
	xfs_agnumber_t		agno;
	int			error = 0;

	/* Iterate the rmaps for extents. */
	for_each_perag(sc->mp, agno, pag) {
		error = xrep_bmap_scan_ag(rb, pag);
		if (error) {
			/* Drop the perag reference on an early exit. */
			xfs_perag_rele(pag);
			return error;
		}
	}

	/* Pick up delalloc reservations from the old incore fork. */
	return xrep_bmap_find_delalloc(rb);
}
427 | |
/*
 * Retrieve real extent mappings for bulk loading the bmap btree.  Returns
 * the number of records copied into @block, or a negative errno.
 */
STATIC int
xrep_bmap_get_records(
	struct xfs_btree_cur	*cur,
	unsigned int		idx,
	struct xfs_btree_block	*block,
	unsigned int		nr_wanted,
	void			*priv)
{
	struct xfs_bmbt_rec	rec;
	struct xfs_bmbt_irec	*irec = &cur->bc_rec.b;
	struct xrep_bmap	*rb = priv;
	union xfs_btree_rec	*block_rec;
	unsigned int		loaded;
	int			error;

	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
		/* Skip delalloc records; only real mappings go ondisk. */
		do {
			error = xfarray_load(rb->bmap_records, rb->array_cur++,
					&rec);
			if (error)
				return error;

			xfs_bmbt_disk_get_all(&rec, irec);
		} while (isnullstartblock(irec->br_startblock));

		block_rec = xfs_btree_rec_addr(cur, idx, block);
		cur->bc_ops->init_rec_from_cur(cur, block_rec);
	}

	return loaded;
}
460 | |
/*
 * Feed one of the new btree blocks to the bulk loader.  Blocks come from
 * the reservation that was set up in rb->new_bmapbt.
 */
STATIC int
xrep_bmap_claim_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	void			*priv)
{
	struct xrep_bmap	*rb = priv;

	return xrep_newbt_claim_block(cur, &rb->new_bmapbt, ptr);
}
472 | |
/*
 * Figure out how much space we need to create the incore btree root block.
 * The bulk loader only asks about levels above the leaves, hence the assert.
 */
STATIC size_t
xrep_bmap_iroot_size(
	struct xfs_btree_cur	*cur,
	unsigned int		level,
	unsigned int		nr_this_level,
	void			*priv)
{
	ASSERT(level > 0);

	return XFS_BMAP_BROOT_SPACE_CALC(cur->bc_mp, nr_this_level);
}
485 | |
/*
 * Update the inode counters.  Set the REFLINK iflag if the scan decided we
 * need it, then adjust di_nblocks and the quota block count by the size
 * difference between the old and new bmbt.
 */
STATIC int
xrep_bmap_reset_counters(
	struct xrep_bmap	*rb)
{
	struct xfs_scrub	*sc = rb->sc;
	struct xbtree_ifakeroot	*ifake = &rb->new_bmapbt.ifake;
	int64_t			delta;

	if (rb->reflink_scan == RLS_SET_IFLAG)
		sc->ip->i_diflags2 |= XFS_DIFLAG2_REFLINK;

	/*
	 * Update the inode block counts to reflect the extents we found in the
	 * rmapbt.
	 */
	delta = ifake->if_blocks - rb->old_bmbt_block_count;
	sc->ip->i_nblocks = rb->nblocks + delta;
	xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);

	/*
	 * Adjust the quota counts by the difference in size between the old
	 * and new bmbt.
	 */
	xfs_trans_mod_dquot_byino(sc->tp, sc->ip, XFS_TRANS_DQ_BCOUNT, delta);
	return 0;
}
513 | |
/*
 * Create a new iext tree and load it with block mappings.  If the inode is
 * in extents format, that's all we need to do to commit the new mappings.
 * If it is in btree format, this takes care of preloading the incore tree.
 */
STATIC int
xrep_bmap_extents_load(
	struct xrep_bmap	*rb)
{
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	irec;
	struct xfs_ifork	*ifp = rb->new_bmapbt.ifake.if_fork;
	xfarray_idx_t		array_cur;
	int			error;

	/* The staged fork must start out empty. */
	ASSERT(ifp->if_bytes == 0);

	/* Add all the mappings (incl. delalloc) to the incore extent tree. */
	xfs_iext_first(ifp, &icur);
	foreach_xfarray_idx(rb->bmap_records, array_cur) {
		struct xfs_bmbt_rec	rec;

		error = xfarray_load(rb->bmap_records, array_cur, &rec);
		if (error)
			return error;

		xfs_bmbt_disk_get_all(&rec, &irec);

		xfs_iext_insert_raw(ifp, &icur, &irec);
		/* Delalloc mappings (null startblock) are not real extents. */
		if (!isnullstartblock(irec.br_startblock))
			ifp->if_nextents++;

		xfs_iext_next(ifp, &icur);
	}

	/* Make sure the extent count field can hold what we loaded. */
	return xrep_ino_ensure_extent_count(rb->sc, rb->whichfork,
			ifp->if_nextents);
}
552 | |
/*
 * Reserve new btree blocks, bulk load the bmap records into the ondisk btree,
 * and load the incore extent tree.
 */
STATIC int
xrep_bmap_btree_load(
	struct xrep_bmap	*rb,
	struct xfs_btree_cur	*bmap_cur)
{
	struct xfs_scrub	*sc = rb->sc;
	int			error;

	/* Compute how many blocks we'll need. */
	error = xfs_btree_bload_compute_geometry(bmap_cur,
			&rb->new_bmapbt.bload, rb->real_mappings);
	if (error)
		return error;

	/* Last chance to abort before we start committing fixes. */
	if (xchk_should_terminate(sc, &error))
		return error;

	/*
	 * Guess how many blocks we're going to need to rebuild an entire bmap
	 * from the number of extents we found, and pump up our transaction to
	 * have sufficient block reservation.  We're allowed to exceed file
	 * quota to repair inconsistent metadata.
	 */
	error = xfs_trans_reserve_more_inode(sc->tp, sc->ip,
			rb->new_bmapbt.bload.nr_blocks, 0, true);
	if (error)
		return error;

	/* Reserve the space we'll need for the new btree. */
	error = xrep_newbt_alloc_blocks(&rb->new_bmapbt,
			rb->new_bmapbt.bload.nr_blocks);
	if (error)
		return error;

	/* Add all observed bmap records. */
	rb->array_cur = XFARRAY_CURSOR_INIT;
	error = xfs_btree_bload(bmap_cur, &rb->new_bmapbt.bload, rb);
	if (error)
		return error;

	/*
	 * Load the new bmap records into the new incore extent tree to
	 * preserve delalloc reservations for regular files.  The directory
	 * code loads the extent tree during xfs_dir_open and assumes
	 * thereafter that it remains loaded, so we must not violate that
	 * assumption.
	 */
	return xrep_bmap_extents_load(rb);
}
607 | |
/*
 * Use the collected bmap information to stage a new bmap fork.  If this is
 * successful we'll return with the new fork information logged to the repair
 * transaction but not yet committed.  The caller must ensure that the inode
 * is joined to the transaction; the inode will be joined to a clean
 * transaction when the function returns.
 */
STATIC int
xrep_bmap_build_new_fork(
	struct xrep_bmap	*rb)
{
	struct xfs_owner_info	oinfo;
	struct xfs_scrub	*sc = rb->sc;
	struct xfs_btree_cur	*bmap_cur;
	struct xbtree_ifakeroot	*ifake = &rb->new_bmapbt.ifake;
	int			error;

	/* Sort and cross-check the staged records before using them. */
	error = xrep_bmap_sort_records(rb);
	if (error)
		return error;

	/*
	 * Prepare to construct the new fork by initializing the new btree
	 * structure and creating a fake ifork in the ifakeroot structure.
	 */
	xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, rb->whichfork);
	error = xrep_newbt_init_inode(&rb->new_bmapbt, sc, rb->whichfork,
			&oinfo);
	if (error)
		return error;

	rb->new_bmapbt.bload.get_records = xrep_bmap_get_records;
	rb->new_bmapbt.bload.claim_block = xrep_bmap_claim_block;
	rb->new_bmapbt.bload.iroot_size = xrep_bmap_iroot_size;

	/*
	 * Allocate a new bmap btree cursor for reloading an inode block mapping
	 * data structure.
	 */
	bmap_cur = xfs_bmbt_init_cursor(sc->mp, NULL, sc->ip, XFS_STAGING_FORK);
	xfs_btree_stage_ifakeroot(bmap_cur, ifake);

	/*
	 * Figure out the size and format of the new fork, then fill it with
	 * all the bmap records we've found.  Join the inode to the transaction
	 * so that we can roll the transaction while holding the inode locked.
	 */
	if (rb->real_mappings <= XFS_IFORK_MAXEXT(sc->ip, rb->whichfork)) {
		ifake->if_fork->if_format = XFS_DINODE_FMT_EXTENTS;
		error = xrep_bmap_extents_load(rb);
	} else {
		ifake->if_fork->if_format = XFS_DINODE_FMT_BTREE;
		error = xrep_bmap_btree_load(rb, bmap_cur);
	}
	if (error)
		goto err_cur;

	/*
	 * Install the new fork in the inode.  After this point the old mapping
	 * data are no longer accessible and the new tree is live.  We delete
	 * the cursor immediately after committing the staged root because the
	 * staged fork might be in extents format.
	 */
	xfs_bmbt_commit_staged_btree(bmap_cur, sc->tp, rb->whichfork);
	xfs_btree_del_cursor(bmap_cur, 0);

	/* Reset the inode counters now that we've changed the fork. */
	error = xrep_bmap_reset_counters(rb);
	if (error)
		goto err_newbt;

	/* Dispose of any unused blocks and the accounting information. */
	error = xrep_newbt_commit(&rb->new_bmapbt);
	if (error)
		return error;

	return xrep_roll_trans(sc);

err_cur:
	/* bmap_cur is always valid when we jump here; the check is defensive. */
	if (bmap_cur)
		xfs_btree_del_cursor(bmap_cur, error);
err_newbt:
	/* The cursor was already torn down if we arrive here from below. */
	xrep_newbt_cancel(&rb->new_bmapbt);
	return error;
}
693 | |
694 | /* |
695 | * Now that we've logged the new inode btree, invalidate all of the old blocks |
696 | * and free them, if there were any. |
697 | */ |
698 | STATIC int |
699 | xrep_bmap_remove_old_tree( |
700 | struct xrep_bmap *rb) |
701 | { |
702 | struct xfs_scrub *sc = rb->sc; |
703 | struct xfs_owner_info oinfo; |
704 | |
705 | /* Free the old bmbt blocks if they're not in use. */ |
706 | xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, rb->whichfork); |
707 | return xrep_reap_fsblocks(sc, &rb->old_bmbt_blocks, &oinfo); |
708 | } |
709 | |
/*
 * Check for garbage inputs.  Returns -ECANCELED if there's nothing to do,
 * -EOPNOTSUPP if the configuration doesn't support this repair, or a
 * negative errno on corruption.
 */
STATIC int
xrep_bmap_check_inputs(
	struct xfs_scrub	*sc,
	int			whichfork)
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(sc->ip, whichfork);

	ASSERT(whichfork == XFS_DATA_FORK || whichfork == XFS_ATTR_FORK);

	/* The rebuild is driven entirely by rmapbt records. */
	if (!xfs_has_rmapbt(sc->mp))
		return -EOPNOTSUPP;

	/* No fork means nothing to rebuild. */
	if (!ifp)
		return -ECANCELED;

	/*
	 * We only know how to repair extent mappings, which is to say that we
	 * only support extents and btree fork format.  Repairs to a local
	 * format fork require a higher level repair function, so we do not
	 * have any work to do here.
	 */
	switch (ifp->if_format) {
	case XFS_DINODE_FMT_DEV:
	case XFS_DINODE_FMT_LOCAL:
	case XFS_DINODE_FMT_UUID:
		return -ECANCELED;
	case XFS_DINODE_FMT_EXTENTS:
	case XFS_DINODE_FMT_BTREE:
		break;
	default:
		return -EFSCORRUPTED;
	}

	/* Attr forks need no further checks. */
	if (whichfork == XFS_ATTR_FORK)
		return 0;

	/* Only files, symlinks, and directories get to have data forks. */
	switch (VFS_I(sc->ip)->i_mode & S_IFMT) {
	case S_IFREG:
	case S_IFDIR:
	case S_IFLNK:
		/* ok */
		break;
	default:
		return -EINVAL;
	}

	/* Don't know how to rebuild realtime data forks. */
	if (XFS_IS_REALTIME_INODE(sc->ip))
		return -EOPNOTSUPP;

	return 0;
}
765 | |
766 | /* Set up the initial state of the reflink scan. */ |
767 | static inline enum reflink_scan_state |
768 | xrep_bmap_init_reflink_scan( |
769 | struct xfs_scrub *sc, |
770 | int whichfork) |
771 | { |
772 | /* cannot share on non-reflink filesystem */ |
773 | if (!xfs_has_reflink(sc->mp)) |
774 | return RLS_IRRELEVANT; |
775 | |
776 | /* preserve flag if it's already set */ |
777 | if (xfs_is_reflink_inode(sc->ip)) |
778 | return RLS_SET_IFLAG; |
779 | |
780 | /* can only share regular files */ |
781 | if (!S_ISREG(VFS_I(sc->ip)->i_mode)) |
782 | return RLS_IRRELEVANT; |
783 | |
784 | /* cannot share attr fork extents */ |
785 | if (whichfork != XFS_DATA_FORK) |
786 | return RLS_IRRELEVANT; |
787 | |
788 | /* cannot share realtime extents */ |
789 | if (XFS_IS_REALTIME_INODE(sc->ip)) |
790 | return RLS_IRRELEVANT; |
791 | |
792 | return RLS_UNKNOWN; |
793 | } |
794 | |
/*
 * Repair an inode fork.  Gather the fork's mappings from the rmapbt and the
 * old incore tree, rebuild the fork, then reap the old bmbt blocks.
 * @allow_unwritten controls whether unwritten extents are tolerated in the
 * collected rmap records.
 */
int
xrep_bmap(
	struct xfs_scrub	*sc,
	int			whichfork,
	bool			allow_unwritten)
{
	struct xrep_bmap	*rb;
	char			*descr;
	unsigned int		max_bmbt_recs;
	bool			large_extcount;
	int			error = 0;

	error = xrep_bmap_check_inputs(sc, whichfork);
	if (error == -ECANCELED)
		return 0;
	if (error)
		return error;

	rb = kzalloc(sizeof(struct xrep_bmap), XCHK_GFP_FLAGS);
	if (!rb)
		return -ENOMEM;
	rb->sc = sc;
	rb->whichfork = whichfork;
	rb->reflink_scan = xrep_bmap_init_reflink_scan(sc, whichfork);
	rb->allow_unwritten = allow_unwritten;

	/* Set up enough storage to handle the max records for this fork. */
	large_extcount = xfs_has_large_extent_counts(sc->mp);
	max_bmbt_recs = xfs_iext_max_nextents(large_extcount, whichfork);
	descr = xchk_xfile_ino_descr(sc, "%s fork mapping records",
			whichfork == XFS_DATA_FORK ? "data" : "attr");
	error = xfarray_create(descr, max_bmbt_recs,
			sizeof(struct xfs_bmbt_rec), &rb->bmap_records);
	kfree(descr);
	if (error)
		goto out_rb;

	/* Collect all reverse mappings for this fork's extents. */
	xfsb_bitmap_init(&rb->old_bmbt_blocks);
	error = xrep_bmap_find_mappings(rb);
	if (error)
		goto out_bitmap;

	xfs_trans_ijoin(sc->tp, sc->ip, 0);

	/* Rebuild the bmap information. */
	error = xrep_bmap_build_new_fork(rb);
	if (error)
		goto out_bitmap;

	/* Kill the old tree. */
	error = xrep_bmap_remove_old_tree(rb);
	if (error)
		goto out_bitmap;

out_bitmap:
	/* Tear down the staging structures on both success and failure. */
	xfsb_bitmap_destroy(&rb->old_bmbt_blocks);
	xfarray_destroy(rb->bmap_records);
out_rb:
	kfree(rb);
	return error;
}
858 | |
/* Repair an inode's data fork.  Unwritten extents are permitted here. */
int
xrep_bmap_data(
	struct xfs_scrub	*sc)
{
	return xrep_bmap(sc, XFS_DATA_FORK, true);
}
866 | |
/* Repair an inode's attr fork.  Unwritten extents are not permitted here. */
int
xrep_bmap_attr(
	struct xfs_scrub	*sc)
{
	return xrep_bmap(sc, XFS_ATTR_FORK, false);
}
874 | |