1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * Copyright (C) 2018-2023 Oracle. All Rights Reserved. |
4 | * Author: Darrick J. Wong <djwong@kernel.org> |
5 | */ |
6 | #include "xfs.h" |
7 | #include "xfs_fs.h" |
8 | #include "xfs_shared.h" |
9 | #include "xfs_format.h" |
10 | #include "xfs_trans_resv.h" |
11 | #include "xfs_mount.h" |
12 | #include "xfs_defer.h" |
13 | #include "xfs_btree.h" |
14 | #include "xfs_bit.h" |
15 | #include "xfs_format.h" |
16 | #include "xfs_log_format.h" |
17 | #include "xfs_trans.h" |
18 | #include "xfs_sb.h" |
19 | #include "xfs_inode.h" |
20 | #include "xfs_inode_fork.h" |
21 | #include "xfs_alloc.h" |
22 | #include "xfs_bmap.h" |
23 | #include "xfs_quota.h" |
24 | #include "xfs_qm.h" |
25 | #include "xfs_dquot.h" |
26 | #include "xfs_dquot_item.h" |
27 | #include "xfs_reflink.h" |
28 | #include "xfs_bmap_btree.h" |
29 | #include "xfs_trans_space.h" |
30 | #include "scrub/xfs_scrub.h" |
31 | #include "scrub/scrub.h" |
32 | #include "scrub/common.h" |
33 | #include "scrub/quota.h" |
34 | #include "scrub/trace.h" |
35 | #include "scrub/repair.h" |
36 | |
37 | /* |
38 | * Quota Repair |
39 | * ============ |
40 | * |
41 | * Quota repairs are fairly simplistic; we fix everything that the dquot |
42 | * verifiers complain about, cap any counters or limits that make no sense, |
43 | * and schedule a quotacheck if we had to fix anything. We also repair any |
44 | * data fork extent records that don't apply to metadata files. |
45 | */ |
46 | |
/* Repair state passed from the dquot iterator to each quota item repair. */
struct xrep_quota_info {
	struct xfs_scrub	*sc;		/* scrub/repair context */
	bool			need_quotacheck; /* counters were clamped; schedule quotacheck */
};
51 | |
52 | /* |
53 | * Allocate a new block into a sparse hole in the quota file backing this |
54 | * dquot, initialize the block, and commit the whole mess. |
55 | */ |
/*
 * Allocate a new block into a sparse hole in the quota file backing this
 * dquot, initialize the block, and commit the whole mess.  On success,
 * @irec describes the new mapping, dq->q_blkno has been updated, and
 * sc->ip is no longer joined to sc->tp.
 */
STATIC int
xrep_quota_item_fill_bmap_hole(
	struct xfs_scrub	*sc,
	struct xfs_dquot	*dq,
	struct xfs_bmbt_irec	*irec)
{
	struct xfs_buf		*bp;
	struct xfs_mount	*mp = sc->mp;
	int			nmaps = 1;
	int			error;

	/* Join the quota file so the allocation is logged with sc->tp. */
	xfs_trans_ijoin(sc->tp, sc->ip, 0);

	/* Map a block into the file. */
	error = xfs_trans_reserve_more(sc->tp, XFS_QM_DQALLOC_SPACE_RES(mp),
			0);
	if (error)
		return error;

	error = xfs_bmapi_write(sc->tp, sc->ip, dq->q_fileoffset,
			XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, 0,
			irec, &nmaps);
	if (error)
		return error;
	if (nmaps != 1)
		return -ENOSPC;

	/* Cache the disk address of the new dquot cluster in the dquot. */
	dq->q_blkno = XFS_FSB_TO_DADDR(mp, irec->br_startblock);

	trace_xrep_dquot_item_fill_bmap_hole(sc->mp, dq->q_type, dq->q_id);

	/* Initialize the new block. */
	error = xfs_trans_get_buf(sc->tp, mp->m_ddev_targp, dq->q_blkno,
			mp->m_quotainfo->qi_dqchunklen, 0, &bp);
	if (error)
		return error;
	bp->b_ops = &xfs_dquot_buf_ops;

	xfs_qm_init_dquot_blk(sc->tp, dq->q_id, dq->q_type, bp);
	xfs_buf_set_ref(bp, XFS_DQUOT_REF);

	/*
	 * Finish the mapping transactions and roll one more time to
	 * disconnect sc->ip from sc->tp.
	 */
	error = xrep_defer_finish(sc);
	if (error)
		return error;
	return xfs_trans_roll(&sc->tp);
}
106 | |
107 | /* Make sure there's a written block backing this dquot */ |
/*
 * Make sure there's a written block backing this dquot.  Sparse holes and
 * delalloc reservations are filled with freshly initialized blocks, and the
 * dquot's cached disk address is refreshed.  On success, *dirty is set so
 * that the caller will relog the dquot.
 */
STATIC int
xrep_quota_item_bmap(
	struct xfs_scrub	*sc,
	struct xfs_dquot	*dq,
	bool			*dirty)
{
	struct xfs_bmbt_irec	irec;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_quotainfo	*qi = mp->m_quotainfo;
	xfs_fileoff_t		offset = dq->q_id / qi->qi_dqperchunk;
	int			nmaps = 1;
	int			error;

	/* The computed file offset should always be valid. */
	if (!xfs_verify_fileoff(mp, offset)) {
		ASSERT(xfs_verify_fileoff(mp, offset));
		return -EFSCORRUPTED;
	}
	dq->q_fileoffset = offset;

	/* Look up the data fork mapping for this dquot's block. */
	error = xfs_bmapi_read(sc->ip, offset, 1, &irec, &nmaps, 0);
	if (error)
		return error;

	if (nmaps < 1 || !xfs_bmap_is_real_extent(&irec)) {
		/* Hole/delalloc extent; allocate a real block. */
		error = xrep_quota_item_fill_bmap_hole(sc, dq, &irec);
		if (error)
			return error;
	} else if (irec.br_state != XFS_EXT_NORM) {
		/* Unwritten extent, which we already took care of? */
		ASSERT(irec.br_state == XFS_EXT_NORM);
		return -EFSCORRUPTED;
	} else if (dq->q_blkno != XFS_FSB_TO_DADDR(mp, irec.br_startblock)) {
		/*
		 * If the cached daddr is incorrect, repair probably punched a
		 * hole out of the quota file and filled it back in with a new
		 * block.  Update the block mapping in the dquot.
		 */
		dq->q_blkno = XFS_FSB_TO_DADDR(mp, irec.br_startblock);
	}

	/* Ask the caller to relog the dquot now that its backing is good. */
	*dirty = true;
	return 0;
}
153 | |
154 | /* Reset quota timers if incorrectly set. */ |
155 | static inline void |
156 | xrep_quota_item_timer( |
157 | struct xfs_scrub *sc, |
158 | const struct xfs_dquot_res *res, |
159 | bool *dirty) |
160 | { |
161 | if ((res->softlimit && res->count > res->softlimit) || |
162 | (res->hardlimit && res->count > res->hardlimit)) { |
163 | if (!res->timer) |
164 | *dirty = true; |
165 | } else { |
166 | if (res->timer) |
167 | *dirty = true; |
168 | } |
169 | } |
170 | |
171 | /* Scrub the fields in an individual quota item. */ |
/*
 * Repair the fields in an individual quota item: backing storage, limits,
 * counters, and timers.  The dquot arrives and leaves locked; the dquot
 * lock is dropped and re-taken internally to obey lock ordering.
 */
STATIC int
xrep_quota_item(
	struct xrep_quota_info	*rqi,
	struct xfs_dquot	*dq)
{
	struct xfs_scrub	*sc = rqi->sc;
	struct xfs_mount	*mp = sc->mp;
	xfs_ino_t		fs_icount;
	bool			dirty = false;
	int			error = 0;

	/* Last chance to abort before we start committing fixes. */
	if (xchk_should_terminate(sc, &error))
		return error;

	/*
	 * We might need to fix holes in the bmap record for the storage
	 * backing this dquot, so we need to lock the dquot and the quota file.
	 * dqiterate gave us a locked dquot, so drop the dquot lock to get the
	 * ILOCK_EXCL.
	 */
	xfs_dqunlock(dq);
	xchk_ilock(sc, XFS_ILOCK_EXCL);
	xfs_dqlock(dq);

	error = xrep_quota_item_bmap(sc, dq, &dirty);
	xchk_iunlock(sc, XFS_ILOCK_EXCL);
	if (error)
		return error;

	/* Check the limits; a soft limit may not exceed the hard limit. */
	if (dq->q_blk.softlimit > dq->q_blk.hardlimit) {
		dq->q_blk.softlimit = dq->q_blk.hardlimit;
		dirty = true;
	}

	if (dq->q_ino.softlimit > dq->q_ino.hardlimit) {
		dq->q_ino.softlimit = dq->q_ino.hardlimit;
		dirty = true;
	}

	if (dq->q_rtb.softlimit > dq->q_rtb.hardlimit) {
		dq->q_rtb.softlimit = dq->q_rtb.hardlimit;
		dirty = true;
	}

	/*
	 * Check that usage doesn't exceed physical limits.  However, on
	 * a reflink filesystem we're allowed to exceed physical space
	 * if there are no quota limits.  We don't know what the real number
	 * is, but we can make quotacheck find out for us.
	 */
	if (!xfs_has_reflink(mp) && dq->q_blk.count > mp->m_sb.sb_dblocks) {
		dq->q_blk.reserved -= dq->q_blk.count;
		dq->q_blk.reserved += mp->m_sb.sb_dblocks;
		dq->q_blk.count = mp->m_sb.sb_dblocks;
		rqi->need_quotacheck = true;
		dirty = true;
	}
	/* Inode usage cannot exceed the number of inodes in the fs. */
	fs_icount = percpu_counter_sum(&mp->m_icount);
	if (dq->q_ino.count > fs_icount) {
		dq->q_ino.reserved -= dq->q_ino.count;
		dq->q_ino.reserved += fs_icount;
		dq->q_ino.count = fs_icount;
		rqi->need_quotacheck = true;
		dirty = true;
	}
	/* Likewise, rt block usage cannot exceed the rt volume size. */
	if (dq->q_rtb.count > mp->m_sb.sb_rblocks) {
		dq->q_rtb.reserved -= dq->q_rtb.count;
		dq->q_rtb.reserved += mp->m_sb.sb_rblocks;
		dq->q_rtb.count = mp->m_sb.sb_rblocks;
		rqi->need_quotacheck = true;
		dirty = true;
	}

	/* Flag timers that are armed inconsistently with the usage. */
	xrep_quota_item_timer(sc, &dq->q_blk, &dirty);
	xrep_quota_item_timer(sc, &dq->q_ino, &dirty);
	xrep_quota_item_timer(sc, &dq->q_rtb, &dirty);

	if (!dirty)
		return 0;

	trace_xrep_dquot_item(sc->mp, dq->q_type, dq->q_id);

	/* Log the repaired dquot and commit by rolling the transaction. */
	dq->q_flags |= XFS_DQFLAG_DIRTY;
	xfs_trans_dqjoin(sc->tp, dq);
	if (dq->q_id) {
		/* id 0 holds the default limits; don't adjust it. */
		xfs_qm_adjust_dqlimits(dq);
		xfs_qm_adjust_dqtimers(dq);
	}
	xfs_trans_log_dquot(sc->tp, dq);
	error = xfs_trans_roll(&sc->tp);
	/* The roll dropped the dquot lock; re-take it for the caller. */
	xfs_dqlock(dq);
	return error;
}
267 | |
268 | /* Fix a quota timer so that we can pass the verifier. */ |
269 | STATIC void |
270 | xrep_quota_fix_timer( |
271 | struct xfs_mount *mp, |
272 | const struct xfs_disk_dquot *ddq, |
273 | __be64 softlimit, |
274 | __be64 countnow, |
275 | __be32 *timer, |
276 | time64_t timelimit) |
277 | { |
278 | uint64_t soft = be64_to_cpu(softlimit); |
279 | uint64_t count = be64_to_cpu(countnow); |
280 | time64_t new_timer; |
281 | uint32_t t; |
282 | |
283 | if (!soft || count <= soft || *timer != 0) |
284 | return; |
285 | |
286 | new_timer = xfs_dquot_set_timeout(mp, |
287 | ktime_get_real_seconds() + timelimit); |
288 | if (ddq->d_type & XFS_DQTYPE_BIGTIME) |
289 | t = xfs_dq_unix_to_bigtime(new_timer); |
290 | else |
291 | t = new_timer; |
292 | |
293 | *timer = cpu_to_be32(t); |
294 | } |
295 | |
296 | /* Fix anything the verifiers complain about. */ |
/*
 * Fix anything the verifiers complain about in one block of dquot records.
 * If the block reads cleanly and won't impede iteration, leave it alone;
 * otherwise reinitialize every record header in the block (ids, magic,
 * version, type, timers, uuid, checksum) and log the whole buffer.
 */
STATIC int
xrep_quota_block(
	struct xfs_scrub	*sc,
	xfs_daddr_t		daddr,
	xfs_dqtype_t		dqtype,
	xfs_dqid_t		id)
{
	struct xfs_dqblk	*dqblk;
	struct xfs_disk_dquot	*ddq;
	struct xfs_quotainfo	*qi = sc->mp->m_quotainfo;
	struct xfs_def_quota	*defq = xfs_get_defquota(qi, dqtype);
	struct xfs_buf		*bp = NULL;
	enum xfs_blft		buftype = 0;
	int			i;
	int			error;

	error = xfs_trans_read_buf(sc->mp, sc->tp, sc->mp->m_ddev_targp, daddr,
			qi->qi_dqchunklen, 0, &bp, &xfs_dquot_buf_ops);
	switch (error) {
	case -EFSBADCRC:
	case -EFSCORRUPTED:
		/* Failed verifier, retry read with no ops. */
		error = xfs_trans_read_buf(sc->mp, sc->tp,
				sc->mp->m_ddev_targp, daddr, qi->qi_dqchunklen,
				0, &bp, NULL);
		if (error)
			return error;
		break;
	case 0:
		dqblk = bp->b_addr;
		ddq = &dqblk[0].dd_diskdq;

		/*
		 * If there's nothing that would impede a dqiterate, we're
		 * done.
		 */
		if ((ddq->d_type & XFS_DQTYPE_REC_MASK) != dqtype ||
		    id == be32_to_cpu(ddq->d_id)) {
			xfs_trans_brelse(sc->tp, bp);
			return 0;
		}
		break;
	default:
		return error;
	}

	/* Something's wrong with the block, fix the whole thing. */
	dqblk = bp->b_addr;
	bp->b_ops = &xfs_dquot_buf_ops;
	for (i = 0; i < qi->qi_dqperchunk; i++, dqblk++) {
		ddq = &dqblk->dd_diskdq;

		trace_xrep_disk_dquot(sc->mp, dqtype, id + i);

		/* Reset the header fields to the values we expect. */
		ddq->d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
		ddq->d_version = XFS_DQUOT_VERSION;
		ddq->d_type = dqtype;
		ddq->d_id = cpu_to_be32(id + i);

		if (xfs_has_bigtime(sc->mp) && ddq->d_id)
			ddq->d_type |= XFS_DQTYPE_BIGTIME;

		/* Arm any grace timers needed to pass the verifier. */
		xrep_quota_fix_timer(sc->mp, ddq, ddq->d_blk_softlimit,
				ddq->d_bcount, &ddq->d_btimer,
				defq->blk.time);

		xrep_quota_fix_timer(sc->mp, ddq, ddq->d_ino_softlimit,
				ddq->d_icount, &ddq->d_itimer,
				defq->ino.time);

		xrep_quota_fix_timer(sc->mp, ddq, ddq->d_rtb_softlimit,
				ddq->d_rtbcount, &ddq->d_rtbtimer,
				defq->rtb.time);

		/* We only support v5 filesystems so always set these. */
		uuid_copy(&dqblk->dd_uuid, &sc->mp->m_sb.sb_meta_uuid);
		xfs_update_cksum((char *)dqblk, sizeof(struct xfs_dqblk),
				XFS_DQUOT_CRC_OFF);
		dqblk->dd_lsn = 0;
	}
	/* dqtype is always one of the three XFS_DQTYPE_REC_MASK values. */
	switch (dqtype) {
	case XFS_DQTYPE_USER:
		buftype = XFS_BLFT_UDQUOT_BUF;
		break;
	case XFS_DQTYPE_GROUP:
		buftype = XFS_BLFT_GDQUOT_BUF;
		break;
	case XFS_DQTYPE_PROJ:
		buftype = XFS_BLFT_PDQUOT_BUF;
		break;
	}
	xfs_trans_buf_set_type(sc->tp, bp, buftype);
	xfs_trans_log_buf(sc->tp, bp, 0, BBTOB(bp->b_length) - 1);
	return xrep_roll_trans(sc);
}
392 | |
393 | /* |
394 | * Repair a quota file's data fork. The function returns with the inode |
395 | * joined. |
396 | */ |
/*
 * Repair a quota file's data fork: delalloc mappings are corruption,
 * mappings beyond the highest possible dquot id are truncated, unwritten
 * extents are converted to written, and every dquot cluster is run through
 * the block repair.  The function returns with the inode joined to sc->tp.
 */
STATIC int
xrep_quota_data_fork(
	struct xfs_scrub	*sc,
	xfs_dqtype_t		dqtype)
{
	struct xfs_bmbt_irec	irec = { 0 };
	struct xfs_iext_cursor	icur;
	struct xfs_quotainfo	*qi = sc->mp->m_quotainfo;
	struct xfs_ifork	*ifp;
	xfs_fileoff_t		max_dqid_off;
	xfs_fileoff_t		off;
	xfs_fsblock_t		fsbno;
	bool			truncate = false;
	bool			joined = false;
	int			error = 0;

	/* Fix generic metadata inode problems first. */
	error = xrep_metadata_inode_forks(sc);
	if (error)
		goto out;

	/* Check for data fork problems that apply only to quota files. */
	max_dqid_off = XFS_DQ_ID_MAX / qi->qi_dqperchunk;
	ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
	for_each_xfs_iext(ifp, &icur, &irec) {
		/* Delalloc extents in a metadata file are corruption. */
		if (isnullstartblock(irec.br_startblock)) {
			error = -EFSCORRUPTED;
			goto out;
		}

		/* Mappings beyond the highest dquot id get cut off below. */
		if (irec.br_startoff > max_dqid_off ||
		    irec.br_startoff + irec.br_blockcount - 1 > max_dqid_off) {
			truncate = true;
			break;
		}

		/* Convert unwritten extents to real ones. */
		if (irec.br_state == XFS_EXT_UNWRITTEN) {
			struct xfs_bmbt_irec	nrec;
			int			nmap = 1;

			/* Join lazily; the conversion must be logged. */
			if (!joined) {
				xfs_trans_ijoin(sc->tp, sc->ip, 0);
				joined = true;
			}

			error = xfs_bmapi_write(sc->tp, sc->ip,
					irec.br_startoff, irec.br_blockcount,
					XFS_BMAPI_CONVERT, 0, &nrec, &nmap);
			if (error)
				goto out;
			if (nmap != 1) {
				error = -ENOSPC;
				goto out;
			}
			ASSERT(nrec.br_startoff == irec.br_startoff);
			ASSERT(nrec.br_blockcount == irec.br_blockcount);

			error = xfs_defer_finish(&sc->tp);
			if (error)
				goto out;
		}
	}

	/* Callers expect the inode joined even if nothing changed above. */
	if (!joined) {
		xfs_trans_ijoin(sc->tp, sc->ip, 0);
		joined = true;
	}

	if (truncate) {
		/* Erase everything after the block containing the max dquot */
		error = xfs_bunmapi_range(&sc->tp, sc->ip, 0,
				max_dqid_off * sc->mp->m_sb.sb_blocksize,
				XFS_MAX_FILEOFF);
		if (error)
			goto out;

		/* Remove all CoW reservations. */
		error = xfs_reflink_cancel_cow_blocks(sc->ip, &sc->tp, 0,
				XFS_MAX_FILEOFF, true);
		if (error)
			goto out;
		sc->ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;

		/*
		 * Always re-log the inode so that our permanent transaction
		 * can keep on rolling it forward in the log.
		 */
		xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
	}

	/* Now go fix anything that fails the verifiers. */
	for_each_xfs_iext(ifp, &icur, &irec) {
		/* Walk each dquot cluster within the extent. */
		for (fsbno = irec.br_startblock, off = irec.br_startoff;
		     fsbno < irec.br_startblock + irec.br_blockcount;
		     fsbno += XFS_DQUOT_CLUSTER_SIZE_FSB,
		     off += XFS_DQUOT_CLUSTER_SIZE_FSB) {
			error = xrep_quota_block(sc,
					XFS_FSB_TO_DADDR(sc->mp, fsbno),
					dqtype, off * qi->qi_dqperchunk);
			if (error)
				goto out;
		}
	}

out:
	return error;
}
504 | |
505 | /* |
506 | * Go fix anything in the quota items that we could have been mad about. Now |
507 | * that we've checked the quota inode data fork we have to drop ILOCK_EXCL to |
508 | * use the regular dquot functions. |
509 | */ |
510 | STATIC int |
511 | xrep_quota_problems( |
512 | struct xfs_scrub *sc, |
513 | xfs_dqtype_t dqtype) |
514 | { |
515 | struct xchk_dqiter cursor = { }; |
516 | struct xrep_quota_info rqi = { .sc = sc }; |
517 | struct xfs_dquot *dq; |
518 | int error; |
519 | |
520 | xchk_dqiter_init(&cursor, sc, dqtype); |
521 | while ((error = xchk_dquot_iter(&cursor, &dq)) == 1) { |
522 | error = xrep_quota_item(&rqi, dq); |
523 | xfs_qm_dqput(dq); |
524 | if (error) |
525 | break; |
526 | } |
527 | if (error) |
528 | return error; |
529 | |
530 | /* Make a quotacheck happen. */ |
531 | if (rqi.need_quotacheck) |
532 | xrep_force_quotacheck(sc, dqtype); |
533 | return 0; |
534 | } |
535 | |
536 | /* Repair all of a quota type's items. */ |
/*
 * Repair all of a quota type's items.  First fix the quota file's data
 * fork while holding the ILOCK, then drop the inode lock and repair the
 * individual dquot records, and finally commit the scrub transaction.
 */
int
xrep_quota(
	struct xfs_scrub	*sc)
{
	xfs_dqtype_t		dqtype;
	int			error;

	dqtype = xchk_quota_to_dqtype(sc);

	/*
	 * Re-take the ILOCK so that we can fix any problems that we found
	 * with the data fork mappings, or with the dquot bufs themselves.
	 */
	if (!(sc->ilock_flags & XFS_ILOCK_EXCL))
		xchk_ilock(sc, XFS_ILOCK_EXCL);
	error = xrep_quota_data_fork(sc, dqtype);
	if (error)
		return error;

	/*
	 * Finish deferred items and roll the transaction to unjoin the quota
	 * inode from transaction so that we can unlock the quota inode; we
	 * play only with dquots from now on.
	 */
	error = xrep_defer_finish(sc);
	if (error)
		return error;
	error = xfs_trans_roll(&sc->tp);
	if (error)
		return error;
	xchk_iunlock(sc, sc->ilock_flags);

	/* Fix anything the dquot verifiers don't complain about. */
	error = xrep_quota_problems(sc, dqtype);
	if (error)
		return error;

	return xrep_trans_commit(sc);
}
576 | |