// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2003 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_shared.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
#include "xfs_quota.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_trans_space.h"
#include "xfs_trans_priv.h"
#include "xfs_qm.h"
#include "xfs_trace.h"
#include "xfs_log.h"
#include "xfs_bmap_btree.h"
#include "xfs_error.h"
#include "xfs_health.h"

/*
 * Lock order:
 *
 * ip->i_lock
 *   qi->qi_tree_lock
 *     dquot->q_qlock (xfs_dqlock() and friends)
 *       dquot->q_flush (xfs_dqflock() and friends)
 *       qi->qi_lru_lock
 *
 * If two dquots need to be locked the order is user before group/project,
 * otherwise by the lowest id first, see xfs_dqlock2.
 */
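
/*
 * For example (illustrative note, not from the original source), code that
 * needs two dquots locked at once, say an inode's user and group dquot, is
 * expected to go through xfs_dqlock2() below, which encodes these ordering
 * rules, rather than calling xfs_dqlock() twice by hand:
 *
 *	xfs_dqlock2(udqp, gdqp);
 *	...modify both dquots...
 *	xfs_dqunlock(udqp);
 *	xfs_dqunlock(gdqp);
 */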

struct kmem_cache		*xfs_dqtrx_cache;
static struct kmem_cache	*xfs_dquot_cache;

static struct lock_class_key xfs_dquot_group_class;
static struct lock_class_key xfs_dquot_project_class;

/* Record observations of quota corruption with the health tracking system. */
static void
xfs_dquot_mark_sick(
	struct xfs_dquot	*dqp)
{
	struct xfs_mount	*mp = dqp->q_mount;

	switch (dqp->q_type) {
	case XFS_DQTYPE_USER:
		xfs_fs_mark_sick(mp, XFS_SICK_FS_UQUOTA);
		break;
	case XFS_DQTYPE_GROUP:
		xfs_fs_mark_sick(mp, XFS_SICK_FS_GQUOTA);
		break;
	case XFS_DQTYPE_PROJ:
		xfs_fs_mark_sick(mp, XFS_SICK_FS_PQUOTA);
		break;
	default:
		ASSERT(0);
		break;
	}
}

/*
 * This is called to free all the memory associated with a dquot
 */
void
xfs_qm_dqdestroy(
	struct xfs_dquot	*dqp)
{
	ASSERT(list_empty(&dqp->q_lru));

	kvfree(dqp->q_logitem.qli_item.li_lv_shadow);
	mutex_destroy(&dqp->q_qlock);

	XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot);
	kmem_cache_free(xfs_dquot_cache, dqp);
}

/*
 * If default limits are in force, push them into the dquot now.
 * We overwrite the dquot limits only if they are zero and this
 * is not the root dquot.
 */
void
xfs_qm_adjust_dqlimits(
	struct xfs_dquot	*dq)
{
	struct xfs_mount	*mp = dq->q_mount;
	struct xfs_quotainfo	*q = mp->m_quotainfo;
	struct xfs_def_quota	*defq;
	int			prealloc = 0;

	ASSERT(dq->q_id);
	defq = xfs_get_defquota(q, xfs_dquot_type(dq));

	if (!dq->q_blk.softlimit) {
		dq->q_blk.softlimit = defq->blk.soft;
		prealloc = 1;
	}
	if (!dq->q_blk.hardlimit) {
		dq->q_blk.hardlimit = defq->blk.hard;
		prealloc = 1;
	}
	if (!dq->q_ino.softlimit)
		dq->q_ino.softlimit = defq->ino.soft;
	if (!dq->q_ino.hardlimit)
		dq->q_ino.hardlimit = defq->ino.hard;
	if (!dq->q_rtb.softlimit)
		dq->q_rtb.softlimit = defq->rtb.soft;
	if (!dq->q_rtb.hardlimit)
		dq->q_rtb.hardlimit = defq->rtb.hard;

	if (prealloc)
		xfs_dquot_set_prealloc_limits(dq);
}

/* Set the expiration time of a quota's grace period. */
time64_t
xfs_dquot_set_timeout(
	struct xfs_mount	*mp,
	time64_t		timeout)
{
	struct xfs_quotainfo	*qi = mp->m_quotainfo;

	return clamp_t(time64_t, timeout, qi->qi_expiry_min,
			qi->qi_expiry_max);
}

/* Set the length of the default grace period. */
time64_t
xfs_dquot_set_grace_period(
	time64_t		grace)
{
	return clamp_t(time64_t, grace, XFS_DQ_GRACE_MIN, XFS_DQ_GRACE_MAX);
}

/*
 * Determine if this quota counter is over either limit and set the quota
 * timers as appropriate.
 */
static inline void
xfs_qm_adjust_res_timer(
	struct xfs_mount	*mp,
	struct xfs_dquot_res	*res,
	struct xfs_quota_limits	*qlim)
{
	ASSERT(res->hardlimit == 0 || res->softlimit <= res->hardlimit);

	if ((res->softlimit && res->count > res->softlimit) ||
	    (res->hardlimit && res->count > res->hardlimit)) {
		if (res->timer == 0)
			res->timer = xfs_dquot_set_timeout(mp,
					ktime_get_real_seconds() + qlim->time);
	} else {
		res->timer = 0;
	}
}
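
/*
 * Worked example (illustrative only, not from the original source): with a
 * soft limit of 100 blocks and a count of 110, the first call here arms
 * res->timer to "now + qlim->time", i.e. the end of the grace period; once
 * the count drops back to 100 or below, the next call clears the timer.
 */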

/*
 * Check the limits and timers of a dquot and start or reset timers
 * if necessary.
 * This gets called even when quota enforcement is OFF, which makes our
 * life a little less complicated: we simply don't reject any quota
 * reservations in that case. We also return 0 for the timer values in
 * Q_GETQUOTA calls when enforcement is off.
 * In contrast, warnings are a little different in that they don't
 * 'automatically' get started when limits get exceeded. They do
 * get reset to zero, however, when we find the count to be under
 * the soft limit (they are only ever set non-zero via userspace).
 */
void
xfs_qm_adjust_dqtimers(
	struct xfs_dquot	*dq)
{
	struct xfs_mount	*mp = dq->q_mount;
	struct xfs_quotainfo	*qi = mp->m_quotainfo;
	struct xfs_def_quota	*defq;

	ASSERT(dq->q_id);
	defq = xfs_get_defquota(qi, xfs_dquot_type(dq));

	xfs_qm_adjust_res_timer(dq->q_mount, &dq->q_blk, &defq->blk);
	xfs_qm_adjust_res_timer(dq->q_mount, &dq->q_ino, &defq->ino);
	xfs_qm_adjust_res_timer(dq->q_mount, &dq->q_rtb, &defq->rtb);
}

/*
 * initialize a buffer full of dquots and log the whole thing
 */
void
xfs_qm_init_dquot_blk(
	struct xfs_trans	*tp,
	xfs_dqid_t		id,
	xfs_dqtype_t		type,
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_quotainfo	*q = mp->m_quotainfo;
	struct xfs_dqblk	*d;
	xfs_dqid_t		curid;
	unsigned int		qflag;
	unsigned int		blftype;
	int			i;

	ASSERT(tp);
	ASSERT(xfs_buf_islocked(bp));

	switch (type) {
	case XFS_DQTYPE_USER:
		qflag = XFS_UQUOTA_CHKD;
		blftype = XFS_BLF_UDQUOT_BUF;
		break;
	case XFS_DQTYPE_PROJ:
		qflag = XFS_PQUOTA_CHKD;
		blftype = XFS_BLF_PDQUOT_BUF;
		break;
	case XFS_DQTYPE_GROUP:
		qflag = XFS_GQUOTA_CHKD;
		blftype = XFS_BLF_GDQUOT_BUF;
		break;
	default:
		ASSERT(0);
		return;
	}

	d = bp->b_addr;

	/*
	 * ID of the first dquot in the block - id's are zero based.
	 */
	curid = id - (id % q->qi_dqperchunk);
	memset(d, 0, BBTOB(q->qi_dqchunklen));
	for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) {
		d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
		d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
		d->dd_diskdq.d_id = cpu_to_be32(curid);
		d->dd_diskdq.d_type = type;
		if (curid > 0 && xfs_has_bigtime(mp))
			d->dd_diskdq.d_type |= XFS_DQTYPE_BIGTIME;
		if (xfs_has_crc(mp)) {
			uuid_copy(&d->dd_uuid, &mp->m_sb.sb_meta_uuid);
			xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
					 XFS_DQUOT_CRC_OFF);
		}
	}

	xfs_trans_dquot_buf(tp, bp, blftype);

	/*
	 * quotacheck uses delayed writes to update all the dquots on disk in an
	 * efficient manner instead of logging the individual dquot changes as
	 * they are made. However if we log the buffer allocated here and crash
	 * after quotacheck while the logged initialisation is still in the
	 * active region of the log, log recovery can replay the dquot buffer
	 * initialisation over the top of the checked dquots and corrupt quota
	 * accounting.
	 *
	 * To avoid this problem, quotacheck cannot log the initialised buffer.
	 * We must still dirty the buffer and write it back before the
	 * allocation transaction clears the log. Therefore, mark the buffer as
	 * ordered instead of logging it directly. This is safe for quotacheck
	 * because it detects and repairs allocated but uninitialized dquot
	 * blocks in the quota inodes.
	 */
	if (!(mp->m_qflags & qflag))
		xfs_trans_ordered_buf(tp, bp);
	else
		xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1);
}

/*
 * Initialize the dynamic speculative preallocation thresholds. The lo/hi
 * watermarks correspond to the soft and hard limits by default. If a soft limit
 * is not specified, we use 95% of the hard limit.
 */
void
xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp)
{
	uint64_t space;

	dqp->q_prealloc_hi_wmark = dqp->q_blk.hardlimit;
	dqp->q_prealloc_lo_wmark = dqp->q_blk.softlimit;
	if (!dqp->q_prealloc_lo_wmark) {
		dqp->q_prealloc_lo_wmark = dqp->q_prealloc_hi_wmark;
		do_div(dqp->q_prealloc_lo_wmark, 100);
		dqp->q_prealloc_lo_wmark *= 95;
	}

	space = dqp->q_prealloc_hi_wmark;

	do_div(space, 100);
	dqp->q_low_space[XFS_QLOWSP_1_PCNT] = space;
	dqp->q_low_space[XFS_QLOWSP_3_PCNT] = space * 3;
	dqp->q_low_space[XFS_QLOWSP_5_PCNT] = space * 5;
}
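
/*
 * Worked example (illustrative only, not from the original source): with a
 * hard limit of 1000 blocks and no soft limit, the low watermark becomes
 * 1000 / 100 * 95 = 950 blocks, and q_low_space[] holds 10, 30 and 50
 * blocks for the 1%, 3% and 5% thresholds respectively.
 */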

/*
 * Ensure that the given in-core dquot has a buffer on disk backing it, and
 * return the buffer locked and held. This is called when the bmapi finds a
 * hole.
 */
STATIC int
xfs_dquot_disk_alloc(
	struct xfs_dquot	*dqp,
	struct xfs_buf		**bpp)
{
	struct xfs_bmbt_irec	map;
	struct xfs_trans	*tp;
	struct xfs_mount	*mp = dqp->q_mount;
	struct xfs_buf		*bp;
	xfs_dqtype_t		qtype = xfs_dquot_type(dqp);
	struct xfs_inode	*quotip = xfs_quota_inode(mp, qtype);
	int			nmaps = 1;
	int			error;

	trace_xfs_dqalloc(dqp);

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_dqalloc,
			XFS_QM_DQALLOC_SPACE_RES(mp), 0, 0, &tp);
	if (error)
		return error;

	xfs_ilock(quotip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, quotip, 0);

	if (!xfs_this_quota_on(dqp->q_mount, qtype)) {
		/*
		 * Return if this type of quota was turned off while we didn't
		 * have an inode lock.
		 */
		error = -ESRCH;
		goto err_cancel;
	}

	error = xfs_iext_count_may_overflow(quotip, XFS_DATA_FORK,
			XFS_IEXT_ADD_NOSPLIT_CNT);
	if (error == -EFBIG)
		error = xfs_iext_count_upgrade(tp, quotip,
				XFS_IEXT_ADD_NOSPLIT_CNT);
	if (error)
		goto err_cancel;

	/* Create the block mapping. */
	error = xfs_bmapi_write(tp, quotip, dqp->q_fileoffset,
			XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, 0, &map,
			&nmaps);
	if (error)
		goto err_cancel;

	ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
	ASSERT(nmaps == 1);
	ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
	       (map.br_startblock != HOLESTARTBLOCK));

	/*
	 * Keep track of the blkno to save a lookup later
	 */
	dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);

	/* now we can just get the buffer (there's nothing to read yet) */
	error = xfs_trans_get_buf(tp, mp->m_ddev_targp, dqp->q_blkno,
			mp->m_quotainfo->qi_dqchunklen, 0, &bp);
	if (error)
		goto err_cancel;
	bp->b_ops = &xfs_dquot_buf_ops;

	/*
	 * Make a chunk of dquots out of this buffer and log
	 * the entire thing.
	 */
	xfs_qm_init_dquot_blk(tp, dqp->q_id, qtype, bp);
	xfs_buf_set_ref(bp, XFS_DQUOT_REF);

	/*
	 * Hold the buffer and join it to the dfops so that we'll still own
	 * the buffer when we return to the caller. The buffer disposal on
	 * error must be paid attention to very carefully, as it has been
	 * broken since commit efa092f3d4c6 "[XFS] Fixes a bug in the quota
	 * code when allocating a new dquot record" in 2005, and the later
	 * conversion to xfs_defer_ops in commit 310a75a3c6c747 failed to keep
	 * the buffer locked across the _defer_finish call. We can now do
	 * this correctly with xfs_defer_bjoin.
	 *
	 * Above, we allocated a disk block for the dquot information and used
	 * get_buf to initialize the dquot. If the _defer_finish fails, the old
	 * transaction is gone but the new buffer is not joined or held to any
	 * transaction, so we must _buf_relse it.
	 *
	 * If everything succeeds, the caller of this function is returned a
	 * buffer that is locked and held to the transaction. The caller
	 * is responsible for unlocking any buffer passed back, either
	 * manually or by committing the transaction. On error, the buffer is
	 * released and not passed back.
	 *
	 * Keep the quota inode ILOCKed until after the transaction commit to
	 * maintain the atomicity of bmap/rmap updates.
	 */
	xfs_trans_bhold(tp, bp);
	error = xfs_trans_commit(tp);
	xfs_iunlock(quotip, XFS_ILOCK_EXCL);
	if (error) {
		xfs_buf_relse(bp);
		return error;
	}

	*bpp = bp;
	return 0;

err_cancel:
	xfs_trans_cancel(tp);
	xfs_iunlock(quotip, XFS_ILOCK_EXCL);
	return error;
}

/*
 * Read in the in-core dquot's on-disk metadata and return the buffer.
 * Returns -ENOENT to signal a hole.
 */
STATIC int
xfs_dquot_disk_read(
	struct xfs_mount	*mp,
	struct xfs_dquot	*dqp,
	struct xfs_buf		**bpp)
{
	struct xfs_bmbt_irec	map;
	struct xfs_buf		*bp;
	xfs_dqtype_t		qtype = xfs_dquot_type(dqp);
	struct xfs_inode	*quotip = xfs_quota_inode(mp, qtype);
	uint			lock_mode;
	int			nmaps = 1;
	int			error;

	lock_mode = xfs_ilock_data_map_shared(quotip);
	if (!xfs_this_quota_on(mp, qtype)) {
		/*
		 * Return if this type of quota was turned off while we
		 * didn't have the quota inode lock.
		 */
		xfs_iunlock(quotip, lock_mode);
		return -ESRCH;
	}

	/*
	 * Find the block map; no allocations yet
	 */
	error = xfs_bmapi_read(quotip, dqp->q_fileoffset,
			XFS_DQUOT_CLUSTER_SIZE_FSB, &map, &nmaps, 0);
	xfs_iunlock(quotip, lock_mode);
	if (error)
		return error;

	ASSERT(nmaps == 1);
	ASSERT(map.br_blockcount >= 1);
	ASSERT(map.br_startblock != DELAYSTARTBLOCK);
	if (map.br_startblock == HOLESTARTBLOCK)
		return -ENOENT;

	trace_xfs_dqtobp_read(dqp);

	/*
	 * store the blkno etc so that we don't have to do the
	 * mapping all the time
	 */
	dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);

	error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
			mp->m_quotainfo->qi_dqchunklen, 0, &bp,
			&xfs_dquot_buf_ops);
	if (xfs_metadata_is_sick(error))
		xfs_dquot_mark_sick(dqp);
	if (error) {
		ASSERT(bp == NULL);
		return error;
	}

	ASSERT(xfs_buf_islocked(bp));
	xfs_buf_set_ref(bp, XFS_DQUOT_REF);
	*bpp = bp;

	return 0;
}

/* Allocate and initialize everything we need for an incore dquot. */
STATIC struct xfs_dquot *
xfs_dquot_alloc(
	struct xfs_mount	*mp,
	xfs_dqid_t		id,
	xfs_dqtype_t		type)
{
	struct xfs_dquot	*dqp;

	dqp = kmem_cache_zalloc(xfs_dquot_cache, GFP_KERNEL | __GFP_NOFAIL);

	dqp->q_type = type;
	dqp->q_id = id;
	dqp->q_mount = mp;
	INIT_LIST_HEAD(&dqp->q_lru);
	mutex_init(&dqp->q_qlock);
	init_waitqueue_head(&dqp->q_pinwait);
	dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
	/*
	 * Offset of dquot in the (fixed sized) dquot chunk.
	 */
	dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) *
			sizeof(struct xfs_dqblk);
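
	/*
	 * Worked example (illustrative only, not from the original source):
	 * on a filesystem holding 30 dquots per chunk, id 100 maps to file
	 * offset FSB 100 / 30 = 3 and to a buffer offset of
	 * (100 % 30) * sizeof(struct xfs_dqblk) bytes into that chunk.
	 */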

	/*
	 * Because we want to use a counting completion, complete
	 * the flush completion once to allow a single access to
	 * the flush completion without blocking.
	 */
	init_completion(&dqp->q_flush);
	complete(&dqp->q_flush);

	/*
	 * Make sure group quotas have a different lock class than user
	 * quotas.
	 */
	switch (type) {
	case XFS_DQTYPE_USER:
		/* uses the default lock class */
		break;
	case XFS_DQTYPE_GROUP:
		lockdep_set_class(&dqp->q_qlock, &xfs_dquot_group_class);
		break;
	case XFS_DQTYPE_PROJ:
		lockdep_set_class(&dqp->q_qlock, &xfs_dquot_project_class);
		break;
	default:
		ASSERT(0);
		break;
	}

	xfs_qm_dquot_logitem_init(dqp);

	XFS_STATS_INC(mp, xs_qm_dquot);
	return dqp;
}

/* Check the ondisk dquot's id and type match what the incore dquot expects. */
static bool
xfs_dquot_check_type(
	struct xfs_dquot	*dqp,
	struct xfs_disk_dquot	*ddqp)
{
	uint8_t			ddqp_type;
	uint8_t			dqp_type;

	ddqp_type = ddqp->d_type & XFS_DQTYPE_REC_MASK;
	dqp_type = xfs_dquot_type(dqp);

	if (be32_to_cpu(ddqp->d_id) != dqp->q_id)
		return false;

	/*
	 * V5 filesystems always expect an exact type match. V4 filesystems
	 * expect an exact match for user dquots and for non-root group and
	 * project dquots.
	 */
	if (xfs_has_crc(dqp->q_mount) ||
	    dqp_type == XFS_DQTYPE_USER || dqp->q_id != 0)
		return ddqp_type == dqp_type;

	/*
	 * V4 filesystems support either group or project quotas, but not both
	 * at the same time. The non-user quota file can be switched between
	 * group and project quota uses depending on the mount options, which
	 * means that we can encounter the other type when we try to load quota
	 * defaults. Quotacheck will soon reset the entire quota file
	 * (including the root dquot) anyway, but don't log scary corruption
	 * reports to dmesg.
	 */
	return ddqp_type == XFS_DQTYPE_GROUP || ddqp_type == XFS_DQTYPE_PROJ;
}

/* Copy the in-core quota fields in from the on-disk buffer. */
STATIC int
xfs_dquot_from_disk(
	struct xfs_dquot	*dqp,
	struct xfs_buf		*bp)
{
	struct xfs_dqblk	*dqb = xfs_buf_offset(bp, dqp->q_bufoffset);
	struct xfs_disk_dquot	*ddqp = &dqb->dd_diskdq;

	/*
	 * Ensure that we got the type and ID we were looking for.
	 * Everything else was checked by the dquot buffer verifier.
	 */
	if (!xfs_dquot_check_type(dqp, ddqp)) {
		xfs_alert_tag(bp->b_mount, XFS_PTAG_VERIFIER_ERROR,
				"Metadata corruption detected at %pS, quota %u",
				__this_address, dqp->q_id);
		xfs_alert(bp->b_mount, "Unmount and run xfs_repair");
		xfs_dquot_mark_sick(dqp);
		return -EFSCORRUPTED;
	}

	/* copy everything from disk dquot to the incore dquot */
	dqp->q_type = ddqp->d_type;
	dqp->q_blk.hardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
	dqp->q_blk.softlimit = be64_to_cpu(ddqp->d_blk_softlimit);
	dqp->q_ino.hardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
	dqp->q_ino.softlimit = be64_to_cpu(ddqp->d_ino_softlimit);
	dqp->q_rtb.hardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
	dqp->q_rtb.softlimit = be64_to_cpu(ddqp->d_rtb_softlimit);

	dqp->q_blk.count = be64_to_cpu(ddqp->d_bcount);
	dqp->q_ino.count = be64_to_cpu(ddqp->d_icount);
	dqp->q_rtb.count = be64_to_cpu(ddqp->d_rtbcount);

	dqp->q_blk.timer = xfs_dquot_from_disk_ts(ddqp, ddqp->d_btimer);
	dqp->q_ino.timer = xfs_dquot_from_disk_ts(ddqp, ddqp->d_itimer);
	dqp->q_rtb.timer = xfs_dquot_from_disk_ts(ddqp, ddqp->d_rtbtimer);

	/*
	 * Reservation counters are defined as reservation plus current usage
	 * to avoid having to add every time.
	 */
	dqp->q_blk.reserved = dqp->q_blk.count;
	dqp->q_ino.reserved = dqp->q_ino.count;
	dqp->q_rtb.reserved = dqp->q_rtb.count;

	/* initialize the dquot speculative prealloc thresholds */
	xfs_dquot_set_prealloc_limits(dqp);
	return 0;
}

/* Copy the in-core quota fields into the on-disk buffer. */
void
xfs_dquot_to_disk(
	struct xfs_disk_dquot	*ddqp,
	struct xfs_dquot	*dqp)
{
	ddqp->d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
	ddqp->d_version = XFS_DQUOT_VERSION;
	ddqp->d_type = dqp->q_type;
	ddqp->d_id = cpu_to_be32(dqp->q_id);
	ddqp->d_pad0 = 0;
	ddqp->d_pad = 0;

	ddqp->d_blk_hardlimit = cpu_to_be64(dqp->q_blk.hardlimit);
	ddqp->d_blk_softlimit = cpu_to_be64(dqp->q_blk.softlimit);
	ddqp->d_ino_hardlimit = cpu_to_be64(dqp->q_ino.hardlimit);
	ddqp->d_ino_softlimit = cpu_to_be64(dqp->q_ino.softlimit);
	ddqp->d_rtb_hardlimit = cpu_to_be64(dqp->q_rtb.hardlimit);
	ddqp->d_rtb_softlimit = cpu_to_be64(dqp->q_rtb.softlimit);

	ddqp->d_bcount = cpu_to_be64(dqp->q_blk.count);
	ddqp->d_icount = cpu_to_be64(dqp->q_ino.count);
	ddqp->d_rtbcount = cpu_to_be64(dqp->q_rtb.count);

	ddqp->d_bwarns = 0;
	ddqp->d_iwarns = 0;
	ddqp->d_rtbwarns = 0;

	ddqp->d_btimer = xfs_dquot_to_disk_ts(dqp, dqp->q_blk.timer);
	ddqp->d_itimer = xfs_dquot_to_disk_ts(dqp, dqp->q_ino.timer);
	ddqp->d_rtbtimer = xfs_dquot_to_disk_ts(dqp, dqp->q_rtb.timer);
}

/*
 * Read in the ondisk dquot using dqtobp() then copy it to an incore version,
 * and release the buffer immediately. If @can_alloc is true, fill any
 * holes in the on-disk metadata.
 */
static int
xfs_qm_dqread(
	struct xfs_mount	*mp,
	xfs_dqid_t		id,
	xfs_dqtype_t		type,
	bool			can_alloc,
	struct xfs_dquot	**dqpp)
{
	struct xfs_dquot	*dqp;
	struct xfs_buf		*bp;
	int			error;

	dqp = xfs_dquot_alloc(mp, id, type);
	trace_xfs_dqread(dqp);

	/* Try to read the buffer, allocating if necessary. */
	error = xfs_dquot_disk_read(mp, dqp, &bp);
	if (error == -ENOENT && can_alloc)
		error = xfs_dquot_disk_alloc(dqp, &bp);
	if (error)
		goto err;

	/*
	 * At this point we should have a clean locked buffer. Copy the data
	 * to the incore dquot and release the buffer since the incore dquot
	 * has its own locking protocol so we needn't tie up the buffer any
	 * further.
	 */
	ASSERT(xfs_buf_islocked(bp));
	error = xfs_dquot_from_disk(dqp, bp);
	xfs_buf_relse(bp);
	if (error)
		goto err;

	*dqpp = dqp;
	return error;

err:
	trace_xfs_dqread_fail(dqp);
	xfs_qm_dqdestroy(dqp);
	*dqpp = NULL;
	return error;
}

/*
 * Advance to the next id in the current chunk, or if at the
 * end of the chunk, skip ahead to first id in next allocated chunk
 * using the SEEK_DATA interface.
 */
static int
xfs_dq_get_next_id(
	struct xfs_mount	*mp,
	xfs_dqtype_t		type,
	xfs_dqid_t		*id)
{
	struct xfs_inode	*quotip = xfs_quota_inode(mp, type);
	xfs_dqid_t		next_id = *id + 1; /* simple advance */
	uint			lock_flags;
	struct xfs_bmbt_irec	got;
	struct xfs_iext_cursor	cur;
	xfs_fsblock_t		start;
	int			error = 0;

	/* If we'd wrap past the max ID, stop */
	if (next_id < *id)
		return -ENOENT;

	/* If new ID is within the current chunk, advancing it sufficed */
	if (next_id % mp->m_quotainfo->qi_dqperchunk) {
		*id = next_id;
		return 0;
	}
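
	/*
	 * Example (illustrative only, not from the original source): with 30
	 * dquots per chunk, advancing from id 29 to 30 gives 30 % 30 == 0,
	 * so we fall through to the extent lookup below to find the chunk
	 * that holds id 30, if any.
	 */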

	/* Nope, next_id is now past the current chunk, so find the next one */
	start = (xfs_fsblock_t)next_id / mp->m_quotainfo->qi_dqperchunk;

	lock_flags = xfs_ilock_data_map_shared(quotip);
	error = xfs_iread_extents(NULL, quotip, XFS_DATA_FORK);
	if (error)
		return error;

	if (xfs_iext_lookup_extent(quotip, &quotip->i_df, start, &cur, &got)) {
		/* contiguous chunk, bump startoff for the id calculation */
		if (got.br_startoff < start)
			got.br_startoff = start;
		*id = got.br_startoff * mp->m_quotainfo->qi_dqperchunk;
	} else {
		error = -ENOENT;
	}

	xfs_iunlock(quotip, lock_flags);

	return error;
}

/*
 * Look up the dquot in the in-core cache. If found, the dquot is returned
 * locked and ready to go.
 */
static struct xfs_dquot *
xfs_qm_dqget_cache_lookup(
	struct xfs_mount	*mp,
	struct xfs_quotainfo	*qi,
	struct radix_tree_root	*tree,
	xfs_dqid_t		id)
{
	struct xfs_dquot	*dqp;

restart:
	mutex_lock(&qi->qi_tree_lock);
	dqp = radix_tree_lookup(tree, id);
	if (!dqp) {
		mutex_unlock(&qi->qi_tree_lock);
		XFS_STATS_INC(mp, xs_qm_dqcachemisses);
		return NULL;
	}

	xfs_dqlock(dqp);
	if (dqp->q_flags & XFS_DQFLAG_FREEING) {
		xfs_dqunlock(dqp);
		mutex_unlock(&qi->qi_tree_lock);
		trace_xfs_dqget_freeing(dqp);
		delay(1);
		goto restart;
	}

	dqp->q_nrefs++;
	mutex_unlock(&qi->qi_tree_lock);

	trace_xfs_dqget_hit(dqp);
	XFS_STATS_INC(mp, xs_qm_dqcachehits);
	return dqp;
}

/*
 * Try to insert a new dquot into the in-core cache. If an error occurs the
 * caller should throw away the dquot and start over. Otherwise, the dquot
 * is returned locked (and held by the cache) as if there had been a cache
 * hit.
 *
 * The insert needs to be done under memalloc_nofs context because the radix
 * tree can do memory allocation during insert. The qi->qi_tree_lock is taken in
 * memory reclaim when freeing unused dquots, so we cannot have the radix tree
 * node allocation recursing into filesystem reclaim whilst we hold the
 * qi_tree_lock.
 */
static int
xfs_qm_dqget_cache_insert(
	struct xfs_mount	*mp,
	struct xfs_quotainfo	*qi,
	struct radix_tree_root	*tree,
	xfs_dqid_t		id,
	struct xfs_dquot	*dqp)
{
	unsigned int		nofs_flags;
	int			error;

	nofs_flags = memalloc_nofs_save();
	mutex_lock(&qi->qi_tree_lock);
	error = radix_tree_insert(tree, id, dqp);
	if (unlikely(error)) {
		/* Duplicate found! Caller must try again. */
		trace_xfs_dqget_dup(dqp);
		goto out_unlock;
	}

	/* Return a locked dquot to the caller, with a reference taken. */
	xfs_dqlock(dqp);
	dqp->q_nrefs = 1;
	qi->qi_dquots++;

out_unlock:
	mutex_unlock(&qi->qi_tree_lock);
	memalloc_nofs_restore(nofs_flags);
	return error;
}

/* Check our input parameters. */
static int
xfs_qm_dqget_checks(
	struct xfs_mount	*mp,
	xfs_dqtype_t		type)
{
	switch (type) {
	case XFS_DQTYPE_USER:
		if (!XFS_IS_UQUOTA_ON(mp))
			return -ESRCH;
		return 0;
	case XFS_DQTYPE_GROUP:
		if (!XFS_IS_GQUOTA_ON(mp))
			return -ESRCH;
		return 0;
	case XFS_DQTYPE_PROJ:
		if (!XFS_IS_PQUOTA_ON(mp))
			return -ESRCH;
		return 0;
	default:
		WARN_ON_ONCE(0);
		return -EINVAL;
	}
}

/*
 * Given the file system, id, and type (UDQUOT/GDQUOT/PDQUOT), return a
 * locked dquot, doing an allocation (if requested) as needed.
 */
int
xfs_qm_dqget(
	struct xfs_mount	*mp,
	xfs_dqid_t		id,
	xfs_dqtype_t		type,
	bool			can_alloc,
	struct xfs_dquot	**O_dqpp)
{
	struct xfs_quotainfo	*qi = mp->m_quotainfo;
	struct radix_tree_root	*tree = xfs_dquot_tree(qi, type);
	struct xfs_dquot	*dqp;
	int			error;

	error = xfs_qm_dqget_checks(mp, type);
	if (error)
		return error;

restart:
	dqp = xfs_qm_dqget_cache_lookup(mp, qi, tree, id);
	if (dqp) {
		*O_dqpp = dqp;
		return 0;
	}

	error = xfs_qm_dqread(mp, id, type, can_alloc, &dqp);
	if (error)
		return error;

	error = xfs_qm_dqget_cache_insert(mp, qi, tree, id, dqp);
	if (error) {
		/*
		 * Duplicate found. Just throw away the new dquot and start
		 * over.
		 */
		xfs_qm_dqdestroy(dqp);
		XFS_STATS_INC(mp, xs_qm_dquot_dups);
		goto restart;
	}

	trace_xfs_dqget_miss(dqp);
	*O_dqpp = dqp;
	return 0;
}
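
/*
 * Example usage (illustrative sketch only, not from the original source):
 *
 *	struct xfs_dquot	*dqp;
 *	int			error;
 *
 *	error = xfs_qm_dqget(mp, uid, XFS_DQTYPE_USER, true, &dqp);
 *	if (!error) {
 *		(dqp is returned locked with a reference held)
 *		...inspect or modify the dquot...
 *		xfs_qm_dqput(dqp);	(drop the reference and unlock)
 *	}
 */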

/*
 * Given a dquot id and type, read and initialize a dquot from the on-disk
 * metadata. This function is only for use during quota initialization so
 * it ignores the dquot cache assuming that the dquot shrinker isn't set up.
 * The caller is responsible for _qm_dqdestroy'ing the returned dquot.
 */
int
xfs_qm_dqget_uncached(
	struct xfs_mount	*mp,
	xfs_dqid_t		id,
	xfs_dqtype_t		type,
	struct xfs_dquot	**dqpp)
{
	int			error;

	error = xfs_qm_dqget_checks(mp, type);
	if (error)
		return error;

	return xfs_qm_dqread(mp, id, type, 0, dqpp);
}

/* Return the quota id for a given inode and type. */
xfs_dqid_t
xfs_qm_id_for_quotatype(
	struct xfs_inode	*ip,
	xfs_dqtype_t		type)
{
	switch (type) {
	case XFS_DQTYPE_USER:
		return i_uid_read(VFS_I(ip));
	case XFS_DQTYPE_GROUP:
		return i_gid_read(VFS_I(ip));
	case XFS_DQTYPE_PROJ:
		return ip->i_projid;
	}
	ASSERT(0);
	return 0;
}

/*
 * Return the dquot for a given inode and type. If @can_alloc is true, then
 * allocate blocks if needed. The inode's ILOCK must be held and it must not
 * have already had a dquot of this type attached.
 */
int
xfs_qm_dqget_inode(
	struct xfs_inode	*ip,
	xfs_dqtype_t		type,
	bool			can_alloc,
	struct xfs_dquot	**O_dqpp)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_quotainfo	*qi = mp->m_quotainfo;
	struct radix_tree_root	*tree = xfs_dquot_tree(qi, type);
	struct xfs_dquot	*dqp;
	xfs_dqid_t		id;
	int			error;

	error = xfs_qm_dqget_checks(mp, type);
	if (error)
		return error;

	xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
	ASSERT(xfs_inode_dquot(ip, type) == NULL);

	id = xfs_qm_id_for_quotatype(ip, type);

restart:
	dqp = xfs_qm_dqget_cache_lookup(mp, qi, tree, id);
	if (dqp) {
		*O_dqpp = dqp;
		return 0;
	}

	/*
	 * Dquot cache miss. We don't want to keep the inode lock across
	 * a (potential) disk read. Also we don't want to deal with the lock
	 * ordering between quotainode and this inode. OTOH, dropping the inode
	 * lock here means dealing with a chown that can happen before
	 * we re-acquire the lock.
	 */
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	error = xfs_qm_dqread(mp, id, type, can_alloc, &dqp);
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	if (error)
		return error;

	/*
	 * A dquot could be attached to this inode by now, since we had
	 * dropped the ilock.
	 */
	if (xfs_this_quota_on(mp, type)) {
		struct xfs_dquot	*dqp1;

		dqp1 = xfs_inode_dquot(ip, type);
		if (dqp1) {
			xfs_qm_dqdestroy(dqp);
			dqp = dqp1;
			xfs_dqlock(dqp);
			goto dqret;
		}
	} else {
		/* inode stays locked on return */
		xfs_qm_dqdestroy(dqp);
		return -ESRCH;
	}

	error = xfs_qm_dqget_cache_insert(mp, qi, tree, id, dqp);
	if (error) {
		/*
		 * Duplicate found. Just throw away the new dquot and start
		 * over.
		 */
		xfs_qm_dqdestroy(dqp);
		XFS_STATS_INC(mp, xs_qm_dquot_dups);
		goto restart;
	}

dqret:
	xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
	trace_xfs_dqget_miss(dqp);
	*O_dqpp = dqp;
	return 0;
}

/*
 * Starting at @id and progressing upwards, look for an initialized incore
 * dquot, lock it, and return it.
 */
int
xfs_qm_dqget_next(
	struct xfs_mount	*mp,
	xfs_dqid_t		id,
	xfs_dqtype_t		type,
	struct xfs_dquot	**dqpp)
{
	struct xfs_dquot	*dqp;
	int			error = 0;

	*dqpp = NULL;
	for (; !error; error = xfs_dq_get_next_id(mp, type, &id)) {
		error = xfs_qm_dqget(mp, id, type, false, &dqp);
		if (error == -ENOENT)
			continue;
		else if (error != 0)
			break;

		if (!XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
			*dqpp = dqp;
			return 0;
		}

		xfs_qm_dqput(dqp);
	}

	return error;
}
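
/*
 * Example usage (illustrative sketch only, not from the original source),
 * walking all initialized dquots of one type from id 0 upwards until the
 * function returns -ENOENT:
 *
 *	xfs_dqid_t		id = 0;
 *	struct xfs_dquot	*dqp;
 *
 *	while (!xfs_qm_dqget_next(mp, id, XFS_DQTYPE_USER, &dqp)) {
 *		id = dqp->q_id + 1;
 *		...use the locked dquot...
 *		xfs_qm_dqput(dqp);
 *		if (id == 0)
 *			break;	(the id space wrapped around)
 *	}
 */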

/*
 * Release a reference to the dquot (decrement ref-count) and unlock it.
 *
 * If there is a group quota attached to this dquot, carefully release that
 * too without tripping over deadlocks'n'stuff.
 */
void
xfs_qm_dqput(
	struct xfs_dquot	*dqp)
{
	ASSERT(dqp->q_nrefs > 0);
	ASSERT(XFS_DQ_IS_LOCKED(dqp));

	trace_xfs_dqput(dqp);

	if (--dqp->q_nrefs == 0) {
		struct xfs_quotainfo	*qi = dqp->q_mount->m_quotainfo;
		trace_xfs_dqput_free(dqp);

		if (list_lru_add_obj(&qi->qi_lru, &dqp->q_lru))
			XFS_STATS_INC(dqp->q_mount, xs_qm_dquot_unused);
	}
	xfs_dqunlock(dqp);
}

/*
 * Release a dquot. Flush it if dirty, then dqput() it.
 * dquot must not be locked.
 */
void
xfs_qm_dqrele(
	struct xfs_dquot	*dqp)
{
	if (!dqp)
		return;

	trace_xfs_dqrele(dqp);

	xfs_dqlock(dqp);
	/*
	 * We don't care to flush it if the dquot is dirty here.
	 * That will create stutters that we want to avoid.
	 * Instead we do a delayed write when we try to reclaim
	 * a dirty dquot. Also xfs_sync will take part of the burden...
	 */
	xfs_qm_dqput(dqp);
}

/*
 * This is the dquot flushing I/O completion routine. It is called
 * from interrupt level when the buffer containing the dquot is
 * flushed to disk. It is responsible for removing the dquot logitem
 * from the AIL if it has not been re-logged, and unlocking the dquot's
 * flush lock. This behavior is very similar to that of inodes.
 */
static void
xfs_qm_dqflush_done(
	struct xfs_log_item	*lip)
{
	struct xfs_dq_logitem	*qip = (struct xfs_dq_logitem *)lip;
	struct xfs_dquot	*dqp = qip->qli_dquot;
	struct xfs_ail		*ailp = lip->li_ailp;
	xfs_lsn_t		tail_lsn;

	/*
	 * We only want to pull the item from the AIL if its location in the
	 * log has not changed since we started the flush. Thus, we only
	 * bother if the dquot's lsn has not changed. First we check the lsn
	 * outside the lock since it's cheaper, and then we recheck while
	 * holding the lock before removing the dquot from the AIL.
	 */
	if (test_bit(XFS_LI_IN_AIL, &lip->li_flags) &&
	    ((lip->li_lsn == qip->qli_flush_lsn) ||
	     test_bit(XFS_LI_FAILED, &lip->li_flags))) {

		spin_lock(&ailp->ail_lock);
		xfs_clear_li_failed(lip);
		if (lip->li_lsn == qip->qli_flush_lsn) {
			/* xfs_ail_update_finish() drops the AIL lock */
			tail_lsn = xfs_ail_delete_one(ailp, lip);
			xfs_ail_update_finish(ailp, tail_lsn);
		} else {
			spin_unlock(&ailp->ail_lock);
		}
	}

	/*
	 * Release the dq's flush lock since we're done with it.
	 */
	xfs_dqfunlock(dqp);
}

void
xfs_buf_dquot_iodone(
	struct xfs_buf		*bp)
{
	struct xfs_log_item	*lip, *n;

	list_for_each_entry_safe(lip, n, &bp->b_li_list, li_bio_list) {
		list_del_init(&lip->li_bio_list);
		xfs_qm_dqflush_done(lip);
	}
}

void
xfs_buf_dquot_io_fail(
	struct xfs_buf		*bp)
{
	struct xfs_log_item	*lip;

	spin_lock(&bp->b_mount->m_ail->ail_lock);
	list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
		xfs_set_li_failed(lip, bp);
	spin_unlock(&bp->b_mount->m_ail->ail_lock);
}

/* Check incore dquot for errors before we flush. */
static xfs_failaddr_t
xfs_qm_dqflush_check(
	struct xfs_dquot	*dqp)
{
	xfs_dqtype_t		type = xfs_dquot_type(dqp);

	if (type != XFS_DQTYPE_USER &&
	    type != XFS_DQTYPE_GROUP &&
	    type != XFS_DQTYPE_PROJ)
		return __this_address;

	if (dqp->q_id == 0)
		return NULL;

	if (dqp->q_blk.softlimit && dqp->q_blk.count > dqp->q_blk.softlimit &&
	    !dqp->q_blk.timer)
		return __this_address;

	if (dqp->q_ino.softlimit && dqp->q_ino.count > dqp->q_ino.softlimit &&
	    !dqp->q_ino.timer)
		return __this_address;

	if (dqp->q_rtb.softlimit && dqp->q_rtb.count > dqp->q_rtb.softlimit &&
	    !dqp->q_rtb.timer)
		return __this_address;

	/* bigtime flag should never be set on root dquots */
	if (dqp->q_type & XFS_DQTYPE_BIGTIME) {
		if (!xfs_has_bigtime(dqp->q_mount))
			return __this_address;
		if (dqp->q_id == 0)
			return __this_address;
	}

	return NULL;
}

/*
 * Write a modified dquot to disk.
 * The dquot must be locked and the flush lock held by the caller.
 * The flush lock will not be unlocked until the dquot reaches the disk,
 * but the dquot is free to be unlocked and modified by the caller
 * in the interim. Dquot is still locked on return. This behavior is
 * identical to that of inodes.
 */
int
xfs_qm_dqflush(
	struct xfs_dquot	*dqp,
	struct xfs_buf		**bpp)
{
	struct xfs_mount	*mp = dqp->q_mount;
	struct xfs_log_item	*lip = &dqp->q_logitem.qli_item;
	struct xfs_buf		*bp;
	struct xfs_dqblk	*dqblk;
	xfs_failaddr_t		fa;
	int			error;

	ASSERT(XFS_DQ_IS_LOCKED(dqp));
	ASSERT(!completion_done(&dqp->q_flush));

	trace_xfs_dqflush(dqp);

	*bpp = NULL;

	xfs_qm_dqunpin_wait(dqp);

	/*
	 * Get the buffer containing the on-disk dquot
	 */
	error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
			mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK,
			&bp, &xfs_dquot_buf_ops);
	if (error == -EAGAIN)
		goto out_unlock;
	if (xfs_metadata_is_sick(error))
		xfs_dquot_mark_sick(dqp);
	if (error)
		goto out_abort;

	fa = xfs_qm_dqflush_check(dqp);
	if (fa) {
		xfs_alert(mp, "corrupt dquot ID 0x%x in memory at %pS",
				dqp->q_id, fa);
		xfs_buf_relse(bp);
		xfs_dquot_mark_sick(dqp);
		error = -EFSCORRUPTED;
		goto out_abort;
	}

	/* Flush the incore dquot to the ondisk buffer. */
	dqblk = xfs_buf_offset(bp, dqp->q_bufoffset);
	xfs_dquot_to_disk(&dqblk->dd_diskdq, dqp);

	/*
	 * Clear the dirty field and remember the flush lsn for later use.
	 */
	dqp->q_flags &= ~XFS_DQFLAG_DIRTY;

	xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn,
			&dqp->q_logitem.qli_item.li_lsn);

	/*
	 * copy the lsn into the on-disk dquot now while we have the in memory
	 * dquot here. This can't be done later in the write verifier as we
	 * can't get access to the log item at that point in time.
	 *
	 * We also calculate the CRC here so that the on-disk dquot in the
	 * buffer always has a valid CRC. This ensures there is no possibility
	 * of a dquot without an up-to-date CRC getting to disk.
	 */
	if (xfs_has_crc(mp)) {
		dqblk->dd_lsn = cpu_to_be64(dqp->q_logitem.qli_item.li_lsn);
		xfs_update_cksum((char *)dqblk, sizeof(struct xfs_dqblk),
				 XFS_DQUOT_CRC_OFF);
	}

	/*
	 * Attach the dquot to the buffer so that we can remove this dquot from
	 * the AIL and release the flush lock once the dquot is synced to disk.
	 */
	bp->b_flags |= _XBF_DQUOTS;
	list_add_tail(&dqp->q_logitem.qli_item.li_bio_list, &bp->b_li_list);

	/*
	 * If the buffer is pinned then push on the log so we won't
	 * get stuck waiting in the write for too long.
	 */
	if (xfs_buf_ispinned(bp)) {
		trace_xfs_dqflush_force(dqp);
		xfs_log_force(mp, 0);
	}

	trace_xfs_dqflush_done(dqp);
	*bpp = bp;
	return 0;

out_abort:
	dqp->q_flags &= ~XFS_DQFLAG_DIRTY;
	xfs_trans_ail_delete(lip, 0);
	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
out_unlock:
	xfs_dqfunlock(dqp);
	return error;
}

/*
 * Lock two xfs_dquot structures.
 *
 * To avoid deadlocks we always lock the quota structure with
 * the lower id first.
 */
void
xfs_dqlock2(
	struct xfs_dquot	*d1,
	struct xfs_dquot	*d2)
{
	if (d1 && d2) {
		ASSERT(d1 != d2);
		if (d1->q_id > d2->q_id) {
			mutex_lock(&d2->q_qlock);
			mutex_lock_nested(&d1->q_qlock, XFS_QLOCK_NESTED);
		} else {
			mutex_lock(&d1->q_qlock);
			mutex_lock_nested(&d2->q_qlock, XFS_QLOCK_NESTED);
		}
	} else if (d1) {
		mutex_lock(&d1->q_qlock);
	} else if (d2) {
		mutex_lock(&d2->q_qlock);
	}
}

int __init
xfs_qm_init(void)
{
	xfs_dquot_cache = kmem_cache_create("xfs_dquot",
					    sizeof(struct xfs_dquot),
					    0, 0, NULL);
	if (!xfs_dquot_cache)
		goto out;

	xfs_dqtrx_cache = kmem_cache_create("xfs_dqtrx",
					    sizeof(struct xfs_dquot_acct),
					    0, 0, NULL);
	if (!xfs_dqtrx_cache)
		goto out_free_dquot_cache;

	return 0;

out_free_dquot_cache:
	kmem_cache_destroy(xfs_dquot_cache);
out:
	return -ENOMEM;
}

void
xfs_qm_exit(void)
{
	kmem_cache_destroy(xfs_dqtrx_cache);
	kmem_cache_destroy(xfs_dquot_cache);
}