1 | // SPDX-License-Identifier: GPL-2.0+ |
2 | /* |
3 | * Copyright (C) 2016 Oracle. All Rights Reserved. |
4 | * Author: Darrick J. Wong <darrick.wong@oracle.com> |
5 | */ |
6 | #include "xfs.h" |
7 | #include "xfs_fs.h" |
8 | #include "xfs_shared.h" |
9 | #include "xfs_format.h" |
10 | #include "xfs_log_format.h" |
11 | #include "xfs_trans_resv.h" |
12 | #include "xfs_mount.h" |
13 | #include "xfs_btree.h" |
14 | #include "xfs_btree_staging.h" |
15 | #include "xfs_refcount_btree.h" |
16 | #include "xfs_refcount.h" |
17 | #include "xfs_alloc.h" |
18 | #include "xfs_error.h" |
19 | #include "xfs_health.h" |
20 | #include "xfs_trace.h" |
21 | #include "xfs_trans.h" |
22 | #include "xfs_bit.h" |
23 | #include "xfs_rmap.h" |
24 | #include "xfs_ag.h" |
25 | |
26 | static struct kmem_cache *xfs_refcountbt_cur_cache; |
27 | |
28 | static struct xfs_btree_cur * |
29 | xfs_refcountbt_dup_cursor( |
30 | struct xfs_btree_cur *cur) |
31 | { |
32 | return xfs_refcountbt_init_cursor(mp: cur->bc_mp, tp: cur->bc_tp, |
33 | agbp: cur->bc_ag.agbp, pag: cur->bc_ag.pag); |
34 | } |
35 | |
36 | STATIC void |
37 | xfs_refcountbt_set_root( |
38 | struct xfs_btree_cur *cur, |
39 | const union xfs_btree_ptr *ptr, |
40 | int inc) |
41 | { |
42 | struct xfs_buf *agbp = cur->bc_ag.agbp; |
43 | struct xfs_agf *agf = agbp->b_addr; |
44 | struct xfs_perag *pag = agbp->b_pag; |
45 | |
46 | ASSERT(ptr->s != 0); |
47 | |
48 | agf->agf_refcount_root = ptr->s; |
49 | be32_add_cpu(&agf->agf_refcount_level, inc); |
50 | pag->pagf_refcount_level += inc; |
51 | |
52 | xfs_alloc_log_agf(cur->bc_tp, agbp, |
53 | XFS_AGF_REFCOUNT_ROOT | XFS_AGF_REFCOUNT_LEVEL); |
54 | } |
55 | |
56 | STATIC int |
57 | xfs_refcountbt_alloc_block( |
58 | struct xfs_btree_cur *cur, |
59 | const union xfs_btree_ptr *start, |
60 | union xfs_btree_ptr *new, |
61 | int *stat) |
62 | { |
63 | struct xfs_buf *agbp = cur->bc_ag.agbp; |
64 | struct xfs_agf *agf = agbp->b_addr; |
65 | struct xfs_alloc_arg args; /* block allocation args */ |
66 | int error; /* error return value */ |
67 | |
68 | memset(&args, 0, sizeof(args)); |
69 | args.tp = cur->bc_tp; |
70 | args.mp = cur->bc_mp; |
71 | args.pag = cur->bc_ag.pag; |
72 | args.oinfo = XFS_RMAP_OINFO_REFC; |
73 | args.minlen = args.maxlen = args.prod = 1; |
74 | args.resv = XFS_AG_RESV_METADATA; |
75 | |
76 | error = xfs_alloc_vextent_near_bno(&args, |
77 | XFS_AGB_TO_FSB(args.mp, args.pag->pag_agno, |
78 | xfs_refc_block(args.mp))); |
79 | if (error) |
80 | goto out_error; |
81 | if (args.fsbno == NULLFSBLOCK) { |
82 | *stat = 0; |
83 | return 0; |
84 | } |
85 | ASSERT(args.agno == cur->bc_ag.pag->pag_agno); |
86 | ASSERT(args.len == 1); |
87 | |
88 | new->s = cpu_to_be32(args.agbno); |
89 | be32_add_cpu(&agf->agf_refcount_blocks, 1); |
90 | xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS); |
91 | |
92 | *stat = 1; |
93 | return 0; |
94 | |
95 | out_error: |
96 | return error; |
97 | } |
98 | |
99 | STATIC int |
100 | xfs_refcountbt_free_block( |
101 | struct xfs_btree_cur *cur, |
102 | struct xfs_buf *bp) |
103 | { |
104 | struct xfs_mount *mp = cur->bc_mp; |
105 | struct xfs_buf *agbp = cur->bc_ag.agbp; |
106 | struct xfs_agf *agf = agbp->b_addr; |
107 | xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp)); |
108 | |
109 | be32_add_cpu(&agf->agf_refcount_blocks, -1); |
110 | xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS); |
111 | return xfs_free_extent_later(cur->bc_tp, fsbno, 1, |
112 | &XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA, false); |
113 | } |
114 | |
115 | STATIC int |
116 | xfs_refcountbt_get_minrecs( |
117 | struct xfs_btree_cur *cur, |
118 | int level) |
119 | { |
120 | return cur->bc_mp->m_refc_mnr[level != 0]; |
121 | } |
122 | |
123 | STATIC int |
124 | xfs_refcountbt_get_maxrecs( |
125 | struct xfs_btree_cur *cur, |
126 | int level) |
127 | { |
128 | return cur->bc_mp->m_refc_mxr[level != 0]; |
129 | } |
130 | |
131 | STATIC void |
132 | xfs_refcountbt_init_key_from_rec( |
133 | union xfs_btree_key *key, |
134 | const union xfs_btree_rec *rec) |
135 | { |
136 | key->refc.rc_startblock = rec->refc.rc_startblock; |
137 | } |
138 | |
139 | STATIC void |
140 | xfs_refcountbt_init_high_key_from_rec( |
141 | union xfs_btree_key *key, |
142 | const union xfs_btree_rec *rec) |
143 | { |
144 | __u32 x; |
145 | |
146 | x = be32_to_cpu(rec->refc.rc_startblock); |
147 | x += be32_to_cpu(rec->refc.rc_blockcount) - 1; |
148 | key->refc.rc_startblock = cpu_to_be32(x); |
149 | } |
150 | |
151 | STATIC void |
152 | xfs_refcountbt_init_rec_from_cur( |
153 | struct xfs_btree_cur *cur, |
154 | union xfs_btree_rec *rec) |
155 | { |
156 | const struct xfs_refcount_irec *irec = &cur->bc_rec.rc; |
157 | uint32_t start; |
158 | |
159 | start = xfs_refcount_encode_startblock(irec->rc_startblock, |
160 | irec->rc_domain); |
161 | rec->refc.rc_startblock = cpu_to_be32(start); |
162 | rec->refc.rc_blockcount = cpu_to_be32(cur->bc_rec.rc.rc_blockcount); |
163 | rec->refc.rc_refcount = cpu_to_be32(cur->bc_rec.rc.rc_refcount); |
164 | } |
165 | |
166 | STATIC void |
167 | xfs_refcountbt_init_ptr_from_cur( |
168 | struct xfs_btree_cur *cur, |
169 | union xfs_btree_ptr *ptr) |
170 | { |
171 | struct xfs_agf *agf = cur->bc_ag.agbp->b_addr; |
172 | |
173 | ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agf->agf_seqno)); |
174 | |
175 | ptr->s = agf->agf_refcount_root; |
176 | } |
177 | |
178 | STATIC int64_t |
179 | xfs_refcountbt_key_diff( |
180 | struct xfs_btree_cur *cur, |
181 | const union xfs_btree_key *key) |
182 | { |
183 | const struct xfs_refcount_key *kp = &key->refc; |
184 | const struct xfs_refcount_irec *irec = &cur->bc_rec.rc; |
185 | uint32_t start; |
186 | |
187 | start = xfs_refcount_encode_startblock(irec->rc_startblock, |
188 | irec->rc_domain); |
189 | return (int64_t)be32_to_cpu(kp->rc_startblock) - start; |
190 | } |
191 | |
192 | STATIC int64_t |
193 | xfs_refcountbt_diff_two_keys( |
194 | struct xfs_btree_cur *cur, |
195 | const union xfs_btree_key *k1, |
196 | const union xfs_btree_key *k2, |
197 | const union xfs_btree_key *mask) |
198 | { |
199 | ASSERT(!mask || mask->refc.rc_startblock); |
200 | |
201 | return (int64_t)be32_to_cpu(k1->refc.rc_startblock) - |
202 | be32_to_cpu(k2->refc.rc_startblock); |
203 | } |
204 | |
205 | STATIC xfs_failaddr_t |
206 | xfs_refcountbt_verify( |
207 | struct xfs_buf *bp) |
208 | { |
209 | struct xfs_mount *mp = bp->b_mount; |
210 | struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); |
211 | struct xfs_perag *pag = bp->b_pag; |
212 | xfs_failaddr_t fa; |
213 | unsigned int level; |
214 | |
215 | if (!xfs_verify_magic(bp, block->bb_magic)) |
216 | return __this_address; |
217 | |
218 | if (!xfs_has_reflink(mp)) |
219 | return __this_address; |
220 | fa = xfs_btree_agblock_v5hdr_verify(bp); |
221 | if (fa) |
222 | return fa; |
223 | |
224 | level = be16_to_cpu(block->bb_level); |
225 | if (pag && xfs_perag_initialised_agf(pag)) { |
226 | unsigned int maxlevel = pag->pagf_refcount_level; |
227 | |
228 | #ifdef CONFIG_XFS_ONLINE_REPAIR |
229 | /* |
230 | * Online repair could be rewriting the refcount btree, so |
231 | * we'll validate against the larger of either tree while this |
232 | * is going on. |
233 | */ |
234 | maxlevel = max_t(unsigned int, maxlevel, |
235 | pag->pagf_repair_refcount_level); |
236 | #endif |
237 | if (level >= maxlevel) |
238 | return __this_address; |
239 | } else if (level >= mp->m_refc_maxlevels) |
240 | return __this_address; |
241 | |
242 | return xfs_btree_agblock_verify(bp, mp->m_refc_mxr[level != 0]); |
243 | } |
244 | |
245 | STATIC void |
246 | xfs_refcountbt_read_verify( |
247 | struct xfs_buf *bp) |
248 | { |
249 | xfs_failaddr_t fa; |
250 | |
251 | if (!xfs_btree_agblock_verify_crc(bp)) |
252 | xfs_verifier_error(bp, -EFSBADCRC, __this_address); |
253 | else { |
254 | fa = xfs_refcountbt_verify(bp); |
255 | if (fa) |
256 | xfs_verifier_error(bp, -EFSCORRUPTED, fa); |
257 | } |
258 | |
259 | if (bp->b_error) |
260 | trace_xfs_btree_corrupt(bp, _RET_IP_); |
261 | } |
262 | |
263 | STATIC void |
264 | xfs_refcountbt_write_verify( |
265 | struct xfs_buf *bp) |
266 | { |
267 | xfs_failaddr_t fa; |
268 | |
269 | fa = xfs_refcountbt_verify(bp); |
270 | if (fa) { |
271 | trace_xfs_btree_corrupt(bp, _RET_IP_); |
272 | xfs_verifier_error(bp, -EFSCORRUPTED, fa); |
273 | return; |
274 | } |
275 | xfs_btree_agblock_calc_crc(bp); |
276 | |
277 | } |
278 | |
279 | const struct xfs_buf_ops xfs_refcountbt_buf_ops = { |
280 | .name = "xfs_refcountbt" , |
281 | .magic = { 0, cpu_to_be32(XFS_REFC_CRC_MAGIC) }, |
282 | .verify_read = xfs_refcountbt_read_verify, |
283 | .verify_write = xfs_refcountbt_write_verify, |
284 | .verify_struct = xfs_refcountbt_verify, |
285 | }; |
286 | |
287 | STATIC int |
288 | xfs_refcountbt_keys_inorder( |
289 | struct xfs_btree_cur *cur, |
290 | const union xfs_btree_key *k1, |
291 | const union xfs_btree_key *k2) |
292 | { |
293 | return be32_to_cpu(k1->refc.rc_startblock) < |
294 | be32_to_cpu(k2->refc.rc_startblock); |
295 | } |
296 | |
297 | STATIC int |
298 | xfs_refcountbt_recs_inorder( |
299 | struct xfs_btree_cur *cur, |
300 | const union xfs_btree_rec *r1, |
301 | const union xfs_btree_rec *r2) |
302 | { |
303 | return be32_to_cpu(r1->refc.rc_startblock) + |
304 | be32_to_cpu(r1->refc.rc_blockcount) <= |
305 | be32_to_cpu(r2->refc.rc_startblock); |
306 | } |
307 | |
308 | STATIC enum xbtree_key_contig |
309 | xfs_refcountbt_keys_contiguous( |
310 | struct xfs_btree_cur *cur, |
311 | const union xfs_btree_key *key1, |
312 | const union xfs_btree_key *key2, |
313 | const union xfs_btree_key *mask) |
314 | { |
315 | ASSERT(!mask || mask->refc.rc_startblock); |
316 | |
317 | return xbtree_key_contig(be32_to_cpu(key1->refc.rc_startblock), |
318 | be32_to_cpu(key2->refc.rc_startblock)); |
319 | } |
320 | |
321 | const struct xfs_btree_ops xfs_refcountbt_ops = { |
322 | .name = "refcount" , |
323 | .type = XFS_BTREE_TYPE_AG, |
324 | |
325 | .rec_len = sizeof(struct xfs_refcount_rec), |
326 | .key_len = sizeof(struct xfs_refcount_key), |
327 | .ptr_len = XFS_BTREE_SHORT_PTR_LEN, |
328 | |
329 | .lru_refs = XFS_REFC_BTREE_REF, |
330 | .statoff = XFS_STATS_CALC_INDEX(xs_refcbt_2), |
331 | .sick_mask = XFS_SICK_AG_REFCNTBT, |
332 | |
333 | .dup_cursor = xfs_refcountbt_dup_cursor, |
334 | .set_root = xfs_refcountbt_set_root, |
335 | .alloc_block = xfs_refcountbt_alloc_block, |
336 | .free_block = xfs_refcountbt_free_block, |
337 | .get_minrecs = xfs_refcountbt_get_minrecs, |
338 | .get_maxrecs = xfs_refcountbt_get_maxrecs, |
339 | .init_key_from_rec = xfs_refcountbt_init_key_from_rec, |
340 | .init_high_key_from_rec = xfs_refcountbt_init_high_key_from_rec, |
341 | .init_rec_from_cur = xfs_refcountbt_init_rec_from_cur, |
342 | .init_ptr_from_cur = xfs_refcountbt_init_ptr_from_cur, |
343 | .key_diff = xfs_refcountbt_key_diff, |
344 | .buf_ops = &xfs_refcountbt_buf_ops, |
345 | .diff_two_keys = xfs_refcountbt_diff_two_keys, |
346 | .keys_inorder = xfs_refcountbt_keys_inorder, |
347 | .recs_inorder = xfs_refcountbt_recs_inorder, |
348 | .keys_contiguous = xfs_refcountbt_keys_contiguous, |
349 | }; |
350 | |
351 | /* |
352 | * Create a new refcount btree cursor. |
353 | * |
354 | * For staging cursors tp and agbp are NULL. |
355 | */ |
356 | struct xfs_btree_cur * |
357 | xfs_refcountbt_init_cursor( |
358 | struct xfs_mount *mp, |
359 | struct xfs_trans *tp, |
360 | struct xfs_buf *agbp, |
361 | struct xfs_perag *pag) |
362 | { |
363 | struct xfs_btree_cur *cur; |
364 | |
365 | ASSERT(pag->pag_agno < mp->m_sb.sb_agcount); |
366 | |
367 | cur = xfs_btree_alloc_cursor(mp, tp, &xfs_refcountbt_ops, |
368 | mp->m_refc_maxlevels, xfs_refcountbt_cur_cache); |
369 | cur->bc_ag.pag = xfs_perag_hold(pag); |
370 | cur->bc_refc.nr_ops = 0; |
371 | cur->bc_refc.shape_changes = 0; |
372 | cur->bc_ag.agbp = agbp; |
373 | if (agbp) { |
374 | struct xfs_agf *agf = agbp->b_addr; |
375 | |
376 | cur->bc_nlevels = be32_to_cpu(agf->agf_refcount_level); |
377 | } |
378 | return cur; |
379 | } |
380 | |
381 | /* |
382 | * Swap in the new btree root. Once we pass this point the newly rebuilt btree |
383 | * is in place and we have to kill off all the old btree blocks. |
384 | */ |
385 | void |
386 | xfs_refcountbt_commit_staged_btree( |
387 | struct xfs_btree_cur *cur, |
388 | struct xfs_trans *tp, |
389 | struct xfs_buf *agbp) |
390 | { |
391 | struct xfs_agf *agf = agbp->b_addr; |
392 | struct xbtree_afakeroot *afake = cur->bc_ag.afake; |
393 | |
394 | ASSERT(cur->bc_flags & XFS_BTREE_STAGING); |
395 | |
396 | agf->agf_refcount_root = cpu_to_be32(afake->af_root); |
397 | agf->agf_refcount_level = cpu_to_be32(afake->af_levels); |
398 | agf->agf_refcount_blocks = cpu_to_be32(afake->af_blocks); |
399 | xfs_alloc_log_agf(tp, agbp, XFS_AGF_REFCOUNT_BLOCKS | |
400 | XFS_AGF_REFCOUNT_ROOT | |
401 | XFS_AGF_REFCOUNT_LEVEL); |
402 | xfs_btree_commit_afakeroot(cur, tp, agbp); |
403 | } |
404 | |
405 | /* Calculate number of records in a refcount btree block. */ |
406 | static inline unsigned int |
407 | xfs_refcountbt_block_maxrecs( |
408 | unsigned int blocklen, |
409 | bool leaf) |
410 | { |
411 | if (leaf) |
412 | return blocklen / sizeof(struct xfs_refcount_rec); |
413 | return blocklen / (sizeof(struct xfs_refcount_key) + |
414 | sizeof(xfs_refcount_ptr_t)); |
415 | } |
416 | |
417 | /* |
418 | * Calculate the number of records in a refcount btree block. |
419 | */ |
420 | int |
421 | xfs_refcountbt_maxrecs( |
422 | int blocklen, |
423 | bool leaf) |
424 | { |
425 | blocklen -= XFS_REFCOUNT_BLOCK_LEN; |
426 | return xfs_refcountbt_block_maxrecs(blocklen, leaf); |
427 | } |
428 | |
429 | /* Compute the max possible height of the maximally sized refcount btree. */ |
430 | unsigned int |
431 | xfs_refcountbt_maxlevels_ondisk(void) |
432 | { |
433 | unsigned int minrecs[2]; |
434 | unsigned int blocklen; |
435 | |
436 | blocklen = XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN; |
437 | |
438 | minrecs[0] = xfs_refcountbt_block_maxrecs(blocklen, true) / 2; |
439 | minrecs[1] = xfs_refcountbt_block_maxrecs(blocklen, false) / 2; |
440 | |
441 | return xfs_btree_compute_maxlevels(minrecs, XFS_MAX_CRC_AG_BLOCKS); |
442 | } |
443 | |
444 | /* Compute the maximum height of a refcount btree. */ |
445 | void |
446 | xfs_refcountbt_compute_maxlevels( |
447 | struct xfs_mount *mp) |
448 | { |
449 | if (!xfs_has_reflink(mp)) { |
450 | mp->m_refc_maxlevels = 0; |
451 | return; |
452 | } |
453 | |
454 | mp->m_refc_maxlevels = xfs_btree_compute_maxlevels( |
455 | limits: mp->m_refc_mnr, records: mp->m_sb.sb_agblocks); |
456 | ASSERT(mp->m_refc_maxlevels <= xfs_refcountbt_maxlevels_ondisk()); |
457 | } |
458 | |
459 | /* Calculate the refcount btree size for some records. */ |
460 | xfs_extlen_t |
461 | xfs_refcountbt_calc_size( |
462 | struct xfs_mount *mp, |
463 | unsigned long long len) |
464 | { |
465 | return xfs_btree_calc_size(limits: mp->m_refc_mnr, records: len); |
466 | } |
467 | |
468 | /* |
469 | * Calculate the maximum refcount btree size. |
470 | */ |
471 | xfs_extlen_t |
472 | xfs_refcountbt_max_size( |
473 | struct xfs_mount *mp, |
474 | xfs_agblock_t agblocks) |
475 | { |
476 | /* Bail out if we're uninitialized, which can happen in mkfs. */ |
477 | if (mp->m_refc_mxr[0] == 0) |
478 | return 0; |
479 | |
480 | return xfs_refcountbt_calc_size(mp, agblocks); |
481 | } |
482 | |
483 | /* |
484 | * Figure out how many blocks to reserve and how many are used by this btree. |
485 | */ |
486 | int |
487 | xfs_refcountbt_calc_reserves( |
488 | struct xfs_mount *mp, |
489 | struct xfs_trans *tp, |
490 | struct xfs_perag *pag, |
491 | xfs_extlen_t *ask, |
492 | xfs_extlen_t *used) |
493 | { |
494 | struct xfs_buf *agbp; |
495 | struct xfs_agf *agf; |
496 | xfs_agblock_t agblocks; |
497 | xfs_extlen_t tree_len; |
498 | int error; |
499 | |
500 | if (!xfs_has_reflink(mp)) |
501 | return 0; |
502 | |
503 | error = xfs_alloc_read_agf(pag, tp, flags: 0, agfbpp: &agbp); |
504 | if (error) |
505 | return error; |
506 | |
507 | agf = agbp->b_addr; |
508 | agblocks = be32_to_cpu(agf->agf_length); |
509 | tree_len = be32_to_cpu(agf->agf_refcount_blocks); |
510 | xfs_trans_brelse(tp, agbp); |
511 | |
512 | /* |
513 | * The log is permanently allocated, so the space it occupies will |
514 | * never be available for the kinds of things that would require btree |
515 | * expansion. We therefore can pretend the space isn't there. |
516 | */ |
517 | if (xfs_ag_contains_log(mp, pag->pag_agno)) |
518 | agblocks -= mp->m_sb.sb_logblocks; |
519 | |
520 | *ask += xfs_refcountbt_max_size(mp, agblocks); |
521 | *used += tree_len; |
522 | |
523 | return error; |
524 | } |
525 | |
526 | int __init |
527 | xfs_refcountbt_init_cur_cache(void) |
528 | { |
529 | xfs_refcountbt_cur_cache = kmem_cache_create("xfs_refcbt_cur" , |
530 | xfs_btree_cur_sizeof(xfs_refcountbt_maxlevels_ondisk()), |
531 | 0, 0, NULL); |
532 | |
533 | if (!xfs_refcountbt_cur_cache) |
534 | return -ENOMEM; |
535 | return 0; |
536 | } |
537 | |
538 | void |
539 | xfs_refcountbt_destroy_cur_cache(void) |
540 | { |
541 | kmem_cache_destroy(xfs_refcountbt_cur_cache); |
542 | xfs_refcountbt_cur_cache = NULL; |
543 | } |
544 | |