1 | // SPDX-License-Identifier: GPL-2.0+ |
2 | /* |
3 | * Copyright (C) 2016 Oracle. All Rights Reserved. |
4 | * Author: Darrick J. Wong <darrick.wong@oracle.com> |
5 | */ |
6 | #include "xfs.h" |
7 | #include "xfs_fs.h" |
8 | #include "xfs_shared.h" |
9 | #include "xfs_format.h" |
10 | #include "xfs_log_format.h" |
11 | #include "xfs_trans_resv.h" |
12 | #include "xfs_mount.h" |
13 | #include "xfs_defer.h" |
14 | #include "xfs_btree.h" |
15 | #include "xfs_bmap.h" |
16 | #include "xfs_refcount_btree.h" |
17 | #include "xfs_alloc.h" |
18 | #include "xfs_errortag.h" |
19 | #include "xfs_error.h" |
20 | #include "xfs_trace.h" |
21 | #include "xfs_trans.h" |
22 | #include "xfs_bit.h" |
23 | #include "xfs_refcount.h" |
24 | #include "xfs_rmap.h" |
25 | #include "xfs_ag.h" |
26 | #include "xfs_health.h" |
27 | |
/*
 * Slab cache handle with external linkage; nothing in this part of the file
 * allocates from or initializes it.
 * NOTE(review): presumably backs refcount update intent items -- confirm
 * against the code that creates the cache.
 */
struct kmem_cache	*xfs_refcount_intent_cache;
29 | |
/*
 * Allowable refcount adjustment amounts.
 *
 * xfs_refc_merge_refcount() adds the enumerator's numeric value directly to
 * a record's reference count, so these values act as deltas, not just tags.
 * Note that XFS_REFCOUNT_ADJUST_COW_FREE has the same value (-1) as
 * XFS_REFCOUNT_ADJUST_DECREASE, so the two cannot be told apart by value.
 */
enum xfs_refc_adjust_op {
	XFS_REFCOUNT_ADJUST_INCREASE	= 1,
	XFS_REFCOUNT_ADJUST_DECREASE	= -1,
	XFS_REFCOUNT_ADJUST_COW_ALLOC	= 0,
	XFS_REFCOUNT_ADJUST_COW_FREE	= -1,
};
37 | |
/*
 * Forward declarations so that code earlier in the file can call these
 * helpers before their definitions.
 */
STATIC int __xfs_refcount_cow_alloc(struct xfs_btree_cur *rcur,
		xfs_agblock_t agbno, xfs_extlen_t aglen);
STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur,
		xfs_agblock_t agbno, xfs_extlen_t aglen);
42 | |
43 | /* |
44 | * Look up the first record less than or equal to [bno, len] in the btree |
45 | * given by cur. |
46 | */ |
47 | int |
48 | xfs_refcount_lookup_le( |
49 | struct xfs_btree_cur *cur, |
50 | enum xfs_refc_domain domain, |
51 | xfs_agblock_t bno, |
52 | int *stat) |
53 | { |
54 | trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, |
55 | xfs_refcount_encode_startblock(bno, domain), |
56 | XFS_LOOKUP_LE); |
57 | cur->bc_rec.rc.rc_startblock = bno; |
58 | cur->bc_rec.rc.rc_blockcount = 0; |
59 | cur->bc_rec.rc.rc_domain = domain; |
60 | return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); |
61 | } |
62 | |
63 | /* |
64 | * Look up the first record greater than or equal to [bno, len] in the btree |
65 | * given by cur. |
66 | */ |
67 | int |
68 | xfs_refcount_lookup_ge( |
69 | struct xfs_btree_cur *cur, |
70 | enum xfs_refc_domain domain, |
71 | xfs_agblock_t bno, |
72 | int *stat) |
73 | { |
74 | trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, |
75 | xfs_refcount_encode_startblock(bno, domain), |
76 | XFS_LOOKUP_GE); |
77 | cur->bc_rec.rc.rc_startblock = bno; |
78 | cur->bc_rec.rc.rc_blockcount = 0; |
79 | cur->bc_rec.rc.rc_domain = domain; |
80 | return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); |
81 | } |
82 | |
83 | /* |
84 | * Look up the first record equal to [bno, len] in the btree |
85 | * given by cur. |
86 | */ |
87 | int |
88 | xfs_refcount_lookup_eq( |
89 | struct xfs_btree_cur *cur, |
90 | enum xfs_refc_domain domain, |
91 | xfs_agblock_t bno, |
92 | int *stat) |
93 | { |
94 | trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, |
95 | xfs_refcount_encode_startblock(bno, domain), |
96 | XFS_LOOKUP_LE); |
97 | cur->bc_rec.rc.rc_startblock = bno; |
98 | cur->bc_rec.rc.rc_blockcount = 0; |
99 | cur->bc_rec.rc.rc_domain = domain; |
100 | return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); |
101 | } |
102 | |
103 | /* Convert on-disk record to in-core format. */ |
104 | void |
105 | xfs_refcount_btrec_to_irec( |
106 | const union xfs_btree_rec *rec, |
107 | struct xfs_refcount_irec *irec) |
108 | { |
109 | uint32_t start; |
110 | |
111 | start = be32_to_cpu(rec->refc.rc_startblock); |
112 | if (start & XFS_REFC_COWFLAG) { |
113 | start &= ~XFS_REFC_COWFLAG; |
114 | irec->rc_domain = XFS_REFC_DOMAIN_COW; |
115 | } else { |
116 | irec->rc_domain = XFS_REFC_DOMAIN_SHARED; |
117 | } |
118 | |
119 | irec->rc_startblock = start; |
120 | irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount); |
121 | irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount); |
122 | } |
123 | |
124 | /* Simple checks for refcount records. */ |
125 | xfs_failaddr_t |
126 | xfs_refcount_check_irec( |
127 | struct xfs_perag *pag, |
128 | const struct xfs_refcount_irec *irec) |
129 | { |
130 | if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN) |
131 | return __this_address; |
132 | |
133 | if (!xfs_refcount_check_domain(irec)) |
134 | return __this_address; |
135 | |
136 | /* check for valid extent range, including overflow */ |
137 | if (!xfs_verify_agbext(pag, irec->rc_startblock, irec->rc_blockcount)) |
138 | return __this_address; |
139 | |
140 | if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT) |
141 | return __this_address; |
142 | |
143 | return NULL; |
144 | } |
145 | |
146 | static inline int |
147 | xfs_refcount_complain_bad_rec( |
148 | struct xfs_btree_cur *cur, |
149 | xfs_failaddr_t fa, |
150 | const struct xfs_refcount_irec *irec) |
151 | { |
152 | struct xfs_mount *mp = cur->bc_mp; |
153 | |
154 | xfs_warn(mp, |
155 | "Refcount BTree record corruption in AG %d detected at %pS!" , |
156 | cur->bc_ag.pag->pag_agno, fa); |
157 | xfs_warn(mp, |
158 | "Start block 0x%x, block count 0x%x, references 0x%x" , |
159 | irec->rc_startblock, irec->rc_blockcount, irec->rc_refcount); |
160 | xfs_btree_mark_sick(cur); |
161 | return -EFSCORRUPTED; |
162 | } |
163 | |
164 | /* |
165 | * Get the data from the pointed-to record. |
166 | */ |
167 | int |
168 | xfs_refcount_get_rec( |
169 | struct xfs_btree_cur *cur, |
170 | struct xfs_refcount_irec *irec, |
171 | int *stat) |
172 | { |
173 | union xfs_btree_rec *rec; |
174 | xfs_failaddr_t fa; |
175 | int error; |
176 | |
177 | error = xfs_btree_get_rec(cur, &rec, stat); |
178 | if (error || !*stat) |
179 | return error; |
180 | |
181 | xfs_refcount_btrec_to_irec(rec, irec); |
182 | fa = xfs_refcount_check_irec(cur->bc_ag.pag, irec); |
183 | if (fa) |
184 | return xfs_refcount_complain_bad_rec(cur, fa, irec); |
185 | |
186 | trace_xfs_refcount_get(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec); |
187 | return 0; |
188 | } |
189 | |
190 | /* |
191 | * Update the record referred to by cur to the value given |
192 | * by [bno, len, refcount]. |
193 | * This either works (return 0) or gets an EFSCORRUPTED error. |
194 | */ |
195 | STATIC int |
196 | xfs_refcount_update( |
197 | struct xfs_btree_cur *cur, |
198 | struct xfs_refcount_irec *irec) |
199 | { |
200 | union xfs_btree_rec rec; |
201 | uint32_t start; |
202 | int error; |
203 | |
204 | trace_xfs_refcount_update(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec); |
205 | |
206 | start = xfs_refcount_encode_startblock(irec->rc_startblock, |
207 | irec->rc_domain); |
208 | rec.refc.rc_startblock = cpu_to_be32(start); |
209 | rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount); |
210 | rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount); |
211 | |
212 | error = xfs_btree_update(cur, &rec); |
213 | if (error) |
214 | trace_xfs_refcount_update_error(cur->bc_mp, |
215 | cur->bc_ag.pag->pag_agno, error, _RET_IP_); |
216 | return error; |
217 | } |
218 | |
219 | /* |
220 | * Insert the record referred to by cur to the value given |
221 | * by [bno, len, refcount]. |
222 | * This either works (return 0) or gets an EFSCORRUPTED error. |
223 | */ |
224 | int |
225 | xfs_refcount_insert( |
226 | struct xfs_btree_cur *cur, |
227 | struct xfs_refcount_irec *irec, |
228 | int *i) |
229 | { |
230 | int error; |
231 | |
232 | trace_xfs_refcount_insert(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec); |
233 | |
234 | cur->bc_rec.rc.rc_startblock = irec->rc_startblock; |
235 | cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount; |
236 | cur->bc_rec.rc.rc_refcount = irec->rc_refcount; |
237 | cur->bc_rec.rc.rc_domain = irec->rc_domain; |
238 | |
239 | error = xfs_btree_insert(cur, i); |
240 | if (error) |
241 | goto out_error; |
242 | if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) { |
243 | xfs_btree_mark_sick(cur); |
244 | error = -EFSCORRUPTED; |
245 | goto out_error; |
246 | } |
247 | |
248 | out_error: |
249 | if (error) |
250 | trace_xfs_refcount_insert_error(cur->bc_mp, |
251 | cur->bc_ag.pag->pag_agno, error, _RET_IP_); |
252 | return error; |
253 | } |
254 | |
255 | /* |
256 | * Remove the record referred to by cur, then set the pointer to the spot |
257 | * where the record could be re-inserted, in case we want to increment or |
258 | * decrement the cursor. |
259 | * This either works (return 0) or gets an EFSCORRUPTED error. |
260 | */ |
261 | STATIC int |
262 | xfs_refcount_delete( |
263 | struct xfs_btree_cur *cur, |
264 | int *i) |
265 | { |
266 | struct xfs_refcount_irec irec; |
267 | int found_rec; |
268 | int error; |
269 | |
270 | error = xfs_refcount_get_rec(cur, irec: &irec, stat: &found_rec); |
271 | if (error) |
272 | goto out_error; |
273 | if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { |
274 | xfs_btree_mark_sick(cur); |
275 | error = -EFSCORRUPTED; |
276 | goto out_error; |
277 | } |
278 | trace_xfs_refcount_delete(cur->bc_mp, cur->bc_ag.pag->pag_agno, &irec); |
279 | error = xfs_btree_delete(cur, i); |
280 | if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) { |
281 | xfs_btree_mark_sick(cur); |
282 | error = -EFSCORRUPTED; |
283 | goto out_error; |
284 | } |
285 | if (error) |
286 | goto out_error; |
287 | error = xfs_refcount_lookup_ge(cur, irec.rc_domain, irec.rc_startblock, |
288 | &found_rec); |
289 | out_error: |
290 | if (error) |
291 | trace_xfs_refcount_delete_error(cur->bc_mp, |
292 | cur->bc_ag.pag->pag_agno, error, _RET_IP_); |
293 | return error; |
294 | } |
295 | |
296 | /* |
297 | * Adjusting the Reference Count |
298 | * |
299 | * As stated elsewhere, the reference count btree (refcbt) stores |
300 | * >1 reference counts for extents of physical blocks. In this |
301 | * operation, we're either raising or lowering the reference count of |
302 | * some subrange stored in the tree: |
303 | * |
 *      <------ adjustment range ------>
 * ----+   +---+-----+ +--+--------+---------
 *  2  |   | 3 |  2  | |17|   55   |   10
 * ----+   +---+-----+ +--+--------+---------
308 | * X axis is physical blocks number; |
309 | * reference counts are the numbers inside the rectangles |
310 | * |
311 | * The first thing we need to do is to ensure that there are no |
312 | * refcount extents crossing either boundary of the range to be |
313 | * adjusted. For any extent that does cross a boundary, split it into |
314 | * two extents so that we can increment the refcount of one of the |
315 | * pieces later: |
316 | * |
 *      <------ adjustment range ------>
 * ----+   +---+-----+ +--+--------+----+----
 *  2  |   | 3 |  2  | |17|   55   | 10 | 10
 * ----+   +---+-----+ +--+--------+----+----
321 | * |
322 | * For this next step, let's assume that all the physical blocks in |
323 | * the adjustment range are mapped to a file and are therefore in use |
324 | * at least once. Therefore, we can infer that any gap in the |
325 | * refcount tree within the adjustment range represents a physical |
326 | * extent with refcount == 1: |
327 | * |
 *      <------ adjustment range ------>
 * ----+---+---+-----+-+--+--------+----+----
 *  2  |"1"| 3 |  2  |1|17|   55   | 10 | 10
 * ----+---+---+-----+-+--+--------+----+----
 *      ^
333 | * |
334 | * For each extent that falls within the interval range, figure out |
335 | * which extent is to the left or the right of that extent. Now we |
336 | * have a left, current, and right extent. If the new reference count |
337 | * of the center extent enables us to merge left, center, and right |
338 | * into one record covering all three, do so. If the center extent is |
339 | * at the left end of the range, abuts the left extent, and its new |
340 | * reference count matches the left extent's record, then merge them. |
341 | * If the center extent is at the right end of the range, abuts the |
342 | * right extent, and the reference counts match, merge those. In the |
343 | * example, we can left merge (assuming an increment operation): |
344 | * |
 *      <------ adjustment range ------>
 * --------+---+-----+-+--+--------+----+----
 *    2    | 3 |  2  |1|17|   55   | 10 | 10
 * --------+---+-----+-+--+--------+----+----
 *          ^
350 | * |
351 | * For all other extents within the range, adjust the reference count |
352 | * or delete it if the refcount falls below 2. If we were |
353 | * incrementing, the end result looks like this: |
354 | * |
 *      <------ adjustment range ------>
 * --------+---+-----+-+--+--------+----+----
 *    2    | 4 |  3  |2|18|   56   | 11 | 10
 * --------+---+-----+-+--+--------+----+----
359 | * |
360 | * The result of a decrement operation looks as such: |
361 | * |
 *      <------ adjustment range ------>
 * ----+   +---+       +--+--------+----+----
 *  2  |   | 2 |       |16|   54   |  9 | 10
 * ----+   +---+       +--+--------+----+----
 *      DDDD    111111DD
367 | * |
368 | * The blocks marked "D" are freed; the blocks marked "1" are only |
369 | * referenced once and therefore the record is removed from the |
370 | * refcount btree. |
371 | */ |
372 | |
373 | /* Next block after this extent. */ |
374 | static inline xfs_agblock_t |
375 | xfs_refc_next( |
376 | struct xfs_refcount_irec *rc) |
377 | { |
378 | return rc->rc_startblock + rc->rc_blockcount; |
379 | } |
380 | |
381 | /* |
382 | * Split a refcount extent that crosses agbno. |
383 | */ |
384 | STATIC int |
385 | xfs_refcount_split_extent( |
386 | struct xfs_btree_cur *cur, |
387 | enum xfs_refc_domain domain, |
388 | xfs_agblock_t agbno, |
389 | bool *shape_changed) |
390 | { |
391 | struct xfs_refcount_irec rcext, tmp; |
392 | int found_rec; |
393 | int error; |
394 | |
395 | *shape_changed = false; |
396 | error = xfs_refcount_lookup_le(cur, domain, agbno, &found_rec); |
397 | if (error) |
398 | goto out_error; |
399 | if (!found_rec) |
400 | return 0; |
401 | |
402 | error = xfs_refcount_get_rec(cur, irec: &rcext, stat: &found_rec); |
403 | if (error) |
404 | goto out_error; |
405 | if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { |
406 | xfs_btree_mark_sick(cur); |
407 | error = -EFSCORRUPTED; |
408 | goto out_error; |
409 | } |
410 | if (rcext.rc_domain != domain) |
411 | return 0; |
412 | if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno) |
413 | return 0; |
414 | |
415 | *shape_changed = true; |
416 | trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, |
417 | &rcext, agbno); |
418 | |
419 | /* Establish the right extent. */ |
420 | tmp = rcext; |
421 | tmp.rc_startblock = agbno; |
422 | tmp.rc_blockcount -= (agbno - rcext.rc_startblock); |
423 | error = xfs_refcount_update(cur, &tmp); |
424 | if (error) |
425 | goto out_error; |
426 | |
427 | /* Insert the left extent. */ |
428 | tmp = rcext; |
429 | tmp.rc_blockcount = agbno - rcext.rc_startblock; |
430 | error = xfs_refcount_insert(cur, irec: &tmp, i: &found_rec); |
431 | if (error) |
432 | goto out_error; |
433 | if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { |
434 | xfs_btree_mark_sick(cur); |
435 | error = -EFSCORRUPTED; |
436 | goto out_error; |
437 | } |
438 | return error; |
439 | |
440 | out_error: |
441 | trace_xfs_refcount_split_extent_error(cur->bc_mp, |
442 | cur->bc_ag.pag->pag_agno, error, _RET_IP_); |
443 | return error; |
444 | } |
445 | |
446 | /* |
447 | * Merge the left, center, and right extents. |
448 | */ |
449 | STATIC int |
450 | xfs_refcount_merge_center_extents( |
451 | struct xfs_btree_cur *cur, |
452 | struct xfs_refcount_irec *left, |
453 | struct xfs_refcount_irec *center, |
454 | struct xfs_refcount_irec *right, |
455 | unsigned long long extlen, |
456 | xfs_extlen_t *aglen) |
457 | { |
458 | int error; |
459 | int found_rec; |
460 | |
461 | trace_xfs_refcount_merge_center_extents(cur->bc_mp, |
462 | cur->bc_ag.pag->pag_agno, left, center, right); |
463 | |
464 | ASSERT(left->rc_domain == center->rc_domain); |
465 | ASSERT(right->rc_domain == center->rc_domain); |
466 | |
467 | /* |
468 | * Make sure the center and right extents are not in the btree. |
469 | * If the center extent was synthesized, the first delete call |
470 | * removes the right extent and we skip the second deletion. |
471 | * If center and right were in the btree, then the first delete |
472 | * call removes the center and the second one removes the right |
473 | * extent. |
474 | */ |
475 | error = xfs_refcount_lookup_ge(cur, center->rc_domain, |
476 | center->rc_startblock, &found_rec); |
477 | if (error) |
478 | goto out_error; |
479 | if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { |
480 | xfs_btree_mark_sick(cur); |
481 | error = -EFSCORRUPTED; |
482 | goto out_error; |
483 | } |
484 | |
485 | error = xfs_refcount_delete(cur, &found_rec); |
486 | if (error) |
487 | goto out_error; |
488 | if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { |
489 | xfs_btree_mark_sick(cur); |
490 | error = -EFSCORRUPTED; |
491 | goto out_error; |
492 | } |
493 | |
494 | if (center->rc_refcount > 1) { |
495 | error = xfs_refcount_delete(cur, &found_rec); |
496 | if (error) |
497 | goto out_error; |
498 | if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { |
499 | xfs_btree_mark_sick(cur); |
500 | error = -EFSCORRUPTED; |
501 | goto out_error; |
502 | } |
503 | } |
504 | |
505 | /* Enlarge the left extent. */ |
506 | error = xfs_refcount_lookup_le(cur, left->rc_domain, |
507 | left->rc_startblock, &found_rec); |
508 | if (error) |
509 | goto out_error; |
510 | if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { |
511 | xfs_btree_mark_sick(cur); |
512 | error = -EFSCORRUPTED; |
513 | goto out_error; |
514 | } |
515 | |
516 | left->rc_blockcount = extlen; |
517 | error = xfs_refcount_update(cur, left); |
518 | if (error) |
519 | goto out_error; |
520 | |
521 | *aglen = 0; |
522 | return error; |
523 | |
524 | out_error: |
525 | trace_xfs_refcount_merge_center_extents_error(cur->bc_mp, |
526 | cur->bc_ag.pag->pag_agno, error, _RET_IP_); |
527 | return error; |
528 | } |
529 | |
530 | /* |
531 | * Merge with the left extent. |
532 | */ |
533 | STATIC int |
534 | xfs_refcount_merge_left_extent( |
535 | struct xfs_btree_cur *cur, |
536 | struct xfs_refcount_irec *left, |
537 | struct xfs_refcount_irec *cleft, |
538 | xfs_agblock_t *agbno, |
539 | xfs_extlen_t *aglen) |
540 | { |
541 | int error; |
542 | int found_rec; |
543 | |
544 | trace_xfs_refcount_merge_left_extent(cur->bc_mp, |
545 | cur->bc_ag.pag->pag_agno, left, cleft); |
546 | |
547 | ASSERT(left->rc_domain == cleft->rc_domain); |
548 | |
549 | /* If the extent at agbno (cleft) wasn't synthesized, remove it. */ |
550 | if (cleft->rc_refcount > 1) { |
551 | error = xfs_refcount_lookup_le(cur, cleft->rc_domain, |
552 | cleft->rc_startblock, &found_rec); |
553 | if (error) |
554 | goto out_error; |
555 | if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { |
556 | xfs_btree_mark_sick(cur); |
557 | error = -EFSCORRUPTED; |
558 | goto out_error; |
559 | } |
560 | |
561 | error = xfs_refcount_delete(cur, &found_rec); |
562 | if (error) |
563 | goto out_error; |
564 | if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { |
565 | xfs_btree_mark_sick(cur); |
566 | error = -EFSCORRUPTED; |
567 | goto out_error; |
568 | } |
569 | } |
570 | |
571 | /* Enlarge the left extent. */ |
572 | error = xfs_refcount_lookup_le(cur, left->rc_domain, |
573 | left->rc_startblock, &found_rec); |
574 | if (error) |
575 | goto out_error; |
576 | if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { |
577 | xfs_btree_mark_sick(cur); |
578 | error = -EFSCORRUPTED; |
579 | goto out_error; |
580 | } |
581 | |
582 | left->rc_blockcount += cleft->rc_blockcount; |
583 | error = xfs_refcount_update(cur, left); |
584 | if (error) |
585 | goto out_error; |
586 | |
587 | *agbno += cleft->rc_blockcount; |
588 | *aglen -= cleft->rc_blockcount; |
589 | return error; |
590 | |
591 | out_error: |
592 | trace_xfs_refcount_merge_left_extent_error(cur->bc_mp, |
593 | cur->bc_ag.pag->pag_agno, error, _RET_IP_); |
594 | return error; |
595 | } |
596 | |
597 | /* |
598 | * Merge with the right extent. |
599 | */ |
600 | STATIC int |
601 | xfs_refcount_merge_right_extent( |
602 | struct xfs_btree_cur *cur, |
603 | struct xfs_refcount_irec *right, |
604 | struct xfs_refcount_irec *cright, |
605 | xfs_extlen_t *aglen) |
606 | { |
607 | int error; |
608 | int found_rec; |
609 | |
610 | trace_xfs_refcount_merge_right_extent(cur->bc_mp, |
611 | cur->bc_ag.pag->pag_agno, cright, right); |
612 | |
613 | ASSERT(right->rc_domain == cright->rc_domain); |
614 | |
615 | /* |
616 | * If the extent ending at agbno+aglen (cright) wasn't synthesized, |
617 | * remove it. |
618 | */ |
619 | if (cright->rc_refcount > 1) { |
620 | error = xfs_refcount_lookup_le(cur, cright->rc_domain, |
621 | cright->rc_startblock, &found_rec); |
622 | if (error) |
623 | goto out_error; |
624 | if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { |
625 | xfs_btree_mark_sick(cur); |
626 | error = -EFSCORRUPTED; |
627 | goto out_error; |
628 | } |
629 | |
630 | error = xfs_refcount_delete(cur, &found_rec); |
631 | if (error) |
632 | goto out_error; |
633 | if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { |
634 | xfs_btree_mark_sick(cur); |
635 | error = -EFSCORRUPTED; |
636 | goto out_error; |
637 | } |
638 | } |
639 | |
640 | /* Enlarge the right extent. */ |
641 | error = xfs_refcount_lookup_le(cur, right->rc_domain, |
642 | right->rc_startblock, &found_rec); |
643 | if (error) |
644 | goto out_error; |
645 | if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { |
646 | xfs_btree_mark_sick(cur); |
647 | error = -EFSCORRUPTED; |
648 | goto out_error; |
649 | } |
650 | |
651 | right->rc_startblock -= cright->rc_blockcount; |
652 | right->rc_blockcount += cright->rc_blockcount; |
653 | error = xfs_refcount_update(cur, right); |
654 | if (error) |
655 | goto out_error; |
656 | |
657 | *aglen -= cright->rc_blockcount; |
658 | return error; |
659 | |
660 | out_error: |
661 | trace_xfs_refcount_merge_right_extent_error(cur->bc_mp, |
662 | cur->bc_ag.pag->pag_agno, error, _RET_IP_); |
663 | return error; |
664 | } |
665 | |
666 | /* |
667 | * Find the left extent and the one after it (cleft). This function assumes |
668 | * that we've already split any extent crossing agbno. |
669 | */ |
670 | STATIC int |
671 | xfs_refcount_find_left_extents( |
672 | struct xfs_btree_cur *cur, |
673 | struct xfs_refcount_irec *left, |
674 | struct xfs_refcount_irec *cleft, |
675 | enum xfs_refc_domain domain, |
676 | xfs_agblock_t agbno, |
677 | xfs_extlen_t aglen) |
678 | { |
679 | struct xfs_refcount_irec tmp; |
680 | int error; |
681 | int found_rec; |
682 | |
683 | left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK; |
684 | error = xfs_refcount_lookup_le(cur, domain, agbno - 1, &found_rec); |
685 | if (error) |
686 | goto out_error; |
687 | if (!found_rec) |
688 | return 0; |
689 | |
690 | error = xfs_refcount_get_rec(cur, irec: &tmp, stat: &found_rec); |
691 | if (error) |
692 | goto out_error; |
693 | if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { |
694 | xfs_btree_mark_sick(cur); |
695 | error = -EFSCORRUPTED; |
696 | goto out_error; |
697 | } |
698 | |
699 | if (tmp.rc_domain != domain) |
700 | return 0; |
701 | if (xfs_refc_next(&tmp) != agbno) |
702 | return 0; |
703 | /* We have a left extent; retrieve (or invent) the next right one */ |
704 | *left = tmp; |
705 | |
706 | error = xfs_btree_increment(cur, 0, &found_rec); |
707 | if (error) |
708 | goto out_error; |
709 | if (found_rec) { |
710 | error = xfs_refcount_get_rec(cur, irec: &tmp, stat: &found_rec); |
711 | if (error) |
712 | goto out_error; |
713 | if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { |
714 | xfs_btree_mark_sick(cur); |
715 | error = -EFSCORRUPTED; |
716 | goto out_error; |
717 | } |
718 | |
719 | if (tmp.rc_domain != domain) |
720 | goto not_found; |
721 | |
722 | /* if tmp starts at the end of our range, just use that */ |
723 | if (tmp.rc_startblock == agbno) |
724 | *cleft = tmp; |
725 | else { |
726 | /* |
727 | * There's a gap in the refcntbt at the start of the |
728 | * range we're interested in (refcount == 1) so |
729 | * synthesize the implied extent and pass it back. |
730 | * We assume here that the agbno/aglen range was |
731 | * passed in from a data fork extent mapping and |
732 | * therefore is allocated to exactly one owner. |
733 | */ |
734 | cleft->rc_startblock = agbno; |
735 | cleft->rc_blockcount = min(aglen, |
736 | tmp.rc_startblock - agbno); |
737 | cleft->rc_refcount = 1; |
738 | cleft->rc_domain = domain; |
739 | } |
740 | } else { |
741 | not_found: |
742 | /* |
743 | * No extents, so pretend that there's one covering the whole |
744 | * range. |
745 | */ |
746 | cleft->rc_startblock = agbno; |
747 | cleft->rc_blockcount = aglen; |
748 | cleft->rc_refcount = 1; |
749 | cleft->rc_domain = domain; |
750 | } |
751 | trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, |
752 | left, cleft, agbno); |
753 | return error; |
754 | |
755 | out_error: |
756 | trace_xfs_refcount_find_left_extent_error(cur->bc_mp, |
757 | cur->bc_ag.pag->pag_agno, error, _RET_IP_); |
758 | return error; |
759 | } |
760 | |
761 | /* |
762 | * Find the right extent and the one before it (cright). This function |
763 | * assumes that we've already split any extents crossing agbno + aglen. |
764 | */ |
765 | STATIC int |
766 | xfs_refcount_find_right_extents( |
767 | struct xfs_btree_cur *cur, |
768 | struct xfs_refcount_irec *right, |
769 | struct xfs_refcount_irec *cright, |
770 | enum xfs_refc_domain domain, |
771 | xfs_agblock_t agbno, |
772 | xfs_extlen_t aglen) |
773 | { |
774 | struct xfs_refcount_irec tmp; |
775 | int error; |
776 | int found_rec; |
777 | |
778 | right->rc_startblock = cright->rc_startblock = NULLAGBLOCK; |
779 | error = xfs_refcount_lookup_ge(cur, domain, agbno + aglen, &found_rec); |
780 | if (error) |
781 | goto out_error; |
782 | if (!found_rec) |
783 | return 0; |
784 | |
785 | error = xfs_refcount_get_rec(cur, irec: &tmp, stat: &found_rec); |
786 | if (error) |
787 | goto out_error; |
788 | if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { |
789 | xfs_btree_mark_sick(cur); |
790 | error = -EFSCORRUPTED; |
791 | goto out_error; |
792 | } |
793 | |
794 | if (tmp.rc_domain != domain) |
795 | return 0; |
796 | if (tmp.rc_startblock != agbno + aglen) |
797 | return 0; |
798 | /* We have a right extent; retrieve (or invent) the next left one */ |
799 | *right = tmp; |
800 | |
801 | error = xfs_btree_decrement(cur, 0, &found_rec); |
802 | if (error) |
803 | goto out_error; |
804 | if (found_rec) { |
805 | error = xfs_refcount_get_rec(cur, irec: &tmp, stat: &found_rec); |
806 | if (error) |
807 | goto out_error; |
808 | if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { |
809 | xfs_btree_mark_sick(cur); |
810 | error = -EFSCORRUPTED; |
811 | goto out_error; |
812 | } |
813 | |
814 | if (tmp.rc_domain != domain) |
815 | goto not_found; |
816 | |
817 | /* if tmp ends at the end of our range, just use that */ |
818 | if (xfs_refc_next(&tmp) == agbno + aglen) |
819 | *cright = tmp; |
820 | else { |
821 | /* |
822 | * There's a gap in the refcntbt at the end of the |
823 | * range we're interested in (refcount == 1) so |
824 | * create the implied extent and pass it back. |
825 | * We assume here that the agbno/aglen range was |
826 | * passed in from a data fork extent mapping and |
827 | * therefore is allocated to exactly one owner. |
828 | */ |
829 | cright->rc_startblock = max(agbno, xfs_refc_next(&tmp)); |
830 | cright->rc_blockcount = right->rc_startblock - |
831 | cright->rc_startblock; |
832 | cright->rc_refcount = 1; |
833 | cright->rc_domain = domain; |
834 | } |
835 | } else { |
836 | not_found: |
837 | /* |
838 | * No extents, so pretend that there's one covering the whole |
839 | * range. |
840 | */ |
841 | cright->rc_startblock = agbno; |
842 | cright->rc_blockcount = aglen; |
843 | cright->rc_refcount = 1; |
844 | cright->rc_domain = domain; |
845 | } |
846 | trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, |
847 | cright, right, agbno + aglen); |
848 | return error; |
849 | |
850 | out_error: |
851 | trace_xfs_refcount_find_right_extent_error(cur->bc_mp, |
852 | cur->bc_ag.pag->pag_agno, error, _RET_IP_); |
853 | return error; |
854 | } |
855 | |
856 | /* Is this extent valid? */ |
857 | static inline bool |
858 | xfs_refc_valid( |
859 | const struct xfs_refcount_irec *rc) |
860 | { |
861 | return rc->rc_startblock != NULLAGBLOCK; |
862 | } |
863 | |
864 | static inline xfs_nlink_t |
865 | xfs_refc_merge_refcount( |
866 | const struct xfs_refcount_irec *irec, |
867 | enum xfs_refc_adjust_op adjust) |
868 | { |
869 | /* Once a record hits MAXREFCOUNT, it is pinned there forever */ |
870 | if (irec->rc_refcount == MAXREFCOUNT) |
871 | return MAXREFCOUNT; |
872 | return irec->rc_refcount + adjust; |
873 | } |
874 | |
875 | static inline bool |
876 | xfs_refc_want_merge_center( |
877 | const struct xfs_refcount_irec *left, |
878 | const struct xfs_refcount_irec *cleft, |
879 | const struct xfs_refcount_irec *cright, |
880 | const struct xfs_refcount_irec *right, |
881 | bool cleft_is_cright, |
882 | enum xfs_refc_adjust_op adjust, |
883 | unsigned long long *ulenp) |
884 | { |
885 | unsigned long long ulen = left->rc_blockcount; |
886 | xfs_nlink_t new_refcount; |
887 | |
888 | /* |
889 | * To merge with a center record, both shoulder records must be |
890 | * adjacent to the record we want to adjust. This is only true if |
891 | * find_left and find_right made all four records valid. |
892 | */ |
893 | if (!xfs_refc_valid(left) || !xfs_refc_valid(right) || |
894 | !xfs_refc_valid(cleft) || !xfs_refc_valid(cright)) |
895 | return false; |
896 | |
897 | /* There must only be one record for the entire range. */ |
898 | if (!cleft_is_cright) |
899 | return false; |
900 | |
901 | /* The shoulder record refcounts must match the new refcount. */ |
902 | new_refcount = xfs_refc_merge_refcount(cleft, adjust); |
903 | if (left->rc_refcount != new_refcount) |
904 | return false; |
905 | if (right->rc_refcount != new_refcount) |
906 | return false; |
907 | |
908 | /* |
909 | * The new record cannot exceed the max length. ulen is a ULL as the |
910 | * individual record block counts can be up to (u32 - 1) in length |
911 | * hence we need to catch u32 addition overflows here. |
912 | */ |
913 | ulen += cleft->rc_blockcount + right->rc_blockcount; |
914 | if (ulen >= MAXREFCEXTLEN) |
915 | return false; |
916 | |
917 | *ulenp = ulen; |
918 | return true; |
919 | } |
920 | |
921 | static inline bool |
922 | xfs_refc_want_merge_left( |
923 | const struct xfs_refcount_irec *left, |
924 | const struct xfs_refcount_irec *cleft, |
925 | enum xfs_refc_adjust_op adjust) |
926 | { |
927 | unsigned long long ulen = left->rc_blockcount; |
928 | xfs_nlink_t new_refcount; |
929 | |
930 | /* |
931 | * For a left merge, the left shoulder record must be adjacent to the |
932 | * start of the range. If this is true, find_left made left and cleft |
933 | * contain valid contents. |
934 | */ |
935 | if (!xfs_refc_valid(left) || !xfs_refc_valid(cleft)) |
936 | return false; |
937 | |
938 | /* Left shoulder record refcount must match the new refcount. */ |
939 | new_refcount = xfs_refc_merge_refcount(cleft, adjust); |
940 | if (left->rc_refcount != new_refcount) |
941 | return false; |
942 | |
943 | /* |
944 | * The new record cannot exceed the max length. ulen is a ULL as the |
945 | * individual record block counts can be up to (u32 - 1) in length |
946 | * hence we need to catch u32 addition overflows here. |
947 | */ |
948 | ulen += cleft->rc_blockcount; |
949 | if (ulen >= MAXREFCEXTLEN) |
950 | return false; |
951 | |
952 | return true; |
953 | } |
954 | |
955 | static inline bool |
956 | xfs_refc_want_merge_right( |
957 | const struct xfs_refcount_irec *cright, |
958 | const struct xfs_refcount_irec *right, |
959 | enum xfs_refc_adjust_op adjust) |
960 | { |
961 | unsigned long long ulen = right->rc_blockcount; |
962 | xfs_nlink_t new_refcount; |
963 | |
964 | /* |
965 | * For a right merge, the right shoulder record must be adjacent to the |
966 | * end of the range. If this is true, find_right made cright and right |
967 | * contain valid contents. |
968 | */ |
969 | if (!xfs_refc_valid(right) || !xfs_refc_valid(cright)) |
970 | return false; |
971 | |
972 | /* Right shoulder record refcount must match the new refcount. */ |
973 | new_refcount = xfs_refc_merge_refcount(cright, adjust); |
974 | if (right->rc_refcount != new_refcount) |
975 | return false; |
976 | |
977 | /* |
978 | * The new record cannot exceed the max length. ulen is a ULL as the |
979 | * individual record block counts can be up to (u32 - 1) in length |
980 | * hence we need to catch u32 addition overflows here. |
981 | */ |
982 | ulen += cright->rc_blockcount; |
983 | if (ulen >= MAXREFCEXTLEN) |
984 | return false; |
985 | |
986 | return true; |
987 | } |
988 | |
989 | /* |
990 | * Try to merge with any extents on the boundaries of the adjustment range. |
991 | */ |
992 | STATIC int |
993 | xfs_refcount_merge_extents( |
994 | struct xfs_btree_cur *cur, |
995 | enum xfs_refc_domain domain, |
996 | xfs_agblock_t *agbno, |
997 | xfs_extlen_t *aglen, |
998 | enum xfs_refc_adjust_op adjust, |
999 | bool *shape_changed) |
1000 | { |
1001 | struct xfs_refcount_irec left = {0}, cleft = {0}; |
1002 | struct xfs_refcount_irec cright = {0}, right = {0}; |
1003 | int error; |
1004 | unsigned long long ulen; |
1005 | bool cequal; |
1006 | |
1007 | *shape_changed = false; |
1008 | /* |
1009 | * Find the extent just below agbno [left], just above agbno [cleft], |
1010 | * just below (agbno + aglen) [cright], and just above (agbno + aglen) |
1011 | * [right]. |
1012 | */ |
1013 | error = xfs_refcount_find_left_extents(cur, &left, &cleft, domain, |
1014 | *agbno, *aglen); |
1015 | if (error) |
1016 | return error; |
1017 | error = xfs_refcount_find_right_extents(cur, &right, &cright, domain, |
1018 | *agbno, *aglen); |
1019 | if (error) |
1020 | return error; |
1021 | |
1022 | /* No left or right extent to merge; exit. */ |
1023 | if (!xfs_refc_valid(&left) && !xfs_refc_valid(&right)) |
1024 | return 0; |
1025 | |
1026 | cequal = (cleft.rc_startblock == cright.rc_startblock) && |
1027 | (cleft.rc_blockcount == cright.rc_blockcount); |
1028 | |
1029 | /* Try to merge left, cleft, and right. cleft must == cright. */ |
1030 | if (xfs_refc_want_merge_center(&left, &cleft, &cright, &right, cequal, |
1031 | adjust, &ulen)) { |
1032 | *shape_changed = true; |
1033 | return xfs_refcount_merge_center_extents(cur, &left, &cleft, |
1034 | &right, ulen, aglen); |
1035 | } |
1036 | |
1037 | /* Try to merge left and cleft. */ |
1038 | if (xfs_refc_want_merge_left(&left, &cleft, adjust)) { |
1039 | *shape_changed = true; |
1040 | error = xfs_refcount_merge_left_extent(cur, &left, &cleft, |
1041 | agbno, aglen); |
1042 | if (error) |
1043 | return error; |
1044 | |
1045 | /* |
1046 | * If we just merged left + cleft and cleft == cright, |
1047 | * we no longer have a cright to merge with right. We're done. |
1048 | */ |
1049 | if (cequal) |
1050 | return 0; |
1051 | } |
1052 | |
1053 | /* Try to merge cright and right. */ |
1054 | if (xfs_refc_want_merge_right(&cright, &right, adjust)) { |
1055 | *shape_changed = true; |
1056 | return xfs_refcount_merge_right_extent(cur, &right, &cright, |
1057 | aglen); |
1058 | } |
1059 | |
1060 | return 0; |
1061 | } |
1062 | |
1063 | /* |
1064 | * XXX: This is a pretty hand-wavy estimate. The penalty for guessing |
1065 | * true incorrectly is a shutdown FS; the penalty for guessing false |
1066 | * incorrectly is more transaction rolls than might be necessary. |
1067 | * Be conservative here. |
1068 | */ |
1069 | static bool |
1070 | xfs_refcount_still_have_space( |
1071 | struct xfs_btree_cur *cur) |
1072 | { |
1073 | unsigned long overhead; |
1074 | |
1075 | /* |
1076 | * Worst case estimate: full splits of the free space and rmap btrees |
1077 | * to handle each of the shape changes to the refcount btree. |
1078 | */ |
1079 | overhead = xfs_allocfree_block_count(cur->bc_mp, |
1080 | cur->bc_refc.shape_changes); |
1081 | overhead += cur->bc_mp->m_refc_maxlevels; |
1082 | overhead *= cur->bc_mp->m_sb.sb_blocksize; |
1083 | |
1084 | /* |
1085 | * Only allow 2 refcount extent updates per transaction if the |
1086 | * refcount continue update "error" has been injected. |
1087 | */ |
1088 | if (cur->bc_refc.nr_ops > 2 && |
1089 | XFS_TEST_ERROR(false, cur->bc_mp, |
1090 | XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE)) |
1091 | return false; |
1092 | |
1093 | if (cur->bc_refc.nr_ops == 0) |
1094 | return true; |
1095 | else if (overhead > cur->bc_tp->t_log_res) |
1096 | return false; |
1097 | return cur->bc_tp->t_log_res - overhead > |
1098 | cur->bc_refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD; |
1099 | } |
1100 | |
1101 | /* |
1102 | * Adjust the refcounts of middle extents. At this point we should have |
1103 | * split extents that crossed the adjustment range; merged with adjacent |
1104 | * extents; and updated agbno/aglen to reflect the merges. Therefore, |
1105 | * all we have to do is update the extents inside [agbno, agbno + aglen]. |
1106 | */ |
1107 | STATIC int |
1108 | xfs_refcount_adjust_extents( |
1109 | struct xfs_btree_cur *cur, |
1110 | xfs_agblock_t *agbno, |
1111 | xfs_extlen_t *aglen, |
1112 | enum xfs_refc_adjust_op adj) |
1113 | { |
1114 | struct xfs_refcount_irec ext, tmp; |
1115 | int error; |
1116 | int found_rec, found_tmp; |
1117 | xfs_fsblock_t fsbno; |
1118 | |
1119 | /* Merging did all the work already. */ |
1120 | if (*aglen == 0) |
1121 | return 0; |
1122 | |
1123 | error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_SHARED, *agbno, |
1124 | &found_rec); |
1125 | if (error) |
1126 | goto out_error; |
1127 | |
1128 | while (*aglen > 0 && xfs_refcount_still_have_space(cur)) { |
1129 | error = xfs_refcount_get_rec(cur, irec: &ext, stat: &found_rec); |
1130 | if (error) |
1131 | goto out_error; |
1132 | if (!found_rec || ext.rc_domain != XFS_REFC_DOMAIN_SHARED) { |
1133 | ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks; |
1134 | ext.rc_blockcount = 0; |
1135 | ext.rc_refcount = 0; |
1136 | ext.rc_domain = XFS_REFC_DOMAIN_SHARED; |
1137 | } |
1138 | |
1139 | /* |
1140 | * Deal with a hole in the refcount tree; if a file maps to |
1141 | * these blocks and there's no refcountbt record, pretend that |
1142 | * there is one with refcount == 1. |
1143 | */ |
1144 | if (ext.rc_startblock != *agbno) { |
1145 | tmp.rc_startblock = *agbno; |
1146 | tmp.rc_blockcount = min(*aglen, |
1147 | ext.rc_startblock - *agbno); |
1148 | tmp.rc_refcount = 1 + adj; |
1149 | tmp.rc_domain = XFS_REFC_DOMAIN_SHARED; |
1150 | |
1151 | trace_xfs_refcount_modify_extent(cur->bc_mp, |
1152 | cur->bc_ag.pag->pag_agno, &tmp); |
1153 | |
1154 | /* |
1155 | * Either cover the hole (increment) or |
1156 | * delete the range (decrement). |
1157 | */ |
1158 | cur->bc_refc.nr_ops++; |
1159 | if (tmp.rc_refcount) { |
1160 | error = xfs_refcount_insert(cur, irec: &tmp, |
1161 | i: &found_tmp); |
1162 | if (error) |
1163 | goto out_error; |
1164 | if (XFS_IS_CORRUPT(cur->bc_mp, |
1165 | found_tmp != 1)) { |
1166 | xfs_btree_mark_sick(cur); |
1167 | error = -EFSCORRUPTED; |
1168 | goto out_error; |
1169 | } |
1170 | } else { |
1171 | fsbno = XFS_AGB_TO_FSB(cur->bc_mp, |
1172 | cur->bc_ag.pag->pag_agno, |
1173 | tmp.rc_startblock); |
1174 | error = xfs_free_extent_later(cur->bc_tp, fsbno, |
1175 | tmp.rc_blockcount, NULL, |
1176 | XFS_AG_RESV_NONE, false); |
1177 | if (error) |
1178 | goto out_error; |
1179 | } |
1180 | |
1181 | (*agbno) += tmp.rc_blockcount; |
1182 | (*aglen) -= tmp.rc_blockcount; |
1183 | |
1184 | /* Stop if there's nothing left to modify */ |
1185 | if (*aglen == 0 || !xfs_refcount_still_have_space(cur)) |
1186 | break; |
1187 | |
1188 | /* Move the cursor to the start of ext. */ |
1189 | error = xfs_refcount_lookup_ge(cur, |
1190 | XFS_REFC_DOMAIN_SHARED, *agbno, |
1191 | &found_rec); |
1192 | if (error) |
1193 | goto out_error; |
1194 | } |
1195 | |
1196 | /* |
1197 | * A previous step trimmed agbno/aglen such that the end of the |
1198 | * range would not be in the middle of the record. If this is |
1199 | * no longer the case, something is seriously wrong with the |
1200 | * btree. Make sure we never feed the synthesized record into |
1201 | * the processing loop below. |
1202 | */ |
1203 | if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount == 0) || |
1204 | XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount > *aglen)) { |
1205 | xfs_btree_mark_sick(cur); |
1206 | error = -EFSCORRUPTED; |
1207 | goto out_error; |
1208 | } |
1209 | |
1210 | /* |
1211 | * Adjust the reference count and either update the tree |
1212 | * (incr) or free the blocks (decr). |
1213 | */ |
1214 | if (ext.rc_refcount == MAXREFCOUNT) |
1215 | goto skip; |
1216 | ext.rc_refcount += adj; |
1217 | trace_xfs_refcount_modify_extent(cur->bc_mp, |
1218 | cur->bc_ag.pag->pag_agno, &ext); |
1219 | cur->bc_refc.nr_ops++; |
1220 | if (ext.rc_refcount > 1) { |
1221 | error = xfs_refcount_update(cur, &ext); |
1222 | if (error) |
1223 | goto out_error; |
1224 | } else if (ext.rc_refcount == 1) { |
1225 | error = xfs_refcount_delete(cur, &found_rec); |
1226 | if (error) |
1227 | goto out_error; |
1228 | if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { |
1229 | xfs_btree_mark_sick(cur); |
1230 | error = -EFSCORRUPTED; |
1231 | goto out_error; |
1232 | } |
1233 | goto advloop; |
1234 | } else { |
1235 | fsbno = XFS_AGB_TO_FSB(cur->bc_mp, |
1236 | cur->bc_ag.pag->pag_agno, |
1237 | ext.rc_startblock); |
1238 | error = xfs_free_extent_later(cur->bc_tp, fsbno, |
1239 | ext.rc_blockcount, NULL, |
1240 | XFS_AG_RESV_NONE, false); |
1241 | if (error) |
1242 | goto out_error; |
1243 | } |
1244 | |
1245 | skip: |
1246 | error = xfs_btree_increment(cur, 0, &found_rec); |
1247 | if (error) |
1248 | goto out_error; |
1249 | |
1250 | advloop: |
1251 | (*agbno) += ext.rc_blockcount; |
1252 | (*aglen) -= ext.rc_blockcount; |
1253 | } |
1254 | |
1255 | return error; |
1256 | out_error: |
1257 | trace_xfs_refcount_modify_extent_error(cur->bc_mp, |
1258 | cur->bc_ag.pag->pag_agno, error, _RET_IP_); |
1259 | return error; |
1260 | } |
1261 | |
1262 | /* Adjust the reference count of a range of AG blocks. */ |
1263 | STATIC int |
1264 | xfs_refcount_adjust( |
1265 | struct xfs_btree_cur *cur, |
1266 | xfs_agblock_t *agbno, |
1267 | xfs_extlen_t *aglen, |
1268 | enum xfs_refc_adjust_op adj) |
1269 | { |
1270 | bool shape_changed; |
1271 | int shape_changes = 0; |
1272 | int error; |
1273 | |
1274 | if (adj == XFS_REFCOUNT_ADJUST_INCREASE) |
1275 | trace_xfs_refcount_increase(cur->bc_mp, |
1276 | cur->bc_ag.pag->pag_agno, *agbno, *aglen); |
1277 | else |
1278 | trace_xfs_refcount_decrease(cur->bc_mp, |
1279 | cur->bc_ag.pag->pag_agno, *agbno, *aglen); |
1280 | |
1281 | /* |
1282 | * Ensure that no rcextents cross the boundary of the adjustment range. |
1283 | */ |
1284 | error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED, |
1285 | *agbno, &shape_changed); |
1286 | if (error) |
1287 | goto out_error; |
1288 | if (shape_changed) |
1289 | shape_changes++; |
1290 | |
1291 | error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED, |
1292 | *agbno + *aglen, &shape_changed); |
1293 | if (error) |
1294 | goto out_error; |
1295 | if (shape_changed) |
1296 | shape_changes++; |
1297 | |
1298 | /* |
1299 | * Try to merge with the left or right extents of the range. |
1300 | */ |
1301 | error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_SHARED, |
1302 | agbno, aglen, adj, &shape_changed); |
1303 | if (error) |
1304 | goto out_error; |
1305 | if (shape_changed) |
1306 | shape_changes++; |
1307 | if (shape_changes) |
1308 | cur->bc_refc.shape_changes++; |
1309 | |
1310 | /* Now that we've taken care of the ends, adjust the middle extents */ |
1311 | error = xfs_refcount_adjust_extents(cur, agbno, aglen, adj); |
1312 | if (error) |
1313 | goto out_error; |
1314 | |
1315 | return 0; |
1316 | |
1317 | out_error: |
1318 | trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_ag.pag->pag_agno, |
1319 | error, _RET_IP_); |
1320 | return error; |
1321 | } |
1322 | |
1323 | /* Clean up after calling xfs_refcount_finish_one. */ |
1324 | void |
1325 | xfs_refcount_finish_one_cleanup( |
1326 | struct xfs_trans *tp, |
1327 | struct xfs_btree_cur *rcur, |
1328 | int error) |
1329 | { |
1330 | struct xfs_buf *agbp; |
1331 | |
1332 | if (rcur == NULL) |
1333 | return; |
1334 | agbp = rcur->bc_ag.agbp; |
1335 | xfs_btree_del_cursor(cur: rcur, error); |
1336 | if (error) |
1337 | xfs_trans_brelse(tp, agbp); |
1338 | } |
1339 | |
1340 | /* |
1341 | * Set up a continuation a deferred refcount operation by updating the intent. |
1342 | * Checks to make sure we're not going to run off the end of the AG. |
1343 | */ |
1344 | static inline int |
1345 | xfs_refcount_continue_op( |
1346 | struct xfs_btree_cur *cur, |
1347 | struct xfs_refcount_intent *ri, |
1348 | xfs_agblock_t new_agbno) |
1349 | { |
1350 | struct xfs_mount *mp = cur->bc_mp; |
1351 | struct xfs_perag *pag = cur->bc_ag.pag; |
1352 | |
1353 | if (XFS_IS_CORRUPT(mp, !xfs_verify_agbext(pag, new_agbno, |
1354 | ri->ri_blockcount))) { |
1355 | xfs_btree_mark_sick(cur); |
1356 | return -EFSCORRUPTED; |
1357 | } |
1358 | |
1359 | ri->ri_startblock = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno); |
1360 | |
1361 | ASSERT(xfs_verify_fsbext(mp, ri->ri_startblock, ri->ri_blockcount)); |
1362 | ASSERT(pag->pag_agno == XFS_FSB_TO_AGNO(mp, ri->ri_startblock)); |
1363 | |
1364 | return 0; |
1365 | } |
1366 | |
1367 | /* |
1368 | * Process one of the deferred refcount operations. We pass back the |
1369 | * btree cursor to maintain our lock on the btree between calls. |
1370 | * This saves time and eliminates a buffer deadlock between the |
1371 | * superblock and the AGF because we'll always grab them in the same |
1372 | * order. |
1373 | */ |
1374 | int |
1375 | xfs_refcount_finish_one( |
1376 | struct xfs_trans *tp, |
1377 | struct xfs_refcount_intent *ri, |
1378 | struct xfs_btree_cur **pcur) |
1379 | { |
1380 | struct xfs_mount *mp = tp->t_mountp; |
1381 | struct xfs_btree_cur *rcur; |
1382 | struct xfs_buf *agbp = NULL; |
1383 | int error = 0; |
1384 | xfs_agblock_t bno; |
1385 | unsigned long nr_ops = 0; |
1386 | int shape_changes = 0; |
1387 | |
1388 | bno = XFS_FSB_TO_AGBNO(mp, ri->ri_startblock); |
1389 | |
1390 | trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, ri->ri_startblock), |
1391 | ri->ri_type, XFS_FSB_TO_AGBNO(mp, ri->ri_startblock), |
1392 | ri->ri_blockcount); |
1393 | |
1394 | if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE)) |
1395 | return -EIO; |
1396 | |
1397 | /* |
1398 | * If we haven't gotten a cursor or the cursor AG doesn't match |
1399 | * the startblock, get one now. |
1400 | */ |
1401 | rcur = *pcur; |
1402 | if (rcur != NULL && rcur->bc_ag.pag != ri->ri_pag) { |
1403 | nr_ops = rcur->bc_refc.nr_ops; |
1404 | shape_changes = rcur->bc_refc.shape_changes; |
1405 | xfs_refcount_finish_one_cleanup(tp, rcur, error: 0); |
1406 | rcur = NULL; |
1407 | *pcur = NULL; |
1408 | } |
1409 | if (rcur == NULL) { |
1410 | error = xfs_alloc_read_agf(pag: ri->ri_pag, tp, |
1411 | XFS_ALLOC_FLAG_FREEING, agfbpp: &agbp); |
1412 | if (error) |
1413 | return error; |
1414 | |
1415 | rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag: ri->ri_pag); |
1416 | rcur->bc_refc.nr_ops = nr_ops; |
1417 | rcur->bc_refc.shape_changes = shape_changes; |
1418 | } |
1419 | *pcur = rcur; |
1420 | |
1421 | switch (ri->ri_type) { |
1422 | case XFS_REFCOUNT_INCREASE: |
1423 | error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount, |
1424 | XFS_REFCOUNT_ADJUST_INCREASE); |
1425 | if (error) |
1426 | return error; |
1427 | if (ri->ri_blockcount > 0) |
1428 | error = xfs_refcount_continue_op(rcur, ri, bno); |
1429 | break; |
1430 | case XFS_REFCOUNT_DECREASE: |
1431 | error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount, |
1432 | XFS_REFCOUNT_ADJUST_DECREASE); |
1433 | if (error) |
1434 | return error; |
1435 | if (ri->ri_blockcount > 0) |
1436 | error = xfs_refcount_continue_op(rcur, ri, bno); |
1437 | break; |
1438 | case XFS_REFCOUNT_ALLOC_COW: |
1439 | error = __xfs_refcount_cow_alloc(rcur, bno, ri->ri_blockcount); |
1440 | if (error) |
1441 | return error; |
1442 | ri->ri_blockcount = 0; |
1443 | break; |
1444 | case XFS_REFCOUNT_FREE_COW: |
1445 | error = __xfs_refcount_cow_free(rcur, bno, ri->ri_blockcount); |
1446 | if (error) |
1447 | return error; |
1448 | ri->ri_blockcount = 0; |
1449 | break; |
1450 | default: |
1451 | ASSERT(0); |
1452 | return -EFSCORRUPTED; |
1453 | } |
1454 | if (!error && ri->ri_blockcount > 0) |
1455 | trace_xfs_refcount_finish_one_leftover(mp, ri->ri_pag->pag_agno, |
1456 | ri->ri_type, bno, ri->ri_blockcount); |
1457 | return error; |
1458 | } |
1459 | |
1460 | /* |
1461 | * Record a refcount intent for later processing. |
1462 | */ |
1463 | static void |
1464 | __xfs_refcount_add( |
1465 | struct xfs_trans *tp, |
1466 | enum xfs_refcount_intent_type type, |
1467 | xfs_fsblock_t startblock, |
1468 | xfs_extlen_t blockcount) |
1469 | { |
1470 | struct xfs_refcount_intent *ri; |
1471 | |
1472 | trace_xfs_refcount_defer(tp->t_mountp, |
1473 | XFS_FSB_TO_AGNO(tp->t_mountp, startblock), |
1474 | type, XFS_FSB_TO_AGBNO(tp->t_mountp, startblock), |
1475 | blockcount); |
1476 | |
1477 | ri = kmem_cache_alloc(xfs_refcount_intent_cache, |
1478 | GFP_KERNEL | __GFP_NOFAIL); |
1479 | INIT_LIST_HEAD(&ri->ri_list); |
1480 | ri->ri_type = type; |
1481 | ri->ri_startblock = startblock; |
1482 | ri->ri_blockcount = blockcount; |
1483 | |
1484 | xfs_refcount_update_get_group(mp: tp->t_mountp, ri); |
1485 | xfs_defer_add(tp, h: &ri->ri_list, ops: &xfs_refcount_update_defer_type); |
1486 | } |
1487 | |
1488 | /* |
1489 | * Increase the reference count of the blocks backing a file's extent. |
1490 | */ |
1491 | void |
1492 | xfs_refcount_increase_extent( |
1493 | struct xfs_trans *tp, |
1494 | struct xfs_bmbt_irec *PREV) |
1495 | { |
1496 | if (!xfs_has_reflink(tp->t_mountp)) |
1497 | return; |
1498 | |
1499 | __xfs_refcount_add(tp, XFS_REFCOUNT_INCREASE, PREV->br_startblock, |
1500 | PREV->br_blockcount); |
1501 | } |
1502 | |
1503 | /* |
1504 | * Decrease the reference count of the blocks backing a file's extent. |
1505 | */ |
1506 | void |
1507 | xfs_refcount_decrease_extent( |
1508 | struct xfs_trans *tp, |
1509 | struct xfs_bmbt_irec *PREV) |
1510 | { |
1511 | if (!xfs_has_reflink(tp->t_mountp)) |
1512 | return; |
1513 | |
1514 | __xfs_refcount_add(tp, XFS_REFCOUNT_DECREASE, PREV->br_startblock, |
1515 | PREV->br_blockcount); |
1516 | } |
1517 | |
1518 | /* |
1519 | * Given an AG extent, find the lowest-numbered run of shared blocks |
1520 | * within that range and return the range in fbno/flen. If |
1521 | * find_end_of_shared is set, return the longest contiguous extent of |
1522 | * shared blocks; if not, just return the first extent we find. If no |
1523 | * shared blocks are found, fbno and flen will be set to NULLAGBLOCK |
1524 | * and 0, respectively. |
1525 | */ |
1526 | int |
1527 | xfs_refcount_find_shared( |
1528 | struct xfs_btree_cur *cur, |
1529 | xfs_agblock_t agbno, |
1530 | xfs_extlen_t aglen, |
1531 | xfs_agblock_t *fbno, |
1532 | xfs_extlen_t *flen, |
1533 | bool find_end_of_shared) |
1534 | { |
1535 | struct xfs_refcount_irec tmp; |
1536 | int i; |
1537 | int have; |
1538 | int error; |
1539 | |
1540 | trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_ag.pag->pag_agno, |
1541 | agbno, aglen); |
1542 | |
1543 | /* By default, skip the whole range */ |
1544 | *fbno = NULLAGBLOCK; |
1545 | *flen = 0; |
1546 | |
1547 | /* Try to find a refcount extent that crosses the start */ |
1548 | error = xfs_refcount_lookup_le(cur, XFS_REFC_DOMAIN_SHARED, agbno, |
1549 | &have); |
1550 | if (error) |
1551 | goto out_error; |
1552 | if (!have) { |
1553 | /* No left extent, look at the next one */ |
1554 | error = xfs_btree_increment(cur, 0, &have); |
1555 | if (error) |
1556 | goto out_error; |
1557 | if (!have) |
1558 | goto done; |
1559 | } |
1560 | error = xfs_refcount_get_rec(cur, irec: &tmp, stat: &i); |
1561 | if (error) |
1562 | goto out_error; |
1563 | if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { |
1564 | xfs_btree_mark_sick(cur); |
1565 | error = -EFSCORRUPTED; |
1566 | goto out_error; |
1567 | } |
1568 | if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED) |
1569 | goto done; |
1570 | |
1571 | /* If the extent ends before the start, look at the next one */ |
1572 | if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) { |
1573 | error = xfs_btree_increment(cur, 0, &have); |
1574 | if (error) |
1575 | goto out_error; |
1576 | if (!have) |
1577 | goto done; |
1578 | error = xfs_refcount_get_rec(cur, irec: &tmp, stat: &i); |
1579 | if (error) |
1580 | goto out_error; |
1581 | if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { |
1582 | xfs_btree_mark_sick(cur); |
1583 | error = -EFSCORRUPTED; |
1584 | goto out_error; |
1585 | } |
1586 | if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED) |
1587 | goto done; |
1588 | } |
1589 | |
1590 | /* If the extent starts after the range we want, bail out */ |
1591 | if (tmp.rc_startblock >= agbno + aglen) |
1592 | goto done; |
1593 | |
1594 | /* We found the start of a shared extent! */ |
1595 | if (tmp.rc_startblock < agbno) { |
1596 | tmp.rc_blockcount -= (agbno - tmp.rc_startblock); |
1597 | tmp.rc_startblock = agbno; |
1598 | } |
1599 | |
1600 | *fbno = tmp.rc_startblock; |
1601 | *flen = min(tmp.rc_blockcount, agbno + aglen - *fbno); |
1602 | if (!find_end_of_shared) |
1603 | goto done; |
1604 | |
1605 | /* Otherwise, find the end of this shared extent */ |
1606 | while (*fbno + *flen < agbno + aglen) { |
1607 | error = xfs_btree_increment(cur, 0, &have); |
1608 | if (error) |
1609 | goto out_error; |
1610 | if (!have) |
1611 | break; |
1612 | error = xfs_refcount_get_rec(cur, irec: &tmp, stat: &i); |
1613 | if (error) |
1614 | goto out_error; |
1615 | if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { |
1616 | xfs_btree_mark_sick(cur); |
1617 | error = -EFSCORRUPTED; |
1618 | goto out_error; |
1619 | } |
1620 | if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED || |
1621 | tmp.rc_startblock >= agbno + aglen || |
1622 | tmp.rc_startblock != *fbno + *flen) |
1623 | break; |
1624 | *flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno); |
1625 | } |
1626 | |
1627 | done: |
1628 | trace_xfs_refcount_find_shared_result(cur->bc_mp, |
1629 | cur->bc_ag.pag->pag_agno, *fbno, *flen); |
1630 | |
1631 | out_error: |
1632 | if (error) |
1633 | trace_xfs_refcount_find_shared_error(cur->bc_mp, |
1634 | cur->bc_ag.pag->pag_agno, error, _RET_IP_); |
1635 | return error; |
1636 | } |
1637 | |
1638 | /* |
1639 | * Recovering CoW Blocks After a Crash |
1640 | * |
1641 | * Due to the way that the copy on write mechanism works, there's a window of |
1642 | * opportunity in which we can lose track of allocated blocks during a crash. |
1643 | * Because CoW uses delayed allocation in the in-core CoW fork, writeback |
1644 | * causes blocks to be allocated and stored in the CoW fork. The blocks are |
1645 | * no longer in the free space btree but are not otherwise recorded anywhere |
1646 | * until the write completes and the blocks are mapped into the file. A crash |
1647 | * in between allocation and remapping results in the replacement blocks being |
1648 | * lost. This situation is exacerbated by the CoW extent size hint because |
 * allocations can hang around for a long time.
1650 | * |
1651 | * However, there is a place where we can record these allocations before they |
1652 | * become mappings -- the reference count btree. The btree does not record |
1653 | * extents with refcount == 1, so we can record allocations with a refcount of |
1654 | * 1. Blocks being used for CoW writeout cannot be shared, so there should be |
1655 | * no conflict with shared block records. These mappings should be created |
1656 | * when we allocate blocks to the CoW fork and deleted when they're removed |
1657 | * from the CoW fork. |
1658 | * |
1659 | * Minor nit: records for in-progress CoW allocations and records for shared |
1660 | * extents must never be merged, to preserve the property that (except for CoW |
1661 | * allocations) there are no refcount btree entries with refcount == 1. The |
1662 | * only time this could potentially happen is when unsharing a block that's |
1663 | * adjacent to CoW allocations, so we must be careful to avoid this. |
1664 | * |
1665 | * At mount time we recover lost CoW allocations by searching the refcount |
1666 | * btree for these refcount == 1 mappings. These represent CoW allocations |
1667 | * that were in progress at the time the filesystem went down, so we can free |
1668 | * them to get the space back. |
1669 | * |
1670 | * This mechanism is superior to creating EFIs for unmapped CoW extents for |
1671 | * several reasons -- first, EFIs pin the tail of the log and would have to be |
1672 | * periodically relogged to avoid filling up the log. Second, CoW completions |
1673 | * will have to file an EFD and create new EFIs for whatever remains in the |
1674 | * CoW fork; this partially takes care of (1) but extent-size reservations |
1675 | * will have to periodically relog even if there's no writeout in progress. |
1676 | * This can happen if the CoW extent size hint is set, which you really want. |
1677 | * Third, EFIs cannot currently be automatically relogged into newer |
1678 | * transactions to advance the log tail. Fourth, stuffing the log full of |
1679 | * EFIs places an upper bound on the number of CoW allocations that can be |
1680 | * held filesystem-wide at any given time. Recording them in the refcount |
1681 | * btree doesn't require us to maintain any state in memory and doesn't pin |
1682 | * the log. |
1683 | */ |
1684 | /* |
1685 | * Adjust the refcounts of CoW allocations. These allocations are "magic" |
1686 | * in that they're not referenced anywhere else in the filesystem, so we |
1687 | * stash them in the refcount btree with a refcount of 1 until either file |
1688 | * remapping (or CoW cancellation) happens. |
1689 | */ |
1690 | STATIC int |
1691 | xfs_refcount_adjust_cow_extents( |
1692 | struct xfs_btree_cur *cur, |
1693 | xfs_agblock_t agbno, |
1694 | xfs_extlen_t aglen, |
1695 | enum xfs_refc_adjust_op adj) |
1696 | { |
1697 | struct xfs_refcount_irec ext, tmp; |
1698 | int error; |
1699 | int found_rec, found_tmp; |
1700 | |
1701 | if (aglen == 0) |
1702 | return 0; |
1703 | |
1704 | /* Find any overlapping refcount records */ |
1705 | error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_COW, agbno, |
1706 | &found_rec); |
1707 | if (error) |
1708 | goto out_error; |
1709 | error = xfs_refcount_get_rec(cur, irec: &ext, stat: &found_rec); |
1710 | if (error) |
1711 | goto out_error; |
1712 | if (XFS_IS_CORRUPT(cur->bc_mp, found_rec && |
1713 | ext.rc_domain != XFS_REFC_DOMAIN_COW)) { |
1714 | xfs_btree_mark_sick(cur); |
1715 | error = -EFSCORRUPTED; |
1716 | goto out_error; |
1717 | } |
1718 | if (!found_rec) { |
1719 | ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks; |
1720 | ext.rc_blockcount = 0; |
1721 | ext.rc_refcount = 0; |
1722 | ext.rc_domain = XFS_REFC_DOMAIN_COW; |
1723 | } |
1724 | |
1725 | switch (adj) { |
1726 | case XFS_REFCOUNT_ADJUST_COW_ALLOC: |
1727 | /* Adding a CoW reservation, there should be nothing here. */ |
1728 | if (XFS_IS_CORRUPT(cur->bc_mp, |
1729 | agbno + aglen > ext.rc_startblock)) { |
1730 | xfs_btree_mark_sick(cur); |
1731 | error = -EFSCORRUPTED; |
1732 | goto out_error; |
1733 | } |
1734 | |
1735 | tmp.rc_startblock = agbno; |
1736 | tmp.rc_blockcount = aglen; |
1737 | tmp.rc_refcount = 1; |
1738 | tmp.rc_domain = XFS_REFC_DOMAIN_COW; |
1739 | |
1740 | trace_xfs_refcount_modify_extent(cur->bc_mp, |
1741 | cur->bc_ag.pag->pag_agno, &tmp); |
1742 | |
1743 | error = xfs_refcount_insert(cur, irec: &tmp, |
1744 | i: &found_tmp); |
1745 | if (error) |
1746 | goto out_error; |
1747 | if (XFS_IS_CORRUPT(cur->bc_mp, found_tmp != 1)) { |
1748 | xfs_btree_mark_sick(cur); |
1749 | error = -EFSCORRUPTED; |
1750 | goto out_error; |
1751 | } |
1752 | break; |
1753 | case XFS_REFCOUNT_ADJUST_COW_FREE: |
1754 | /* Removing a CoW reservation, there should be one extent. */ |
1755 | if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_startblock != agbno)) { |
1756 | xfs_btree_mark_sick(cur); |
1757 | error = -EFSCORRUPTED; |
1758 | goto out_error; |
1759 | } |
1760 | if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount != aglen)) { |
1761 | xfs_btree_mark_sick(cur); |
1762 | error = -EFSCORRUPTED; |
1763 | goto out_error; |
1764 | } |
1765 | if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_refcount != 1)) { |
1766 | xfs_btree_mark_sick(cur); |
1767 | error = -EFSCORRUPTED; |
1768 | goto out_error; |
1769 | } |
1770 | |
1771 | ext.rc_refcount = 0; |
1772 | trace_xfs_refcount_modify_extent(cur->bc_mp, |
1773 | cur->bc_ag.pag->pag_agno, &ext); |
1774 | error = xfs_refcount_delete(cur, &found_rec); |
1775 | if (error) |
1776 | goto out_error; |
1777 | if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { |
1778 | xfs_btree_mark_sick(cur); |
1779 | error = -EFSCORRUPTED; |
1780 | goto out_error; |
1781 | } |
1782 | break; |
1783 | default: |
1784 | ASSERT(0); |
1785 | } |
1786 | |
1787 | return error; |
1788 | out_error: |
1789 | trace_xfs_refcount_modify_extent_error(cur->bc_mp, |
1790 | cur->bc_ag.pag->pag_agno, error, _RET_IP_); |
1791 | return error; |
1792 | } |
1793 | |
1794 | /* |
1795 | * Add or remove refcount btree entries for CoW reservations. |
1796 | */ |
1797 | STATIC int |
1798 | xfs_refcount_adjust_cow( |
1799 | struct xfs_btree_cur *cur, |
1800 | xfs_agblock_t agbno, |
1801 | xfs_extlen_t aglen, |
1802 | enum xfs_refc_adjust_op adj) |
1803 | { |
1804 | bool shape_changed; |
1805 | int error; |
1806 | |
1807 | /* |
1808 | * Ensure that no rcextents cross the boundary of the adjustment range. |
1809 | */ |
1810 | error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW, |
1811 | agbno, &shape_changed); |
1812 | if (error) |
1813 | goto out_error; |
1814 | |
1815 | error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW, |
1816 | agbno + aglen, &shape_changed); |
1817 | if (error) |
1818 | goto out_error; |
1819 | |
1820 | /* |
1821 | * Try to merge with the left or right extents of the range. |
1822 | */ |
1823 | error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_COW, &agbno, |
1824 | &aglen, adj, &shape_changed); |
1825 | if (error) |
1826 | goto out_error; |
1827 | |
1828 | /* Now that we've taken care of the ends, adjust the middle extents */ |
1829 | error = xfs_refcount_adjust_cow_extents(cur, agbno, aglen, adj); |
1830 | if (error) |
1831 | goto out_error; |
1832 | |
1833 | return 0; |
1834 | |
1835 | out_error: |
1836 | trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_ag.pag->pag_agno, |
1837 | error, _RET_IP_); |
1838 | return error; |
1839 | } |
1840 | |
1841 | /* |
1842 | * Record a CoW allocation in the refcount btree. |
1843 | */ |
1844 | STATIC int |
1845 | __xfs_refcount_cow_alloc( |
1846 | struct xfs_btree_cur *rcur, |
1847 | xfs_agblock_t agbno, |
1848 | xfs_extlen_t aglen) |
1849 | { |
1850 | trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_ag.pag->pag_agno, |
1851 | agbno, aglen); |
1852 | |
1853 | /* Add refcount btree reservation */ |
1854 | return xfs_refcount_adjust_cow(rcur, agbno, aglen, |
1855 | XFS_REFCOUNT_ADJUST_COW_ALLOC); |
1856 | } |
1857 | |
1858 | /* |
1859 | * Remove a CoW allocation from the refcount btree. |
1860 | */ |
1861 | STATIC int |
1862 | __xfs_refcount_cow_free( |
1863 | struct xfs_btree_cur *rcur, |
1864 | xfs_agblock_t agbno, |
1865 | xfs_extlen_t aglen) |
1866 | { |
1867 | trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_ag.pag->pag_agno, |
1868 | agbno, aglen); |
1869 | |
1870 | /* Remove refcount btree reservation */ |
1871 | return xfs_refcount_adjust_cow(rcur, agbno, aglen, |
1872 | XFS_REFCOUNT_ADJUST_COW_FREE); |
1873 | } |
1874 | |
1875 | /* Record a CoW staging extent in the refcount btree. */ |
1876 | void |
1877 | xfs_refcount_alloc_cow_extent( |
1878 | struct xfs_trans *tp, |
1879 | xfs_fsblock_t fsb, |
1880 | xfs_extlen_t len) |
1881 | { |
1882 | struct xfs_mount *mp = tp->t_mountp; |
1883 | |
1884 | if (!xfs_has_reflink(mp)) |
1885 | return; |
1886 | |
1887 | __xfs_refcount_add(tp, XFS_REFCOUNT_ALLOC_COW, fsb, len); |
1888 | |
1889 | /* Add rmap entry */ |
1890 | xfs_rmap_alloc_extent(tp, XFS_FSB_TO_AGNO(mp, fsb), |
1891 | XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW); |
1892 | } |
1893 | |
1894 | /* Forget a CoW staging event in the refcount btree. */ |
1895 | void |
1896 | xfs_refcount_free_cow_extent( |
1897 | struct xfs_trans *tp, |
1898 | xfs_fsblock_t fsb, |
1899 | xfs_extlen_t len) |
1900 | { |
1901 | struct xfs_mount *mp = tp->t_mountp; |
1902 | |
1903 | if (!xfs_has_reflink(mp)) |
1904 | return; |
1905 | |
1906 | /* Remove rmap entry */ |
1907 | xfs_rmap_free_extent(tp, XFS_FSB_TO_AGNO(mp, fsb), |
1908 | XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW); |
1909 | __xfs_refcount_add(tp, XFS_REFCOUNT_FREE_COW, fsb, len); |
1910 | } |
1911 | |
/*
 * One leftover refcount record gathered by xfs_refcount_recover_extent() and
 * later consumed by xfs_refcount_recover_cow_leftovers().
 */
struct xfs_refcount_recovery {
	/* Linkage on the caller's list of gathered records. */
	struct list_head		rr_list;
	/* In-core copy of the btree record, as decoded from disk format. */
	struct xfs_refcount_irec	rr_rrec;
};
1916 | |
1917 | /* Stuff an extent on the recovery list. */ |
1918 | STATIC int |
1919 | xfs_refcount_recover_extent( |
1920 | struct xfs_btree_cur *cur, |
1921 | const union xfs_btree_rec *rec, |
1922 | void *priv) |
1923 | { |
1924 | struct list_head *debris = priv; |
1925 | struct xfs_refcount_recovery *rr; |
1926 | |
1927 | if (XFS_IS_CORRUPT(cur->bc_mp, |
1928 | be32_to_cpu(rec->refc.rc_refcount) != 1)) { |
1929 | xfs_btree_mark_sick(cur); |
1930 | return -EFSCORRUPTED; |
1931 | } |
1932 | |
1933 | rr = kmalloc(sizeof(struct xfs_refcount_recovery), |
1934 | GFP_KERNEL | __GFP_NOFAIL); |
1935 | INIT_LIST_HEAD(&rr->rr_list); |
1936 | xfs_refcount_btrec_to_irec(rec, irec: &rr->rr_rrec); |
1937 | |
1938 | if (xfs_refcount_check_irec(cur->bc_ag.pag, &rr->rr_rrec) != NULL || |
1939 | XFS_IS_CORRUPT(cur->bc_mp, |
1940 | rr->rr_rrec.rc_domain != XFS_REFC_DOMAIN_COW)) { |
1941 | xfs_btree_mark_sick(cur); |
1942 | kfree(rr); |
1943 | return -EFSCORRUPTED; |
1944 | } |
1945 | |
1946 | list_add_tail(&rr->rr_list, debris); |
1947 | return 0; |
1948 | } |
1949 | |
1950 | /* Find and remove leftover CoW reservations. */ |
1951 | int |
1952 | xfs_refcount_recover_cow_leftovers( |
1953 | struct xfs_mount *mp, |
1954 | struct xfs_perag *pag) |
1955 | { |
1956 | struct xfs_trans *tp; |
1957 | struct xfs_btree_cur *cur; |
1958 | struct xfs_buf *agbp; |
1959 | struct xfs_refcount_recovery *rr, *n; |
1960 | struct list_head debris; |
1961 | union xfs_btree_irec low = { |
1962 | .rc.rc_domain = XFS_REFC_DOMAIN_COW, |
1963 | }; |
1964 | union xfs_btree_irec high = { |
1965 | .rc.rc_domain = XFS_REFC_DOMAIN_COW, |
1966 | .rc.rc_startblock = -1U, |
1967 | }; |
1968 | xfs_fsblock_t fsb; |
1969 | int error; |
1970 | |
1971 | /* reflink filesystems mustn't have AGs larger than 2^31-1 blocks */ |
1972 | BUILD_BUG_ON(XFS_MAX_CRC_AG_BLOCKS >= XFS_REFC_COWFLAG); |
1973 | if (mp->m_sb.sb_agblocks > XFS_MAX_CRC_AG_BLOCKS) |
1974 | return -EOPNOTSUPP; |
1975 | |
1976 | INIT_LIST_HEAD(&debris); |
1977 | |
1978 | /* |
1979 | * In this first part, we use an empty transaction to gather up |
1980 | * all the leftover CoW extents so that we can subsequently |
1981 | * delete them. The empty transaction is used to avoid |
1982 | * a buffer lock deadlock if there happens to be a loop in the |
1983 | * refcountbt because we're allowed to re-grab a buffer that is |
1984 | * already attached to our transaction. When we're done |
1985 | * recording the CoW debris we cancel the (empty) transaction |
1986 | * and everything goes away cleanly. |
1987 | */ |
1988 | error = xfs_trans_alloc_empty(mp, &tp); |
1989 | if (error) |
1990 | return error; |
1991 | |
1992 | error = xfs_alloc_read_agf(pag, tp, flags: 0, agfbpp: &agbp); |
1993 | if (error) |
1994 | goto out_trans; |
1995 | cur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag); |
1996 | |
1997 | /* Find all the leftover CoW staging extents. */ |
1998 | error = xfs_btree_query_range(cur, low_rec: &low, high_rec: &high, |
1999 | fn: xfs_refcount_recover_extent, priv: &debris); |
2000 | xfs_btree_del_cursor(cur, error); |
2001 | xfs_trans_brelse(tp, agbp); |
2002 | xfs_trans_cancel(tp); |
2003 | if (error) |
2004 | goto out_free; |
2005 | |
2006 | /* Now iterate the list to free the leftovers */ |
2007 | list_for_each_entry_safe(rr, n, &debris, rr_list) { |
2008 | /* Set up transaction. */ |
2009 | error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp); |
2010 | if (error) |
2011 | goto out_free; |
2012 | |
2013 | trace_xfs_refcount_recover_extent(mp, pag->pag_agno, |
2014 | &rr->rr_rrec); |
2015 | |
2016 | /* Free the orphan record */ |
2017 | fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, |
2018 | rr->rr_rrec.rc_startblock); |
2019 | xfs_refcount_free_cow_extent(tp, fsb, |
2020 | rr->rr_rrec.rc_blockcount); |
2021 | |
2022 | /* Free the block. */ |
2023 | error = xfs_free_extent_later(tp, fsb, |
2024 | rr->rr_rrec.rc_blockcount, NULL, |
2025 | XFS_AG_RESV_NONE, false); |
2026 | if (error) |
2027 | goto out_trans; |
2028 | |
2029 | error = xfs_trans_commit(tp); |
2030 | if (error) |
2031 | goto out_free; |
2032 | |
2033 | list_del(&rr->rr_list); |
2034 | kfree(rr); |
2035 | } |
2036 | |
2037 | return error; |
2038 | out_trans: |
2039 | xfs_trans_cancel(tp); |
2040 | out_free: |
2041 | /* Free the leftover list */ |
2042 | list_for_each_entry_safe(rr, n, &debris, rr_list) { |
2043 | list_del(&rr->rr_list); |
2044 | kfree(rr); |
2045 | } |
2046 | return error; |
2047 | } |
2048 | |
2049 | /* |
2050 | * Scan part of the keyspace of the refcount records and tell us if the area |
2051 | * has no records, is fully mapped by records, or is partially filled. |
2052 | */ |
2053 | int |
2054 | xfs_refcount_has_records( |
2055 | struct xfs_btree_cur *cur, |
2056 | enum xfs_refc_domain domain, |
2057 | xfs_agblock_t bno, |
2058 | xfs_extlen_t len, |
2059 | enum xbtree_recpacking *outcome) |
2060 | { |
2061 | union xfs_btree_irec low; |
2062 | union xfs_btree_irec high; |
2063 | |
2064 | memset(&low, 0, sizeof(low)); |
2065 | low.rc.rc_startblock = bno; |
2066 | memset(&high, 0xFF, sizeof(high)); |
2067 | high.rc.rc_startblock = bno + len - 1; |
2068 | low.rc.rc_domain = high.rc.rc_domain = domain; |
2069 | |
2070 | return xfs_btree_has_records(cur, &low, &high, NULL, outcome); |
2071 | } |
2072 | |
/*
 * Context passed through xfs_btree_query_range() so that
 * xfs_refcount_query_range_helper() can reach the caller's callback.
 */
struct xfs_refcount_query_range_info {
	/* Caller's callback, invoked with each decoded record. */
	xfs_refcount_query_range_fn	fn;
	/* Opaque pointer passed back to @fn unchanged. */
	void				*priv;
};
2077 | |
2078 | /* Format btree record and pass to our callback. */ |
2079 | STATIC int |
2080 | xfs_refcount_query_range_helper( |
2081 | struct xfs_btree_cur *cur, |
2082 | const union xfs_btree_rec *rec, |
2083 | void *priv) |
2084 | { |
2085 | struct xfs_refcount_query_range_info *query = priv; |
2086 | struct xfs_refcount_irec irec; |
2087 | xfs_failaddr_t fa; |
2088 | |
2089 | xfs_refcount_btrec_to_irec(rec, irec: &irec); |
2090 | fa = xfs_refcount_check_irec(cur->bc_ag.pag, &irec); |
2091 | if (fa) |
2092 | return xfs_refcount_complain_bad_rec(cur, fa, &irec); |
2093 | |
2094 | return query->fn(cur, &irec, query->priv); |
2095 | } |
2096 | |
2097 | /* Find all refcount records between two keys. */ |
2098 | int |
2099 | xfs_refcount_query_range( |
2100 | struct xfs_btree_cur *cur, |
2101 | const struct xfs_refcount_irec *low_rec, |
2102 | const struct xfs_refcount_irec *high_rec, |
2103 | xfs_refcount_query_range_fn fn, |
2104 | void *priv) |
2105 | { |
2106 | union xfs_btree_irec low_brec = { .rc = *low_rec }; |
2107 | union xfs_btree_irec high_brec = { .rc = *high_rec }; |
2108 | struct xfs_refcount_query_range_info query = { .priv = priv, .fn = fn }; |
2109 | |
2110 | return xfs_btree_query_range(cur, low_rec: &low_brec, high_rec: &high_brec, |
2111 | fn: xfs_refcount_query_range_helper, priv: &query); |
2112 | } |
2113 | |
2114 | int __init |
2115 | xfs_refcount_intent_init_cache(void) |
2116 | { |
2117 | xfs_refcount_intent_cache = kmem_cache_create("xfs_refc_intent" , |
2118 | sizeof(struct xfs_refcount_intent), |
2119 | 0, 0, NULL); |
2120 | |
2121 | return xfs_refcount_intent_cache != NULL ? 0 : -ENOMEM; |
2122 | } |
2123 | |
2124 | void |
2125 | xfs_refcount_intent_destroy_cache(void) |
2126 | { |
2127 | kmem_cache_destroy(xfs_refcount_intent_cache); |
2128 | xfs_refcount_intent_cache = NULL; |
2129 | } |
2130 | |