1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. |
4 | * All Rights Reserved. |
5 | */ |
6 | #include "xfs.h" |
7 | #include "xfs_fs.h" |
8 | #include "xfs_shared.h" |
9 | #include "xfs_format.h" |
10 | #include "xfs_log_format.h" |
11 | #include "xfs_trans_resv.h" |
12 | #include "xfs_bit.h" |
13 | #include "xfs_mount.h" |
14 | #include "xfs_inode.h" |
15 | #include "xfs_trans.h" |
16 | #include "xfs_buf_item.h" |
17 | #include "xfs_btree.h" |
18 | #include "xfs_errortag.h" |
19 | #include "xfs_error.h" |
20 | #include "xfs_trace.h" |
21 | #include "xfs_alloc.h" |
22 | #include "xfs_log.h" |
23 | #include "xfs_btree_staging.h" |
24 | #include "xfs_ag.h" |
25 | #include "xfs_alloc_btree.h" |
26 | #include "xfs_ialloc_btree.h" |
27 | #include "xfs_bmap_btree.h" |
28 | #include "xfs_rmap_btree.h" |
29 | #include "xfs_refcount_btree.h" |
30 | #include "xfs_health.h" |
31 | #include "xfs_buf_mem.h" |
32 | #include "xfs_btree_mem.h" |
33 | |
34 | /* |
35 | * Btree magic numbers. |
36 | */ |
37 | uint32_t |
38 | xfs_btree_magic( |
39 | struct xfs_mount *mp, |
40 | const struct xfs_btree_ops *ops) |
41 | { |
42 | int idx = xfs_has_crc(mp) ? 1 : 0; |
43 | __be32 magic = ops->buf_ops->magic[idx]; |
44 | |
45 | /* Ensure we asked for crc for crc-only magics. */ |
46 | ASSERT(magic != 0); |
47 | return be32_to_cpu(magic); |
48 | } |
49 | |
50 | /* |
51 | * These sibling pointer checks are optimised for null sibling pointers. This |
52 | * happens a lot, and we don't need to byte swap at runtime if the sibling |
53 | * pointer is NULL. |
54 | * |
55 | * These are explicitly marked at inline because the cost of calling them as |
56 | * functions instead of inlining them is about 36 bytes extra code per call site |
57 | * on x86-64. Yes, gcc-11 fails to inline them, and explicit inlining of these |
58 | * two sibling check functions reduces the compiled code size by over 300 |
59 | * bytes. |
60 | */ |
61 | static inline xfs_failaddr_t |
62 | xfs_btree_check_fsblock_siblings( |
63 | struct xfs_mount *mp, |
64 | xfs_fsblock_t fsb, |
65 | __be64 dsibling) |
66 | { |
67 | xfs_fsblock_t sibling; |
68 | |
69 | if (dsibling == cpu_to_be64(NULLFSBLOCK)) |
70 | return NULL; |
71 | |
72 | sibling = be64_to_cpu(dsibling); |
73 | if (sibling == fsb) |
74 | return __this_address; |
75 | if (!xfs_verify_fsbno(mp, sibling)) |
76 | return __this_address; |
77 | return NULL; |
78 | } |
79 | |
80 | static inline xfs_failaddr_t |
81 | xfs_btree_check_memblock_siblings( |
82 | struct xfs_buftarg *btp, |
83 | xfbno_t bno, |
84 | __be64 dsibling) |
85 | { |
86 | xfbno_t sibling; |
87 | |
88 | if (dsibling == cpu_to_be64(NULLFSBLOCK)) |
89 | return NULL; |
90 | |
91 | sibling = be64_to_cpu(dsibling); |
92 | if (sibling == bno) |
93 | return __this_address; |
94 | if (!xmbuf_verify_daddr(btp, xfbno_to_daddr(sibling))) |
95 | return __this_address; |
96 | return NULL; |
97 | } |
98 | |
99 | static inline xfs_failaddr_t |
100 | xfs_btree_check_agblock_siblings( |
101 | struct xfs_perag *pag, |
102 | xfs_agblock_t agbno, |
103 | __be32 dsibling) |
104 | { |
105 | xfs_agblock_t sibling; |
106 | |
107 | if (dsibling == cpu_to_be32(NULLAGBLOCK)) |
108 | return NULL; |
109 | |
110 | sibling = be32_to_cpu(dsibling); |
111 | if (sibling == agbno) |
112 | return __this_address; |
113 | if (!xfs_verify_agbno(pag, sibling)) |
114 | return __this_address; |
115 | return NULL; |
116 | } |
117 | |
118 | static xfs_failaddr_t |
119 | __xfs_btree_check_lblock_hdr( |
120 | struct xfs_btree_cur *cur, |
121 | struct xfs_btree_block *block, |
122 | int level, |
123 | struct xfs_buf *bp) |
124 | { |
125 | struct xfs_mount *mp = cur->bc_mp; |
126 | |
127 | if (xfs_has_crc(mp)) { |
128 | if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid)) |
129 | return __this_address; |
130 | if (block->bb_u.l.bb_blkno != |
131 | cpu_to_be64(bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL)) |
132 | return __this_address; |
133 | if (block->bb_u.l.bb_pad != cpu_to_be32(0)) |
134 | return __this_address; |
135 | } |
136 | |
137 | if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(mp, cur->bc_ops)) |
138 | return __this_address; |
139 | if (be16_to_cpu(block->bb_level) != level) |
140 | return __this_address; |
141 | if (be16_to_cpu(block->bb_numrecs) > |
142 | cur->bc_ops->get_maxrecs(cur, level)) |
143 | return __this_address; |
144 | |
145 | return NULL; |
146 | } |
147 | |
148 | /* |
149 | * Check a long btree block header. Return the address of the failing check, |
150 | * or NULL if everything is ok. |
151 | */ |
152 | static xfs_failaddr_t |
153 | __xfs_btree_check_fsblock( |
154 | struct xfs_btree_cur *cur, |
155 | struct xfs_btree_block *block, |
156 | int level, |
157 | struct xfs_buf *bp) |
158 | { |
159 | struct xfs_mount *mp = cur->bc_mp; |
160 | xfs_failaddr_t fa; |
161 | xfs_fsblock_t fsb; |
162 | |
163 | fa = __xfs_btree_check_lblock_hdr(cur, block, level, bp); |
164 | if (fa) |
165 | return fa; |
166 | |
167 | /* |
168 | * For inode-rooted btrees, the root block sits in the inode fork. In |
169 | * that case bp is NULL, and the block must not have any siblings. |
170 | */ |
171 | if (!bp) { |
172 | if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK)) |
173 | return __this_address; |
174 | if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK)) |
175 | return __this_address; |
176 | return NULL; |
177 | } |
178 | |
179 | fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp)); |
180 | fa = xfs_btree_check_fsblock_siblings(mp, fsb, |
181 | block->bb_u.l.bb_leftsib); |
182 | if (!fa) |
183 | fa = xfs_btree_check_fsblock_siblings(mp, fsb, |
184 | block->bb_u.l.bb_rightsib); |
185 | return fa; |
186 | } |
187 | |
188 | /* |
189 | * Check an in-memory btree block header. Return the address of the failing |
190 | * check, or NULL if everything is ok. |
191 | */ |
192 | static xfs_failaddr_t |
193 | __xfs_btree_check_memblock( |
194 | struct xfs_btree_cur *cur, |
195 | struct xfs_btree_block *block, |
196 | int level, |
197 | struct xfs_buf *bp) |
198 | { |
199 | struct xfs_buftarg *btp = cur->bc_mem.xfbtree->target; |
200 | xfs_failaddr_t fa; |
201 | xfbno_t bno; |
202 | |
203 | fa = __xfs_btree_check_lblock_hdr(cur, block, level, bp); |
204 | if (fa) |
205 | return fa; |
206 | |
207 | bno = xfs_daddr_to_xfbno(xfs_buf_daddr(bp)); |
208 | fa = xfs_btree_check_memblock_siblings(btp, bno, |
209 | block->bb_u.l.bb_leftsib); |
210 | if (!fa) |
211 | fa = xfs_btree_check_memblock_siblings(btp, bno, |
212 | block->bb_u.l.bb_rightsib); |
213 | return fa; |
214 | } |
215 | |
216 | /* |
217 | * Check a short btree block header. Return the address of the failing check, |
218 | * or NULL if everything is ok. |
219 | */ |
220 | static xfs_failaddr_t |
221 | __xfs_btree_check_agblock( |
222 | struct xfs_btree_cur *cur, |
223 | struct xfs_btree_block *block, |
224 | int level, |
225 | struct xfs_buf *bp) |
226 | { |
227 | struct xfs_mount *mp = cur->bc_mp; |
228 | struct xfs_perag *pag = cur->bc_ag.pag; |
229 | xfs_failaddr_t fa; |
230 | xfs_agblock_t agbno; |
231 | |
232 | if (xfs_has_crc(mp)) { |
233 | if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) |
234 | return __this_address; |
235 | if (block->bb_u.s.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp))) |
236 | return __this_address; |
237 | } |
238 | |
239 | if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(mp, cur->bc_ops)) |
240 | return __this_address; |
241 | if (be16_to_cpu(block->bb_level) != level) |
242 | return __this_address; |
243 | if (be16_to_cpu(block->bb_numrecs) > |
244 | cur->bc_ops->get_maxrecs(cur, level)) |
245 | return __this_address; |
246 | |
247 | agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp)); |
248 | fa = xfs_btree_check_agblock_siblings(pag, agbno, |
249 | block->bb_u.s.bb_leftsib); |
250 | if (!fa) |
251 | fa = xfs_btree_check_agblock_siblings(pag, agbno, |
252 | block->bb_u.s.bb_rightsib); |
253 | return fa; |
254 | } |
255 | |
256 | /* |
257 | * Internal btree block check. |
258 | * |
259 | * Return NULL if the block is ok or the address of the failed check otherwise. |
260 | */ |
261 | xfs_failaddr_t |
262 | __xfs_btree_check_block( |
263 | struct xfs_btree_cur *cur, |
264 | struct xfs_btree_block *block, |
265 | int level, |
266 | struct xfs_buf *bp) |
267 | { |
268 | switch (cur->bc_ops->type) { |
269 | case XFS_BTREE_TYPE_MEM: |
270 | return __xfs_btree_check_memblock(cur, block, level, bp); |
271 | case XFS_BTREE_TYPE_AG: |
272 | return __xfs_btree_check_agblock(cur, block, level, bp); |
273 | case XFS_BTREE_TYPE_INODE: |
274 | return __xfs_btree_check_fsblock(cur, block, level, bp); |
275 | default: |
276 | ASSERT(0); |
277 | return __this_address; |
278 | } |
279 | } |
280 | |
281 | static inline unsigned int xfs_btree_block_errtag(struct xfs_btree_cur *cur) |
282 | { |
283 | if (cur->bc_ops->ptr_len == XFS_BTREE_SHORT_PTR_LEN) |
284 | return XFS_ERRTAG_BTREE_CHECK_SBLOCK; |
285 | return XFS_ERRTAG_BTREE_CHECK_LBLOCK; |
286 | } |
287 | |
288 | /* |
289 | * Debug routine: check that block header is ok. |
290 | */ |
291 | int |
292 | xfs_btree_check_block( |
293 | struct xfs_btree_cur *cur, /* btree cursor */ |
294 | struct xfs_btree_block *block, /* generic btree block pointer */ |
295 | int level, /* level of the btree block */ |
296 | struct xfs_buf *bp) /* buffer containing block, if any */ |
297 | { |
298 | struct xfs_mount *mp = cur->bc_mp; |
299 | xfs_failaddr_t fa; |
300 | |
301 | fa = __xfs_btree_check_block(cur, block, level, bp); |
302 | if (XFS_IS_CORRUPT(mp, fa != NULL) || |
303 | XFS_TEST_ERROR(false, mp, xfs_btree_block_errtag(cur))) { |
304 | if (bp) |
305 | trace_xfs_btree_corrupt(bp, _RET_IP_); |
306 | xfs_btree_mark_sick(cur); |
307 | return -EFSCORRUPTED; |
308 | } |
309 | return 0; |
310 | } |
311 | |
312 | int |
313 | __xfs_btree_check_ptr( |
314 | struct xfs_btree_cur *cur, |
315 | const union xfs_btree_ptr *ptr, |
316 | int index, |
317 | int level) |
318 | { |
319 | if (level <= 0) |
320 | return -EFSCORRUPTED; |
321 | |
322 | switch (cur->bc_ops->type) { |
323 | case XFS_BTREE_TYPE_MEM: |
324 | if (!xfbtree_verify_bno(cur->bc_mem.xfbtree, |
325 | be64_to_cpu((&ptr->l)[index]))) |
326 | return -EFSCORRUPTED; |
327 | break; |
328 | case XFS_BTREE_TYPE_INODE: |
329 | if (!xfs_verify_fsbno(cur->bc_mp, |
330 | be64_to_cpu((&ptr->l)[index]))) |
331 | return -EFSCORRUPTED; |
332 | break; |
333 | case XFS_BTREE_TYPE_AG: |
334 | if (!xfs_verify_agbno(cur->bc_ag.pag, |
335 | be32_to_cpu((&ptr->s)[index]))) |
336 | return -EFSCORRUPTED; |
337 | break; |
338 | } |
339 | |
340 | return 0; |
341 | } |
342 | |
343 | /* |
344 | * Check that a given (indexed) btree pointer at a certain level of a |
345 | * btree is valid and doesn't point past where it should. |
346 | */ |
347 | static int |
348 | xfs_btree_check_ptr( |
349 | struct xfs_btree_cur *cur, |
350 | const union xfs_btree_ptr *ptr, |
351 | int index, |
352 | int level) |
353 | { |
354 | int error; |
355 | |
356 | error = __xfs_btree_check_ptr(cur, ptr, index, level); |
357 | if (error) { |
358 | switch (cur->bc_ops->type) { |
359 | case XFS_BTREE_TYPE_MEM: |
360 | xfs_err(cur->bc_mp, |
361 | "In-memory: Corrupt %sbt flags 0x%x pointer at level %d index %d fa %pS." , |
362 | cur->bc_ops->name, cur->bc_flags, level, index, |
363 | __this_address); |
364 | break; |
365 | case XFS_BTREE_TYPE_INODE: |
366 | xfs_err(cur->bc_mp, |
367 | "Inode %llu fork %d: Corrupt %sbt pointer at level %d index %d." , |
368 | cur->bc_ino.ip->i_ino, |
369 | cur->bc_ino.whichfork, cur->bc_ops->name, |
370 | level, index); |
371 | break; |
372 | case XFS_BTREE_TYPE_AG: |
373 | xfs_err(cur->bc_mp, |
374 | "AG %u: Corrupt %sbt pointer at level %d index %d." , |
375 | cur->bc_ag.pag->pag_agno, cur->bc_ops->name, |
376 | level, index); |
377 | break; |
378 | } |
379 | xfs_btree_mark_sick(cur); |
380 | } |
381 | |
382 | return error; |
383 | } |
384 | |
385 | #ifdef DEBUG |
386 | # define xfs_btree_debug_check_ptr xfs_btree_check_ptr |
387 | #else |
388 | # define xfs_btree_debug_check_ptr(...) (0) |
389 | #endif |
390 | |
391 | /* |
392 | * Calculate CRC on the whole btree block and stuff it into the |
393 | * long-form btree header. |
394 | * |
395 | * Prior to calculting the CRC, pull the LSN out of the buffer log item and put |
396 | * it into the buffer so recovery knows what the last modification was that made |
397 | * it to disk. |
398 | */ |
399 | void |
400 | xfs_btree_fsblock_calc_crc( |
401 | struct xfs_buf *bp) |
402 | { |
403 | struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); |
404 | struct xfs_buf_log_item *bip = bp->b_log_item; |
405 | |
406 | if (!xfs_has_crc(bp->b_mount)) |
407 | return; |
408 | if (bip) |
409 | block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); |
410 | xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); |
411 | } |
412 | |
413 | bool |
414 | xfs_btree_fsblock_verify_crc( |
415 | struct xfs_buf *bp) |
416 | { |
417 | struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); |
418 | struct xfs_mount *mp = bp->b_mount; |
419 | |
420 | if (xfs_has_crc(mp)) { |
421 | if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn))) |
422 | return false; |
423 | return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); |
424 | } |
425 | |
426 | return true; |
427 | } |
428 | |
429 | /* |
430 | * Calculate CRC on the whole btree block and stuff it into the |
431 | * short-form btree header. |
432 | * |
433 | * Prior to calculting the CRC, pull the LSN out of the buffer log item and put |
434 | * it into the buffer so recovery knows what the last modification was that made |
435 | * it to disk. |
436 | */ |
437 | void |
438 | xfs_btree_agblock_calc_crc( |
439 | struct xfs_buf *bp) |
440 | { |
441 | struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); |
442 | struct xfs_buf_log_item *bip = bp->b_log_item; |
443 | |
444 | if (!xfs_has_crc(bp->b_mount)) |
445 | return; |
446 | if (bip) |
447 | block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); |
448 | xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); |
449 | } |
450 | |
451 | bool |
452 | xfs_btree_agblock_verify_crc( |
453 | struct xfs_buf *bp) |
454 | { |
455 | struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); |
456 | struct xfs_mount *mp = bp->b_mount; |
457 | |
458 | if (xfs_has_crc(mp)) { |
459 | if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn))) |
460 | return false; |
461 | return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); |
462 | } |
463 | |
464 | return true; |
465 | } |
466 | |
467 | static int |
468 | xfs_btree_free_block( |
469 | struct xfs_btree_cur *cur, |
470 | struct xfs_buf *bp) |
471 | { |
472 | int error; |
473 | |
474 | trace_xfs_btree_free_block(cur, bp); |
475 | |
476 | /* |
477 | * Don't allow block freeing for a staging cursor, because staging |
478 | * cursors do not support regular btree modifications. |
479 | */ |
480 | if (unlikely(cur->bc_flags & XFS_BTREE_STAGING)) { |
481 | ASSERT(0); |
482 | return -EFSCORRUPTED; |
483 | } |
484 | |
485 | error = cur->bc_ops->free_block(cur, bp); |
486 | if (!error) { |
487 | xfs_trans_binval(cur->bc_tp, bp); |
488 | XFS_BTREE_STATS_INC(cur, free); |
489 | } |
490 | return error; |
491 | } |
492 | |
493 | /* |
494 | * Delete the btree cursor. |
495 | */ |
496 | void |
497 | xfs_btree_del_cursor( |
498 | struct xfs_btree_cur *cur, /* btree cursor */ |
499 | int error) /* del because of error */ |
500 | { |
501 | int i; /* btree level */ |
502 | |
503 | /* |
504 | * Clear the buffer pointers and release the buffers. If we're doing |
505 | * this because of an error, inspect all of the entries in the bc_bufs |
506 | * array for buffers to be unlocked. This is because some of the btree |
507 | * code works from level n down to 0, and if we get an error along the |
508 | * way we won't have initialized all the entries down to 0. |
509 | */ |
510 | for (i = 0; i < cur->bc_nlevels; i++) { |
511 | if (cur->bc_levels[i].bp) |
512 | xfs_trans_brelse(cur->bc_tp, cur->bc_levels[i].bp); |
513 | else if (!error) |
514 | break; |
515 | } |
516 | |
517 | /* |
518 | * If we are doing a BMBT update, the number of unaccounted blocks |
519 | * allocated during this cursor life time should be zero. If it's not |
520 | * zero, then we should be shut down or on our way to shutdown due to |
521 | * cancelling a dirty transaction on error. |
522 | */ |
523 | ASSERT(!xfs_btree_is_bmap(cur->bc_ops) || cur->bc_bmap.allocated == 0 || |
524 | xfs_is_shutdown(cur->bc_mp) || error != 0); |
525 | |
526 | switch (cur->bc_ops->type) { |
527 | case XFS_BTREE_TYPE_AG: |
528 | if (cur->bc_ag.pag) |
529 | xfs_perag_put(pag: cur->bc_ag.pag); |
530 | break; |
531 | case XFS_BTREE_TYPE_INODE: |
532 | /* nothing to do */ |
533 | break; |
534 | case XFS_BTREE_TYPE_MEM: |
535 | if (cur->bc_mem.pag) |
536 | xfs_perag_put(pag: cur->bc_mem.pag); |
537 | break; |
538 | } |
539 | |
540 | kmem_cache_free(cur->bc_cache, cur); |
541 | } |
542 | |
543 | /* Return the buffer target for this btree's buffer. */ |
544 | static inline struct xfs_buftarg * |
545 | xfs_btree_buftarg( |
546 | struct xfs_btree_cur *cur) |
547 | { |
548 | if (cur->bc_ops->type == XFS_BTREE_TYPE_MEM) |
549 | return cur->bc_mem.xfbtree->target; |
550 | return cur->bc_mp->m_ddev_targp; |
551 | } |
552 | |
553 | /* Return the block size (in units of 512b sectors) for this btree. */ |
554 | static inline unsigned int |
555 | xfs_btree_bbsize( |
556 | struct xfs_btree_cur *cur) |
557 | { |
558 | if (cur->bc_ops->type == XFS_BTREE_TYPE_MEM) |
559 | return XFBNO_BBSIZE; |
560 | return cur->bc_mp->m_bsize; |
561 | } |
562 | |
563 | /* |
564 | * Duplicate the btree cursor. |
565 | * Allocate a new one, copy the record, re-get the buffers. |
566 | */ |
567 | int /* error */ |
568 | xfs_btree_dup_cursor( |
569 | struct xfs_btree_cur *cur, /* input cursor */ |
570 | struct xfs_btree_cur **ncur) /* output cursor */ |
571 | { |
572 | struct xfs_mount *mp = cur->bc_mp; |
573 | struct xfs_trans *tp = cur->bc_tp; |
574 | struct xfs_buf *bp; |
575 | struct xfs_btree_cur *new; |
576 | int error; |
577 | int i; |
578 | |
579 | /* |
580 | * Don't allow staging cursors to be duplicated because they're supposed |
581 | * to be kept private to a single thread. |
582 | */ |
583 | if (unlikely(cur->bc_flags & XFS_BTREE_STAGING)) { |
584 | ASSERT(0); |
585 | return -EFSCORRUPTED; |
586 | } |
587 | |
588 | /* |
589 | * Allocate a new cursor like the old one. |
590 | */ |
591 | new = cur->bc_ops->dup_cursor(cur); |
592 | |
593 | /* |
594 | * Copy the record currently in the cursor. |
595 | */ |
596 | new->bc_rec = cur->bc_rec; |
597 | |
598 | /* |
599 | * For each level current, re-get the buffer and copy the ptr value. |
600 | */ |
601 | for (i = 0; i < new->bc_nlevels; i++) { |
602 | new->bc_levels[i].ptr = cur->bc_levels[i].ptr; |
603 | new->bc_levels[i].ra = cur->bc_levels[i].ra; |
604 | bp = cur->bc_levels[i].bp; |
605 | if (bp) { |
606 | error = xfs_trans_read_buf(mp, tp, |
607 | xfs_btree_buftarg(cur), |
608 | xfs_buf_daddr(bp), |
609 | xfs_btree_bbsize(cur), 0, &bp, |
610 | cur->bc_ops->buf_ops); |
611 | if (xfs_metadata_is_sick(error)) |
612 | xfs_btree_mark_sick(cur: new); |
613 | if (error) { |
614 | xfs_btree_del_cursor(cur: new, error); |
615 | *ncur = NULL; |
616 | return error; |
617 | } |
618 | } |
619 | new->bc_levels[i].bp = bp; |
620 | } |
621 | *ncur = new; |
622 | return 0; |
623 | } |
624 | |
625 | /* |
626 | * XFS btree block layout and addressing: |
627 | * |
628 | * There are two types of blocks in the btree: leaf and non-leaf blocks. |
629 | * |
630 | * The leaf record start with a header then followed by records containing |
631 | * the values. A non-leaf block also starts with the same header, and |
632 | * then first contains lookup keys followed by an equal number of pointers |
633 | * to the btree blocks at the previous level. |
634 | * |
635 | * +--------+-------+-------+-------+-------+-------+-------+ |
636 | * Leaf: | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N | |
637 | * +--------+-------+-------+-------+-------+-------+-------+ |
638 | * |
639 | * +--------+-------+-------+-------+-------+-------+-------+ |
640 | * Non-Leaf: | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N | |
641 | * +--------+-------+-------+-------+-------+-------+-------+ |
642 | * |
643 | * The header is called struct xfs_btree_block for reasons better left unknown |
644 | * and comes in different versions for short (32bit) and long (64bit) block |
645 | * pointers. The record and key structures are defined by the btree instances |
646 | * and opaque to the btree core. The block pointers are simple disk endian |
647 | * integers, available in a short (32bit) and long (64bit) variant. |
648 | * |
649 | * The helpers below calculate the offset of a given record, key or pointer |
650 | * into a btree block (xfs_btree_*_offset) or return a pointer to the given |
651 | * record, key or pointer (xfs_btree_*_addr). Note that all addressing |
652 | * inside the btree block is done using indices starting at one, not zero! |
653 | * |
654 | * If XFS_BTGEO_OVERLAPPING is set, then this btree supports keys containing |
655 | * overlapping intervals. In such a tree, records are still sorted lowest to |
656 | * highest and indexed by the smallest key value that refers to the record. |
657 | * However, nodes are different: each pointer has two associated keys -- one |
658 | * indexing the lowest key available in the block(s) below (the same behavior |
659 | * as the key in a regular btree) and another indexing the highest key |
660 | * available in the block(s) below. Because records are /not/ sorted by the |
661 | * highest key, all leaf block updates require us to compute the highest key |
662 | * that matches any record in the leaf and to recursively update the high keys |
663 | * in the nodes going further up in the tree, if necessary. Nodes look like |
664 | * this: |
665 | * |
666 | * +--------+-----+-----+-----+-----+-----+-------+-------+-----+ |
667 | * Non-Leaf: | header | lo1 | hi1 | lo2 | hi2 | ... | ptr 1 | ptr 2 | ... | |
668 | * +--------+-----+-----+-----+-----+-----+-------+-------+-----+ |
669 | * |
670 | * To perform an interval query on an overlapped tree, perform the usual |
671 | * depth-first search and use the low and high keys to decide if we can skip |
672 | * that particular node. If a leaf node is reached, return the records that |
673 | * intersect the interval. Note that an interval query may return numerous |
674 | * entries. For a non-overlapped tree, simply search for the record associated |
675 | * with the lowest key and iterate forward until a non-matching record is |
676 | * found. Section 14.3 ("Interval Trees") of _Introduction to Algorithms_ by |
677 | * Cormen, Leiserson, Rivest, and Stein (2nd or 3rd ed. only) discuss this in |
678 | * more detail. |
679 | * |
680 | * Why do we care about overlapping intervals? Let's say you have a bunch of |
681 | * reverse mapping records on a reflink filesystem: |
682 | * |
683 | * 1: +- file A startblock B offset C length D -----------+ |
684 | * 2: +- file E startblock F offset G length H --------------+ |
685 | * 3: +- file I startblock F offset J length K --+ |
686 | * 4: +- file L... --+ |
687 | * |
688 | * Now say we want to map block (B+D) into file A at offset (C+D). Ideally, |
689 | * we'd simply increment the length of record 1. But how do we find the record |
690 | * that ends at (B+D-1) (i.e. record 1)? A LE lookup of (B+D-1) would return |
691 | * record 3 because the keys are ordered first by startblock. An interval |
692 | * query would return records 1 and 2 because they both overlap (B+D-1), and |
693 | * from that we can pick out record 1 as the appropriate left neighbor. |
694 | * |
695 | * In the non-overlapped case you can do a LE lookup and decrement the cursor |
696 | * because a record's interval must end before the next record. |
697 | */ |
698 | |
699 | /* |
700 | * Return size of the btree block header for this btree instance. |
701 | */ |
702 | static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur) |
703 | { |
704 | if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) { |
705 | if (xfs_has_crc(cur->bc_mp)) |
706 | return XFS_BTREE_LBLOCK_CRC_LEN; |
707 | return XFS_BTREE_LBLOCK_LEN; |
708 | } |
709 | if (xfs_has_crc(cur->bc_mp)) |
710 | return XFS_BTREE_SBLOCK_CRC_LEN; |
711 | return XFS_BTREE_SBLOCK_LEN; |
712 | } |
713 | |
714 | /* |
715 | * Calculate offset of the n-th record in a btree block. |
716 | */ |
717 | STATIC size_t |
718 | xfs_btree_rec_offset( |
719 | struct xfs_btree_cur *cur, |
720 | int n) |
721 | { |
722 | return xfs_btree_block_len(cur) + |
723 | (n - 1) * cur->bc_ops->rec_len; |
724 | } |
725 | |
726 | /* |
727 | * Calculate offset of the n-th key in a btree block. |
728 | */ |
729 | STATIC size_t |
730 | xfs_btree_key_offset( |
731 | struct xfs_btree_cur *cur, |
732 | int n) |
733 | { |
734 | return xfs_btree_block_len(cur) + |
735 | (n - 1) * cur->bc_ops->key_len; |
736 | } |
737 | |
738 | /* |
739 | * Calculate offset of the n-th high key in a btree block. |
740 | */ |
741 | STATIC size_t |
742 | xfs_btree_high_key_offset( |
743 | struct xfs_btree_cur *cur, |
744 | int n) |
745 | { |
746 | return xfs_btree_block_len(cur) + |
747 | (n - 1) * cur->bc_ops->key_len + (cur->bc_ops->key_len / 2); |
748 | } |
749 | |
750 | /* |
751 | * Calculate offset of the n-th block pointer in a btree block. |
752 | */ |
753 | STATIC size_t |
754 | xfs_btree_ptr_offset( |
755 | struct xfs_btree_cur *cur, |
756 | int n, |
757 | int level) |
758 | { |
759 | return xfs_btree_block_len(cur) + |
760 | cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len + |
761 | (n - 1) * cur->bc_ops->ptr_len; |
762 | } |
763 | |
764 | /* |
765 | * Return a pointer to the n-th record in the btree block. |
766 | */ |
767 | union xfs_btree_rec * |
768 | xfs_btree_rec_addr( |
769 | struct xfs_btree_cur *cur, |
770 | int n, |
771 | struct xfs_btree_block *block) |
772 | { |
773 | return (union xfs_btree_rec *) |
774 | ((char *)block + xfs_btree_rec_offset(cur, n)); |
775 | } |
776 | |
777 | /* |
778 | * Return a pointer to the n-th key in the btree block. |
779 | */ |
780 | union xfs_btree_key * |
781 | xfs_btree_key_addr( |
782 | struct xfs_btree_cur *cur, |
783 | int n, |
784 | struct xfs_btree_block *block) |
785 | { |
786 | return (union xfs_btree_key *) |
787 | ((char *)block + xfs_btree_key_offset(cur, n)); |
788 | } |
789 | |
790 | /* |
791 | * Return a pointer to the n-th high key in the btree block. |
792 | */ |
793 | union xfs_btree_key * |
794 | xfs_btree_high_key_addr( |
795 | struct xfs_btree_cur *cur, |
796 | int n, |
797 | struct xfs_btree_block *block) |
798 | { |
799 | return (union xfs_btree_key *) |
800 | ((char *)block + xfs_btree_high_key_offset(cur, n)); |
801 | } |
802 | |
803 | /* |
804 | * Return a pointer to the n-th block pointer in the btree block. |
805 | */ |
806 | union xfs_btree_ptr * |
807 | xfs_btree_ptr_addr( |
808 | struct xfs_btree_cur *cur, |
809 | int n, |
810 | struct xfs_btree_block *block) |
811 | { |
812 | int level = xfs_btree_get_level(block); |
813 | |
814 | ASSERT(block->bb_level != 0); |
815 | |
816 | return (union xfs_btree_ptr *) |
817 | ((char *)block + xfs_btree_ptr_offset(cur, n, level)); |
818 | } |
819 | |
820 | struct xfs_ifork * |
821 | xfs_btree_ifork_ptr( |
822 | struct xfs_btree_cur *cur) |
823 | { |
824 | ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_INODE); |
825 | |
826 | if (cur->bc_flags & XFS_BTREE_STAGING) |
827 | return cur->bc_ino.ifake->if_fork; |
828 | return xfs_ifork_ptr(cur->bc_ino.ip, cur->bc_ino.whichfork); |
829 | } |
830 | |
831 | /* |
832 | * Get the root block which is stored in the inode. |
833 | * |
834 | * For now this btree implementation assumes the btree root is always |
835 | * stored in the if_broot field of an inode fork. |
836 | */ |
837 | STATIC struct xfs_btree_block * |
838 | xfs_btree_get_iroot( |
839 | struct xfs_btree_cur *cur) |
840 | { |
841 | struct xfs_ifork *ifp = xfs_btree_ifork_ptr(cur); |
842 | |
843 | return (struct xfs_btree_block *)ifp->if_broot; |
844 | } |
845 | |
846 | /* |
847 | * Retrieve the block pointer from the cursor at the given level. |
848 | * This may be an inode btree root or from a buffer. |
849 | */ |
850 | struct xfs_btree_block * /* generic btree block pointer */ |
851 | xfs_btree_get_block( |
852 | struct xfs_btree_cur *cur, /* btree cursor */ |
853 | int level, /* level in btree */ |
854 | struct xfs_buf **bpp) /* buffer containing the block */ |
855 | { |
856 | if (xfs_btree_at_iroot(cur, level)) { |
857 | *bpp = NULL; |
858 | return xfs_btree_get_iroot(cur); |
859 | } |
860 | |
861 | *bpp = cur->bc_levels[level].bp; |
862 | return XFS_BUF_TO_BLOCK(*bpp); |
863 | } |
864 | |
865 | /* |
866 | * Change the cursor to point to the first record at the given level. |
867 | * Other levels are unaffected. |
868 | */ |
869 | STATIC int /* success=1, failure=0 */ |
870 | xfs_btree_firstrec( |
871 | struct xfs_btree_cur *cur, /* btree cursor */ |
872 | int level) /* level to change */ |
873 | { |
874 | struct xfs_btree_block *block; /* generic btree block pointer */ |
875 | struct xfs_buf *bp; /* buffer containing block */ |
876 | |
877 | /* |
878 | * Get the block pointer for this level. |
879 | */ |
880 | block = xfs_btree_get_block(cur, level, bpp: &bp); |
881 | if (xfs_btree_check_block(cur, block, level, bp)) |
882 | return 0; |
883 | /* |
884 | * It's empty, there is no such record. |
885 | */ |
886 | if (!block->bb_numrecs) |
887 | return 0; |
888 | /* |
889 | * Set the ptr value to 1, that's the first record/key. |
890 | */ |
891 | cur->bc_levels[level].ptr = 1; |
892 | return 1; |
893 | } |
894 | |
895 | /* |
896 | * Change the cursor to point to the last record in the current block |
897 | * at the given level. Other levels are unaffected. |
898 | */ |
899 | STATIC int /* success=1, failure=0 */ |
900 | xfs_btree_lastrec( |
901 | struct xfs_btree_cur *cur, /* btree cursor */ |
902 | int level) /* level to change */ |
903 | { |
904 | struct xfs_btree_block *block; /* generic btree block pointer */ |
905 | struct xfs_buf *bp; /* buffer containing block */ |
906 | |
907 | /* |
908 | * Get the block pointer for this level. |
909 | */ |
910 | block = xfs_btree_get_block(cur, level, bpp: &bp); |
911 | if (xfs_btree_check_block(cur, block, level, bp)) |
912 | return 0; |
913 | /* |
914 | * It's empty, there is no such record. |
915 | */ |
916 | if (!block->bb_numrecs) |
917 | return 0; |
918 | /* |
919 | * Set the ptr value to numrecs, that's the last record/key. |
920 | */ |
921 | cur->bc_levels[level].ptr = be16_to_cpu(block->bb_numrecs); |
922 | return 1; |
923 | } |
924 | |
925 | /* |
926 | * Compute first and last byte offsets for the fields given. |
927 | * Interprets the offsets table, which contains struct field offsets. |
928 | */ |
929 | void |
930 | xfs_btree_offsets( |
931 | uint32_t fields, /* bitmask of fields */ |
932 | const short *offsets, /* table of field offsets */ |
933 | int nbits, /* number of bits to inspect */ |
934 | int *first, /* output: first byte offset */ |
935 | int *last) /* output: last byte offset */ |
936 | { |
937 | int i; /* current bit number */ |
938 | uint32_t imask; /* mask for current bit number */ |
939 | |
940 | ASSERT(fields != 0); |
941 | /* |
942 | * Find the lowest bit, so the first byte offset. |
943 | */ |
944 | for (i = 0, imask = 1u; ; i++, imask <<= 1) { |
945 | if (imask & fields) { |
946 | *first = offsets[i]; |
947 | break; |
948 | } |
949 | } |
950 | /* |
951 | * Find the highest bit, so the last byte offset. |
952 | */ |
953 | for (i = nbits - 1, imask = 1u << i; ; i--, imask >>= 1) { |
954 | if (imask & fields) { |
955 | *last = offsets[i + 1] - 1; |
956 | break; |
957 | } |
958 | } |
959 | } |
960 | |
961 | STATIC int |
962 | xfs_btree_readahead_fsblock( |
963 | struct xfs_btree_cur *cur, |
964 | int lr, |
965 | struct xfs_btree_block *block) |
966 | { |
967 | struct xfs_mount *mp = cur->bc_mp; |
968 | xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib); |
969 | xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib); |
970 | int rval = 0; |
971 | |
972 | if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) { |
973 | xfs_buf_readahead(mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, left), |
974 | mp->m_bsize, cur->bc_ops->buf_ops); |
975 | rval++; |
976 | } |
977 | |
978 | if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) { |
979 | xfs_buf_readahead(mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, right), |
980 | mp->m_bsize, cur->bc_ops->buf_ops); |
981 | rval++; |
982 | } |
983 | |
984 | return rval; |
985 | } |
986 | |
987 | STATIC int |
988 | xfs_btree_readahead_memblock( |
989 | struct xfs_btree_cur *cur, |
990 | int lr, |
991 | struct xfs_btree_block *block) |
992 | { |
993 | struct xfs_buftarg *btp = cur->bc_mem.xfbtree->target; |
994 | xfbno_t left = be64_to_cpu(block->bb_u.l.bb_leftsib); |
995 | xfbno_t right = be64_to_cpu(block->bb_u.l.bb_rightsib); |
996 | int rval = 0; |
997 | |
998 | if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) { |
999 | xfs_buf_readahead(btp, xfbno_to_daddr(left), XFBNO_BBSIZE, |
1000 | cur->bc_ops->buf_ops); |
1001 | rval++; |
1002 | } |
1003 | |
1004 | if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) { |
1005 | xfs_buf_readahead(btp, xfbno_to_daddr(right), XFBNO_BBSIZE, |
1006 | cur->bc_ops->buf_ops); |
1007 | rval++; |
1008 | } |
1009 | |
1010 | return rval; |
1011 | } |
1012 | |
1013 | STATIC int |
1014 | xfs_btree_readahead_agblock( |
1015 | struct xfs_btree_cur *cur, |
1016 | int lr, |
1017 | struct xfs_btree_block *block) |
1018 | { |
1019 | struct xfs_mount *mp = cur->bc_mp; |
1020 | xfs_agnumber_t agno = cur->bc_ag.pag->pag_agno; |
1021 | xfs_agblock_t left = be32_to_cpu(block->bb_u.s.bb_leftsib); |
1022 | xfs_agblock_t right = be32_to_cpu(block->bb_u.s.bb_rightsib); |
1023 | int rval = 0; |
1024 | |
1025 | if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) { |
1026 | xfs_buf_readahead(mp->m_ddev_targp, |
1027 | XFS_AGB_TO_DADDR(mp, agno, left), |
1028 | mp->m_bsize, cur->bc_ops->buf_ops); |
1029 | rval++; |
1030 | } |
1031 | |
1032 | if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) { |
1033 | xfs_buf_readahead(mp->m_ddev_targp, |
1034 | XFS_AGB_TO_DADDR(mp, agno, right), |
1035 | mp->m_bsize, cur->bc_ops->buf_ops); |
1036 | rval++; |
1037 | } |
1038 | |
1039 | return rval; |
1040 | } |
1041 | |
1042 | /* |
1043 | * Read-ahead btree blocks, at the given level. |
1044 | * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA. |
1045 | */ |
1046 | STATIC int |
1047 | xfs_btree_readahead( |
1048 | struct xfs_btree_cur *cur, /* btree cursor */ |
1049 | int lev, /* level in btree */ |
1050 | int lr) /* left/right bits */ |
1051 | { |
1052 | struct xfs_btree_block *block; |
1053 | |
1054 | /* |
1055 | * No readahead needed if we are at the root level and the |
1056 | * btree root is stored in the inode. |
1057 | */ |
1058 | if (xfs_btree_at_iroot(cur, lev)) |
1059 | return 0; |
1060 | |
1061 | if ((cur->bc_levels[lev].ra | lr) == cur->bc_levels[lev].ra) |
1062 | return 0; |
1063 | |
1064 | cur->bc_levels[lev].ra |= lr; |
1065 | block = XFS_BUF_TO_BLOCK(cur->bc_levels[lev].bp); |
1066 | |
1067 | switch (cur->bc_ops->type) { |
1068 | case XFS_BTREE_TYPE_AG: |
1069 | return xfs_btree_readahead_agblock(cur, lr, block); |
1070 | case XFS_BTREE_TYPE_INODE: |
1071 | return xfs_btree_readahead_fsblock(cur, lr, block); |
1072 | case XFS_BTREE_TYPE_MEM: |
1073 | return xfs_btree_readahead_memblock(cur, lr, block); |
1074 | default: |
1075 | ASSERT(0); |
1076 | return 0; |
1077 | } |
1078 | } |
1079 | |
1080 | STATIC int |
1081 | xfs_btree_ptr_to_daddr( |
1082 | struct xfs_btree_cur *cur, |
1083 | const union xfs_btree_ptr *ptr, |
1084 | xfs_daddr_t *daddr) |
1085 | { |
1086 | int error; |
1087 | |
1088 | error = xfs_btree_check_ptr(cur, ptr, index: 0, level: 1); |
1089 | if (error) |
1090 | return error; |
1091 | |
1092 | switch (cur->bc_ops->type) { |
1093 | case XFS_BTREE_TYPE_AG: |
1094 | *daddr = XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_ag.pag->pag_agno, |
1095 | be32_to_cpu(ptr->s)); |
1096 | break; |
1097 | case XFS_BTREE_TYPE_INODE: |
1098 | *daddr = XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l)); |
1099 | break; |
1100 | case XFS_BTREE_TYPE_MEM: |
1101 | *daddr = xfbno_to_daddr(be64_to_cpu(ptr->l)); |
1102 | break; |
1103 | } |
1104 | return 0; |
1105 | } |
1106 | |
1107 | /* |
1108 | * Readahead @count btree blocks at the given @ptr location. |
1109 | * |
1110 | * We don't need to care about long or short form btrees here as we have a |
1111 | * method of converting the ptr directly to a daddr available to us. |
1112 | */ |
1113 | STATIC void |
1114 | xfs_btree_readahead_ptr( |
1115 | struct xfs_btree_cur *cur, |
1116 | union xfs_btree_ptr *ptr, |
1117 | xfs_extlen_t count) |
1118 | { |
1119 | xfs_daddr_t daddr; |
1120 | |
1121 | if (xfs_btree_ptr_to_daddr(cur, ptr, &daddr)) |
1122 | return; |
1123 | xfs_buf_readahead(xfs_btree_buftarg(cur), daddr, |
1124 | xfs_btree_bbsize(cur) * count, |
1125 | cur->bc_ops->buf_ops); |
1126 | } |
1127 | |
1128 | /* |
1129 | * Set the buffer for level "lev" in the cursor to bp, releasing |
1130 | * any previous buffer. |
1131 | */ |
1132 | STATIC void |
1133 | xfs_btree_setbuf( |
1134 | struct xfs_btree_cur *cur, /* btree cursor */ |
1135 | int lev, /* level in btree */ |
1136 | struct xfs_buf *bp) /* new buffer to set */ |
1137 | { |
1138 | struct xfs_btree_block *b; /* btree block */ |
1139 | |
1140 | if (cur->bc_levels[lev].bp) |
1141 | xfs_trans_brelse(cur->bc_tp, cur->bc_levels[lev].bp); |
1142 | cur->bc_levels[lev].bp = bp; |
1143 | cur->bc_levels[lev].ra = 0; |
1144 | |
1145 | b = XFS_BUF_TO_BLOCK(bp); |
1146 | if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) { |
1147 | if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK)) |
1148 | cur->bc_levels[lev].ra |= XFS_BTCUR_LEFTRA; |
1149 | if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK)) |
1150 | cur->bc_levels[lev].ra |= XFS_BTCUR_RIGHTRA; |
1151 | } else { |
1152 | if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK)) |
1153 | cur->bc_levels[lev].ra |= XFS_BTCUR_LEFTRA; |
1154 | if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK)) |
1155 | cur->bc_levels[lev].ra |= XFS_BTCUR_RIGHTRA; |
1156 | } |
1157 | } |
1158 | |
1159 | bool |
1160 | xfs_btree_ptr_is_null( |
1161 | struct xfs_btree_cur *cur, |
1162 | const union xfs_btree_ptr *ptr) |
1163 | { |
1164 | if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) |
1165 | return ptr->l == cpu_to_be64(NULLFSBLOCK); |
1166 | else |
1167 | return ptr->s == cpu_to_be32(NULLAGBLOCK); |
1168 | } |
1169 | |
1170 | void |
1171 | xfs_btree_set_ptr_null( |
1172 | struct xfs_btree_cur *cur, |
1173 | union xfs_btree_ptr *ptr) |
1174 | { |
1175 | if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) |
1176 | ptr->l = cpu_to_be64(NULLFSBLOCK); |
1177 | else |
1178 | ptr->s = cpu_to_be32(NULLAGBLOCK); |
1179 | } |
1180 | |
1181 | static inline bool |
1182 | xfs_btree_ptrs_equal( |
1183 | struct xfs_btree_cur *cur, |
1184 | union xfs_btree_ptr *ptr1, |
1185 | union xfs_btree_ptr *ptr2) |
1186 | { |
1187 | if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) |
1188 | return ptr1->l == ptr2->l; |
1189 | return ptr1->s == ptr2->s; |
1190 | } |
1191 | |
1192 | /* |
1193 | * Get/set/init sibling pointers |
1194 | */ |
1195 | void |
1196 | xfs_btree_get_sibling( |
1197 | struct xfs_btree_cur *cur, |
1198 | struct xfs_btree_block *block, |
1199 | union xfs_btree_ptr *ptr, |
1200 | int lr) |
1201 | { |
1202 | ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB); |
1203 | |
1204 | if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) { |
1205 | if (lr == XFS_BB_RIGHTSIB) |
1206 | ptr->l = block->bb_u.l.bb_rightsib; |
1207 | else |
1208 | ptr->l = block->bb_u.l.bb_leftsib; |
1209 | } else { |
1210 | if (lr == XFS_BB_RIGHTSIB) |
1211 | ptr->s = block->bb_u.s.bb_rightsib; |
1212 | else |
1213 | ptr->s = block->bb_u.s.bb_leftsib; |
1214 | } |
1215 | } |
1216 | |
1217 | void |
1218 | xfs_btree_set_sibling( |
1219 | struct xfs_btree_cur *cur, |
1220 | struct xfs_btree_block *block, |
1221 | const union xfs_btree_ptr *ptr, |
1222 | int lr) |
1223 | { |
1224 | ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB); |
1225 | |
1226 | if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) { |
1227 | if (lr == XFS_BB_RIGHTSIB) |
1228 | block->bb_u.l.bb_rightsib = ptr->l; |
1229 | else |
1230 | block->bb_u.l.bb_leftsib = ptr->l; |
1231 | } else { |
1232 | if (lr == XFS_BB_RIGHTSIB) |
1233 | block->bb_u.s.bb_rightsib = ptr->s; |
1234 | else |
1235 | block->bb_u.s.bb_leftsib = ptr->s; |
1236 | } |
1237 | } |
1238 | |
1239 | static void |
1240 | __xfs_btree_init_block( |
1241 | struct xfs_mount *mp, |
1242 | struct xfs_btree_block *buf, |
1243 | const struct xfs_btree_ops *ops, |
1244 | xfs_daddr_t blkno, |
1245 | __u16 level, |
1246 | __u16 numrecs, |
1247 | __u64 owner) |
1248 | { |
1249 | bool crc = xfs_has_crc(mp); |
1250 | __u32 magic = xfs_btree_magic(mp, ops); |
1251 | |
1252 | buf->bb_magic = cpu_to_be32(magic); |
1253 | buf->bb_level = cpu_to_be16(level); |
1254 | buf->bb_numrecs = cpu_to_be16(numrecs); |
1255 | |
1256 | if (ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) { |
1257 | buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK); |
1258 | buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK); |
1259 | if (crc) { |
1260 | buf->bb_u.l.bb_blkno = cpu_to_be64(blkno); |
1261 | buf->bb_u.l.bb_owner = cpu_to_be64(owner); |
1262 | uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid); |
1263 | buf->bb_u.l.bb_pad = 0; |
1264 | buf->bb_u.l.bb_lsn = 0; |
1265 | } |
1266 | } else { |
1267 | buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK); |
1268 | buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK); |
1269 | if (crc) { |
1270 | buf->bb_u.s.bb_blkno = cpu_to_be64(blkno); |
1271 | /* owner is a 32 bit value on short blocks */ |
1272 | buf->bb_u.s.bb_owner = cpu_to_be32((__u32)owner); |
1273 | uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid); |
1274 | buf->bb_u.s.bb_lsn = 0; |
1275 | } |
1276 | } |
1277 | } |
1278 | |
1279 | void |
1280 | xfs_btree_init_block( |
1281 | struct xfs_mount *mp, |
1282 | struct xfs_btree_block *block, |
1283 | const struct xfs_btree_ops *ops, |
1284 | __u16 level, |
1285 | __u16 numrecs, |
1286 | __u64 owner) |
1287 | { |
1288 | __xfs_btree_init_block(mp, block, ops, XFS_BUF_DADDR_NULL, level, |
1289 | numrecs, owner); |
1290 | } |
1291 | |
1292 | void |
1293 | xfs_btree_init_buf( |
1294 | struct xfs_mount *mp, |
1295 | struct xfs_buf *bp, |
1296 | const struct xfs_btree_ops *ops, |
1297 | __u16 level, |
1298 | __u16 numrecs, |
1299 | __u64 owner) |
1300 | { |
1301 | __xfs_btree_init_block(mp, XFS_BUF_TO_BLOCK(bp), ops, |
1302 | xfs_buf_daddr(bp), level, numrecs, owner); |
1303 | bp->b_ops = ops->buf_ops; |
1304 | } |
1305 | |
1306 | static inline __u64 |
1307 | xfs_btree_owner( |
1308 | struct xfs_btree_cur *cur) |
1309 | { |
1310 | switch (cur->bc_ops->type) { |
1311 | case XFS_BTREE_TYPE_MEM: |
1312 | return cur->bc_mem.xfbtree->owner; |
1313 | case XFS_BTREE_TYPE_INODE: |
1314 | return cur->bc_ino.ip->i_ino; |
1315 | case XFS_BTREE_TYPE_AG: |
1316 | return cur->bc_ag.pag->pag_agno; |
1317 | default: |
1318 | ASSERT(0); |
1319 | return 0; |
1320 | } |
1321 | } |
1322 | |
1323 | void |
1324 | xfs_btree_init_block_cur( |
1325 | struct xfs_btree_cur *cur, |
1326 | struct xfs_buf *bp, |
1327 | int level, |
1328 | int numrecs) |
1329 | { |
1330 | xfs_btree_init_buf(cur->bc_mp, bp, cur->bc_ops, level, numrecs, |
1331 | xfs_btree_owner(cur)); |
1332 | } |
1333 | |
1334 | /* |
1335 | * Return true if ptr is the last record in the btree and |
1336 | * we need to track updates to this record. The decision |
1337 | * will be further refined in the update_lastrec method. |
1338 | */ |
1339 | STATIC int |
1340 | xfs_btree_is_lastrec( |
1341 | struct xfs_btree_cur *cur, |
1342 | struct xfs_btree_block *block, |
1343 | int level) |
1344 | { |
1345 | union xfs_btree_ptr ptr; |
1346 | |
1347 | if (level > 0) |
1348 | return 0; |
1349 | if (!(cur->bc_ops->geom_flags & XFS_BTGEO_LASTREC_UPDATE)) |
1350 | return 0; |
1351 | |
1352 | xfs_btree_get_sibling(cur, block, ptr: &ptr, XFS_BB_RIGHTSIB); |
1353 | if (!xfs_btree_ptr_is_null(cur, &ptr)) |
1354 | return 0; |
1355 | return 1; |
1356 | } |
1357 | |
1358 | STATIC void |
1359 | xfs_btree_buf_to_ptr( |
1360 | struct xfs_btree_cur *cur, |
1361 | struct xfs_buf *bp, |
1362 | union xfs_btree_ptr *ptr) |
1363 | { |
1364 | switch (cur->bc_ops->type) { |
1365 | case XFS_BTREE_TYPE_AG: |
1366 | ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp, |
1367 | xfs_buf_daddr(bp))); |
1368 | break; |
1369 | case XFS_BTREE_TYPE_INODE: |
1370 | ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp, |
1371 | xfs_buf_daddr(bp))); |
1372 | break; |
1373 | case XFS_BTREE_TYPE_MEM: |
1374 | ptr->l = cpu_to_be64(xfs_daddr_to_xfbno(xfs_buf_daddr(bp))); |
1375 | break; |
1376 | } |
1377 | } |
1378 | |
1379 | static inline void |
1380 | xfs_btree_set_refs( |
1381 | struct xfs_btree_cur *cur, |
1382 | struct xfs_buf *bp) |
1383 | { |
1384 | xfs_buf_set_ref(bp, cur->bc_ops->lru_refs); |
1385 | } |
1386 | |
1387 | int |
1388 | xfs_btree_get_buf_block( |
1389 | struct xfs_btree_cur *cur, |
1390 | const union xfs_btree_ptr *ptr, |
1391 | struct xfs_btree_block **block, |
1392 | struct xfs_buf **bpp) |
1393 | { |
1394 | xfs_daddr_t d; |
1395 | int error; |
1396 | |
1397 | error = xfs_btree_ptr_to_daddr(cur, ptr, &d); |
1398 | if (error) |
1399 | return error; |
1400 | error = xfs_trans_get_buf(cur->bc_tp, xfs_btree_buftarg(cur), d, |
1401 | xfs_btree_bbsize(cur), 0, bpp); |
1402 | if (error) |
1403 | return error; |
1404 | |
1405 | (*bpp)->b_ops = cur->bc_ops->buf_ops; |
1406 | *block = XFS_BUF_TO_BLOCK(*bpp); |
1407 | return 0; |
1408 | } |
1409 | |
1410 | /* |
1411 | * Read in the buffer at the given ptr and return the buffer and |
1412 | * the block pointer within the buffer. |
1413 | */ |
1414 | int |
1415 | xfs_btree_read_buf_block( |
1416 | struct xfs_btree_cur *cur, |
1417 | const union xfs_btree_ptr *ptr, |
1418 | int flags, |
1419 | struct xfs_btree_block **block, |
1420 | struct xfs_buf **bpp) |
1421 | { |
1422 | struct xfs_mount *mp = cur->bc_mp; |
1423 | xfs_daddr_t d; |
1424 | int error; |
1425 | |
1426 | /* need to sort out how callers deal with failures first */ |
1427 | ASSERT(!(flags & XBF_TRYLOCK)); |
1428 | |
1429 | error = xfs_btree_ptr_to_daddr(cur, ptr, &d); |
1430 | if (error) |
1431 | return error; |
1432 | error = xfs_trans_read_buf(mp, cur->bc_tp, xfs_btree_buftarg(cur), d, |
1433 | xfs_btree_bbsize(cur), flags, bpp, |
1434 | cur->bc_ops->buf_ops); |
1435 | if (xfs_metadata_is_sick(error)) |
1436 | xfs_btree_mark_sick(cur); |
1437 | if (error) |
1438 | return error; |
1439 | |
1440 | xfs_btree_set_refs(cur, bp: *bpp); |
1441 | *block = XFS_BUF_TO_BLOCK(*bpp); |
1442 | return 0; |
1443 | } |
1444 | |
1445 | /* |
1446 | * Copy keys from one btree block to another. |
1447 | */ |
1448 | void |
1449 | xfs_btree_copy_keys( |
1450 | struct xfs_btree_cur *cur, |
1451 | union xfs_btree_key *dst_key, |
1452 | const union xfs_btree_key *src_key, |
1453 | int numkeys) |
1454 | { |
1455 | ASSERT(numkeys >= 0); |
1456 | memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len); |
1457 | } |
1458 | |
1459 | /* |
1460 | * Copy records from one btree block to another. |
1461 | */ |
1462 | STATIC void |
1463 | xfs_btree_copy_recs( |
1464 | struct xfs_btree_cur *cur, |
1465 | union xfs_btree_rec *dst_rec, |
1466 | union xfs_btree_rec *src_rec, |
1467 | int numrecs) |
1468 | { |
1469 | ASSERT(numrecs >= 0); |
1470 | memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len); |
1471 | } |
1472 | |
1473 | /* |
1474 | * Copy block pointers from one btree block to another. |
1475 | */ |
1476 | void |
1477 | xfs_btree_copy_ptrs( |
1478 | struct xfs_btree_cur *cur, |
1479 | union xfs_btree_ptr *dst_ptr, |
1480 | const union xfs_btree_ptr *src_ptr, |
1481 | int numptrs) |
1482 | { |
1483 | ASSERT(numptrs >= 0); |
1484 | memcpy(dst_ptr, src_ptr, numptrs * cur->bc_ops->ptr_len); |
1485 | } |
1486 | |
1487 | /* |
1488 | * Shift keys one index left/right inside a single btree block. |
1489 | */ |
1490 | STATIC void |
1491 | xfs_btree_shift_keys( |
1492 | struct xfs_btree_cur *cur, |
1493 | union xfs_btree_key *key, |
1494 | int dir, |
1495 | int numkeys) |
1496 | { |
1497 | char *dst_key; |
1498 | |
1499 | ASSERT(numkeys >= 0); |
1500 | ASSERT(dir == 1 || dir == -1); |
1501 | |
1502 | dst_key = (char *)key + (dir * cur->bc_ops->key_len); |
1503 | memmove(dst_key, key, numkeys * cur->bc_ops->key_len); |
1504 | } |
1505 | |
1506 | /* |
1507 | * Shift records one index left/right inside a single btree block. |
1508 | */ |
1509 | STATIC void |
1510 | xfs_btree_shift_recs( |
1511 | struct xfs_btree_cur *cur, |
1512 | union xfs_btree_rec *rec, |
1513 | int dir, |
1514 | int numrecs) |
1515 | { |
1516 | char *dst_rec; |
1517 | |
1518 | ASSERT(numrecs >= 0); |
1519 | ASSERT(dir == 1 || dir == -1); |
1520 | |
1521 | dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len); |
1522 | memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len); |
1523 | } |
1524 | |
1525 | /* |
1526 | * Shift block pointers one index left/right inside a single btree block. |
1527 | */ |
1528 | STATIC void |
1529 | xfs_btree_shift_ptrs( |
1530 | struct xfs_btree_cur *cur, |
1531 | union xfs_btree_ptr *ptr, |
1532 | int dir, |
1533 | int numptrs) |
1534 | { |
1535 | char *dst_ptr; |
1536 | |
1537 | ASSERT(numptrs >= 0); |
1538 | ASSERT(dir == 1 || dir == -1); |
1539 | |
1540 | dst_ptr = (char *)ptr + (dir * cur->bc_ops->ptr_len); |
1541 | memmove(dst_ptr, ptr, numptrs * cur->bc_ops->ptr_len); |
1542 | } |
1543 | |
1544 | /* |
1545 | * Log key values from the btree block. |
1546 | */ |
1547 | STATIC void |
1548 | xfs_btree_log_keys( |
1549 | struct xfs_btree_cur *cur, |
1550 | struct xfs_buf *bp, |
1551 | int first, |
1552 | int last) |
1553 | { |
1554 | |
1555 | if (bp) { |
1556 | xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); |
1557 | xfs_trans_log_buf(cur->bc_tp, bp, |
1558 | xfs_btree_key_offset(cur, first), |
1559 | xfs_btree_key_offset(cur, last + 1) - 1); |
1560 | } else { |
1561 | xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip, |
1562 | xfs_ilog_fbroot(w: cur->bc_ino.whichfork)); |
1563 | } |
1564 | } |
1565 | |
1566 | /* |
1567 | * Log record values from the btree block. |
1568 | */ |
1569 | void |
1570 | xfs_btree_log_recs( |
1571 | struct xfs_btree_cur *cur, |
1572 | struct xfs_buf *bp, |
1573 | int first, |
1574 | int last) |
1575 | { |
1576 | |
1577 | xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); |
1578 | xfs_trans_log_buf(cur->bc_tp, bp, |
1579 | xfs_btree_rec_offset(cur, first), |
1580 | xfs_btree_rec_offset(cur, last + 1) - 1); |
1581 | |
1582 | } |
1583 | |
1584 | /* |
1585 | * Log block pointer fields from a btree block (nonleaf). |
1586 | */ |
1587 | STATIC void |
1588 | xfs_btree_log_ptrs( |
1589 | struct xfs_btree_cur *cur, /* btree cursor */ |
1590 | struct xfs_buf *bp, /* buffer containing btree block */ |
1591 | int first, /* index of first pointer to log */ |
1592 | int last) /* index of last pointer to log */ |
1593 | { |
1594 | |
1595 | if (bp) { |
1596 | struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); |
1597 | int level = xfs_btree_get_level(block); |
1598 | |
1599 | xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); |
1600 | xfs_trans_log_buf(cur->bc_tp, bp, |
1601 | xfs_btree_ptr_offset(cur, first, level), |
1602 | xfs_btree_ptr_offset(cur, last + 1, level) - 1); |
1603 | } else { |
1604 | xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip, |
1605 | xfs_ilog_fbroot(w: cur->bc_ino.whichfork)); |
1606 | } |
1607 | |
1608 | } |
1609 | |
1610 | /* |
1611 | * Log fields from a btree block header. |
1612 | */ |
1613 | void |
1614 | xfs_btree_log_block( |
1615 | struct xfs_btree_cur *cur, /* btree cursor */ |
1616 | struct xfs_buf *bp, /* buffer containing btree block */ |
1617 | uint32_t fields) /* mask of fields: XFS_BB_... */ |
1618 | { |
1619 | int first; /* first byte offset logged */ |
1620 | int last; /* last byte offset logged */ |
1621 | static const short soffsets[] = { /* table of offsets (short) */ |
1622 | offsetof(struct xfs_btree_block, bb_magic), |
1623 | offsetof(struct xfs_btree_block, bb_level), |
1624 | offsetof(struct xfs_btree_block, bb_numrecs), |
1625 | offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib), |
1626 | offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib), |
1627 | offsetof(struct xfs_btree_block, bb_u.s.bb_blkno), |
1628 | offsetof(struct xfs_btree_block, bb_u.s.bb_lsn), |
1629 | offsetof(struct xfs_btree_block, bb_u.s.bb_uuid), |
1630 | offsetof(struct xfs_btree_block, bb_u.s.bb_owner), |
1631 | offsetof(struct xfs_btree_block, bb_u.s.bb_crc), |
1632 | XFS_BTREE_SBLOCK_CRC_LEN |
1633 | }; |
1634 | static const short loffsets[] = { /* table of offsets (long) */ |
1635 | offsetof(struct xfs_btree_block, bb_magic), |
1636 | offsetof(struct xfs_btree_block, bb_level), |
1637 | offsetof(struct xfs_btree_block, bb_numrecs), |
1638 | offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib), |
1639 | offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib), |
1640 | offsetof(struct xfs_btree_block, bb_u.l.bb_blkno), |
1641 | offsetof(struct xfs_btree_block, bb_u.l.bb_lsn), |
1642 | offsetof(struct xfs_btree_block, bb_u.l.bb_uuid), |
1643 | offsetof(struct xfs_btree_block, bb_u.l.bb_owner), |
1644 | offsetof(struct xfs_btree_block, bb_u.l.bb_crc), |
1645 | offsetof(struct xfs_btree_block, bb_u.l.bb_pad), |
1646 | XFS_BTREE_LBLOCK_CRC_LEN |
1647 | }; |
1648 | |
1649 | if (bp) { |
1650 | int nbits; |
1651 | |
1652 | if (xfs_has_crc(cur->bc_mp)) { |
1653 | /* |
1654 | * We don't log the CRC when updating a btree |
1655 | * block but instead recreate it during log |
1656 | * recovery. As the log buffers have checksums |
1657 | * of their own this is safe and avoids logging a crc |
1658 | * update in a lot of places. |
1659 | */ |
1660 | if (fields == XFS_BB_ALL_BITS) |
1661 | fields = XFS_BB_ALL_BITS_CRC; |
1662 | nbits = XFS_BB_NUM_BITS_CRC; |
1663 | } else { |
1664 | nbits = XFS_BB_NUM_BITS; |
1665 | } |
1666 | xfs_btree_offsets(fields, |
1667 | (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) ? |
1668 | loffsets : soffsets, |
1669 | nbits, &first, &last); |
1670 | xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); |
1671 | xfs_trans_log_buf(cur->bc_tp, bp, first, last); |
1672 | } else { |
1673 | xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip, |
1674 | xfs_ilog_fbroot(w: cur->bc_ino.whichfork)); |
1675 | } |
1676 | } |
1677 | |
1678 | /* |
1679 | * Increment cursor by one record at the level. |
1680 | * For nonzero levels the leaf-ward information is untouched. |
1681 | */ |
1682 | int /* error */ |
1683 | xfs_btree_increment( |
1684 | struct xfs_btree_cur *cur, |
1685 | int level, |
1686 | int *stat) /* success/failure */ |
1687 | { |
1688 | struct xfs_btree_block *block; |
1689 | union xfs_btree_ptr ptr; |
1690 | struct xfs_buf *bp; |
1691 | int error; /* error return value */ |
1692 | int lev; |
1693 | |
1694 | ASSERT(level < cur->bc_nlevels); |
1695 | |
1696 | /* Read-ahead to the right at this level. */ |
1697 | xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); |
1698 | |
1699 | /* Get a pointer to the btree block. */ |
1700 | block = xfs_btree_get_block(cur, level, bpp: &bp); |
1701 | |
1702 | #ifdef DEBUG |
1703 | error = xfs_btree_check_block(cur, block, level, bp); |
1704 | if (error) |
1705 | goto error0; |
1706 | #endif |
1707 | |
1708 | /* We're done if we remain in the block after the increment. */ |
1709 | if (++cur->bc_levels[level].ptr <= xfs_btree_get_numrecs(block)) |
1710 | goto out1; |
1711 | |
1712 | /* Fail if we just went off the right edge of the tree. */ |
1713 | xfs_btree_get_sibling(cur, block, ptr: &ptr, XFS_BB_RIGHTSIB); |
1714 | if (xfs_btree_ptr_is_null(cur, &ptr)) |
1715 | goto out0; |
1716 | |
1717 | XFS_BTREE_STATS_INC(cur, increment); |
1718 | |
1719 | /* |
1720 | * March up the tree incrementing pointers. |
1721 | * Stop when we don't go off the right edge of a block. |
1722 | */ |
1723 | for (lev = level + 1; lev < cur->bc_nlevels; lev++) { |
1724 | block = xfs_btree_get_block(cur, level: lev, bpp: &bp); |
1725 | |
1726 | #ifdef DEBUG |
1727 | error = xfs_btree_check_block(cur, block, lev, bp); |
1728 | if (error) |
1729 | goto error0; |
1730 | #endif |
1731 | |
1732 | if (++cur->bc_levels[lev].ptr <= xfs_btree_get_numrecs(block)) |
1733 | break; |
1734 | |
1735 | /* Read-ahead the right block for the next loop. */ |
1736 | xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA); |
1737 | } |
1738 | |
1739 | /* |
1740 | * If we went off the root then we are either seriously |
1741 | * confused or have the tree root in an inode. |
1742 | */ |
1743 | if (lev == cur->bc_nlevels) { |
1744 | if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE) |
1745 | goto out0; |
1746 | ASSERT(0); |
1747 | xfs_btree_mark_sick(cur); |
1748 | error = -EFSCORRUPTED; |
1749 | goto error0; |
1750 | } |
1751 | ASSERT(lev < cur->bc_nlevels); |
1752 | |
1753 | /* |
1754 | * Now walk back down the tree, fixing up the cursor's buffer |
1755 | * pointers and key numbers. |
1756 | */ |
1757 | for (block = xfs_btree_get_block(cur, level: lev, bpp: &bp); lev > level; ) { |
1758 | union xfs_btree_ptr *ptrp; |
1759 | |
1760 | ptrp = xfs_btree_ptr_addr(cur, n: cur->bc_levels[lev].ptr, block); |
1761 | --lev; |
1762 | error = xfs_btree_read_buf_block(cur, ptr: ptrp, flags: 0, block: &block, bpp: &bp); |
1763 | if (error) |
1764 | goto error0; |
1765 | |
1766 | xfs_btree_setbuf(cur, lev, bp); |
1767 | cur->bc_levels[lev].ptr = 1; |
1768 | } |
1769 | out1: |
1770 | *stat = 1; |
1771 | return 0; |
1772 | |
1773 | out0: |
1774 | *stat = 0; |
1775 | return 0; |
1776 | |
1777 | error0: |
1778 | return error; |
1779 | } |
1780 | |
1781 | /* |
1782 | * Decrement cursor by one record at the level. |
1783 | * For nonzero levels the leaf-ward information is untouched. |
1784 | */ |
1785 | int /* error */ |
1786 | xfs_btree_decrement( |
1787 | struct xfs_btree_cur *cur, |
1788 | int level, |
1789 | int *stat) /* success/failure */ |
1790 | { |
1791 | struct xfs_btree_block *block; |
1792 | struct xfs_buf *bp; |
1793 | int error; /* error return value */ |
1794 | int lev; |
1795 | union xfs_btree_ptr ptr; |
1796 | |
1797 | ASSERT(level < cur->bc_nlevels); |
1798 | |
1799 | /* Read-ahead to the left at this level. */ |
1800 | xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA); |
1801 | |
1802 | /* We're done if we remain in the block after the decrement. */ |
1803 | if (--cur->bc_levels[level].ptr > 0) |
1804 | goto out1; |
1805 | |
1806 | /* Get a pointer to the btree block. */ |
1807 | block = xfs_btree_get_block(cur, level, bpp: &bp); |
1808 | |
1809 | #ifdef DEBUG |
1810 | error = xfs_btree_check_block(cur, block, level, bp); |
1811 | if (error) |
1812 | goto error0; |
1813 | #endif |
1814 | |
1815 | /* Fail if we just went off the left edge of the tree. */ |
1816 | xfs_btree_get_sibling(cur, block, ptr: &ptr, XFS_BB_LEFTSIB); |
1817 | if (xfs_btree_ptr_is_null(cur, &ptr)) |
1818 | goto out0; |
1819 | |
1820 | XFS_BTREE_STATS_INC(cur, decrement); |
1821 | |
1822 | /* |
1823 | * March up the tree decrementing pointers. |
1824 | * Stop when we don't go off the left edge of a block. |
1825 | */ |
1826 | for (lev = level + 1; lev < cur->bc_nlevels; lev++) { |
1827 | if (--cur->bc_levels[lev].ptr > 0) |
1828 | break; |
1829 | /* Read-ahead the left block for the next loop. */ |
1830 | xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA); |
1831 | } |
1832 | |
1833 | /* |
1834 | * If we went off the root then we are seriously confused. |
1835 | * or the root of the tree is in an inode. |
1836 | */ |
1837 | if (lev == cur->bc_nlevels) { |
1838 | if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE) |
1839 | goto out0; |
1840 | ASSERT(0); |
1841 | xfs_btree_mark_sick(cur); |
1842 | error = -EFSCORRUPTED; |
1843 | goto error0; |
1844 | } |
1845 | ASSERT(lev < cur->bc_nlevels); |
1846 | |
1847 | /* |
1848 | * Now walk back down the tree, fixing up the cursor's buffer |
1849 | * pointers and key numbers. |
1850 | */ |
1851 | for (block = xfs_btree_get_block(cur, level: lev, bpp: &bp); lev > level; ) { |
1852 | union xfs_btree_ptr *ptrp; |
1853 | |
1854 | ptrp = xfs_btree_ptr_addr(cur, n: cur->bc_levels[lev].ptr, block); |
1855 | --lev; |
1856 | error = xfs_btree_read_buf_block(cur, ptr: ptrp, flags: 0, block: &block, bpp: &bp); |
1857 | if (error) |
1858 | goto error0; |
1859 | xfs_btree_setbuf(cur, lev, bp); |
1860 | cur->bc_levels[lev].ptr = xfs_btree_get_numrecs(block); |
1861 | } |
1862 | out1: |
1863 | *stat = 1; |
1864 | return 0; |
1865 | |
1866 | out0: |
1867 | *stat = 0; |
1868 | return 0; |
1869 | |
1870 | error0: |
1871 | return error; |
1872 | } |
1873 | |
1874 | /* |
1875 | * Check the btree block owner now that we have the context to know who the |
1876 | * real owner is. |
1877 | */ |
1878 | static inline xfs_failaddr_t |
1879 | xfs_btree_check_block_owner( |
1880 | struct xfs_btree_cur *cur, |
1881 | struct xfs_btree_block *block) |
1882 | { |
1883 | __u64 owner; |
1884 | |
1885 | if (!xfs_has_crc(cur->bc_mp) || |
1886 | (cur->bc_flags & XFS_BTREE_BMBT_INVALID_OWNER)) |
1887 | return NULL; |
1888 | |
1889 | owner = xfs_btree_owner(cur); |
1890 | if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) { |
1891 | if (be64_to_cpu(block->bb_u.l.bb_owner) != owner) |
1892 | return __this_address; |
1893 | } else { |
1894 | if (be32_to_cpu(block->bb_u.s.bb_owner) != owner) |
1895 | return __this_address; |
1896 | } |
1897 | |
1898 | return NULL; |
1899 | } |
1900 | |
1901 | int |
1902 | xfs_btree_lookup_get_block( |
1903 | struct xfs_btree_cur *cur, /* btree cursor */ |
1904 | int level, /* level in the btree */ |
1905 | const union xfs_btree_ptr *pp, /* ptr to btree block */ |
1906 | struct xfs_btree_block **blkp) /* return btree block */ |
1907 | { |
1908 | struct xfs_buf *bp; /* buffer pointer for btree block */ |
1909 | xfs_daddr_t daddr; |
1910 | int error = 0; |
1911 | |
1912 | /* special case the root block if in an inode */ |
1913 | if (xfs_btree_at_iroot(cur, level)) { |
1914 | *blkp = xfs_btree_get_iroot(cur); |
1915 | return 0; |
1916 | } |
1917 | |
1918 | /* |
1919 | * If the old buffer at this level for the disk address we are |
1920 | * looking for re-use it. |
1921 | * |
1922 | * Otherwise throw it away and get a new one. |
1923 | */ |
1924 | bp = cur->bc_levels[level].bp; |
1925 | error = xfs_btree_ptr_to_daddr(cur, pp, &daddr); |
1926 | if (error) |
1927 | return error; |
1928 | if (bp && xfs_buf_daddr(bp) == daddr) { |
1929 | *blkp = XFS_BUF_TO_BLOCK(bp); |
1930 | return 0; |
1931 | } |
1932 | |
1933 | error = xfs_btree_read_buf_block(cur, ptr: pp, flags: 0, block: blkp, bpp: &bp); |
1934 | if (error) |
1935 | return error; |
1936 | |
1937 | /* Check the inode owner since the verifiers don't. */ |
1938 | if (xfs_btree_check_block_owner(cur, *blkp) != NULL) |
1939 | goto out_bad; |
1940 | |
1941 | /* Did we get the level we were looking for? */ |
1942 | if (be16_to_cpu((*blkp)->bb_level) != level) |
1943 | goto out_bad; |
1944 | |
1945 | /* Check that internal nodes have at least one record. */ |
1946 | if (level != 0 && be16_to_cpu((*blkp)->bb_numrecs) == 0) |
1947 | goto out_bad; |
1948 | |
1949 | xfs_btree_setbuf(cur, level, bp); |
1950 | return 0; |
1951 | |
1952 | out_bad: |
1953 | *blkp = NULL; |
1954 | xfs_buf_mark_corrupt(bp); |
1955 | xfs_trans_brelse(cur->bc_tp, bp); |
1956 | xfs_btree_mark_sick(cur); |
1957 | return -EFSCORRUPTED; |
1958 | } |
1959 | |
1960 | /* |
1961 | * Get current search key. For level 0 we don't actually have a key |
1962 | * structure so we make one up from the record. For all other levels |
1963 | * we just return the right key. |
1964 | */ |
1965 | STATIC union xfs_btree_key * |
1966 | xfs_lookup_get_search_key( |
1967 | struct xfs_btree_cur *cur, |
1968 | int level, |
1969 | int keyno, |
1970 | struct xfs_btree_block *block, |
1971 | union xfs_btree_key *kp) |
1972 | { |
1973 | if (level == 0) { |
1974 | cur->bc_ops->init_key_from_rec(kp, |
1975 | xfs_btree_rec_addr(cur, n: keyno, block)); |
1976 | return kp; |
1977 | } |
1978 | |
1979 | return xfs_btree_key_addr(cur, n: keyno, block); |
1980 | } |
1981 | |
1982 | /* |
1983 | * Initialize a pointer to the root block. |
1984 | */ |
1985 | void |
1986 | xfs_btree_init_ptr_from_cur( |
1987 | struct xfs_btree_cur *cur, |
1988 | union xfs_btree_ptr *ptr) |
1989 | { |
1990 | if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE) { |
1991 | /* |
1992 | * Inode-rooted btrees call xfs_btree_get_iroot to find the root |
1993 | * in xfs_btree_lookup_get_block and don't need a pointer here. |
1994 | */ |
1995 | ptr->l = 0; |
1996 | } else if (cur->bc_flags & XFS_BTREE_STAGING) { |
1997 | ptr->s = cpu_to_be32(cur->bc_ag.afake->af_root); |
1998 | } else { |
1999 | cur->bc_ops->init_ptr_from_cur(cur, ptr); |
2000 | } |
2001 | } |
2002 | |
2003 | /* |
2004 | * Lookup the record. The cursor is made to point to it, based on dir. |
2005 | * stat is set to 0 if can't find any such record, 1 for success. |
2006 | */ |
2007 | int /* error */ |
2008 | xfs_btree_lookup( |
2009 | struct xfs_btree_cur *cur, /* btree cursor */ |
2010 | xfs_lookup_t dir, /* <=, ==, or >= */ |
2011 | int *stat) /* success/failure */ |
2012 | { |
2013 | struct xfs_btree_block *block; /* current btree block */ |
2014 | int64_t diff; /* difference for the current key */ |
2015 | int error; /* error return value */ |
2016 | int keyno; /* current key number */ |
2017 | int level; /* level in the btree */ |
2018 | union xfs_btree_ptr *pp; /* ptr to btree block */ |
2019 | union xfs_btree_ptr ptr; /* ptr to btree block */ |
2020 | |
2021 | XFS_BTREE_STATS_INC(cur, lookup); |
2022 | |
2023 | /* No such thing as a zero-level tree. */ |
2024 | if (XFS_IS_CORRUPT(cur->bc_mp, cur->bc_nlevels == 0)) { |
2025 | xfs_btree_mark_sick(cur); |
2026 | return -EFSCORRUPTED; |
2027 | } |
2028 | |
2029 | block = NULL; |
2030 | keyno = 0; |
2031 | |
2032 | /* initialise start pointer from cursor */ |
2033 | xfs_btree_init_ptr_from_cur(cur, ptr: &ptr); |
2034 | pp = &ptr; |
2035 | |
2036 | /* |
2037 | * Iterate over each level in the btree, starting at the root. |
2038 | * For each level above the leaves, find the key we need, based |
2039 | * on the lookup record, then follow the corresponding block |
2040 | * pointer down to the next level. |
2041 | */ |
2042 | for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) { |
2043 | /* Get the block we need to do the lookup on. */ |
2044 | error = xfs_btree_lookup_get_block(cur, level, pp, blkp: &block); |
2045 | if (error) |
2046 | goto error0; |
2047 | |
2048 | if (diff == 0) { |
2049 | /* |
2050 | * If we already had a key match at a higher level, we |
2051 | * know we need to use the first entry in this block. |
2052 | */ |
2053 | keyno = 1; |
2054 | } else { |
2055 | /* Otherwise search this block. Do a binary search. */ |
2056 | |
2057 | int high; /* high entry number */ |
2058 | int low; /* low entry number */ |
2059 | |
2060 | /* Set low and high entry numbers, 1-based. */ |
2061 | low = 1; |
2062 | high = xfs_btree_get_numrecs(block); |
2063 | if (!high) { |
2064 | /* Block is empty, must be an empty leaf. */ |
2065 | if (level != 0 || cur->bc_nlevels != 1) { |
2066 | XFS_CORRUPTION_ERROR(__func__, |
2067 | XFS_ERRLEVEL_LOW, |
2068 | cur->bc_mp, block, |
2069 | sizeof(*block)); |
2070 | xfs_btree_mark_sick(cur); |
2071 | return -EFSCORRUPTED; |
2072 | } |
2073 | |
2074 | cur->bc_levels[0].ptr = dir != XFS_LOOKUP_LE; |
2075 | *stat = 0; |
2076 | return 0; |
2077 | } |
2078 | |
2079 | /* Binary search the block. */ |
2080 | while (low <= high) { |
2081 | union xfs_btree_key key; |
2082 | union xfs_btree_key *kp; |
2083 | |
2084 | XFS_BTREE_STATS_INC(cur, compare); |
2085 | |
2086 | /* keyno is average of low and high. */ |
2087 | keyno = (low + high) >> 1; |
2088 | |
2089 | /* Get current search key */ |
2090 | kp = xfs_lookup_get_search_key(cur, level, |
2091 | keyno, block, &key); |
2092 | |
2093 | /* |
2094 | * Compute difference to get next direction: |
2095 | * - less than, move right |
2096 | * - greater than, move left |
2097 | * - equal, we're done |
2098 | */ |
2099 | diff = cur->bc_ops->key_diff(cur, kp); |
2100 | if (diff < 0) |
2101 | low = keyno + 1; |
2102 | else if (diff > 0) |
2103 | high = keyno - 1; |
2104 | else |
2105 | break; |
2106 | } |
2107 | } |
2108 | |
2109 | /* |
2110 | * If there are more levels, set up for the next level |
2111 | * by getting the block number and filling in the cursor. |
2112 | */ |
2113 | if (level > 0) { |
2114 | /* |
2115 | * If we moved left, need the previous key number, |
2116 | * unless there isn't one. |
2117 | */ |
2118 | if (diff > 0 && --keyno < 1) |
2119 | keyno = 1; |
2120 | pp = xfs_btree_ptr_addr(cur, n: keyno, block); |
2121 | |
2122 | error = xfs_btree_debug_check_ptr(cur, pp, 0, level); |
2123 | if (error) |
2124 | goto error0; |
2125 | |
2126 | cur->bc_levels[level].ptr = keyno; |
2127 | } |
2128 | } |
2129 | |
2130 | /* Done with the search. See if we need to adjust the results. */ |
2131 | if (dir != XFS_LOOKUP_LE && diff < 0) { |
2132 | keyno++; |
2133 | /* |
2134 | * If ge search and we went off the end of the block, but it's |
2135 | * not the last block, we're in the wrong block. |
2136 | */ |
2137 | xfs_btree_get_sibling(cur, block, ptr: &ptr, XFS_BB_RIGHTSIB); |
2138 | if (dir == XFS_LOOKUP_GE && |
2139 | keyno > xfs_btree_get_numrecs(block) && |
2140 | !xfs_btree_ptr_is_null(cur, &ptr)) { |
2141 | int i; |
2142 | |
2143 | cur->bc_levels[0].ptr = keyno; |
2144 | error = xfs_btree_increment(cur, level: 0, stat: &i); |
2145 | if (error) |
2146 | goto error0; |
2147 | if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { |
2148 | xfs_btree_mark_sick(cur); |
2149 | return -EFSCORRUPTED; |
2150 | } |
2151 | *stat = 1; |
2152 | return 0; |
2153 | } |
2154 | } else if (dir == XFS_LOOKUP_LE && diff > 0) |
2155 | keyno--; |
2156 | cur->bc_levels[0].ptr = keyno; |
2157 | |
2158 | /* Return if we succeeded or not. */ |
2159 | if (keyno == 0 || keyno > xfs_btree_get_numrecs(block)) |
2160 | *stat = 0; |
2161 | else if (dir != XFS_LOOKUP_EQ || diff == 0) |
2162 | *stat = 1; |
2163 | else |
2164 | *stat = 0; |
2165 | return 0; |
2166 | |
2167 | error0: |
2168 | return error; |
2169 | } |
2170 | |
2171 | /* Find the high key storage area from a regular key. */ |
2172 | union xfs_btree_key * |
2173 | xfs_btree_high_key_from_key( |
2174 | struct xfs_btree_cur *cur, |
2175 | union xfs_btree_key *key) |
2176 | { |
2177 | ASSERT(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING); |
2178 | return (union xfs_btree_key *)((char *)key + |
2179 | (cur->bc_ops->key_len / 2)); |
2180 | } |
2181 | |
2182 | /* Determine the low (and high if overlapped) keys of a leaf block */ |
2183 | STATIC void |
2184 | xfs_btree_get_leaf_keys( |
2185 | struct xfs_btree_cur *cur, |
2186 | struct xfs_btree_block *block, |
2187 | union xfs_btree_key *key) |
2188 | { |
2189 | union xfs_btree_key max_hkey; |
2190 | union xfs_btree_key hkey; |
2191 | union xfs_btree_rec *rec; |
2192 | union xfs_btree_key *high; |
2193 | int n; |
2194 | |
2195 | rec = xfs_btree_rec_addr(cur, n: 1, block); |
2196 | cur->bc_ops->init_key_from_rec(key, rec); |
2197 | |
2198 | if (cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING) { |
2199 | |
2200 | cur->bc_ops->init_high_key_from_rec(&max_hkey, rec); |
2201 | for (n = 2; n <= xfs_btree_get_numrecs(block); n++) { |
2202 | rec = xfs_btree_rec_addr(cur, n, block); |
2203 | cur->bc_ops->init_high_key_from_rec(&hkey, rec); |
2204 | if (xfs_btree_keycmp_gt(cur, &hkey, &max_hkey)) |
2205 | max_hkey = hkey; |
2206 | } |
2207 | |
2208 | high = xfs_btree_high_key_from_key(cur, key); |
2209 | memcpy(high, &max_hkey, cur->bc_ops->key_len / 2); |
2210 | } |
2211 | } |
2212 | |
2213 | /* Determine the low (and high if overlapped) keys of a node block */ |
2214 | STATIC void |
2215 | xfs_btree_get_node_keys( |
2216 | struct xfs_btree_cur *cur, |
2217 | struct xfs_btree_block *block, |
2218 | union xfs_btree_key *key) |
2219 | { |
2220 | union xfs_btree_key *hkey; |
2221 | union xfs_btree_key *max_hkey; |
2222 | union xfs_btree_key *high; |
2223 | int n; |
2224 | |
2225 | if (cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING) { |
2226 | memcpy(key, xfs_btree_key_addr(cur, n: 1, block), |
2227 | cur->bc_ops->key_len / 2); |
2228 | |
2229 | max_hkey = xfs_btree_high_key_addr(cur, n: 1, block); |
2230 | for (n = 2; n <= xfs_btree_get_numrecs(block); n++) { |
2231 | hkey = xfs_btree_high_key_addr(cur, n, block); |
2232 | if (xfs_btree_keycmp_gt(cur, hkey, max_hkey)) |
2233 | max_hkey = hkey; |
2234 | } |
2235 | |
2236 | high = xfs_btree_high_key_from_key(cur, key); |
2237 | memcpy(high, max_hkey, cur->bc_ops->key_len / 2); |
2238 | } else { |
2239 | memcpy(key, xfs_btree_key_addr(cur, n: 1, block), |
2240 | cur->bc_ops->key_len); |
2241 | } |
2242 | } |
2243 | |
2244 | /* Derive the keys for any btree block. */ |
2245 | void |
2246 | xfs_btree_get_keys( |
2247 | struct xfs_btree_cur *cur, |
2248 | struct xfs_btree_block *block, |
2249 | union xfs_btree_key *key) |
2250 | { |
2251 | if (be16_to_cpu(block->bb_level) == 0) |
2252 | xfs_btree_get_leaf_keys(cur, block, key); |
2253 | else |
2254 | xfs_btree_get_node_keys(cur, block, key); |
2255 | } |
2256 | |
2257 | /* |
2258 | * Decide if we need to update the parent keys of a btree block. For |
2259 | * a standard btree this is only necessary if we're updating the first |
2260 | * record/key. For an overlapping btree, we must always update the |
2261 | * keys because the highest key can be in any of the records or keys |
2262 | * in the block. |
2263 | */ |
2264 | static inline bool |
2265 | xfs_btree_needs_key_update( |
2266 | struct xfs_btree_cur *cur, |
2267 | int ptr) |
2268 | { |
2269 | return (cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING) || ptr == 1; |
2270 | } |
2271 | |
2272 | /* |
2273 | * Update the low and high parent keys of the given level, progressing |
2274 | * towards the root. If force_all is false, stop if the keys for a given |
2275 | * level do not need updating. |
2276 | */ |
2277 | STATIC int |
2278 | __xfs_btree_updkeys( |
2279 | struct xfs_btree_cur *cur, |
2280 | int level, |
2281 | struct xfs_btree_block *block, |
2282 | struct xfs_buf *bp0, |
2283 | bool force_all) |
2284 | { |
2285 | union xfs_btree_key key; /* keys from current level */ |
2286 | union xfs_btree_key *lkey; /* keys from the next level up */ |
2287 | union xfs_btree_key *hkey; |
2288 | union xfs_btree_key *nlkey; /* keys from the next level up */ |
2289 | union xfs_btree_key *nhkey; |
2290 | struct xfs_buf *bp; |
2291 | int ptr; |
2292 | |
2293 | ASSERT(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING); |
2294 | |
2295 | /* Exit if there aren't any parent levels to update. */ |
2296 | if (level + 1 >= cur->bc_nlevels) |
2297 | return 0; |
2298 | |
2299 | trace_xfs_btree_updkeys(cur, level, bp0); |
2300 | |
2301 | lkey = &key; |
2302 | hkey = xfs_btree_high_key_from_key(cur, key: lkey); |
2303 | xfs_btree_get_keys(cur, block, key: lkey); |
2304 | for (level++; level < cur->bc_nlevels; level++) { |
2305 | #ifdef DEBUG |
2306 | int error; |
2307 | #endif |
2308 | block = xfs_btree_get_block(cur, level, bpp: &bp); |
2309 | trace_xfs_btree_updkeys(cur, level, bp); |
2310 | #ifdef DEBUG |
2311 | error = xfs_btree_check_block(cur, block, level, bp); |
2312 | if (error) |
2313 | return error; |
2314 | #endif |
2315 | ptr = cur->bc_levels[level].ptr; |
2316 | nlkey = xfs_btree_key_addr(cur, n: ptr, block); |
2317 | nhkey = xfs_btree_high_key_addr(cur, n: ptr, block); |
2318 | if (!force_all && |
2319 | xfs_btree_keycmp_eq(cur, nlkey, lkey) && |
2320 | xfs_btree_keycmp_eq(cur, nhkey, hkey)) |
2321 | break; |
2322 | xfs_btree_copy_keys(cur, dst_key: nlkey, src_key: lkey, numkeys: 1); |
2323 | xfs_btree_log_keys(cur, bp, ptr, ptr); |
2324 | if (level + 1 >= cur->bc_nlevels) |
2325 | break; |
2326 | xfs_btree_get_node_keys(cur, block, lkey); |
2327 | } |
2328 | |
2329 | return 0; |
2330 | } |
2331 | |
2332 | /* Update all the keys from some level in cursor back to the root. */ |
2333 | STATIC int |
2334 | xfs_btree_updkeys_force( |
2335 | struct xfs_btree_cur *cur, |
2336 | int level) |
2337 | { |
2338 | struct xfs_buf *bp; |
2339 | struct xfs_btree_block *block; |
2340 | |
2341 | block = xfs_btree_get_block(cur, level, bpp: &bp); |
2342 | return __xfs_btree_updkeys(cur, level, block, bp, true); |
2343 | } |
2344 | |
2345 | /* |
2346 | * Update the parent keys of the given level, progressing towards the root. |
2347 | */ |
2348 | STATIC int |
2349 | xfs_btree_update_keys( |
2350 | struct xfs_btree_cur *cur, |
2351 | int level) |
2352 | { |
2353 | struct xfs_btree_block *block; |
2354 | struct xfs_buf *bp; |
2355 | union xfs_btree_key *kp; |
2356 | union xfs_btree_key key; |
2357 | int ptr; |
2358 | |
2359 | ASSERT(level >= 0); |
2360 | |
2361 | block = xfs_btree_get_block(cur, level, bpp: &bp); |
2362 | if (cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING) |
2363 | return __xfs_btree_updkeys(cur, level, block, bp, false); |
2364 | |
2365 | /* |
2366 | * Go up the tree from this level toward the root. |
2367 | * At each level, update the key value to the value input. |
2368 | * Stop when we reach a level where the cursor isn't pointing |
2369 | * at the first entry in the block. |
2370 | */ |
2371 | xfs_btree_get_keys(cur, block, key: &key); |
2372 | for (level++, ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) { |
2373 | #ifdef DEBUG |
2374 | int error; |
2375 | #endif |
2376 | block = xfs_btree_get_block(cur, level, bpp: &bp); |
2377 | #ifdef DEBUG |
2378 | error = xfs_btree_check_block(cur, block, level, bp); |
2379 | if (error) |
2380 | return error; |
2381 | #endif |
2382 | ptr = cur->bc_levels[level].ptr; |
2383 | kp = xfs_btree_key_addr(cur, n: ptr, block); |
2384 | xfs_btree_copy_keys(cur, dst_key: kp, src_key: &key, numkeys: 1); |
2385 | xfs_btree_log_keys(cur, bp, ptr, ptr); |
2386 | } |
2387 | |
2388 | return 0; |
2389 | } |
2390 | |
2391 | /* |
2392 | * Update the record referred to by cur to the value in the |
2393 | * given record. This either works (return 0) or gets an |
2394 | * EFSCORRUPTED error. |
2395 | */ |
2396 | int |
2397 | xfs_btree_update( |
2398 | struct xfs_btree_cur *cur, |
2399 | union xfs_btree_rec *rec) |
2400 | { |
2401 | struct xfs_btree_block *block; |
2402 | struct xfs_buf *bp; |
2403 | int error; |
2404 | int ptr; |
2405 | union xfs_btree_rec *rp; |
2406 | |
2407 | /* Pick up the current block. */ |
2408 | block = xfs_btree_get_block(cur, level: 0, bpp: &bp); |
2409 | |
2410 | #ifdef DEBUG |
2411 | error = xfs_btree_check_block(cur, block, 0, bp); |
2412 | if (error) |
2413 | goto error0; |
2414 | #endif |
2415 | /* Get the address of the rec to be updated. */ |
2416 | ptr = cur->bc_levels[0].ptr; |
2417 | rp = xfs_btree_rec_addr(cur, n: ptr, block); |
2418 | |
2419 | /* Fill in the new contents and log them. */ |
2420 | xfs_btree_copy_recs(cur, rp, rec, 1); |
2421 | xfs_btree_log_recs(cur, bp, first: ptr, last: ptr); |
2422 | |
2423 | /* |
2424 | * If we are tracking the last record in the tree and |
2425 | * we are at the far right edge of the tree, update it. |
2426 | */ |
2427 | if (xfs_btree_is_lastrec(cur, block, 0)) { |
2428 | cur->bc_ops->update_lastrec(cur, block, rec, |
2429 | ptr, LASTREC_UPDATE); |
2430 | } |
2431 | |
2432 | /* Pass new key value up to our parent. */ |
2433 | if (xfs_btree_needs_key_update(cur, ptr)) { |
2434 | error = xfs_btree_update_keys(cur, 0); |
2435 | if (error) |
2436 | goto error0; |
2437 | } |
2438 | |
2439 | return 0; |
2440 | |
2441 | error0: |
2442 | return error; |
2443 | } |
2444 | |
2445 | /* |
2446 | * Move 1 record left from cur/level if possible. |
2447 | * Update cur to reflect the new path. |
2448 | */ |
2449 | STATIC int /* error */ |
2450 | xfs_btree_lshift( |
2451 | struct xfs_btree_cur *cur, |
2452 | int level, |
2453 | int *stat) /* success/failure */ |
2454 | { |
2455 | struct xfs_buf *lbp; /* left buffer pointer */ |
2456 | struct xfs_btree_block *left; /* left btree block */ |
2457 | int lrecs; /* left record count */ |
2458 | struct xfs_buf *rbp; /* right buffer pointer */ |
2459 | struct xfs_btree_block *right; /* right btree block */ |
2460 | struct xfs_btree_cur *tcur; /* temporary btree cursor */ |
2461 | int rrecs; /* right record count */ |
2462 | union xfs_btree_ptr lptr; /* left btree pointer */ |
2463 | union xfs_btree_key *rkp = NULL; /* right btree key */ |
2464 | union xfs_btree_ptr *rpp = NULL; /* right address pointer */ |
2465 | union xfs_btree_rec *rrp = NULL; /* right record pointer */ |
2466 | int error; /* error return value */ |
2467 | int i; |
2468 | |
2469 | if (xfs_btree_at_iroot(cur, level)) |
2470 | goto out0; |
2471 | |
2472 | /* Set up variables for this block as "right". */ |
2473 | right = xfs_btree_get_block(cur, level, bpp: &rbp); |
2474 | |
2475 | #ifdef DEBUG |
2476 | error = xfs_btree_check_block(cur, right, level, rbp); |
2477 | if (error) |
2478 | goto error0; |
2479 | #endif |
2480 | |
2481 | /* If we've got no left sibling then we can't shift an entry left. */ |
2482 | xfs_btree_get_sibling(cur, block: right, ptr: &lptr, XFS_BB_LEFTSIB); |
2483 | if (xfs_btree_ptr_is_null(cur, &lptr)) |
2484 | goto out0; |
2485 | |
2486 | /* |
2487 | * If the cursor entry is the one that would be moved, don't |
2488 | * do it... it's too complicated. |
2489 | */ |
2490 | if (cur->bc_levels[level].ptr <= 1) |
2491 | goto out0; |
2492 | |
2493 | /* Set up the left neighbor as "left". */ |
2494 | error = xfs_btree_read_buf_block(cur, ptr: &lptr, flags: 0, block: &left, bpp: &lbp); |
2495 | if (error) |
2496 | goto error0; |
2497 | |
2498 | /* If it's full, it can't take another entry. */ |
2499 | lrecs = xfs_btree_get_numrecs(block: left); |
2500 | if (lrecs == cur->bc_ops->get_maxrecs(cur, level)) |
2501 | goto out0; |
2502 | |
2503 | rrecs = xfs_btree_get_numrecs(block: right); |
2504 | |
2505 | /* |
2506 | * We add one entry to the left side and remove one for the right side. |
2507 | * Account for it here, the changes will be updated on disk and logged |
2508 | * later. |
2509 | */ |
2510 | lrecs++; |
2511 | rrecs--; |
2512 | |
2513 | XFS_BTREE_STATS_INC(cur, lshift); |
2514 | XFS_BTREE_STATS_ADD(cur, moves, 1); |
2515 | |
2516 | /* |
2517 | * If non-leaf, copy a key and a ptr to the left block. |
2518 | * Log the changes to the left block. |
2519 | */ |
2520 | if (level > 0) { |
2521 | /* It's a non-leaf. Move keys and pointers. */ |
2522 | union xfs_btree_key *lkp; /* left btree key */ |
2523 | union xfs_btree_ptr *lpp; /* left address pointer */ |
2524 | |
2525 | lkp = xfs_btree_key_addr(cur, n: lrecs, block: left); |
2526 | rkp = xfs_btree_key_addr(cur, n: 1, block: right); |
2527 | |
2528 | lpp = xfs_btree_ptr_addr(cur, n: lrecs, block: left); |
2529 | rpp = xfs_btree_ptr_addr(cur, n: 1, block: right); |
2530 | |
2531 | error = xfs_btree_debug_check_ptr(cur, rpp, 0, level); |
2532 | if (error) |
2533 | goto error0; |
2534 | |
2535 | xfs_btree_copy_keys(cur, dst_key: lkp, src_key: rkp, numkeys: 1); |
2536 | xfs_btree_copy_ptrs(cur, dst_ptr: lpp, src_ptr: rpp, numptrs: 1); |
2537 | |
2538 | xfs_btree_log_keys(cur, lbp, lrecs, lrecs); |
2539 | xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs); |
2540 | |
2541 | ASSERT(cur->bc_ops->keys_inorder(cur, |
2542 | xfs_btree_key_addr(cur, n: lrecs - 1, block: left), lkp)); |
2543 | } else { |
2544 | /* It's a leaf. Move records. */ |
2545 | union xfs_btree_rec *lrp; /* left record pointer */ |
2546 | |
2547 | lrp = xfs_btree_rec_addr(cur, n: lrecs, block: left); |
2548 | rrp = xfs_btree_rec_addr(cur, n: 1, block: right); |
2549 | |
2550 | xfs_btree_copy_recs(cur, lrp, rrp, 1); |
2551 | xfs_btree_log_recs(cur, bp: lbp, first: lrecs, last: lrecs); |
2552 | |
2553 | ASSERT(cur->bc_ops->recs_inorder(cur, |
2554 | xfs_btree_rec_addr(cur, n: lrecs - 1, block: left), lrp)); |
2555 | } |
2556 | |
2557 | xfs_btree_set_numrecs(left, lrecs); |
2558 | xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS); |
2559 | |
2560 | xfs_btree_set_numrecs(right, rrecs); |
2561 | xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS); |
2562 | |
2563 | /* |
2564 | * Slide the contents of right down one entry. |
2565 | */ |
2566 | XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1); |
2567 | if (level > 0) { |
2568 | /* It's a nonleaf. operate on keys and ptrs */ |
2569 | for (i = 0; i < rrecs; i++) { |
2570 | error = xfs_btree_debug_check_ptr(cur, rpp, i + 1, level); |
2571 | if (error) |
2572 | goto error0; |
2573 | } |
2574 | |
2575 | xfs_btree_shift_keys(cur, |
2576 | xfs_btree_key_addr(cur, n: 2, block: right), |
2577 | -1, rrecs); |
2578 | xfs_btree_shift_ptrs(cur, |
2579 | xfs_btree_ptr_addr(cur, n: 2, block: right), |
2580 | -1, rrecs); |
2581 | |
2582 | xfs_btree_log_keys(cur, rbp, 1, rrecs); |
2583 | xfs_btree_log_ptrs(cur, rbp, 1, rrecs); |
2584 | } else { |
2585 | /* It's a leaf. operate on records */ |
2586 | xfs_btree_shift_recs(cur, |
2587 | xfs_btree_rec_addr(cur, n: 2, block: right), |
2588 | -1, rrecs); |
2589 | xfs_btree_log_recs(cur, bp: rbp, first: 1, last: rrecs); |
2590 | } |
2591 | |
2592 | /* |
2593 | * Using a temporary cursor, update the parent key values of the |
2594 | * block on the left. |
2595 | */ |
2596 | if (cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING) { |
2597 | error = xfs_btree_dup_cursor(cur, ncur: &tcur); |
2598 | if (error) |
2599 | goto error0; |
2600 | i = xfs_btree_firstrec(tcur, level); |
2601 | if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) { |
2602 | xfs_btree_mark_sick(cur); |
2603 | error = -EFSCORRUPTED; |
2604 | goto error0; |
2605 | } |
2606 | |
2607 | error = xfs_btree_decrement(cur: tcur, level, stat: &i); |
2608 | if (error) |
2609 | goto error1; |
2610 | |
2611 | /* Update the parent high keys of the left block, if needed. */ |
2612 | error = xfs_btree_update_keys(tcur, level); |
2613 | if (error) |
2614 | goto error1; |
2615 | |
2616 | xfs_btree_del_cursor(cur: tcur, XFS_BTREE_NOERROR); |
2617 | } |
2618 | |
2619 | /* Update the parent keys of the right block. */ |
2620 | error = xfs_btree_update_keys(cur, level); |
2621 | if (error) |
2622 | goto error0; |
2623 | |
2624 | /* Slide the cursor value left one. */ |
2625 | cur->bc_levels[level].ptr--; |
2626 | |
2627 | *stat = 1; |
2628 | return 0; |
2629 | |
2630 | out0: |
2631 | *stat = 0; |
2632 | return 0; |
2633 | |
2634 | error0: |
2635 | return error; |
2636 | |
2637 | error1: |
2638 | xfs_btree_del_cursor(cur: tcur, XFS_BTREE_ERROR); |
2639 | return error; |
2640 | } |
2641 | |
2642 | /* |
2643 | * Move 1 record right from cur/level if possible. |
2644 | * Update cur to reflect the new path. |
2645 | */ |
2646 | STATIC int /* error */ |
2647 | xfs_btree_rshift( |
2648 | struct xfs_btree_cur *cur, |
2649 | int level, |
2650 | int *stat) /* success/failure */ |
2651 | { |
2652 | struct xfs_buf *lbp; /* left buffer pointer */ |
2653 | struct xfs_btree_block *left; /* left btree block */ |
2654 | struct xfs_buf *rbp; /* right buffer pointer */ |
2655 | struct xfs_btree_block *right; /* right btree block */ |
2656 | struct xfs_btree_cur *tcur; /* temporary btree cursor */ |
2657 | union xfs_btree_ptr rptr; /* right block pointer */ |
2658 | union xfs_btree_key *rkp; /* right btree key */ |
2659 | int rrecs; /* right record count */ |
2660 | int lrecs; /* left record count */ |
2661 | int error; /* error return value */ |
2662 | int i; /* loop counter */ |
2663 | |
2664 | if (xfs_btree_at_iroot(cur, level)) |
2665 | goto out0; |
2666 | |
2667 | /* Set up variables for this block as "left". */ |
2668 | left = xfs_btree_get_block(cur, level, bpp: &lbp); |
2669 | |
2670 | #ifdef DEBUG |
2671 | error = xfs_btree_check_block(cur, left, level, lbp); |
2672 | if (error) |
2673 | goto error0; |
2674 | #endif |
2675 | |
2676 | /* If we've got no right sibling then we can't shift an entry right. */ |
2677 | xfs_btree_get_sibling(cur, block: left, ptr: &rptr, XFS_BB_RIGHTSIB); |
2678 | if (xfs_btree_ptr_is_null(cur, &rptr)) |
2679 | goto out0; |
2680 | |
2681 | /* |
2682 | * If the cursor entry is the one that would be moved, don't |
2683 | * do it... it's too complicated. |
2684 | */ |
2685 | lrecs = xfs_btree_get_numrecs(block: left); |
2686 | if (cur->bc_levels[level].ptr >= lrecs) |
2687 | goto out0; |
2688 | |
2689 | /* Set up the right neighbor as "right". */ |
2690 | error = xfs_btree_read_buf_block(cur, ptr: &rptr, flags: 0, block: &right, bpp: &rbp); |
2691 | if (error) |
2692 | goto error0; |
2693 | |
2694 | /* If it's full, it can't take another entry. */ |
2695 | rrecs = xfs_btree_get_numrecs(block: right); |
2696 | if (rrecs == cur->bc_ops->get_maxrecs(cur, level)) |
2697 | goto out0; |
2698 | |
2699 | XFS_BTREE_STATS_INC(cur, rshift); |
2700 | XFS_BTREE_STATS_ADD(cur, moves, rrecs); |
2701 | |
2702 | /* |
2703 | * Make a hole at the start of the right neighbor block, then |
2704 | * copy the last left block entry to the hole. |
2705 | */ |
2706 | if (level > 0) { |
2707 | /* It's a nonleaf. make a hole in the keys and ptrs */ |
2708 | union xfs_btree_key *lkp; |
2709 | union xfs_btree_ptr *lpp; |
2710 | union xfs_btree_ptr *rpp; |
2711 | |
2712 | lkp = xfs_btree_key_addr(cur, n: lrecs, block: left); |
2713 | lpp = xfs_btree_ptr_addr(cur, n: lrecs, block: left); |
2714 | rkp = xfs_btree_key_addr(cur, n: 1, block: right); |
2715 | rpp = xfs_btree_ptr_addr(cur, n: 1, block: right); |
2716 | |
2717 | for (i = rrecs - 1; i >= 0; i--) { |
2718 | error = xfs_btree_debug_check_ptr(cur, rpp, i, level); |
2719 | if (error) |
2720 | goto error0; |
2721 | } |
2722 | |
2723 | xfs_btree_shift_keys(cur, rkp, 1, rrecs); |
2724 | xfs_btree_shift_ptrs(cur, rpp, 1, rrecs); |
2725 | |
2726 | error = xfs_btree_debug_check_ptr(cur, lpp, 0, level); |
2727 | if (error) |
2728 | goto error0; |
2729 | |
2730 | /* Now put the new data in, and log it. */ |
2731 | xfs_btree_copy_keys(cur, dst_key: rkp, src_key: lkp, numkeys: 1); |
2732 | xfs_btree_copy_ptrs(cur, dst_ptr: rpp, src_ptr: lpp, numptrs: 1); |
2733 | |
2734 | xfs_btree_log_keys(cur, rbp, 1, rrecs + 1); |
2735 | xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1); |
2736 | |
2737 | ASSERT(cur->bc_ops->keys_inorder(cur, rkp, |
2738 | xfs_btree_key_addr(cur, n: 2, block: right))); |
2739 | } else { |
2740 | /* It's a leaf. make a hole in the records */ |
2741 | union xfs_btree_rec *lrp; |
2742 | union xfs_btree_rec *rrp; |
2743 | |
2744 | lrp = xfs_btree_rec_addr(cur, n: lrecs, block: left); |
2745 | rrp = xfs_btree_rec_addr(cur, n: 1, block: right); |
2746 | |
2747 | xfs_btree_shift_recs(cur, rrp, 1, rrecs); |
2748 | |
2749 | /* Now put the new data in, and log it. */ |
2750 | xfs_btree_copy_recs(cur, rrp, lrp, 1); |
2751 | xfs_btree_log_recs(cur, bp: rbp, first: 1, last: rrecs + 1); |
2752 | } |
2753 | |
2754 | /* |
2755 | * Decrement and log left's numrecs, bump and log right's numrecs. |
2756 | */ |
2757 | xfs_btree_set_numrecs(left, --lrecs); |
2758 | xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS); |
2759 | |
2760 | xfs_btree_set_numrecs(right, ++rrecs); |
2761 | xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS); |
2762 | |
2763 | /* |
2764 | * Using a temporary cursor, update the parent key values of the |
2765 | * block on the right. |
2766 | */ |
2767 | error = xfs_btree_dup_cursor(cur, ncur: &tcur); |
2768 | if (error) |
2769 | goto error0; |
2770 | i = xfs_btree_lastrec(tcur, level); |
2771 | if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) { |
2772 | xfs_btree_mark_sick(cur); |
2773 | error = -EFSCORRUPTED; |
2774 | goto error0; |
2775 | } |
2776 | |
2777 | error = xfs_btree_increment(cur: tcur, level, stat: &i); |
2778 | if (error) |
2779 | goto error1; |
2780 | |
2781 | /* Update the parent high keys of the left block, if needed. */ |
2782 | if (cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING) { |
2783 | error = xfs_btree_update_keys(cur, level); |
2784 | if (error) |
2785 | goto error1; |
2786 | } |
2787 | |
2788 | /* Update the parent keys of the right block. */ |
2789 | error = xfs_btree_update_keys(tcur, level); |
2790 | if (error) |
2791 | goto error1; |
2792 | |
2793 | xfs_btree_del_cursor(cur: tcur, XFS_BTREE_NOERROR); |
2794 | |
2795 | *stat = 1; |
2796 | return 0; |
2797 | |
2798 | out0: |
2799 | *stat = 0; |
2800 | return 0; |
2801 | |
2802 | error0: |
2803 | return error; |
2804 | |
2805 | error1: |
2806 | xfs_btree_del_cursor(cur: tcur, XFS_BTREE_ERROR); |
2807 | return error; |
2808 | } |
2809 | |
2810 | static inline int |
2811 | xfs_btree_alloc_block( |
2812 | struct xfs_btree_cur *cur, |
2813 | const union xfs_btree_ptr *hint_block, |
2814 | union xfs_btree_ptr *new_block, |
2815 | int *stat) |
2816 | { |
2817 | int error; |
2818 | |
2819 | /* |
2820 | * Don't allow block allocation for a staging cursor, because staging |
2821 | * cursors do not support regular btree modifications. |
2822 | * |
2823 | * Bulk loading uses a separate callback to obtain new blocks from a |
2824 | * preallocated list, which prevents ENOSPC failures during loading. |
2825 | */ |
2826 | if (unlikely(cur->bc_flags & XFS_BTREE_STAGING)) { |
2827 | ASSERT(0); |
2828 | return -EFSCORRUPTED; |
2829 | } |
2830 | |
2831 | error = cur->bc_ops->alloc_block(cur, hint_block, new_block, stat); |
2832 | trace_xfs_btree_alloc_block(cur, new_block, *stat, error); |
2833 | return error; |
2834 | } |
2835 | |
2836 | /* |
2837 | * Split cur/level block in half. |
2838 | * Return new block number and the key to its first |
2839 | * record (to be inserted into parent). |
2840 | */ |
2841 | STATIC int /* error */ |
2842 | __xfs_btree_split( |
2843 | struct xfs_btree_cur *cur, |
2844 | int level, |
2845 | union xfs_btree_ptr *ptrp, |
2846 | union xfs_btree_key *key, |
2847 | struct xfs_btree_cur **curp, |
2848 | int *stat) /* success/failure */ |
2849 | { |
2850 | union xfs_btree_ptr lptr; /* left sibling block ptr */ |
2851 | struct xfs_buf *lbp; /* left buffer pointer */ |
2852 | struct xfs_btree_block *left; /* left btree block */ |
2853 | union xfs_btree_ptr rptr; /* right sibling block ptr */ |
2854 | struct xfs_buf *rbp; /* right buffer pointer */ |
2855 | struct xfs_btree_block *right; /* right btree block */ |
2856 | union xfs_btree_ptr rrptr; /* right-right sibling ptr */ |
2857 | struct xfs_buf *rrbp; /* right-right buffer pointer */ |
2858 | struct xfs_btree_block *rrblock; /* right-right btree block */ |
2859 | int lrecs; |
2860 | int rrecs; |
2861 | int src_index; |
2862 | int error; /* error return value */ |
2863 | int i; |
2864 | |
2865 | XFS_BTREE_STATS_INC(cur, split); |
2866 | |
2867 | /* Set up left block (current one). */ |
2868 | left = xfs_btree_get_block(cur, level, bpp: &lbp); |
2869 | |
2870 | #ifdef DEBUG |
2871 | error = xfs_btree_check_block(cur, left, level, lbp); |
2872 | if (error) |
2873 | goto error0; |
2874 | #endif |
2875 | |
2876 | xfs_btree_buf_to_ptr(cur, lbp, &lptr); |
2877 | |
2878 | /* Allocate the new block. If we can't do it, we're toast. Give up. */ |
2879 | error = xfs_btree_alloc_block(cur, hint_block: &lptr, new_block: &rptr, stat); |
2880 | if (error) |
2881 | goto error0; |
2882 | if (*stat == 0) |
2883 | goto out0; |
2884 | XFS_BTREE_STATS_INC(cur, alloc); |
2885 | |
2886 | /* Set up the new block as "right". */ |
2887 | error = xfs_btree_get_buf_block(cur, ptr: &rptr, block: &right, bpp: &rbp); |
2888 | if (error) |
2889 | goto error0; |
2890 | |
2891 | /* Fill in the btree header for the new right block. */ |
2892 | xfs_btree_init_block_cur(cur, bp: rbp, level: xfs_btree_get_level(block: left), numrecs: 0); |
2893 | |
2894 | /* |
2895 | * Split the entries between the old and the new block evenly. |
2896 | * Make sure that if there's an odd number of entries now, that |
2897 | * each new block will have the same number of entries. |
2898 | */ |
2899 | lrecs = xfs_btree_get_numrecs(block: left); |
2900 | rrecs = lrecs / 2; |
2901 | if ((lrecs & 1) && cur->bc_levels[level].ptr <= rrecs + 1) |
2902 | rrecs++; |
2903 | src_index = (lrecs - rrecs + 1); |
2904 | |
2905 | XFS_BTREE_STATS_ADD(cur, moves, rrecs); |
2906 | |
2907 | /* Adjust numrecs for the later get_*_keys() calls. */ |
2908 | lrecs -= rrecs; |
2909 | xfs_btree_set_numrecs(left, lrecs); |
2910 | xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(block: right) + rrecs); |
2911 | |
2912 | /* |
2913 | * Copy btree block entries from the left block over to the |
2914 | * new block, the right. Update the right block and log the |
2915 | * changes. |
2916 | */ |
2917 | if (level > 0) { |
2918 | /* It's a non-leaf. Move keys and pointers. */ |
2919 | union xfs_btree_key *lkp; /* left btree key */ |
2920 | union xfs_btree_ptr *lpp; /* left address pointer */ |
2921 | union xfs_btree_key *rkp; /* right btree key */ |
2922 | union xfs_btree_ptr *rpp; /* right address pointer */ |
2923 | |
2924 | lkp = xfs_btree_key_addr(cur, n: src_index, block: left); |
2925 | lpp = xfs_btree_ptr_addr(cur, n: src_index, block: left); |
2926 | rkp = xfs_btree_key_addr(cur, n: 1, block: right); |
2927 | rpp = xfs_btree_ptr_addr(cur, n: 1, block: right); |
2928 | |
2929 | for (i = src_index; i < rrecs; i++) { |
2930 | error = xfs_btree_debug_check_ptr(cur, lpp, i, level); |
2931 | if (error) |
2932 | goto error0; |
2933 | } |
2934 | |
2935 | /* Copy the keys & pointers to the new block. */ |
2936 | xfs_btree_copy_keys(cur, dst_key: rkp, src_key: lkp, numkeys: rrecs); |
2937 | xfs_btree_copy_ptrs(cur, dst_ptr: rpp, src_ptr: lpp, numptrs: rrecs); |
2938 | |
2939 | xfs_btree_log_keys(cur, rbp, 1, rrecs); |
2940 | xfs_btree_log_ptrs(cur, rbp, 1, rrecs); |
2941 | |
2942 | /* Stash the keys of the new block for later insertion. */ |
2943 | xfs_btree_get_node_keys(cur, right, key); |
2944 | } else { |
2945 | /* It's a leaf. Move records. */ |
2946 | union xfs_btree_rec *lrp; /* left record pointer */ |
2947 | union xfs_btree_rec *rrp; /* right record pointer */ |
2948 | |
2949 | lrp = xfs_btree_rec_addr(cur, n: src_index, block: left); |
2950 | rrp = xfs_btree_rec_addr(cur, n: 1, block: right); |
2951 | |
2952 | /* Copy records to the new block. */ |
2953 | xfs_btree_copy_recs(cur, rrp, lrp, rrecs); |
2954 | xfs_btree_log_recs(cur, bp: rbp, first: 1, last: rrecs); |
2955 | |
2956 | /* Stash the keys of the new block for later insertion. */ |
2957 | xfs_btree_get_leaf_keys(cur, right, key); |
2958 | } |
2959 | |
2960 | /* |
2961 | * Find the left block number by looking in the buffer. |
2962 | * Adjust sibling pointers. |
2963 | */ |
2964 | xfs_btree_get_sibling(cur, block: left, ptr: &rrptr, XFS_BB_RIGHTSIB); |
2965 | xfs_btree_set_sibling(cur, block: right, ptr: &rrptr, XFS_BB_RIGHTSIB); |
2966 | xfs_btree_set_sibling(cur, block: right, ptr: &lptr, XFS_BB_LEFTSIB); |
2967 | xfs_btree_set_sibling(cur, block: left, ptr: &rptr, XFS_BB_RIGHTSIB); |
2968 | |
2969 | xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS); |
2970 | xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); |
2971 | |
2972 | /* |
2973 | * If there's a block to the new block's right, make that block |
2974 | * point back to right instead of to left. |
2975 | */ |
2976 | if (!xfs_btree_ptr_is_null(cur, &rrptr)) { |
2977 | error = xfs_btree_read_buf_block(cur, ptr: &rrptr, |
2978 | flags: 0, block: &rrblock, bpp: &rrbp); |
2979 | if (error) |
2980 | goto error0; |
2981 | xfs_btree_set_sibling(cur, block: rrblock, ptr: &rptr, XFS_BB_LEFTSIB); |
2982 | xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB); |
2983 | } |
2984 | |
2985 | /* Update the parent high keys of the left block, if needed. */ |
2986 | if (cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING) { |
2987 | error = xfs_btree_update_keys(cur, level); |
2988 | if (error) |
2989 | goto error0; |
2990 | } |
2991 | |
2992 | /* |
2993 | * If the cursor is really in the right block, move it there. |
2994 | * If it's just pointing past the last entry in left, then we'll |
2995 | * insert there, so don't change anything in that case. |
2996 | */ |
2997 | if (cur->bc_levels[level].ptr > lrecs + 1) { |
2998 | xfs_btree_setbuf(cur, level, rbp); |
2999 | cur->bc_levels[level].ptr -= lrecs; |
3000 | } |
3001 | /* |
3002 | * If there are more levels, we'll need another cursor which refers |
3003 | * the right block, no matter where this cursor was. |
3004 | */ |
3005 | if (level + 1 < cur->bc_nlevels) { |
3006 | error = xfs_btree_dup_cursor(cur, ncur: curp); |
3007 | if (error) |
3008 | goto error0; |
3009 | (*curp)->bc_levels[level + 1].ptr++; |
3010 | } |
3011 | *ptrp = rptr; |
3012 | *stat = 1; |
3013 | return 0; |
3014 | out0: |
3015 | *stat = 0; |
3016 | return 0; |
3017 | |
3018 | error0: |
3019 | return error; |
3020 | } |
3021 | |
3022 | #ifdef __KERNEL__ |
3023 | struct xfs_btree_split_args { |
3024 | struct xfs_btree_cur *cur; |
3025 | int level; |
3026 | union xfs_btree_ptr *ptrp; |
3027 | union xfs_btree_key *key; |
3028 | struct xfs_btree_cur **curp; |
3029 | int *stat; /* success/failure */ |
3030 | int result; |
3031 | bool kswapd; /* allocation in kswapd context */ |
3032 | struct completion *done; |
3033 | struct work_struct work; |
3034 | }; |
3035 | |
3036 | /* |
3037 | * Stack switching interfaces for allocation |
3038 | */ |
3039 | static void |
3040 | xfs_btree_split_worker( |
3041 | struct work_struct *work) |
3042 | { |
3043 | struct xfs_btree_split_args *args = container_of(work, |
3044 | struct xfs_btree_split_args, work); |
3045 | unsigned long pflags; |
3046 | unsigned long new_pflags = 0; |
3047 | |
3048 | /* |
3049 | * we are in a transaction context here, but may also be doing work |
3050 | * in kswapd context, and hence we may need to inherit that state |
3051 | * temporarily to ensure that we don't block waiting for memory reclaim |
3052 | * in any way. |
3053 | */ |
3054 | if (args->kswapd) |
3055 | new_pflags |= PF_MEMALLOC | PF_KSWAPD; |
3056 | |
3057 | current_set_flags_nested(&pflags, new_pflags); |
3058 | xfs_trans_set_context(args->cur->bc_tp); |
3059 | |
3060 | args->result = __xfs_btree_split(args->cur, args->level, args->ptrp, |
3061 | args->key, args->curp, args->stat); |
3062 | |
3063 | xfs_trans_clear_context(args->cur->bc_tp); |
3064 | current_restore_flags_nested(&pflags, new_pflags); |
3065 | |
3066 | /* |
3067 | * Do not access args after complete() has run here. We don't own args |
3068 | * and the owner may run and free args before we return here. |
3069 | */ |
3070 | complete(args->done); |
3071 | |
3072 | } |
3073 | |
3074 | /* |
3075 | * BMBT split requests often come in with little stack to work on so we push |
3076 | * them off to a worker thread so there is lots of stack to use. For the other |
3077 | * btree types, just call directly to avoid the context switch overhead here. |
3078 | * |
3079 | * Care must be taken here - the work queue rescuer thread introduces potential |
3080 | * AGF <> worker queue deadlocks if the BMBT block allocation has to lock new |
3081 | * AGFs to allocate blocks. A task being run by the rescuer could attempt to |
3082 | * lock an AGF that is already locked by a task queued to run by the rescuer, |
3083 | * resulting in an ABBA deadlock as the rescuer cannot run the lock holder to |
3084 | * release it until the current thread it is running gains the lock. |
3085 | * |
3086 | * To avoid this issue, we only ever queue BMBT splits that don't have an AGF |
3087 | * already locked to allocate from. The only place that doesn't hold an AGF |
3088 | * locked is unwritten extent conversion at IO completion, but that has already |
3089 | * been offloaded to a worker thread and hence has no stack consumption issues |
3090 | * we have to worry about. |
3091 | */ |
3092 | STATIC int /* error */ |
3093 | xfs_btree_split( |
3094 | struct xfs_btree_cur *cur, |
3095 | int level, |
3096 | union xfs_btree_ptr *ptrp, |
3097 | union xfs_btree_key *key, |
3098 | struct xfs_btree_cur **curp, |
3099 | int *stat) /* success/failure */ |
3100 | { |
3101 | struct xfs_btree_split_args args; |
3102 | DECLARE_COMPLETION_ONSTACK(done); |
3103 | |
3104 | if (!xfs_btree_is_bmap(cur->bc_ops) || |
3105 | cur->bc_tp->t_highest_agno == NULLAGNUMBER) |
3106 | return __xfs_btree_split(cur, level, ptrp, key, curp, stat); |
3107 | |
3108 | args.cur = cur; |
3109 | args.level = level; |
3110 | args.ptrp = ptrp; |
3111 | args.key = key; |
3112 | args.curp = curp; |
3113 | args.stat = stat; |
3114 | args.done = &done; |
3115 | args.kswapd = current_is_kswapd(); |
3116 | INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker); |
3117 | queue_work(xfs_alloc_wq, &args.work); |
3118 | wait_for_completion(&done); |
3119 | destroy_work_on_stack(&args.work); |
3120 | return args.result; |
3121 | } |
3122 | #else |
3123 | #define xfs_btree_split __xfs_btree_split |
3124 | #endif /* __KERNEL__ */ |
3125 | |
3126 | /* |
3127 | * Copy the old inode root contents into a real block and make the |
3128 | * broot point to it. |
3129 | */ |
3130 | int /* error */ |
3131 | xfs_btree_new_iroot( |
3132 | struct xfs_btree_cur *cur, /* btree cursor */ |
3133 | int *logflags, /* logging flags for inode */ |
3134 | int *stat) /* return status - 0 fail */ |
3135 | { |
3136 | struct xfs_buf *cbp; /* buffer for cblock */ |
3137 | struct xfs_btree_block *block; /* btree block */ |
3138 | struct xfs_btree_block *cblock; /* child btree block */ |
3139 | union xfs_btree_key *ckp; /* child key pointer */ |
3140 | union xfs_btree_ptr *cpp; /* child ptr pointer */ |
3141 | union xfs_btree_key *kp; /* pointer to btree key */ |
3142 | union xfs_btree_ptr *pp; /* pointer to block addr */ |
3143 | union xfs_btree_ptr nptr; /* new block addr */ |
3144 | int level; /* btree level */ |
3145 | int error; /* error return code */ |
3146 | int i; /* loop counter */ |
3147 | |
3148 | XFS_BTREE_STATS_INC(cur, newroot); |
3149 | |
3150 | ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_INODE); |
3151 | |
3152 | level = cur->bc_nlevels - 1; |
3153 | |
3154 | block = xfs_btree_get_iroot(cur); |
3155 | pp = xfs_btree_ptr_addr(cur, n: 1, block); |
3156 | |
3157 | /* Allocate the new block. If we can't do it, we're toast. Give up. */ |
3158 | error = xfs_btree_alloc_block(cur, hint_block: pp, new_block: &nptr, stat); |
3159 | if (error) |
3160 | goto error0; |
3161 | if (*stat == 0) |
3162 | return 0; |
3163 | |
3164 | XFS_BTREE_STATS_INC(cur, alloc); |
3165 | |
3166 | /* Copy the root into a real block. */ |
3167 | error = xfs_btree_get_buf_block(cur, ptr: &nptr, block: &cblock, bpp: &cbp); |
3168 | if (error) |
3169 | goto error0; |
3170 | |
3171 | /* |
3172 | * we can't just memcpy() the root in for CRC enabled btree blocks. |
3173 | * In that case have to also ensure the blkno remains correct |
3174 | */ |
3175 | memcpy(cblock, block, xfs_btree_block_len(cur)); |
3176 | if (xfs_has_crc(cur->bc_mp)) { |
3177 | __be64 bno = cpu_to_be64(xfs_buf_daddr(cbp)); |
3178 | if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) |
3179 | cblock->bb_u.l.bb_blkno = bno; |
3180 | else |
3181 | cblock->bb_u.s.bb_blkno = bno; |
3182 | } |
3183 | |
3184 | be16_add_cpu(&block->bb_level, 1); |
3185 | xfs_btree_set_numrecs(block, 1); |
3186 | cur->bc_nlevels++; |
3187 | ASSERT(cur->bc_nlevels <= cur->bc_maxlevels); |
3188 | cur->bc_levels[level + 1].ptr = 1; |
3189 | |
3190 | kp = xfs_btree_key_addr(cur, n: 1, block); |
3191 | ckp = xfs_btree_key_addr(cur, n: 1, block: cblock); |
3192 | xfs_btree_copy_keys(cur, dst_key: ckp, src_key: kp, numkeys: xfs_btree_get_numrecs(block: cblock)); |
3193 | |
3194 | cpp = xfs_btree_ptr_addr(cur, n: 1, block: cblock); |
3195 | for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) { |
3196 | error = xfs_btree_debug_check_ptr(cur, pp, i, level); |
3197 | if (error) |
3198 | goto error0; |
3199 | } |
3200 | |
3201 | xfs_btree_copy_ptrs(cur, dst_ptr: cpp, src_ptr: pp, numptrs: xfs_btree_get_numrecs(block: cblock)); |
3202 | |
3203 | error = xfs_btree_debug_check_ptr(cur, &nptr, 0, level); |
3204 | if (error) |
3205 | goto error0; |
3206 | |
3207 | xfs_btree_copy_ptrs(cur, dst_ptr: pp, src_ptr: &nptr, numptrs: 1); |
3208 | |
3209 | xfs_iroot_realloc(cur->bc_ino.ip, |
3210 | 1 - xfs_btree_get_numrecs(block: cblock), |
3211 | cur->bc_ino.whichfork); |
3212 | |
3213 | xfs_btree_setbuf(cur, level, cbp); |
3214 | |
3215 | /* |
3216 | * Do all this logging at the end so that |
3217 | * the root is at the right level. |
3218 | */ |
3219 | xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS); |
3220 | xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs)); |
3221 | xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs)); |
3222 | |
3223 | *logflags |= |
3224 | XFS_ILOG_CORE | xfs_ilog_fbroot(w: cur->bc_ino.whichfork); |
3225 | *stat = 1; |
3226 | return 0; |
3227 | error0: |
3228 | return error; |
3229 | } |
3230 | |
3231 | static void |
3232 | xfs_btree_set_root( |
3233 | struct xfs_btree_cur *cur, |
3234 | const union xfs_btree_ptr *ptr, |
3235 | int inc) |
3236 | { |
3237 | if (cur->bc_flags & XFS_BTREE_STAGING) { |
3238 | /* Update the btree root information for a per-AG fake root. */ |
3239 | cur->bc_ag.afake->af_root = be32_to_cpu(ptr->s); |
3240 | cur->bc_ag.afake->af_levels += inc; |
3241 | } else { |
3242 | cur->bc_ops->set_root(cur, ptr, inc); |
3243 | } |
3244 | } |
3245 | |
3246 | /* |
3247 | * Allocate a new root block, fill it in. |
3248 | */ |
3249 | STATIC int /* error */ |
3250 | xfs_btree_new_root( |
3251 | struct xfs_btree_cur *cur, /* btree cursor */ |
3252 | int *stat) /* success/failure */ |
3253 | { |
3254 | struct xfs_btree_block *block; /* one half of the old root block */ |
3255 | struct xfs_buf *bp; /* buffer containing block */ |
3256 | int error; /* error return value */ |
3257 | struct xfs_buf *lbp; /* left buffer pointer */ |
3258 | struct xfs_btree_block *left; /* left btree block */ |
3259 | struct xfs_buf *nbp; /* new (root) buffer */ |
3260 | struct xfs_btree_block *new; /* new (root) btree block */ |
3261 | int nptr; /* new value for key index, 1 or 2 */ |
3262 | struct xfs_buf *rbp; /* right buffer pointer */ |
3263 | struct xfs_btree_block *right; /* right btree block */ |
3264 | union xfs_btree_ptr rptr; |
3265 | union xfs_btree_ptr lptr; |
3266 | |
3267 | XFS_BTREE_STATS_INC(cur, newroot); |
3268 | |
3269 | /* initialise our start point from the cursor */ |
3270 | xfs_btree_init_ptr_from_cur(cur, ptr: &rptr); |
3271 | |
3272 | /* Allocate the new block. If we can't do it, we're toast. Give up. */ |
3273 | error = xfs_btree_alloc_block(cur, hint_block: &rptr, new_block: &lptr, stat); |
3274 | if (error) |
3275 | goto error0; |
3276 | if (*stat == 0) |
3277 | goto out0; |
3278 | XFS_BTREE_STATS_INC(cur, alloc); |
3279 | |
3280 | /* Set up the new block. */ |
3281 | error = xfs_btree_get_buf_block(cur, ptr: &lptr, block: &new, bpp: &nbp); |
3282 | if (error) |
3283 | goto error0; |
3284 | |
3285 | /* Set the root in the holding structure increasing the level by 1. */ |
3286 | xfs_btree_set_root(cur, ptr: &lptr, inc: 1); |
3287 | |
3288 | /* |
3289 | * At the previous root level there are now two blocks: the old root, |
3290 | * and the new block generated when it was split. We don't know which |
3291 | * one the cursor is pointing at, so we set up variables "left" and |
3292 | * "right" for each case. |
3293 | */ |
3294 | block = xfs_btree_get_block(cur, level: cur->bc_nlevels - 1, bpp: &bp); |
3295 | |
3296 | #ifdef DEBUG |
3297 | error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp); |
3298 | if (error) |
3299 | goto error0; |
3300 | #endif |
3301 | |
3302 | xfs_btree_get_sibling(cur, block, ptr: &rptr, XFS_BB_RIGHTSIB); |
3303 | if (!xfs_btree_ptr_is_null(cur, &rptr)) { |
3304 | /* Our block is left, pick up the right block. */ |
3305 | lbp = bp; |
3306 | xfs_btree_buf_to_ptr(cur, lbp, &lptr); |
3307 | left = block; |
3308 | error = xfs_btree_read_buf_block(cur, ptr: &rptr, flags: 0, block: &right, bpp: &rbp); |
3309 | if (error) |
3310 | goto error0; |
3311 | bp = rbp; |
3312 | nptr = 1; |
3313 | } else { |
3314 | /* Our block is right, pick up the left block. */ |
3315 | rbp = bp; |
3316 | xfs_btree_buf_to_ptr(cur, rbp, &rptr); |
3317 | right = block; |
3318 | xfs_btree_get_sibling(cur, block: right, ptr: &lptr, XFS_BB_LEFTSIB); |
3319 | error = xfs_btree_read_buf_block(cur, ptr: &lptr, flags: 0, block: &left, bpp: &lbp); |
3320 | if (error) |
3321 | goto error0; |
3322 | bp = lbp; |
3323 | nptr = 2; |
3324 | } |
3325 | |
3326 | /* Fill in the new block's btree header and log it. */ |
3327 | xfs_btree_init_block_cur(cur, bp: nbp, level: cur->bc_nlevels, numrecs: 2); |
3328 | xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS); |
3329 | ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) && |
3330 | !xfs_btree_ptr_is_null(cur, &rptr)); |
3331 | |
3332 | /* Fill in the key data in the new root. */ |
3333 | if (xfs_btree_get_level(block: left) > 0) { |
3334 | /* |
3335 | * Get the keys for the left block's keys and put them directly |
3336 | * in the parent block. Do the same for the right block. |
3337 | */ |
3338 | xfs_btree_get_node_keys(cur, left, |
3339 | xfs_btree_key_addr(cur, n: 1, block: new)); |
3340 | xfs_btree_get_node_keys(cur, right, |
3341 | xfs_btree_key_addr(cur, n: 2, block: new)); |
3342 | } else { |
3343 | /* |
3344 | * Get the keys for the left block's records and put them |
3345 | * directly in the parent block. Do the same for the right |
3346 | * block. |
3347 | */ |
3348 | xfs_btree_get_leaf_keys(cur, left, |
3349 | xfs_btree_key_addr(cur, n: 1, block: new)); |
3350 | xfs_btree_get_leaf_keys(cur, right, |
3351 | xfs_btree_key_addr(cur, n: 2, block: new)); |
3352 | } |
3353 | xfs_btree_log_keys(cur, nbp, 1, 2); |
3354 | |
3355 | /* Fill in the pointer data in the new root. */ |
3356 | xfs_btree_copy_ptrs(cur, |
3357 | dst_ptr: xfs_btree_ptr_addr(cur, n: 1, block: new), src_ptr: &lptr, numptrs: 1); |
3358 | xfs_btree_copy_ptrs(cur, |
3359 | dst_ptr: xfs_btree_ptr_addr(cur, n: 2, block: new), src_ptr: &rptr, numptrs: 1); |
3360 | xfs_btree_log_ptrs(cur, nbp, 1, 2); |
3361 | |
3362 | /* Fix up the cursor. */ |
3363 | xfs_btree_setbuf(cur, cur->bc_nlevels, nbp); |
3364 | cur->bc_levels[cur->bc_nlevels].ptr = nptr; |
3365 | cur->bc_nlevels++; |
3366 | ASSERT(cur->bc_nlevels <= cur->bc_maxlevels); |
3367 | *stat = 1; |
3368 | return 0; |
3369 | error0: |
3370 | return error; |
3371 | out0: |
3372 | *stat = 0; |
3373 | return 0; |
3374 | } |
3375 | |
3376 | STATIC int |
3377 | xfs_btree_make_block_unfull( |
3378 | struct xfs_btree_cur *cur, /* btree cursor */ |
3379 | int level, /* btree level */ |
3380 | int numrecs,/* # of recs in block */ |
3381 | int *oindex,/* old tree index */ |
3382 | int *index, /* new tree index */ |
3383 | union xfs_btree_ptr *nptr, /* new btree ptr */ |
3384 | struct xfs_btree_cur **ncur, /* new btree cursor */ |
3385 | union xfs_btree_key *key, /* key of new block */ |
3386 | int *stat) |
3387 | { |
3388 | int error = 0; |
3389 | |
3390 | if (xfs_btree_at_iroot(cur, level)) { |
3391 | struct xfs_inode *ip = cur->bc_ino.ip; |
3392 | |
3393 | if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) { |
3394 | /* A root block that can be made bigger. */ |
3395 | xfs_iroot_realloc(ip, 1, cur->bc_ino.whichfork); |
3396 | *stat = 1; |
3397 | } else { |
3398 | /* A root block that needs replacing */ |
3399 | int logflags = 0; |
3400 | |
3401 | error = xfs_btree_new_iroot(cur, logflags: &logflags, stat); |
3402 | if (error || *stat == 0) |
3403 | return error; |
3404 | |
3405 | xfs_trans_log_inode(cur->bc_tp, ip, logflags); |
3406 | } |
3407 | |
3408 | return 0; |
3409 | } |
3410 | |
3411 | /* First, try shifting an entry to the right neighbor. */ |
3412 | error = xfs_btree_rshift(cur, level, stat); |
3413 | if (error || *stat) |
3414 | return error; |
3415 | |
3416 | /* Next, try shifting an entry to the left neighbor. */ |
3417 | error = xfs_btree_lshift(cur, level, stat); |
3418 | if (error) |
3419 | return error; |
3420 | |
3421 | if (*stat) { |
3422 | *oindex = *index = cur->bc_levels[level].ptr; |
3423 | return 0; |
3424 | } |
3425 | |
3426 | /* |
3427 | * Next, try splitting the current block in half. |
3428 | * |
3429 | * If this works we have to re-set our variables because we |
3430 | * could be in a different block now. |
3431 | */ |
3432 | error = xfs_btree_split(cur, level, nptr, key, ncur, stat); |
3433 | if (error || *stat == 0) |
3434 | return error; |
3435 | |
3436 | |
3437 | *index = cur->bc_levels[level].ptr; |
3438 | return 0; |
3439 | } |
3440 | |
3441 | /* |
3442 | * Insert one record/level. Return information to the caller |
3443 | * allowing the next level up to proceed if necessary. |
3444 | */ |
3445 | STATIC int |
3446 | xfs_btree_insrec( |
3447 | struct xfs_btree_cur *cur, /* btree cursor */ |
3448 | int level, /* level to insert record at */ |
3449 | union xfs_btree_ptr *ptrp, /* i/o: block number inserted */ |
3450 | union xfs_btree_rec *rec, /* record to insert */ |
3451 | union xfs_btree_key *key, /* i/o: block key for ptrp */ |
3452 | struct xfs_btree_cur **curp, /* output: new cursor replacing cur */ |
3453 | int *stat) /* success/failure */ |
3454 | { |
3455 | struct xfs_btree_block *block; /* btree block */ |
3456 | struct xfs_buf *bp; /* buffer for block */ |
3457 | union xfs_btree_ptr nptr; /* new block ptr */ |
3458 | struct xfs_btree_cur *ncur = NULL; /* new btree cursor */ |
3459 | union xfs_btree_key nkey; /* new block key */ |
3460 | union xfs_btree_key *lkey; |
3461 | int optr; /* old key/record index */ |
3462 | int ptr; /* key/record index */ |
3463 | int numrecs;/* number of records */ |
3464 | int error; /* error return value */ |
3465 | int i; |
3466 | xfs_daddr_t old_bn; |
3467 | |
3468 | ncur = NULL; |
3469 | lkey = &nkey; |
3470 | |
3471 | /* |
3472 | * If we have an external root pointer, and we've made it to the |
3473 | * root level, allocate a new root block and we're done. |
3474 | */ |
3475 | if (cur->bc_ops->type != XFS_BTREE_TYPE_INODE && |
3476 | level >= cur->bc_nlevels) { |
3477 | error = xfs_btree_new_root(cur, stat); |
3478 | xfs_btree_set_ptr_null(cur, ptr: ptrp); |
3479 | |
3480 | return error; |
3481 | } |
3482 | |
3483 | /* If we're off the left edge, return failure. */ |
3484 | ptr = cur->bc_levels[level].ptr; |
3485 | if (ptr == 0) { |
3486 | *stat = 0; |
3487 | return 0; |
3488 | } |
3489 | |
3490 | optr = ptr; |
3491 | |
3492 | XFS_BTREE_STATS_INC(cur, insrec); |
3493 | |
3494 | /* Get pointers to the btree buffer and block. */ |
3495 | block = xfs_btree_get_block(cur, level, bpp: &bp); |
3496 | old_bn = bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL; |
3497 | numrecs = xfs_btree_get_numrecs(block); |
3498 | |
3499 | #ifdef DEBUG |
3500 | error = xfs_btree_check_block(cur, block, level, bp); |
3501 | if (error) |
3502 | goto error0; |
3503 | |
3504 | /* Check that the new entry is being inserted in the right place. */ |
3505 | if (ptr <= numrecs) { |
3506 | if (level == 0) { |
3507 | ASSERT(cur->bc_ops->recs_inorder(cur, rec, |
3508 | xfs_btree_rec_addr(cur, ptr, block))); |
3509 | } else { |
3510 | ASSERT(cur->bc_ops->keys_inorder(cur, key, |
3511 | xfs_btree_key_addr(cur, ptr, block))); |
3512 | } |
3513 | } |
3514 | #endif |
3515 | |
3516 | /* |
3517 | * If the block is full, we can't insert the new entry until we |
3518 | * make the block un-full. |
3519 | */ |
3520 | xfs_btree_set_ptr_null(cur, ptr: &nptr); |
3521 | if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) { |
3522 | error = xfs_btree_make_block_unfull(cur, level, numrecs, |
3523 | &optr, &ptr, &nptr, &ncur, lkey, stat); |
3524 | if (error || *stat == 0) |
3525 | goto error0; |
3526 | } |
3527 | |
3528 | /* |
3529 | * The current block may have changed if the block was |
3530 | * previously full and we have just made space in it. |
3531 | */ |
3532 | block = xfs_btree_get_block(cur, level, bpp: &bp); |
3533 | numrecs = xfs_btree_get_numrecs(block); |
3534 | |
3535 | #ifdef DEBUG |
3536 | error = xfs_btree_check_block(cur, block, level, bp); |
3537 | if (error) |
3538 | goto error0; |
3539 | #endif |
3540 | |
3541 | /* |
3542 | * At this point we know there's room for our new entry in the block |
3543 | * we're pointing at. |
3544 | */ |
3545 | XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1); |
3546 | |
3547 | if (level > 0) { |
3548 | /* It's a nonleaf. make a hole in the keys and ptrs */ |
3549 | union xfs_btree_key *kp; |
3550 | union xfs_btree_ptr *pp; |
3551 | |
3552 | kp = xfs_btree_key_addr(cur, n: ptr, block); |
3553 | pp = xfs_btree_ptr_addr(cur, n: ptr, block); |
3554 | |
3555 | for (i = numrecs - ptr; i >= 0; i--) { |
3556 | error = xfs_btree_debug_check_ptr(cur, pp, i, level); |
3557 | if (error) |
3558 | goto error0; |
3559 | } |
3560 | |
3561 | xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1); |
3562 | xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1); |
3563 | |
3564 | error = xfs_btree_debug_check_ptr(cur, ptrp, 0, level); |
3565 | if (error) |
3566 | goto error0; |
3567 | |
3568 | /* Now put the new data in, bump numrecs and log it. */ |
3569 | xfs_btree_copy_keys(cur, dst_key: kp, src_key: key, numkeys: 1); |
3570 | xfs_btree_copy_ptrs(cur, dst_ptr: pp, src_ptr: ptrp, numptrs: 1); |
3571 | numrecs++; |
3572 | xfs_btree_set_numrecs(block, numrecs); |
3573 | xfs_btree_log_ptrs(cur, bp, ptr, numrecs); |
3574 | xfs_btree_log_keys(cur, bp, ptr, numrecs); |
3575 | #ifdef DEBUG |
3576 | if (ptr < numrecs) { |
3577 | ASSERT(cur->bc_ops->keys_inorder(cur, kp, |
3578 | xfs_btree_key_addr(cur, ptr + 1, block))); |
3579 | } |
3580 | #endif |
3581 | } else { |
3582 | /* It's a leaf. make a hole in the records */ |
3583 | union xfs_btree_rec *rp; |
3584 | |
3585 | rp = xfs_btree_rec_addr(cur, n: ptr, block); |
3586 | |
3587 | xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1); |
3588 | |
3589 | /* Now put the new data in, bump numrecs and log it. */ |
3590 | xfs_btree_copy_recs(cur, rp, rec, 1); |
3591 | xfs_btree_set_numrecs(block, ++numrecs); |
3592 | xfs_btree_log_recs(cur, bp, first: ptr, last: numrecs); |
3593 | #ifdef DEBUG |
3594 | if (ptr < numrecs) { |
3595 | ASSERT(cur->bc_ops->recs_inorder(cur, rp, |
3596 | xfs_btree_rec_addr(cur, ptr + 1, block))); |
3597 | } |
3598 | #endif |
3599 | } |
3600 | |
3601 | /* Log the new number of records in the btree header. */ |
3602 | xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS); |
3603 | |
3604 | /* |
3605 | * If we just inserted into a new tree block, we have to |
3606 | * recalculate nkey here because nkey is out of date. |
3607 | * |
3608 | * Otherwise we're just updating an existing block (having shoved |
3609 | * some records into the new tree block), so use the regular key |
3610 | * update mechanism. |
3611 | */ |
3612 | if (bp && xfs_buf_daddr(bp) != old_bn) { |
3613 | xfs_btree_get_keys(cur, block, key: lkey); |
3614 | } else if (xfs_btree_needs_key_update(cur, optr)) { |
3615 | error = xfs_btree_update_keys(cur, level); |
3616 | if (error) |
3617 | goto error0; |
3618 | } |
3619 | |
3620 | /* |
3621 | * If we are tracking the last record in the tree and |
3622 | * we are at the far right edge of the tree, update it. |
3623 | */ |
3624 | if (xfs_btree_is_lastrec(cur, block, level)) { |
3625 | cur->bc_ops->update_lastrec(cur, block, rec, |
3626 | ptr, LASTREC_INSREC); |
3627 | } |
3628 | |
3629 | /* |
3630 | * Return the new block number, if any. |
3631 | * If there is one, give back a record value and a cursor too. |
3632 | */ |
3633 | *ptrp = nptr; |
3634 | if (!xfs_btree_ptr_is_null(cur, &nptr)) { |
3635 | xfs_btree_copy_keys(cur, dst_key: key, src_key: lkey, numkeys: 1); |
3636 | *curp = ncur; |
3637 | } |
3638 | |
3639 | *stat = 1; |
3640 | return 0; |
3641 | |
3642 | error0: |
3643 | if (ncur) |
3644 | xfs_btree_del_cursor(cur: ncur, error); |
3645 | return error; |
3646 | } |
3647 | |
3648 | /* |
3649 | * Insert the record at the point referenced by cur. |
3650 | * |
3651 | * A multi-level split of the tree on insert will invalidate the original |
3652 | * cursor. All callers of this function should assume that the cursor is |
3653 | * no longer valid and revalidate it. |
3654 | */ |
3655 | int |
3656 | xfs_btree_insert( |
3657 | struct xfs_btree_cur *cur, |
3658 | int *stat) |
3659 | { |
3660 | int error; /* error return value */ |
3661 | int i; /* result value, 0 for failure */ |
3662 | int level; /* current level number in btree */ |
3663 | union xfs_btree_ptr nptr; /* new block number (split result) */ |
3664 | struct xfs_btree_cur *ncur; /* new cursor (split result) */ |
3665 | struct xfs_btree_cur *pcur; /* previous level's cursor */ |
3666 | union xfs_btree_key bkey; /* key of block to insert */ |
3667 | union xfs_btree_key *key; |
3668 | union xfs_btree_rec rec; /* record to insert */ |
3669 | |
3670 | level = 0; |
3671 | ncur = NULL; |
3672 | pcur = cur; |
3673 | key = &bkey; |
3674 | |
3675 | xfs_btree_set_ptr_null(cur, ptr: &nptr); |
3676 | |
3677 | /* Make a key out of the record data to be inserted, and save it. */ |
3678 | cur->bc_ops->init_rec_from_cur(cur, &rec); |
3679 | cur->bc_ops->init_key_from_rec(key, &rec); |
3680 | |
3681 | /* |
3682 | * Loop going up the tree, starting at the leaf level. |
3683 | * Stop when we don't get a split block, that must mean that |
3684 | * the insert is finished with this level. |
3685 | */ |
3686 | do { |
3687 | /* |
3688 | * Insert nrec/nptr into this level of the tree. |
3689 | * Note if we fail, nptr will be null. |
3690 | */ |
3691 | error = xfs_btree_insrec(pcur, level, &nptr, &rec, key, |
3692 | &ncur, &i); |
3693 | if (error) { |
3694 | if (pcur != cur) |
3695 | xfs_btree_del_cursor(cur: pcur, XFS_BTREE_ERROR); |
3696 | goto error0; |
3697 | } |
3698 | |
3699 | if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { |
3700 | xfs_btree_mark_sick(cur); |
3701 | error = -EFSCORRUPTED; |
3702 | goto error0; |
3703 | } |
3704 | level++; |
3705 | |
3706 | /* |
3707 | * See if the cursor we just used is trash. |
3708 | * Can't trash the caller's cursor, but otherwise we should |
3709 | * if ncur is a new cursor or we're about to be done. |
3710 | */ |
3711 | if (pcur != cur && |
3712 | (ncur || xfs_btree_ptr_is_null(cur, &nptr))) { |
3713 | /* Save the state from the cursor before we trash it */ |
3714 | if (cur->bc_ops->update_cursor && |
3715 | !(cur->bc_flags & XFS_BTREE_STAGING)) |
3716 | cur->bc_ops->update_cursor(pcur, cur); |
3717 | cur->bc_nlevels = pcur->bc_nlevels; |
3718 | xfs_btree_del_cursor(cur: pcur, XFS_BTREE_NOERROR); |
3719 | } |
3720 | /* If we got a new cursor, switch to it. */ |
3721 | if (ncur) { |
3722 | pcur = ncur; |
3723 | ncur = NULL; |
3724 | } |
3725 | } while (!xfs_btree_ptr_is_null(cur, &nptr)); |
3726 | |
3727 | *stat = i; |
3728 | return 0; |
3729 | error0: |
3730 | return error; |
3731 | } |
3732 | |
3733 | /* |
3734 | * Try to merge a non-leaf block back into the inode root. |
3735 | * |
3736 | * Note: the killroot names comes from the fact that we're effectively |
3737 | * killing the old root block. But because we can't just delete the |
3738 | * inode we have to copy the single block it was pointing to into the |
3739 | * inode. |
3740 | */ |
3741 | STATIC int |
3742 | xfs_btree_kill_iroot( |
3743 | struct xfs_btree_cur *cur) |
3744 | { |
3745 | int whichfork = cur->bc_ino.whichfork; |
3746 | struct xfs_inode *ip = cur->bc_ino.ip; |
3747 | struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); |
3748 | struct xfs_btree_block *block; |
3749 | struct xfs_btree_block *cblock; |
3750 | union xfs_btree_key *kp; |
3751 | union xfs_btree_key *ckp; |
3752 | union xfs_btree_ptr *pp; |
3753 | union xfs_btree_ptr *cpp; |
3754 | struct xfs_buf *cbp; |
3755 | int level; |
3756 | int index; |
3757 | int numrecs; |
3758 | int error; |
3759 | #ifdef DEBUG |
3760 | union xfs_btree_ptr ptr; |
3761 | #endif |
3762 | int i; |
3763 | |
3764 | ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_INODE); |
3765 | ASSERT(cur->bc_nlevels > 1); |
3766 | |
3767 | /* |
3768 | * Don't deal with the root block needs to be a leaf case. |
3769 | * We're just going to turn the thing back into extents anyway. |
3770 | */ |
3771 | level = cur->bc_nlevels - 1; |
3772 | if (level == 1) |
3773 | goto out0; |
3774 | |
3775 | /* |
3776 | * Give up if the root has multiple children. |
3777 | */ |
3778 | block = xfs_btree_get_iroot(cur); |
3779 | if (xfs_btree_get_numrecs(block) != 1) |
3780 | goto out0; |
3781 | |
3782 | cblock = xfs_btree_get_block(cur, level: level - 1, bpp: &cbp); |
3783 | numrecs = xfs_btree_get_numrecs(block: cblock); |
3784 | |
3785 | /* |
3786 | * Only do this if the next level will fit. |
3787 | * Then the data must be copied up to the inode, |
3788 | * instead of freeing the root you free the next level. |
3789 | */ |
3790 | if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level)) |
3791 | goto out0; |
3792 | |
3793 | XFS_BTREE_STATS_INC(cur, killroot); |
3794 | |
3795 | #ifdef DEBUG |
3796 | xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB); |
3797 | ASSERT(xfs_btree_ptr_is_null(cur, &ptr)); |
3798 | xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); |
3799 | ASSERT(xfs_btree_ptr_is_null(cur, &ptr)); |
3800 | #endif |
3801 | |
3802 | index = numrecs - cur->bc_ops->get_maxrecs(cur, level); |
3803 | if (index) { |
3804 | xfs_iroot_realloc(cur->bc_ino.ip, index, |
3805 | cur->bc_ino.whichfork); |
3806 | block = ifp->if_broot; |
3807 | } |
3808 | |
3809 | be16_add_cpu(&block->bb_numrecs, index); |
3810 | ASSERT(block->bb_numrecs == cblock->bb_numrecs); |
3811 | |
3812 | kp = xfs_btree_key_addr(cur, n: 1, block); |
3813 | ckp = xfs_btree_key_addr(cur, n: 1, block: cblock); |
3814 | xfs_btree_copy_keys(cur, dst_key: kp, src_key: ckp, numkeys: numrecs); |
3815 | |
3816 | pp = xfs_btree_ptr_addr(cur, n: 1, block); |
3817 | cpp = xfs_btree_ptr_addr(cur, n: 1, block: cblock); |
3818 | |
3819 | for (i = 0; i < numrecs; i++) { |
3820 | error = xfs_btree_debug_check_ptr(cur, cpp, i, level - 1); |
3821 | if (error) |
3822 | return error; |
3823 | } |
3824 | |
3825 | xfs_btree_copy_ptrs(cur, dst_ptr: pp, src_ptr: cpp, numptrs: numrecs); |
3826 | |
3827 | error = xfs_btree_free_block(cur, bp: cbp); |
3828 | if (error) |
3829 | return error; |
3830 | |
3831 | cur->bc_levels[level - 1].bp = NULL; |
3832 | be16_add_cpu(&block->bb_level, -1); |
3833 | xfs_trans_log_inode(cur->bc_tp, ip, |
3834 | XFS_ILOG_CORE | xfs_ilog_fbroot(w: cur->bc_ino.whichfork)); |
3835 | cur->bc_nlevels--; |
3836 | out0: |
3837 | return 0; |
3838 | } |
3839 | |
3840 | /* |
3841 | * Kill the current root node, and replace it with it's only child node. |
3842 | */ |
3843 | STATIC int |
3844 | xfs_btree_kill_root( |
3845 | struct xfs_btree_cur *cur, |
3846 | struct xfs_buf *bp, |
3847 | int level, |
3848 | union xfs_btree_ptr *newroot) |
3849 | { |
3850 | int error; |
3851 | |
3852 | XFS_BTREE_STATS_INC(cur, killroot); |
3853 | |
3854 | /* |
3855 | * Update the root pointer, decreasing the level by 1 and then |
3856 | * free the old root. |
3857 | */ |
3858 | xfs_btree_set_root(cur, ptr: newroot, inc: -1); |
3859 | |
3860 | error = xfs_btree_free_block(cur, bp); |
3861 | if (error) |
3862 | return error; |
3863 | |
3864 | cur->bc_levels[level].bp = NULL; |
3865 | cur->bc_levels[level].ra = 0; |
3866 | cur->bc_nlevels--; |
3867 | |
3868 | return 0; |
3869 | } |
3870 | |
3871 | STATIC int |
3872 | xfs_btree_dec_cursor( |
3873 | struct xfs_btree_cur *cur, |
3874 | int level, |
3875 | int *stat) |
3876 | { |
3877 | int error; |
3878 | int i; |
3879 | |
3880 | if (level > 0) { |
3881 | error = xfs_btree_decrement(cur, level, stat: &i); |
3882 | if (error) |
3883 | return error; |
3884 | } |
3885 | |
3886 | *stat = 1; |
3887 | return 0; |
3888 | } |
3889 | |
3890 | /* |
3891 | * Single level of the btree record deletion routine. |
3892 | * Delete record pointed to by cur/level. |
3893 | * Remove the record from its block then rebalance the tree. |
3894 | * Return 0 for error, 1 for done, 2 to go on to the next level. |
3895 | */ |
3896 | STATIC int /* error */ |
3897 | xfs_btree_delrec( |
3898 | struct xfs_btree_cur *cur, /* btree cursor */ |
3899 | int level, /* level removing record from */ |
3900 | int *stat) /* fail/done/go-on */ |
3901 | { |
3902 | struct xfs_btree_block *block; /* btree block */ |
3903 | union xfs_btree_ptr cptr; /* current block ptr */ |
3904 | struct xfs_buf *bp; /* buffer for block */ |
3905 | int error; /* error return value */ |
3906 | int i; /* loop counter */ |
3907 | union xfs_btree_ptr lptr; /* left sibling block ptr */ |
3908 | struct xfs_buf *lbp; /* left buffer pointer */ |
3909 | struct xfs_btree_block *left; /* left btree block */ |
3910 | int lrecs = 0; /* left record count */ |
3911 | int ptr; /* key/record index */ |
3912 | union xfs_btree_ptr rptr; /* right sibling block ptr */ |
3913 | struct xfs_buf *rbp; /* right buffer pointer */ |
3914 | struct xfs_btree_block *right; /* right btree block */ |
3915 | struct xfs_btree_block *rrblock; /* right-right btree block */ |
3916 | struct xfs_buf *rrbp; /* right-right buffer pointer */ |
3917 | int rrecs = 0; /* right record count */ |
3918 | struct xfs_btree_cur *tcur; /* temporary btree cursor */ |
3919 | int numrecs; /* temporary numrec count */ |
3920 | |
3921 | tcur = NULL; |
3922 | |
3923 | /* Get the index of the entry being deleted, check for nothing there. */ |
3924 | ptr = cur->bc_levels[level].ptr; |
3925 | if (ptr == 0) { |
3926 | *stat = 0; |
3927 | return 0; |
3928 | } |
3929 | |
3930 | /* Get the buffer & block containing the record or key/ptr. */ |
3931 | block = xfs_btree_get_block(cur, level, bpp: &bp); |
3932 | numrecs = xfs_btree_get_numrecs(block); |
3933 | |
3934 | #ifdef DEBUG |
3935 | error = xfs_btree_check_block(cur, block, level, bp); |
3936 | if (error) |
3937 | goto error0; |
3938 | #endif |
3939 | |
3940 | /* Fail if we're off the end of the block. */ |
3941 | if (ptr > numrecs) { |
3942 | *stat = 0; |
3943 | return 0; |
3944 | } |
3945 | |
3946 | XFS_BTREE_STATS_INC(cur, delrec); |
3947 | XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr); |
3948 | |
3949 | /* Excise the entries being deleted. */ |
3950 | if (level > 0) { |
3951 | /* It's a nonleaf. operate on keys and ptrs */ |
3952 | union xfs_btree_key *lkp; |
3953 | union xfs_btree_ptr *lpp; |
3954 | |
3955 | lkp = xfs_btree_key_addr(cur, n: ptr + 1, block); |
3956 | lpp = xfs_btree_ptr_addr(cur, n: ptr + 1, block); |
3957 | |
3958 | for (i = 0; i < numrecs - ptr; i++) { |
3959 | error = xfs_btree_debug_check_ptr(cur, lpp, i, level); |
3960 | if (error) |
3961 | goto error0; |
3962 | } |
3963 | |
3964 | if (ptr < numrecs) { |
3965 | xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr); |
3966 | xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr); |
3967 | xfs_btree_log_keys(cur, bp, ptr, numrecs - 1); |
3968 | xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1); |
3969 | } |
3970 | } else { |
3971 | /* It's a leaf. operate on records */ |
3972 | if (ptr < numrecs) { |
3973 | xfs_btree_shift_recs(cur, |
3974 | xfs_btree_rec_addr(cur, n: ptr + 1, block), |
3975 | -1, numrecs - ptr); |
3976 | xfs_btree_log_recs(cur, bp, first: ptr, last: numrecs - 1); |
3977 | } |
3978 | } |
3979 | |
3980 | /* |
3981 | * Decrement and log the number of entries in the block. |
3982 | */ |
3983 | xfs_btree_set_numrecs(block, --numrecs); |
3984 | xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS); |
3985 | |
3986 | /* |
3987 | * If we are tracking the last record in the tree and |
3988 | * we are at the far right edge of the tree, update it. |
3989 | */ |
3990 | if (xfs_btree_is_lastrec(cur, block, level)) { |
3991 | cur->bc_ops->update_lastrec(cur, block, NULL, |
3992 | ptr, LASTREC_DELREC); |
3993 | } |
3994 | |
3995 | /* |
3996 | * We're at the root level. First, shrink the root block in-memory. |
3997 | * Try to get rid of the next level down. If we can't then there's |
3998 | * nothing left to do. |
3999 | */ |
4000 | if (xfs_btree_at_iroot(cur, level)) { |
4001 | xfs_iroot_realloc(cur->bc_ino.ip, -1, cur->bc_ino.whichfork); |
4002 | |
4003 | error = xfs_btree_kill_iroot(cur); |
4004 | if (error) |
4005 | goto error0; |
4006 | |
4007 | error = xfs_btree_dec_cursor(cur, level, stat); |
4008 | if (error) |
4009 | goto error0; |
4010 | *stat = 1; |
4011 | return 0; |
4012 | } |
4013 | |
4014 | /* |
4015 | * If this is the root level, and there's only one entry left, and it's |
4016 | * NOT the leaf level, then we can get rid of this level. |
4017 | */ |
4018 | if (level == cur->bc_nlevels - 1) { |
4019 | if (numrecs == 1 && level > 0) { |
4020 | union xfs_btree_ptr *pp; |
4021 | /* |
4022 | * pp is still set to the first pointer in the block. |
4023 | * Make it the new root of the btree. |
4024 | */ |
4025 | pp = xfs_btree_ptr_addr(cur, n: 1, block); |
4026 | error = xfs_btree_kill_root(cur, bp, level, pp); |
4027 | if (error) |
4028 | goto error0; |
4029 | } else if (level > 0) { |
4030 | error = xfs_btree_dec_cursor(cur, level, stat); |
4031 | if (error) |
4032 | goto error0; |
4033 | } |
4034 | *stat = 1; |
4035 | return 0; |
4036 | } |
4037 | |
4038 | /* |
4039 | * If we deleted the leftmost entry in the block, update the |
4040 | * key values above us in the tree. |
4041 | */ |
4042 | if (xfs_btree_needs_key_update(cur, ptr)) { |
4043 | error = xfs_btree_update_keys(cur, level); |
4044 | if (error) |
4045 | goto error0; |
4046 | } |
4047 | |
4048 | /* |
4049 | * If the number of records remaining in the block is at least |
4050 | * the minimum, we're done. |
4051 | */ |
4052 | if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) { |
4053 | error = xfs_btree_dec_cursor(cur, level, stat); |
4054 | if (error) |
4055 | goto error0; |
4056 | return 0; |
4057 | } |
4058 | |
4059 | /* |
4060 | * Otherwise, we have to move some records around to keep the |
4061 | * tree balanced. Look at the left and right sibling blocks to |
4062 | * see if we can re-balance by moving only one record. |
4063 | */ |
4064 | xfs_btree_get_sibling(cur, block, ptr: &rptr, XFS_BB_RIGHTSIB); |
4065 | xfs_btree_get_sibling(cur, block, ptr: &lptr, XFS_BB_LEFTSIB); |
4066 | |
4067 | if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE) { |
4068 | /* |
4069 | * One child of root, need to get a chance to copy its contents |
4070 | * into the root and delete it. Can't go up to next level, |
4071 | * there's nothing to delete there. |
4072 | */ |
4073 | if (xfs_btree_ptr_is_null(cur, &rptr) && |
4074 | xfs_btree_ptr_is_null(cur, &lptr) && |
4075 | level == cur->bc_nlevels - 2) { |
4076 | error = xfs_btree_kill_iroot(cur); |
4077 | if (!error) |
4078 | error = xfs_btree_dec_cursor(cur, level, stat); |
4079 | if (error) |
4080 | goto error0; |
4081 | return 0; |
4082 | } |
4083 | } |
4084 | |
4085 | ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) || |
4086 | !xfs_btree_ptr_is_null(cur, &lptr)); |
4087 | |
4088 | /* |
4089 | * Duplicate the cursor so our btree manipulations here won't |
4090 | * disrupt the next level up. |
4091 | */ |
4092 | error = xfs_btree_dup_cursor(cur, ncur: &tcur); |
4093 | if (error) |
4094 | goto error0; |
4095 | |
4096 | /* |
4097 | * If there's a right sibling, see if it's ok to shift an entry |
4098 | * out of it. |
4099 | */ |
4100 | if (!xfs_btree_ptr_is_null(cur, &rptr)) { |
4101 | /* |
4102 | * Move the temp cursor to the last entry in the next block. |
4103 | * Actually any entry but the first would suffice. |
4104 | */ |
4105 | i = xfs_btree_lastrec(tcur, level); |
4106 | if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { |
4107 | xfs_btree_mark_sick(cur); |
4108 | error = -EFSCORRUPTED; |
4109 | goto error0; |
4110 | } |
4111 | |
4112 | error = xfs_btree_increment(cur: tcur, level, stat: &i); |
4113 | if (error) |
4114 | goto error0; |
4115 | if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { |
4116 | xfs_btree_mark_sick(cur); |
4117 | error = -EFSCORRUPTED; |
4118 | goto error0; |
4119 | } |
4120 | |
4121 | i = xfs_btree_lastrec(tcur, level); |
4122 | if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { |
4123 | xfs_btree_mark_sick(cur); |
4124 | error = -EFSCORRUPTED; |
4125 | goto error0; |
4126 | } |
4127 | |
4128 | /* Grab a pointer to the block. */ |
4129 | right = xfs_btree_get_block(cur: tcur, level, bpp: &rbp); |
4130 | #ifdef DEBUG |
4131 | error = xfs_btree_check_block(tcur, right, level, rbp); |
4132 | if (error) |
4133 | goto error0; |
4134 | #endif |
4135 | /* Grab the current block number, for future use. */ |
4136 | xfs_btree_get_sibling(cur: tcur, block: right, ptr: &cptr, XFS_BB_LEFTSIB); |
4137 | |
4138 | /* |
4139 | * If right block is full enough so that removing one entry |
4140 | * won't make it too empty, and left-shifting an entry out |
4141 | * of right to us works, we're done. |
4142 | */ |
4143 | if (xfs_btree_get_numrecs(block: right) - 1 >= |
4144 | cur->bc_ops->get_minrecs(tcur, level)) { |
4145 | error = xfs_btree_lshift(tcur, level, &i); |
4146 | if (error) |
4147 | goto error0; |
4148 | if (i) { |
4149 | ASSERT(xfs_btree_get_numrecs(block) >= |
4150 | cur->bc_ops->get_minrecs(tcur, level)); |
4151 | |
4152 | xfs_btree_del_cursor(cur: tcur, XFS_BTREE_NOERROR); |
4153 | tcur = NULL; |
4154 | |
4155 | error = xfs_btree_dec_cursor(cur, level, stat); |
4156 | if (error) |
4157 | goto error0; |
4158 | return 0; |
4159 | } |
4160 | } |
4161 | |
4162 | /* |
4163 | * Otherwise, grab the number of records in right for |
4164 | * future reference, and fix up the temp cursor to point |
4165 | * to our block again (last record). |
4166 | */ |
4167 | rrecs = xfs_btree_get_numrecs(block: right); |
4168 | if (!xfs_btree_ptr_is_null(cur, &lptr)) { |
4169 | i = xfs_btree_firstrec(tcur, level); |
4170 | if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { |
4171 | xfs_btree_mark_sick(cur); |
4172 | error = -EFSCORRUPTED; |
4173 | goto error0; |
4174 | } |
4175 | |
4176 | error = xfs_btree_decrement(cur: tcur, level, stat: &i); |
4177 | if (error) |
4178 | goto error0; |
4179 | if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { |
4180 | xfs_btree_mark_sick(cur); |
4181 | error = -EFSCORRUPTED; |
4182 | goto error0; |
4183 | } |
4184 | } |
4185 | } |
4186 | |
4187 | /* |
4188 | * If there's a left sibling, see if it's ok to shift an entry |
4189 | * out of it. |
4190 | */ |
4191 | if (!xfs_btree_ptr_is_null(cur, &lptr)) { |
4192 | /* |
4193 | * Move the temp cursor to the first entry in the |
4194 | * previous block. |
4195 | */ |
4196 | i = xfs_btree_firstrec(tcur, level); |
4197 | if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { |
4198 | xfs_btree_mark_sick(cur); |
4199 | error = -EFSCORRUPTED; |
4200 | goto error0; |
4201 | } |
4202 | |
4203 | error = xfs_btree_decrement(cur: tcur, level, stat: &i); |
4204 | if (error) |
4205 | goto error0; |
4206 | i = xfs_btree_firstrec(tcur, level); |
4207 | if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { |
4208 | xfs_btree_mark_sick(cur); |
4209 | error = -EFSCORRUPTED; |
4210 | goto error0; |
4211 | } |
4212 | |
4213 | /* Grab a pointer to the block. */ |
4214 | left = xfs_btree_get_block(cur: tcur, level, bpp: &lbp); |
4215 | #ifdef DEBUG |
4216 | error = xfs_btree_check_block(cur, left, level, lbp); |
4217 | if (error) |
4218 | goto error0; |
4219 | #endif |
4220 | /* Grab the current block number, for future use. */ |
4221 | xfs_btree_get_sibling(cur: tcur, block: left, ptr: &cptr, XFS_BB_RIGHTSIB); |
4222 | |
4223 | /* |
4224 | * If left block is full enough so that removing one entry |
4225 | * won't make it too empty, and right-shifting an entry out |
4226 | * of left to us works, we're done. |
4227 | */ |
4228 | if (xfs_btree_get_numrecs(block: left) - 1 >= |
4229 | cur->bc_ops->get_minrecs(tcur, level)) { |
4230 | error = xfs_btree_rshift(tcur, level, &i); |
4231 | if (error) |
4232 | goto error0; |
4233 | if (i) { |
4234 | ASSERT(xfs_btree_get_numrecs(block) >= |
4235 | cur->bc_ops->get_minrecs(tcur, level)); |
4236 | xfs_btree_del_cursor(cur: tcur, XFS_BTREE_NOERROR); |
4237 | tcur = NULL; |
4238 | if (level == 0) |
4239 | cur->bc_levels[0].ptr++; |
4240 | |
4241 | *stat = 1; |
4242 | return 0; |
4243 | } |
4244 | } |
4245 | |
4246 | /* |
4247 | * Otherwise, grab the number of records in right for |
4248 | * future reference. |
4249 | */ |
4250 | lrecs = xfs_btree_get_numrecs(block: left); |
4251 | } |
4252 | |
4253 | /* Delete the temp cursor, we're done with it. */ |
4254 | xfs_btree_del_cursor(cur: tcur, XFS_BTREE_NOERROR); |
4255 | tcur = NULL; |
4256 | |
4257 | /* If here, we need to do a join to keep the tree balanced. */ |
4258 | ASSERT(!xfs_btree_ptr_is_null(cur, &cptr)); |
4259 | |
4260 | if (!xfs_btree_ptr_is_null(cur, &lptr) && |
4261 | lrecs + xfs_btree_get_numrecs(block) <= |
4262 | cur->bc_ops->get_maxrecs(cur, level)) { |
4263 | /* |
4264 | * Set "right" to be the starting block, |
4265 | * "left" to be the left neighbor. |
4266 | */ |
4267 | rptr = cptr; |
4268 | right = block; |
4269 | rbp = bp; |
4270 | error = xfs_btree_read_buf_block(cur, ptr: &lptr, flags: 0, block: &left, bpp: &lbp); |
4271 | if (error) |
4272 | goto error0; |
4273 | |
4274 | /* |
4275 | * If that won't work, see if we can join with the right neighbor block. |
4276 | */ |
4277 | } else if (!xfs_btree_ptr_is_null(cur, &rptr) && |
4278 | rrecs + xfs_btree_get_numrecs(block) <= |
4279 | cur->bc_ops->get_maxrecs(cur, level)) { |
4280 | /* |
4281 | * Set "left" to be the starting block, |
4282 | * "right" to be the right neighbor. |
4283 | */ |
4284 | lptr = cptr; |
4285 | left = block; |
4286 | lbp = bp; |
4287 | error = xfs_btree_read_buf_block(cur, ptr: &rptr, flags: 0, block: &right, bpp: &rbp); |
4288 | if (error) |
4289 | goto error0; |
4290 | |
4291 | /* |
4292 | * Otherwise, we can't fix the imbalance. |
4293 | * Just return. This is probably a logic error, but it's not fatal. |
4294 | */ |
4295 | } else { |
4296 | error = xfs_btree_dec_cursor(cur, level, stat); |
4297 | if (error) |
4298 | goto error0; |
4299 | return 0; |
4300 | } |
4301 | |
4302 | rrecs = xfs_btree_get_numrecs(block: right); |
4303 | lrecs = xfs_btree_get_numrecs(block: left); |
4304 | |
4305 | /* |
4306 | * We're now going to join "left" and "right" by moving all the stuff |
4307 | * in "right" to "left" and deleting "right". |
4308 | */ |
4309 | XFS_BTREE_STATS_ADD(cur, moves, rrecs); |
4310 | if (level > 0) { |
4311 | /* It's a non-leaf. Move keys and pointers. */ |
4312 | union xfs_btree_key *lkp; /* left btree key */ |
4313 | union xfs_btree_ptr *lpp; /* left address pointer */ |
4314 | union xfs_btree_key *rkp; /* right btree key */ |
4315 | union xfs_btree_ptr *rpp; /* right address pointer */ |
4316 | |
4317 | lkp = xfs_btree_key_addr(cur, n: lrecs + 1, block: left); |
4318 | lpp = xfs_btree_ptr_addr(cur, n: lrecs + 1, block: left); |
4319 | rkp = xfs_btree_key_addr(cur, n: 1, block: right); |
4320 | rpp = xfs_btree_ptr_addr(cur, n: 1, block: right); |
4321 | |
4322 | for (i = 1; i < rrecs; i++) { |
4323 | error = xfs_btree_debug_check_ptr(cur, rpp, i, level); |
4324 | if (error) |
4325 | goto error0; |
4326 | } |
4327 | |
4328 | xfs_btree_copy_keys(cur, dst_key: lkp, src_key: rkp, numkeys: rrecs); |
4329 | xfs_btree_copy_ptrs(cur, dst_ptr: lpp, src_ptr: rpp, numptrs: rrecs); |
4330 | |
4331 | xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs); |
4332 | xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs); |
4333 | } else { |
4334 | /* It's a leaf. Move records. */ |
4335 | union xfs_btree_rec *lrp; /* left record pointer */ |
4336 | union xfs_btree_rec *rrp; /* right record pointer */ |
4337 | |
4338 | lrp = xfs_btree_rec_addr(cur, n: lrecs + 1, block: left); |
4339 | rrp = xfs_btree_rec_addr(cur, n: 1, block: right); |
4340 | |
4341 | xfs_btree_copy_recs(cur, lrp, rrp, rrecs); |
4342 | xfs_btree_log_recs(cur, bp: lbp, first: lrecs + 1, last: lrecs + rrecs); |
4343 | } |
4344 | |
4345 | XFS_BTREE_STATS_INC(cur, join); |
4346 | |
4347 | /* |
4348 | * Fix up the number of records and right block pointer in the |
4349 | * surviving block, and log it. |
4350 | */ |
4351 | xfs_btree_set_numrecs(left, lrecs + rrecs); |
4352 | xfs_btree_get_sibling(cur, block: right, ptr: &cptr, XFS_BB_RIGHTSIB); |
4353 | xfs_btree_set_sibling(cur, block: left, ptr: &cptr, XFS_BB_RIGHTSIB); |
4354 | xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); |
4355 | |
4356 | /* If there is a right sibling, point it to the remaining block. */ |
4357 | xfs_btree_get_sibling(cur, block: left, ptr: &cptr, XFS_BB_RIGHTSIB); |
4358 | if (!xfs_btree_ptr_is_null(cur, &cptr)) { |
4359 | error = xfs_btree_read_buf_block(cur, ptr: &cptr, flags: 0, block: &rrblock, bpp: &rrbp); |
4360 | if (error) |
4361 | goto error0; |
4362 | xfs_btree_set_sibling(cur, block: rrblock, ptr: &lptr, XFS_BB_LEFTSIB); |
4363 | xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB); |
4364 | } |
4365 | |
4366 | /* Free the deleted block. */ |
4367 | error = xfs_btree_free_block(cur, bp: rbp); |
4368 | if (error) |
4369 | goto error0; |
4370 | |
4371 | /* |
4372 | * If we joined with the left neighbor, set the buffer in the |
4373 | * cursor to the left block, and fix up the index. |
4374 | */ |
4375 | if (bp != lbp) { |
4376 | cur->bc_levels[level].bp = lbp; |
4377 | cur->bc_levels[level].ptr += lrecs; |
4378 | cur->bc_levels[level].ra = 0; |
4379 | } |
4380 | /* |
4381 | * If we joined with the right neighbor and there's a level above |
4382 | * us, increment the cursor at that level. |
4383 | */ |
4384 | else if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE || |
4385 | level + 1 < cur->bc_nlevels) { |
4386 | error = xfs_btree_increment(cur, level: level + 1, stat: &i); |
4387 | if (error) |
4388 | goto error0; |
4389 | } |
4390 | |
4391 | /* |
4392 | * Readjust the ptr at this level if it's not a leaf, since it's |
4393 | * still pointing at the deletion point, which makes the cursor |
4394 | * inconsistent. If this makes the ptr 0, the caller fixes it up. |
4395 | * We can't use decrement because it would change the next level up. |
4396 | */ |
4397 | if (level > 0) |
4398 | cur->bc_levels[level].ptr--; |
4399 | |
4400 | /* |
4401 | * We combined blocks, so we have to update the parent keys if the |
4402 | * btree supports overlapped intervals. However, |
4403 | * bc_levels[level + 1].ptr points to the old block so that the caller |
4404 | * knows which record to delete. Therefore, the caller must be savvy |
4405 | * enough to call updkeys for us if we return stat == 2. The other |
4406 | * exit points from this function don't require deletions further up |
4407 | * the tree, so they can call updkeys directly. |
4408 | */ |
4409 | |
4410 | /* Return value means the next level up has something to do. */ |
4411 | *stat = 2; |
4412 | return 0; |
4413 | |
4414 | error0: |
4415 | if (tcur) |
4416 | xfs_btree_del_cursor(cur: tcur, XFS_BTREE_ERROR); |
4417 | return error; |
4418 | } |
4419 | |
4420 | /* |
4421 | * Delete the record pointed to by cur. |
4422 | * The cursor refers to the place where the record was (could be inserted) |
4423 | * when the operation returns. |
4424 | */ |
4425 | int /* error */ |
4426 | xfs_btree_delete( |
4427 | struct xfs_btree_cur *cur, |
4428 | int *stat) /* success/failure */ |
4429 | { |
4430 | int error; /* error return value */ |
4431 | int level; |
4432 | int i; |
4433 | bool joined = false; |
4434 | |
4435 | /* |
4436 | * Go up the tree, starting at leaf level. |
4437 | * |
4438 | * If 2 is returned then a join was done; go to the next level. |
4439 | * Otherwise we are done. |
4440 | */ |
4441 | for (level = 0, i = 2; i == 2; level++) { |
4442 | error = xfs_btree_delrec(cur, level, &i); |
4443 | if (error) |
4444 | goto error0; |
4445 | if (i == 2) |
4446 | joined = true; |
4447 | } |
4448 | |
4449 | /* |
4450 | * If we combined blocks as part of deleting the record, delrec won't |
4451 | * have updated the parent high keys so we have to do that here. |
4452 | */ |
4453 | if (joined && (cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING)) { |
4454 | error = xfs_btree_updkeys_force(cur, 0); |
4455 | if (error) |
4456 | goto error0; |
4457 | } |
4458 | |
4459 | if (i == 0) { |
4460 | for (level = 1; level < cur->bc_nlevels; level++) { |
4461 | if (cur->bc_levels[level].ptr == 0) { |
4462 | error = xfs_btree_decrement(cur, level, stat: &i); |
4463 | if (error) |
4464 | goto error0; |
4465 | break; |
4466 | } |
4467 | } |
4468 | } |
4469 | |
4470 | *stat = i; |
4471 | return 0; |
4472 | error0: |
4473 | return error; |
4474 | } |
4475 | |
4476 | /* |
4477 | * Get the data from the pointed-to record. |
4478 | */ |
4479 | int /* error */ |
4480 | xfs_btree_get_rec( |
4481 | struct xfs_btree_cur *cur, /* btree cursor */ |
4482 | union xfs_btree_rec **recp, /* output: btree record */ |
4483 | int *stat) /* output: success/failure */ |
4484 | { |
4485 | struct xfs_btree_block *block; /* btree block */ |
4486 | struct xfs_buf *bp; /* buffer pointer */ |
4487 | int ptr; /* record number */ |
4488 | #ifdef DEBUG |
4489 | int error; /* error return value */ |
4490 | #endif |
4491 | |
4492 | ptr = cur->bc_levels[0].ptr; |
4493 | block = xfs_btree_get_block(cur, level: 0, bpp: &bp); |
4494 | |
4495 | #ifdef DEBUG |
4496 | error = xfs_btree_check_block(cur, block, 0, bp); |
4497 | if (error) |
4498 | return error; |
4499 | #endif |
4500 | |
4501 | /* |
4502 | * Off the right end or left end, return failure. |
4503 | */ |
4504 | if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) { |
4505 | *stat = 0; |
4506 | return 0; |
4507 | } |
4508 | |
4509 | /* |
4510 | * Point to the record and extract its data. |
4511 | */ |
4512 | *recp = xfs_btree_rec_addr(cur, n: ptr, block); |
4513 | *stat = 1; |
4514 | return 0; |
4515 | } |
4516 | |
4517 | /* Visit a block in a btree. */ |
4518 | STATIC int |
4519 | xfs_btree_visit_block( |
4520 | struct xfs_btree_cur *cur, |
4521 | int level, |
4522 | xfs_btree_visit_blocks_fn fn, |
4523 | void *data) |
4524 | { |
4525 | struct xfs_btree_block *block; |
4526 | struct xfs_buf *bp; |
4527 | union xfs_btree_ptr rptr, bufptr; |
4528 | int error; |
4529 | |
4530 | /* do right sibling readahead */ |
4531 | xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); |
4532 | block = xfs_btree_get_block(cur, level, bpp: &bp); |
4533 | |
4534 | /* process the block */ |
4535 | error = fn(cur, level, data); |
4536 | if (error) |
4537 | return error; |
4538 | |
4539 | /* now read rh sibling block for next iteration */ |
4540 | xfs_btree_get_sibling(cur, block, ptr: &rptr, XFS_BB_RIGHTSIB); |
4541 | if (xfs_btree_ptr_is_null(cur, &rptr)) |
4542 | return -ENOENT; |
4543 | |
4544 | /* |
4545 | * We only visit blocks once in this walk, so we have to avoid the |
4546 | * internal xfs_btree_lookup_get_block() optimisation where it will |
4547 | * return the same block without checking if the right sibling points |
4548 | * back to us and creates a cyclic reference in the btree. |
4549 | */ |
4550 | xfs_btree_buf_to_ptr(cur, bp, &bufptr); |
4551 | if (xfs_btree_ptrs_equal(cur, &rptr, &bufptr)) { |
4552 | xfs_btree_mark_sick(cur); |
4553 | return -EFSCORRUPTED; |
4554 | } |
4555 | |
4556 | return xfs_btree_lookup_get_block(cur, level, pp: &rptr, blkp: &block); |
4557 | } |
4558 | |
4559 | |
4560 | /* Visit every block in a btree. */ |
4561 | int |
4562 | xfs_btree_visit_blocks( |
4563 | struct xfs_btree_cur *cur, |
4564 | xfs_btree_visit_blocks_fn fn, |
4565 | unsigned int flags, |
4566 | void *data) |
4567 | { |
4568 | union xfs_btree_ptr lptr; |
4569 | int level; |
4570 | struct xfs_btree_block *block = NULL; |
4571 | int error = 0; |
4572 | |
4573 | xfs_btree_init_ptr_from_cur(cur, ptr: &lptr); |
4574 | |
4575 | /* for each level */ |
4576 | for (level = cur->bc_nlevels - 1; level >= 0; level--) { |
4577 | /* grab the left hand block */ |
4578 | error = xfs_btree_lookup_get_block(cur, level, pp: &lptr, blkp: &block); |
4579 | if (error) |
4580 | return error; |
4581 | |
4582 | /* readahead the left most block for the next level down */ |
4583 | if (level > 0) { |
4584 | union xfs_btree_ptr *ptr; |
4585 | |
4586 | ptr = xfs_btree_ptr_addr(cur, n: 1, block); |
4587 | xfs_btree_readahead_ptr(cur, ptr, 1); |
4588 | |
4589 | /* save for the next iteration of the loop */ |
4590 | xfs_btree_copy_ptrs(cur, dst_ptr: &lptr, src_ptr: ptr, numptrs: 1); |
4591 | |
4592 | if (!(flags & XFS_BTREE_VISIT_LEAVES)) |
4593 | continue; |
4594 | } else if (!(flags & XFS_BTREE_VISIT_RECORDS)) { |
4595 | continue; |
4596 | } |
4597 | |
4598 | /* for each buffer in the level */ |
4599 | do { |
4600 | error = xfs_btree_visit_block(cur, level, fn, data); |
4601 | } while (!error); |
4602 | |
4603 | if (error != -ENOENT) |
4604 | return error; |
4605 | } |
4606 | |
4607 | return 0; |
4608 | } |
4609 | |
4610 | /* |
4611 | * Change the owner of a btree. |
4612 | * |
4613 | * The mechanism we use here is ordered buffer logging. Because we don't know |
4614 | * how many buffers were are going to need to modify, we don't really want to |
4615 | * have to make transaction reservations for the worst case of every buffer in a |
4616 | * full size btree as that may be more space that we can fit in the log.... |
4617 | * |
4618 | * We do the btree walk in the most optimal manner possible - we have sibling |
4619 | * pointers so we can just walk all the blocks on each level from left to right |
4620 | * in a single pass, and then move to the next level and do the same. We can |
4621 | * also do readahead on the sibling pointers to get IO moving more quickly, |
4622 | * though for slow disks this is unlikely to make much difference to performance |
4623 | * as the amount of CPU work we have to do before moving to the next block is |
4624 | * relatively small. |
4625 | * |
4626 | * For each btree block that we load, modify the owner appropriately, set the |
4627 | * buffer as an ordered buffer and log it appropriately. We need to ensure that |
4628 | * we mark the region we change dirty so that if the buffer is relogged in |
4629 | * a subsequent transaction the changes we make here as an ordered buffer are |
4630 | * correctly relogged in that transaction. If we are in recovery context, then |
4631 | * just queue the modified buffer as delayed write buffer so the transaction |
4632 | * recovery completion writes the changes to disk. |
4633 | */ |
4634 | struct xfs_btree_block_change_owner_info { |
4635 | uint64_t new_owner; |
4636 | struct list_head *buffer_list; |
4637 | }; |
4638 | |
4639 | static int |
4640 | xfs_btree_block_change_owner( |
4641 | struct xfs_btree_cur *cur, |
4642 | int level, |
4643 | void *data) |
4644 | { |
4645 | struct xfs_btree_block_change_owner_info *bbcoi = data; |
4646 | struct xfs_btree_block *block; |
4647 | struct xfs_buf *bp; |
4648 | |
4649 | /* modify the owner */ |
4650 | block = xfs_btree_get_block(cur, level, bpp: &bp); |
4651 | if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) { |
4652 | if (block->bb_u.l.bb_owner == cpu_to_be64(bbcoi->new_owner)) |
4653 | return 0; |
4654 | block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner); |
4655 | } else { |
4656 | if (block->bb_u.s.bb_owner == cpu_to_be32(bbcoi->new_owner)) |
4657 | return 0; |
4658 | block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner); |
4659 | } |
4660 | |
4661 | /* |
4662 | * If the block is a root block hosted in an inode, we might not have a |
4663 | * buffer pointer here and we shouldn't attempt to log the change as the |
4664 | * information is already held in the inode and discarded when the root |
4665 | * block is formatted into the on-disk inode fork. We still change it, |
4666 | * though, so everything is consistent in memory. |
4667 | */ |
4668 | if (!bp) { |
4669 | ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_INODE); |
4670 | ASSERT(level == cur->bc_nlevels - 1); |
4671 | return 0; |
4672 | } |
4673 | |
4674 | if (cur->bc_tp) { |
4675 | if (!xfs_trans_ordered_buf(cur->bc_tp, bp)) { |
4676 | xfs_btree_log_block(cur, bp, XFS_BB_OWNER); |
4677 | return -EAGAIN; |
4678 | } |
4679 | } else { |
4680 | xfs_buf_delwri_queue(bp, bbcoi->buffer_list); |
4681 | } |
4682 | |
4683 | return 0; |
4684 | } |
4685 | |
4686 | int |
4687 | xfs_btree_change_owner( |
4688 | struct xfs_btree_cur *cur, |
4689 | uint64_t new_owner, |
4690 | struct list_head *buffer_list) |
4691 | { |
4692 | struct xfs_btree_block_change_owner_info bbcoi; |
4693 | |
4694 | bbcoi.new_owner = new_owner; |
4695 | bbcoi.buffer_list = buffer_list; |
4696 | |
4697 | return xfs_btree_visit_blocks(cur, fn: xfs_btree_block_change_owner, |
4698 | XFS_BTREE_VISIT_ALL, data: &bbcoi); |
4699 | } |
4700 | |
4701 | /* Verify the v5 fields of a long-format btree block. */ |
4702 | xfs_failaddr_t |
4703 | xfs_btree_fsblock_v5hdr_verify( |
4704 | struct xfs_buf *bp, |
4705 | uint64_t owner) |
4706 | { |
4707 | struct xfs_mount *mp = bp->b_mount; |
4708 | struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); |
4709 | |
4710 | if (!xfs_has_crc(mp)) |
4711 | return __this_address; |
4712 | if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid)) |
4713 | return __this_address; |
4714 | if (block->bb_u.l.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp))) |
4715 | return __this_address; |
4716 | if (owner != XFS_RMAP_OWN_UNKNOWN && |
4717 | be64_to_cpu(block->bb_u.l.bb_owner) != owner) |
4718 | return __this_address; |
4719 | return NULL; |
4720 | } |
4721 | |
4722 | /* Verify a long-format btree block. */ |
4723 | xfs_failaddr_t |
4724 | xfs_btree_fsblock_verify( |
4725 | struct xfs_buf *bp, |
4726 | unsigned int max_recs) |
4727 | { |
4728 | struct xfs_mount *mp = bp->b_mount; |
4729 | struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); |
4730 | xfs_fsblock_t fsb; |
4731 | xfs_failaddr_t fa; |
4732 | |
4733 | ASSERT(!xfs_buftarg_is_mem(bp->b_target)); |
4734 | |
4735 | /* numrecs verification */ |
4736 | if (be16_to_cpu(block->bb_numrecs) > max_recs) |
4737 | return __this_address; |
4738 | |
4739 | /* sibling pointer verification */ |
4740 | fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp)); |
4741 | fa = xfs_btree_check_fsblock_siblings(mp, fsb, |
4742 | block->bb_u.l.bb_leftsib); |
4743 | if (!fa) |
4744 | fa = xfs_btree_check_fsblock_siblings(mp, fsb, |
4745 | block->bb_u.l.bb_rightsib); |
4746 | return fa; |
4747 | } |
4748 | |
4749 | /* Verify an in-memory btree block. */ |
4750 | xfs_failaddr_t |
4751 | xfs_btree_memblock_verify( |
4752 | struct xfs_buf *bp, |
4753 | unsigned int max_recs) |
4754 | { |
4755 | struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); |
4756 | struct xfs_buftarg *btp = bp->b_target; |
4757 | xfs_failaddr_t fa; |
4758 | xfbno_t bno; |
4759 | |
4760 | ASSERT(xfs_buftarg_is_mem(bp->b_target)); |
4761 | |
4762 | /* numrecs verification */ |
4763 | if (be16_to_cpu(block->bb_numrecs) > max_recs) |
4764 | return __this_address; |
4765 | |
4766 | /* sibling pointer verification */ |
4767 | bno = xfs_daddr_to_xfbno(xfs_buf_daddr(bp)); |
4768 | fa = xfs_btree_check_memblock_siblings(btp, bno, |
4769 | block->bb_u.l.bb_leftsib); |
4770 | if (fa) |
4771 | return fa; |
4772 | fa = xfs_btree_check_memblock_siblings(btp, bno, |
4773 | block->bb_u.l.bb_rightsib); |
4774 | if (fa) |
4775 | return fa; |
4776 | |
4777 | return NULL; |
4778 | } |
4779 | /** |
4780 | * xfs_btree_agblock_v5hdr_verify() -- verify the v5 fields of a short-format |
4781 | * btree block |
4782 | * |
4783 | * @bp: buffer containing the btree block |
4784 | */ |
4785 | xfs_failaddr_t |
4786 | xfs_btree_agblock_v5hdr_verify( |
4787 | struct xfs_buf *bp) |
4788 | { |
4789 | struct xfs_mount *mp = bp->b_mount; |
4790 | struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); |
4791 | struct xfs_perag *pag = bp->b_pag; |
4792 | |
4793 | if (!xfs_has_crc(mp)) |
4794 | return __this_address; |
4795 | if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) |
4796 | return __this_address; |
4797 | if (block->bb_u.s.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp))) |
4798 | return __this_address; |
4799 | if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) |
4800 | return __this_address; |
4801 | return NULL; |
4802 | } |
4803 | |
4804 | /** |
4805 | * xfs_btree_agblock_verify() -- verify a short-format btree block |
4806 | * |
4807 | * @bp: buffer containing the btree block |
4808 | * @max_recs: maximum records allowed in this btree node |
4809 | */ |
4810 | xfs_failaddr_t |
4811 | xfs_btree_agblock_verify( |
4812 | struct xfs_buf *bp, |
4813 | unsigned int max_recs) |
4814 | { |
4815 | struct xfs_mount *mp = bp->b_mount; |
4816 | struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); |
4817 | xfs_agblock_t agbno; |
4818 | xfs_failaddr_t fa; |
4819 | |
4820 | ASSERT(!xfs_buftarg_is_mem(bp->b_target)); |
4821 | |
4822 | /* numrecs verification */ |
4823 | if (be16_to_cpu(block->bb_numrecs) > max_recs) |
4824 | return __this_address; |
4825 | |
4826 | /* sibling pointer verification */ |
4827 | agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp)); |
4828 | fa = xfs_btree_check_agblock_siblings(bp->b_pag, agbno, |
4829 | block->bb_u.s.bb_leftsib); |
4830 | if (!fa) |
4831 | fa = xfs_btree_check_agblock_siblings(bp->b_pag, agbno, |
4832 | block->bb_u.s.bb_rightsib); |
4833 | return fa; |
4834 | } |
4835 | |
4836 | /* |
4837 | * For the given limits on leaf and keyptr records per block, calculate the |
4838 | * height of the tree needed to index the number of leaf records. |
4839 | */ |
4840 | unsigned int |
4841 | xfs_btree_compute_maxlevels( |
4842 | const unsigned int *limits, |
4843 | unsigned long long records) |
4844 | { |
4845 | unsigned long long level_blocks = howmany_64(records, limits[0]); |
4846 | unsigned int height = 1; |
4847 | |
4848 | while (level_blocks > 1) { |
4849 | level_blocks = howmany_64(level_blocks, limits[1]); |
4850 | height++; |
4851 | } |
4852 | |
4853 | return height; |
4854 | } |
4855 | |
4856 | /* |
4857 | * For the given limits on leaf and keyptr records per block, calculate the |
4858 | * number of blocks needed to index the given number of leaf records. |
4859 | */ |
4860 | unsigned long long |
4861 | xfs_btree_calc_size( |
4862 | const unsigned int *limits, |
4863 | unsigned long long records) |
4864 | { |
4865 | unsigned long long level_blocks = howmany_64(records, limits[0]); |
4866 | unsigned long long blocks = level_blocks; |
4867 | |
4868 | while (level_blocks > 1) { |
4869 | level_blocks = howmany_64(level_blocks, limits[1]); |
4870 | blocks += level_blocks; |
4871 | } |
4872 | |
4873 | return blocks; |
4874 | } |
4875 | |
4876 | /* |
4877 | * Given a number of available blocks for the btree to consume with records and |
4878 | * pointers, calculate the height of the tree needed to index all the records |
4879 | * that space can hold based on the number of pointers each interior node |
4880 | * holds. |
4881 | * |
4882 | * We start by assuming a single level tree consumes a single block, then track |
4883 | * the number of blocks each node level consumes until we no longer have space |
4884 | * to store the next node level. At this point, we are indexing all the leaf |
4885 | * blocks in the space, and there's no more free space to split the tree any |
4886 | * further. That's our maximum btree height. |
4887 | */ |
4888 | unsigned int |
4889 | xfs_btree_space_to_height( |
4890 | const unsigned int *limits, |
4891 | unsigned long long leaf_blocks) |
4892 | { |
4893 | /* |
4894 | * The root btree block can have fewer than minrecs pointers in it |
4895 | * because the tree might not be big enough to require that amount of |
4896 | * fanout. Hence it has a minimum size of 2 pointers, not limits[1]. |
4897 | */ |
4898 | unsigned long long node_blocks = 2; |
4899 | unsigned long long blocks_left = leaf_blocks - 1; |
4900 | unsigned int height = 1; |
4901 | |
4902 | if (leaf_blocks < 1) |
4903 | return 0; |
4904 | |
4905 | while (node_blocks < blocks_left) { |
4906 | blocks_left -= node_blocks; |
4907 | node_blocks *= limits[1]; |
4908 | height++; |
4909 | } |
4910 | |
4911 | return height; |
4912 | } |
4913 | |
4914 | /* |
4915 | * Query a regular btree for all records overlapping a given interval. |
4916 | * Start with a LE lookup of the key of low_rec and return all records |
4917 | * until we find a record with a key greater than the key of high_rec. |
4918 | */ |
4919 | STATIC int |
4920 | xfs_btree_simple_query_range( |
4921 | struct xfs_btree_cur *cur, |
4922 | const union xfs_btree_key *low_key, |
4923 | const union xfs_btree_key *high_key, |
4924 | xfs_btree_query_range_fn fn, |
4925 | void *priv) |
4926 | { |
4927 | union xfs_btree_rec *recp; |
4928 | union xfs_btree_key rec_key; |
4929 | int stat; |
4930 | bool firstrec = true; |
4931 | int error; |
4932 | |
4933 | ASSERT(cur->bc_ops->init_high_key_from_rec); |
4934 | ASSERT(cur->bc_ops->diff_two_keys); |
4935 | |
4936 | /* |
4937 | * Find the leftmost record. The btree cursor must be set |
4938 | * to the low record used to generate low_key. |
4939 | */ |
4940 | stat = 0; |
4941 | error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat); |
4942 | if (error) |
4943 | goto out; |
4944 | |
4945 | /* Nothing? See if there's anything to the right. */ |
4946 | if (!stat) { |
4947 | error = xfs_btree_increment(cur, level: 0, stat: &stat); |
4948 | if (error) |
4949 | goto out; |
4950 | } |
4951 | |
4952 | while (stat) { |
4953 | /* Find the record. */ |
4954 | error = xfs_btree_get_rec(cur, recp: &recp, stat: &stat); |
4955 | if (error || !stat) |
4956 | break; |
4957 | |
4958 | /* Skip if low_key > high_key(rec). */ |
4959 | if (firstrec) { |
4960 | cur->bc_ops->init_high_key_from_rec(&rec_key, recp); |
4961 | firstrec = false; |
4962 | if (xfs_btree_keycmp_gt(cur, low_key, &rec_key)) |
4963 | goto advloop; |
4964 | } |
4965 | |
4966 | /* Stop if low_key(rec) > high_key. */ |
4967 | cur->bc_ops->init_key_from_rec(&rec_key, recp); |
4968 | if (xfs_btree_keycmp_gt(cur, &rec_key, high_key)) |
4969 | break; |
4970 | |
4971 | /* Callback */ |
4972 | error = fn(cur, recp, priv); |
4973 | if (error) |
4974 | break; |
4975 | |
4976 | advloop: |
4977 | /* Move on to the next record. */ |
4978 | error = xfs_btree_increment(cur, level: 0, stat: &stat); |
4979 | if (error) |
4980 | break; |
4981 | } |
4982 | |
4983 | out: |
4984 | return error; |
4985 | } |
4986 | |
4987 | /* |
4988 | * Query an overlapped interval btree for all records overlapping a given |
4989 | * interval. This function roughly follows the algorithm given in |
4990 | * "Interval Trees" of _Introduction to Algorithms_, which is section |
4991 | * 14.3 in the 2nd and 3rd editions. |
4992 | * |
4993 | * First, generate keys for the low and high records passed in. |
4994 | * |
4995 | * For any leaf node, generate the high and low keys for the record. |
4996 | * If the record keys overlap with the query low/high keys, pass the |
4997 | * record to the function iterator. |
4998 | * |
4999 | * For any internal node, compare the low and high keys of each |
5000 | * pointer against the query low/high keys. If there's an overlap, |
5001 | * follow the pointer. |
5002 | * |
5003 | * As an optimization, we stop scanning a block when we find a low key |
5004 | * that is greater than the query's high key. |
5005 | */ |
5006 | STATIC int |
5007 | xfs_btree_overlapped_query_range( |
5008 | struct xfs_btree_cur *cur, |
5009 | const union xfs_btree_key *low_key, |
5010 | const union xfs_btree_key *high_key, |
5011 | xfs_btree_query_range_fn fn, |
5012 | void *priv) |
5013 | { |
5014 | union xfs_btree_ptr ptr; |
5015 | union xfs_btree_ptr *pp; |
5016 | union xfs_btree_key rec_key; |
5017 | union xfs_btree_key rec_hkey; |
5018 | union xfs_btree_key *lkp; |
5019 | union xfs_btree_key *hkp; |
5020 | union xfs_btree_rec *recp; |
5021 | struct xfs_btree_block *block; |
5022 | int level; |
5023 | struct xfs_buf *bp; |
5024 | int i; |
5025 | int error; |
5026 | |
5027 | /* Load the root of the btree. */ |
5028 | level = cur->bc_nlevels - 1; |
5029 | xfs_btree_init_ptr_from_cur(cur, ptr: &ptr); |
5030 | error = xfs_btree_lookup_get_block(cur, level, pp: &ptr, blkp: &block); |
5031 | if (error) |
5032 | return error; |
5033 | xfs_btree_get_block(cur, level, bpp: &bp); |
5034 | trace_xfs_btree_overlapped_query_range(cur, level, bp); |
5035 | #ifdef DEBUG |
5036 | error = xfs_btree_check_block(cur, block, level, bp); |
5037 | if (error) |
5038 | goto out; |
5039 | #endif |
5040 | cur->bc_levels[level].ptr = 1; |
5041 | |
5042 | while (level < cur->bc_nlevels) { |
5043 | block = xfs_btree_get_block(cur, level, bpp: &bp); |
5044 | |
5045 | /* End of node, pop back towards the root. */ |
5046 | if (cur->bc_levels[level].ptr > |
5047 | be16_to_cpu(block->bb_numrecs)) { |
5048 | pop_up: |
5049 | if (level < cur->bc_nlevels - 1) |
5050 | cur->bc_levels[level + 1].ptr++; |
5051 | level++; |
5052 | continue; |
5053 | } |
5054 | |
5055 | if (level == 0) { |
5056 | /* Handle a leaf node. */ |
5057 | recp = xfs_btree_rec_addr(cur, n: cur->bc_levels[0].ptr, |
5058 | block); |
5059 | |
5060 | cur->bc_ops->init_high_key_from_rec(&rec_hkey, recp); |
5061 | cur->bc_ops->init_key_from_rec(&rec_key, recp); |
5062 | |
5063 | /* |
5064 | * If (query's high key < record's low key), then there |
5065 | * are no more interesting records in this block. Pop |
5066 | * up to the leaf level to find more record blocks. |
5067 | * |
5068 | * If (record's high key >= query's low key) and |
5069 | * (query's high key >= record's low key), then |
5070 | * this record overlaps the query range; callback. |
5071 | */ |
5072 | if (xfs_btree_keycmp_lt(cur, high_key, &rec_key)) |
5073 | goto pop_up; |
5074 | if (xfs_btree_keycmp_ge(cur, &rec_hkey, low_key)) { |
5075 | error = fn(cur, recp, priv); |
5076 | if (error) |
5077 | break; |
5078 | } |
5079 | cur->bc_levels[level].ptr++; |
5080 | continue; |
5081 | } |
5082 | |
5083 | /* Handle an internal node. */ |
5084 | lkp = xfs_btree_key_addr(cur, n: cur->bc_levels[level].ptr, block); |
5085 | hkp = xfs_btree_high_key_addr(cur, n: cur->bc_levels[level].ptr, |
5086 | block); |
5087 | pp = xfs_btree_ptr_addr(cur, n: cur->bc_levels[level].ptr, block); |
5088 | |
5089 | /* |
5090 | * If (query's high key < pointer's low key), then there are no |
5091 | * more interesting keys in this block. Pop up one leaf level |
5092 | * to continue looking for records. |
5093 | * |
5094 | * If (pointer's high key >= query's low key) and |
5095 | * (query's high key >= pointer's low key), then |
5096 | * this record overlaps the query range; follow pointer. |
5097 | */ |
5098 | if (xfs_btree_keycmp_lt(cur, high_key, lkp)) |
5099 | goto pop_up; |
5100 | if (xfs_btree_keycmp_ge(cur, hkp, low_key)) { |
5101 | level--; |
5102 | error = xfs_btree_lookup_get_block(cur, level, pp, |
5103 | blkp: &block); |
5104 | if (error) |
5105 | goto out; |
5106 | xfs_btree_get_block(cur, level, bpp: &bp); |
5107 | trace_xfs_btree_overlapped_query_range(cur, level, bp); |
5108 | #ifdef DEBUG |
5109 | error = xfs_btree_check_block(cur, block, level, bp); |
5110 | if (error) |
5111 | goto out; |
5112 | #endif |
5113 | cur->bc_levels[level].ptr = 1; |
5114 | continue; |
5115 | } |
5116 | cur->bc_levels[level].ptr++; |
5117 | } |
5118 | |
5119 | out: |
5120 | /* |
5121 | * If we don't end this function with the cursor pointing at a record |
5122 | * block, a subsequent non-error cursor deletion will not release |
5123 | * node-level buffers, causing a buffer leak. This is quite possible |
5124 | * with a zero-results range query, so release the buffers if we |
5125 | * failed to return any results. |
5126 | */ |
5127 | if (cur->bc_levels[0].bp == NULL) { |
5128 | for (i = 0; i < cur->bc_nlevels; i++) { |
5129 | if (cur->bc_levels[i].bp) { |
5130 | xfs_trans_brelse(cur->bc_tp, |
5131 | cur->bc_levels[i].bp); |
5132 | cur->bc_levels[i].bp = NULL; |
5133 | cur->bc_levels[i].ptr = 0; |
5134 | cur->bc_levels[i].ra = 0; |
5135 | } |
5136 | } |
5137 | } |
5138 | |
5139 | return error; |
5140 | } |
5141 | |
5142 | static inline void |
5143 | xfs_btree_key_from_irec( |
5144 | struct xfs_btree_cur *cur, |
5145 | union xfs_btree_key *key, |
5146 | const union xfs_btree_irec *irec) |
5147 | { |
5148 | union xfs_btree_rec rec; |
5149 | |
5150 | cur->bc_rec = *irec; |
5151 | cur->bc_ops->init_rec_from_cur(cur, &rec); |
5152 | cur->bc_ops->init_key_from_rec(key, &rec); |
5153 | } |
5154 | |
5155 | /* |
5156 | * Query a btree for all records overlapping a given interval of keys. The |
5157 | * supplied function will be called with each record found; return one of the |
5158 | * XFS_BTREE_QUERY_RANGE_{CONTINUE,ABORT} values or the usual negative error |
5159 | * code. This function returns -ECANCELED, zero, or a negative error code. |
5160 | */ |
5161 | int |
5162 | xfs_btree_query_range( |
5163 | struct xfs_btree_cur *cur, |
5164 | const union xfs_btree_irec *low_rec, |
5165 | const union xfs_btree_irec *high_rec, |
5166 | xfs_btree_query_range_fn fn, |
5167 | void *priv) |
5168 | { |
5169 | union xfs_btree_key low_key; |
5170 | union xfs_btree_key high_key; |
5171 | |
5172 | /* Find the keys of both ends of the interval. */ |
5173 | xfs_btree_key_from_irec(cur, key: &high_key, irec: high_rec); |
5174 | xfs_btree_key_from_irec(cur, key: &low_key, irec: low_rec); |
5175 | |
5176 | /* Enforce low key <= high key. */ |
5177 | if (!xfs_btree_keycmp_le(cur, &low_key, &high_key)) |
5178 | return -EINVAL; |
5179 | |
5180 | if (!(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING)) |
5181 | return xfs_btree_simple_query_range(cur, &low_key, |
5182 | &high_key, fn, priv); |
5183 | return xfs_btree_overlapped_query_range(cur, &low_key, &high_key, |
5184 | fn, priv); |
5185 | } |
5186 | |
5187 | /* Query a btree for all records. */ |
5188 | int |
5189 | xfs_btree_query_all( |
5190 | struct xfs_btree_cur *cur, |
5191 | xfs_btree_query_range_fn fn, |
5192 | void *priv) |
5193 | { |
5194 | union xfs_btree_key low_key; |
5195 | union xfs_btree_key high_key; |
5196 | |
5197 | memset(&cur->bc_rec, 0, sizeof(cur->bc_rec)); |
5198 | memset(&low_key, 0, sizeof(low_key)); |
5199 | memset(&high_key, 0xFF, sizeof(high_key)); |
5200 | |
5201 | return xfs_btree_simple_query_range(cur, &low_key, &high_key, fn, priv); |
5202 | } |
5203 | |
5204 | static int |
5205 | xfs_btree_count_blocks_helper( |
5206 | struct xfs_btree_cur *cur, |
5207 | int level, |
5208 | void *data) |
5209 | { |
5210 | xfs_extlen_t *blocks = data; |
5211 | (*blocks)++; |
5212 | |
5213 | return 0; |
5214 | } |
5215 | |
5216 | /* Count the blocks in a btree and return the result in *blocks. */ |
5217 | int |
5218 | xfs_btree_count_blocks( |
5219 | struct xfs_btree_cur *cur, |
5220 | xfs_extlen_t *blocks) |
5221 | { |
5222 | *blocks = 0; |
5223 | return xfs_btree_visit_blocks(cur, fn: xfs_btree_count_blocks_helper, |
5224 | XFS_BTREE_VISIT_ALL, data: blocks); |
5225 | } |
5226 | |
5227 | /* Compare two btree pointers. */ |
5228 | int64_t |
5229 | xfs_btree_diff_two_ptrs( |
5230 | struct xfs_btree_cur *cur, |
5231 | const union xfs_btree_ptr *a, |
5232 | const union xfs_btree_ptr *b) |
5233 | { |
5234 | if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) |
5235 | return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l); |
5236 | return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s); |
5237 | } |
5238 | |
5239 | struct xfs_btree_has_records { |
5240 | /* Keys for the start and end of the range we want to know about. */ |
5241 | union xfs_btree_key start_key; |
5242 | union xfs_btree_key end_key; |
5243 | |
5244 | /* Mask for key comparisons, if desired. */ |
5245 | const union xfs_btree_key *key_mask; |
5246 | |
5247 | /* Highest record key we've seen so far. */ |
5248 | union xfs_btree_key high_key; |
5249 | |
5250 | enum xbtree_recpacking outcome; |
5251 | }; |
5252 | |
5253 | STATIC int |
5254 | xfs_btree_has_records_helper( |
5255 | struct xfs_btree_cur *cur, |
5256 | const union xfs_btree_rec *rec, |
5257 | void *priv) |
5258 | { |
5259 | union xfs_btree_key rec_key; |
5260 | union xfs_btree_key rec_high_key; |
5261 | struct xfs_btree_has_records *info = priv; |
5262 | enum xbtree_key_contig key_contig; |
5263 | |
5264 | cur->bc_ops->init_key_from_rec(&rec_key, rec); |
5265 | |
5266 | if (info->outcome == XBTREE_RECPACKING_EMPTY) { |
5267 | info->outcome = XBTREE_RECPACKING_SPARSE; |
5268 | |
5269 | /* |
5270 | * If the first record we find does not overlap the start key, |
5271 | * then there is a hole at the start of the search range. |
5272 | * Classify this as sparse and stop immediately. |
5273 | */ |
5274 | if (xfs_btree_masked_keycmp_lt(cur, &info->start_key, &rec_key, |
5275 | info->key_mask)) |
5276 | return -ECANCELED; |
5277 | } else { |
5278 | /* |
5279 | * If a subsequent record does not overlap with the any record |
5280 | * we've seen so far, there is a hole in the middle of the |
5281 | * search range. Classify this as sparse and stop. |
5282 | * If the keys overlap and this btree does not allow overlap, |
5283 | * signal corruption. |
5284 | */ |
5285 | key_contig = cur->bc_ops->keys_contiguous(cur, &info->high_key, |
5286 | &rec_key, info->key_mask); |
5287 | if (key_contig == XBTREE_KEY_OVERLAP && |
5288 | !(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING)) |
5289 | return -EFSCORRUPTED; |
5290 | if (key_contig == XBTREE_KEY_GAP) |
5291 | return -ECANCELED; |
5292 | } |
5293 | |
5294 | /* |
5295 | * If high_key(rec) is larger than any other high key we've seen, |
5296 | * remember it for later. |
5297 | */ |
5298 | cur->bc_ops->init_high_key_from_rec(&rec_high_key, rec); |
5299 | if (xfs_btree_masked_keycmp_gt(cur, &rec_high_key, &info->high_key, |
5300 | info->key_mask)) |
5301 | info->high_key = rec_high_key; /* struct copy */ |
5302 | |
5303 | return 0; |
5304 | } |
5305 | |
5306 | /* |
5307 | * Scan part of the keyspace of a btree and tell us if that keyspace does not |
5308 | * map to any records; is fully mapped to records; or is partially mapped to |
5309 | * records. This is the btree record equivalent to determining if a file is |
5310 | * sparse. |
5311 | * |
5312 | * For most btree types, the record scan should use all available btree key |
5313 | * fields to compare the keys encountered. These callers should pass NULL for |
5314 | * @mask. However, some callers (e.g. scanning physical space in the rmapbt) |
5315 | * want to ignore some part of the btree record keyspace when performing the |
5316 | * comparison. These callers should pass in a union xfs_btree_key object with |
5317 | * the fields that *should* be a part of the comparison set to any nonzero |
5318 | * value, and the rest zeroed. |
5319 | */ |
5320 | int |
5321 | xfs_btree_has_records( |
5322 | struct xfs_btree_cur *cur, |
5323 | const union xfs_btree_irec *low, |
5324 | const union xfs_btree_irec *high, |
5325 | const union xfs_btree_key *mask, |
5326 | enum xbtree_recpacking *outcome) |
5327 | { |
5328 | struct xfs_btree_has_records info = { |
5329 | .outcome = XBTREE_RECPACKING_EMPTY, |
5330 | .key_mask = mask, |
5331 | }; |
5332 | int error; |
5333 | |
5334 | /* Not all btrees support this operation. */ |
5335 | if (!cur->bc_ops->keys_contiguous) { |
5336 | ASSERT(0); |
5337 | return -EOPNOTSUPP; |
5338 | } |
5339 | |
5340 | xfs_btree_key_from_irec(cur, key: &info.start_key, irec: low); |
5341 | xfs_btree_key_from_irec(cur, key: &info.end_key, irec: high); |
5342 | |
5343 | error = xfs_btree_query_range(cur, low_rec: low, high_rec: high, |
5344 | fn: xfs_btree_has_records_helper, priv: &info); |
5345 | if (error == -ECANCELED) |
5346 | goto out; |
5347 | if (error) |
5348 | return error; |
5349 | |
5350 | if (info.outcome == XBTREE_RECPACKING_EMPTY) |
5351 | goto out; |
5352 | |
5353 | /* |
5354 | * If the largest high_key(rec) we saw during the walk is greater than |
5355 | * the end of the search range, classify this as full. Otherwise, |
5356 | * there is a hole at the end of the search range. |
5357 | */ |
5358 | if (xfs_btree_masked_keycmp_ge(cur, &info.high_key, &info.end_key, |
5359 | mask)) |
5360 | info.outcome = XBTREE_RECPACKING_FULL; |
5361 | |
5362 | out: |
5363 | *outcome = info.outcome; |
5364 | return 0; |
5365 | } |
5366 | |
5367 | /* Are there more records in this btree? */ |
5368 | bool |
5369 | xfs_btree_has_more_records( |
5370 | struct xfs_btree_cur *cur) |
5371 | { |
5372 | struct xfs_btree_block *block; |
5373 | struct xfs_buf *bp; |
5374 | |
5375 | block = xfs_btree_get_block(cur, level: 0, bpp: &bp); |
5376 | |
5377 | /* There are still records in this block. */ |
5378 | if (cur->bc_levels[0].ptr < xfs_btree_get_numrecs(block)) |
5379 | return true; |
5380 | |
5381 | /* There are more record blocks. */ |
5382 | if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) |
5383 | return block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK); |
5384 | else |
5385 | return block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK); |
5386 | } |
5387 | |
5388 | /* Set up all the btree cursor caches. */ |
5389 | int __init |
5390 | xfs_btree_init_cur_caches(void) |
5391 | { |
5392 | int error; |
5393 | |
5394 | error = xfs_allocbt_init_cur_cache(); |
5395 | if (error) |
5396 | return error; |
5397 | error = xfs_inobt_init_cur_cache(); |
5398 | if (error) |
5399 | goto err; |
5400 | error = xfs_bmbt_init_cur_cache(); |
5401 | if (error) |
5402 | goto err; |
5403 | error = xfs_rmapbt_init_cur_cache(); |
5404 | if (error) |
5405 | goto err; |
5406 | error = xfs_refcountbt_init_cur_cache(); |
5407 | if (error) |
5408 | goto err; |
5409 | |
5410 | return 0; |
5411 | err: |
5412 | xfs_btree_destroy_cur_caches(); |
5413 | return error; |
5414 | } |
5415 | |
5416 | /* Destroy all the btree cursor caches, if they've been allocated. */ |
5417 | void |
5418 | xfs_btree_destroy_cur_caches(void) |
5419 | { |
5420 | xfs_allocbt_destroy_cur_cache(); |
5421 | xfs_inobt_destroy_cur_cache(); |
5422 | xfs_bmbt_destroy_cur_cache(); |
5423 | xfs_rmapbt_destroy_cur_cache(); |
5424 | xfs_refcountbt_destroy_cur_cache(); |
5425 | } |
5426 | |
5427 | /* Move the btree cursor before the first record. */ |
5428 | int |
5429 | xfs_btree_goto_left_edge( |
5430 | struct xfs_btree_cur *cur) |
5431 | { |
5432 | int stat = 0; |
5433 | int error; |
5434 | |
5435 | memset(&cur->bc_rec, 0, sizeof(cur->bc_rec)); |
5436 | error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat); |
5437 | if (error) |
5438 | return error; |
5439 | if (!stat) |
5440 | return 0; |
5441 | |
5442 | error = xfs_btree_decrement(cur, level: 0, stat: &stat); |
5443 | if (error) |
5444 | return error; |
5445 | if (stat != 0) { |
5446 | ASSERT(0); |
5447 | xfs_btree_mark_sick(cur); |
5448 | return -EFSCORRUPTED; |
5449 | } |
5450 | |
5451 | return 0; |
5452 | } |
5453 | |