1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. |
4 | * Copyright (c) 2013 Red Hat, Inc. |
5 | * All Rights Reserved. |
6 | */ |
7 | #include "xfs.h" |
8 | #include "xfs_fs.h" |
9 | #include "xfs_shared.h" |
10 | #include "xfs_format.h" |
11 | #include "xfs_log_format.h" |
12 | #include "xfs_trans_resv.h" |
13 | #include "xfs_bit.h" |
14 | #include "xfs_mount.h" |
15 | #include "xfs_inode.h" |
16 | #include "xfs_dir2.h" |
17 | #include "xfs_dir2_priv.h" |
18 | #include "xfs_trans.h" |
19 | #include "xfs_bmap.h" |
20 | #include "xfs_attr_leaf.h" |
21 | #include "xfs_error.h" |
22 | #include "xfs_trace.h" |
23 | #include "xfs_buf_item.h" |
24 | #include "xfs_log.h" |
25 | #include "xfs_errortag.h" |
26 | #include "xfs_health.h" |
27 | |
28 | /* |
29 | * xfs_da_btree.c |
30 | * |
31 | * Routines to implement directories as Btrees of hashed names. |
32 | */ |
33 | |
34 | /*======================================================================== |
35 | * Function prototypes for the kernel. |
36 | *========================================================================*/ |
37 | |
38 | /* |
39 | * Routines used for growing the Btree. |
40 | */ |
41 | STATIC int xfs_da3_root_split(xfs_da_state_t *state, |
42 | xfs_da_state_blk_t *existing_root, |
43 | xfs_da_state_blk_t *new_child); |
44 | STATIC int xfs_da3_node_split(xfs_da_state_t *state, |
45 | xfs_da_state_blk_t *existing_blk, |
46 | xfs_da_state_blk_t *split_blk, |
47 | xfs_da_state_blk_t *blk_to_add, |
48 | int treelevel, |
49 | int *result); |
50 | STATIC void xfs_da3_node_rebalance(xfs_da_state_t *state, |
51 | xfs_da_state_blk_t *node_blk_1, |
52 | xfs_da_state_blk_t *node_blk_2); |
53 | STATIC void xfs_da3_node_add(xfs_da_state_t *state, |
54 | xfs_da_state_blk_t *old_node_blk, |
55 | xfs_da_state_blk_t *new_node_blk); |
56 | |
57 | /* |
58 | * Routines used for shrinking the Btree. |
59 | */ |
60 | STATIC int xfs_da3_root_join(xfs_da_state_t *state, |
61 | xfs_da_state_blk_t *root_blk); |
62 | STATIC int xfs_da3_node_toosmall(xfs_da_state_t *state, int *retval); |
63 | STATIC void xfs_da3_node_remove(xfs_da_state_t *state, |
64 | xfs_da_state_blk_t *drop_blk); |
65 | STATIC void xfs_da3_node_unbalance(xfs_da_state_t *state, |
66 | xfs_da_state_blk_t *src_node_blk, |
67 | xfs_da_state_blk_t *dst_node_blk); |
68 | |
69 | /* |
70 | * Utility routines. |
71 | */ |
72 | STATIC int xfs_da3_blk_unlink(xfs_da_state_t *state, |
73 | xfs_da_state_blk_t *drop_blk, |
74 | xfs_da_state_blk_t *save_blk); |
75 | |
76 | |
77 | struct kmem_cache *xfs_da_state_cache; /* anchor for dir/attr state */ |
78 | |
79 | /* |
80 | * Allocate a dir-state structure. |
81 | * We don't put them on the stack since they're large. |
82 | */ |
83 | struct xfs_da_state * |
84 | xfs_da_state_alloc( |
85 | struct xfs_da_args *args) |
86 | { |
87 | struct xfs_da_state *state; |
88 | |
89 | state = kmem_cache_zalloc(xfs_da_state_cache, |
90 | GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL); |
91 | state->args = args; |
92 | state->mp = args->dp->i_mount; |
93 | return state; |
94 | } |
95 | |
96 | /* |
97 | * Kill the altpath contents of a da-state structure. |
98 | */ |
99 | STATIC void |
100 | xfs_da_state_kill_altpath(xfs_da_state_t *state) |
101 | { |
102 | int i; |
103 | |
104 | for (i = 0; i < state->altpath.active; i++) |
105 | state->altpath.blk[i].bp = NULL; |
106 | state->altpath.active = 0; |
107 | } |
108 | |
109 | /* |
110 | * Free a da-state structure. |
111 | */ |
112 | void |
113 | xfs_da_state_free(xfs_da_state_t *state) |
114 | { |
115 | xfs_da_state_kill_altpath(state); |
116 | #ifdef DEBUG |
117 | memset((char *)state, 0, sizeof(*state)); |
118 | #endif /* DEBUG */ |
119 | kmem_cache_free(xfs_da_state_cache, state); |
120 | } |
121 | |
122 | void |
123 | xfs_da_state_reset( |
124 | struct xfs_da_state *state, |
125 | struct xfs_da_args *args) |
126 | { |
127 | xfs_da_state_kill_altpath(state); |
128 | memset(state, 0, sizeof(struct xfs_da_state)); |
129 | state->args = args; |
130 | state->mp = state->args->dp->i_mount; |
131 | } |
132 | |
133 | static inline int xfs_dabuf_nfsb(struct xfs_mount *mp, int whichfork) |
134 | { |
135 | if (whichfork == XFS_DATA_FORK) |
136 | return mp->m_dir_geo->fsbcount; |
137 | return mp->m_attr_geo->fsbcount; |
138 | } |
139 | |
140 | void |
141 | xfs_da3_node_hdr_from_disk( |
142 | struct xfs_mount *mp, |
143 | struct xfs_da3_icnode_hdr *to, |
144 | struct xfs_da_intnode *from) |
145 | { |
146 | if (xfs_has_crc(mp)) { |
147 | struct xfs_da3_intnode *from3 = (struct xfs_da3_intnode *)from; |
148 | |
149 | to->forw = be32_to_cpu(from3->hdr.info.hdr.forw); |
150 | to->back = be32_to_cpu(from3->hdr.info.hdr.back); |
151 | to->magic = be16_to_cpu(from3->hdr.info.hdr.magic); |
152 | to->count = be16_to_cpu(from3->hdr.__count); |
153 | to->level = be16_to_cpu(from3->hdr.__level); |
154 | to->btree = from3->__btree; |
155 | ASSERT(to->magic == XFS_DA3_NODE_MAGIC); |
156 | } else { |
157 | to->forw = be32_to_cpu(from->hdr.info.forw); |
158 | to->back = be32_to_cpu(from->hdr.info.back); |
159 | to->magic = be16_to_cpu(from->hdr.info.magic); |
160 | to->count = be16_to_cpu(from->hdr.__count); |
161 | to->level = be16_to_cpu(from->hdr.__level); |
162 | to->btree = from->__btree; |
163 | ASSERT(to->magic == XFS_DA_NODE_MAGIC); |
164 | } |
165 | } |
166 | |
167 | void |
168 | xfs_da3_node_hdr_to_disk( |
169 | struct xfs_mount *mp, |
170 | struct xfs_da_intnode *to, |
171 | struct xfs_da3_icnode_hdr *from) |
172 | { |
173 | if (xfs_has_crc(mp)) { |
174 | struct xfs_da3_intnode *to3 = (struct xfs_da3_intnode *)to; |
175 | |
176 | ASSERT(from->magic == XFS_DA3_NODE_MAGIC); |
177 | to3->hdr.info.hdr.forw = cpu_to_be32(from->forw); |
178 | to3->hdr.info.hdr.back = cpu_to_be32(from->back); |
179 | to3->hdr.info.hdr.magic = cpu_to_be16(from->magic); |
180 | to3->hdr.__count = cpu_to_be16(from->count); |
181 | to3->hdr.__level = cpu_to_be16(from->level); |
182 | } else { |
183 | ASSERT(from->magic == XFS_DA_NODE_MAGIC); |
184 | to->hdr.info.forw = cpu_to_be32(from->forw); |
185 | to->hdr.info.back = cpu_to_be32(from->back); |
186 | to->hdr.info.magic = cpu_to_be16(from->magic); |
187 | to->hdr.__count = cpu_to_be16(from->count); |
188 | to->hdr.__level = cpu_to_be16(from->level); |
189 | } |
190 | } |
191 | |
192 | /* |
193 | * Verify an xfs_da3_blkinfo structure. Note that the da3 fields are only |
194 | * accessible on v5 filesystems. This header format is common across da node, |
195 | * attr leaf and dir leaf blocks. |
196 | */ |
197 | xfs_failaddr_t |
198 | xfs_da3_blkinfo_verify( |
199 | struct xfs_buf *bp, |
200 | struct xfs_da3_blkinfo *hdr3) |
201 | { |
202 | struct xfs_mount *mp = bp->b_mount; |
203 | struct xfs_da_blkinfo *hdr = &hdr3->hdr; |
204 | |
205 | if (!xfs_verify_magic16(bp, hdr->magic)) |
206 | return __this_address; |
207 | |
208 | if (xfs_has_crc(mp)) { |
209 | if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid)) |
210 | return __this_address; |
211 | if (be64_to_cpu(hdr3->blkno) != xfs_buf_daddr(bp)) |
212 | return __this_address; |
213 | if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) |
214 | return __this_address; |
215 | } |
216 | |
217 | return NULL; |
218 | } |
219 | |
220 | static xfs_failaddr_t |
221 | xfs_da3_node_verify( |
222 | struct xfs_buf *bp) |
223 | { |
224 | struct xfs_mount *mp = bp->b_mount; |
225 | struct xfs_da_intnode *hdr = bp->b_addr; |
226 | struct xfs_da3_icnode_hdr ichdr; |
227 | xfs_failaddr_t fa; |
228 | |
229 | xfs_da3_node_hdr_from_disk(mp, to: &ichdr, from: hdr); |
230 | |
231 | fa = xfs_da3_blkinfo_verify(bp, bp->b_addr); |
232 | if (fa) |
233 | return fa; |
234 | |
235 | if (ichdr.level == 0) |
236 | return __this_address; |
237 | if (ichdr.level > XFS_DA_NODE_MAXDEPTH) |
238 | return __this_address; |
239 | if (ichdr.count == 0) |
240 | return __this_address; |
241 | |
242 | /* |
243 | * we don't know if the node is for and attribute or directory tree, |
244 | * so only fail if the count is outside both bounds |
245 | */ |
246 | if (ichdr.count > mp->m_dir_geo->node_ents && |
247 | ichdr.count > mp->m_attr_geo->node_ents) |
248 | return __this_address; |
249 | |
250 | /* XXX: hash order check? */ |
251 | |
252 | return NULL; |
253 | } |
254 | |
255 | static void |
256 | xfs_da3_node_write_verify( |
257 | struct xfs_buf *bp) |
258 | { |
259 | struct xfs_mount *mp = bp->b_mount; |
260 | struct xfs_buf_log_item *bip = bp->b_log_item; |
261 | struct xfs_da3_node_hdr *hdr3 = bp->b_addr; |
262 | xfs_failaddr_t fa; |
263 | |
264 | fa = xfs_da3_node_verify(bp); |
265 | if (fa) { |
266 | xfs_verifier_error(bp, -EFSCORRUPTED, fa); |
267 | return; |
268 | } |
269 | |
270 | if (!xfs_has_crc(mp)) |
271 | return; |
272 | |
273 | if (bip) |
274 | hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); |
275 | |
276 | xfs_buf_update_cksum(bp, XFS_DA3_NODE_CRC_OFF); |
277 | } |
278 | |
279 | /* |
280 | * leaf/node format detection on trees is sketchy, so a node read can be done on |
281 | * leaf level blocks when detection identifies the tree as a node format tree |
282 | * incorrectly. In this case, we need to swap the verifier to match the correct |
283 | * format of the block being read. |
284 | */ |
285 | static void |
286 | xfs_da3_node_read_verify( |
287 | struct xfs_buf *bp) |
288 | { |
289 | struct xfs_da_blkinfo *info = bp->b_addr; |
290 | xfs_failaddr_t fa; |
291 | |
292 | switch (be16_to_cpu(info->magic)) { |
293 | case XFS_DA3_NODE_MAGIC: |
294 | if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) { |
295 | xfs_verifier_error(bp, -EFSBADCRC, |
296 | __this_address); |
297 | break; |
298 | } |
299 | fallthrough; |
300 | case XFS_DA_NODE_MAGIC: |
301 | fa = xfs_da3_node_verify(bp); |
302 | if (fa) |
303 | xfs_verifier_error(bp, -EFSCORRUPTED, fa); |
304 | return; |
305 | case XFS_ATTR_LEAF_MAGIC: |
306 | case XFS_ATTR3_LEAF_MAGIC: |
307 | bp->b_ops = &xfs_attr3_leaf_buf_ops; |
308 | bp->b_ops->verify_read(bp); |
309 | return; |
310 | case XFS_DIR2_LEAFN_MAGIC: |
311 | case XFS_DIR3_LEAFN_MAGIC: |
312 | bp->b_ops = &xfs_dir3_leafn_buf_ops; |
313 | bp->b_ops->verify_read(bp); |
314 | return; |
315 | default: |
316 | xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); |
317 | break; |
318 | } |
319 | } |
320 | |
321 | /* Verify the structure of a da3 block. */ |
322 | static xfs_failaddr_t |
323 | xfs_da3_node_verify_struct( |
324 | struct xfs_buf *bp) |
325 | { |
326 | struct xfs_da_blkinfo *info = bp->b_addr; |
327 | |
328 | switch (be16_to_cpu(info->magic)) { |
329 | case XFS_DA3_NODE_MAGIC: |
330 | case XFS_DA_NODE_MAGIC: |
331 | return xfs_da3_node_verify(bp); |
332 | case XFS_ATTR_LEAF_MAGIC: |
333 | case XFS_ATTR3_LEAF_MAGIC: |
334 | bp->b_ops = &xfs_attr3_leaf_buf_ops; |
335 | return bp->b_ops->verify_struct(bp); |
336 | case XFS_DIR2_LEAFN_MAGIC: |
337 | case XFS_DIR3_LEAFN_MAGIC: |
338 | bp->b_ops = &xfs_dir3_leafn_buf_ops; |
339 | return bp->b_ops->verify_struct(bp); |
340 | default: |
341 | return __this_address; |
342 | } |
343 | } |
344 | |
345 | const struct xfs_buf_ops xfs_da3_node_buf_ops = { |
346 | .name = "xfs_da3_node" , |
347 | .magic16 = { cpu_to_be16(XFS_DA_NODE_MAGIC), |
348 | cpu_to_be16(XFS_DA3_NODE_MAGIC) }, |
349 | .verify_read = xfs_da3_node_read_verify, |
350 | .verify_write = xfs_da3_node_write_verify, |
351 | .verify_struct = xfs_da3_node_verify_struct, |
352 | }; |
353 | |
354 | static int |
355 | xfs_da3_node_set_type( |
356 | struct xfs_trans *tp, |
357 | struct xfs_inode *dp, |
358 | int whichfork, |
359 | struct xfs_buf *bp) |
360 | { |
361 | struct xfs_da_blkinfo *info = bp->b_addr; |
362 | |
363 | switch (be16_to_cpu(info->magic)) { |
364 | case XFS_DA_NODE_MAGIC: |
365 | case XFS_DA3_NODE_MAGIC: |
366 | xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF); |
367 | return 0; |
368 | case XFS_ATTR_LEAF_MAGIC: |
369 | case XFS_ATTR3_LEAF_MAGIC: |
370 | xfs_trans_buf_set_type(tp, bp, XFS_BLFT_ATTR_LEAF_BUF); |
371 | return 0; |
372 | case XFS_DIR2_LEAFN_MAGIC: |
373 | case XFS_DIR3_LEAFN_MAGIC: |
374 | xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF); |
375 | return 0; |
376 | default: |
377 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, tp->t_mountp, |
378 | info, sizeof(*info)); |
379 | xfs_trans_brelse(tp, bp); |
380 | xfs_dirattr_mark_sick(ip: dp, whichfork); |
381 | return -EFSCORRUPTED; |
382 | } |
383 | } |
384 | |
385 | int |
386 | xfs_da3_node_read( |
387 | struct xfs_trans *tp, |
388 | struct xfs_inode *dp, |
389 | xfs_dablk_t bno, |
390 | struct xfs_buf **bpp, |
391 | int whichfork) |
392 | { |
393 | int error; |
394 | |
395 | error = xfs_da_read_buf(tp, dp, bno, 0, bpp, whichfork, |
396 | &xfs_da3_node_buf_ops); |
397 | if (error || !*bpp || !tp) |
398 | return error; |
399 | return xfs_da3_node_set_type(tp, dp, whichfork, bp: *bpp); |
400 | } |
401 | |
402 | int |
403 | xfs_da3_node_read_mapped( |
404 | struct xfs_trans *tp, |
405 | struct xfs_inode *dp, |
406 | xfs_daddr_t mappedbno, |
407 | struct xfs_buf **bpp, |
408 | int whichfork) |
409 | { |
410 | struct xfs_mount *mp = dp->i_mount; |
411 | int error; |
412 | |
413 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, mappedbno, |
414 | XFS_FSB_TO_BB(mp, xfs_dabuf_nfsb(mp, whichfork)), 0, |
415 | bpp, &xfs_da3_node_buf_ops); |
416 | if (xfs_metadata_is_sick(error)) |
417 | xfs_dirattr_mark_sick(ip: dp, whichfork); |
418 | if (error || !*bpp) |
419 | return error; |
420 | |
421 | if (whichfork == XFS_ATTR_FORK) |
422 | xfs_buf_set_ref(*bpp, XFS_ATTR_BTREE_REF); |
423 | else |
424 | xfs_buf_set_ref(*bpp, XFS_DIR_BTREE_REF); |
425 | |
426 | if (!tp) |
427 | return 0; |
428 | return xfs_da3_node_set_type(tp, dp, whichfork, bp: *bpp); |
429 | } |
430 | |
431 | /* |
432 | * Copy src directory/attr leaf/node buffer to the dst. |
433 | * For v5 file systems make sure the right blkno is stamped in. |
434 | */ |
435 | void |
436 | xfs_da_buf_copy( |
437 | struct xfs_buf *dst, |
438 | struct xfs_buf *src, |
439 | size_t size) |
440 | { |
441 | struct xfs_da3_blkinfo *da3 = dst->b_addr; |
442 | |
443 | memcpy(dst->b_addr, src->b_addr, size); |
444 | dst->b_ops = src->b_ops; |
445 | xfs_trans_buf_copy_type(dst, src); |
446 | if (xfs_has_crc(dst->b_mount)) |
447 | da3->blkno = cpu_to_be64(xfs_buf_daddr(dst)); |
448 | } |
449 | |
450 | /*======================================================================== |
451 | * Routines used for growing the Btree. |
452 | *========================================================================*/ |
453 | |
454 | /* |
455 | * Create the initial contents of an intermediate node. |
456 | */ |
457 | int |
458 | xfs_da3_node_create( |
459 | struct xfs_da_args *args, |
460 | xfs_dablk_t blkno, |
461 | int level, |
462 | struct xfs_buf **bpp, |
463 | int whichfork) |
464 | { |
465 | struct xfs_da_intnode *node; |
466 | struct xfs_trans *tp = args->trans; |
467 | struct xfs_mount *mp = tp->t_mountp; |
468 | struct xfs_da3_icnode_hdr ichdr = {0}; |
469 | struct xfs_buf *bp; |
470 | int error; |
471 | struct xfs_inode *dp = args->dp; |
472 | |
473 | trace_xfs_da_node_create(args); |
474 | ASSERT(level <= XFS_DA_NODE_MAXDEPTH); |
475 | |
476 | error = xfs_da_get_buf(tp, dp, blkno, &bp, whichfork); |
477 | if (error) |
478 | return error; |
479 | bp->b_ops = &xfs_da3_node_buf_ops; |
480 | xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF); |
481 | node = bp->b_addr; |
482 | |
483 | if (xfs_has_crc(mp)) { |
484 | struct xfs_da3_node_hdr *hdr3 = bp->b_addr; |
485 | |
486 | memset(hdr3, 0, sizeof(struct xfs_da3_node_hdr)); |
487 | ichdr.magic = XFS_DA3_NODE_MAGIC; |
488 | hdr3->info.blkno = cpu_to_be64(xfs_buf_daddr(bp)); |
489 | hdr3->info.owner = cpu_to_be64(args->dp->i_ino); |
490 | uuid_copy(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid); |
491 | } else { |
492 | ichdr.magic = XFS_DA_NODE_MAGIC; |
493 | } |
494 | ichdr.level = level; |
495 | |
496 | xfs_da3_node_hdr_to_disk(mp: dp->i_mount, to: node, from: &ichdr); |
497 | xfs_trans_log_buf(tp, bp, |
498 | XFS_DA_LOGRANGE(node, &node->hdr, args->geo->node_hdr_size)); |
499 | |
500 | *bpp = bp; |
501 | return 0; |
502 | } |
503 | |
504 | /* |
505 | * Split a leaf node, rebalance, then possibly split |
506 | * intermediate nodes, rebalance, etc. |
507 | */ |
508 | int /* error */ |
509 | xfs_da3_split( |
510 | struct xfs_da_state *state) |
511 | { |
512 | struct xfs_da_state_blk *oldblk; |
513 | struct xfs_da_state_blk *newblk; |
514 | struct xfs_da_state_blk *addblk; |
515 | struct xfs_da_intnode *node; |
516 | int max; |
517 | int action = 0; |
518 | int error; |
519 | int i; |
520 | |
521 | trace_xfs_da_split(state->args); |
522 | |
523 | if (XFS_TEST_ERROR(false, state->mp, XFS_ERRTAG_DA_LEAF_SPLIT)) |
524 | return -EIO; |
525 | |
526 | /* |
527 | * Walk back up the tree splitting/inserting/adjusting as necessary. |
528 | * If we need to insert and there isn't room, split the node, then |
529 | * decide which fragment to insert the new block from below into. |
530 | * Note that we may split the root this way, but we need more fixup. |
531 | */ |
532 | max = state->path.active - 1; |
533 | ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH)); |
534 | ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC || |
535 | state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC); |
536 | |
537 | addblk = &state->path.blk[max]; /* initial dummy value */ |
538 | for (i = max; (i >= 0) && addblk; state->path.active--, i--) { |
539 | oldblk = &state->path.blk[i]; |
540 | newblk = &state->altpath.blk[i]; |
541 | |
542 | /* |
543 | * If a leaf node then |
544 | * Allocate a new leaf node, then rebalance across them. |
545 | * else if an intermediate node then |
546 | * We split on the last layer, must we split the node? |
547 | */ |
548 | switch (oldblk->magic) { |
549 | case XFS_ATTR_LEAF_MAGIC: |
550 | error = xfs_attr3_leaf_split(state, oldblk, newblk); |
551 | if ((error != 0) && (error != -ENOSPC)) { |
552 | return error; /* GROT: attr is inconsistent */ |
553 | } |
554 | if (!error) { |
555 | addblk = newblk; |
556 | break; |
557 | } |
558 | /* |
559 | * Entry wouldn't fit, split the leaf again. The new |
560 | * extrablk will be consumed by xfs_da3_node_split if |
561 | * the node is split. |
562 | */ |
563 | state->extravalid = 1; |
564 | if (state->inleaf) { |
565 | state->extraafter = 0; /* before newblk */ |
566 | trace_xfs_attr_leaf_split_before(state->args); |
567 | error = xfs_attr3_leaf_split(state, oldblk, |
568 | newblk: &state->extrablk); |
569 | } else { |
570 | state->extraafter = 1; /* after newblk */ |
571 | trace_xfs_attr_leaf_split_after(state->args); |
572 | error = xfs_attr3_leaf_split(state, oldblk: newblk, |
573 | newblk: &state->extrablk); |
574 | } |
575 | if (error) |
576 | return error; /* GROT: attr inconsistent */ |
577 | addblk = newblk; |
578 | break; |
579 | case XFS_DIR2_LEAFN_MAGIC: |
580 | error = xfs_dir2_leafn_split(state, oldblk, newblk); |
581 | if (error) |
582 | return error; |
583 | addblk = newblk; |
584 | break; |
585 | case XFS_DA_NODE_MAGIC: |
586 | error = xfs_da3_node_split(state, oldblk, newblk, addblk, |
587 | max - i, &action); |
588 | addblk->bp = NULL; |
589 | if (error) |
590 | return error; /* GROT: dir is inconsistent */ |
591 | /* |
592 | * Record the newly split block for the next time thru? |
593 | */ |
594 | if (action) |
595 | addblk = newblk; |
596 | else |
597 | addblk = NULL; |
598 | break; |
599 | } |
600 | |
601 | /* |
602 | * Update the btree to show the new hashval for this child. |
603 | */ |
604 | xfs_da3_fixhashpath(state, path_to_to_fix: &state->path); |
605 | } |
606 | if (!addblk) |
607 | return 0; |
608 | |
609 | /* |
610 | * xfs_da3_node_split() should have consumed any extra blocks we added |
611 | * during a double leaf split in the attr fork. This is guaranteed as |
612 | * we can't be here if the attr fork only has a single leaf block. |
613 | */ |
614 | ASSERT(state->extravalid == 0 || |
615 | state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC); |
616 | |
617 | /* |
618 | * Split the root node. |
619 | */ |
620 | ASSERT(state->path.active == 0); |
621 | oldblk = &state->path.blk[0]; |
622 | error = xfs_da3_root_split(state, oldblk, addblk); |
623 | if (error) |
624 | goto out; |
625 | |
626 | /* |
627 | * Update pointers to the node which used to be block 0 and just got |
628 | * bumped because of the addition of a new root node. Note that the |
629 | * original block 0 could be at any position in the list of blocks in |
630 | * the tree. |
631 | * |
632 | * Note: the magic numbers and sibling pointers are in the same physical |
633 | * place for both v2 and v3 headers (by design). Hence it doesn't matter |
634 | * which version of the xfs_da_intnode structure we use here as the |
635 | * result will be the same using either structure. |
636 | */ |
637 | node = oldblk->bp->b_addr; |
638 | if (node->hdr.info.forw) { |
639 | if (be32_to_cpu(node->hdr.info.forw) != addblk->blkno) { |
640 | xfs_buf_mark_corrupt(oldblk->bp); |
641 | xfs_da_mark_sick(args: state->args); |
642 | error = -EFSCORRUPTED; |
643 | goto out; |
644 | } |
645 | node = addblk->bp->b_addr; |
646 | node->hdr.info.back = cpu_to_be32(oldblk->blkno); |
647 | xfs_trans_log_buf(state->args->trans, addblk->bp, |
648 | XFS_DA_LOGRANGE(node, &node->hdr.info, |
649 | sizeof(node->hdr.info))); |
650 | } |
651 | node = oldblk->bp->b_addr; |
652 | if (node->hdr.info.back) { |
653 | if (be32_to_cpu(node->hdr.info.back) != addblk->blkno) { |
654 | xfs_buf_mark_corrupt(oldblk->bp); |
655 | xfs_da_mark_sick(args: state->args); |
656 | error = -EFSCORRUPTED; |
657 | goto out; |
658 | } |
659 | node = addblk->bp->b_addr; |
660 | node->hdr.info.forw = cpu_to_be32(oldblk->blkno); |
661 | xfs_trans_log_buf(state->args->trans, addblk->bp, |
662 | XFS_DA_LOGRANGE(node, &node->hdr.info, |
663 | sizeof(node->hdr.info))); |
664 | } |
665 | out: |
666 | addblk->bp = NULL; |
667 | return error; |
668 | } |
669 | |
670 | /* |
671 | * Split the root. We have to create a new root and point to the two |
672 | * parts (the split old root) that we just created. Copy block zero to |
673 | * the EOF, extending the inode in process. |
674 | */ |
675 | STATIC int /* error */ |
676 | xfs_da3_root_split( |
677 | struct xfs_da_state *state, |
678 | struct xfs_da_state_blk *blk1, |
679 | struct xfs_da_state_blk *blk2) |
680 | { |
681 | struct xfs_da_intnode *node; |
682 | struct xfs_da_intnode *oldroot; |
683 | struct xfs_da_node_entry *btree; |
684 | struct xfs_da3_icnode_hdr nodehdr; |
685 | struct xfs_da_args *args; |
686 | struct xfs_buf *bp; |
687 | struct xfs_inode *dp; |
688 | struct xfs_trans *tp; |
689 | struct xfs_dir2_leaf *leaf; |
690 | xfs_dablk_t blkno; |
691 | int level; |
692 | int error; |
693 | int size; |
694 | |
695 | trace_xfs_da_root_split(state->args); |
696 | |
697 | /* |
698 | * Copy the existing (incorrect) block from the root node position |
699 | * to a free space somewhere. |
700 | */ |
701 | args = state->args; |
702 | error = xfs_da_grow_inode(args, &blkno); |
703 | if (error) |
704 | return error; |
705 | |
706 | dp = args->dp; |
707 | tp = args->trans; |
708 | error = xfs_da_get_buf(tp, dp, blkno, &bp, args->whichfork); |
709 | if (error) |
710 | return error; |
711 | node = bp->b_addr; |
712 | oldroot = blk1->bp->b_addr; |
713 | if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || |
714 | oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) { |
715 | struct xfs_da3_icnode_hdr icnodehdr; |
716 | |
717 | xfs_da3_node_hdr_from_disk(mp: dp->i_mount, to: &icnodehdr, from: oldroot); |
718 | btree = icnodehdr.btree; |
719 | size = (int)((char *)&btree[icnodehdr.count] - (char *)oldroot); |
720 | level = icnodehdr.level; |
721 | } else { |
722 | struct xfs_dir3_icleaf_hdr leafhdr; |
723 | |
724 | leaf = (xfs_dir2_leaf_t *)oldroot; |
725 | xfs_dir2_leaf_hdr_from_disk(mp: dp->i_mount, to: &leafhdr, from: leaf); |
726 | |
727 | ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC || |
728 | leafhdr.magic == XFS_DIR3_LEAFN_MAGIC); |
729 | size = (int)((char *)&leafhdr.ents[leafhdr.count] - |
730 | (char *)leaf); |
731 | level = 0; |
732 | } |
733 | |
734 | /* |
735 | * Copy old root to new buffer and log it. |
736 | */ |
737 | xfs_da_buf_copy(bp, blk1->bp, size); |
738 | xfs_trans_log_buf(tp, bp, 0, size - 1); |
739 | |
740 | /* |
741 | * Update blk1 to point to new buffer. |
742 | */ |
743 | blk1->bp = bp; |
744 | blk1->blkno = blkno; |
745 | |
746 | /* |
747 | * Set up the new root node. |
748 | */ |
749 | error = xfs_da3_node_create(args, |
750 | (args->whichfork == XFS_DATA_FORK) ? args->geo->leafblk : 0, |
751 | level + 1, &bp, args->whichfork); |
752 | if (error) |
753 | return error; |
754 | |
755 | node = bp->b_addr; |
756 | xfs_da3_node_hdr_from_disk(mp: dp->i_mount, to: &nodehdr, from: node); |
757 | btree = nodehdr.btree; |
758 | btree[0].hashval = cpu_to_be32(blk1->hashval); |
759 | btree[0].before = cpu_to_be32(blk1->blkno); |
760 | btree[1].hashval = cpu_to_be32(blk2->hashval); |
761 | btree[1].before = cpu_to_be32(blk2->blkno); |
762 | nodehdr.count = 2; |
763 | xfs_da3_node_hdr_to_disk(mp: dp->i_mount, to: node, from: &nodehdr); |
764 | |
765 | #ifdef DEBUG |
766 | if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || |
767 | oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) { |
768 | ASSERT(blk1->blkno >= args->geo->leafblk && |
769 | blk1->blkno < args->geo->freeblk); |
770 | ASSERT(blk2->blkno >= args->geo->leafblk && |
771 | blk2->blkno < args->geo->freeblk); |
772 | } |
773 | #endif |
774 | |
775 | /* Header is already logged by xfs_da_node_create */ |
776 | xfs_trans_log_buf(tp, bp, |
777 | XFS_DA_LOGRANGE(node, btree, sizeof(xfs_da_node_entry_t) * 2)); |
778 | |
779 | return 0; |
780 | } |
781 | |
782 | /* |
783 | * Split the node, rebalance, then add the new entry. |
784 | */ |
785 | STATIC int /* error */ |
786 | xfs_da3_node_split( |
787 | struct xfs_da_state *state, |
788 | struct xfs_da_state_blk *oldblk, |
789 | struct xfs_da_state_blk *newblk, |
790 | struct xfs_da_state_blk *addblk, |
791 | int treelevel, |
792 | int *result) |
793 | { |
794 | struct xfs_da_intnode *node; |
795 | struct xfs_da3_icnode_hdr nodehdr; |
796 | xfs_dablk_t blkno; |
797 | int newcount; |
798 | int error; |
799 | int ; |
800 | struct xfs_inode *dp = state->args->dp; |
801 | |
802 | trace_xfs_da_node_split(state->args); |
803 | |
804 | node = oldblk->bp->b_addr; |
805 | xfs_da3_node_hdr_from_disk(mp: dp->i_mount, to: &nodehdr, from: node); |
806 | |
807 | /* |
808 | * With V2 dirs the extra block is data or freespace. |
809 | */ |
810 | useextra = state->extravalid && state->args->whichfork == XFS_ATTR_FORK; |
811 | newcount = 1 + useextra; |
812 | /* |
813 | * Do we have to split the node? |
814 | */ |
815 | if (nodehdr.count + newcount > state->args->geo->node_ents) { |
816 | /* |
817 | * Allocate a new node, add to the doubly linked chain of |
818 | * nodes, then move some of our excess entries into it. |
819 | */ |
820 | error = xfs_da_grow_inode(state->args, &blkno); |
821 | if (error) |
822 | return error; /* GROT: dir is inconsistent */ |
823 | |
824 | error = xfs_da3_node_create(state->args, blkno, treelevel, |
825 | &newblk->bp, state->args->whichfork); |
826 | if (error) |
827 | return error; /* GROT: dir is inconsistent */ |
828 | newblk->blkno = blkno; |
829 | newblk->magic = XFS_DA_NODE_MAGIC; |
830 | xfs_da3_node_rebalance(state, oldblk, newblk); |
831 | error = xfs_da3_blk_link(state, old_blk: oldblk, new_blk: newblk); |
832 | if (error) |
833 | return error; |
834 | *result = 1; |
835 | } else { |
836 | *result = 0; |
837 | } |
838 | |
839 | /* |
840 | * Insert the new entry(s) into the correct block |
841 | * (updating last hashval in the process). |
842 | * |
843 | * xfs_da3_node_add() inserts BEFORE the given index, |
844 | * and as a result of using node_lookup_int() we always |
845 | * point to a valid entry (not after one), but a split |
846 | * operation always results in a new block whose hashvals |
847 | * FOLLOW the current block. |
848 | * |
849 | * If we had double-split op below us, then add the extra block too. |
850 | */ |
851 | node = oldblk->bp->b_addr; |
852 | xfs_da3_node_hdr_from_disk(mp: dp->i_mount, to: &nodehdr, from: node); |
853 | if (oldblk->index <= nodehdr.count) { |
854 | oldblk->index++; |
855 | xfs_da3_node_add(state, oldblk, addblk); |
856 | if (useextra) { |
857 | if (state->extraafter) |
858 | oldblk->index++; |
859 | xfs_da3_node_add(state, oldblk, &state->extrablk); |
860 | state->extravalid = 0; |
861 | } |
862 | } else { |
863 | newblk->index++; |
864 | xfs_da3_node_add(state, newblk, addblk); |
865 | if (useextra) { |
866 | if (state->extraafter) |
867 | newblk->index++; |
868 | xfs_da3_node_add(state, newblk, &state->extrablk); |
869 | state->extravalid = 0; |
870 | } |
871 | } |
872 | |
873 | return 0; |
874 | } |
875 | |
876 | /* |
877 | * Balance the btree elements between two intermediate nodes, |
878 | * usually one full and one empty. |
879 | * |
880 | * NOTE: if blk2 is empty, then it will get the upper half of blk1. |
881 | */ |
882 | STATIC void |
883 | xfs_da3_node_rebalance( |
884 | struct xfs_da_state *state, |
885 | struct xfs_da_state_blk *blk1, |
886 | struct xfs_da_state_blk *blk2) |
887 | { |
888 | struct xfs_da_intnode *node1; |
889 | struct xfs_da_intnode *node2; |
890 | struct xfs_da_node_entry *btree1; |
891 | struct xfs_da_node_entry *btree2; |
892 | struct xfs_da_node_entry *btree_s; |
893 | struct xfs_da_node_entry *btree_d; |
894 | struct xfs_da3_icnode_hdr nodehdr1; |
895 | struct xfs_da3_icnode_hdr nodehdr2; |
896 | struct xfs_trans *tp; |
897 | int count; |
898 | int tmp; |
899 | int swap = 0; |
900 | struct xfs_inode *dp = state->args->dp; |
901 | |
902 | trace_xfs_da_node_rebalance(state->args); |
903 | |
904 | node1 = blk1->bp->b_addr; |
905 | node2 = blk2->bp->b_addr; |
906 | xfs_da3_node_hdr_from_disk(mp: dp->i_mount, to: &nodehdr1, from: node1); |
907 | xfs_da3_node_hdr_from_disk(mp: dp->i_mount, to: &nodehdr2, from: node2); |
908 | btree1 = nodehdr1.btree; |
909 | btree2 = nodehdr2.btree; |
910 | |
911 | /* |
912 | * Figure out how many entries need to move, and in which direction. |
913 | * Swap the nodes around if that makes it simpler. |
914 | */ |
915 | if (nodehdr1.count > 0 && nodehdr2.count > 0 && |
916 | ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) || |
917 | (be32_to_cpu(btree2[nodehdr2.count - 1].hashval) < |
918 | be32_to_cpu(btree1[nodehdr1.count - 1].hashval)))) { |
919 | swap(node1, node2); |
920 | xfs_da3_node_hdr_from_disk(mp: dp->i_mount, to: &nodehdr1, from: node1); |
921 | xfs_da3_node_hdr_from_disk(mp: dp->i_mount, to: &nodehdr2, from: node2); |
922 | btree1 = nodehdr1.btree; |
923 | btree2 = nodehdr2.btree; |
924 | swap = 1; |
925 | } |
926 | |
927 | count = (nodehdr1.count - nodehdr2.count) / 2; |
928 | if (count == 0) |
929 | return; |
930 | tp = state->args->trans; |
931 | /* |
932 | * Two cases: high-to-low and low-to-high. |
933 | */ |
934 | if (count > 0) { |
935 | /* |
936 | * Move elements in node2 up to make a hole. |
937 | */ |
938 | tmp = nodehdr2.count; |
939 | if (tmp > 0) { |
940 | tmp *= (uint)sizeof(xfs_da_node_entry_t); |
941 | btree_s = &btree2[0]; |
942 | btree_d = &btree2[count]; |
943 | memmove(btree_d, btree_s, tmp); |
944 | } |
945 | |
946 | /* |
947 | * Move the req'd B-tree elements from high in node1 to |
948 | * low in node2. |
949 | */ |
950 | nodehdr2.count += count; |
951 | tmp = count * (uint)sizeof(xfs_da_node_entry_t); |
952 | btree_s = &btree1[nodehdr1.count - count]; |
953 | btree_d = &btree2[0]; |
954 | memcpy(btree_d, btree_s, tmp); |
955 | nodehdr1.count -= count; |
956 | } else { |
957 | /* |
958 | * Move the req'd B-tree elements from low in node2 to |
959 | * high in node1. |
960 | */ |
961 | count = -count; |
962 | tmp = count * (uint)sizeof(xfs_da_node_entry_t); |
963 | btree_s = &btree2[0]; |
964 | btree_d = &btree1[nodehdr1.count]; |
965 | memcpy(btree_d, btree_s, tmp); |
966 | nodehdr1.count += count; |
967 | |
968 | xfs_trans_log_buf(tp, blk1->bp, |
969 | XFS_DA_LOGRANGE(node1, btree_d, tmp)); |
970 | |
971 | /* |
972 | * Move elements in node2 down to fill the hole. |
973 | */ |
974 | tmp = nodehdr2.count - count; |
975 | tmp *= (uint)sizeof(xfs_da_node_entry_t); |
976 | btree_s = &btree2[count]; |
977 | btree_d = &btree2[0]; |
978 | memmove(btree_d, btree_s, tmp); |
979 | nodehdr2.count -= count; |
980 | } |
981 | |
982 | /* |
983 | * Log header of node 1 and all current bits of node 2. |
984 | */ |
985 | xfs_da3_node_hdr_to_disk(mp: dp->i_mount, to: node1, from: &nodehdr1); |
986 | xfs_trans_log_buf(tp, blk1->bp, |
987 | XFS_DA_LOGRANGE(node1, &node1->hdr, |
988 | state->args->geo->node_hdr_size)); |
989 | |
990 | xfs_da3_node_hdr_to_disk(mp: dp->i_mount, to: node2, from: &nodehdr2); |
991 | xfs_trans_log_buf(tp, blk2->bp, |
992 | XFS_DA_LOGRANGE(node2, &node2->hdr, |
993 | state->args->geo->node_hdr_size + |
994 | (sizeof(btree2[0]) * nodehdr2.count))); |
995 | |
996 | /* |
997 | * Record the last hashval from each block for upward propagation. |
998 | * (note: don't use the swapped node pointers) |
999 | */ |
1000 | if (swap) { |
1001 | node1 = blk1->bp->b_addr; |
1002 | node2 = blk2->bp->b_addr; |
1003 | xfs_da3_node_hdr_from_disk(mp: dp->i_mount, to: &nodehdr1, from: node1); |
1004 | xfs_da3_node_hdr_from_disk(mp: dp->i_mount, to: &nodehdr2, from: node2); |
1005 | btree1 = nodehdr1.btree; |
1006 | btree2 = nodehdr2.btree; |
1007 | } |
1008 | blk1->hashval = be32_to_cpu(btree1[nodehdr1.count - 1].hashval); |
1009 | blk2->hashval = be32_to_cpu(btree2[nodehdr2.count - 1].hashval); |
1010 | |
1011 | /* |
1012 | * Adjust the expected index for insertion. |
1013 | */ |
1014 | if (blk1->index >= nodehdr1.count) { |
1015 | blk2->index = blk1->index - nodehdr1.count; |
1016 | blk1->index = nodehdr1.count + 1; /* make it invalid */ |
1017 | } |
1018 | } |
1019 | |
1020 | /* |
1021 | * Add a new entry to an intermediate node. |
1022 | */ |
1023 | STATIC void |
1024 | xfs_da3_node_add( |
1025 | struct xfs_da_state *state, |
1026 | struct xfs_da_state_blk *oldblk, |
1027 | struct xfs_da_state_blk *newblk) |
1028 | { |
1029 | struct xfs_da_intnode *node; |
1030 | struct xfs_da3_icnode_hdr nodehdr; |
1031 | struct xfs_da_node_entry *btree; |
1032 | int tmp; |
1033 | struct xfs_inode *dp = state->args->dp; |
1034 | |
1035 | trace_xfs_da_node_add(state->args); |
1036 | |
1037 | node = oldblk->bp->b_addr; |
1038 | xfs_da3_node_hdr_from_disk(mp: dp->i_mount, to: &nodehdr, from: node); |
1039 | btree = nodehdr.btree; |
1040 | |
1041 | ASSERT(oldblk->index >= 0 && oldblk->index <= nodehdr.count); |
1042 | ASSERT(newblk->blkno != 0); |
1043 | if (state->args->whichfork == XFS_DATA_FORK) |
1044 | ASSERT(newblk->blkno >= state->args->geo->leafblk && |
1045 | newblk->blkno < state->args->geo->freeblk); |
1046 | |
1047 | /* |
1048 | * We may need to make some room before we insert the new node. |
1049 | */ |
1050 | tmp = 0; |
1051 | if (oldblk->index < nodehdr.count) { |
1052 | tmp = (nodehdr.count - oldblk->index) * (uint)sizeof(*btree); |
1053 | memmove(&btree[oldblk->index + 1], &btree[oldblk->index], tmp); |
1054 | } |
1055 | btree[oldblk->index].hashval = cpu_to_be32(newblk->hashval); |
1056 | btree[oldblk->index].before = cpu_to_be32(newblk->blkno); |
1057 | xfs_trans_log_buf(state->args->trans, oldblk->bp, |
1058 | XFS_DA_LOGRANGE(node, &btree[oldblk->index], |
1059 | tmp + sizeof(*btree))); |
1060 | |
1061 | nodehdr.count += 1; |
1062 | xfs_da3_node_hdr_to_disk(mp: dp->i_mount, to: node, from: &nodehdr); |
1063 | xfs_trans_log_buf(state->args->trans, oldblk->bp, |
1064 | XFS_DA_LOGRANGE(node, &node->hdr, |
1065 | state->args->geo->node_hdr_size)); |
1066 | |
1067 | /* |
1068 | * Copy the last hash value from the oldblk to propagate upwards. |
1069 | */ |
1070 | oldblk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval); |
1071 | } |
1072 | |
1073 | /*======================================================================== |
1074 | * Routines used for shrinking the Btree. |
1075 | *========================================================================*/ |
1076 | |
1077 | /* |
1078 | * Deallocate an empty leaf node, remove it from its parent, |
1079 | * possibly deallocating that block, etc... |
1080 | */ |
1081 | int |
1082 | xfs_da3_join( |
1083 | struct xfs_da_state *state) |
1084 | { |
1085 | struct xfs_da_state_blk *drop_blk; |
1086 | struct xfs_da_state_blk *save_blk; |
1087 | int action = 0; |
1088 | int error; |
1089 | |
1090 | trace_xfs_da_join(state->args); |
1091 | |
1092 | drop_blk = &state->path.blk[ state->path.active-1 ]; |
1093 | save_blk = &state->altpath.blk[ state->path.active-1 ]; |
1094 | ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC); |
1095 | ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC || |
1096 | drop_blk->magic == XFS_DIR2_LEAFN_MAGIC); |
1097 | |
1098 | /* |
1099 | * Walk back up the tree joining/deallocating as necessary. |
1100 | * When we stop dropping blocks, break out. |
1101 | */ |
1102 | for ( ; state->path.active >= 2; drop_blk--, save_blk--, |
1103 | state->path.active--) { |
1104 | /* |
1105 | * See if we can combine the block with a neighbor. |
1106 | * (action == 0) => no options, just leave |
1107 | * (action == 1) => coalesce, then unlink |
1108 | * (action == 2) => block empty, unlink it |
1109 | */ |
1110 | switch (drop_blk->magic) { |
1111 | case XFS_ATTR_LEAF_MAGIC: |
1112 | error = xfs_attr3_leaf_toosmall(state, retval: &action); |
1113 | if (error) |
1114 | return error; |
1115 | if (action == 0) |
1116 | return 0; |
1117 | xfs_attr3_leaf_unbalance(state, drop_blk, save_blk); |
1118 | break; |
1119 | case XFS_DIR2_LEAFN_MAGIC: |
1120 | error = xfs_dir2_leafn_toosmall(state, action: &action); |
1121 | if (error) |
1122 | return error; |
1123 | if (action == 0) |
1124 | return 0; |
1125 | xfs_dir2_leafn_unbalance(state, drop_blk, save_blk); |
1126 | break; |
1127 | case XFS_DA_NODE_MAGIC: |
1128 | /* |
1129 | * Remove the offending node, fixup hashvals, |
1130 | * check for a toosmall neighbor. |
1131 | */ |
1132 | xfs_da3_node_remove(state, drop_blk); |
1133 | xfs_da3_fixhashpath(state, path_to_to_fix: &state->path); |
1134 | error = xfs_da3_node_toosmall(state, &action); |
1135 | if (error) |
1136 | return error; |
1137 | if (action == 0) |
1138 | return 0; |
1139 | xfs_da3_node_unbalance(state, drop_blk, save_blk); |
1140 | break; |
1141 | } |
1142 | xfs_da3_fixhashpath(state, path_to_to_fix: &state->altpath); |
1143 | error = xfs_da3_blk_unlink(state, drop_blk, save_blk); |
1144 | xfs_da_state_kill_altpath(state); |
1145 | if (error) |
1146 | return error; |
1147 | error = xfs_da_shrink_inode(state->args, drop_blk->blkno, |
1148 | drop_blk->bp); |
1149 | drop_blk->bp = NULL; |
1150 | if (error) |
1151 | return error; |
1152 | } |
1153 | /* |
1154 | * We joined all the way to the top. If it turns out that |
1155 | * we only have one entry in the root, make the child block |
1156 | * the new root. |
1157 | */ |
1158 | xfs_da3_node_remove(state, drop_blk); |
1159 | xfs_da3_fixhashpath(state, path_to_to_fix: &state->path); |
1160 | error = xfs_da3_root_join(state, &state->path.blk[0]); |
1161 | return error; |
1162 | } |
1163 | |
1164 | #ifdef DEBUG |
1165 | static void |
1166 | xfs_da_blkinfo_onlychild_validate(struct xfs_da_blkinfo *blkinfo, __u16 level) |
1167 | { |
1168 | __be16 magic = blkinfo->magic; |
1169 | |
1170 | if (level == 1) { |
1171 | ASSERT(magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || |
1172 | magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) || |
1173 | magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) || |
1174 | magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)); |
1175 | } else { |
1176 | ASSERT(magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || |
1177 | magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)); |
1178 | } |
1179 | ASSERT(!blkinfo->forw); |
1180 | ASSERT(!blkinfo->back); |
1181 | } |
1182 | #else /* !DEBUG */ |
1183 | #define xfs_da_blkinfo_onlychild_validate(blkinfo, level) |
1184 | #endif /* !DEBUG */ |
1185 | |
1186 | /* |
1187 | * We have only one entry in the root. Copy the only remaining child of |
1188 | * the old root to block 0 as the new root node. |
1189 | */ |
1190 | STATIC int |
1191 | xfs_da3_root_join( |
1192 | struct xfs_da_state *state, |
1193 | struct xfs_da_state_blk *root_blk) |
1194 | { |
1195 | struct xfs_da_intnode *oldroot; |
1196 | struct xfs_da_args *args; |
1197 | xfs_dablk_t child; |
1198 | struct xfs_buf *bp; |
1199 | struct xfs_da3_icnode_hdr oldroothdr; |
1200 | int error; |
1201 | struct xfs_inode *dp = state->args->dp; |
1202 | |
1203 | trace_xfs_da_root_join(state->args); |
1204 | |
1205 | ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC); |
1206 | |
1207 | args = state->args; |
1208 | oldroot = root_blk->bp->b_addr; |
1209 | xfs_da3_node_hdr_from_disk(mp: dp->i_mount, to: &oldroothdr, from: oldroot); |
1210 | ASSERT(oldroothdr.forw == 0); |
1211 | ASSERT(oldroothdr.back == 0); |
1212 | |
1213 | /* |
1214 | * If the root has more than one child, then don't do anything. |
1215 | */ |
1216 | if (oldroothdr.count > 1) |
1217 | return 0; |
1218 | |
1219 | /* |
1220 | * Read in the (only) child block, then copy those bytes into |
1221 | * the root block's buffer and free the original child block. |
1222 | */ |
1223 | child = be32_to_cpu(oldroothdr.btree[0].before); |
1224 | ASSERT(child != 0); |
1225 | error = xfs_da3_node_read(args->trans, dp, child, &bp, args->whichfork); |
1226 | if (error) |
1227 | return error; |
1228 | xfs_da_blkinfo_onlychild_validate(bp->b_addr, oldroothdr.level); |
1229 | |
1230 | /* |
1231 | * Copy child to root buffer and log it. |
1232 | */ |
1233 | xfs_da_buf_copy(root_blk->bp, bp, args->geo->blksize); |
1234 | xfs_trans_log_buf(args->trans, root_blk->bp, 0, |
1235 | args->geo->blksize - 1); |
1236 | /* |
1237 | * Now we can drop the child buffer. |
1238 | */ |
1239 | error = xfs_da_shrink_inode(args, child, bp); |
1240 | return error; |
1241 | } |
1242 | |
1243 | /* |
1244 | * Check a node block and its neighbors to see if the block should be |
1245 | * collapsed into one or the other neighbor. Always keep the block |
1246 | * with the smaller block number. |
1247 | * If the current block is over 50% full, don't try to join it, return 0. |
1248 | * If the block is empty, fill in the state structure and return 2. |
1249 | * If it can be collapsed, fill in the state structure and return 1. |
1250 | * If nothing can be done, return 0. |
1251 | */ |
1252 | STATIC int |
1253 | xfs_da3_node_toosmall( |
1254 | struct xfs_da_state *state, |
1255 | int *action) |
1256 | { |
1257 | struct xfs_da_intnode *node; |
1258 | struct xfs_da_state_blk *blk; |
1259 | struct xfs_da_blkinfo *info; |
1260 | xfs_dablk_t blkno; |
1261 | struct xfs_buf *bp; |
1262 | struct xfs_da3_icnode_hdr nodehdr; |
1263 | int count; |
1264 | int forward; |
1265 | int error; |
1266 | int retval; |
1267 | int i; |
1268 | struct xfs_inode *dp = state->args->dp; |
1269 | |
1270 | trace_xfs_da_node_toosmall(state->args); |
1271 | |
1272 | /* |
1273 | * Check for the degenerate case of the block being over 50% full. |
1274 | * If so, it's not worth even looking to see if we might be able |
1275 | * to coalesce with a sibling. |
1276 | */ |
1277 | blk = &state->path.blk[ state->path.active-1 ]; |
1278 | info = blk->bp->b_addr; |
1279 | node = (xfs_da_intnode_t *)info; |
1280 | xfs_da3_node_hdr_from_disk(mp: dp->i_mount, to: &nodehdr, from: node); |
1281 | if (nodehdr.count > (state->args->geo->node_ents >> 1)) { |
1282 | *action = 0; /* blk over 50%, don't try to join */ |
1283 | return 0; /* blk over 50%, don't try to join */ |
1284 | } |
1285 | |
1286 | /* |
1287 | * Check for the degenerate case of the block being empty. |
1288 | * If the block is empty, we'll simply delete it, no need to |
1289 | * coalesce it with a sibling block. We choose (arbitrarily) |
1290 | * to merge with the forward block unless it is NULL. |
1291 | */ |
1292 | if (nodehdr.count == 0) { |
1293 | /* |
1294 | * Make altpath point to the block we want to keep and |
1295 | * path point to the block we want to drop (this one). |
1296 | */ |
1297 | forward = (info->forw != 0); |
1298 | memcpy(&state->altpath, &state->path, sizeof(state->path)); |
1299 | error = xfs_da3_path_shift(state, path: &state->altpath, forward, |
1300 | release: 0, result: &retval); |
1301 | if (error) |
1302 | return error; |
1303 | if (retval) { |
1304 | *action = 0; |
1305 | } else { |
1306 | *action = 2; |
1307 | } |
1308 | return 0; |
1309 | } |
1310 | |
1311 | /* |
1312 | * Examine each sibling block to see if we can coalesce with |
1313 | * at least 25% free space to spare. We need to figure out |
1314 | * whether to merge with the forward or the backward block. |
1315 | * We prefer coalescing with the lower numbered sibling so as |
1316 | * to shrink a directory over time. |
1317 | */ |
1318 | count = state->args->geo->node_ents; |
1319 | count -= state->args->geo->node_ents >> 2; |
1320 | count -= nodehdr.count; |
1321 | |
1322 | /* start with smaller blk num */ |
1323 | forward = nodehdr.forw < nodehdr.back; |
1324 | for (i = 0; i < 2; forward = !forward, i++) { |
1325 | struct xfs_da3_icnode_hdr thdr; |
1326 | if (forward) |
1327 | blkno = nodehdr.forw; |
1328 | else |
1329 | blkno = nodehdr.back; |
1330 | if (blkno == 0) |
1331 | continue; |
1332 | error = xfs_da3_node_read(state->args->trans, dp, blkno, &bp, |
1333 | state->args->whichfork); |
1334 | if (error) |
1335 | return error; |
1336 | |
1337 | node = bp->b_addr; |
1338 | xfs_da3_node_hdr_from_disk(mp: dp->i_mount, to: &thdr, from: node); |
1339 | xfs_trans_brelse(state->args->trans, bp); |
1340 | |
1341 | if (count - thdr.count >= 0) |
1342 | break; /* fits with at least 25% to spare */ |
1343 | } |
1344 | if (i >= 2) { |
1345 | *action = 0; |
1346 | return 0; |
1347 | } |
1348 | |
1349 | /* |
1350 | * Make altpath point to the block we want to keep (the lower |
1351 | * numbered block) and path point to the block we want to drop. |
1352 | */ |
1353 | memcpy(&state->altpath, &state->path, sizeof(state->path)); |
1354 | if (blkno < blk->blkno) { |
1355 | error = xfs_da3_path_shift(state, path: &state->altpath, forward, |
1356 | release: 0, result: &retval); |
1357 | } else { |
1358 | error = xfs_da3_path_shift(state, path: &state->path, forward, |
1359 | release: 0, result: &retval); |
1360 | } |
1361 | if (error) |
1362 | return error; |
1363 | if (retval) { |
1364 | *action = 0; |
1365 | return 0; |
1366 | } |
1367 | *action = 1; |
1368 | return 0; |
1369 | } |
1370 | |
1371 | /* |
1372 | * Pick up the last hashvalue from an intermediate node. |
1373 | */ |
1374 | STATIC uint |
1375 | xfs_da3_node_lasthash( |
1376 | struct xfs_inode *dp, |
1377 | struct xfs_buf *bp, |
1378 | int *count) |
1379 | { |
1380 | struct xfs_da3_icnode_hdr nodehdr; |
1381 | |
1382 | xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, bp->b_addr); |
1383 | if (count) |
1384 | *count = nodehdr.count; |
1385 | if (!nodehdr.count) |
1386 | return 0; |
1387 | return be32_to_cpu(nodehdr.btree[nodehdr.count - 1].hashval); |
1388 | } |
1389 | |
1390 | /* |
1391 | * Walk back up the tree adjusting hash values as necessary, |
1392 | * when we stop making changes, return. |
1393 | */ |
1394 | void |
1395 | xfs_da3_fixhashpath( |
1396 | struct xfs_da_state *state, |
1397 | struct xfs_da_state_path *path) |
1398 | { |
1399 | struct xfs_da_state_blk *blk; |
1400 | struct xfs_da_intnode *node; |
1401 | struct xfs_da_node_entry *btree; |
1402 | xfs_dahash_t lasthash=0; |
1403 | int level; |
1404 | int count; |
1405 | struct xfs_inode *dp = state->args->dp; |
1406 | |
1407 | trace_xfs_da_fixhashpath(state->args); |
1408 | |
1409 | level = path->active-1; |
1410 | blk = &path->blk[ level ]; |
1411 | switch (blk->magic) { |
1412 | case XFS_ATTR_LEAF_MAGIC: |
1413 | lasthash = xfs_attr_leaf_lasthash(blk->bp, &count); |
1414 | if (count == 0) |
1415 | return; |
1416 | break; |
1417 | case XFS_DIR2_LEAFN_MAGIC: |
1418 | lasthash = xfs_dir2_leaf_lasthash(dp, blk->bp, &count); |
1419 | if (count == 0) |
1420 | return; |
1421 | break; |
1422 | case XFS_DA_NODE_MAGIC: |
1423 | lasthash = xfs_da3_node_lasthash(dp, blk->bp, &count); |
1424 | if (count == 0) |
1425 | return; |
1426 | break; |
1427 | } |
1428 | for (blk--, level--; level >= 0; blk--, level--) { |
1429 | struct xfs_da3_icnode_hdr nodehdr; |
1430 | |
1431 | node = blk->bp->b_addr; |
1432 | xfs_da3_node_hdr_from_disk(mp: dp->i_mount, to: &nodehdr, from: node); |
1433 | btree = nodehdr.btree; |
1434 | if (be32_to_cpu(btree[blk->index].hashval) == lasthash) |
1435 | break; |
1436 | blk->hashval = lasthash; |
1437 | btree[blk->index].hashval = cpu_to_be32(lasthash); |
1438 | xfs_trans_log_buf(state->args->trans, blk->bp, |
1439 | XFS_DA_LOGRANGE(node, &btree[blk->index], |
1440 | sizeof(*btree))); |
1441 | |
1442 | lasthash = be32_to_cpu(btree[nodehdr.count - 1].hashval); |
1443 | } |
1444 | } |
1445 | |
1446 | /* |
1447 | * Remove an entry from an intermediate node. |
1448 | */ |
1449 | STATIC void |
1450 | xfs_da3_node_remove( |
1451 | struct xfs_da_state *state, |
1452 | struct xfs_da_state_blk *drop_blk) |
1453 | { |
1454 | struct xfs_da_intnode *node; |
1455 | struct xfs_da3_icnode_hdr nodehdr; |
1456 | struct xfs_da_node_entry *btree; |
1457 | int index; |
1458 | int tmp; |
1459 | struct xfs_inode *dp = state->args->dp; |
1460 | |
1461 | trace_xfs_da_node_remove(state->args); |
1462 | |
1463 | node = drop_blk->bp->b_addr; |
1464 | xfs_da3_node_hdr_from_disk(mp: dp->i_mount, to: &nodehdr, from: node); |
1465 | ASSERT(drop_blk->index < nodehdr.count); |
1466 | ASSERT(drop_blk->index >= 0); |
1467 | |
1468 | /* |
1469 | * Copy over the offending entry, or just zero it out. |
1470 | */ |
1471 | index = drop_blk->index; |
1472 | btree = nodehdr.btree; |
1473 | if (index < nodehdr.count - 1) { |
1474 | tmp = nodehdr.count - index - 1; |
1475 | tmp *= (uint)sizeof(xfs_da_node_entry_t); |
1476 | memmove(&btree[index], &btree[index + 1], tmp); |
1477 | xfs_trans_log_buf(state->args->trans, drop_blk->bp, |
1478 | XFS_DA_LOGRANGE(node, &btree[index], tmp)); |
1479 | index = nodehdr.count - 1; |
1480 | } |
1481 | memset(&btree[index], 0, sizeof(xfs_da_node_entry_t)); |
1482 | xfs_trans_log_buf(state->args->trans, drop_blk->bp, |
1483 | XFS_DA_LOGRANGE(node, &btree[index], sizeof(btree[index]))); |
1484 | nodehdr.count -= 1; |
1485 | xfs_da3_node_hdr_to_disk(mp: dp->i_mount, to: node, from: &nodehdr); |
1486 | xfs_trans_log_buf(state->args->trans, drop_blk->bp, |
1487 | XFS_DA_LOGRANGE(node, &node->hdr, state->args->geo->node_hdr_size)); |
1488 | |
1489 | /* |
1490 | * Copy the last hash value from the block to propagate upwards. |
1491 | */ |
1492 | drop_blk->hashval = be32_to_cpu(btree[index - 1].hashval); |
1493 | } |
1494 | |
1495 | /* |
1496 | * Unbalance the elements between two intermediate nodes, |
1497 | * move all Btree elements from one node into another. |
1498 | */ |
1499 | STATIC void |
1500 | xfs_da3_node_unbalance( |
1501 | struct xfs_da_state *state, |
1502 | struct xfs_da_state_blk *drop_blk, |
1503 | struct xfs_da_state_blk *save_blk) |
1504 | { |
1505 | struct xfs_da_intnode *drop_node; |
1506 | struct xfs_da_intnode *save_node; |
1507 | struct xfs_da_node_entry *drop_btree; |
1508 | struct xfs_da_node_entry *save_btree; |
1509 | struct xfs_da3_icnode_hdr drop_hdr; |
1510 | struct xfs_da3_icnode_hdr save_hdr; |
1511 | struct xfs_trans *tp; |
1512 | int sindex; |
1513 | int tmp; |
1514 | struct xfs_inode *dp = state->args->dp; |
1515 | |
1516 | trace_xfs_da_node_unbalance(state->args); |
1517 | |
1518 | drop_node = drop_blk->bp->b_addr; |
1519 | save_node = save_blk->bp->b_addr; |
1520 | xfs_da3_node_hdr_from_disk(mp: dp->i_mount, to: &drop_hdr, from: drop_node); |
1521 | xfs_da3_node_hdr_from_disk(mp: dp->i_mount, to: &save_hdr, from: save_node); |
1522 | drop_btree = drop_hdr.btree; |
1523 | save_btree = save_hdr.btree; |
1524 | tp = state->args->trans; |
1525 | |
1526 | /* |
1527 | * If the dying block has lower hashvals, then move all the |
1528 | * elements in the remaining block up to make a hole. |
1529 | */ |
1530 | if ((be32_to_cpu(drop_btree[0].hashval) < |
1531 | be32_to_cpu(save_btree[0].hashval)) || |
1532 | (be32_to_cpu(drop_btree[drop_hdr.count - 1].hashval) < |
1533 | be32_to_cpu(save_btree[save_hdr.count - 1].hashval))) { |
1534 | /* XXX: check this - is memmove dst correct? */ |
1535 | tmp = save_hdr.count * sizeof(xfs_da_node_entry_t); |
1536 | memmove(&save_btree[drop_hdr.count], &save_btree[0], tmp); |
1537 | |
1538 | sindex = 0; |
1539 | xfs_trans_log_buf(tp, save_blk->bp, |
1540 | XFS_DA_LOGRANGE(save_node, &save_btree[0], |
1541 | (save_hdr.count + drop_hdr.count) * |
1542 | sizeof(xfs_da_node_entry_t))); |
1543 | } else { |
1544 | sindex = save_hdr.count; |
1545 | xfs_trans_log_buf(tp, save_blk->bp, |
1546 | XFS_DA_LOGRANGE(save_node, &save_btree[sindex], |
1547 | drop_hdr.count * sizeof(xfs_da_node_entry_t))); |
1548 | } |
1549 | |
1550 | /* |
1551 | * Move all the B-tree elements from drop_blk to save_blk. |
1552 | */ |
1553 | tmp = drop_hdr.count * (uint)sizeof(xfs_da_node_entry_t); |
1554 | memcpy(&save_btree[sindex], &drop_btree[0], tmp); |
1555 | save_hdr.count += drop_hdr.count; |
1556 | |
1557 | xfs_da3_node_hdr_to_disk(mp: dp->i_mount, to: save_node, from: &save_hdr); |
1558 | xfs_trans_log_buf(tp, save_blk->bp, |
1559 | XFS_DA_LOGRANGE(save_node, &save_node->hdr, |
1560 | state->args->geo->node_hdr_size)); |
1561 | |
1562 | /* |
1563 | * Save the last hashval in the remaining block for upward propagation. |
1564 | */ |
1565 | save_blk->hashval = be32_to_cpu(save_btree[save_hdr.count - 1].hashval); |
1566 | } |
1567 | |
1568 | /*======================================================================== |
1569 | * Routines used for finding things in the Btree. |
1570 | *========================================================================*/ |
1571 | |
1572 | /* |
1573 | * Walk down the Btree looking for a particular filename, filling |
1574 | * in the state structure as we go. |
1575 | * |
1576 | * We will set the state structure to point to each of the elements |
1577 | * in each of the nodes where either the hashval is or should be. |
1578 | * |
1579 | * We support duplicate hashval's so for each entry in the current |
1580 | * node that could contain the desired hashval, descend. This is a |
1581 | * pruned depth-first tree search. |
1582 | */ |
1583 | int /* error */ |
1584 | xfs_da3_node_lookup_int( |
1585 | struct xfs_da_state *state, |
1586 | int *result) |
1587 | { |
1588 | struct xfs_da_state_blk *blk; |
1589 | struct xfs_da_blkinfo *curr; |
1590 | struct xfs_da_intnode *node; |
1591 | struct xfs_da_node_entry *btree; |
1592 | struct xfs_da3_icnode_hdr nodehdr; |
1593 | struct xfs_da_args *args; |
1594 | xfs_dablk_t blkno; |
1595 | xfs_dahash_t hashval; |
1596 | xfs_dahash_t btreehashval; |
1597 | int probe; |
1598 | int span; |
1599 | int max; |
1600 | int error; |
1601 | int retval; |
1602 | unsigned int expected_level = 0; |
1603 | uint16_t magic; |
1604 | struct xfs_inode *dp = state->args->dp; |
1605 | |
1606 | args = state->args; |
1607 | |
1608 | /* |
1609 | * Descend thru the B-tree searching each level for the right |
1610 | * node to use, until the right hashval is found. |
1611 | */ |
1612 | blkno = args->geo->leafblk; |
1613 | for (blk = &state->path.blk[0], state->path.active = 1; |
1614 | state->path.active <= XFS_DA_NODE_MAXDEPTH; |
1615 | blk++, state->path.active++) { |
1616 | /* |
1617 | * Read the next node down in the tree. |
1618 | */ |
1619 | blk->blkno = blkno; |
1620 | error = xfs_da3_node_read(args->trans, args->dp, blkno, |
1621 | &blk->bp, args->whichfork); |
1622 | if (error) { |
1623 | blk->blkno = 0; |
1624 | state->path.active--; |
1625 | return error; |
1626 | } |
1627 | curr = blk->bp->b_addr; |
1628 | magic = be16_to_cpu(curr->magic); |
1629 | |
1630 | if (magic == XFS_ATTR_LEAF_MAGIC || |
1631 | magic == XFS_ATTR3_LEAF_MAGIC) { |
1632 | blk->magic = XFS_ATTR_LEAF_MAGIC; |
1633 | blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL); |
1634 | break; |
1635 | } |
1636 | |
1637 | if (magic == XFS_DIR2_LEAFN_MAGIC || |
1638 | magic == XFS_DIR3_LEAFN_MAGIC) { |
1639 | blk->magic = XFS_DIR2_LEAFN_MAGIC; |
1640 | blk->hashval = xfs_dir2_leaf_lasthash(args->dp, |
1641 | blk->bp, NULL); |
1642 | break; |
1643 | } |
1644 | |
1645 | if (magic != XFS_DA_NODE_MAGIC && magic != XFS_DA3_NODE_MAGIC) { |
1646 | xfs_buf_mark_corrupt(blk->bp); |
1647 | xfs_da_mark_sick(args); |
1648 | return -EFSCORRUPTED; |
1649 | } |
1650 | |
1651 | blk->magic = XFS_DA_NODE_MAGIC; |
1652 | |
1653 | /* |
1654 | * Search an intermediate node for a match. |
1655 | */ |
1656 | node = blk->bp->b_addr; |
1657 | xfs_da3_node_hdr_from_disk(mp: dp->i_mount, to: &nodehdr, from: node); |
1658 | btree = nodehdr.btree; |
1659 | |
1660 | /* Tree taller than we can handle; bail out! */ |
1661 | if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) { |
1662 | xfs_buf_mark_corrupt(blk->bp); |
1663 | xfs_da_mark_sick(args); |
1664 | return -EFSCORRUPTED; |
1665 | } |
1666 | |
1667 | /* Check the level from the root. */ |
1668 | if (blkno == args->geo->leafblk) |
1669 | expected_level = nodehdr.level - 1; |
1670 | else if (expected_level != nodehdr.level) { |
1671 | xfs_buf_mark_corrupt(blk->bp); |
1672 | xfs_da_mark_sick(args); |
1673 | return -EFSCORRUPTED; |
1674 | } else |
1675 | expected_level--; |
1676 | |
1677 | max = nodehdr.count; |
1678 | blk->hashval = be32_to_cpu(btree[max - 1].hashval); |
1679 | |
1680 | /* |
1681 | * Binary search. (note: small blocks will skip loop) |
1682 | */ |
1683 | probe = span = max / 2; |
1684 | hashval = args->hashval; |
1685 | while (span > 4) { |
1686 | span /= 2; |
1687 | btreehashval = be32_to_cpu(btree[probe].hashval); |
1688 | if (btreehashval < hashval) |
1689 | probe += span; |
1690 | else if (btreehashval > hashval) |
1691 | probe -= span; |
1692 | else |
1693 | break; |
1694 | } |
1695 | ASSERT((probe >= 0) && (probe < max)); |
1696 | ASSERT((span <= 4) || |
1697 | (be32_to_cpu(btree[probe].hashval) == hashval)); |
1698 | |
1699 | /* |
1700 | * Since we may have duplicate hashval's, find the first |
1701 | * matching hashval in the node. |
1702 | */ |
1703 | while (probe > 0 && |
1704 | be32_to_cpu(btree[probe].hashval) >= hashval) { |
1705 | probe--; |
1706 | } |
1707 | while (probe < max && |
1708 | be32_to_cpu(btree[probe].hashval) < hashval) { |
1709 | probe++; |
1710 | } |
1711 | |
1712 | /* |
1713 | * Pick the right block to descend on. |
1714 | */ |
1715 | if (probe == max) { |
1716 | blk->index = max - 1; |
1717 | blkno = be32_to_cpu(btree[max - 1].before); |
1718 | } else { |
1719 | blk->index = probe; |
1720 | blkno = be32_to_cpu(btree[probe].before); |
1721 | } |
1722 | |
1723 | /* We can't point back to the root. */ |
1724 | if (XFS_IS_CORRUPT(dp->i_mount, blkno == args->geo->leafblk)) { |
1725 | xfs_da_mark_sick(args); |
1726 | return -EFSCORRUPTED; |
1727 | } |
1728 | } |
1729 | |
1730 | if (XFS_IS_CORRUPT(dp->i_mount, expected_level != 0)) { |
1731 | xfs_da_mark_sick(args); |
1732 | return -EFSCORRUPTED; |
1733 | } |
1734 | |
1735 | /* |
1736 | * A leaf block that ends in the hashval that we are interested in |
1737 | * (final hashval == search hashval) means that the next block may |
1738 | * contain more entries with the same hashval, shift upward to the |
1739 | * next leaf and keep searching. |
1740 | */ |
1741 | for (;;) { |
1742 | if (blk->magic == XFS_DIR2_LEAFN_MAGIC) { |
1743 | retval = xfs_dir2_leafn_lookup_int(bp: blk->bp, args, |
1744 | indexp: &blk->index, state); |
1745 | } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) { |
1746 | retval = xfs_attr3_leaf_lookup_int(leaf: blk->bp, args); |
1747 | blk->index = args->index; |
1748 | args->blkno = blk->blkno; |
1749 | } else { |
1750 | ASSERT(0); |
1751 | xfs_da_mark_sick(args); |
1752 | return -EFSCORRUPTED; |
1753 | } |
1754 | if (((retval == -ENOENT) || (retval == -ENOATTR)) && |
1755 | (blk->hashval == args->hashval)) { |
1756 | error = xfs_da3_path_shift(state, path: &state->path, forward: 1, release: 1, |
1757 | result: &retval); |
1758 | if (error) |
1759 | return error; |
1760 | if (retval == 0) { |
1761 | continue; |
1762 | } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) { |
1763 | /* path_shift() gives ENOENT */ |
1764 | retval = -ENOATTR; |
1765 | } |
1766 | } |
1767 | break; |
1768 | } |
1769 | *result = retval; |
1770 | return 0; |
1771 | } |
1772 | |
1773 | /*======================================================================== |
1774 | * Utility routines. |
1775 | *========================================================================*/ |
1776 | |
1777 | /* |
1778 | * Compare two intermediate nodes for "order". |
1779 | */ |
1780 | STATIC int |
1781 | xfs_da3_node_order( |
1782 | struct xfs_inode *dp, |
1783 | struct xfs_buf *node1_bp, |
1784 | struct xfs_buf *node2_bp) |
1785 | { |
1786 | struct xfs_da_intnode *node1; |
1787 | struct xfs_da_intnode *node2; |
1788 | struct xfs_da_node_entry *btree1; |
1789 | struct xfs_da_node_entry *btree2; |
1790 | struct xfs_da3_icnode_hdr node1hdr; |
1791 | struct xfs_da3_icnode_hdr node2hdr; |
1792 | |
1793 | node1 = node1_bp->b_addr; |
1794 | node2 = node2_bp->b_addr; |
1795 | xfs_da3_node_hdr_from_disk(mp: dp->i_mount, to: &node1hdr, from: node1); |
1796 | xfs_da3_node_hdr_from_disk(mp: dp->i_mount, to: &node2hdr, from: node2); |
1797 | btree1 = node1hdr.btree; |
1798 | btree2 = node2hdr.btree; |
1799 | |
1800 | if (node1hdr.count > 0 && node2hdr.count > 0 && |
1801 | ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) || |
1802 | (be32_to_cpu(btree2[node2hdr.count - 1].hashval) < |
1803 | be32_to_cpu(btree1[node1hdr.count - 1].hashval)))) { |
1804 | return 1; |
1805 | } |
1806 | return 0; |
1807 | } |
1808 | |
1809 | /* |
1810 | * Link a new block into a doubly linked list of blocks (of whatever type). |
1811 | */ |
1812 | int /* error */ |
1813 | xfs_da3_blk_link( |
1814 | struct xfs_da_state *state, |
1815 | struct xfs_da_state_blk *old_blk, |
1816 | struct xfs_da_state_blk *new_blk) |
1817 | { |
1818 | struct xfs_da_blkinfo *old_info; |
1819 | struct xfs_da_blkinfo *new_info; |
1820 | struct xfs_da_blkinfo *tmp_info; |
1821 | struct xfs_da_args *args; |
1822 | struct xfs_buf *bp; |
1823 | int before = 0; |
1824 | int error; |
1825 | struct xfs_inode *dp = state->args->dp; |
1826 | |
1827 | /* |
1828 | * Set up environment. |
1829 | */ |
1830 | args = state->args; |
1831 | ASSERT(args != NULL); |
1832 | old_info = old_blk->bp->b_addr; |
1833 | new_info = new_blk->bp->b_addr; |
1834 | ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC || |
1835 | old_blk->magic == XFS_DIR2_LEAFN_MAGIC || |
1836 | old_blk->magic == XFS_ATTR_LEAF_MAGIC); |
1837 | |
1838 | switch (old_blk->magic) { |
1839 | case XFS_ATTR_LEAF_MAGIC: |
1840 | before = xfs_attr_leaf_order(leaf1_bp: old_blk->bp, leaf2_bp: new_blk->bp); |
1841 | break; |
1842 | case XFS_DIR2_LEAFN_MAGIC: |
1843 | before = xfs_dir2_leafn_order(dp, leaf1_bp: old_blk->bp, leaf2_bp: new_blk->bp); |
1844 | break; |
1845 | case XFS_DA_NODE_MAGIC: |
1846 | before = xfs_da3_node_order(dp, old_blk->bp, new_blk->bp); |
1847 | break; |
1848 | } |
1849 | |
1850 | /* |
1851 | * Link blocks in appropriate order. |
1852 | */ |
1853 | if (before) { |
1854 | /* |
1855 | * Link new block in before existing block. |
1856 | */ |
1857 | trace_xfs_da_link_before(args); |
1858 | new_info->forw = cpu_to_be32(old_blk->blkno); |
1859 | new_info->back = old_info->back; |
1860 | if (old_info->back) { |
1861 | error = xfs_da3_node_read(args->trans, dp, |
1862 | be32_to_cpu(old_info->back), |
1863 | &bp, args->whichfork); |
1864 | if (error) |
1865 | return error; |
1866 | ASSERT(bp != NULL); |
1867 | tmp_info = bp->b_addr; |
1868 | ASSERT(tmp_info->magic == old_info->magic); |
1869 | ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno); |
1870 | tmp_info->forw = cpu_to_be32(new_blk->blkno); |
1871 | xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1); |
1872 | } |
1873 | old_info->back = cpu_to_be32(new_blk->blkno); |
1874 | } else { |
1875 | /* |
1876 | * Link new block in after existing block. |
1877 | */ |
1878 | trace_xfs_da_link_after(args); |
1879 | new_info->forw = old_info->forw; |
1880 | new_info->back = cpu_to_be32(old_blk->blkno); |
1881 | if (old_info->forw) { |
1882 | error = xfs_da3_node_read(args->trans, dp, |
1883 | be32_to_cpu(old_info->forw), |
1884 | &bp, args->whichfork); |
1885 | if (error) |
1886 | return error; |
1887 | ASSERT(bp != NULL); |
1888 | tmp_info = bp->b_addr; |
1889 | ASSERT(tmp_info->magic == old_info->magic); |
1890 | ASSERT(be32_to_cpu(tmp_info->back) == old_blk->blkno); |
1891 | tmp_info->back = cpu_to_be32(new_blk->blkno); |
1892 | xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1); |
1893 | } |
1894 | old_info->forw = cpu_to_be32(new_blk->blkno); |
1895 | } |
1896 | |
1897 | xfs_trans_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1); |
1898 | xfs_trans_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1); |
1899 | return 0; |
1900 | } |
1901 | |
1902 | /* |
1903 | * Unlink a block from a doubly linked list of blocks. |
1904 | */ |
1905 | STATIC int /* error */ |
1906 | xfs_da3_blk_unlink( |
1907 | struct xfs_da_state *state, |
1908 | struct xfs_da_state_blk *drop_blk, |
1909 | struct xfs_da_state_blk *save_blk) |
1910 | { |
1911 | struct xfs_da_blkinfo *drop_info; |
1912 | struct xfs_da_blkinfo *save_info; |
1913 | struct xfs_da_blkinfo *tmp_info; |
1914 | struct xfs_da_args *args; |
1915 | struct xfs_buf *bp; |
1916 | int error; |
1917 | |
1918 | /* |
1919 | * Set up environment. |
1920 | */ |
1921 | args = state->args; |
1922 | ASSERT(args != NULL); |
1923 | save_info = save_blk->bp->b_addr; |
1924 | drop_info = drop_blk->bp->b_addr; |
1925 | ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC || |
1926 | save_blk->magic == XFS_DIR2_LEAFN_MAGIC || |
1927 | save_blk->magic == XFS_ATTR_LEAF_MAGIC); |
1928 | ASSERT(save_blk->magic == drop_blk->magic); |
1929 | ASSERT((be32_to_cpu(save_info->forw) == drop_blk->blkno) || |
1930 | (be32_to_cpu(save_info->back) == drop_blk->blkno)); |
1931 | ASSERT((be32_to_cpu(drop_info->forw) == save_blk->blkno) || |
1932 | (be32_to_cpu(drop_info->back) == save_blk->blkno)); |
1933 | |
1934 | /* |
1935 | * Unlink the leaf block from the doubly linked chain of leaves. |
1936 | */ |
1937 | if (be32_to_cpu(save_info->back) == drop_blk->blkno) { |
1938 | trace_xfs_da_unlink_back(args); |
1939 | save_info->back = drop_info->back; |
1940 | if (drop_info->back) { |
1941 | error = xfs_da3_node_read(args->trans, args->dp, |
1942 | be32_to_cpu(drop_info->back), |
1943 | &bp, args->whichfork); |
1944 | if (error) |
1945 | return error; |
1946 | ASSERT(bp != NULL); |
1947 | tmp_info = bp->b_addr; |
1948 | ASSERT(tmp_info->magic == save_info->magic); |
1949 | ASSERT(be32_to_cpu(tmp_info->forw) == drop_blk->blkno); |
1950 | tmp_info->forw = cpu_to_be32(save_blk->blkno); |
1951 | xfs_trans_log_buf(args->trans, bp, 0, |
1952 | sizeof(*tmp_info) - 1); |
1953 | } |
1954 | } else { |
1955 | trace_xfs_da_unlink_forward(args); |
1956 | save_info->forw = drop_info->forw; |
1957 | if (drop_info->forw) { |
1958 | error = xfs_da3_node_read(args->trans, args->dp, |
1959 | be32_to_cpu(drop_info->forw), |
1960 | &bp, args->whichfork); |
1961 | if (error) |
1962 | return error; |
1963 | ASSERT(bp != NULL); |
1964 | tmp_info = bp->b_addr; |
1965 | ASSERT(tmp_info->magic == save_info->magic); |
1966 | ASSERT(be32_to_cpu(tmp_info->back) == drop_blk->blkno); |
1967 | tmp_info->back = cpu_to_be32(save_blk->blkno); |
1968 | xfs_trans_log_buf(args->trans, bp, 0, |
1969 | sizeof(*tmp_info) - 1); |
1970 | } |
1971 | } |
1972 | |
1973 | xfs_trans_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1); |
1974 | return 0; |
1975 | } |
1976 | |
1977 | /* |
1978 | * Move a path "forward" or "!forward" one block at the current level. |
1979 | * |
1980 | * This routine will adjust a "path" to point to the next block |
1981 | * "forward" (higher hashvalues) or "!forward" (lower hashvals) in the |
1982 | * Btree, including updating pointers to the intermediate nodes between |
1983 | * the new bottom and the root. |
1984 | */ |
1985 | int /* error */ |
1986 | xfs_da3_path_shift( |
1987 | struct xfs_da_state *state, |
1988 | struct xfs_da_state_path *path, |
1989 | int forward, |
1990 | int release, |
1991 | int *result) |
1992 | { |
1993 | struct xfs_da_state_blk *blk; |
1994 | struct xfs_da_blkinfo *info; |
1995 | struct xfs_da_args *args; |
1996 | struct xfs_da_node_entry *btree; |
1997 | struct xfs_da3_icnode_hdr nodehdr; |
1998 | struct xfs_buf *bp; |
1999 | xfs_dablk_t blkno = 0; |
2000 | int level; |
2001 | int error; |
2002 | struct xfs_inode *dp = state->args->dp; |
2003 | |
2004 | trace_xfs_da_path_shift(state->args); |
2005 | |
2006 | /* |
2007 | * Roll up the Btree looking for the first block where our |
2008 | * current index is not at the edge of the block. Note that |
2009 | * we skip the bottom layer because we want the sibling block. |
2010 | */ |
2011 | args = state->args; |
2012 | ASSERT(args != NULL); |
2013 | ASSERT(path != NULL); |
2014 | ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); |
2015 | level = (path->active-1) - 1; /* skip bottom layer in path */ |
2016 | for (; level >= 0; level--) { |
2017 | blk = &path->blk[level]; |
2018 | xfs_da3_node_hdr_from_disk(mp: dp->i_mount, to: &nodehdr, |
2019 | from: blk->bp->b_addr); |
2020 | |
2021 | if (forward && (blk->index < nodehdr.count - 1)) { |
2022 | blk->index++; |
2023 | blkno = be32_to_cpu(nodehdr.btree[blk->index].before); |
2024 | break; |
2025 | } else if (!forward && (blk->index > 0)) { |
2026 | blk->index--; |
2027 | blkno = be32_to_cpu(nodehdr.btree[blk->index].before); |
2028 | break; |
2029 | } |
2030 | } |
2031 | if (level < 0) { |
2032 | *result = -ENOENT; /* we're out of our tree */ |
2033 | ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); |
2034 | return 0; |
2035 | } |
2036 | |
2037 | /* |
2038 | * Roll down the edge of the subtree until we reach the |
2039 | * same depth we were at originally. |
2040 | */ |
2041 | for (blk++, level++; level < path->active; blk++, level++) { |
2042 | /* |
2043 | * Read the next child block into a local buffer. |
2044 | */ |
2045 | error = xfs_da3_node_read(args->trans, dp, blkno, &bp, |
2046 | args->whichfork); |
2047 | if (error) |
2048 | return error; |
2049 | |
2050 | /* |
2051 | * Release the old block (if it's dirty, the trans doesn't |
2052 | * actually let go) and swap the local buffer into the path |
2053 | * structure. This ensures failure of the above read doesn't set |
2054 | * a NULL buffer in an active slot in the path. |
2055 | */ |
2056 | if (release) |
2057 | xfs_trans_brelse(args->trans, blk->bp); |
2058 | blk->blkno = blkno; |
2059 | blk->bp = bp; |
2060 | |
2061 | info = blk->bp->b_addr; |
2062 | ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || |
2063 | info->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) || |
2064 | info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || |
2065 | info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) || |
2066 | info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) || |
2067 | info->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)); |
2068 | |
2069 | |
2070 | /* |
2071 | * Note: we flatten the magic number to a single type so we |
2072 | * don't have to compare against crc/non-crc types elsewhere. |
2073 | */ |
2074 | switch (be16_to_cpu(info->magic)) { |
2075 | case XFS_DA_NODE_MAGIC: |
2076 | case XFS_DA3_NODE_MAGIC: |
2077 | blk->magic = XFS_DA_NODE_MAGIC; |
2078 | xfs_da3_node_hdr_from_disk(mp: dp->i_mount, to: &nodehdr, |
2079 | from: bp->b_addr); |
2080 | btree = nodehdr.btree; |
2081 | blk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval); |
2082 | if (forward) |
2083 | blk->index = 0; |
2084 | else |
2085 | blk->index = nodehdr.count - 1; |
2086 | blkno = be32_to_cpu(btree[blk->index].before); |
2087 | break; |
2088 | case XFS_ATTR_LEAF_MAGIC: |
2089 | case XFS_ATTR3_LEAF_MAGIC: |
2090 | blk->magic = XFS_ATTR_LEAF_MAGIC; |
2091 | ASSERT(level == path->active-1); |
2092 | blk->index = 0; |
2093 | blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL); |
2094 | break; |
2095 | case XFS_DIR2_LEAFN_MAGIC: |
2096 | case XFS_DIR3_LEAFN_MAGIC: |
2097 | blk->magic = XFS_DIR2_LEAFN_MAGIC; |
2098 | ASSERT(level == path->active-1); |
2099 | blk->index = 0; |
2100 | blk->hashval = xfs_dir2_leaf_lasthash(args->dp, |
2101 | blk->bp, NULL); |
2102 | break; |
2103 | default: |
2104 | ASSERT(0); |
2105 | break; |
2106 | } |
2107 | } |
2108 | *result = 0; |
2109 | return 0; |
2110 | } |
2111 | |
2112 | |
2113 | /*======================================================================== |
2114 | * Utility routines. |
2115 | *========================================================================*/ |
2116 | |
2117 | /* |
2118 | * Implement a simple hash on a character string. |
2119 | * Rotate the hash value by 7 bits, then XOR each character in. |
2120 | * This is implemented with some source-level loop unrolling. |
2121 | */ |
2122 | xfs_dahash_t |
2123 | xfs_da_hashname(const uint8_t *name, int namelen) |
2124 | { |
2125 | xfs_dahash_t hash; |
2126 | |
2127 | /* |
2128 | * Do four characters at a time as long as we can. |
2129 | */ |
2130 | for (hash = 0; namelen >= 4; namelen -= 4, name += 4) |
2131 | hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^ |
2132 | (name[3] << 0) ^ rol32(hash, 7 * 4); |
2133 | |
2134 | /* |
2135 | * Now do the rest of the characters. |
2136 | */ |
2137 | switch (namelen) { |
2138 | case 3: |
2139 | return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^ |
2140 | rol32(hash, 7 * 3); |
2141 | case 2: |
2142 | return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2); |
2143 | case 1: |
2144 | return (name[0] << 0) ^ rol32(hash, 7 * 1); |
2145 | default: /* case 0: */ |
2146 | return hash; |
2147 | } |
2148 | } |
2149 | |
2150 | enum xfs_dacmp |
2151 | xfs_da_compname( |
2152 | struct xfs_da_args *args, |
2153 | const unsigned char *name, |
2154 | int len) |
2155 | { |
2156 | return (args->namelen == len && memcmp(args->name, name, len) == 0) ? |
2157 | XFS_CMP_EXACT : XFS_CMP_DIFFERENT; |
2158 | } |
2159 | |
2160 | int |
2161 | xfs_da_grow_inode_int( |
2162 | struct xfs_da_args *args, |
2163 | xfs_fileoff_t *bno, |
2164 | int count) |
2165 | { |
2166 | struct xfs_trans *tp = args->trans; |
2167 | struct xfs_inode *dp = args->dp; |
2168 | int w = args->whichfork; |
2169 | xfs_rfsblock_t nblks = dp->i_nblocks; |
2170 | struct xfs_bmbt_irec map, *mapp; |
2171 | int nmap, error, got, i, mapi; |
2172 | |
2173 | /* |
2174 | * Find a spot in the file space to put the new block. |
2175 | */ |
2176 | error = xfs_bmap_first_unused(tp, dp, count, bno, w); |
2177 | if (error) |
2178 | return error; |
2179 | |
2180 | /* |
2181 | * Try mapping it in one filesystem block. |
2182 | */ |
2183 | nmap = 1; |
2184 | error = xfs_bmapi_write(tp, dp, *bno, count, |
2185 | xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG, |
2186 | args->total, &map, &nmap); |
2187 | if (error) |
2188 | return error; |
2189 | |
2190 | ASSERT(nmap <= 1); |
2191 | if (nmap == 1) { |
2192 | mapp = ↦ |
2193 | mapi = 1; |
2194 | } else if (nmap == 0 && count > 1) { |
2195 | xfs_fileoff_t b; |
2196 | int c; |
2197 | |
2198 | /* |
2199 | * If we didn't get it and the block might work if fragmented, |
2200 | * try without the CONTIG flag. Loop until we get it all. |
2201 | */ |
2202 | mapp = kmalloc(sizeof(*mapp) * count, |
2203 | GFP_KERNEL | __GFP_NOFAIL); |
2204 | for (b = *bno, mapi = 0; b < *bno + count; ) { |
2205 | c = (int)(*bno + count - b); |
2206 | nmap = min(XFS_BMAP_MAX_NMAP, c); |
2207 | error = xfs_bmapi_write(tp, dp, b, c, |
2208 | xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA, |
2209 | args->total, &mapp[mapi], &nmap); |
2210 | if (error) |
2211 | goto out_free_map; |
2212 | if (nmap < 1) |
2213 | break; |
2214 | mapi += nmap; |
2215 | b = mapp[mapi - 1].br_startoff + |
2216 | mapp[mapi - 1].br_blockcount; |
2217 | } |
2218 | } else { |
2219 | mapi = 0; |
2220 | mapp = NULL; |
2221 | } |
2222 | |
2223 | /* |
2224 | * Count the blocks we got, make sure it matches the total. |
2225 | */ |
2226 | for (i = 0, got = 0; i < mapi; i++) |
2227 | got += mapp[i].br_blockcount; |
2228 | if (got != count || mapp[0].br_startoff != *bno || |
2229 | mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount != |
2230 | *bno + count) { |
2231 | error = -ENOSPC; |
2232 | goto out_free_map; |
2233 | } |
2234 | |
2235 | /* account for newly allocated blocks in reserved blocks total */ |
2236 | args->total -= dp->i_nblocks - nblks; |
2237 | |
2238 | out_free_map: |
2239 | if (mapp != &map) |
2240 | kfree(mapp); |
2241 | return error; |
2242 | } |
2243 | |
2244 | /* |
2245 | * Add a block to the btree ahead of the file. |
2246 | * Return the new block number to the caller. |
2247 | */ |
2248 | int |
2249 | xfs_da_grow_inode( |
2250 | struct xfs_da_args *args, |
2251 | xfs_dablk_t *new_blkno) |
2252 | { |
2253 | xfs_fileoff_t bno; |
2254 | int error; |
2255 | |
2256 | trace_xfs_da_grow_inode(args); |
2257 | |
2258 | bno = args->geo->leafblk; |
2259 | error = xfs_da_grow_inode_int(args, &bno, args->geo->fsbcount); |
2260 | if (!error) |
2261 | *new_blkno = (xfs_dablk_t)bno; |
2262 | return error; |
2263 | } |
2264 | |
2265 | /* |
2266 | * Ick. We need to always be able to remove a btree block, even |
2267 | * if there's no space reservation because the filesystem is full. |
2268 | * This is called if xfs_bunmapi on a btree block fails due to ENOSPC. |
2269 | * It swaps the target block with the last block in the file. The |
2270 | * last block in the file can always be removed since it can't cause |
2271 | * a bmap btree split to do that. |
2272 | */ |
2273 | STATIC int |
2274 | xfs_da3_swap_lastblock( |
2275 | struct xfs_da_args *args, |
2276 | xfs_dablk_t *dead_blknop, |
2277 | struct xfs_buf **dead_bufp) |
2278 | { |
2279 | struct xfs_da_blkinfo *dead_info; |
2280 | struct xfs_da_blkinfo *sib_info; |
2281 | struct xfs_da_intnode *par_node; |
2282 | struct xfs_da_intnode *dead_node; |
2283 | struct xfs_dir2_leaf *dead_leaf2; |
2284 | struct xfs_da_node_entry *btree; |
2285 | struct xfs_da3_icnode_hdr par_hdr; |
2286 | struct xfs_inode *dp; |
2287 | struct xfs_trans *tp; |
2288 | struct xfs_mount *mp; |
2289 | struct xfs_buf *dead_buf; |
2290 | struct xfs_buf *last_buf; |
2291 | struct xfs_buf *sib_buf; |
2292 | struct xfs_buf *par_buf; |
2293 | xfs_dahash_t dead_hash; |
2294 | xfs_fileoff_t lastoff; |
2295 | xfs_dablk_t dead_blkno; |
2296 | xfs_dablk_t last_blkno; |
2297 | xfs_dablk_t sib_blkno; |
2298 | xfs_dablk_t par_blkno; |
2299 | int error; |
2300 | int w; |
2301 | int entno; |
2302 | int level; |
2303 | int dead_level; |
2304 | |
2305 | trace_xfs_da_swap_lastblock(args); |
2306 | |
2307 | dead_buf = *dead_bufp; |
2308 | dead_blkno = *dead_blknop; |
2309 | tp = args->trans; |
2310 | dp = args->dp; |
2311 | w = args->whichfork; |
2312 | ASSERT(w == XFS_DATA_FORK); |
2313 | mp = dp->i_mount; |
2314 | lastoff = args->geo->freeblk; |
2315 | error = xfs_bmap_last_before(tp, dp, &lastoff, w); |
2316 | if (error) |
2317 | return error; |
2318 | if (XFS_IS_CORRUPT(mp, lastoff == 0)) { |
2319 | xfs_da_mark_sick(args); |
2320 | return -EFSCORRUPTED; |
2321 | } |
2322 | /* |
2323 | * Read the last block in the btree space. |
2324 | */ |
2325 | last_blkno = (xfs_dablk_t)lastoff - args->geo->fsbcount; |
2326 | error = xfs_da3_node_read(tp, dp, last_blkno, &last_buf, w); |
2327 | if (error) |
2328 | return error; |
2329 | /* |
2330 | * Copy the last block into the dead buffer and log it. |
2331 | */ |
2332 | xfs_da_buf_copy(dead_buf, last_buf, args->geo->blksize); |
2333 | xfs_trans_log_buf(tp, dead_buf, 0, args->geo->blksize - 1); |
2334 | dead_info = dead_buf->b_addr; |
2335 | |
2336 | /* |
2337 | * Get values from the moved block. |
2338 | */ |
2339 | if (dead_info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || |
2340 | dead_info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) { |
2341 | struct xfs_dir3_icleaf_hdr leafhdr; |
2342 | struct xfs_dir2_leaf_entry *ents; |
2343 | |
2344 | dead_leaf2 = (xfs_dir2_leaf_t *)dead_info; |
2345 | xfs_dir2_leaf_hdr_from_disk(mp: dp->i_mount, to: &leafhdr, |
2346 | from: dead_leaf2); |
2347 | ents = leafhdr.ents; |
2348 | dead_level = 0; |
2349 | dead_hash = be32_to_cpu(ents[leafhdr.count - 1].hashval); |
2350 | } else { |
2351 | struct xfs_da3_icnode_hdr deadhdr; |
2352 | |
2353 | dead_node = (xfs_da_intnode_t *)dead_info; |
2354 | xfs_da3_node_hdr_from_disk(mp: dp->i_mount, to: &deadhdr, from: dead_node); |
2355 | btree = deadhdr.btree; |
2356 | dead_level = deadhdr.level; |
2357 | dead_hash = be32_to_cpu(btree[deadhdr.count - 1].hashval); |
2358 | } |
2359 | sib_buf = par_buf = NULL; |
2360 | /* |
2361 | * If the moved block has a left sibling, fix up the pointers. |
2362 | */ |
2363 | if ((sib_blkno = be32_to_cpu(dead_info->back))) { |
2364 | error = xfs_da3_node_read(tp, dp, sib_blkno, &sib_buf, w); |
2365 | if (error) |
2366 | goto done; |
2367 | sib_info = sib_buf->b_addr; |
2368 | if (XFS_IS_CORRUPT(mp, |
2369 | be32_to_cpu(sib_info->forw) != last_blkno || |
2370 | sib_info->magic != dead_info->magic)) { |
2371 | xfs_da_mark_sick(args); |
2372 | error = -EFSCORRUPTED; |
2373 | goto done; |
2374 | } |
2375 | sib_info->forw = cpu_to_be32(dead_blkno); |
2376 | xfs_trans_log_buf(tp, sib_buf, |
2377 | XFS_DA_LOGRANGE(sib_info, &sib_info->forw, |
2378 | sizeof(sib_info->forw))); |
2379 | sib_buf = NULL; |
2380 | } |
2381 | /* |
2382 | * If the moved block has a right sibling, fix up the pointers. |
2383 | */ |
2384 | if ((sib_blkno = be32_to_cpu(dead_info->forw))) { |
2385 | error = xfs_da3_node_read(tp, dp, sib_blkno, &sib_buf, w); |
2386 | if (error) |
2387 | goto done; |
2388 | sib_info = sib_buf->b_addr; |
2389 | if (XFS_IS_CORRUPT(mp, |
2390 | be32_to_cpu(sib_info->back) != last_blkno || |
2391 | sib_info->magic != dead_info->magic)) { |
2392 | xfs_da_mark_sick(args); |
2393 | error = -EFSCORRUPTED; |
2394 | goto done; |
2395 | } |
2396 | sib_info->back = cpu_to_be32(dead_blkno); |
2397 | xfs_trans_log_buf(tp, sib_buf, |
2398 | XFS_DA_LOGRANGE(sib_info, &sib_info->back, |
2399 | sizeof(sib_info->back))); |
2400 | sib_buf = NULL; |
2401 | } |
2402 | par_blkno = args->geo->leafblk; |
2403 | level = -1; |
2404 | /* |
2405 | * Walk down the tree looking for the parent of the moved block. |
2406 | */ |
2407 | for (;;) { |
2408 | error = xfs_da3_node_read(tp, dp, par_blkno, &par_buf, w); |
2409 | if (error) |
2410 | goto done; |
2411 | par_node = par_buf->b_addr; |
2412 | xfs_da3_node_hdr_from_disk(mp: dp->i_mount, to: &par_hdr, from: par_node); |
2413 | if (XFS_IS_CORRUPT(mp, |
2414 | level >= 0 && level != par_hdr.level + 1)) { |
2415 | xfs_da_mark_sick(args); |
2416 | error = -EFSCORRUPTED; |
2417 | goto done; |
2418 | } |
2419 | level = par_hdr.level; |
2420 | btree = par_hdr.btree; |
2421 | for (entno = 0; |
2422 | entno < par_hdr.count && |
2423 | be32_to_cpu(btree[entno].hashval) < dead_hash; |
2424 | entno++) |
2425 | continue; |
2426 | if (XFS_IS_CORRUPT(mp, entno == par_hdr.count)) { |
2427 | xfs_da_mark_sick(args); |
2428 | error = -EFSCORRUPTED; |
2429 | goto done; |
2430 | } |
2431 | par_blkno = be32_to_cpu(btree[entno].before); |
2432 | if (level == dead_level + 1) |
2433 | break; |
2434 | xfs_trans_brelse(tp, par_buf); |
2435 | par_buf = NULL; |
2436 | } |
2437 | /* |
2438 | * We're in the right parent block. |
2439 | * Look for the right entry. |
2440 | */ |
2441 | for (;;) { |
2442 | for (; |
2443 | entno < par_hdr.count && |
2444 | be32_to_cpu(btree[entno].before) != last_blkno; |
2445 | entno++) |
2446 | continue; |
2447 | if (entno < par_hdr.count) |
2448 | break; |
2449 | par_blkno = par_hdr.forw; |
2450 | xfs_trans_brelse(tp, par_buf); |
2451 | par_buf = NULL; |
2452 | if (XFS_IS_CORRUPT(mp, par_blkno == 0)) { |
2453 | xfs_da_mark_sick(args); |
2454 | error = -EFSCORRUPTED; |
2455 | goto done; |
2456 | } |
2457 | error = xfs_da3_node_read(tp, dp, par_blkno, &par_buf, w); |
2458 | if (error) |
2459 | goto done; |
2460 | par_node = par_buf->b_addr; |
2461 | xfs_da3_node_hdr_from_disk(mp: dp->i_mount, to: &par_hdr, from: par_node); |
2462 | if (XFS_IS_CORRUPT(mp, par_hdr.level != level)) { |
2463 | xfs_da_mark_sick(args); |
2464 | error = -EFSCORRUPTED; |
2465 | goto done; |
2466 | } |
2467 | btree = par_hdr.btree; |
2468 | entno = 0; |
2469 | } |
2470 | /* |
2471 | * Update the parent entry pointing to the moved block. |
2472 | */ |
2473 | btree[entno].before = cpu_to_be32(dead_blkno); |
2474 | xfs_trans_log_buf(tp, par_buf, |
2475 | XFS_DA_LOGRANGE(par_node, &btree[entno].before, |
2476 | sizeof(btree[entno].before))); |
2477 | *dead_blknop = last_blkno; |
2478 | *dead_bufp = last_buf; |
2479 | return 0; |
2480 | done: |
2481 | if (par_buf) |
2482 | xfs_trans_brelse(tp, par_buf); |
2483 | if (sib_buf) |
2484 | xfs_trans_brelse(tp, sib_buf); |
2485 | xfs_trans_brelse(tp, last_buf); |
2486 | return error; |
2487 | } |
2488 | |
2489 | /* |
2490 | * Remove a btree block from a directory or attribute. |
2491 | */ |
2492 | int |
2493 | xfs_da_shrink_inode( |
2494 | struct xfs_da_args *args, |
2495 | xfs_dablk_t dead_blkno, |
2496 | struct xfs_buf *dead_buf) |
2497 | { |
2498 | struct xfs_inode *dp; |
2499 | int done, error, w, count; |
2500 | struct xfs_trans *tp; |
2501 | |
2502 | trace_xfs_da_shrink_inode(args); |
2503 | |
2504 | dp = args->dp; |
2505 | w = args->whichfork; |
2506 | tp = args->trans; |
2507 | count = args->geo->fsbcount; |
2508 | for (;;) { |
2509 | /* |
2510 | * Remove extents. If we get ENOSPC for a dir we have to move |
2511 | * the last block to the place we want to kill. |
2512 | */ |
2513 | error = xfs_bunmapi(tp, dp, dead_blkno, count, |
2514 | xfs_bmapi_aflag(w), 0, &done); |
2515 | if (error == -ENOSPC) { |
2516 | if (w != XFS_DATA_FORK) |
2517 | break; |
2518 | error = xfs_da3_swap_lastblock(args, &dead_blkno, |
2519 | &dead_buf); |
2520 | if (error) |
2521 | break; |
2522 | } else { |
2523 | break; |
2524 | } |
2525 | } |
2526 | xfs_trans_binval(tp, dead_buf); |
2527 | return error; |
2528 | } |
2529 | |
2530 | static int |
2531 | xfs_dabuf_map( |
2532 | struct xfs_inode *dp, |
2533 | xfs_dablk_t bno, |
2534 | unsigned int flags, |
2535 | int whichfork, |
2536 | struct xfs_buf_map **mapp, |
2537 | int *nmaps) |
2538 | { |
2539 | struct xfs_mount *mp = dp->i_mount; |
2540 | int nfsb = xfs_dabuf_nfsb(mp, whichfork); |
2541 | struct xfs_bmbt_irec irec, *irecs = &irec; |
2542 | struct xfs_buf_map *map = *mapp; |
2543 | xfs_fileoff_t off = bno; |
2544 | int error = 0, nirecs, i; |
2545 | |
2546 | if (nfsb > 1) |
2547 | irecs = kzalloc(sizeof(irec) * nfsb, |
2548 | GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL); |
2549 | |
2550 | nirecs = nfsb; |
2551 | error = xfs_bmapi_read(dp, bno, nfsb, irecs, &nirecs, |
2552 | xfs_bmapi_aflag(w: whichfork)); |
2553 | if (error) |
2554 | goto out_free_irecs; |
2555 | |
2556 | /* |
2557 | * Use the caller provided map for the single map case, else allocate a |
2558 | * larger one that needs to be free by the caller. |
2559 | */ |
2560 | if (nirecs > 1) { |
2561 | map = kzalloc(nirecs * sizeof(struct xfs_buf_map), |
2562 | GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL); |
2563 | if (!map) { |
2564 | error = -ENOMEM; |
2565 | goto out_free_irecs; |
2566 | } |
2567 | *mapp = map; |
2568 | } |
2569 | |
2570 | for (i = 0; i < nirecs; i++) { |
2571 | if (irecs[i].br_startblock == HOLESTARTBLOCK || |
2572 | irecs[i].br_startblock == DELAYSTARTBLOCK) |
2573 | goto invalid_mapping; |
2574 | if (off != irecs[i].br_startoff) |
2575 | goto invalid_mapping; |
2576 | |
2577 | map[i].bm_bn = XFS_FSB_TO_DADDR(mp, irecs[i].br_startblock); |
2578 | map[i].bm_len = XFS_FSB_TO_BB(mp, irecs[i].br_blockcount); |
2579 | off += irecs[i].br_blockcount; |
2580 | } |
2581 | |
2582 | if (off != bno + nfsb) |
2583 | goto invalid_mapping; |
2584 | |
2585 | *nmaps = nirecs; |
2586 | out_free_irecs: |
2587 | if (irecs != &irec) |
2588 | kfree(irecs); |
2589 | return error; |
2590 | |
2591 | invalid_mapping: |
2592 | /* Caller ok with no mapping. */ |
2593 | if (XFS_IS_CORRUPT(mp, !(flags & XFS_DABUF_MAP_HOLE_OK))) { |
2594 | xfs_dirattr_mark_sick(ip: dp, whichfork); |
2595 | error = -EFSCORRUPTED; |
2596 | if (xfs_error_level >= XFS_ERRLEVEL_LOW) { |
2597 | xfs_alert(mp, "%s: bno %u inode %llu" , |
2598 | __func__, bno, dp->i_ino); |
2599 | |
2600 | for (i = 0; i < nirecs; i++) { |
2601 | xfs_alert(mp, |
2602 | "[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d" , |
2603 | i, irecs[i].br_startoff, |
2604 | irecs[i].br_startblock, |
2605 | irecs[i].br_blockcount, |
2606 | irecs[i].br_state); |
2607 | } |
2608 | } |
2609 | } else { |
2610 | *nmaps = 0; |
2611 | } |
2612 | goto out_free_irecs; |
2613 | } |
2614 | |
2615 | /* |
2616 | * Get a buffer for the dir/attr block. |
2617 | */ |
2618 | int |
2619 | xfs_da_get_buf( |
2620 | struct xfs_trans *tp, |
2621 | struct xfs_inode *dp, |
2622 | xfs_dablk_t bno, |
2623 | struct xfs_buf **bpp, |
2624 | int whichfork) |
2625 | { |
2626 | struct xfs_mount *mp = dp->i_mount; |
2627 | struct xfs_buf *bp; |
2628 | struct xfs_buf_map map, *mapp = ↦ |
2629 | int nmap = 1; |
2630 | int error; |
2631 | |
2632 | *bpp = NULL; |
2633 | error = xfs_dabuf_map(dp, bno, 0, whichfork, &mapp, &nmap); |
2634 | if (error || nmap == 0) |
2635 | goto out_free; |
2636 | |
2637 | error = xfs_trans_get_buf_map(tp, mp->m_ddev_targp, mapp, nmap, 0, &bp); |
2638 | if (error) |
2639 | goto out_free; |
2640 | |
2641 | *bpp = bp; |
2642 | |
2643 | out_free: |
2644 | if (mapp != &map) |
2645 | kfree(mapp); |
2646 | |
2647 | return error; |
2648 | } |
2649 | |
2650 | /* |
2651 | * Get a buffer for the dir/attr block, fill in the contents. |
2652 | */ |
2653 | int |
2654 | xfs_da_read_buf( |
2655 | struct xfs_trans *tp, |
2656 | struct xfs_inode *dp, |
2657 | xfs_dablk_t bno, |
2658 | unsigned int flags, |
2659 | struct xfs_buf **bpp, |
2660 | int whichfork, |
2661 | const struct xfs_buf_ops *ops) |
2662 | { |
2663 | struct xfs_mount *mp = dp->i_mount; |
2664 | struct xfs_buf *bp; |
2665 | struct xfs_buf_map map, *mapp = ↦ |
2666 | int nmap = 1; |
2667 | int error; |
2668 | |
2669 | *bpp = NULL; |
2670 | error = xfs_dabuf_map(dp, bno, flags, whichfork, &mapp, &nmap); |
2671 | if (error || !nmap) |
2672 | goto out_free; |
2673 | |
2674 | error = xfs_trans_read_buf_map(mp, tp, mp->m_ddev_targp, mapp, nmap, 0, |
2675 | &bp, ops); |
2676 | if (xfs_metadata_is_sick(error)) |
2677 | xfs_dirattr_mark_sick(ip: dp, whichfork); |
2678 | if (error) |
2679 | goto out_free; |
2680 | |
2681 | if (whichfork == XFS_ATTR_FORK) |
2682 | xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF); |
2683 | else |
2684 | xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF); |
2685 | *bpp = bp; |
2686 | out_free: |
2687 | if (mapp != &map) |
2688 | kfree(mapp); |
2689 | |
2690 | return error; |
2691 | } |
2692 | |
2693 | /* |
2694 | * Readahead the dir/attr block. |
2695 | */ |
2696 | int |
2697 | xfs_da_reada_buf( |
2698 | struct xfs_inode *dp, |
2699 | xfs_dablk_t bno, |
2700 | unsigned int flags, |
2701 | int whichfork, |
2702 | const struct xfs_buf_ops *ops) |
2703 | { |
2704 | struct xfs_buf_map map; |
2705 | struct xfs_buf_map *mapp; |
2706 | int nmap; |
2707 | int error; |
2708 | |
2709 | mapp = ↦ |
2710 | nmap = 1; |
2711 | error = xfs_dabuf_map(dp, bno, flags, whichfork, &mapp, &nmap); |
2712 | if (error || !nmap) |
2713 | goto out_free; |
2714 | |
2715 | xfs_buf_readahead_map(dp->i_mount->m_ddev_targp, mapp, nmap, ops); |
2716 | |
2717 | out_free: |
2718 | if (mapp != &map) |
2719 | kfree(mapp); |
2720 | |
2721 | return error; |
2722 | } |
2723 | |