1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Copyright (C) 2007 Oracle. All rights reserved. |
4 | */ |
5 | |
6 | #include <linux/err.h> |
7 | #include <linux/uuid.h> |
8 | #include "ctree.h" |
9 | #include "fs.h" |
10 | #include "messages.h" |
11 | #include "transaction.h" |
12 | #include "disk-io.h" |
13 | #include "qgroup.h" |
14 | #include "space-info.h" |
15 | #include "accessors.h" |
16 | #include "root-tree.h" |
17 | #include "orphan.h" |
18 | |
19 | /* |
20 | * Read a root item from the tree. In case we detect a root item smaller then |
21 | * sizeof(root_item), we know it's an old version of the root structure and |
22 | * initialize all new fields to zero. The same happens if we detect mismatching |
23 | * generation numbers as then we know the root was once mounted with an older |
24 | * kernel that was not aware of the root item structure change. |
25 | */ |
26 | static void btrfs_read_root_item(struct extent_buffer *eb, int slot, |
27 | struct btrfs_root_item *item) |
28 | { |
29 | u32 len; |
30 | int need_reset = 0; |
31 | |
32 | len = btrfs_item_size(eb, slot); |
33 | read_extent_buffer(eb, dst: item, btrfs_item_ptr_offset(eb, slot), |
34 | min_t(u32, len, sizeof(*item))); |
35 | if (len < sizeof(*item)) |
36 | need_reset = 1; |
37 | if (!need_reset && btrfs_root_generation(s: item) |
38 | != btrfs_root_generation_v2(s: item)) { |
39 | if (btrfs_root_generation_v2(s: item) != 0) { |
40 | btrfs_warn(eb->fs_info, |
41 | "mismatching generation and generation_v2 found in root item. This root was probably mounted with an older kernel. Resetting all new fields." ); |
42 | } |
43 | need_reset = 1; |
44 | } |
45 | if (need_reset) { |
46 | /* Clear all members from generation_v2 onwards. */ |
47 | memset_startat(item, 0, generation_v2); |
48 | generate_random_guid(guid: item->uuid); |
49 | } |
50 | } |
51 | |
52 | /* |
53 | * Lookup the root by the key. |
54 | * |
55 | * root: the root of the root tree |
56 | * search_key: the key to search |
57 | * path: the path we search |
58 | * root_item: the root item of the tree we look for |
59 | * root_key: the root key of the tree we look for |
60 | * |
61 | * If ->offset of 'search_key' is -1ULL, it means we are not sure the offset |
62 | * of the search key, just lookup the root with the highest offset for a |
63 | * given objectid. |
64 | * |
65 | * If we find something return 0, otherwise > 0, < 0 on error. |
66 | */ |
67 | int btrfs_find_root(struct btrfs_root *root, const struct btrfs_key *search_key, |
68 | struct btrfs_path *path, struct btrfs_root_item *root_item, |
69 | struct btrfs_key *root_key) |
70 | { |
71 | struct btrfs_key found_key; |
72 | struct extent_buffer *l; |
73 | int ret; |
74 | int slot; |
75 | |
76 | ret = btrfs_search_slot(NULL, root, key: search_key, p: path, ins_len: 0, cow: 0); |
77 | if (ret < 0) |
78 | return ret; |
79 | |
80 | if (search_key->offset != -1ULL) { /* the search key is exact */ |
81 | if (ret > 0) |
82 | goto out; |
83 | } else { |
84 | /* |
85 | * Key with offset -1 found, there would have to exist a root |
86 | * with such id, but this is out of the valid range. |
87 | */ |
88 | if (ret == 0) { |
89 | ret = -EUCLEAN; |
90 | goto out; |
91 | } |
92 | if (path->slots[0] == 0) |
93 | goto out; |
94 | path->slots[0]--; |
95 | ret = 0; |
96 | } |
97 | |
98 | l = path->nodes[0]; |
99 | slot = path->slots[0]; |
100 | |
101 | btrfs_item_key_to_cpu(eb: l, cpu_key: &found_key, nr: slot); |
102 | if (found_key.objectid != search_key->objectid || |
103 | found_key.type != BTRFS_ROOT_ITEM_KEY) { |
104 | ret = 1; |
105 | goto out; |
106 | } |
107 | |
108 | if (root_item) |
109 | btrfs_read_root_item(eb: l, slot, item: root_item); |
110 | if (root_key) |
111 | memcpy(root_key, &found_key, sizeof(found_key)); |
112 | out: |
113 | btrfs_release_path(p: path); |
114 | return ret; |
115 | } |
116 | |
117 | void btrfs_set_root_node(struct btrfs_root_item *item, |
118 | struct extent_buffer *node) |
119 | { |
120 | btrfs_set_root_bytenr(s: item, val: node->start); |
121 | btrfs_set_root_level(s: item, val: btrfs_header_level(eb: node)); |
122 | btrfs_set_root_generation(s: item, val: btrfs_header_generation(eb: node)); |
123 | } |
124 | |
125 | /* |
126 | * copy the data in 'item' into the btree |
127 | */ |
128 | int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root |
129 | *root, struct btrfs_key *key, struct btrfs_root_item |
130 | *item) |
131 | { |
132 | struct btrfs_fs_info *fs_info = root->fs_info; |
133 | struct btrfs_path *path; |
134 | struct extent_buffer *l; |
135 | int ret; |
136 | int slot; |
137 | unsigned long ptr; |
138 | u32 old_len; |
139 | |
140 | path = btrfs_alloc_path(); |
141 | if (!path) |
142 | return -ENOMEM; |
143 | |
144 | ret = btrfs_search_slot(trans, root, key, p: path, ins_len: 0, cow: 1); |
145 | if (ret < 0) |
146 | goto out; |
147 | |
148 | if (ret > 0) { |
149 | btrfs_crit(fs_info, |
150 | "unable to find root key (%llu %u %llu) in tree %llu" , |
151 | key->objectid, key->type, key->offset, |
152 | root->root_key.objectid); |
153 | ret = -EUCLEAN; |
154 | btrfs_abort_transaction(trans, ret); |
155 | goto out; |
156 | } |
157 | |
158 | l = path->nodes[0]; |
159 | slot = path->slots[0]; |
160 | ptr = btrfs_item_ptr_offset(l, slot); |
161 | old_len = btrfs_item_size(eb: l, slot); |
162 | |
163 | /* |
164 | * If this is the first time we update the root item which originated |
165 | * from an older kernel, we need to enlarge the item size to make room |
166 | * for the added fields. |
167 | */ |
168 | if (old_len < sizeof(*item)) { |
169 | btrfs_release_path(p: path); |
170 | ret = btrfs_search_slot(trans, root, key, p: path, |
171 | ins_len: -1, cow: 1); |
172 | if (ret < 0) { |
173 | btrfs_abort_transaction(trans, ret); |
174 | goto out; |
175 | } |
176 | |
177 | ret = btrfs_del_item(trans, root, path); |
178 | if (ret < 0) { |
179 | btrfs_abort_transaction(trans, ret); |
180 | goto out; |
181 | } |
182 | btrfs_release_path(p: path); |
183 | ret = btrfs_insert_empty_item(trans, root, path, |
184 | key, data_size: sizeof(*item)); |
185 | if (ret < 0) { |
186 | btrfs_abort_transaction(trans, ret); |
187 | goto out; |
188 | } |
189 | l = path->nodes[0]; |
190 | slot = path->slots[0]; |
191 | ptr = btrfs_item_ptr_offset(l, slot); |
192 | } |
193 | |
194 | /* |
195 | * Update generation_v2 so at the next mount we know the new root |
196 | * fields are valid. |
197 | */ |
198 | btrfs_set_root_generation_v2(s: item, val: btrfs_root_generation(s: item)); |
199 | |
200 | write_extent_buffer(eb: l, src: item, start: ptr, len: sizeof(*item)); |
201 | btrfs_mark_buffer_dirty(trans, buf: path->nodes[0]); |
202 | out: |
203 | btrfs_free_path(p: path); |
204 | return ret; |
205 | } |
206 | |
207 | int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
208 | const struct btrfs_key *key, struct btrfs_root_item *item) |
209 | { |
210 | /* |
211 | * Make sure generation v1 and v2 match. See update_root for details. |
212 | */ |
213 | btrfs_set_root_generation_v2(s: item, val: btrfs_root_generation(s: item)); |
214 | return btrfs_insert_item(trans, root, key, data: item, data_size: sizeof(*item)); |
215 | } |
216 | |
217 | int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info) |
218 | { |
219 | struct btrfs_root *tree_root = fs_info->tree_root; |
220 | struct extent_buffer *leaf; |
221 | struct btrfs_path *path; |
222 | struct btrfs_key key; |
223 | struct btrfs_root *root; |
224 | int err = 0; |
225 | int ret; |
226 | |
227 | path = btrfs_alloc_path(); |
228 | if (!path) |
229 | return -ENOMEM; |
230 | |
231 | key.objectid = BTRFS_ORPHAN_OBJECTID; |
232 | key.type = BTRFS_ORPHAN_ITEM_KEY; |
233 | key.offset = 0; |
234 | |
235 | while (1) { |
236 | u64 root_objectid; |
237 | |
238 | ret = btrfs_search_slot(NULL, root: tree_root, key: &key, p: path, ins_len: 0, cow: 0); |
239 | if (ret < 0) { |
240 | err = ret; |
241 | break; |
242 | } |
243 | |
244 | leaf = path->nodes[0]; |
245 | if (path->slots[0] >= btrfs_header_nritems(eb: leaf)) { |
246 | ret = btrfs_next_leaf(root: tree_root, path); |
247 | if (ret < 0) |
248 | err = ret; |
249 | if (ret != 0) |
250 | break; |
251 | leaf = path->nodes[0]; |
252 | } |
253 | |
254 | btrfs_item_key_to_cpu(eb: leaf, cpu_key: &key, nr: path->slots[0]); |
255 | btrfs_release_path(p: path); |
256 | |
257 | if (key.objectid != BTRFS_ORPHAN_OBJECTID || |
258 | key.type != BTRFS_ORPHAN_ITEM_KEY) |
259 | break; |
260 | |
261 | root_objectid = key.offset; |
262 | key.offset++; |
263 | |
264 | root = btrfs_get_fs_root(fs_info, objectid: root_objectid, check_ref: false); |
265 | err = PTR_ERR_OR_ZERO(ptr: root); |
266 | if (err && err != -ENOENT) { |
267 | break; |
268 | } else if (err == -ENOENT) { |
269 | struct btrfs_trans_handle *trans; |
270 | |
271 | btrfs_release_path(p: path); |
272 | |
273 | trans = btrfs_join_transaction(root: tree_root); |
274 | if (IS_ERR(ptr: trans)) { |
275 | err = PTR_ERR(ptr: trans); |
276 | btrfs_handle_fs_error(fs_info, err, |
277 | "Failed to start trans to delete orphan item" ); |
278 | break; |
279 | } |
280 | err = btrfs_del_orphan_item(trans, root: tree_root, |
281 | offset: root_objectid); |
282 | btrfs_end_transaction(trans); |
283 | if (err) { |
284 | btrfs_handle_fs_error(fs_info, err, |
285 | "Failed to delete root orphan item" ); |
286 | break; |
287 | } |
288 | continue; |
289 | } |
290 | |
291 | WARN_ON(!test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state)); |
292 | if (btrfs_root_refs(s: &root->root_item) == 0) { |
293 | struct btrfs_key drop_key; |
294 | |
295 | btrfs_disk_key_to_cpu(cpu_key: &drop_key, disk_key: &root->root_item.drop_progress); |
296 | /* |
297 | * If we have a non-zero drop_progress then we know we |
298 | * made it partly through deleting this snapshot, and |
299 | * thus we need to make sure we block any balance from |
300 | * happening until this snapshot is completely dropped. |
301 | */ |
302 | if (drop_key.objectid != 0 || drop_key.type != 0 || |
303 | drop_key.offset != 0) { |
304 | set_bit(nr: BTRFS_FS_UNFINISHED_DROPS, addr: &fs_info->flags); |
305 | set_bit(nr: BTRFS_ROOT_UNFINISHED_DROP, addr: &root->state); |
306 | } |
307 | |
308 | set_bit(nr: BTRFS_ROOT_DEAD_TREE, addr: &root->state); |
309 | btrfs_add_dead_root(root); |
310 | } |
311 | btrfs_put_root(root); |
312 | } |
313 | |
314 | btrfs_free_path(p: path); |
315 | return err; |
316 | } |
317 | |
318 | /* drop the root item for 'key' from the tree root */ |
319 | int btrfs_del_root(struct btrfs_trans_handle *trans, |
320 | const struct btrfs_key *key) |
321 | { |
322 | struct btrfs_root *root = trans->fs_info->tree_root; |
323 | struct btrfs_path *path; |
324 | int ret; |
325 | |
326 | path = btrfs_alloc_path(); |
327 | if (!path) |
328 | return -ENOMEM; |
329 | ret = btrfs_search_slot(trans, root, key, p: path, ins_len: -1, cow: 1); |
330 | if (ret < 0) |
331 | goto out; |
332 | if (ret != 0) { |
333 | /* The root must exist but we did not find it by the key. */ |
334 | ret = -EUCLEAN; |
335 | goto out; |
336 | } |
337 | |
338 | ret = btrfs_del_item(trans, root, path); |
339 | out: |
340 | btrfs_free_path(p: path); |
341 | return ret; |
342 | } |
343 | |
344 | int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id, |
345 | u64 ref_id, u64 dirid, u64 *sequence, |
346 | const struct fscrypt_str *name) |
347 | { |
348 | struct btrfs_root *tree_root = trans->fs_info->tree_root; |
349 | struct btrfs_path *path; |
350 | struct btrfs_root_ref *ref; |
351 | struct extent_buffer *leaf; |
352 | struct btrfs_key key; |
353 | unsigned long ptr; |
354 | int ret; |
355 | |
356 | path = btrfs_alloc_path(); |
357 | if (!path) |
358 | return -ENOMEM; |
359 | |
360 | key.objectid = root_id; |
361 | key.type = BTRFS_ROOT_BACKREF_KEY; |
362 | key.offset = ref_id; |
363 | again: |
364 | ret = btrfs_search_slot(trans, root: tree_root, key: &key, p: path, ins_len: -1, cow: 1); |
365 | if (ret < 0) { |
366 | goto out; |
367 | } else if (ret == 0) { |
368 | leaf = path->nodes[0]; |
369 | ref = btrfs_item_ptr(leaf, path->slots[0], |
370 | struct btrfs_root_ref); |
371 | ptr = (unsigned long)(ref + 1); |
372 | if ((btrfs_root_ref_dirid(eb: leaf, s: ref) != dirid) || |
373 | (btrfs_root_ref_name_len(eb: leaf, s: ref) != name->len) || |
374 | memcmp_extent_buffer(eb: leaf, ptrv: name->name, start: ptr, len: name->len)) { |
375 | ret = -ENOENT; |
376 | goto out; |
377 | } |
378 | *sequence = btrfs_root_ref_sequence(eb: leaf, s: ref); |
379 | |
380 | ret = btrfs_del_item(trans, root: tree_root, path); |
381 | if (ret) |
382 | goto out; |
383 | } else { |
384 | ret = -ENOENT; |
385 | goto out; |
386 | } |
387 | |
388 | if (key.type == BTRFS_ROOT_BACKREF_KEY) { |
389 | btrfs_release_path(p: path); |
390 | key.objectid = ref_id; |
391 | key.type = BTRFS_ROOT_REF_KEY; |
392 | key.offset = root_id; |
393 | goto again; |
394 | } |
395 | |
396 | out: |
397 | btrfs_free_path(p: path); |
398 | return ret; |
399 | } |
400 | |
401 | /* |
402 | * add a btrfs_root_ref item. type is either BTRFS_ROOT_REF_KEY |
403 | * or BTRFS_ROOT_BACKREF_KEY. |
404 | * |
405 | * The dirid, sequence, name and name_len refer to the directory entry |
406 | * that is referencing the root. |
407 | * |
408 | * For a forward ref, the root_id is the id of the tree referencing |
409 | * the root and ref_id is the id of the subvol or snapshot. |
410 | * |
411 | * For a back ref the root_id is the id of the subvol or snapshot and |
412 | * ref_id is the id of the tree referencing it. |
413 | * |
414 | * Will return 0, -ENOMEM, or anything from the CoW path |
415 | */ |
416 | int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id, |
417 | u64 ref_id, u64 dirid, u64 sequence, |
418 | const struct fscrypt_str *name) |
419 | { |
420 | struct btrfs_root *tree_root = trans->fs_info->tree_root; |
421 | struct btrfs_key key; |
422 | int ret; |
423 | struct btrfs_path *path; |
424 | struct btrfs_root_ref *ref; |
425 | struct extent_buffer *leaf; |
426 | unsigned long ptr; |
427 | |
428 | path = btrfs_alloc_path(); |
429 | if (!path) |
430 | return -ENOMEM; |
431 | |
432 | key.objectid = root_id; |
433 | key.type = BTRFS_ROOT_BACKREF_KEY; |
434 | key.offset = ref_id; |
435 | again: |
436 | ret = btrfs_insert_empty_item(trans, root: tree_root, path, key: &key, |
437 | data_size: sizeof(*ref) + name->len); |
438 | if (ret) { |
439 | btrfs_abort_transaction(trans, ret); |
440 | btrfs_free_path(p: path); |
441 | return ret; |
442 | } |
443 | |
444 | leaf = path->nodes[0]; |
445 | ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); |
446 | btrfs_set_root_ref_dirid(eb: leaf, s: ref, val: dirid); |
447 | btrfs_set_root_ref_sequence(eb: leaf, s: ref, val: sequence); |
448 | btrfs_set_root_ref_name_len(eb: leaf, s: ref, val: name->len); |
449 | ptr = (unsigned long)(ref + 1); |
450 | write_extent_buffer(eb: leaf, src: name->name, start: ptr, len: name->len); |
451 | btrfs_mark_buffer_dirty(trans, buf: leaf); |
452 | |
453 | if (key.type == BTRFS_ROOT_BACKREF_KEY) { |
454 | btrfs_release_path(p: path); |
455 | key.objectid = ref_id; |
456 | key.type = BTRFS_ROOT_REF_KEY; |
457 | key.offset = root_id; |
458 | goto again; |
459 | } |
460 | |
461 | btrfs_free_path(p: path); |
462 | return 0; |
463 | } |
464 | |
465 | /* |
466 | * Old btrfs forgets to init root_item->flags and root_item->byte_limit |
467 | * for subvolumes. To work around this problem, we steal a bit from |
468 | * root_item->inode_item->flags, and use it to indicate if those fields |
469 | * have been properly initialized. |
470 | */ |
471 | void btrfs_check_and_init_root_item(struct btrfs_root_item *root_item) |
472 | { |
473 | u64 inode_flags = btrfs_stack_inode_flags(s: &root_item->inode); |
474 | |
475 | if (!(inode_flags & BTRFS_INODE_ROOT_ITEM_INIT)) { |
476 | inode_flags |= BTRFS_INODE_ROOT_ITEM_INIT; |
477 | btrfs_set_stack_inode_flags(s: &root_item->inode, val: inode_flags); |
478 | btrfs_set_root_flags(s: root_item, val: 0); |
479 | btrfs_set_root_limit(s: root_item, val: 0); |
480 | } |
481 | } |
482 | |
483 | void btrfs_update_root_times(struct btrfs_trans_handle *trans, |
484 | struct btrfs_root *root) |
485 | { |
486 | struct btrfs_root_item *item = &root->root_item; |
487 | struct timespec64 ct; |
488 | |
489 | ktime_get_real_ts64(tv: &ct); |
490 | spin_lock(lock: &root->root_item_lock); |
491 | btrfs_set_root_ctransid(s: item, val: trans->transid); |
492 | btrfs_set_stack_timespec_sec(s: &item->ctime, val: ct.tv_sec); |
493 | btrfs_set_stack_timespec_nsec(s: &item->ctime, val: ct.tv_nsec); |
494 | spin_unlock(lock: &root->root_item_lock); |
495 | } |
496 | |
497 | /* |
498 | * Reserve space for subvolume operation. |
499 | * |
500 | * root: the root of the parent directory |
501 | * rsv: block reservation |
502 | * items: the number of items that we need do reservation |
503 | * use_global_rsv: allow fallback to the global block reservation |
504 | * |
505 | * This function is used to reserve the space for snapshot/subvolume |
506 | * creation and deletion. Those operations are different with the |
507 | * common file/directory operations, they change two fs/file trees |
508 | * and root tree, the number of items that the qgroup reserves is |
509 | * different with the free space reservation. So we can not use |
510 | * the space reservation mechanism in start_transaction(). |
511 | */ |
512 | int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, |
513 | struct btrfs_block_rsv *rsv, int items, |
514 | bool use_global_rsv) |
515 | { |
516 | u64 qgroup_num_bytes = 0; |
517 | u64 num_bytes; |
518 | int ret; |
519 | struct btrfs_fs_info *fs_info = root->fs_info; |
520 | struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; |
521 | |
522 | if (btrfs_qgroup_enabled(fs_info)) { |
523 | /* One for parent inode, two for dir entries */ |
524 | qgroup_num_bytes = 3 * fs_info->nodesize; |
525 | ret = btrfs_qgroup_reserve_meta_prealloc(root, |
526 | num_bytes: qgroup_num_bytes, enforce: true, |
527 | noflush: false); |
528 | if (ret) |
529 | return ret; |
530 | } |
531 | |
532 | num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_items: items); |
533 | rsv->space_info = btrfs_find_space_info(info: fs_info, |
534 | BTRFS_BLOCK_GROUP_METADATA); |
535 | ret = btrfs_block_rsv_add(fs_info, block_rsv: rsv, num_bytes, |
536 | flush: BTRFS_RESERVE_FLUSH_ALL); |
537 | |
538 | if (ret == -ENOSPC && use_global_rsv) |
539 | ret = btrfs_block_rsv_migrate(src_rsv: global_rsv, dst_rsv: rsv, num_bytes, update_size: true); |
540 | |
541 | if (ret && qgroup_num_bytes) |
542 | btrfs_qgroup_free_meta_prealloc(root, num_bytes: qgroup_num_bytes); |
543 | |
544 | if (!ret) { |
545 | spin_lock(lock: &rsv->lock); |
546 | rsv->qgroup_rsv_reserved += qgroup_num_bytes; |
547 | spin_unlock(lock: &rsv->lock); |
548 | } |
549 | return ret; |
550 | } |
551 | |