// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2007 Oracle. All rights reserved.
 */

#include <linux/err.h>
#include <linux/uuid.h>
#include "ctree.h"
#include "fs.h"
#include "messages.h"
#include "transaction.h"
#include "disk-io.h"
#include "qgroup.h"
#include "space-info.h"
#include "accessors.h"
#include "root-tree.h"
#include "orphan.h"

/*
 * Read a root item from the tree. In case we detect a root item smaller than
 * sizeof(root_item), we know it's an old version of the root structure and
 * initialize all new fields to zero. The same happens if we detect mismatching
 * generation numbers as then we know the root was once mounted with an older
 * kernel that was not aware of the root item structure change.
 */
static void btrfs_read_root_item(struct extent_buffer *eb, int slot,
				 struct btrfs_root_item *item)
{
	u32 len;
	int need_reset = 0;

	len = btrfs_item_size(eb, slot);
	read_extent_buffer(eb, item, btrfs_item_ptr_offset(eb, slot),
			   min_t(u32, len, sizeof(*item)));
	if (len < sizeof(*item))
		need_reset = 1;
	if (!need_reset && btrfs_root_generation(item)
		!= btrfs_root_generation_v2(item)) {
		if (btrfs_root_generation_v2(item) != 0) {
			btrfs_warn(eb->fs_info,
				   "mismatching generation and generation_v2 found in root item. This root was probably mounted with an older kernel. Resetting all new fields.");
		}
		need_reset = 1;
	}
	if (need_reset) {
		/* Clear all members from generation_v2 onwards. */
		memset_startat(item, 0, generation_v2);
		generate_random_guid(item->uuid);
	}
}

/*
 * Look up a root by its key.
 *
 * root: the root of the root tree
 * search_key: the key to search for
 * path: the path used for the search
 * root_item: the root item of the tree we look for
 * root_key: the root key of the tree we look for
 *
 * If ->offset of 'search_key' is -1ULL, the exact offset is not known, so look
 * up the root with the highest offset for the given objectid.
 *
 * Returns 0 if a matching root was found, > 0 if it was not, and < 0 on error.
 */
int btrfs_find_root(struct btrfs_root *root, const struct btrfs_key *search_key,
		    struct btrfs_path *path, struct btrfs_root_item *root_item,
		    struct btrfs_key *root_key)
{
	struct btrfs_key found_key;
	struct extent_buffer *l;
	int ret;
	int slot;

	ret = btrfs_search_slot(NULL, root, search_key, path, 0, 0);
	if (ret < 0)
		return ret;

	if (search_key->offset != -1ULL) {	/* the search key is exact */
		if (ret > 0)
			goto out;
	} else {
		/*
		 * If a key with offset -1 was found exactly, a root with such
		 * an id would have to exist, but that is outside the valid
		 * range, so treat it as corruption.
		 */
		if (ret == 0) {
			ret = -EUCLEAN;
			goto out;
		}
		if (path->slots[0] == 0)
			goto out;
		path->slots[0]--;
		ret = 0;
	}

	l = path->nodes[0];
	slot = path->slots[0];

	btrfs_item_key_to_cpu(l, &found_key, slot);
	if (found_key.objectid != search_key->objectid ||
	    found_key.type != BTRFS_ROOT_ITEM_KEY) {
		ret = 1;
		goto out;
	}

	if (root_item)
		btrfs_read_root_item(l, slot, root_item);
	if (root_key)
		memcpy(root_key, &found_key, sizeof(found_key));
out:
	btrfs_release_path(path);
	return ret;
}

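/*
 * Point the root item at the given tree node: record the node's logical
 * address (bytenr), level and generation in the root item.
 */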
void btrfs_set_root_node(struct btrfs_root_item *item,
			 struct extent_buffer *node)
{
	btrfs_set_root_bytenr(item, node->start);
	btrfs_set_root_level(item, btrfs_header_level(node));
	btrfs_set_root_generation(item, btrfs_header_generation(node));
}

/*
 * Copy the data in 'item' into the btree at the given key.
 */
int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_key *key, struct btrfs_root_item
		      *item)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_path *path;
	struct extent_buffer *l;
	int ret;
	int slot;
	unsigned long ptr;
	u32 old_len;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_search_slot(trans, root, key, path, 0, 1);
	if (ret < 0)
		goto out;

	if (ret > 0) {
		btrfs_crit(fs_info,
			"unable to find root key (%llu %u %llu) in tree %llu",
			key->objectid, key->type, key->offset, btrfs_root_id(root));
		ret = -EUCLEAN;
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	l = path->nodes[0];
	slot = path->slots[0];
	ptr = btrfs_item_ptr_offset(l, slot);
	old_len = btrfs_item_size(l, slot);

	/*
	 * If this is the first time we update the root item which originated
	 * from an older kernel, we need to enlarge the item size to make room
	 * for the added fields.
	 */
	if (old_len < sizeof(*item)) {
		btrfs_release_path(path);
		ret = btrfs_search_slot(trans, root, key, path,
				-1, 1);
		if (ret < 0) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}

		ret = btrfs_del_item(trans, root, path);
		if (ret < 0) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
		btrfs_release_path(path);
		ret = btrfs_insert_empty_item(trans, root, path,
				key, sizeof(*item));
		if (ret < 0) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
		l = path->nodes[0];
		slot = path->slots[0];
		ptr = btrfs_item_ptr_offset(l, slot);
	}

	/*
	 * Update generation_v2 so at the next mount we know the new root
	 * fields are valid.
	 */
	btrfs_set_root_generation_v2(item, btrfs_root_generation(item));

	write_extent_buffer(l, item, ptr, sizeof(*item));
out:
	btrfs_free_path(path);
	return ret;
}

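/*
 * Insert a new root item at 'key' into the given root tree, keeping
 * generation and generation_v2 in sync.
 */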
int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		      const struct btrfs_key *key, struct btrfs_root_item *item)
{
	/*
	 * Make sure generation v1 and v2 match. See btrfs_update_root() for
	 * details.
	 */
	btrfs_set_root_generation_v2(item, btrfs_root_generation(item));
	return btrfs_insert_item(trans, root, key, item, sizeof(*item));
}

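/*
 * Walk all orphan items in the tree root. For each one, delete the orphan
 * item again if the corresponding root no longer exists; otherwise, if the
 * root has no references left, mark it as a dead tree (and note any
 * unfinished drop) so the cleaner thread can finish removing it.
 */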
int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct extent_buffer *leaf;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_root *root;
	int err = 0;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = BTRFS_ORPHAN_OBJECTID;
	key.type = BTRFS_ORPHAN_ITEM_KEY;
	key.offset = 0;

	while (1) {
		u64 root_objectid;

		ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
		if (ret < 0) {
			err = ret;
			break;
		}

		leaf = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(tree_root, path);
			if (ret < 0)
				err = ret;
			if (ret != 0)
				break;
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		btrfs_release_path(path);

		if (key.objectid != BTRFS_ORPHAN_OBJECTID ||
		    key.type != BTRFS_ORPHAN_ITEM_KEY)
			break;

		root_objectid = key.offset;
		key.offset++;

		root = btrfs_get_fs_root(fs_info, root_objectid, false);
		err = PTR_ERR_OR_ZERO(root);
		if (err && err != -ENOENT) {
			break;
		} else if (err == -ENOENT) {
			struct btrfs_trans_handle *trans;

			btrfs_release_path(path);

			trans = btrfs_join_transaction(tree_root);
			if (IS_ERR(trans)) {
				err = PTR_ERR(trans);
				btrfs_handle_fs_error(fs_info, err,
					    "Failed to start trans to delete orphan item");
				break;
			}
			err = btrfs_del_orphan_item(trans, tree_root,
						    root_objectid);
			btrfs_end_transaction(trans);
			if (err) {
				btrfs_handle_fs_error(fs_info, err,
					    "Failed to delete root orphan item");
				break;
			}
			continue;
		}

		WARN_ON(!test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state));
		if (btrfs_root_refs(&root->root_item) == 0) {
			struct btrfs_key drop_key;

			btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
			/*
			 * If we have a non-zero drop_progress then we know we
			 * made it partly through deleting this snapshot, and
			 * thus we need to make sure we block any balance from
			 * happening until this snapshot is completely dropped.
			 */
			if (drop_key.objectid != 0 || drop_key.type != 0 ||
			    drop_key.offset != 0) {
				set_bit(BTRFS_FS_UNFINISHED_DROPS, &fs_info->flags);
				set_bit(BTRFS_ROOT_UNFINISHED_DROP, &root->state);
			}

			set_bit(BTRFS_ROOT_DEAD_TREE, &root->state);
			btrfs_add_dead_root(root);
		}
		btrfs_put_root(root);
	}

	btrfs_free_path(path);
	return err;
}

/* drop the root item for 'key' from the tree root */
int btrfs_del_root(struct btrfs_trans_handle *trans,
		   const struct btrfs_key *key)
{
	struct btrfs_root *root = trans->fs_info->tree_root;
	struct btrfs_path *path;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	ret = btrfs_search_slot(trans, root, key, path, -1, 1);
	if (ret < 0)
		goto out;
	if (ret != 0) {
		/* The root must exist but we did not find it by the key. */
		ret = -EUCLEAN;
		goto out;
	}

	ret = btrfs_del_item(trans, root, path);
out:
	btrfs_free_path(path);
	return ret;
}

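/*
 * Delete a root ref/backref pair: remove the BTRFS_ROOT_BACKREF_KEY item
 * first and then the matching BTRFS_ROOT_REF_KEY item, after verifying that
 * the directory id and name match. The ref's sequence number is returned
 * via 'sequence'.
 */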
int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
		       u64 ref_id, u64 dirid, u64 *sequence,
		       const struct fscrypt_str *name)
{
	struct btrfs_root *tree_root = trans->fs_info->tree_root;
	struct btrfs_path *path;
	struct btrfs_root_ref *ref;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	unsigned long ptr;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = root_id;
	key.type = BTRFS_ROOT_BACKREF_KEY;
	key.offset = ref_id;
again:
	ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
	if (ret < 0) {
		goto out;
	} else if (ret == 0) {
		leaf = path->nodes[0];
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_root_ref);
		ptr = (unsigned long)(ref + 1);
		if ((btrfs_root_ref_dirid(leaf, ref) != dirid) ||
		    (btrfs_root_ref_name_len(leaf, ref) != name->len) ||
		    memcmp_extent_buffer(leaf, name->name, ptr, name->len)) {
			ret = -ENOENT;
			goto out;
		}
		*sequence = btrfs_root_ref_sequence(leaf, ref);

		ret = btrfs_del_item(trans, tree_root, path);
		if (ret)
			goto out;
	} else {
		ret = -ENOENT;
		goto out;
	}

	if (key.type == BTRFS_ROOT_BACKREF_KEY) {
		btrfs_release_path(path);
		key.objectid = ref_id;
		key.type = BTRFS_ROOT_REF_KEY;
		key.offset = root_id;
		goto again;
	}

out:
	btrfs_free_path(path);
	return ret;
}

/*
 * Add a btrfs_root_ref item. Both the BTRFS_ROOT_BACKREF_KEY and the
 * BTRFS_ROOT_REF_KEY item are inserted.
 *
 * The dirid, sequence and name refer to the directory entry that is
 * referencing the root.
 *
 * For a forward ref, the root_id is the id of the tree referencing
 * the root and ref_id is the id of the subvol or snapshot.
 *
 * For a back ref the root_id is the id of the subvol or snapshot and
 * ref_id is the id of the tree referencing it.
 *
 * Will return 0, -ENOMEM, or anything from the CoW path.
 */
int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
		       u64 ref_id, u64 dirid, u64 sequence,
		       const struct fscrypt_str *name)
{
	struct btrfs_root *tree_root = trans->fs_info->tree_root;
	struct btrfs_key key;
	int ret;
	struct btrfs_path *path;
	struct btrfs_root_ref *ref;
	struct extent_buffer *leaf;
	unsigned long ptr;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = root_id;
	key.type = BTRFS_ROOT_BACKREF_KEY;
	key.offset = ref_id;
again:
	ret = btrfs_insert_empty_item(trans, tree_root, path, &key,
				      sizeof(*ref) + name->len);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		btrfs_free_path(path);
		return ret;
	}

	leaf = path->nodes[0];
	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
	btrfs_set_root_ref_dirid(leaf, ref, dirid);
	btrfs_set_root_ref_sequence(leaf, ref, sequence);
	btrfs_set_root_ref_name_len(leaf, ref, name->len);
	ptr = (unsigned long)(ref + 1);
	write_extent_buffer(leaf, name->name, ptr, name->len);

	if (key.type == BTRFS_ROOT_BACKREF_KEY) {
		btrfs_release_path(path);
		key.objectid = ref_id;
		key.type = BTRFS_ROOT_REF_KEY;
		key.offset = root_id;
		goto again;
	}

	btrfs_free_path(path);
	return 0;
}

/*
 * Old btrfs versions did not initialize root_item->flags and
 * root_item->byte_limit for subvolumes. To work around this problem, we steal
 * a bit from root_item->inode_item->flags and use it to indicate whether those
 * fields have been properly initialized.
 */
void btrfs_check_and_init_root_item(struct btrfs_root_item *root_item)
{
	u64 inode_flags = btrfs_stack_inode_flags(&root_item->inode);

	if (!(inode_flags & BTRFS_INODE_ROOT_ITEM_INIT)) {
		inode_flags |= BTRFS_INODE_ROOT_ITEM_INIT;
		btrfs_set_stack_inode_flags(&root_item->inode, inode_flags);
		btrfs_set_root_flags(root_item, 0);
		btrfs_set_root_limit(root_item, 0);
	}
}

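/*
 * Record the current transaction id and wall-clock time as the root item's
 * ctransid and ctime, under the root item lock.
 */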
void btrfs_update_root_times(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root)
{
	struct btrfs_root_item *item = &root->root_item;
	struct timespec64 ct;

	ktime_get_real_ts64(&ct);
	spin_lock(&root->root_item_lock);
	btrfs_set_root_ctransid(item, trans->transid);
	btrfs_set_stack_timespec_sec(&item->ctime, ct.tv_sec);
	btrfs_set_stack_timespec_nsec(&item->ctime, ct.tv_nsec);
	spin_unlock(&root->root_item_lock);
}

/*
 * Reserve space for a subvolume operation.
 *
 * root: the root of the parent directory
 * rsv: block reservation
 * items: the number of items that we need to reserve space for
 * use_global_rsv: allow fallback to the global block reservation
 *
 * This function is used to reserve the space for snapshot/subvolume
 * creation and deletion. Those operations are different from the common
 * file/directory operations: they modify two fs/file trees and the root
 * tree, and the number of items that the qgroup reserves differs from the
 * free space reservation. So we can not use the space reservation
 * mechanism in start_transaction().
 */
int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
				     struct btrfs_block_rsv *rsv, int items,
				     bool use_global_rsv)
{
	u64 qgroup_num_bytes = 0;
	u64 num_bytes;
	int ret;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;

	if (btrfs_qgroup_enabled(fs_info)) {
		/* One for parent inode, two for dir entries */
		qgroup_num_bytes = 3 * fs_info->nodesize;
		ret = btrfs_qgroup_reserve_meta_prealloc(root,
							 qgroup_num_bytes, true,
							 false);
		if (ret)
			return ret;
	}

	num_bytes = btrfs_calc_insert_metadata_size(fs_info, items);
	rsv->space_info = btrfs_find_space_info(fs_info,
						BTRFS_BLOCK_GROUP_METADATA);
	ret = btrfs_block_rsv_add(fs_info, rsv, num_bytes,
				  BTRFS_RESERVE_FLUSH_ALL);

	if (ret == -ENOSPC && use_global_rsv)
		ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, true);

	if (ret && qgroup_num_bytes)
		btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);

	if (!ret) {
		spin_lock(&rsv->lock);
		rsv->qgroup_rsv_reserved += qgroup_num_bytes;
		spin_unlock(&rsv->lock);
	}
	return ret;
}