backref.c source code [linux/fs/btrfs/backref.c]

1	// SPDX-License-Identifier: GPL-2.0
2	/*
3	* Copyright (C) 2011 STRATO. All rights reserved.
4	*/
5
6	#include <linux/mm.h>
7	#include <linux/rbtree.h>
8	#include <trace/events/btrfs.h>
9	#include "ctree.h"
10	#include "disk-io.h"
11	#include "backref.h"
12	#include "ulist.h"
13	#include "transaction.h"
14	#include "delayed-ref.h"
15	#include "locking.h"
16	#include "misc.h"
17	#include "tree-mod-log.h"
18	#include "fs.h"
19	#include "accessors.h"
20	#include "extent-tree.h"
21	#include "relocation.h"
22	#include "tree-checker.h"
23
/*
 * Just arbitrary numbers so we can be sure one of these happened.
 *
 * Both are positive so they cannot be mistaken for 0 (success) or for a
 * negative errno value returned by the functions in this file.
 */
#define BACKREF_FOUND_SHARED     6
#define BACKREF_FOUND_NOT_SHARED 7
27
28	struct extent_inode_elem {
29	u64 inum;
30	u64 offset;
31	u64 num_bytes;
32	struct extent_inode_elem *next;
33	};
34
35	static int check_extent_in_eb(struct btrfs_backref_walk_ctx *ctx,
36	const struct btrfs_key *key,
37	const struct extent_buffer *eb,
38	const struct btrfs_file_extent_item *fi,
39	struct extent_inode_elem **eie)
40	{
41	const u64 data_len = btrfs_file_extent_num_bytes(eb, s: fi);
42	u64 offset = key->offset;
43	struct extent_inode_elem *e;
44	const u64 *root_ids;
45	int root_count;
46	bool cached;
47
48	if (!ctx->ignore_extent_item_pos &&
49	!btrfs_file_extent_compression(eb, s: fi) &&
50	!btrfs_file_extent_encryption(eb, s: fi) &&
51	!btrfs_file_extent_other_encoding(eb, s: fi)) {
52	u64 data_offset;
53
54	data_offset = btrfs_file_extent_offset(eb, s: fi);
55
56	if (ctx->extent_item_pos < data_offset \|\|
57	ctx->extent_item_pos >= data_offset + data_len)
58	return `1`;
59	offset += ctx->extent_item_pos - data_offset;
60	}
61
62	if (!ctx->indirect_ref_iterator \|\| !ctx->cache_lookup)
63	goto add_inode_elem;
64
65	cached = ctx->cache_lookup(eb->start, ctx->user_ctx, &root_ids,
66	&root_count);
67	if (!cached)
68	goto add_inode_elem;
69
70	for (int i = `0`; i < root_count; i++) {
71	int ret;
72
73	ret = ctx->indirect_ref_iterator(key->objectid, offset,
74	data_len, root_ids[i],
75	ctx->user_ctx);
76	if (ret)
77	return ret;
78	}
79
80	add_inode_elem:
81	e = kmalloc(size: sizeof(*e), GFP_NOFS);
82	if (!e)
83	return -ENOMEM;
84
85	e->next = *eie;
86	e->inum = key->objectid;
87	e->offset = offset;
88	e->num_bytes = data_len;
89	*eie = e;
90
91	return `0`;
92	}
93
94	static void free_inode_elem_list(struct extent_inode_elem *eie)
95	{
96	struct extent_inode_elem *eie_next;
97
98	for (; eie; eie = eie_next) {
99	eie_next = eie->next;
100	kfree(objp: eie);
101	}
102	}
103
104	static int find_extent_in_eb(struct btrfs_backref_walk_ctx *ctx,
105	const struct extent_buffer *eb,
106	struct extent_inode_elem **eie)
107	{
108	u64 disk_byte;
109	struct btrfs_key key;
110	struct btrfs_file_extent_item *fi;
111	int slot;
112	int nritems;
113	int extent_type;
114	int ret;
115
116	/*
117	* from the shared data ref, we only have the leaf but we need
118	* the key. thus, we must look into all items and see that we
119	* find one (some) with a reference to our extent item.
120	*/
121	nritems = btrfs_header_nritems(eb);
122	for (slot = `0`; slot < nritems; ++slot) {
123	btrfs_item_key_to_cpu(eb, cpu_key: &key, nr: slot);
124	if (key.type != BTRFS_EXTENT_DATA_KEY)
125	continue;
126	fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
127	extent_type = btrfs_file_extent_type(eb, s: fi);
128	if (extent_type == BTRFS_FILE_EXTENT_INLINE)
129	continue;
130	/ don't skip BTRFS_FILE_EXTENT_PREALLOC, we can handle that /
131	disk_byte = btrfs_file_extent_disk_bytenr(eb, s: fi);
132	if (disk_byte != ctx->bytenr)
133	continue;
134
135	ret = check_extent_in_eb(ctx, key: &key, eb, fi, eie);
136	if (ret == BTRFS_ITERATE_EXTENT_INODES_STOP \|\| ret < `0`)
137	return ret;
138	}
139
140	return `0`;
141	}
142
143	struct preftree {
144	struct rb_root_cached root;
145	unsigned int count;
146	};
147
148	#define PREFTREE_INIT { .root = RB_ROOT_CACHED, .count = 0 }
149
150	struct preftrees {
151	struct preftree direct; / BTRFS_SHARED_[DATA\|BLOCK]_REF_KEY /
152	struct preftree indirect; / BTRFS_[TREE_BLOCK\|EXTENT_DATA]_REF_KEY /
153	struct preftree indirect_missing_keys;
154	};
155
156	/*
157	* Checks for a shared extent during backref search.
158	*
159	* The share_count tracks prelim_refs (direct and indirect) having a
160	* ref->count >0:
161	* - incremented when a ref->count transitions to >0
162	* - decremented when a ref->count transitions to <1
163	*/
164	struct share_check {
165	struct btrfs_backref_share_check_ctx *ctx;
166	struct btrfs_root *root;
167	u64 inum;
168	u64 data_bytenr;
169	u64 data_extent_gen;
170	/*
171	* Counts number of inodes that refer to an extent (different inodes in
172	* the same root or different roots) that we could find. The sharedness
173	* check typically stops once this counter gets greater than 1, so it
174	* may not reflect the total number of inodes.
175	*/
176	int share_count;
177	/*
178	* The number of times we found our inode refers to the data extent we
179	* are determining the sharedness. In other words, how many file extent
180	* items we could find for our inode that point to our target data
181	* extent. The value we get here after finishing the extent sharedness
182	* check may be smaller than reality, but if it ends up being greater
183	* than 1, then we know for sure the inode has multiple file extent
184	* items that point to our inode, and we can safely assume it's useful
185	* to cache the sharedness check result.
186	*/
187	int self_ref_count;
188	bool have_delayed_delete_refs;
189	};
190
191	static inline int extent_is_shared(struct share_check *sc)
192	{
193	return (sc && sc->share_count > `1`) ? BACKREF_FOUND_SHARED : `0`;
194	}
195
196	static struct kmem_cache *btrfs_prelim_ref_cache;
197
198	int __init btrfs_prelim_ref_init(void)
199	{
200	btrfs_prelim_ref_cache = kmem_cache_create(name: "btrfs_prelim_ref",
201	size: sizeof(struct prelim_ref), align: `0`, flags: `0`, NULL);
202	if (!btrfs_prelim_ref_cache)
203	return -ENOMEM;
204	return `0`;
205	}
206
207	void __cold btrfs_prelim_ref_exit(void)
208	{
209	kmem_cache_destroy(s: btrfs_prelim_ref_cache);
210	}
211
212	static void free_pref(struct prelim_ref *ref)
213	{
214	kmem_cache_free(s: btrfs_prelim_ref_cache, objp: ref);
215	}
216
217	/*
218	* Return 0 when both refs are for the same block (and can be merged).
219	* A -1 return indicates ref1 is a 'lower' block than ref2, while 1
220	* indicates a 'higher' block.
221	*/
222	static int prelim_ref_compare(struct prelim_ref *ref1,
223	struct prelim_ref *ref2)
224	{
225	if (ref1->level < ref2->level)
226	return -`1`;
227	if (ref1->level > ref2->level)
228	return `1`;
229	if (ref1->root_id < ref2->root_id)
230	return -`1`;
231	if (ref1->root_id > ref2->root_id)
232	return `1`;
233	if (ref1->key_for_search.type < ref2->key_for_search.type)
234	return -`1`;
235	if (ref1->key_for_search.type > ref2->key_for_search.type)
236	return `1`;
237	if (ref1->key_for_search.objectid < ref2->key_for_search.objectid)
238	return -`1`;
239	if (ref1->key_for_search.objectid > ref2->key_for_search.objectid)
240	return `1`;
241	if (ref1->key_for_search.offset < ref2->key_for_search.offset)
242	return -`1`;
243	if (ref1->key_for_search.offset > ref2->key_for_search.offset)
244	return `1`;
245	if (ref1->parent < ref2->parent)
246	return -`1`;
247	if (ref1->parent > ref2->parent)
248	return `1`;
249
250	return `0`;
251	}
252
253	static void update_share_count(struct share_check sc, int* oldcount,
254	int newcount, struct prelim_ref *newref)
255	{
256	if ((!sc) \|\| (oldcount == `0` && newcount < `1`))
257	return;
258
259	if (oldcount > `0` && newcount < `1`)
260	sc->share_count--;
261	else if (oldcount < `1` && newcount > `0`)
262	sc->share_count++;
263
264	if (newref->root_id == sc->root->root_key.objectid &&
265	newref->wanted_disk_byte == sc->data_bytenr &&
266	newref->key_for_search.objectid == sc->inum)
267	sc->self_ref_count += newref->count;
268	}
269
270	/*
271	* Add @newref to the @root rbtree, merging identical refs.
272	*
273	* Callers should assume that newref has been freed after calling.
274	*/
275	static void prelim_ref_insert(const struct btrfs_fs_info *fs_info,
276	struct preftree *preftree,
277	struct prelim_ref *newref,
278	struct share_check *sc)
279	{
280	struct rb_root_cached *root;
281	struct rb_node **p;
282	struct rb_node *parent = NULL;
283	struct prelim_ref *ref;
284	int result;
285	bool leftmost = true;
286
287	root = &preftree->root;
288	p = &root->rb_root.rb_node;
289
290	while (*p) {
291	parent = *p;
292	ref = rb_entry(parent, struct prelim_ref, rbnode);
293	result = prelim_ref_compare(ref1: ref, ref2: newref);
294	if (result < `0`) {
295	p = &(*p)->rb_left;
296	} else if (result > `0`) {
297	p = &(*p)->rb_right;
298	leftmost = false;
299	} else {
300	/ Identical refs, merge them and free @newref /
301	struct extent_inode_elem *eie = ref->inode_list;
302
303	while (eie && eie->next)
304	eie = eie->next;
305
306	if (!eie)
307	ref->inode_list = newref->inode_list;
308	else
309	eie->next = newref->inode_list;
310	trace_btrfs_prelim_ref_merge(fs_info, oldref: ref, newref,
311	tree_size: preftree->count);
312	/*
313	* A delayed ref can have newref->count < 0.
314	* The ref->count is updated to follow any
315	* BTRFS_[ADD\|DROP]_DELAYED_REF actions.
316	*/
317	update_share_count(sc, oldcount: ref->count,
318	newcount: ref->count + newref->count, newref);
319	ref->count += newref->count;
320	free_pref(ref: newref);
321	return;
322	}
323	}
324
325	update_share_count(sc, oldcount: `0`, newcount: newref->count, newref);
326	preftree->count++;
327	trace_btrfs_prelim_ref_insert(fs_info, oldref: newref, NULL, tree_size: preftree->count);
328	rb_link_node(node: &newref->rbnode, parent, rb_link: p);
329	rb_insert_color_cached(node: &newref->rbnode, root, leftmost);
330	}
331
332	/*
333	* Release the entire tree. We don't care about internal consistency so
334	* just free everything and then reset the tree root.
335	*/
336	static void prelim_release(struct preftree *preftree)
337	{
338	struct prelim_ref ref, next_ref;
339
340	rbtree_postorder_for_each_entry_safe(ref, next_ref,
341	&preftree->root.rb_root, rbnode) {
342	free_inode_elem_list(eie: ref->inode_list);
343	free_pref(ref);
344	}
345
346	preftree->root = RB_ROOT_CACHED;
347	preftree->count = `0`;
348	}
349
350	/*
351	* the rules for all callers of this function are:
352	* - obtaining the parent is the goal
353	* - if you add a key, you must know that it is a correct key
354	* - if you cannot add the parent or a correct key, then we will look into the
355	* block later to set a correct key
356	*
357	* delayed refs
358	* ============
359	* backref type \| shared \| indirect \| shared \| indirect
360	* information \| tree \| tree \| data \| data
361	* --------------------+--------+----------+--------+----------
362	* parent logical \| y \| - \| - \| -
363	* key to resolve \| - \| y \| y \| y
364	* tree block logical \| - \| - \| - \| -
365	* root for resolving \| y \| y \| y \| y
366	*
367	* - column 1: we've the parent -> done
368	* - column 2, 3, 4: we use the key to find the parent
369	*
370	* on disk refs (inline or keyed)
371	* ==============================
372	* backref type \| shared \| indirect \| shared \| indirect
373	* information \| tree \| tree \| data \| data
374	* --------------------+--------+----------+--------+----------
375	* parent logical \| y \| - \| y \| -
376	* key to resolve \| - \| - \| - \| y
377	* tree block logical \| y \| y \| y \| y
378	* root for resolving \| - \| y \| y \| y
379	*
380	* - column 1, 3: we've the parent -> done
381	* - column 2: we take the first key from the block to find the parent
382	* (see add_missing_keys)
383	* - column 4: we use the key to find the parent
384	*
385	* additional information that's available but not required to find the parent
386	* block might help in merging entries to gain some speed.
387	*/
388	static int add_prelim_ref(const struct btrfs_fs_info *fs_info,
389	struct preftree *preftree, u64 root_id,
390	const struct btrfs_key key, int* level, u64 parent,
391	u64 wanted_disk_byte, int count,
392	struct share_check *sc, gfp_t gfp_mask)
393	{
394	struct prelim_ref *ref;
395
396	if (root_id == BTRFS_DATA_RELOC_TREE_OBJECTID)
397	return `0`;
398
399	ref = kmem_cache_alloc(cachep: btrfs_prelim_ref_cache, flags: gfp_mask);
400	if (!ref)
401	return -ENOMEM;
402
403	ref->root_id = root_id;
404	if (key)
405	ref->key_for_search = *key;
406	else
407	memset(&ref->key_for_search, `0`, sizeof(ref->key_for_search));
408
409	ref->inode_list = NULL;
410	ref->level = level;
411	ref->count = count;
412	ref->parent = parent;
413	ref->wanted_disk_byte = wanted_disk_byte;
414	prelim_ref_insert(fs_info, preftree, newref: ref, sc);
415	return extent_is_shared(sc);
416	}
417
418	/ direct refs use root == 0, key == NULL /
419	static int add_direct_ref(const struct btrfs_fs_info *fs_info,
420	struct preftrees preftrees, int* level, u64 parent,
421	u64 wanted_disk_byte, int count,
422	struct share_check *sc, gfp_t gfp_mask)
423	{
424	return add_prelim_ref(fs_info, preftree: &preftrees->direct, root_id: `0`, NULL, level,
425	parent, wanted_disk_byte, count, sc, gfp_mask);
426	}
427
428	/ indirect refs use parent == 0 /
429	static int add_indirect_ref(const struct btrfs_fs_info *fs_info,
430	struct preftrees *preftrees, u64 root_id,
431	const struct btrfs_key key, int* level,
432	u64 wanted_disk_byte, int count,
433	struct share_check *sc, gfp_t gfp_mask)
434	{
435	struct preftree *tree = &preftrees->indirect;
436
437	if (!key)
438	tree = &preftrees->indirect_missing_keys;
439	return add_prelim_ref(fs_info, preftree: tree, root_id, key, level, parent: `0`,
440	wanted_disk_byte, count, sc, gfp_mask);
441	}
442
443	static int is_shared_data_backref(struct preftrees *preftrees, u64 bytenr)
444	{
445	struct rb_node **p = &preftrees->direct.root.rb_root.rb_node;
446	struct rb_node *parent = NULL;
447	struct prelim_ref *ref = NULL;
448	struct prelim_ref target = {};
449	int result;
450
451	target.parent = bytenr;
452
453	while (*p) {
454	parent = *p;
455	ref = rb_entry(parent, struct prelim_ref, rbnode);
456	result = prelim_ref_compare(ref1: ref, ref2: &target);
457
458	if (result < `0`)
459	p = &(*p)->rb_left;
460	else if (result > `0`)
461	p = &(*p)->rb_right;
462	else
463	return `1`;
464	}
465	return `0`;
466	}
467
468	static int add_all_parents(struct btrfs_backref_walk_ctx *ctx,
469	struct btrfs_root root, struct* btrfs_path *path,
470	struct ulist *parents,
471	struct preftrees preftrees, struct* prelim_ref *ref,
472	int level)
473	{
474	int ret = `0`;
475	int slot;
476	struct extent_buffer *eb;
477	struct btrfs_key key;
478	struct btrfs_key *key_for_search = &ref->key_for_search;
479	struct btrfs_file_extent_item *fi;
480	struct extent_inode_elem eie = NULL, old = NULL;
481	u64 disk_byte;
482	u64 wanted_disk_byte = ref->wanted_disk_byte;
483	u64 count = `0`;
484	u64 data_offset;
485	u8 type;
486
487	if (level != `0`) {
488	eb = path->nodes[level];
489	ret = ulist_add(ulist: parents, val: eb->start, aux: `0`, GFP_NOFS);
490	if (ret < `0`)
491	return ret;
492	return `0`;
493	}
494
495	/*
496	* 1. We normally enter this function with the path already pointing to
497	* the first item to check. But sometimes, we may enter it with
498	* slot == nritems.
499	* 2. We are searching for normal backref but bytenr of this leaf
500	* matches shared data backref
501	* 3. The leaf owner is not equal to the root we are searching
502	*
503	* For these cases, go to the next leaf before we continue.
504	*/
505	eb = path->nodes[`0`];
506	if (path->slots[`0`] >= btrfs_header_nritems(eb) \|\|
507	is_shared_data_backref(preftrees, bytenr: eb->start) \|\|
508	ref->root_id != btrfs_header_owner(eb)) {
509	if (ctx->time_seq == BTRFS_SEQ_LAST)
510	ret = btrfs_next_leaf(root, path);
511	else
512	ret = btrfs_next_old_leaf(root, path, time_seq: ctx->time_seq);
513	}
514
515	while (!ret && count < ref->count) {
516	eb = path->nodes[`0`];
517	slot = path->slots[`0`];
518
519	btrfs_item_key_to_cpu(eb, cpu_key: &key, nr: slot);
520
521	if (key.objectid != key_for_search->objectid \|\|
522	key.type != BTRFS_EXTENT_DATA_KEY)
523	break;
524
525	/*
526	* We are searching for normal backref but bytenr of this leaf
527	* matches shared data backref, OR
528	* the leaf owner is not equal to the root we are searching for
529	*/
530	if (slot == `0` &&
531	(is_shared_data_backref(preftrees, bytenr: eb->start) \|\|
532	ref->root_id != btrfs_header_owner(eb))) {
533	if (ctx->time_seq == BTRFS_SEQ_LAST)
534	ret = btrfs_next_leaf(root, path);
535	else
536	ret = btrfs_next_old_leaf(root, path, time_seq: ctx->time_seq);
537	continue;
538	}
539	fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
540	type = btrfs_file_extent_type(eb, s: fi);
541	if (type == BTRFS_FILE_EXTENT_INLINE)
542	goto next;
543	disk_byte = btrfs_file_extent_disk_bytenr(eb, s: fi);
544	data_offset = btrfs_file_extent_offset(eb, s: fi);
545
546	if (disk_byte == wanted_disk_byte) {
547	eie = NULL;
548	old = NULL;
549	if (ref->key_for_search.offset == key.offset - data_offset)
550	count++;
551	else
552	goto next;
553	if (!ctx->skip_inode_ref_list) {
554	ret = check_extent_in_eb(ctx, key: &key, eb, fi, eie: &eie);
555	if (ret == BTRFS_ITERATE_EXTENT_INODES_STOP \|\|
556	ret < `0`)
557	break;
558	}
559	if (ret > `0`)
560	goto next;
561	ret = ulist_add_merge_ptr(ulist: parents, val: eb->start,
562	aux: eie, old_aux: (void **)&old, GFP_NOFS);
563	if (ret < `0`)
564	break;
565	if (!ret && !ctx->skip_inode_ref_list) {
566	while (old->next)
567	old = old->next;
568	old->next = eie;
569	}
570	eie = NULL;
571	}
572	next:
573	if (ctx->time_seq == BTRFS_SEQ_LAST)
574	ret = btrfs_next_item(root, p: path);
575	else
576	ret = btrfs_next_old_item(root, path, time_seq: ctx->time_seq);
577	}
578
579	if (ret == BTRFS_ITERATE_EXTENT_INODES_STOP \|\| ret < `0`)
580	free_inode_elem_list(eie);
581	else if (ret > `0`)
582	ret = `0`;
583
584	return ret;
585	}
586
587	/*
588	* resolve an indirect backref in the form (root_id, key, level)
589	* to a logical address
590	*/
591	static int resolve_indirect_ref(struct btrfs_backref_walk_ctx *ctx,
592	struct btrfs_path *path,
593	struct preftrees *preftrees,
594	struct prelim_ref ref, struct* ulist *parents)
595	{
596	struct btrfs_root *root;
597	struct extent_buffer *eb;
598	int ret = `0`;
599	int root_level;
600	int level = ref->level;
601	struct btrfs_key search_key = ref->key_for_search;
602
603	/*
604	* If we're search_commit_root we could possibly be holding locks on
605	* other tree nodes. This happens when qgroups does backref walks when
606	* adding new delayed refs. To deal with this we need to look in cache
607	* for the root, and if we don't find it then we need to search the
608	* tree_root's commit root, thus the btrfs_get_fs_root_commit_root usage
609	* here.
610	*/
611	if (path->search_commit_root)
612	root = btrfs_get_fs_root_commit_root(fs_info: ctx->fs_info, path, objectid: ref->root_id);
613	else
614	root = btrfs_get_fs_root(fs_info: ctx->fs_info, objectid: ref->root_id, check_ref: false);
615	if (IS_ERR(ptr: root)) {
616	ret = PTR_ERR(ptr: root);
617	goto out_free;
618	}
619
620	if (!path->search_commit_root &&
621	test_bit(BTRFS_ROOT_DELETING, &root->state)) {
622	ret = -ENOENT;
623	goto out;
624	}
625
626	if (btrfs_is_testing(fs_info: ctx->fs_info)) {
627	ret = -ENOENT;
628	goto out;
629	}
630
631	if (path->search_commit_root)
632	root_level = btrfs_header_level(eb: root->commit_root);
633	else if (ctx->time_seq == BTRFS_SEQ_LAST)
634	root_level = btrfs_header_level(eb: root->node);
635	else
636	root_level = btrfs_old_root_level(root, time_seq: ctx->time_seq);
637
638	if (root_level + `1` == level)
639	goto out;
640
641	/*
642	* We can often find data backrefs with an offset that is too large
643	* (>= LLONG_MAX, maximum allowed file offset) due to underflows when
644	* subtracting a file's offset with the data offset of its
645	* corresponding extent data item. This can happen for example in the
646	* clone ioctl.
647	*
648	* So if we detect such case we set the search key's offset to zero to
649	* make sure we will find the matching file extent item at
650	* add_all_parents(), otherwise we will miss it because the offset
651	* taken form the backref is much larger then the offset of the file
652	* extent item. This can make us scan a very large number of file
653	* extent items, but at least it will not make us miss any.
654	*
655	* This is an ugly workaround for a behaviour that should have never
656	* existed, but it does and a fix for the clone ioctl would touch a lot
657	* of places, cause backwards incompatibility and would not fix the
658	* problem for extents cloned with older kernels.
659	*/
660	if (search_key.type == BTRFS_EXTENT_DATA_KEY &&
661	search_key.offset >= LLONG_MAX)
662	search_key.offset = `0`;
663	path->lowest_level = level;
664	if (ctx->time_seq == BTRFS_SEQ_LAST)
665	ret = btrfs_search_slot(NULL, root, key: &search_key, p: path, ins_len: `0`, cow: `0`);
666	else
667	ret = btrfs_search_old_slot(root, key: &search_key, p: path, time_seq: ctx->time_seq);
668
669	btrfs_debug(ctx->fs_info,
670	"search slot in root %llu (level %d, ref count %d) returned %d for key (%llu %u %llu)",
671	ref->root_id, level, ref->count, ret,
672	ref->key_for_search.objectid, ref->key_for_search.type,
673	ref->key_for_search.offset);
674	if (ret < `0`)
675	goto out;
676
677	eb = path->nodes[level];
678	while (!eb) {
679	if (WARN_ON(!level)) {
680	ret = `1`;
681	goto out;
682	}
683	level--;
684	eb = path->nodes[level];
685	}
686
687	ret = add_all_parents(ctx, root, path, parents, preftrees, ref, level);
688	out:
689	btrfs_put_root(root);
690	out_free:
691	path->lowest_level = `0`;
692	btrfs_release_path(p: path);
693	return ret;
694	}
695
696	static struct extent_inode_elem *
697	unode_aux_to_inode_list(struct ulist_node *node)
698	{
699	if (!node)
700	return NULL;
701	return (struct extent_inode_elem *)(uintptr_t)node->aux;
702	}
703
704	static void free_leaf_list(struct ulist *ulist)
705	{
706	struct ulist_node *node;
707	struct ulist_iterator uiter;
708
709	ULIST_ITER_INIT(&uiter);
710	while ((node = ulist_next(ulist, uiter: &uiter)))
711	free_inode_elem_list(eie: unode_aux_to_inode_list(node));
712
713	ulist_free(ulist);
714	}
715
716	/*
717	* We maintain three separate rbtrees: one for direct refs, one for
718	* indirect refs which have a key, and one for indirect refs which do not
719	* have a key. Each tree does merge on insertion.
720	*
721	* Once all of the references are located, we iterate over the tree of
722	* indirect refs with missing keys. An appropriate key is located and
723	* the ref is moved onto the tree for indirect refs. After all missing
724	* keys are thus located, we iterate over the indirect ref tree, resolve
725	* each reference, and then insert the resolved reference onto the
726	* direct tree (merging there too).
727	*
728	* New backrefs (i.e., for parent nodes) are added to the appropriate
729	* rbtree as they are encountered. The new backrefs are subsequently
730	* resolved as above.
731	*/
732	static int resolve_indirect_refs(struct btrfs_backref_walk_ctx *ctx,
733	struct btrfs_path *path,
734	struct preftrees *preftrees,
735	struct share_check *sc)
736	{
737	int err;
738	int ret = `0`;
739	struct ulist *parents;
740	struct ulist_node *node;
741	struct ulist_iterator uiter;
742	struct rb_node *rnode;
743
744	parents = ulist_alloc(GFP_NOFS);
745	if (!parents)
746	return -ENOMEM;
747
748	/*
749	* We could trade memory usage for performance here by iterating
750	* the tree, allocating new refs for each insertion, and then
751	* freeing the entire indirect tree when we're done. In some test
752	* cases, the tree can grow quite large (~200k objects).
753	*/
754	while ((rnode = rb_first_cached(&preftrees->indirect.root))) {
755	struct prelim_ref *ref;
756
757	ref = rb_entry(rnode, struct prelim_ref, rbnode);
758	if (WARN(ref->parent,
759	"BUG: direct ref found in indirect tree")) {
760	ret = -EINVAL;
761	goto out;
762	}
763
764	rb_erase_cached(node: &ref->rbnode, root: &preftrees->indirect.root);
765	preftrees->indirect.count--;
766
767	if (ref->count == `0`) {
768	free_pref(ref);
769	continue;
770	}
771
772	if (sc && ref->root_id != sc->root->root_key.objectid) {
773	free_pref(ref);
774	ret = BACKREF_FOUND_SHARED;
775	goto out;
776	}
777	err = resolve_indirect_ref(ctx, path, preftrees, ref, parents);
778	/*
779	* we can only tolerate ENOENT,otherwise,we should catch error
780	* and return directly.
781	*/
782	if (err == -ENOENT) {
783	prelim_ref_insert(fs_info: ctx->fs_info, preftree: &preftrees->direct, newref: ref,
784	NULL);
785	continue;
786	} else if (err) {
787	free_pref(ref);
788	ret = err;
789	goto out;
790	}
791
792	/ we put the first parent into the ref at hand /
793	ULIST_ITER_INIT(&uiter);
794	node = ulist_next(ulist: parents, uiter: &uiter);
795	ref->parent = node ? node->val : `0`;
796	ref->inode_list = unode_aux_to_inode_list(node);
797
798	/ Add a prelim_ref(s) for any other parent(s). /
799	while ((node = ulist_next(ulist: parents, uiter: &uiter))) {
800	struct prelim_ref *new_ref;
801
802	new_ref = kmem_cache_alloc(cachep: btrfs_prelim_ref_cache,
803	GFP_NOFS);
804	if (!new_ref) {
805	free_pref(ref);
806	ret = -ENOMEM;
807	goto out;
808	}
809	memcpy(new_ref, ref, sizeof(*ref));
810	new_ref->parent = node->val;
811	new_ref->inode_list = unode_aux_to_inode_list(node);
812	prelim_ref_insert(fs_info: ctx->fs_info, preftree: &preftrees->direct,
813	newref: new_ref, NULL);
814	}
815
816	/*
817	* Now it's a direct ref, put it in the direct tree. We must
818	* do this last because the ref could be merged/freed here.
819	*/
820	prelim_ref_insert(fs_info: ctx->fs_info, preftree: &preftrees->direct, newref: ref, NULL);
821
822	ulist_reinit(ulist: parents);
823	cond_resched();
824	}
825	out:
826	/*
827	* We may have inode lists attached to refs in the parents ulist, so we
828	* must free them before freeing the ulist and its refs.
829	*/
830	free_leaf_list(ulist: parents);
831	return ret;
832	}
833
834	/*
835	* read tree blocks and add keys where required.
836	*/
837	static int add_missing_keys(struct btrfs_fs_info *fs_info,
838	struct preftrees *preftrees, bool lock)
839	{
840	struct prelim_ref *ref;
841	struct extent_buffer *eb;
842	struct preftree *tree = &preftrees->indirect_missing_keys;
843	struct rb_node *node;
844
845	while ((node = rb_first_cached(&tree->root))) {
846	struct btrfs_tree_parent_check check = { `0` };
847
848	ref = rb_entry(node, struct prelim_ref, rbnode);
849	rb_erase_cached(node, root: &tree->root);
850
851	BUG_ON(ref->parent); / should not be a direct ref /
852	BUG_ON(ref->key_for_search.type);
853	BUG_ON(!ref->wanted_disk_byte);
854
855	check.level = ref->level - `1`;
856	check.owner_root = ref->root_id;
857
858	eb = read_tree_block(fs_info, bytenr: ref->wanted_disk_byte, check: &check);
859	if (IS_ERR(ptr: eb)) {
860	free_pref(ref);
861	return PTR_ERR(ptr: eb);
862	}
863	if (!extent_buffer_uptodate(eb)) {
864	free_pref(ref);
865	free_extent_buffer(eb);
866	return -EIO;
867	}
868
869	if (lock)
870	btrfs_tree_read_lock(eb);
871	if (btrfs_header_level(eb) == `0`)
872	btrfs_item_key_to_cpu(eb, cpu_key: &ref->key_for_search, nr: `0`);
873	else
874	btrfs_node_key_to_cpu(eb, cpu_key: &ref->key_for_search, nr: `0`);
875	if (lock)
876	btrfs_tree_read_unlock(eb);
877	free_extent_buffer(eb);
878	prelim_ref_insert(fs_info, preftree: &preftrees->indirect, newref: ref, NULL);
879	cond_resched();
880	}
881	return `0`;
882	}
883
884	/*
885	* add all currently queued delayed refs from this head whose seq nr is
886	* smaller or equal that seq to the list
887	*/
888	static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
889	struct btrfs_delayed_ref_head *head, u64 seq,
890	struct preftrees preftrees, struct* share_check *sc)
891	{
892	struct btrfs_delayed_ref_node *node;
893	struct btrfs_key key;
894	struct rb_node *n;
895	int count;
896	int ret = `0`;
897
898	spin_lock(lock: &head->lock);
899	for (n = rb_first_cached(&head->ref_tree); n; n = rb_next(n)) {
900	node = rb_entry(n, struct btrfs_delayed_ref_node,
901	ref_node);
902	if (node->seq > seq)
903	continue;
904
905	switch (node->action) {
906	case BTRFS_ADD_DELAYED_EXTENT:
907	case BTRFS_UPDATE_DELAYED_HEAD:
908	WARN_ON(`1`);
909	continue;
910	case BTRFS_ADD_DELAYED_REF:
911	count = node->ref_mod;
912	break;
913	case BTRFS_DROP_DELAYED_REF:
914	count = node->ref_mod * -`1`;
915	break;
916	default:
917	BUG();
918	}
919	switch (node->type) {
920	case BTRFS_TREE_BLOCK_REF_KEY: {
921	/ NORMAL INDIRECT METADATA backref /
922	struct btrfs_delayed_tree_ref *ref;
923	struct btrfs_key *key_ptr = NULL;
924
925	if (head->extent_op && head->extent_op->update_key) {
926	btrfs_disk_key_to_cpu(cpu_key: &key, disk_key: &head->extent_op->key);
927	key_ptr = &key;
928	}
929
930	ref = btrfs_delayed_node_to_tree_ref(node);
931	ret = add_indirect_ref(fs_info, preftrees, root_id: ref->root,
932	key: key_ptr, level: ref->level + `1`,
933	wanted_disk_byte: node->bytenr, count, sc,
934	GFP_ATOMIC);
935	break;
936	}
937	case BTRFS_SHARED_BLOCK_REF_KEY: {
938	/ SHARED DIRECT METADATA backref /
939	struct btrfs_delayed_tree_ref *ref;
940
941	ref = btrfs_delayed_node_to_tree_ref(node);
942
943	ret = add_direct_ref(fs_info, preftrees, level: ref->level + `1`,
944	parent: ref->parent, wanted_disk_byte: node->bytenr, count,
945	sc, GFP_ATOMIC);
946	break;
947	}
948	case BTRFS_EXTENT_DATA_REF_KEY: {
949	/ NORMAL INDIRECT DATA backref /
950	struct btrfs_delayed_data_ref *ref;
951	ref = btrfs_delayed_node_to_data_ref(node);
952
953	key.objectid = ref->objectid;
954	key.type = BTRFS_EXTENT_DATA_KEY;
955	key.offset = ref->offset;
956
957	/*
958	* If we have a share check context and a reference for
959	* another inode, we can't exit immediately. This is
960	* because even if this is a BTRFS_ADD_DELAYED_REF
961	* reference we may find next a BTRFS_DROP_DELAYED_REF
962	* which cancels out this ADD reference.
963	*
964	* If this is a DROP reference and there was no previous
965	* ADD reference, then we need to signal that when we
966	* process references from the extent tree (through
967	* add_inline_refs() and add_keyed_refs()), we should
968	* not exit early if we find a reference for another
969	* inode, because one of the delayed DROP references
970	* may cancel that reference in the extent tree.
971	*/
972	if (sc && count < `0`)
973	sc->have_delayed_delete_refs = true;
974
975	ret = add_indirect_ref(fs_info, preftrees, root_id: ref->root,
976	key: &key, level: `0`, wanted_disk_byte: node->bytenr, count, sc,
977	GFP_ATOMIC);
978	break;
979	}
980	case BTRFS_SHARED_DATA_REF_KEY: {
981	/ SHARED DIRECT FULL backref /
982	struct btrfs_delayed_data_ref *ref;
983
984	ref = btrfs_delayed_node_to_data_ref(node);
985
986	ret = add_direct_ref(fs_info, preftrees, level: `0`, parent: ref->parent,
987	wanted_disk_byte: node->bytenr, count, sc,
988	GFP_ATOMIC);
989	break;
990	}
991	default:
992	WARN_ON(`1`);
993	}
994	/*
995	* We must ignore BACKREF_FOUND_SHARED until all delayed
996	* refs have been checked.
997	*/
998	if (ret && (ret != BACKREF_FOUND_SHARED))
999	break;
1000	}
1001	if (!ret)
1002	ret = extent_is_shared(sc);
1003
1004	spin_unlock(lock: &head->lock);
1005	return ret;
1006	}
1007
1008	/*
1009	* add all inline backrefs for bytenr to the list
1010	*
1011	* Returns 0 on success, <0 on error, or BACKREF_FOUND_SHARED.
1012	*/
1013	static int add_inline_refs(struct btrfs_backref_walk_ctx *ctx,
1014	struct btrfs_path *path,
1015	int info_level, struct* preftrees *preftrees,
1016	struct share_check *sc)
1017	{
1018	int ret = `0`;
1019	int slot;
1020	struct extent_buffer *leaf;
1021	struct btrfs_key key;
1022	struct btrfs_key found_key;
1023	unsigned long ptr;
1024	unsigned long end;
1025	struct btrfs_extent_item *ei;
1026	u64 flags;
1027	u64 item_size;
1028
1029	/*
1030	* enumerate all inline refs
1031	*/
1032	leaf = path->nodes[`0`];
1033	slot = path->slots[`0`];
1034
1035	item_size = btrfs_item_size(eb: leaf, slot);
1036	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
1037
1038	if (ctx->check_extent_item) {
1039	ret = ctx->check_extent_item(ctx->bytenr, ei, leaf, ctx->user_ctx);
1040	if (ret)
1041	return ret;
1042	}
1043
1044	flags = btrfs_extent_flags(eb: leaf, s: ei);
1045	btrfs_item_key_to_cpu(eb: leaf, cpu_key: &found_key, nr: slot);
1046
1047	ptr = (unsigned long)(ei + `1`);
1048	end = (unsigned long)ei + item_size;
1049
1050	if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
1051	flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
1052	struct btrfs_tree_block_info *info;
1053
1054	info = (struct btrfs_tree_block_info *)ptr;
1055	*info_level = btrfs_tree_block_level(eb: leaf, s: info);
1056	ptr += sizeof(struct btrfs_tree_block_info);
1057	BUG_ON(ptr > end);
1058	} else if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
1059	*info_level = found_key.offset;
1060	} else {
1061	BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
1062	}
1063
1064	while (ptr < end) {
1065	struct btrfs_extent_inline_ref *iref;
1066	u64 offset;
1067	int type;
1068
1069	iref = (struct btrfs_extent_inline_ref *)ptr;
1070	type = btrfs_get_extent_inline_ref_type(eb: leaf, iref,
1071	is_data: BTRFS_REF_TYPE_ANY);
1072	if (type == BTRFS_REF_TYPE_INVALID)
1073	return -EUCLEAN;
1074
1075	offset = btrfs_extent_inline_ref_offset(eb: leaf, s: iref);
1076
1077	switch (type) {
1078	case BTRFS_SHARED_BLOCK_REF_KEY:
1079	ret = add_direct_ref(fs_info: ctx->fs_info, preftrees,
1080	level: *info_level + `1`, parent: offset,
1081	wanted_disk_byte: ctx->bytenr, count: `1`, NULL, GFP_NOFS);
1082	break;
1083	case BTRFS_SHARED_DATA_REF_KEY: {
1084	struct btrfs_shared_data_ref *sdref;
1085	int count;
1086
1087	sdref = (struct btrfs_shared_data_ref *)(iref + `1`);
1088	count = btrfs_shared_data_ref_count(eb: leaf, s: sdref);
1089
1090	ret = add_direct_ref(fs_info: ctx->fs_info, preftrees, level: `0`, parent: offset,
1091	wanted_disk_byte: ctx->bytenr, count, sc, GFP_NOFS);
1092	break;
1093	}
1094	case BTRFS_TREE_BLOCK_REF_KEY:
1095	ret = add_indirect_ref(fs_info: ctx->fs_info, preftrees, root_id: offset,
1096	NULL, level: *info_level + `1`,
1097	wanted_disk_byte: ctx->bytenr, count: `1`, NULL, GFP_NOFS);
1098	break;
1099	case BTRFS_EXTENT_DATA_REF_KEY: {
1100	struct btrfs_extent_data_ref *dref;
1101	int count;
1102	u64 root;
1103
1104	dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1105	count = btrfs_extent_data_ref_count(eb: leaf, s: dref);
1106	key.objectid = btrfs_extent_data_ref_objectid(eb: leaf,
1107	s: dref);
1108	key.type = BTRFS_EXTENT_DATA_KEY;
1109	key.offset = btrfs_extent_data_ref_offset(eb: leaf, s: dref);
1110
1111	if (sc && key.objectid != sc->inum &&
1112	!sc->have_delayed_delete_refs) {
1113	ret = BACKREF_FOUND_SHARED;
1114	break;
1115	}
1116
1117	root = btrfs_extent_data_ref_root(eb: leaf, s: dref);
1118
1119	if (!ctx->skip_data_ref \|\|
1120	!ctx->skip_data_ref(root, key.objectid, key.offset,
1121	ctx->user_ctx))
1122	ret = add_indirect_ref(fs_info: ctx->fs_info, preftrees,
1123	root_id: root, key: &key, level: `0`, wanted_disk_byte: ctx->bytenr,
1124	count, sc, GFP_NOFS);
1125	break;
1126	}
1127	case BTRFS_EXTENT_OWNER_REF_KEY:
1128	ASSERT(btrfs_fs_incompat(ctx->fs_info, SIMPLE_QUOTA));
1129	break;
1130	default:
1131	WARN_ON(`1`);
1132	}
1133	if (ret)
1134	return ret;
1135	ptr += btrfs_extent_inline_ref_size(type);
1136	}
1137
1138	return `0`;
1139	}
1140
1141	/*
1142	* add all non-inline backrefs for bytenr to the list
1143	*
1144	* Returns 0 on success, <0 on error, or BACKREF_FOUND_SHARED.
1145	*/
1146	static int add_keyed_refs(struct btrfs_backref_walk_ctx *ctx,
1147	struct btrfs_root *extent_root,
1148	struct btrfs_path *path,
1149	int info_level, struct preftrees *preftrees,
1150	struct share_check *sc)
1151	{
1152	struct btrfs_fs_info *fs_info = extent_root->fs_info;
1153	int ret;
1154	int slot;
1155	struct extent_buffer *leaf;
1156	struct btrfs_key key;
1157
1158	while (`1`) {
1159	ret = btrfs_next_item(root: extent_root, p: path);
1160	if (ret < `0`)
1161	break;
1162	if (ret) {
1163	ret = `0`;
1164	break;
1165	}
1166
1167	slot = path->slots[`0`];
1168	leaf = path->nodes[`0`];
1169	btrfs_item_key_to_cpu(eb: leaf, cpu_key: &key, nr: slot);
1170
1171	if (key.objectid != ctx->bytenr)
1172	break;
1173	if (key.type < BTRFS_TREE_BLOCK_REF_KEY)
1174	continue;
1175	if (key.type > BTRFS_SHARED_DATA_REF_KEY)
1176	break;
1177
1178	switch (key.type) {
1179	case BTRFS_SHARED_BLOCK_REF_KEY:
1180	/ SHARED DIRECT METADATA backref /
1181	ret = add_direct_ref(fs_info, preftrees,
1182	level: info_level + `1`, parent: key.offset,
1183	wanted_disk_byte: ctx->bytenr, count: `1`, NULL, GFP_NOFS);
1184	break;
1185	case BTRFS_SHARED_DATA_REF_KEY: {
1186	/ SHARED DIRECT FULL backref /
1187	struct btrfs_shared_data_ref *sdref;
1188	int count;
1189
1190	sdref = btrfs_item_ptr(leaf, slot,
1191	struct btrfs_shared_data_ref);
1192	count = btrfs_shared_data_ref_count(eb: leaf, s: sdref);
1193	ret = add_direct_ref(fs_info, preftrees, level: `0`,
1194	parent: key.offset, wanted_disk_byte: ctx->bytenr, count,
1195	sc, GFP_NOFS);
1196	break;
1197	}
1198	case BTRFS_TREE_BLOCK_REF_KEY:
1199	/ NORMAL INDIRECT METADATA backref /
1200	ret = add_indirect_ref(fs_info, preftrees, root_id: key.offset,
1201	NULL, level: info_level + `1`, wanted_disk_byte: ctx->bytenr,
1202	count: `1`, NULL, GFP_NOFS);
1203	break;
1204	case BTRFS_EXTENT_DATA_REF_KEY: {
1205	/ NORMAL INDIRECT DATA backref /
1206	struct btrfs_extent_data_ref *dref;
1207	int count;
1208	u64 root;
1209
1210	dref = btrfs_item_ptr(leaf, slot,
1211	struct btrfs_extent_data_ref);
1212	count = btrfs_extent_data_ref_count(eb: leaf, s: dref);
1213	key.objectid = btrfs_extent_data_ref_objectid(eb: leaf,
1214	s: dref);
1215	key.type = BTRFS_EXTENT_DATA_KEY;
1216	key.offset = btrfs_extent_data_ref_offset(eb: leaf, s: dref);
1217
1218	if (sc && key.objectid != sc->inum &&
1219	!sc->have_delayed_delete_refs) {
1220	ret = BACKREF_FOUND_SHARED;
1221	break;
1222	}
1223
1224	root = btrfs_extent_data_ref_root(eb: leaf, s: dref);
1225
1226	if (!ctx->skip_data_ref \|\|
1227	!ctx->skip_data_ref(root, key.objectid, key.offset,
1228	ctx->user_ctx))
1229	ret = add_indirect_ref(fs_info, preftrees, root_id: root,
1230	key: &key, level: `0`, wanted_disk_byte: ctx->bytenr,
1231	count, sc, GFP_NOFS);
1232	break;
1233	}
1234	default:
1235	WARN_ON(`1`);
1236	}
1237	if (ret)
1238	return ret;
1239
1240	}
1241
1242	return ret;
1243	}
1244
1245	/*
1246	* The caller has joined a transaction or is holding a read lock on the
1247	* fs_info->commit_root_sem semaphore, so no need to worry about the root's last
1248	* snapshot field changing while updating or checking the cache.
1249	*/
1250	static bool lookup_backref_shared_cache(struct btrfs_backref_share_check_ctx *ctx,
1251	struct btrfs_root *root,
1252	u64 bytenr, int level, bool *is_shared)
1253	{
1254	const struct btrfs_fs_info *fs_info = root->fs_info;
1255	struct btrfs_backref_shared_cache_entry *entry;
1256
1257	if (!current->journal_info)
1258	lockdep_assert_held(&fs_info->commit_root_sem);
1259
1260	if (!ctx->use_path_cache)
1261	return false;
1262
1263	if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL))
1264	return false;
1265
1266	/*
1267	* Level -1 is used for the data extent, which is not reliable to cache
1268	* because its reference count can increase or decrease without us
1269	* realizing. We cache results only for extent buffers that lead from
1270	* the root node down to the leaf with the file extent item.
1271	*/
1272	ASSERT(level >= `0`);
1273
1274	entry = &ctx->path_cache_entries[level];
1275
1276	/ Unused cache entry or being used for some other extent buffer. /
1277	if (entry->bytenr != bytenr)
1278	return false;
1279
1280	/*
1281	* We cached a false result, but the last snapshot generation of the
1282	* root changed, so we now have a snapshot. Don't trust the result.
1283	*/
1284	if (!entry->is_shared &&
1285	entry->gen != btrfs_root_last_snapshot(s: &root->root_item))
1286	return false;
1287
1288	/*
1289	* If we cached a true result and the last generation used for dropping
1290	* a root changed, we can not trust the result, because the dropped root
1291	* could be a snapshot sharing this extent buffer.
1292	*/
1293	if (entry->is_shared &&
1294	entry->gen != btrfs_get_last_root_drop_gen(fs_info))
1295	return false;
1296
1297	*is_shared = entry->is_shared;
1298	/*
1299	* If the node at this level is shared, than all nodes below are also
1300	* shared. Currently some of the nodes below may be marked as not shared
1301	* because we have just switched from one leaf to another, and switched
1302	* also other nodes above the leaf and below the current level, so mark
1303	* them as shared.
1304	*/
1305	if (*is_shared) {
1306	for (int i = `0`; i < level; i++) {
1307	ctx->path_cache_entries[i].is_shared = true;
1308	ctx->path_cache_entries[i].gen = entry->gen;
1309	}
1310	}
1311
1312	return true;
1313	}
1314
1315	/*
1316	* The caller has joined a transaction or is holding a read lock on the
1317	* fs_info->commit_root_sem semaphore, so no need to worry about the root's last
1318	* snapshot field changing while updating or checking the cache.
1319	*/
1320	static void store_backref_shared_cache(struct btrfs_backref_share_check_ctx *ctx,
1321	struct btrfs_root *root,
1322	u64 bytenr, int level, bool is_shared)
1323	{
1324	const struct btrfs_fs_info *fs_info = root->fs_info;
1325	struct btrfs_backref_shared_cache_entry *entry;
1326	u64 gen;
1327
1328	if (!current->journal_info)
1329	lockdep_assert_held(&fs_info->commit_root_sem);
1330
1331	if (!ctx->use_path_cache)
1332	return;
1333
1334	if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL))
1335	return;
1336
1337	/*
1338	* Level -1 is used for the data extent, which is not reliable to cache
1339	* because its reference count can increase or decrease without us
1340	* realizing. We cache results only for extent buffers that lead from
1341	* the root node down to the leaf with the file extent item.
1342	*/
1343	ASSERT(level >= `0`);
1344
1345	if (is_shared)
1346	gen = btrfs_get_last_root_drop_gen(fs_info);
1347	else
1348	gen = btrfs_root_last_snapshot(s: &root->root_item);
1349
1350	entry = &ctx->path_cache_entries[level];
1351	entry->bytenr = bytenr;
1352	entry->is_shared = is_shared;
1353	entry->gen = gen;
1354
1355	/*
1356	* If we found an extent buffer is shared, set the cache result for all
1357	* extent buffers below it to true. As nodes in the path are COWed,
1358	* their sharedness is moved to their children, and if a leaf is COWed,
1359	* then the sharedness of a data extent becomes direct, the refcount of
1360	* data extent is increased in the extent item at the extent tree.
1361	*/
1362	if (is_shared) {
1363	for (int i = `0`; i < level; i++) {
1364	entry = &ctx->path_cache_entries[i];
1365	entry->is_shared = is_shared;
1366	entry->gen = gen;
1367	}
1368	}
1369	}
1370
1371	/*
1372	* this adds all existing backrefs (inline backrefs, backrefs and delayed
1373	* refs) for the given bytenr to the refs list, merges duplicates and resolves
1374	* indirect refs to their parent bytenr.
1375	* When roots are found, they're added to the roots list
1376	*
1377	* @ctx: Backref walking context object, must be not NULL.
1378	* @sc: If !NULL, then immediately return BACKREF_FOUND_SHARED when a
1379	* shared extent is detected.
1380	*
1381	* Otherwise this returns 0 for success and <0 for an error.
1382	*
1383	* FIXME some caching might speed things up
1384	*/
1385	static int find_parent_nodes(struct btrfs_backref_walk_ctx *ctx,
1386	struct share_check *sc)
1387	{
1388	struct btrfs_root *root = btrfs_extent_root(fs_info: ctx->fs_info, bytenr: ctx->bytenr);
1389	struct btrfs_key key;
1390	struct btrfs_path *path;
1391	struct btrfs_delayed_ref_root *delayed_refs = NULL;
1392	struct btrfs_delayed_ref_head *head;
1393	int info_level = `0`;
1394	int ret;
1395	struct prelim_ref *ref;
1396	struct rb_node *node;
1397	struct extent_inode_elem *eie = NULL;
1398	struct preftrees preftrees = {
1399	.direct = PREFTREE_INIT,
1400	.indirect = PREFTREE_INIT,
1401	.indirect_missing_keys = PREFTREE_INIT
1402	};
1403
1404	/ Roots ulist is not needed when using a sharedness check context. /
1405	if (sc)
1406	ASSERT(ctx->roots == NULL);
1407
1408	key.objectid = ctx->bytenr;
1409	key.offset = (u64)-`1`;
1410	if (btrfs_fs_incompat(ctx->fs_info, SKINNY_METADATA))
1411	key.type = BTRFS_METADATA_ITEM_KEY;
1412	else
1413	key.type = BTRFS_EXTENT_ITEM_KEY;
1414
1415	path = btrfs_alloc_path();
1416	if (!path)
1417	return -ENOMEM;
1418	if (!ctx->trans) {
1419	path->search_commit_root = `1`;
1420	path->skip_locking = `1`;
1421	}
1422
1423	if (ctx->time_seq == BTRFS_SEQ_LAST)
1424	path->skip_locking = `1`;
1425
1426	again:
1427	head = NULL;
1428
1429	ret = btrfs_search_slot(NULL, root, key: &key, p: path, ins_len: `0`, cow: `0`);
1430	if (ret < `0`)
1431	goto out;
1432	if (ret == `0`) {
1433	/*
1434	* Key with offset -1 found, there would have to exist an extent
1435	* item with such offset, but this is out of the valid range.
1436	*/
1437	ret = -EUCLEAN;
1438	goto out;
1439	}
1440
1441	if (ctx->trans && likely(ctx->trans->type != __TRANS_DUMMY) &&
1442	ctx->time_seq != BTRFS_SEQ_LAST) {
1443	/*
1444	* We have a specific time_seq we care about and trans which
1445	* means we have the path lock, we need to grab the ref head and
1446	* lock it so we have a consistent view of the refs at the given
1447	* time.
1448	*/
1449	delayed_refs = &ctx->trans->transaction->delayed_refs;
1450	spin_lock(lock: &delayed_refs->lock);
1451	head = btrfs_find_delayed_ref_head(delayed_refs, bytenr: ctx->bytenr);
1452	if (head) {
1453	if (!mutex_trylock(lock: &head->mutex)) {
1454	refcount_inc(r: &head->refs);
1455	spin_unlock(lock: &delayed_refs->lock);
1456
1457	btrfs_release_path(p: path);
1458
1459	/*
1460	* Mutex was contended, block until it's
1461	* released and try again
1462	*/
1463	mutex_lock(&head->mutex);
1464	mutex_unlock(lock: &head->mutex);
1465	btrfs_put_delayed_ref_head(head);
1466	goto again;
1467	}
1468	spin_unlock(lock: &delayed_refs->lock);
1469	ret = add_delayed_refs(fs_info: ctx->fs_info, head, seq: ctx->time_seq,
1470	preftrees: &preftrees, sc);
1471	mutex_unlock(lock: &head->mutex);
1472	if (ret)
1473	goto out;
1474	} else {
1475	spin_unlock(lock: &delayed_refs->lock);
1476	}
1477	}
1478
1479	if (path->slots[`0`]) {
1480	struct extent_buffer *leaf;
1481	int slot;
1482
1483	path->slots[`0`]--;
1484	leaf = path->nodes[`0`];
1485	slot = path->slots[`0`];
1486	btrfs_item_key_to_cpu(eb: leaf, cpu_key: &key, nr: slot);
1487	if (key.objectid == ctx->bytenr &&
1488	(key.type == BTRFS_EXTENT_ITEM_KEY \|\|
1489	key.type == BTRFS_METADATA_ITEM_KEY)) {
1490	ret = add_inline_refs(ctx, path, info_level: &info_level,
1491	preftrees: &preftrees, sc);
1492	if (ret)
1493	goto out;
1494	ret = add_keyed_refs(ctx, extent_root: root, path, info_level,
1495	preftrees: &preftrees, sc);
1496	if (ret)
1497	goto out;
1498	}
1499	}
1500
1501	/*
1502	* If we have a share context and we reached here, it means the extent
1503	* is not directly shared (no multiple reference items for it),
1504	* otherwise we would have exited earlier with a return value of
1505	* BACKREF_FOUND_SHARED after processing delayed references or while
1506	* processing inline or keyed references from the extent tree.
1507	* The extent may however be indirectly shared through shared subtrees
1508	* as a result from creating snapshots, so we determine below what is
1509	* its parent node, in case we are dealing with a metadata extent, or
1510	* what's the leaf (or leaves), from a fs tree, that has a file extent
1511	* item pointing to it in case we are dealing with a data extent.
1512	*/
1513	ASSERT(extent_is_shared(sc) == `0`);
1514
1515	/*
1516	* If we are here for a data extent and we have a share_check structure
1517	* it means the data extent is not directly shared (does not have
1518	* multiple reference items), so we have to check if a path in the fs
1519	* tree (going from the root node down to the leaf that has the file
1520	* extent item pointing to the data extent) is shared, that is, if any
1521	* of the extent buffers in the path is referenced by other trees.
1522	*/
1523	if (sc && ctx->bytenr == sc->data_bytenr) {
1524	/*
1525	* If our data extent is from a generation more recent than the
1526	* last generation used to snapshot the root, then we know that
1527	* it can not be shared through subtrees, so we can skip
1528	* resolving indirect references, there's no point in
1529	* determining the extent buffers for the path from the fs tree
1530	* root node down to the leaf that has the file extent item that
1531	* points to the data extent.
1532	*/
1533	if (sc->data_extent_gen >
1534	btrfs_root_last_snapshot(s: &sc->root->root_item)) {
1535	ret = BACKREF_FOUND_NOT_SHARED;
1536	goto out;
1537	}
1538
1539	/*
1540	* If we are only determining if a data extent is shared or not
1541	* and the corresponding file extent item is located in the same
1542	* leaf as the previous file extent item, we can skip resolving
1543	* indirect references for a data extent, since the fs tree path
1544	* is the same (same leaf, so same path). We skip as long as the
1545	* cached result for the leaf is valid and only if there's only
1546	* one file extent item pointing to the data extent, because in
1547	* the case of multiple file extent items, they may be located
1548	* in different leaves and therefore we have multiple paths.
1549	*/
1550	if (sc->ctx->curr_leaf_bytenr == sc->ctx->prev_leaf_bytenr &&
1551	sc->self_ref_count == `1`) {
1552	bool cached;
1553	bool is_shared;
1554
1555	cached = lookup_backref_shared_cache(ctx: sc->ctx, root: sc->root,
1556	bytenr: sc->ctx->curr_leaf_bytenr,
1557	level: `0`, is_shared: &is_shared);
1558	if (cached) {
1559	if (is_shared)
1560	ret = BACKREF_FOUND_SHARED;
1561	else
1562	ret = BACKREF_FOUND_NOT_SHARED;
1563	goto out;
1564	}
1565	}
1566	}
1567
1568	btrfs_release_path(p: path);
1569
1570	ret = add_missing_keys(fs_info: ctx->fs_info, preftrees: &preftrees, lock: path->skip_locking == `0`);
1571	if (ret)
1572	goto out;
1573
1574	WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect_missing_keys.root.rb_root));
1575
1576	ret = resolve_indirect_refs(ctx, path, preftrees: &preftrees, sc);
1577	if (ret)
1578	goto out;
1579
1580	WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect.root.rb_root));
1581
1582	/*
1583	* This walks the tree of merged and resolved refs. Tree blocks are
1584	* read in as needed. Unique entries are added to the ulist, and
1585	* the list of found roots is updated.
1586	*
1587	* We release the entire tree in one go before returning.
1588	*/
1589	node = rb_first_cached(&preftrees.direct.root);
1590	while (node) {
1591	ref = rb_entry(node, struct prelim_ref, rbnode);
1592	node = rb_next(&ref->rbnode);
1593	/*
1594	* ref->count < 0 can happen here if there are delayed
1595	* refs with a node->action of BTRFS_DROP_DELAYED_REF.
1596	* prelim_ref_insert() relies on this when merging
1597	* identical refs to keep the overall count correct.
1598	* prelim_ref_insert() will merge only those refs
1599	* which compare identically. Any refs having
1600	* e.g. different offsets would not be merged,
1601	* and would retain their original ref->count < 0.
1602	*/
1603	if (ctx->roots && ref->count && ref->root_id && ref->parent == `0`) {
1604	/ no parent == root of tree /
1605	ret = ulist_add(ulist: ctx->roots, val: ref->root_id, aux: `0`, GFP_NOFS);
1606	if (ret < `0`)
1607	goto out;
1608	}
1609	if (ref->count && ref->parent) {
1610	if (!ctx->skip_inode_ref_list && !ref->inode_list &&
1611	ref->level == `0`) {
1612	struct btrfs_tree_parent_check check = { `0` };
1613	struct extent_buffer *eb;
1614
1615	check.level = ref->level;
1616
1617	eb = read_tree_block(fs_info: ctx->fs_info, bytenr: ref->parent,
1618	check: &check);
1619	if (IS_ERR(ptr: eb)) {
1620	ret = PTR_ERR(ptr: eb);
1621	goto out;
1622	}
1623	if (!extent_buffer_uptodate(eb)) {
1624	free_extent_buffer(eb);
1625	ret = -EIO;
1626	goto out;
1627	}
1628
1629	if (!path->skip_locking)
1630	btrfs_tree_read_lock(eb);
1631	ret = find_extent_in_eb(ctx, eb, eie: &eie);
1632	if (!path->skip_locking)
1633	btrfs_tree_read_unlock(eb);
1634	free_extent_buffer(eb);
1635	if (ret == BTRFS_ITERATE_EXTENT_INODES_STOP \|\|
1636	ret < `0`)
1637	goto out;
1638	ref->inode_list = eie;
1639	/*
1640	* We transferred the list ownership to the ref,
1641	* so set to NULL to avoid a double free in case
1642	* an error happens after this.
1643	*/
1644	eie = NULL;
1645	}
1646	ret = ulist_add_merge_ptr(ulist: ctx->refs, val: ref->parent,
1647	aux: ref->inode_list,
1648	old_aux: (void **)&eie, GFP_NOFS);
1649	if (ret < `0`)
1650	goto out;
1651	if (!ret && !ctx->skip_inode_ref_list) {
1652	/*
1653	* We've recorded that parent, so we must extend
1654	* its inode list here.
1655	*
1656	* However if there was corruption we may not
1657	* have found an eie, return an error in this
1658	* case.
1659	*/
1660	ASSERT(eie);
1661	if (!eie) {
1662	ret = -EUCLEAN;
1663	goto out;
1664	}
1665	while (eie->next)
1666	eie = eie->next;
1667	eie->next = ref->inode_list;
1668	}
1669	eie = NULL;
1670	/*
1671	* We have transferred the inode list ownership from
1672	* this ref to the ref we added to the 'refs' ulist.
1673	* So set this ref's inode list to NULL to avoid
1674	* use-after-free when our caller uses it or double
1675	* frees in case an error happens before we return.
1676	*/
1677	ref->inode_list = NULL;
1678	}
1679	cond_resched();
1680	}
1681
1682	out:
1683	btrfs_free_path(p: path);
1684
1685	prelim_release(preftree: &preftrees.direct);
1686	prelim_release(preftree: &preftrees.indirect);
1687	prelim_release(preftree: &preftrees.indirect_missing_keys);
1688
1689	if (ret == BTRFS_ITERATE_EXTENT_INODES_STOP \|\| ret < `0`)
1690	free_inode_elem_list(eie);
1691	return ret;
1692	}
1693
1694	/*
1695	* Finds all leaves with a reference to the specified combination of
1696	* @ctx->bytenr and @ctx->extent_item_pos. The bytenr of the found leaves are
1697	* added to the ulist at @ctx->refs, and that ulist is allocated by this
1698	* function. The caller should free the ulist with free_leaf_list() if
1699	* @ctx->ignore_extent_item_pos is false, otherwise a fimple ulist_free() is
1700	* enough.
1701	*
1702	* Returns 0 on success and < 0 on error. On error @ctx->refs is not allocated.
1703	*/
1704	int btrfs_find_all_leafs(struct btrfs_backref_walk_ctx *ctx)
1705	{
1706	int ret;
1707
1708	ASSERT(ctx->refs == NULL);
1709
1710	ctx->refs = ulist_alloc(GFP_NOFS);
1711	if (!ctx->refs)
1712	return -ENOMEM;
1713
1714	ret = find_parent_nodes(ctx, NULL);
1715	if (ret == BTRFS_ITERATE_EXTENT_INODES_STOP \|\|
1716	(ret < `0` && ret != -ENOENT)) {
1717	free_leaf_list(ulist: ctx->refs);
1718	ctx->refs = NULL;
1719	return ret;
1720	}
1721
1722	return `0`;
1723	}
1724
1725	/*
1726	* Walk all backrefs for a given extent to find all roots that reference this
1727	* extent. Walking a backref means finding all extents that reference this
1728	* extent and in turn walk the backrefs of those, too. Naturally this is a
1729	* recursive process, but here it is implemented in an iterative fashion: We
1730	* find all referencing extents for the extent in question and put them on a
1731	* list. In turn, we find all referencing extents for those, further appending
1732	* to the list. The way we iterate the list allows adding more elements after
1733	* the current while iterating. The process stops when we reach the end of the
1734	* list.
1735	*
1736	* Found roots are added to @ctx->roots, which is allocated by this function if
1737	* it points to NULL, in which case the caller is responsible for freeing it
1738	* after it's not needed anymore.
1739	* This function requires @ctx->refs to be NULL, as it uses it for allocating a
1740	* ulist to do temporary work, and frees it before returning.
1741	*
1742	* Returns 0 on success, < 0 on error.
1743	*/
1744	static int btrfs_find_all_roots_safe(struct btrfs_backref_walk_ctx *ctx)
1745	{
1746	const u64 orig_bytenr = ctx->bytenr;
1747	const bool orig_skip_inode_ref_list = ctx->skip_inode_ref_list;
1748	bool roots_ulist_allocated = false;
1749	struct ulist_iterator uiter;
1750	int ret = `0`;
1751
1752	ASSERT(ctx->refs == NULL);
1753
1754	ctx->refs = ulist_alloc(GFP_NOFS);
1755	if (!ctx->refs)
1756	return -ENOMEM;
1757
1758	if (!ctx->roots) {
1759	ctx->roots = ulist_alloc(GFP_NOFS);
1760	if (!ctx->roots) {
1761	ulist_free(ulist: ctx->refs);
1762	ctx->refs = NULL;
1763	return -ENOMEM;
1764	}
1765	roots_ulist_allocated = true;
1766	}
1767
1768	ctx->skip_inode_ref_list = true;
1769
1770	ULIST_ITER_INIT(&uiter);
1771	while (`1`) {
1772	struct ulist_node *node;
1773
1774	ret = find_parent_nodes(ctx, NULL);
1775	if (ret < `0` && ret != -ENOENT) {
1776	if (roots_ulist_allocated) {
1777	ulist_free(ulist: ctx->roots);
1778	ctx->roots = NULL;
1779	}
1780	break;
1781	}
1782	ret = `0`;
1783	node = ulist_next(ulist: ctx->refs, uiter: &uiter);
1784	if (!node)
1785	break;
1786	ctx->bytenr = node->val;
1787	cond_resched();
1788	}
1789
1790	ulist_free(ulist: ctx->refs);
1791	ctx->refs = NULL;
1792	ctx->bytenr = orig_bytenr;
1793	ctx->skip_inode_ref_list = orig_skip_inode_ref_list;
1794
1795	return ret;
1796	}
1797
1798	int btrfs_find_all_roots(struct btrfs_backref_walk_ctx *ctx,
1799	bool skip_commit_root_sem)
1800	{
1801	int ret;
1802
1803	if (!ctx->trans && !skip_commit_root_sem)
1804	down_read(sem: &ctx->fs_info->commit_root_sem);
1805	ret = btrfs_find_all_roots_safe(ctx);
1806	if (!ctx->trans && !skip_commit_root_sem)
1807	up_read(sem: &ctx->fs_info->commit_root_sem);
1808	return ret;
1809	}
1810
1811	struct btrfs_backref_share_check_ctx btrfs_alloc_backref_share_check_ctx(void*)
1812	{
1813	struct btrfs_backref_share_check_ctx *ctx;
1814
1815	ctx = kzalloc(size: sizeof(*ctx), GFP_KERNEL);
1816	if (!ctx)
1817	return NULL;
1818
1819	ulist_init(ulist: &ctx->refs);
1820
1821	return ctx;
1822	}
1823
1824	void btrfs_free_backref_share_ctx(struct btrfs_backref_share_check_ctx *ctx)
1825	{
1826	if (!ctx)
1827	return;
1828
1829	ulist_release(ulist: &ctx->refs);
1830	kfree(objp: ctx);
1831	}
1832
1833	/*
1834	* Check if a data extent is shared or not.
1835	*
1836	* @inode: The inode whose extent we are checking.
1837	* @bytenr: Logical bytenr of the extent we are checking.
1838	* @extent_gen: Generation of the extent (file extent item) or 0 if it is
1839	* not known.
1840	* @ctx: A backref sharedness check context.
1841	*
1842	* btrfs_is_data_extent_shared uses the backref walking code but will short
1843	* circuit as soon as it finds a root or inode that doesn't match the
1844	* one passed in. This provides a significant performance benefit for
1845	* callers (such as fiemap) which want to know whether the extent is
1846	* shared but do not need a ref count.
1847	*
1848	* This attempts to attach to the running transaction in order to account for
1849	* delayed refs, but continues on even when no running transaction exists.
1850	*
1851	* Return: 0 if extent is not shared, 1 if it is shared, < 0 on error.
1852	*/
1853	int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr,
1854	u64 extent_gen,
1855	struct btrfs_backref_share_check_ctx *ctx)
1856	{
1857	struct btrfs_backref_walk_ctx walk_ctx = { `0` };
1858	struct btrfs_root *root = inode->root;
1859	struct btrfs_fs_info *fs_info = root->fs_info;
1860	struct btrfs_trans_handle *trans;
1861	struct ulist_iterator uiter;
1862	struct ulist_node *node;
1863	struct btrfs_seq_list elem = BTRFS_SEQ_LIST_INIT(elem);
1864	int ret = `0`;
1865	struct share_check shared = {
1866	.ctx = ctx,
1867	.root = root,
1868	.inum = btrfs_ino(inode),
1869	.data_bytenr = bytenr,
1870	.data_extent_gen = extent_gen,
1871	.share_count = `0`,
1872	.self_ref_count = `0`,
1873	.have_delayed_delete_refs = false,
1874	};
1875	int level;
1876	bool leaf_cached;
1877	bool leaf_is_shared;
1878
1879	for (int i = `0`; i < BTRFS_BACKREF_CTX_PREV_EXTENTS_SIZE; i++) {
1880	if (ctx->prev_extents_cache[i].bytenr == bytenr)
1881	return ctx->prev_extents_cache[i].is_shared;
1882	}
1883
1884	ulist_init(ulist: &ctx->refs);
1885
1886	trans = btrfs_join_transaction_nostart(root);
1887	if (IS_ERR(ptr: trans)) {
1888	if (PTR_ERR(ptr: trans) != -ENOENT && PTR_ERR(ptr: trans) != -EROFS) {
1889	ret = PTR_ERR(ptr: trans);
1890	goto out;
1891	}
1892	trans = NULL;
1893	down_read(sem: &fs_info->commit_root_sem);
1894	} else {
1895	btrfs_get_tree_mod_seq(fs_info, elem: &elem);
1896	walk_ctx.time_seq = elem.seq;
1897	}
1898
1899	ctx->use_path_cache = true;
1900
1901	/*
1902	* We may have previously determined that the current leaf is shared.
1903	* If it is, then we have a data extent that is shared due to a shared
1904	* subtree (caused by snapshotting) and we don't need to check for data
1905	* backrefs. If the leaf is not shared, then we must do backref walking
1906	* to determine if the data extent is shared through reflinks.
1907	*/
1908	leaf_cached = lookup_backref_shared_cache(ctx, root,
1909	bytenr: ctx->curr_leaf_bytenr, level: `0`,
1910	is_shared: &leaf_is_shared);
1911	if (leaf_cached && leaf_is_shared) {
1912	ret = `1`;
1913	goto out_trans;
1914	}
1915
1916	walk_ctx.skip_inode_ref_list = true;
1917	walk_ctx.trans = trans;
1918	walk_ctx.fs_info = fs_info;
1919	walk_ctx.refs = &ctx->refs;
1920
1921	/ -1 means we are in the bytenr of the data extent. /
1922	level = -`1`;
1923	ULIST_ITER_INIT(&uiter);
1924	while (`1`) {
1925	const unsigned long prev_ref_count = ctx->refs.nnodes;
1926
1927	walk_ctx.bytenr = bytenr;
1928	ret = find_parent_nodes(ctx: &walk_ctx, sc: &shared);
1929	if (ret == BACKREF_FOUND_SHARED \|\|
1930	ret == BACKREF_FOUND_NOT_SHARED) {
1931	/ If shared must return 1, otherwise return 0. /
1932	ret = (ret == BACKREF_FOUND_SHARED) ? `1` : `0`;
1933	if (level >= `0`)
1934	store_backref_shared_cache(ctx, root, bytenr,
1935	level, is_shared: ret == `1`);
1936	break;
1937	}
1938	if (ret < `0` && ret != -ENOENT)
1939	break;
1940	ret = `0`;
1941
1942	/*
1943	* More than one extent buffer (bytenr) may have been added to
1944	* the ctx->refs ulist, in which case we have to check multiple
1945	* tree paths in case the first one is not shared, so we can not
1946	* use the path cache which is made for a single path. Multiple
1947	* extent buffers at the current level happen when:
1948	*
1949	* 1) level -1, the data extent: If our data extent was not
1950	* directly shared (without multiple reference items), then
1951	* it might have a single reference item with a count > 1 for
1952	* the same offset, which means there are 2 (or more) file
1953	* extent items that point to the data extent - this happens
1954	* when a file extent item needs to be split and then one
1955	* item gets moved to another leaf due to a b+tree leaf split
1956	* when inserting some item. In this case the file extent
1957	* items may be located in different leaves and therefore
1958	* some of the leaves may be referenced through shared
1959	* subtrees while others are not. Since our extent buffer
1960	* cache only works for a single path (by far the most common
1961	* case and simpler to deal with), we can not use it if we
1962	* have multiple leaves (which implies multiple paths).
1963	*
1964	* 2) level >= 0, a tree node/leaf: We can have a mix of direct
1965	* and indirect references on a b+tree node/leaf, so we have
1966	* to check multiple paths, and the extent buffer (the
1967	* current bytenr) may be shared or not. One example is
1968	* during relocation as we may get a shared tree block ref
1969	* (direct ref) and a non-shared tree block ref (indirect
1970	* ref) for the same node/leaf.
1971	*/
1972	if ((ctx->refs.nnodes - prev_ref_count) > `1`)
1973	ctx->use_path_cache = false;
1974
1975	if (level >= `0`)
1976	store_backref_shared_cache(ctx, root, bytenr,
1977	level, is_shared: false);
1978	node = ulist_next(ulist: &ctx->refs, uiter: &uiter);
1979	if (!node)
1980	break;
1981	bytenr = node->val;
1982	if (ctx->use_path_cache) {
1983	bool is_shared;
1984	bool cached;
1985
1986	level++;
1987	cached = lookup_backref_shared_cache(ctx, root, bytenr,
1988	level, is_shared: &is_shared);
1989	if (cached) {
1990	ret = (is_shared ? `1` : `0`);
1991	break;
1992	}
1993	}
1994	shared.share_count = `0`;
1995	shared.have_delayed_delete_refs = false;
1996	cond_resched();
1997	}
1998
1999	/*
2000	* If the path cache is disabled, then it means at some tree level we
2001	* got multiple parents due to a mix of direct and indirect backrefs or
2002	* multiple leaves with file extent items pointing to the same data
2003	* extent. We have to invalidate the cache and cache only the sharedness
2004	* result for the levels where we got only one node/reference.
2005	*/
2006	if (!ctx->use_path_cache) {
2007	int i = `0`;
2008
2009	level--;
2010	if (ret >= `0` && level >= `0`) {
2011	bytenr = ctx->path_cache_entries[level].bytenr;
2012	ctx->use_path_cache = true;
2013	store_backref_shared_cache(ctx, root, bytenr, level, is_shared: ret);
2014	i = level + `1`;
2015	}
2016
2017	for ( ; i < BTRFS_MAX_LEVEL; i++)
2018	ctx->path_cache_entries[i].bytenr = `0`;
2019	}
2020
2021	/*
2022	* Cache the sharedness result for the data extent if we know our inode
2023	* has more than 1 file extent item that refers to the data extent.
2024	*/
2025	if (ret >= `0` && shared.self_ref_count > `1`) {
2026	int slot = ctx->prev_extents_cache_slot;
2027
2028	ctx->prev_extents_cache[slot].bytenr = shared.data_bytenr;
2029	ctx->prev_extents_cache[slot].is_shared = (ret == `1`);
2030
2031	slot = (slot + `1`) % BTRFS_BACKREF_CTX_PREV_EXTENTS_SIZE;
2032	ctx->prev_extents_cache_slot = slot;
2033	}
2034
2035	out_trans:
2036	if (trans) {
2037	btrfs_put_tree_mod_seq(fs_info, elem: &elem);
2038	btrfs_end_transaction(trans);
2039	} else {
2040	up_read(sem: &fs_info->commit_root_sem);
2041	}
2042	out:
2043	ulist_release(ulist: &ctx->refs);
2044	ctx->prev_leaf_bytenr = ctx->curr_leaf_bytenr;
2045
2046	return ret;
2047	}
2048
2049	int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
2050	u64 start_off, struct btrfs_path *path,
2051	struct btrfs_inode_extref **ret_extref,
2052	u64 *found_off)
2053	{
2054	int ret, slot;
2055	struct btrfs_key key;
2056	struct btrfs_key found_key;
2057	struct btrfs_inode_extref *extref;
2058	const struct extent_buffer *leaf;
2059	unsigned long ptr;
2060
2061	key.objectid = inode_objectid;
2062	key.type = BTRFS_INODE_EXTREF_KEY;
2063	key.offset = start_off;
2064
2065	ret = btrfs_search_slot(NULL, root, key: &key, p: path, ins_len: `0`, cow: `0`);
2066	if (ret < `0`)
2067	return ret;
2068
2069	while (`1`) {
2070	leaf = path->nodes[`0`];
2071	slot = path->slots[`0`];
2072	if (slot >= btrfs_header_nritems(eb: leaf)) {
2073	/*
2074	* If the item at offset is not found,
2075	* btrfs_search_slot will point us to the slot
2076	* where it should be inserted. In our case
2077	* that will be the slot directly before the
2078	* next INODE_REF_KEY_V2 item. In the case
2079	* that we're pointing to the last slot in a
2080	* leaf, we must move one leaf over.
2081	*/
2082	ret = btrfs_next_leaf(root, path);
2083	if (ret) {
2084	if (ret >= `1`)
2085	ret = -ENOENT;
2086	break;
2087	}
2088	continue;
2089	}
2090
2091	btrfs_item_key_to_cpu(eb: leaf, cpu_key: &found_key, nr: slot);
2092
2093	/*
2094	* Check that we're still looking at an extended ref key for
2095	* this particular objectid. If we have different
2096	* objectid or type then there are no more to be found
2097	* in the tree and we can exit.
2098	*/
2099	ret = -ENOENT;
2100	if (found_key.objectid != inode_objectid)
2101	break;
2102	if (found_key.type != BTRFS_INODE_EXTREF_KEY)
2103	break;
2104
2105	ret = `0`;
2106	ptr = btrfs_item_ptr_offset(leaf, path->slots[`0`]);
2107	extref = (struct btrfs_inode_extref *)ptr;
2108	*ret_extref = extref;
2109	if (found_off)
2110	*found_off = found_key.offset;
2111	break;
2112	}
2113
2114	return ret;
2115	}
2116
2117	/*
2118	* this iterates to turn a name (from iref/extref) into a full filesystem path.
2119	* Elements of the path are separated by '/' and the path is guaranteed to be
2120	* 0-terminated. the path is only given within the current file system.
2121	* Therefore, it never starts with a '/'. the caller is responsible to provide
2122	* "size" bytes in "dest". the dest buffer will be filled backwards. finally,
2123	* the start point of the resulting string is returned. this pointer is within
2124	* dest, normally.
2125	* in case the path buffer would overflow, the pointer is decremented further
2126	* as if output was written to the buffer, though no more output is actually
2127	* generated. that way, the caller can determine how much space would be
2128	* required for the path to fit into the buffer. in that case, the returned
2129	* value will be smaller than dest. callers must check this!
2130	*/
2131	char btrfs_ref_to_path(struct* btrfs_root fs_root, struct* btrfs_path *path,
2132	u32 name_len, unsigned long name_off,
2133	struct extent_buffer *eb_in, u64 parent,
2134	char *dest, u32 size)
2135	{
2136	int slot;
2137	u64 next_inum;
2138	int ret;
2139	s64 bytes_left = ((s64)size) - `1`;
2140	struct extent_buffer *eb = eb_in;
2141	struct btrfs_key found_key;
2142	struct btrfs_inode_ref *iref;
2143
2144	if (bytes_left >= `0`)
2145	dest[bytes_left] = `'\0'`;
2146
2147	while (`1`) {
2148	bytes_left -= name_len;
2149	if (bytes_left >= `0`)
2150	read_extent_buffer(eb, dst: dest + bytes_left,
2151	start: name_off, len: name_len);
2152	if (eb != eb_in) {
2153	if (!path->skip_locking)
2154	btrfs_tree_read_unlock(eb);
2155	free_extent_buffer(eb);
2156	}
2157	ret = btrfs_find_item(fs_root, path, inum: parent, ioff: `0`,
2158	BTRFS_INODE_REF_KEY, found_key: &found_key);
2159	if (ret > `0`)
2160	ret = -ENOENT;
2161	if (ret)
2162	break;
2163
2164	next_inum = found_key.offset;
2165
2166	/ regular exit ahead /
2167	if (parent == next_inum)
2168	break;
2169
2170	slot = path->slots[`0`];
2171	eb = path->nodes[`0`];
2172	/ make sure we can use eb after releasing the path /
2173	if (eb != eb_in) {
2174	path->nodes[`0`] = NULL;
2175	path->locks[`0`] = `0`;
2176	}
2177	btrfs_release_path(p: path);
2178	iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
2179
2180	name_len = btrfs_inode_ref_name_len(eb, s: iref);
2181	name_off = (unsigned long)(iref + `1`);
2182
2183	parent = next_inum;
2184	--bytes_left;
2185	if (bytes_left >= `0`)
2186	dest[bytes_left] = `'/'`;
2187	}
2188
2189	btrfs_release_path(p: path);
2190
2191	if (ret)
2192	return ERR_PTR(error: ret);
2193
2194	return dest + bytes_left;
2195	}
2196
2197	/*
2198	* this makes the path point to (logical EXTENT_ITEM *)
2199	* returns BTRFS_EXTENT_FLAG_DATA for data, BTRFS_EXTENT_FLAG_TREE_BLOCK for
2200	* tree blocks and <0 on error.
2201	*/
2202	int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
2203	struct btrfs_path path, struct* btrfs_key *found_key,
2204	u64 *flags_ret)
2205	{
2206	struct btrfs_root *extent_root = btrfs_extent_root(fs_info, bytenr: logical);
2207	int ret;
2208	u64 flags;
2209	u64 size = `0`;
2210	u32 item_size;
2211	const struct extent_buffer *eb;
2212	struct btrfs_extent_item *ei;
2213	struct btrfs_key key;
2214
2215	if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
2216	key.type = BTRFS_METADATA_ITEM_KEY;
2217	else
2218	key.type = BTRFS_EXTENT_ITEM_KEY;
2219	key.objectid = logical;
2220	key.offset = (u64)-`1`;
2221
2222	ret = btrfs_search_slot(NULL, root: extent_root, key: &key, p: path, ins_len: `0`, cow: `0`);
2223	if (ret < `0`)
2224	return ret;
2225	if (ret == `0`) {
2226	/*
2227	* Key with offset -1 found, there would have to exist an extent
2228	* item with such offset, but this is out of the valid range.
2229	*/
2230	return -EUCLEAN;
2231	}
2232
2233	ret = btrfs_previous_extent_item(root: extent_root, path, min_objectid: `0`);
2234	if (ret) {
2235	if (ret > `0`)
2236	ret = -ENOENT;
2237	return ret;
2238	}
2239	btrfs_item_key_to_cpu(eb: path->nodes[`0`], cpu_key: found_key, nr: path->slots[`0`]);
2240	if (found_key->type == BTRFS_METADATA_ITEM_KEY)
2241	size = fs_info->nodesize;
2242	else if (found_key->type == BTRFS_EXTENT_ITEM_KEY)
2243	size = found_key->offset;
2244
2245	if (found_key->objectid > logical \|\|
2246	found_key->objectid + size <= logical) {
2247	btrfs_debug(fs_info,
2248	"logical %llu is not within any extent", logical);
2249	return -ENOENT;
2250	}
2251
2252	eb = path->nodes[`0`];
2253	item_size = btrfs_item_size(eb, slot: path->slots[`0`]);
2254
2255	ei = btrfs_item_ptr(eb, path->slots[`0`], struct btrfs_extent_item);
2256	flags = btrfs_extent_flags(eb, s: ei);
2257
2258	btrfs_debug(fs_info,
2259	"logical %llu is at position %llu within the extent (%llu EXTENT_ITEM %llu) flags %#llx size %u",
2260	logical, logical - found_key->objectid, found_key->objectid,
2261	found_key->offset, flags, item_size);
2262
2263	WARN_ON(!flags_ret);
2264	if (flags_ret) {
2265	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
2266	*flags_ret = BTRFS_EXTENT_FLAG_TREE_BLOCK;
2267	else if (flags & BTRFS_EXTENT_FLAG_DATA)
2268	*flags_ret = BTRFS_EXTENT_FLAG_DATA;
2269	else
2270	BUG();
2271	return `0`;
2272	}
2273
2274	return -EIO;
2275	}
2276
2277	/*
2278	* helper function to iterate extent inline refs. ptr must point to a 0 value
2279	* for the first call and may be modified. it is used to track state.
2280	* if more refs exist, 0 is returned and the next call to
2281	* get_extent_inline_ref must pass the modified ptr parameter to get the
2282	* next ref. after the last ref was processed, 1 is returned.
2283	* returns <0 on error
2284	*/
2285	static int get_extent_inline_ref(unsigned long *ptr,
2286	const struct extent_buffer *eb,
2287	const struct btrfs_key *key,
2288	const struct btrfs_extent_item *ei,
2289	u32 item_size,
2290	struct btrfs_extent_inline_ref **out_eiref,
2291	int *out_type)
2292	{
2293	unsigned long end;
2294	u64 flags;
2295	struct btrfs_tree_block_info *info;
2296
2297	if (!*ptr) {
2298	/ first call /
2299	flags = btrfs_extent_flags(eb, s: ei);
2300	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
2301	if (key->type == BTRFS_METADATA_ITEM_KEY) {
2302	/ a skinny metadata extent /
2303	*out_eiref =
2304	(struct btrfs_extent_inline_ref *)(ei + `1`);
2305	} else {
2306	WARN_ON(key->type != BTRFS_EXTENT_ITEM_KEY);
2307	info = (struct btrfs_tree_block_info *)(ei + `1`);
2308	*out_eiref =
2309	(struct btrfs_extent_inline_ref *)(info + `1`);
2310	}
2311	} else {
2312	out_eiref = (struct* btrfs_extent_inline_ref *)(ei + `1`);
2313	}
2314	ptr = (unsigned* long)*out_eiref;
2315	if ((unsigned long)(ptr) >= (unsigned* long)ei + item_size)
2316	return -ENOENT;
2317	}
2318
2319	end = (unsigned long)ei + item_size;
2320	out_eiref = (struct* btrfs_extent_inline_ref )(ptr);
2321	out_type = btrfs_get_extent_inline_ref_type(eb, iref: out_eiref,
2322	is_data: BTRFS_REF_TYPE_ANY);
2323	if (*out_type == BTRFS_REF_TYPE_INVALID)
2324	return -EUCLEAN;
2325
2326	ptr += btrfs_extent_inline_ref_size(type: out_type);
2327	WARN_ON(*ptr > end);
2328	if (*ptr == end)
2329	return `1`; / last /
2330
2331	return `0`;
2332	}
2333
2334	/*
2335	* reads the tree block backref for an extent. tree level and root are returned
2336	* through out_level and out_root. ptr must point to a 0 value for the first
2337	* call and may be modified (see get_extent_inline_ref comment).
2338	* returns 0 if data was provided, 1 if there was no more data to provide or
2339	* <0 on error.
2340	*/
2341	int tree_backref_for_extent(unsigned long ptr, struct* extent_buffer *eb,
2342	struct btrfs_key key, struct* btrfs_extent_item *ei,
2343	u32 item_size, u64 out_root, u8 out_level)
2344	{
2345	int ret;
2346	int type;
2347	struct btrfs_extent_inline_ref *eiref;
2348
2349	if (ptr == (unsigned* long)-`1`)
2350	return `1`;
2351
2352	while (`1`) {
2353	ret = get_extent_inline_ref(ptr, eb, key, ei, item_size,
2354	out_eiref: &eiref, out_type: &type);
2355	if (ret < `0`)
2356	return ret;
2357
2358	if (type == BTRFS_TREE_BLOCK_REF_KEY \|\|
2359	type == BTRFS_SHARED_BLOCK_REF_KEY)
2360	break;
2361
2362	if (ret == `1`)
2363	return `1`;
2364	}
2365
2366	/ we can treat both ref types equally here /
2367	*out_root = btrfs_extent_inline_ref_offset(eb, s: eiref);
2368
2369	if (key->type == BTRFS_EXTENT_ITEM_KEY) {
2370	struct btrfs_tree_block_info *info;
2371
2372	info = (struct btrfs_tree_block_info *)(ei + `1`);
2373	*out_level = btrfs_tree_block_level(eb, s: info);
2374	} else {
2375	ASSERT(key->type == BTRFS_METADATA_ITEM_KEY);
2376	*out_level = (u8)key->offset;
2377	}
2378
2379	if (ret == `1`)
2380	ptr = (unsigned* long)-`1`;
2381
2382	return `0`;
2383	}
2384
2385	static int iterate_leaf_refs(struct btrfs_fs_info *fs_info,
2386	struct extent_inode_elem *inode_list,
2387	u64 root, u64 extent_item_objectid,
2388	iterate_extent_inodes_t iterate, void* *ctx)
2389	{
2390	struct extent_inode_elem *eie;
2391	int ret = `0`;
2392
2393	for (eie = inode_list; eie; eie = eie->next) {
2394	btrfs_debug(fs_info,
2395	"ref for %llu resolved, key (%llu EXTEND_DATA %llu), root %llu",
2396	extent_item_objectid, eie->inum,
2397	eie->offset, root);
2398	ret = iterate(eie->inum, eie->offset, eie->num_bytes, root, ctx);
2399	if (ret) {
2400	btrfs_debug(fs_info,
2401	"stopping iteration for %llu due to ret=%d",
2402	extent_item_objectid, ret);
2403	break;
2404	}
2405	}
2406
2407	return ret;
2408	}
2409
2410	/*
2411	* calls iterate() for every inode that references the extent identified by
2412	* the given parameters.
2413	* when the iterator function returns a non-zero value, iteration stops.
2414	*/
2415	int iterate_extent_inodes(struct btrfs_backref_walk_ctx *ctx,
2416	bool search_commit_root,
2417	iterate_extent_inodes_t iterate, void* *user_ctx)
2418	{
2419	int ret;
2420	struct ulist *refs;
2421	struct ulist_node *ref_node;
2422	struct btrfs_seq_list seq_elem = BTRFS_SEQ_LIST_INIT(seq_elem);
2423	struct ulist_iterator ref_uiter;
2424
2425	btrfs_debug(ctx->fs_info, "resolving all inodes for extent %llu",
2426	ctx->bytenr);
2427
2428	ASSERT(ctx->trans == NULL);
2429	ASSERT(ctx->roots == NULL);
2430
2431	if (!search_commit_root) {
2432	struct btrfs_trans_handle *trans;
2433
2434	trans = btrfs_attach_transaction(root: ctx->fs_info->tree_root);
2435	if (IS_ERR(ptr: trans)) {
2436	if (PTR_ERR(ptr: trans) != -ENOENT &&
2437	PTR_ERR(ptr: trans) != -EROFS)
2438	return PTR_ERR(ptr: trans);
2439	trans = NULL;
2440	}
2441	ctx->trans = trans;
2442	}
2443
2444	if (ctx->trans) {
2445	btrfs_get_tree_mod_seq(fs_info: ctx->fs_info, elem: &seq_elem);
2446	ctx->time_seq = seq_elem.seq;
2447	} else {
2448	down_read(sem: &ctx->fs_info->commit_root_sem);
2449	}
2450
2451	ret = btrfs_find_all_leafs(ctx);
2452	if (ret)
2453	goto out;
2454	refs = ctx->refs;
2455	ctx->refs = NULL;
2456
2457	ULIST_ITER_INIT(&ref_uiter);
2458	while (!ret && (ref_node = ulist_next(ulist: refs, uiter: &ref_uiter))) {
2459	const u64 leaf_bytenr = ref_node->val;
2460	struct ulist_node *root_node;
2461	struct ulist_iterator root_uiter;
2462	struct extent_inode_elem *inode_list;
2463
2464	inode_list = (struct extent_inode_elem *)(uintptr_t)ref_node->aux;
2465
2466	if (ctx->cache_lookup) {
2467	const u64 *root_ids;
2468	int root_count;
2469	bool cached;
2470
2471	cached = ctx->cache_lookup(leaf_bytenr, ctx->user_ctx,
2472	&root_ids, &root_count);
2473	if (cached) {
2474	for (int i = `0`; i < root_count; i++) {
2475	ret = iterate_leaf_refs(fs_info: ctx->fs_info,
2476	inode_list,
2477	root: root_ids[i],
2478	extent_item_objectid: leaf_bytenr,
2479	iterate,
2480	ctx: user_ctx);
2481	if (ret)
2482	break;
2483	}
2484	continue;
2485	}
2486	}
2487
2488	if (!ctx->roots) {
2489	ctx->roots = ulist_alloc(GFP_NOFS);
2490	if (!ctx->roots) {
2491	ret = -ENOMEM;
2492	break;
2493	}
2494	}
2495
2496	ctx->bytenr = leaf_bytenr;
2497	ret = btrfs_find_all_roots_safe(ctx);
2498	if (ret)
2499	break;
2500
2501	if (ctx->cache_store)
2502	ctx->cache_store(leaf_bytenr, ctx->roots, ctx->user_ctx);
2503
2504	ULIST_ITER_INIT(&root_uiter);
2505	while (!ret && (root_node = ulist_next(ulist: ctx->roots, uiter: &root_uiter))) {
2506	btrfs_debug(ctx->fs_info,
2507	"root %llu references leaf %llu, data list %#llx",
2508	root_node->val, ref_node->val,
2509	ref_node->aux);
2510	ret = iterate_leaf_refs(fs_info: ctx->fs_info, inode_list,
2511	root: root_node->val, extent_item_objectid: ctx->bytenr,
2512	iterate, ctx: user_ctx);
2513	}
2514	ulist_reinit(ulist: ctx->roots);
2515	}
2516
2517	free_leaf_list(ulist: refs);
2518	out:
2519	if (ctx->trans) {
2520	btrfs_put_tree_mod_seq(fs_info: ctx->fs_info, elem: &seq_elem);
2521	btrfs_end_transaction(trans: ctx->trans);
2522	ctx->trans = NULL;
2523	} else {
2524	up_read(sem: &ctx->fs_info->commit_root_sem);
2525	}
2526
2527	ulist_free(ulist: ctx->roots);
2528	ctx->roots = NULL;
2529
2530	if (ret == BTRFS_ITERATE_EXTENT_INODES_STOP)
2531	ret = `0`;
2532
2533	return ret;
2534	}
2535
2536	static int build_ino_list(u64 inum, u64 offset, u64 num_bytes, u64 root, void *ctx)
2537	{
2538	struct btrfs_data_container *inodes = ctx;
2539	const size_t c = `3` * sizeof(u64);
2540
2541	if (inodes->bytes_left >= c) {
2542	inodes->bytes_left -= c;
2543	inodes->val[inodes->elem_cnt] = inum;
2544	inodes->val[inodes->elem_cnt + `1`] = offset;
2545	inodes->val[inodes->elem_cnt + `2`] = root;
2546	inodes->elem_cnt += `3`;
2547	} else {
2548	inodes->bytes_missing += c - inodes->bytes_left;
2549	inodes->bytes_left = `0`;
2550	inodes->elem_missed += `3`;
2551	}
2552
2553	return `0`;
2554	}
2555
2556	int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
2557	struct btrfs_path *path,
2558	void *ctx, bool ignore_offset)
2559	{
2560	struct btrfs_backref_walk_ctx walk_ctx = { `0` };
2561	int ret;
2562	u64 flags = `0`;
2563	struct btrfs_key found_key;
2564	int search_commit_root = path->search_commit_root;
2565
2566	ret = extent_from_logical(fs_info, logical, path, found_key: &found_key, flags_ret: &flags);
2567	btrfs_release_path(p: path);
2568	if (ret < `0`)
2569	return ret;
2570	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
2571	return -EINVAL;
2572
2573	walk_ctx.bytenr = found_key.objectid;
2574	if (ignore_offset)
2575	walk_ctx.ignore_extent_item_pos = true;
2576	else
2577	walk_ctx.extent_item_pos = logical - found_key.objectid;
2578	walk_ctx.fs_info = fs_info;
2579
2580	return iterate_extent_inodes(ctx: &walk_ctx, search_commit_root,
2581	iterate: build_ino_list, user_ctx: ctx);
2582	}
2583
2584	static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off,
2585	struct extent_buffer eb, struct* inode_fs_paths *ipath);
2586
2587	static int iterate_inode_refs(u64 inum, struct inode_fs_paths *ipath)
2588	{
2589	int ret = `0`;
2590	int slot;
2591	u32 cur;
2592	u32 len;
2593	u32 name_len;
2594	u64 parent = `0`;
2595	int found = `0`;
2596	struct btrfs_root *fs_root = ipath->fs_root;
2597	struct btrfs_path *path = ipath->btrfs_path;
2598	struct extent_buffer *eb;
2599	struct btrfs_inode_ref *iref;
2600	struct btrfs_key found_key;
2601
2602	while (!ret) {
2603	ret = btrfs_find_item(fs_root, path, inum,
2604	ioff: parent ? parent + `1` : `0`, BTRFS_INODE_REF_KEY,
2605	found_key: &found_key);
2606
2607	if (ret < `0`)
2608	break;
2609	if (ret) {
2610	ret = found ? `0` : -ENOENT;
2611	break;
2612	}
2613	++found;
2614
2615	parent = found_key.offset;
2616	slot = path->slots[`0`];
2617	eb = btrfs_clone_extent_buffer(src: path->nodes[`0`]);
2618	if (!eb) {
2619	ret = -ENOMEM;
2620	break;
2621	}
2622	btrfs_release_path(p: path);
2623
2624	iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
2625
2626	for (cur = `0`; cur < btrfs_item_size(eb, slot); cur += len) {
2627	name_len = btrfs_inode_ref_name_len(eb, s: iref);
2628	/ path must be released before calling iterate()! /
2629	btrfs_debug(fs_root->fs_info,
2630	"following ref at offset %u for inode %llu in tree %llu",
2631	cur, found_key.objectid,
2632	fs_root->root_key.objectid);
2633	ret = inode_to_path(inum: parent, name_len,
2634	name_off: (unsigned long)(iref + `1`), eb, ipath);
2635	if (ret)
2636	break;
2637	len = sizeof(*iref) + name_len;
2638	iref = (struct btrfs_inode_ref )((char* *)iref + len);
2639	}
2640	free_extent_buffer(eb);
2641	}
2642
2643	btrfs_release_path(p: path);
2644
2645	return ret;
2646	}
2647
2648	static int iterate_inode_extrefs(u64 inum, struct inode_fs_paths *ipath)
2649	{
2650	int ret;
2651	int slot;
2652	u64 offset = `0`;
2653	u64 parent;
2654	int found = `0`;
2655	struct btrfs_root *fs_root = ipath->fs_root;
2656	struct btrfs_path *path = ipath->btrfs_path;
2657	struct extent_buffer *eb;
2658	struct btrfs_inode_extref *extref;
2659	u32 item_size;
2660	u32 cur_offset;
2661	unsigned long ptr;
2662
2663	while (`1`) {
2664	ret = btrfs_find_one_extref(root: fs_root, inode_objectid: inum, start_off: offset, path, ret_extref: &extref,
2665	found_off: &offset);
2666	if (ret < `0`)
2667	break;
2668	if (ret) {
2669	ret = found ? `0` : -ENOENT;
2670	break;
2671	}
2672	++found;
2673
2674	slot = path->slots[`0`];
2675	eb = btrfs_clone_extent_buffer(src: path->nodes[`0`]);
2676	if (!eb) {
2677	ret = -ENOMEM;
2678	break;
2679	}
2680	btrfs_release_path(p: path);
2681
2682	item_size = btrfs_item_size(eb, slot);
2683	ptr = btrfs_item_ptr_offset(eb, slot);
2684	cur_offset = `0`;
2685
2686	while (cur_offset < item_size) {
2687	u32 name_len;
2688
2689	extref = (struct btrfs_inode_extref *)(ptr + cur_offset);
2690	parent = btrfs_inode_extref_parent(eb, s: extref);
2691	name_len = btrfs_inode_extref_name_len(eb, s: extref);
2692	ret = inode_to_path(inum: parent, name_len,
2693	name_off: (unsigned long)&extref->name, eb, ipath);
2694	if (ret)
2695	break;
2696
2697	cur_offset += btrfs_inode_extref_name_len(eb, s: extref);
2698	cur_offset += sizeof(*extref);
2699	}
2700	free_extent_buffer(eb);
2701
2702	offset++;
2703	}
2704
2705	btrfs_release_path(p: path);
2706
2707	return ret;
2708	}
2709
2710	/*
2711	* returns 0 if the path could be dumped (probably truncated)
2712	* returns <0 in case of an error
2713	*/
2714	static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off,
2715	struct extent_buffer eb, struct* inode_fs_paths *ipath)
2716	{
2717	char *fspath;
2718	char *fspath_min;
2719	int i = ipath->fspath->elem_cnt;
2720	const int s_ptr = sizeof(char *);
2721	u32 bytes_left;
2722
2723	bytes_left = ipath->fspath->bytes_left > s_ptr ?
2724	ipath->fspath->bytes_left - s_ptr : `0`;
2725
2726	fspath_min = (char )ipath->fspath->val + (i + `1`) s_ptr;
2727	fspath = btrfs_ref_to_path(fs_root: ipath->fs_root, path: ipath->btrfs_path, name_len,
2728	name_off, eb_in: eb, parent: inum, dest: fspath_min, size: bytes_left);
2729	if (IS_ERR(ptr: fspath))
2730	return PTR_ERR(ptr: fspath);
2731
2732	if (fspath > fspath_min) {
2733	ipath->fspath->val[i] = (u64)(unsigned long)fspath;
2734	++ipath->fspath->elem_cnt;
2735	ipath->fspath->bytes_left = fspath - fspath_min;
2736	} else {
2737	++ipath->fspath->elem_missed;
2738	ipath->fspath->bytes_missing += fspath_min - fspath;
2739	ipath->fspath->bytes_left = `0`;
2740	}
2741
2742	return `0`;
2743	}
2744
2745	/*
2746	* this dumps all file system paths to the inode into the ipath struct, provided
2747	* is has been created large enough. each path is zero-terminated and accessed
2748	* from ipath->fspath->val[i].
2749	* when it returns, there are ipath->fspath->elem_cnt number of paths available
2750	* in ipath->fspath->val[]. when the allocated space wasn't sufficient, the
2751	* number of missed paths is recorded in ipath->fspath->elem_missed, otherwise,
2752	* it's zero. ipath->fspath->bytes_missing holds the number of bytes that would
2753	* have been needed to return all paths.
2754	*/
2755	int paths_from_inode(u64 inum, struct inode_fs_paths *ipath)
2756	{
2757	int ret;
2758	int found_refs = `0`;
2759
2760	ret = iterate_inode_refs(inum, ipath);
2761	if (!ret)
2762	++found_refs;
2763	else if (ret != -ENOENT)
2764	return ret;
2765
2766	ret = iterate_inode_extrefs(inum, ipath);
2767	if (ret == -ENOENT && found_refs)
2768	return `0`;
2769
2770	return ret;
2771	}
2772
2773	struct btrfs_data_container *init_data_container(u32 total_bytes)
2774	{
2775	struct btrfs_data_container *data;
2776	size_t alloc_bytes;
2777
2778	alloc_bytes = max_t(size_t, total_bytes, sizeof(*data));
2779	data = kvmalloc(size: alloc_bytes, GFP_KERNEL);
2780	if (!data)
2781	return ERR_PTR(error: -ENOMEM);
2782
2783	if (total_bytes >= sizeof(*data)) {
2784	data->bytes_left = total_bytes - sizeof(*data);
2785	data->bytes_missing = `0`;
2786	} else {
2787	data->bytes_missing = sizeof(*data) - total_bytes;
2788	data->bytes_left = `0`;
2789	}
2790
2791	data->elem_cnt = `0`;
2792	data->elem_missed = `0`;
2793
2794	return data;
2795	}
2796
2797	/*
2798	* allocates space to return multiple file system paths for an inode.
2799	* total_bytes to allocate are passed, note that space usable for actual path
2800	* information will be total_bytes - sizeof(struct inode_fs_paths).
2801	* the returned pointer must be freed with free_ipath() in the end.
2802	*/
2803	struct inode_fs_paths init_ipath(s32 total_bytes, struct* btrfs_root *fs_root,
2804	struct btrfs_path *path)
2805	{
2806	struct inode_fs_paths *ifp;
2807	struct btrfs_data_container *fspath;
2808
2809	fspath = init_data_container(total_bytes);
2810	if (IS_ERR(ptr: fspath))
2811	return ERR_CAST(ptr: fspath);
2812
2813	ifp = kmalloc(size: sizeof(*ifp), GFP_KERNEL);
2814	if (!ifp) {
2815	kvfree(addr: fspath);
2816	return ERR_PTR(error: -ENOMEM);
2817	}
2818
2819	ifp->btrfs_path = path;
2820	ifp->fspath = fspath;
2821	ifp->fs_root = fs_root;
2822
2823	return ifp;
2824	}
2825
2826	void free_ipath(struct inode_fs_paths *ipath)
2827	{
2828	if (!ipath)
2829	return;
2830	kvfree(addr: ipath->fspath);
2831	kfree(objp: ipath);
2832	}
2833
2834	struct btrfs_backref_iter btrfs_backref_iter_alloc(struct* btrfs_fs_info *fs_info)
2835	{
2836	struct btrfs_backref_iter *ret;
2837
2838	ret = kzalloc(size: sizeof(*ret), GFP_NOFS);
2839	if (!ret)
2840	return NULL;
2841
2842	ret->path = btrfs_alloc_path();
2843	if (!ret->path) {
2844	kfree(objp: ret);
2845	return NULL;
2846	}
2847
2848	/ Current backref iterator only supports iteration in commit root /
2849	ret->path->search_commit_root = `1`;
2850	ret->path->skip_locking = `1`;
2851	ret->fs_info = fs_info;
2852
2853	return ret;
2854	}
2855
2856	static void btrfs_backref_iter_release(struct btrfs_backref_iter *iter)
2857	{
2858	iter->bytenr = `0`;
2859	iter->item_ptr = `0`;
2860	iter->cur_ptr = `0`;
2861	iter->end_ptr = `0`;
2862	btrfs_release_path(p: iter->path);
2863	memset(&iter->cur_key, `0`, sizeof(iter->cur_key));
2864	}
2865
2866	int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr)
2867	{
2868	struct btrfs_fs_info *fs_info = iter->fs_info;
2869	struct btrfs_root *extent_root = btrfs_extent_root(fs_info, bytenr);
2870	struct btrfs_path *path = iter->path;
2871	struct btrfs_extent_item *ei;
2872	struct btrfs_key key;
2873	int ret;
2874
2875	key.objectid = bytenr;
2876	key.type = BTRFS_METADATA_ITEM_KEY;
2877	key.offset = (u64)-`1`;
2878	iter->bytenr = bytenr;
2879
2880	ret = btrfs_search_slot(NULL, root: extent_root, key: &key, p: path, ins_len: `0`, cow: `0`);
2881	if (ret < `0`)
2882	return ret;
2883	if (ret == `0`) {
2884	/*
2885	* Key with offset -1 found, there would have to exist an extent
2886	* item with such offset, but this is out of the valid range.
2887	*/
2888	ret = -EUCLEAN;
2889	goto release;
2890	}
2891	if (path->slots[`0`] == `0`) {
2892	WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
2893	ret = -EUCLEAN;
2894	goto release;
2895	}
2896	path->slots[`0`]--;
2897
2898	btrfs_item_key_to_cpu(eb: path->nodes[`0`], cpu_key: &key, nr: path->slots[`0`]);
2899	if ((key.type != BTRFS_EXTENT_ITEM_KEY &&
2900	key.type != BTRFS_METADATA_ITEM_KEY) \|\| key.objectid != bytenr) {
2901	ret = -ENOENT;
2902	goto release;
2903	}
2904	memcpy(&iter->cur_key, &key, sizeof(key));
2905	iter->item_ptr = (u32)btrfs_item_ptr_offset(path->nodes[`0`],
2906	path->slots[`0`]);
2907	iter->end_ptr = (u32)(iter->item_ptr +
2908	btrfs_item_size(eb: path->nodes[`0`], slot: path->slots[`0`]));
2909	ei = btrfs_item_ptr(path->nodes[`0`], path->slots[`0`],
2910	struct btrfs_extent_item);
2911
2912	/*
2913	* Only support iteration on tree backref yet.
2914	*
2915	* This is an extra precaution for non skinny-metadata, where
2916	* EXTENT_ITEM is also used for tree blocks, that we can only use
2917	* extent flags to determine if it's a tree block.
2918	*/
2919	if (btrfs_extent_flags(eb: path->nodes[`0`], s: ei) & BTRFS_EXTENT_FLAG_DATA) {
2920	ret = -ENOTSUPP;
2921	goto release;
2922	}
2923	iter->cur_ptr = (u32)(iter->item_ptr + sizeof(*ei));
2924
2925	/ If there is no inline backref, go search for keyed backref /
2926	if (iter->cur_ptr >= iter->end_ptr) {
2927	ret = btrfs_next_item(root: extent_root, p: path);
2928
2929	/ No inline nor keyed ref /
2930	if (ret > `0`) {
2931	ret = -ENOENT;
2932	goto release;
2933	}
2934	if (ret < `0`)
2935	goto release;
2936
2937	btrfs_item_key_to_cpu(eb: path->nodes[`0`], cpu_key: &iter->cur_key,
2938	nr: path->slots[`0`]);
2939	if (iter->cur_key.objectid != bytenr \|\|
2940	(iter->cur_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
2941	iter->cur_key.type != BTRFS_TREE_BLOCK_REF_KEY)) {
2942	ret = -ENOENT;
2943	goto release;
2944	}
2945	iter->cur_ptr = (u32)btrfs_item_ptr_offset(path->nodes[`0`],
2946	path->slots[`0`]);
2947	iter->item_ptr = iter->cur_ptr;
2948	iter->end_ptr = (u32)(iter->item_ptr + btrfs_item_size(
2949	eb: path->nodes[`0`], slot: path->slots[`0`]));
2950	}
2951
2952	return `0`;
2953	release:
2954	btrfs_backref_iter_release(iter);
2955	return ret;
2956	}
2957
2958	static bool btrfs_backref_iter_is_inline_ref(struct btrfs_backref_iter *iter)
2959	{
2960	if (iter->cur_key.type == BTRFS_EXTENT_ITEM_KEY \|\|
2961	iter->cur_key.type == BTRFS_METADATA_ITEM_KEY)
2962	return true;
2963	return false;
2964	}
2965
2966	/*
2967	* Go to the next backref item of current bytenr, can be either inlined or
2968	* keyed.
2969	*
2970	* Caller needs to check whether it's inline ref or not by iter->cur_key.
2971	*
2972	* Return 0 if we get next backref without problem.
2973	* Return >0 if there is no extra backref for this bytenr.
2974	* Return <0 if there is something wrong happened.
2975	*/
2976	int btrfs_backref_iter_next(struct btrfs_backref_iter *iter)
2977	{
2978	struct extent_buffer *eb = iter->path->nodes[`0`];
2979	struct btrfs_root *extent_root;
2980	struct btrfs_path *path = iter->path;
2981	struct btrfs_extent_inline_ref *iref;
2982	int ret;
2983	u32 size;
2984
2985	if (btrfs_backref_iter_is_inline_ref(iter)) {
2986	/ We're still inside the inline refs /
2987	ASSERT(iter->cur_ptr < iter->end_ptr);
2988
2989	if (btrfs_backref_has_tree_block_info(iter)) {
2990	/ First tree block info /
2991	size = sizeof(struct btrfs_tree_block_info);
2992	} else {
2993	/ Use inline ref type to determine the size /
2994	int type;
2995
2996	iref = (struct btrfs_extent_inline_ref *)
2997	((unsigned long)iter->cur_ptr);
2998	type = btrfs_extent_inline_ref_type(eb, s: iref);
2999
3000	size = btrfs_extent_inline_ref_size(type);
3001	}
3002	iter->cur_ptr += size;
3003	if (iter->cur_ptr < iter->end_ptr)
3004	return `0`;
3005
3006	/ All inline items iterated, fall through /
3007	}
3008
3009	/ We're at keyed items, there is no inline item, go to the next one /
3010	extent_root = btrfs_extent_root(fs_info: iter->fs_info, bytenr: iter->bytenr);
3011	ret = btrfs_next_item(root: extent_root, p: iter->path);
3012	if (ret)
3013	return ret;
3014
3015	btrfs_item_key_to_cpu(eb: path->nodes[`0`], cpu_key: &iter->cur_key, nr: path->slots[`0`]);
3016	if (iter->cur_key.objectid != iter->bytenr \|\|
3017	(iter->cur_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
3018	iter->cur_key.type != BTRFS_SHARED_BLOCK_REF_KEY))
3019	return `1`;
3020	iter->item_ptr = (u32)btrfs_item_ptr_offset(path->nodes[`0`],
3021	path->slots[`0`]);
3022	iter->cur_ptr = iter->item_ptr;
3023	iter->end_ptr = iter->item_ptr + (u32)btrfs_item_size(eb: path->nodes[`0`],
3024	slot: path->slots[`0`]);
3025	return `0`;
3026	}
3027
3028	void btrfs_backref_init_cache(struct btrfs_fs_info *fs_info,
3029	struct btrfs_backref_cache *cache, bool is_reloc)
3030	{
3031	int i;
3032
3033	cache->rb_root = RB_ROOT;
3034	for (i = `0`; i < BTRFS_MAX_LEVEL; i++)
3035	INIT_LIST_HEAD(list: &cache->pending[i]);
3036	INIT_LIST_HEAD(list: &cache->changed);
3037	INIT_LIST_HEAD(list: &cache->detached);
3038	INIT_LIST_HEAD(list: &cache->leaves);
3039	INIT_LIST_HEAD(list: &cache->pending_edge);
3040	INIT_LIST_HEAD(list: &cache->useless_node);
3041	cache->fs_info = fs_info;
3042	cache->is_reloc = is_reloc;
3043	}
3044
3045	struct btrfs_backref_node *btrfs_backref_alloc_node(
3046	struct btrfs_backref_cache cache, u64 bytenr, int* level)
3047	{
3048	struct btrfs_backref_node *node;
3049
3050	ASSERT(level >= `0` && level < BTRFS_MAX_LEVEL);
3051	node = kzalloc(size: sizeof(*node), GFP_NOFS);
3052	if (!node)
3053	return node;
3054
3055	INIT_LIST_HEAD(list: &node->list);
3056	INIT_LIST_HEAD(list: &node->upper);
3057	INIT_LIST_HEAD(list: &node->lower);
3058	RB_CLEAR_NODE(&node->rb_node);
3059	cache->nr_nodes++;
3060	node->level = level;
3061	node->bytenr = bytenr;
3062
3063	return node;
3064	}
3065
3066	void btrfs_backref_free_node(struct btrfs_backref_cache *cache,
3067	struct btrfs_backref_node *node)
3068	{
3069	if (node) {
3070	ASSERT(list_empty(&node->list));
3071	ASSERT(list_empty(&node->lower));
3072	ASSERT(node->eb == NULL);
3073	cache->nr_nodes--;
3074	btrfs_put_root(root: node->root);
3075	kfree(objp: node);
3076	}
3077	}
3078
3079	struct btrfs_backref_edge *btrfs_backref_alloc_edge(
3080	struct btrfs_backref_cache *cache)
3081	{
3082	struct btrfs_backref_edge *edge;
3083
3084	edge = kzalloc(size: sizeof(*edge), GFP_NOFS);
3085	if (edge)
3086	cache->nr_edges++;
3087	return edge;
3088	}
3089
3090	void btrfs_backref_free_edge(struct btrfs_backref_cache *cache,
3091	struct btrfs_backref_edge *edge)
3092	{
3093	if (edge) {
3094	cache->nr_edges--;
3095	kfree(objp: edge);
3096	}
3097	}
3098
3099	void btrfs_backref_unlock_node_buffer(struct btrfs_backref_node *node)
3100	{
3101	if (node->locked) {
3102	btrfs_tree_unlock(eb: node->eb);
3103	node->locked = `0`;
3104	}
3105	}
3106
3107	void btrfs_backref_drop_node_buffer(struct btrfs_backref_node *node)
3108	{
3109	if (node->eb) {
3110	btrfs_backref_unlock_node_buffer(node);
3111	free_extent_buffer(eb: node->eb);
3112	node->eb = NULL;
3113	}
3114	}
3115
3116	/*
3117	* Drop the backref node from cache without cleaning up its children
3118	* edges.
3119	*
3120	* This can only be called on node without parent edges.
3121	* The children edges are still kept as is.
3122	*/
3123	void btrfs_backref_drop_node(struct btrfs_backref_cache *tree,
3124	struct btrfs_backref_node *node)
3125	{
3126	ASSERT(list_empty(&node->upper));
3127
3128	btrfs_backref_drop_node_buffer(node);
3129	list_del_init(entry: &node->list);
3130	list_del_init(entry: &node->lower);
3131	if (!RB_EMPTY_NODE(&node->rb_node))
3132	rb_erase(&node->rb_node, &tree->rb_root);
3133	btrfs_backref_free_node(cache: tree, node);
3134	}
3135
3136	/*
3137	* Drop the backref node from cache, also cleaning up all its
3138	* upper edges and any uncached nodes in the path.
3139	*
3140	* This cleanup happens bottom up, thus the node should either
3141	* be the lowest node in the cache or a detached node.
3142	*/
3143	void btrfs_backref_cleanup_node(struct btrfs_backref_cache *cache,
3144	struct btrfs_backref_node *node)
3145	{
3146	struct btrfs_backref_node *upper;
3147	struct btrfs_backref_edge *edge;
3148
3149	if (!node)
3150	return;
3151
3152	BUG_ON(!node->lowest && !node->detached);
3153	while (!list_empty(head: &node->upper)) {
3154	edge = list_entry(node->upper.next, struct btrfs_backref_edge,
3155	list[LOWER]);
3156	upper = edge->node[UPPER];
3157	list_del(entry: &edge->list[LOWER]);
3158	list_del(entry: &edge->list[UPPER]);
3159	btrfs_backref_free_edge(cache, edge);
3160
3161	/*
3162	* Add the node to leaf node list if no other child block
3163	* cached.
3164	*/
3165	if (list_empty(head: &upper->lower)) {
3166	list_add_tail(new: &upper->lower, head: &cache->leaves);
3167	upper->lowest = `1`;
3168	}
3169	}
3170
3171	btrfs_backref_drop_node(tree: cache, node);
3172	}
3173
3174	/*
3175	* Release all nodes/edges from current cache
3176	*/
3177	void btrfs_backref_release_cache(struct btrfs_backref_cache *cache)
3178	{
3179	struct btrfs_backref_node *node;
3180	int i;
3181
3182	while (!list_empty(head: &cache->detached)) {
3183	node = list_entry(cache->detached.next,
3184	struct btrfs_backref_node, list);
3185	btrfs_backref_cleanup_node(cache, node);
3186	}
3187
3188	while (!list_empty(head: &cache->leaves)) {
3189	node = list_entry(cache->leaves.next,
3190	struct btrfs_backref_node, lower);
3191	btrfs_backref_cleanup_node(cache, node);
3192	}
3193
3194	cache->last_trans = `0`;
3195
3196	for (i = `0`; i < BTRFS_MAX_LEVEL; i++)
3197	ASSERT(list_empty(&cache->pending[i]));
3198	ASSERT(list_empty(&cache->pending_edge));
3199	ASSERT(list_empty(&cache->useless_node));
3200	ASSERT(list_empty(&cache->changed));
3201	ASSERT(list_empty(&cache->detached));
3202	ASSERT(RB_EMPTY_ROOT(&cache->rb_root));
3203	ASSERT(!cache->nr_nodes);
3204	ASSERT(!cache->nr_edges);
3205	}
3206
3207	void btrfs_backref_link_edge(struct btrfs_backref_edge *edge,
3208	struct btrfs_backref_node *lower,
3209	struct btrfs_backref_node *upper,
3210	int link_which)
3211	{
3212	ASSERT(upper && lower && upper->level == lower->level + `1`);
3213	edge->node[LOWER] = lower;
3214	edge->node[UPPER] = upper;
3215	if (link_which & LINK_LOWER)
3216	list_add_tail(new: &edge->list[LOWER], head: &lower->upper);
3217	if (link_which & LINK_UPPER)
3218	list_add_tail(new: &edge->list[UPPER], head: &upper->lower);
3219	}
3220	/*
3221	* Handle direct tree backref
3222	*
3223	* Direct tree backref means, the backref item shows its parent bytenr
3224	* directly. This is for SHARED_BLOCK_REF backref (keyed or inlined).
3225	*
3226	* @ref_key: The converted backref key.
3227	* For keyed backref, it's the item key.
3228	* For inlined backref, objectid is the bytenr,
3229	* type is btrfs_inline_ref_type, offset is
3230	* btrfs_inline_ref_offset.
3231	*/
3232	static int handle_direct_tree_backref(struct btrfs_backref_cache *cache,
3233	struct btrfs_key *ref_key,
3234	struct btrfs_backref_node *cur)
3235	{
3236	struct btrfs_backref_edge *edge;
3237	struct btrfs_backref_node *upper;
3238	struct rb_node *rb_node;
3239
3240	ASSERT(ref_key->type == BTRFS_SHARED_BLOCK_REF_KEY);
3241
3242	/ Only reloc root uses backref pointing to itself /
3243	if (ref_key->objectid == ref_key->offset) {
3244	struct btrfs_root *root;
3245
3246	cur->is_reloc_root = `1`;
3247	/ Only reloc backref cache cares about a specific root /
3248	if (cache->is_reloc) {
3249	root = find_reloc_root(fs_info: cache->fs_info, bytenr: cur->bytenr);
3250	if (!root)
3251	return -ENOENT;
3252	cur->root = root;
3253	} else {
3254	/*
3255	* For generic purpose backref cache, reloc root node
3256	* is useless.
3257	*/
3258	list_add(new: &cur->list, head: &cache->useless_node);
3259	}
3260	return `0`;
3261	}
3262
3263	edge = btrfs_backref_alloc_edge(cache);
3264	if (!edge)
3265	return -ENOMEM;
3266
3267	rb_node = rb_simple_search(root: &cache->rb_root, bytenr: ref_key->offset);
3268	if (!rb_node) {
3269	/ Parent node not yet cached /
3270	upper = btrfs_backref_alloc_node(cache, bytenr: ref_key->offset,
3271	level: cur->level + `1`);
3272	if (!upper) {
3273	btrfs_backref_free_edge(cache, edge);
3274	return -ENOMEM;
3275	}
3276
3277	/*
3278	* Backrefs for the upper level block isn't cached, add the
3279	* block to pending list
3280	*/
3281	list_add_tail(new: &edge->list[UPPER], head: &cache->pending_edge);
3282	} else {
3283	/ Parent node already cached /
3284	upper = rb_entry(rb_node, struct btrfs_backref_node, rb_node);
3285	ASSERT(upper->checked);
3286	INIT_LIST_HEAD(list: &edge->list[UPPER]);
3287	}
3288	btrfs_backref_link_edge(edge, lower: cur, upper, LINK_LOWER);
3289	return `0`;
3290	}
3291
3292	/*
3293	* Handle indirect tree backref
3294	*
3295	* Indirect tree backref means, we only know which tree the node belongs to.
3296	* We still need to do a tree search to find out the parents. This is for
3297	* TREE_BLOCK_REF backref (keyed or inlined).
3298	*
3299	* @trans: Transaction handle.
3300	* @ref_key: The same as @ref_key in handle_direct_tree_backref()
3301	* @tree_key: The first key of this tree block.
3302	* @path: A clean (released) path, to avoid allocating path every time
3303	* the function get called.
3304	*/
3305	static int handle_indirect_tree_backref(struct btrfs_trans_handle *trans,
3306	struct btrfs_backref_cache *cache,
3307	struct btrfs_path *path,
3308	struct btrfs_key *ref_key,
3309	struct btrfs_key *tree_key,
3310	struct btrfs_backref_node *cur)
3311	{
3312	struct btrfs_fs_info *fs_info = cache->fs_info;
3313	struct btrfs_backref_node *upper;
3314	struct btrfs_backref_node *lower;
3315	struct btrfs_backref_edge *edge;
3316	struct extent_buffer *eb;
3317	struct btrfs_root *root;
3318	struct rb_node *rb_node;
3319	int level;
3320	bool need_check = true;
3321	int ret;
3322
3323	root = btrfs_get_fs_root(fs_info, objectid: ref_key->offset, check_ref: false);
3324	if (IS_ERR(ptr: root))
3325	return PTR_ERR(ptr: root);
3326	if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
3327	cur->cowonly = `1`;
3328
3329	if (btrfs_root_level(s: &root->root_item) == cur->level) {
3330	/ Tree root /
3331	ASSERT(btrfs_root_bytenr(&root->root_item) == cur->bytenr);
3332	/*
3333	* For reloc backref cache, we may ignore reloc root. But for
3334	* general purpose backref cache, we can't rely on
3335	* btrfs_should_ignore_reloc_root() as it may conflict with
3336	* current running relocation and lead to missing root.
3337	*
3338	* For general purpose backref cache, reloc root detection is
3339	* completely relying on direct backref (key->offset is parent
3340	* bytenr), thus only do such check for reloc cache.
3341	*/
3342	if (btrfs_should_ignore_reloc_root(root) && cache->is_reloc) {
3343	btrfs_put_root(root);
3344	list_add(new: &cur->list, head: &cache->useless_node);
3345	} else {
3346	cur->root = root;
3347	}
3348	return `0`;
3349	}
3350
3351	level = cur->level + `1`;
3352
3353	/ Search the tree to find parent blocks referring to the block /
3354	path->search_commit_root = `1`;
3355	path->skip_locking = `1`;
3356	path->lowest_level = level;
3357	ret = btrfs_search_slot(NULL, root, key: tree_key, p: path, ins_len: `0`, cow: `0`);
3358	path->lowest_level = `0`;
3359	if (ret < `0`) {
3360	btrfs_put_root(root);
3361	return ret;
3362	}
3363	if (ret > `0` && path->slots[level] > `0`)
3364	path->slots[level]--;
3365
3366	eb = path->nodes[level];
3367	if (btrfs_node_blockptr(eb, nr: path->slots[level]) != cur->bytenr) {
3368	btrfs_err(fs_info,
3369	"couldn't find block (%llu) (level %d) in tree (%llu) with key (%llu %u %llu)",
3370	cur->bytenr, level - `1`, root->root_key.objectid,
3371	tree_key->objectid, tree_key->type, tree_key->offset);
3372	btrfs_put_root(root);
3373	ret = -ENOENT;
3374	goto out;
3375	}
3376	lower = cur;
3377
3378	/ Add all nodes and edges in the path /
3379	for (; level < BTRFS_MAX_LEVEL; level++) {
3380	if (!path->nodes[level]) {
3381	ASSERT(btrfs_root_bytenr(&root->root_item) ==
3382	lower->bytenr);
3383	/ Same as previous should_ignore_reloc_root() call /
3384	if (btrfs_should_ignore_reloc_root(root) &&
3385	cache->is_reloc) {
3386	btrfs_put_root(root);
3387	list_add(new: &lower->list, head: &cache->useless_node);
3388	} else {
3389	lower->root = root;
3390	}
3391	break;
3392	}
3393
3394	edge = btrfs_backref_alloc_edge(cache);
3395	if (!edge) {
3396	btrfs_put_root(root);
3397	ret = -ENOMEM;
3398	goto out;
3399	}
3400
3401	eb = path->nodes[level];
3402	rb_node = rb_simple_search(root: &cache->rb_root, bytenr: eb->start);
3403	if (!rb_node) {
3404	upper = btrfs_backref_alloc_node(cache, bytenr: eb->start,
3405	level: lower->level + `1`);
3406	if (!upper) {
3407	btrfs_put_root(root);
3408	btrfs_backref_free_edge(cache, edge);
3409	ret = -ENOMEM;
3410	goto out;
3411	}
3412	upper->owner = btrfs_header_owner(eb);
3413	if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
3414	upper->cowonly = `1`;
3415
3416	/*
3417	* If we know the block isn't shared we can avoid
3418	* checking its backrefs.
3419	*/
3420	if (btrfs_block_can_be_shared(trans, root, buf: eb))
3421	upper->checked = `0`;
3422	else
3423	upper->checked = `1`;
3424
3425	/*
3426	* Add the block to pending list if we need to check its
3427	* backrefs, we only do this once while walking up a
3428	* tree as we will catch anything else later on.
3429	*/
3430	if (!upper->checked && need_check) {
3431	need_check = false;
3432	list_add_tail(new: &edge->list[UPPER],
3433	head: &cache->pending_edge);
3434	} else {
3435	if (upper->checked)
3436	need_check = true;
3437	INIT_LIST_HEAD(list: &edge->list[UPPER]);
3438	}
3439	} else {
3440	upper = rb_entry(rb_node, struct btrfs_backref_node,
3441	rb_node);
3442	ASSERT(upper->checked);
3443	INIT_LIST_HEAD(list: &edge->list[UPPER]);
3444	if (!upper->owner)
3445	upper->owner = btrfs_header_owner(eb);
3446	}
3447	btrfs_backref_link_edge(edge, lower, upper, LINK_LOWER);
3448
3449	if (rb_node) {
3450	btrfs_put_root(root);
3451	break;
3452	}
3453	lower = upper;
3454	upper = NULL;
3455	}
3456	out:
3457	btrfs_release_path(p: path);
3458	return ret;
3459	}
3460
3461	/*
3462	* Add backref node @cur into @cache.
3463	*
3464	* NOTE: Even if the function returned 0, @cur is not yet cached as its upper
3465	* links aren't yet bi-directional. Needs to finish such links.
3466	* Use btrfs_backref_finish_upper_links() to finish such linkage.
3467	*
3468	* @trans: Transaction handle.
3469	* @path: Released path for indirect tree backref lookup
3470	* @iter: Released backref iter for extent tree search
3471	* @node_key: The first key of the tree block
3472	*/
3473	int btrfs_backref_add_tree_node(struct btrfs_trans_handle *trans,
3474	struct btrfs_backref_cache *cache,
3475	struct btrfs_path *path,
3476	struct btrfs_backref_iter *iter,
3477	struct btrfs_key *node_key,
3478	struct btrfs_backref_node *cur)
3479	{
3480	struct btrfs_backref_edge *edge;
3481	struct btrfs_backref_node *exist;
3482	int ret;
3483
3484	ret = btrfs_backref_iter_start(iter, bytenr: cur->bytenr);
3485	if (ret < `0`)
3486	return ret;
3487	/*
3488	* We skip the first btrfs_tree_block_info, as we don't use the key
3489	* stored in it, but fetch it from the tree block
3490	*/
3491	if (btrfs_backref_has_tree_block_info(iter)) {
3492	ret = btrfs_backref_iter_next(iter);
3493	if (ret < `0`)
3494	goto out;
3495	/ No extra backref? This means the tree block is corrupted /
3496	if (ret > `0`) {
3497	ret = -EUCLEAN;
3498	goto out;
3499	}
3500	}
3501	WARN_ON(cur->checked);
3502	if (!list_empty(head: &cur->upper)) {
3503	/*
3504	* The backref was added previously when processing backref of
3505	* type BTRFS_TREE_BLOCK_REF_KEY
3506	*/
3507	ASSERT(list_is_singular(&cur->upper));
3508	edge = list_entry(cur->upper.next, struct btrfs_backref_edge,
3509	list[LOWER]);
3510	ASSERT(list_empty(&edge->list[UPPER]));
3511	exist = edge->node[UPPER];
3512	/*
3513	* Add the upper level block to pending list if we need check
3514	* its backrefs
3515	*/
3516	if (!exist->checked)
3517	list_add_tail(new: &edge->list[UPPER], head: &cache->pending_edge);
3518	} else {
3519	exist = NULL;
3520	}
3521
3522	for (; ret == `0`; ret = btrfs_backref_iter_next(iter)) {
3523	struct extent_buffer *eb;
3524	struct btrfs_key key;
3525	int type;
3526
3527	cond_resched();
3528	eb = iter->path->nodes[`0`];
3529
3530	key.objectid = iter->bytenr;
3531	if (btrfs_backref_iter_is_inline_ref(iter)) {
3532	struct btrfs_extent_inline_ref *iref;
3533
3534	/ Update key for inline backref /
3535	iref = (struct btrfs_extent_inline_ref *)
3536	((unsigned long)iter->cur_ptr);
3537	type = btrfs_get_extent_inline_ref_type(eb, iref,
3538	is_data: BTRFS_REF_TYPE_BLOCK);
3539	if (type == BTRFS_REF_TYPE_INVALID) {
3540	ret = -EUCLEAN;
3541	goto out;
3542	}
3543	key.type = type;
3544	key.offset = btrfs_extent_inline_ref_offset(eb, s: iref);
3545	} else {
3546	key.type = iter->cur_key.type;
3547	key.offset = iter->cur_key.offset;
3548	}
3549
3550	/*
3551	* Parent node found and matches current inline ref, no need to
3552	* rebuild this node for this inline ref
3553	*/
3554	if (exist &&
3555	((key.type == BTRFS_TREE_BLOCK_REF_KEY &&
3556	exist->owner == key.offset) \|\|
3557	(key.type == BTRFS_SHARED_BLOCK_REF_KEY &&
3558	exist->bytenr == key.offset))) {
3559	exist = NULL;
3560	continue;
3561	}
3562
3563	/ SHARED_BLOCK_REF means key.offset is the parent bytenr /
3564	if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
3565	ret = handle_direct_tree_backref(cache, ref_key: &key, cur);
3566	if (ret < `0`)
3567	goto out;
3568	} else if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
3569	/*
3570	* key.type == BTRFS_TREE_BLOCK_REF_KEY, inline ref
3571	* offset means the root objectid. We need to search
3572	* the tree to get its parent bytenr.
3573	*/
3574	ret = handle_indirect_tree_backref(trans, cache, path,
3575	ref_key: &key, tree_key: node_key, cur);
3576	if (ret < `0`)
3577	goto out;
3578	}
3579	/*
3580	* Unrecognized tree backref items (if it can pass tree-checker)
3581	* would be ignored.
3582	*/
3583	}
3584	ret = `0`;
3585	cur->checked = `1`;
3586	WARN_ON(exist);
3587	out:
3588	btrfs_backref_iter_release(iter);
3589	return ret;
3590	}
3591
3592	/*
3593	* Finish the upwards linkage created by btrfs_backref_add_tree_node()
3594	*/
3595	int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache,
3596	struct btrfs_backref_node *start)
3597	{
3598	struct list_head *useless_node = &cache->useless_node;
3599	struct btrfs_backref_edge *edge;
3600	struct rb_node *rb_node;
3601	LIST_HEAD(pending_edge);
3602
3603	ASSERT(start->checked);
3604
3605	/ Insert this node to cache if it's not COW-only /
3606	if (!start->cowonly) {
3607	rb_node = rb_simple_insert(root: &cache->rb_root, bytenr: start->bytenr,
3608	node: &start->rb_node);
3609	if (rb_node)
3610	btrfs_backref_panic(fs_info: cache->fs_info, bytenr: start->bytenr,
3611	error: -EEXIST);
3612	list_add_tail(new: &start->lower, head: &cache->leaves);
3613	}
3614
3615	/*
3616	* Use breadth first search to iterate all related edges.
3617	*
3618	* The starting points are all the edges of this node
3619	*/
3620	list_for_each_entry(edge, &start->upper, list[LOWER])
3621	list_add_tail(new: &edge->list[UPPER], head: &pending_edge);
3622
3623	while (!list_empty(head: &pending_edge)) {
3624	struct btrfs_backref_node *upper;
3625	struct btrfs_backref_node *lower;
3626
3627	edge = list_first_entry(&pending_edge,
3628	struct btrfs_backref_edge, list[UPPER]);
3629	list_del_init(entry: &edge->list[UPPER]);
3630	upper = edge->node[UPPER];
3631	lower = edge->node[LOWER];
3632
3633	/ Parent is detached, no need to keep any edges /
3634	if (upper->detached) {
3635	list_del(entry: &edge->list[LOWER]);
3636	btrfs_backref_free_edge(cache, edge);
3637
3638	/ Lower node is orphan, queue for cleanup /
3639	if (list_empty(head: &lower->upper))
3640	list_add(new: &lower->list, head: useless_node);
3641	continue;
3642	}
3643
3644	/*
3645	* All new nodes added in current build_backref_tree() haven't
3646	* been linked to the cache rb tree.
3647	* So if we have upper->rb_node populated, this means a cache
3648	* hit. We only need to link the edge, as @upper and all its
3649	* parents have already been linked.
3650	*/
3651	if (!RB_EMPTY_NODE(&upper->rb_node)) {
3652	if (upper->lowest) {
3653	list_del_init(entry: &upper->lower);
3654	upper->lowest = `0`;
3655	}
3656
3657	list_add_tail(new: &edge->list[UPPER], head: &upper->lower);
3658	continue;
3659	}
3660
3661	/ Sanity check, we shouldn't have any unchecked nodes /
3662	if (!upper->checked) {
3663	ASSERT(`0`);
3664	return -EUCLEAN;
3665	}
3666
3667	/ Sanity check, COW-only node has non-COW-only parent /
3668	if (start->cowonly != upper->cowonly) {
3669	ASSERT(`0`);
3670	return -EUCLEAN;
3671	}
3672
3673	/ Only cache non-COW-only (subvolume trees) tree blocks /
3674	if (!upper->cowonly) {
3675	rb_node = rb_simple_insert(root: &cache->rb_root, bytenr: upper->bytenr,
3676	node: &upper->rb_node);
3677	if (rb_node) {
3678	btrfs_backref_panic(fs_info: cache->fs_info,
3679	bytenr: upper->bytenr, error: -EEXIST);
3680	return -EUCLEAN;
3681	}
3682	}
3683
3684	list_add_tail(new: &edge->list[UPPER], head: &upper->lower);
3685
3686	/*
3687	* Also queue all the parent edges of this uncached node
3688	* to finish the upper linkage
3689	*/
3690	list_for_each_entry(edge, &upper->upper, list[LOWER])
3691	list_add_tail(new: &edge->list[UPPER], head: &pending_edge);
3692	}
3693	return `0`;
3694	}
3695
3696	void btrfs_backref_error_cleanup(struct btrfs_backref_cache *cache,
3697	struct btrfs_backref_node *node)
3698	{
3699	struct btrfs_backref_node *lower;
3700	struct btrfs_backref_node *upper;
3701	struct btrfs_backref_edge *edge;
3702
3703	while (!list_empty(head: &cache->useless_node)) {
3704	lower = list_first_entry(&cache->useless_node,
3705	struct btrfs_backref_node, list);
3706	list_del_init(entry: &lower->list);
3707	}
3708	while (!list_empty(head: &cache->pending_edge)) {
3709	edge = list_first_entry(&cache->pending_edge,
3710	struct btrfs_backref_edge, list[UPPER]);
3711	list_del(entry: &edge->list[UPPER]);
3712	list_del(entry: &edge->list[LOWER]);
3713	lower = edge->node[LOWER];
3714	upper = edge->node[UPPER];
3715	btrfs_backref_free_edge(cache, edge);
3716
3717	/*
3718	* Lower is no longer linked to any upper backref nodes and
3719	* isn't in the cache, we can free it ourselves.
3720	*/
3721	if (list_empty(head: &lower->upper) &&
3722	RB_EMPTY_NODE(&lower->rb_node))
3723	list_add(new: &lower->list, head: &cache->useless_node);
3724
3725	if (!RB_EMPTY_NODE(&upper->rb_node))
3726	continue;
3727
3728	/ Add this guy's upper edges to the list to process /
3729	list_for_each_entry(edge, &upper->upper, list[LOWER])
3730	list_add_tail(new: &edge->list[UPPER],
3731	head: &cache->pending_edge);
3732	if (list_empty(head: &upper->upper))
3733	list_add(new: &upper->list, head: &cache->useless_node);
3734	}
3735
3736	while (!list_empty(head: &cache->useless_node)) {
3737	lower = list_first_entry(&cache->useless_node,
3738	struct btrfs_backref_node, list);
3739	list_del_init(entry: &lower->list);
3740	if (lower == node)
3741	node = NULL;
3742	btrfs_backref_drop_node(tree: cache, node: lower);
3743	}
3744
3745	btrfs_backref_cleanup_node(cache, node);
3746	ASSERT(list_empty(&cache->useless_node) &&
3747	list_empty(&cache->pending_edge));
3748	}
3749

source code of linux/fs/btrfs/backref.c