backref.c source code [linux/fs/btrfs/backref.c]

1	// SPDX-License-Identifier: GPL-2.0
2	/*
3	* Copyright (C) 2011 STRATO. All rights reserved.
4	*/
5
6	#include <linux/mm.h>
7	#include <linux/rbtree.h>
8	#include <trace/events/btrfs.h>
9	#include "ctree.h"
10	#include "disk-io.h"
11	#include "backref.h"
12	#include "ulist.h"
13	#include "transaction.h"
14	#include "delayed-ref.h"
15	#include "locking.h"
16	#include "misc.h"
17	#include "tree-mod-log.h"
18	#include "fs.h"
19	#include "accessors.h"
20	#include "extent-tree.h"
21	#include "relocation.h"
22	#include "tree-checker.h"
23
/*
 * Just arbitrary numbers so we can be sure one of these happened.
 *
 * They are used as positive, non-errno return values by the backref walking
 * helpers in this file (see extent_is_shared()).
 */
#define BACKREF_FOUND_SHARED     6
#define BACKREF_FOUND_NOT_SHARED 7
27
28	struct extent_inode_elem {
29	u64 inum;
30	u64 offset;
31	u64 num_bytes;
32	struct extent_inode_elem *next;
33	};
34
35	static int check_extent_in_eb(struct btrfs_backref_walk_ctx *ctx,
36	const struct btrfs_key *key,
37	const struct extent_buffer *eb,
38	const struct btrfs_file_extent_item *fi,
39	struct extent_inode_elem **eie)
40	{
41	const u64 data_len = btrfs_file_extent_num_bytes(eb, s: fi);
42	u64 offset = key->offset;
43	struct extent_inode_elem *e;
44	const u64 *root_ids;
45	int root_count;
46	bool cached;
47
48	if (!ctx->ignore_extent_item_pos &&
49	!btrfs_file_extent_compression(eb, s: fi) &&
50	!btrfs_file_extent_encryption(eb, s: fi) &&
51	!btrfs_file_extent_other_encoding(eb, s: fi)) {
52	u64 data_offset;
53
54	data_offset = btrfs_file_extent_offset(eb, s: fi);
55
56	if (ctx->extent_item_pos < data_offset \|\|
57	ctx->extent_item_pos >= data_offset + data_len)
58	return `1`;
59	offset += ctx->extent_item_pos - data_offset;
60	}
61
62	if (!ctx->indirect_ref_iterator \|\| !ctx->cache_lookup)
63	goto add_inode_elem;
64
65	cached = ctx->cache_lookup(eb->start, ctx->user_ctx, &root_ids,
66	&root_count);
67	if (!cached)
68	goto add_inode_elem;
69
70	for (int i = `0`; i < root_count; i++) {
71	int ret;
72
73	ret = ctx->indirect_ref_iterator(key->objectid, offset,
74	data_len, root_ids[i],
75	ctx->user_ctx);
76	if (ret)
77	return ret;
78	}
79
80	add_inode_elem:
81	e = kmalloc(sizeof(*e), GFP_NOFS);
82	if (!e)
83	return -ENOMEM;
84
85	e->next = *eie;
86	e->inum = key->objectid;
87	e->offset = offset;
88	e->num_bytes = data_len;
89	*eie = e;
90
91	return `0`;
92	}
93
94	static void free_inode_elem_list(struct extent_inode_elem *eie)
95	{
96	struct extent_inode_elem *eie_next;
97
98	for (; eie; eie = eie_next) {
99	eie_next = eie->next;
100	kfree(objp: eie);
101	}
102	}
103
104	static int find_extent_in_eb(struct btrfs_backref_walk_ctx *ctx,
105	const struct extent_buffer *eb,
106	struct extent_inode_elem **eie)
107	{
108	u64 disk_byte;
109	struct btrfs_key key;
110	struct btrfs_file_extent_item *fi;
111	int slot;
112	int nritems;
113	int extent_type;
114	int ret;
115
116	/*
117	* from the shared data ref, we only have the leaf but we need
118	* the key. thus, we must look into all items and see that we
119	* find one (some) with a reference to our extent item.
120	*/
121	nritems = btrfs_header_nritems(eb);
122	for (slot = `0`; slot < nritems; ++slot) {
123	btrfs_item_key_to_cpu(eb, cpu_key: &key, nr: slot);
124	if (key.type != BTRFS_EXTENT_DATA_KEY)
125	continue;
126	fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
127	extent_type = btrfs_file_extent_type(eb, s: fi);
128	if (extent_type == BTRFS_FILE_EXTENT_INLINE)
129	continue;
130	/ don't skip BTRFS_FILE_EXTENT_PREALLOC, we can handle that /
131	disk_byte = btrfs_file_extent_disk_bytenr(eb, s: fi);
132	if (disk_byte != ctx->bytenr)
133	continue;
134
135	ret = check_extent_in_eb(ctx, key: &key, eb, fi, eie);
136	if (ret == BTRFS_ITERATE_EXTENT_INODES_STOP \|\| ret < `0`)
137	return ret;
138	}
139
140	return `0`;
141	}
142
143	struct preftree {
144	struct rb_root_cached root;
145	unsigned int count;
146	};
147
148	#define PREFTREE_INIT { .root = RB_ROOT_CACHED, .count = 0 }
149
150	struct preftrees {
151	struct preftree direct; / BTRFS_SHARED_[DATA\|BLOCK]_REF_KEY /
152	struct preftree indirect; / BTRFS_[TREE_BLOCK\|EXTENT_DATA]_REF_KEY /
153	struct preftree indirect_missing_keys;
154	};
155
156	/*
157	* Checks for a shared extent during backref search.
158	*
159	* The share_count tracks prelim_refs (direct and indirect) having a
160	* ref->count >0:
161	* - incremented when a ref->count transitions to >0
162	* - decremented when a ref->count transitions to <1
163	*/
164	struct share_check {
165	struct btrfs_backref_share_check_ctx *ctx;
166	struct btrfs_root *root;
167	u64 inum;
168	u64 data_bytenr;
169	u64 data_extent_gen;
170	/*
171	* Counts number of inodes that refer to an extent (different inodes in
172	* the same root or different roots) that we could find. The sharedness
173	* check typically stops once this counter gets greater than 1, so it
174	* may not reflect the total number of inodes.
175	*/
176	int share_count;
177	/*
178	* The number of times we found our inode refers to the data extent we
179	* are determining the sharedness. In other words, how many file extent
180	* items we could find for our inode that point to our target data
181	* extent. The value we get here after finishing the extent sharedness
182	* check may be smaller than reality, but if it ends up being greater
183	* than 1, then we know for sure the inode has multiple file extent
184	* items that point to our inode, and we can safely assume it's useful
185	* to cache the sharedness check result.
186	*/
187	int self_ref_count;
188	bool have_delayed_delete_refs;
189	};
190
191	static inline int extent_is_shared(struct share_check *sc)
192	{
193	return (sc && sc->share_count > `1`) ? BACKREF_FOUND_SHARED : `0`;
194	}
195
/* Slab cache backing all struct prelim_ref allocations in this file. */
static struct kmem_cache *btrfs_prelim_ref_cache;
197
198	int __init btrfs_prelim_ref_init(void)
199	{
200	btrfs_prelim_ref_cache = kmem_cache_create("btrfs_prelim_ref",
201	sizeof(struct prelim_ref), `0`, `0`, NULL);
202	if (!btrfs_prelim_ref_cache)
203	return -ENOMEM;
204	return `0`;
205	}
206
207	void __cold btrfs_prelim_ref_exit(void)
208	{
209	kmem_cache_destroy(s: btrfs_prelim_ref_cache);
210	}
211
212	static void free_pref(struct prelim_ref *ref)
213	{
214	kmem_cache_free(s: btrfs_prelim_ref_cache, objp: ref);
215	}
216
217	/*
218	* Return 0 when both refs are for the same block (and can be merged).
219	* A -1 return indicates ref1 is a 'lower' block than ref2, while 1
220	* indicates a 'higher' block.
221	*/
222	static int prelim_ref_compare(const struct prelim_ref *ref1,
223	const struct prelim_ref *ref2)
224	{
225	if (ref1->level < ref2->level)
226	return -`1`;
227	if (ref1->level > ref2->level)
228	return `1`;
229	if (ref1->root_id < ref2->root_id)
230	return -`1`;
231	if (ref1->root_id > ref2->root_id)
232	return `1`;
233	if (ref1->key_for_search.type < ref2->key_for_search.type)
234	return -`1`;
235	if (ref1->key_for_search.type > ref2->key_for_search.type)
236	return `1`;
237	if (ref1->key_for_search.objectid < ref2->key_for_search.objectid)
238	return -`1`;
239	if (ref1->key_for_search.objectid > ref2->key_for_search.objectid)
240	return `1`;
241	if (ref1->key_for_search.offset < ref2->key_for_search.offset)
242	return -`1`;
243	if (ref1->key_for_search.offset > ref2->key_for_search.offset)
244	return `1`;
245	if (ref1->parent < ref2->parent)
246	return -`1`;
247	if (ref1->parent > ref2->parent)
248	return `1`;
249
250	return `0`;
251	}
252
253	static int prelim_ref_rb_add_cmp(const struct rb_node *new,
254	const struct rb_node *exist)
255	{
256	const struct prelim_ref *ref_new =
257	rb_entry(new, struct prelim_ref, rbnode);
258	const struct prelim_ref *ref_exist =
259	rb_entry(exist, struct prelim_ref, rbnode);
260
261	/*
262	* prelim_ref_compare() expects the first parameter as the existing one,
263	* different from the rb_find_add_cached() order.
264	*/
265	return prelim_ref_compare(ref1: ref_exist, ref2: ref_new);
266	}
267
268	static void update_share_count(struct share_check sc, int* oldcount,
269	int newcount, const struct prelim_ref *newref)
270	{
271	if ((!sc) \|\| (oldcount == `0` && newcount < `1`))
272	return;
273
274	if (oldcount > `0` && newcount < `1`)
275	sc->share_count--;
276	else if (oldcount < `1` && newcount > `0`)
277	sc->share_count++;
278
279	if (newref->root_id == btrfs_root_id(root: sc->root) &&
280	newref->wanted_disk_byte == sc->data_bytenr &&
281	newref->key_for_search.objectid == sc->inum)
282	sc->self_ref_count += newref->count;
283	}
284
285	/*
286	* Add @newref to the @root rbtree, merging identical refs.
287	*
288	* Callers should assume that newref has been freed after calling.
289	*/
290	static void prelim_ref_insert(const struct btrfs_fs_info *fs_info,
291	struct preftree *preftree,
292	struct prelim_ref *newref,
293	struct share_check *sc)
294	{
295	struct rb_root_cached *root;
296	struct rb_node *exist;
297
298	root = &preftree->root;
299	exist = rb_find_add_cached(node: &newref->rbnode, tree: root, cmp: prelim_ref_rb_add_cmp);
300	if (exist) {
301	struct prelim_ref ref = rb_entry(exist, struct* prelim_ref, rbnode);
302	/ Identical refs, merge them and free @newref /
303	struct extent_inode_elem *eie = ref->inode_list;
304
305	while (eie && eie->next)
306	eie = eie->next;
307
308	if (!eie)
309	ref->inode_list = newref->inode_list;
310	else
311	eie->next = newref->inode_list;
312	trace_btrfs_prelim_ref_merge(fs_info, oldref: ref, newref,
313	tree_size: preftree->count);
314	/*
315	* A delayed ref can have newref->count < 0.
316	* The ref->count is updated to follow any
317	* BTRFS_[ADD\|DROP]_DELAYED_REF actions.
318	*/
319	update_share_count(sc, oldcount: ref->count,
320	newcount: ref->count + newref->count, newref);
321	ref->count += newref->count;
322	free_pref(ref: newref);
323	return;
324	}
325
326	update_share_count(sc, oldcount: `0`, newcount: newref->count, newref);
327	preftree->count++;
328	trace_btrfs_prelim_ref_insert(fs_info, oldref: newref, NULL, tree_size: preftree->count);
329	}
330
331	/*
332	* Release the entire tree. We don't care about internal consistency so
333	* just free everything and then reset the tree root.
334	*/
335	static void prelim_release(struct preftree *preftree)
336	{
337	struct prelim_ref ref, next_ref;
338
339	rbtree_postorder_for_each_entry_safe(ref, next_ref,
340	&preftree->root.rb_root, rbnode) {
341	free_inode_elem_list(eie: ref->inode_list);
342	free_pref(ref);
343	}
344
345	preftree->root = RB_ROOT_CACHED;
346	preftree->count = `0`;
347	}
348
349	/*
350	* the rules for all callers of this function are:
351	* - obtaining the parent is the goal
352	* - if you add a key, you must know that it is a correct key
353	* - if you cannot add the parent or a correct key, then we will look into the
354	* block later to set a correct key
355	*
356	* delayed refs
357	* ============
358	* backref type \| shared \| indirect \| shared \| indirect
359	* information \| tree \| tree \| data \| data
360	* --------------------+--------+----------+--------+----------
361	* parent logical \| y \| - \| - \| -
362	* key to resolve \| - \| y \| y \| y
363	* tree block logical \| - \| - \| - \| -
364	* root for resolving \| y \| y \| y \| y
365	*
366	* - column 1: we've the parent -> done
367	* - column 2, 3, 4: we use the key to find the parent
368	*
369	* on disk refs (inline or keyed)
370	* ==============================
371	* backref type \| shared \| indirect \| shared \| indirect
372	* information \| tree \| tree \| data \| data
373	* --------------------+--------+----------+--------+----------
374	* parent logical \| y \| - \| y \| -
375	* key to resolve \| - \| - \| - \| y
376	* tree block logical \| y \| y \| y \| y
377	* root for resolving \| - \| y \| y \| y
378	*
379	* - column 1, 3: we've the parent -> done
380	* - column 2: we take the first key from the block to find the parent
381	* (see add_missing_keys)
382	* - column 4: we use the key to find the parent
383	*
384	* additional information that's available but not required to find the parent
385	* block might help in merging entries to gain some speed.
386	*/
387	static int add_prelim_ref(const struct btrfs_fs_info *fs_info,
388	struct preftree *preftree, u64 root_id,
389	const struct btrfs_key key, int* level, u64 parent,
390	u64 wanted_disk_byte, int count,
391	struct share_check *sc, gfp_t gfp_mask)
392	{
393	struct prelim_ref *ref;
394
395	if (root_id == BTRFS_DATA_RELOC_TREE_OBJECTID)
396	return `0`;
397
398	ref = kmem_cache_alloc(btrfs_prelim_ref_cache, gfp_mask);
399	if (!ref)
400	return -ENOMEM;
401
402	ref->root_id = root_id;
403	if (key)
404	ref->key_for_search = *key;
405	else
406	memset(&ref->key_for_search, `0`, sizeof(ref->key_for_search));
407
408	ref->inode_list = NULL;
409	ref->level = level;
410	ref->count = count;
411	ref->parent = parent;
412	ref->wanted_disk_byte = wanted_disk_byte;
413	prelim_ref_insert(fs_info, preftree, newref: ref, sc);
414	return extent_is_shared(sc);
415	}
416
417	/ direct refs use root == 0, key == NULL /
418	static int add_direct_ref(const struct btrfs_fs_info *fs_info,
419	struct preftrees preftrees, int* level, u64 parent,
420	u64 wanted_disk_byte, int count,
421	struct share_check *sc, gfp_t gfp_mask)
422	{
423	return add_prelim_ref(fs_info, preftree: &preftrees->direct, root_id: `0`, NULL, level,
424	parent, wanted_disk_byte, count, sc, gfp_mask);
425	}
426
427	/ indirect refs use parent == 0 /
428	static int add_indirect_ref(const struct btrfs_fs_info *fs_info,
429	struct preftrees *preftrees, u64 root_id,
430	const struct btrfs_key key, int* level,
431	u64 wanted_disk_byte, int count,
432	struct share_check *sc, gfp_t gfp_mask)
433	{
434	struct preftree *tree = &preftrees->indirect;
435
436	if (!key)
437	tree = &preftrees->indirect_missing_keys;
438	return add_prelim_ref(fs_info, preftree: tree, root_id, key, level, parent: `0`,
439	wanted_disk_byte, count, sc, gfp_mask);
440	}
441
442	static int is_shared_data_backref(struct preftrees *preftrees, u64 bytenr)
443	{
444	struct rb_node **p = &preftrees->direct.root.rb_root.rb_node;
445	struct rb_node *parent = NULL;
446	struct prelim_ref *ref = NULL;
447	struct prelim_ref target = {};
448	int result;
449
450	target.parent = bytenr;
451
452	while (*p) {
453	parent = *p;
454	ref = rb_entry(parent, struct prelim_ref, rbnode);
455	result = prelim_ref_compare(ref1: ref, ref2: &target);
456
457	if (result < `0`)
458	p = &(*p)->rb_left;
459	else if (result > `0`)
460	p = &(*p)->rb_right;
461	else
462	return `1`;
463	}
464	return `0`;
465	}
466
467	static int add_all_parents(struct btrfs_backref_walk_ctx *ctx,
468	struct btrfs_root root, struct* btrfs_path *path,
469	struct ulist *parents,
470	struct preftrees preftrees, struct* prelim_ref *ref,
471	int level)
472	{
473	int ret = `0`;
474	int slot;
475	struct extent_buffer *eb;
476	struct btrfs_key key;
477	struct btrfs_key *key_for_search = &ref->key_for_search;
478	struct btrfs_file_extent_item *fi;
479	struct extent_inode_elem eie = NULL, old = NULL;
480	u64 disk_byte;
481	u64 wanted_disk_byte = ref->wanted_disk_byte;
482	u64 count = `0`;
483	u64 data_offset;
484	u8 type;
485
486	if (level != `0`) {
487	eb = path->nodes[level];
488	ret = ulist_add(ulist: parents, val: eb->start, aux: `0`, GFP_NOFS);
489	if (ret < `0`)
490	return ret;
491	return `0`;
492	}
493
494	/*
495	* 1. We normally enter this function with the path already pointing to
496	* the first item to check. But sometimes, we may enter it with
497	* slot == nritems.
498	* 2. We are searching for normal backref but bytenr of this leaf
499	* matches shared data backref
500	* 3. The leaf owner is not equal to the root we are searching
501	*
502	* For these cases, go to the next leaf before we continue.
503	*/
504	eb = path->nodes[`0`];
505	if (path->slots[`0`] >= btrfs_header_nritems(eb) \|\|
506	is_shared_data_backref(preftrees, bytenr: eb->start) \|\|
507	ref->root_id != btrfs_header_owner(eb)) {
508	if (ctx->time_seq == BTRFS_SEQ_LAST)
509	ret = btrfs_next_leaf(root, path);
510	else
511	ret = btrfs_next_old_leaf(root, path, time_seq: ctx->time_seq);
512	}
513
514	while (!ret && count < ref->count) {
515	eb = path->nodes[`0`];
516	slot = path->slots[`0`];
517
518	btrfs_item_key_to_cpu(eb, cpu_key: &key, nr: slot);
519
520	if (key.objectid != key_for_search->objectid \|\|
521	key.type != BTRFS_EXTENT_DATA_KEY)
522	break;
523
524	/*
525	* We are searching for normal backref but bytenr of this leaf
526	* matches shared data backref, OR
527	* the leaf owner is not equal to the root we are searching for
528	*/
529	if (slot == `0` &&
530	(is_shared_data_backref(preftrees, bytenr: eb->start) \|\|
531	ref->root_id != btrfs_header_owner(eb))) {
532	if (ctx->time_seq == BTRFS_SEQ_LAST)
533	ret = btrfs_next_leaf(root, path);
534	else
535	ret = btrfs_next_old_leaf(root, path, time_seq: ctx->time_seq);
536	continue;
537	}
538	fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
539	type = btrfs_file_extent_type(eb, s: fi);
540	if (type == BTRFS_FILE_EXTENT_INLINE)
541	goto next;
542	disk_byte = btrfs_file_extent_disk_bytenr(eb, s: fi);
543	data_offset = btrfs_file_extent_offset(eb, s: fi);
544
545	if (disk_byte == wanted_disk_byte) {
546	eie = NULL;
547	old = NULL;
548	if (ref->key_for_search.offset == key.offset - data_offset)
549	count++;
550	else
551	goto next;
552	if (!ctx->skip_inode_ref_list) {
553	ret = check_extent_in_eb(ctx, key: &key, eb, fi, eie: &eie);
554	if (ret == BTRFS_ITERATE_EXTENT_INODES_STOP \|\|
555	ret < `0`)
556	break;
557	}
558	if (ret > `0`)
559	goto next;
560	ret = ulist_add_merge_ptr(ulist: parents, val: eb->start,
561	aux: eie, old_aux: (void **)&old, GFP_NOFS);
562	if (ret < `0`)
563	break;
564	if (!ret && !ctx->skip_inode_ref_list) {
565	while (old->next)
566	old = old->next;
567	old->next = eie;
568	}
569	eie = NULL;
570	}
571	next:
572	if (ctx->time_seq == BTRFS_SEQ_LAST)
573	ret = btrfs_next_item(root, p: path);
574	else
575	ret = btrfs_next_old_item(root, path, time_seq: ctx->time_seq);
576	}
577
578	if (ret == BTRFS_ITERATE_EXTENT_INODES_STOP \|\| ret < `0`)
579	free_inode_elem_list(eie);
580	else if (ret > `0`)
581	ret = `0`;
582
583	return ret;
584	}
585
586	/*
587	* resolve an indirect backref in the form (root_id, key, level)
588	* to a logical address
589	*/
590	static int resolve_indirect_ref(struct btrfs_backref_walk_ctx *ctx,
591	struct btrfs_path *path,
592	struct preftrees *preftrees,
593	struct prelim_ref ref, struct* ulist *parents)
594	{
595	struct btrfs_root *root;
596	struct extent_buffer *eb;
597	int ret = `0`;
598	int root_level;
599	int level = ref->level;
600	struct btrfs_key search_key = ref->key_for_search;
601
602	/*
603	* If we're search_commit_root we could possibly be holding locks on
604	* other tree nodes. This happens when qgroups does backref walks when
605	* adding new delayed refs. To deal with this we need to look in cache
606	* for the root, and if we don't find it then we need to search the
607	* tree_root's commit root, thus the btrfs_get_fs_root_commit_root usage
608	* here.
609	*/
610	if (path->search_commit_root)
611	root = btrfs_get_fs_root_commit_root(fs_info: ctx->fs_info, path, objectid: ref->root_id);
612	else
613	root = btrfs_get_fs_root(fs_info: ctx->fs_info, objectid: ref->root_id, check_ref: false);
614	if (IS_ERR(ptr: root)) {
615	ret = PTR_ERR(ptr: root);
616	goto out_free;
617	}
618
619	if (!path->search_commit_root &&
620	test_bit(BTRFS_ROOT_DELETING, &root->state)) {
621	ret = -ENOENT;
622	goto out;
623	}
624
625	if (btrfs_is_testing(fs_info: ctx->fs_info)) {
626	ret = -ENOENT;
627	goto out;
628	}
629
630	if (path->search_commit_root)
631	root_level = btrfs_header_level(eb: root->commit_root);
632	else if (ctx->time_seq == BTRFS_SEQ_LAST)
633	root_level = btrfs_header_level(eb: root->node);
634	else
635	root_level = btrfs_old_root_level(root, time_seq: ctx->time_seq);
636
637	if (root_level + `1` == level)
638	goto out;
639
640	/*
641	* We can often find data backrefs with an offset that is too large
642	* (>= LLONG_MAX, maximum allowed file offset) due to underflows when
643	* subtracting a file's offset with the data offset of its
644	* corresponding extent data item. This can happen for example in the
645	* clone ioctl.
646	*
647	* So if we detect such case we set the search key's offset to zero to
648	* make sure we will find the matching file extent item at
649	* add_all_parents(), otherwise we will miss it because the offset
650	* taken form the backref is much larger then the offset of the file
651	* extent item. This can make us scan a very large number of file
652	* extent items, but at least it will not make us miss any.
653	*
654	* This is an ugly workaround for a behaviour that should have never
655	* existed, but it does and a fix for the clone ioctl would touch a lot
656	* of places, cause backwards incompatibility and would not fix the
657	* problem for extents cloned with older kernels.
658	*/
659	if (search_key.type == BTRFS_EXTENT_DATA_KEY &&
660	search_key.offset >= LLONG_MAX)
661	search_key.offset = `0`;
662	path->lowest_level = level;
663	if (ctx->time_seq == BTRFS_SEQ_LAST)
664	ret = btrfs_search_slot(NULL, root, key: &search_key, p: path, ins_len: `0`, cow: `0`);
665	else
666	ret = btrfs_search_old_slot(root, key: &search_key, p: path, time_seq: ctx->time_seq);
667
668	btrfs_debug(ctx->fs_info,
669	"search slot in root %llu (level %d, ref count %d) returned %d for key " BTRFS_KEY_FMT,
670	ref->root_id, level, ref->count, ret,
671	BTRFS_KEY_FMT_VALUE(&ref->key_for_search));
672	if (ret < `0`)
673	goto out;
674
675	eb = path->nodes[level];
676	while (!eb) {
677	if (WARN_ON(!level)) {
678	ret = `1`;
679	goto out;
680	}
681	level--;
682	eb = path->nodes[level];
683	}
684
685	ret = add_all_parents(ctx, root, path, parents, preftrees, ref, level);
686	out:
687	btrfs_put_root(root);
688	out_free:
689	path->lowest_level = `0`;
690	btrfs_release_path(p: path);
691	return ret;
692	}
693
694	static struct extent_inode_elem *
695	unode_aux_to_inode_list(struct ulist_node *node)
696	{
697	if (!node)
698	return NULL;
699	return (struct extent_inode_elem *)(uintptr_t)node->aux;
700	}
701
702	static void free_leaf_list(struct ulist *ulist)
703	{
704	struct ulist_node *node;
705	struct ulist_iterator uiter;
706
707	ULIST_ITER_INIT(&uiter);
708	while ((node = ulist_next(ulist, uiter: &uiter)))
709	free_inode_elem_list(eie: unode_aux_to_inode_list(node));
710
711	ulist_free(ulist);
712	}
713
714	/*
715	* We maintain three separate rbtrees: one for direct refs, one for
716	* indirect refs which have a key, and one for indirect refs which do not
717	* have a key. Each tree does merge on insertion.
718	*
719	* Once all of the references are located, we iterate over the tree of
720	* indirect refs with missing keys. An appropriate key is located and
721	* the ref is moved onto the tree for indirect refs. After all missing
722	* keys are thus located, we iterate over the indirect ref tree, resolve
723	* each reference, and then insert the resolved reference onto the
724	* direct tree (merging there too).
725	*
726	* New backrefs (i.e., for parent nodes) are added to the appropriate
727	* rbtree as they are encountered. The new backrefs are subsequently
728	* resolved as above.
729	*/
730	static int resolve_indirect_refs(struct btrfs_backref_walk_ctx *ctx,
731	struct btrfs_path *path,
732	struct preftrees *preftrees,
733	struct share_check *sc)
734	{
735	int ret = `0`;
736	struct ulist *parents;
737	struct ulist_node *node;
738	struct ulist_iterator uiter;
739	struct rb_node *rnode;
740
741	parents = ulist_alloc(GFP_NOFS);
742	if (!parents)
743	return -ENOMEM;
744
745	/*
746	* We could trade memory usage for performance here by iterating
747	* the tree, allocating new refs for each insertion, and then
748	* freeing the entire indirect tree when we're done. In some test
749	* cases, the tree can grow quite large (~200k objects).
750	*/
751	while ((rnode = rb_first_cached(&preftrees->indirect.root))) {
752	struct prelim_ref *ref;
753	int ret2;
754
755	ref = rb_entry(rnode, struct prelim_ref, rbnode);
756	if (WARN(ref->parent,
757	"BUG: direct ref found in indirect tree")) {
758	ret = -EINVAL;
759	goto out;
760	}
761
762	rb_erase_cached(node: &ref->rbnode, root: &preftrees->indirect.root);
763	preftrees->indirect.count--;
764
765	if (ref->count == `0`) {
766	free_pref(ref);
767	continue;
768	}
769
770	if (sc && ref->root_id != btrfs_root_id(root: sc->root)) {
771	free_pref(ref);
772	ret = BACKREF_FOUND_SHARED;
773	goto out;
774	}
775	ret2 = resolve_indirect_ref(ctx, path, preftrees, ref, parents);
776	/*
777	* we can only tolerate ENOENT,otherwise,we should catch error
778	* and return directly.
779	*/
780	if (ret2 == -ENOENT) {
781	prelim_ref_insert(fs_info: ctx->fs_info, preftree: &preftrees->direct, newref: ref,
782	NULL);
783	continue;
784	} else if (ret2) {
785	free_pref(ref);
786	ret = ret2;
787	goto out;
788	}
789
790	/ we put the first parent into the ref at hand /
791	ULIST_ITER_INIT(&uiter);
792	node = ulist_next(ulist: parents, uiter: &uiter);
793	ref->parent = node ? node->val : `0`;
794	ref->inode_list = unode_aux_to_inode_list(node);
795
796	/ Add a prelim_ref(s) for any other parent(s). /
797	while ((node = ulist_next(ulist: parents, uiter: &uiter))) {
798	struct prelim_ref *new_ref;
799
800	new_ref = kmem_cache_alloc(btrfs_prelim_ref_cache,
801	GFP_NOFS);
802	if (!new_ref) {
803	free_pref(ref);
804	ret = -ENOMEM;
805	goto out;
806	}
807	memcpy(new_ref, ref, sizeof(*ref));
808	new_ref->parent = node->val;
809	new_ref->inode_list = unode_aux_to_inode_list(node);
810	prelim_ref_insert(fs_info: ctx->fs_info, preftree: &preftrees->direct,
811	newref: new_ref, NULL);
812	}
813
814	/*
815	* Now it's a direct ref, put it in the direct tree. We must
816	* do this last because the ref could be merged/freed here.
817	*/
818	prelim_ref_insert(fs_info: ctx->fs_info, preftree: &preftrees->direct, newref: ref, NULL);
819
820	ulist_reinit(ulist: parents);
821	cond_resched();
822	}
823	out:
824	/*
825	* We may have inode lists attached to refs in the parents ulist, so we
826	* must free them before freeing the ulist and its refs.
827	*/
828	free_leaf_list(ulist: parents);
829	return ret;
830	}
831
832	/*
833	* read tree blocks and add keys where required.
834	*/
835	static int add_missing_keys(struct btrfs_fs_info *fs_info,
836	struct preftrees *preftrees, bool lock)
837	{
838	struct prelim_ref *ref;
839	struct extent_buffer *eb;
840	struct preftree *tree = &preftrees->indirect_missing_keys;
841	struct rb_node *node;
842
843	while ((node = rb_first_cached(&tree->root))) {
844	struct btrfs_tree_parent_check check = { `0` };
845
846	ref = rb_entry(node, struct prelim_ref, rbnode);
847	rb_erase_cached(node, root: &tree->root);
848
849	BUG_ON(ref->parent); / should not be a direct ref /
850	BUG_ON(ref->key_for_search.type);
851	BUG_ON(!ref->wanted_disk_byte);
852
853	check.level = ref->level - `1`;
854	check.owner_root = ref->root_id;
855
856	eb = read_tree_block(fs_info, bytenr: ref->wanted_disk_byte, check: &check);
857	if (IS_ERR(ptr: eb)) {
858	free_pref(ref);
859	return PTR_ERR(ptr: eb);
860	}
861	if (unlikely(!extent_buffer_uptodate(eb))) {
862	free_pref(ref);
863	free_extent_buffer(eb);
864	return -EIO;
865	}
866
867	if (lock)
868	btrfs_tree_read_lock(eb);
869	if (btrfs_header_level(eb) == `0`)
870	btrfs_item_key_to_cpu(eb, cpu_key: &ref->key_for_search, nr: `0`);
871	else
872	btrfs_node_key_to_cpu(eb, cpu_key: &ref->key_for_search, nr: `0`);
873	if (lock)
874	btrfs_tree_read_unlock(eb);
875	free_extent_buffer(eb);
876	prelim_ref_insert(fs_info, preftree: &preftrees->indirect, newref: ref, NULL);
877	cond_resched();
878	}
879	return `0`;
880	}
881
882	/*
883	* add all currently queued delayed refs from this head whose seq nr is
884	* smaller or equal that seq to the list
885	*/
886	static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
887	struct btrfs_delayed_ref_head *head, u64 seq,
888	struct preftrees preftrees, struct* share_check *sc)
889	{
890	struct btrfs_delayed_ref_node *node;
891	struct btrfs_key key;
892	struct rb_node *n;
893	int count;
894	int ret = `0`;
895
896	spin_lock(lock: &head->lock);
897	for (n = rb_first_cached(&head->ref_tree); n; n = rb_next(n)) {
898	node = rb_entry(n, struct btrfs_delayed_ref_node,
899	ref_node);
900	if (node->seq > seq)
901	continue;
902
903	switch (node->action) {
904	case BTRFS_ADD_DELAYED_EXTENT:
905	case BTRFS_UPDATE_DELAYED_HEAD:
906	WARN_ON(`1`);
907	continue;
908	case BTRFS_ADD_DELAYED_REF:
909	count = node->ref_mod;
910	break;
911	case BTRFS_DROP_DELAYED_REF:
912	count = node->ref_mod * -`1`;
913	break;
914	default:
915	BUG();
916	}
917	switch (node->type) {
918	case BTRFS_TREE_BLOCK_REF_KEY: {
919	/ NORMAL INDIRECT METADATA backref /
920	struct btrfs_key *key_ptr = NULL;
921	/ The owner of a tree block ref is the level. /
922	int level = btrfs_delayed_ref_owner(node);
923
924	if (head->extent_op && head->extent_op->update_key) {
925	btrfs_disk_key_to_cpu(cpu_key: &key, disk_key: &head->extent_op->key);
926	key_ptr = &key;
927	}
928
929	ret = add_indirect_ref(fs_info, preftrees, root_id: node->ref_root,
930	key: key_ptr, level: level + `1`, wanted_disk_byte: node->bytenr,
931	count, sc, GFP_ATOMIC);
932	break;
933	}
934	case BTRFS_SHARED_BLOCK_REF_KEY: {
935	/*
936	* SHARED DIRECT METADATA backref
937	*
938	* The owner of a tree block ref is the level.
939	*/
940	int level = btrfs_delayed_ref_owner(node);
941
942	ret = add_direct_ref(fs_info, preftrees, level: level + `1`,
943	parent: node->parent, wanted_disk_byte: node->bytenr, count,
944	sc, GFP_ATOMIC);
945	break;
946	}
947	case BTRFS_EXTENT_DATA_REF_KEY: {
948	/ NORMAL INDIRECT DATA backref /
949	key.objectid = btrfs_delayed_ref_owner(node);
950	key.type = BTRFS_EXTENT_DATA_KEY;
951	key.offset = btrfs_delayed_ref_offset(node);
952
953	/*
954	* If we have a share check context and a reference for
955	* another inode, we can't exit immediately. This is
956	* because even if this is a BTRFS_ADD_DELAYED_REF
957	* reference we may find next a BTRFS_DROP_DELAYED_REF
958	* which cancels out this ADD reference.
959	*
960	* If this is a DROP reference and there was no previous
961	* ADD reference, then we need to signal that when we
962	* process references from the extent tree (through
963	* add_inline_refs() and add_keyed_refs()), we should
964	* not exit early if we find a reference for another
965	* inode, because one of the delayed DROP references
966	* may cancel that reference in the extent tree.
967	*/
968	if (sc && count < `0`)
969	sc->have_delayed_delete_refs = true;
970
971	ret = add_indirect_ref(fs_info, preftrees, root_id: node->ref_root,
972	key: &key, level: `0`, wanted_disk_byte: node->bytenr, count, sc,
973	GFP_ATOMIC);
974	break;
975	}
976	case BTRFS_SHARED_DATA_REF_KEY: {
977	/ SHARED DIRECT FULL backref /
978	ret = add_direct_ref(fs_info, preftrees, level: `0`, parent: node->parent,
979	wanted_disk_byte: node->bytenr, count, sc,
980	GFP_ATOMIC);
981	break;
982	}
983	default:
984	WARN_ON(`1`);
985	}
986	/*
987	* We must ignore BACKREF_FOUND_SHARED until all delayed
988	* refs have been checked.
989	*/
990	if (ret && (ret != BACKREF_FOUND_SHARED))
991	break;
992	}
993	if (!ret)
994	ret = extent_is_shared(sc);
995
996	spin_unlock(lock: &head->lock);
997	return ret;
998	}
999
1000	/*
1001	* add all inline backrefs for bytenr to the list
1002	*
1003	* Returns 0 on success, <0 on error, or BACKREF_FOUND_SHARED.
1004	*/
1005	static int add_inline_refs(struct btrfs_backref_walk_ctx *ctx,
1006	struct btrfs_path *path,
1007	int info_level, struct* preftrees *preftrees,
1008	struct share_check *sc)
1009	{
1010	int ret = `0`;
1011	int slot;
1012	struct extent_buffer *leaf;
1013	struct btrfs_key key;
1014	struct btrfs_key found_key;
1015	unsigned long ptr;
1016	unsigned long end;
1017	struct btrfs_extent_item *ei;
1018	u64 flags;
1019	u64 item_size;
1020
1021	/*
1022	* enumerate all inline refs
1023	*/
1024	leaf = path->nodes[`0`];
1025	slot = path->slots[`0`];
1026
1027	item_size = btrfs_item_size(eb: leaf, slot);
1028	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
1029
1030	if (ctx->check_extent_item) {
1031	ret = ctx->check_extent_item(ctx->bytenr, ei, leaf, ctx->user_ctx);
1032	if (ret)
1033	return ret;
1034	}
1035
1036	flags = btrfs_extent_flags(eb: leaf, s: ei);
1037	btrfs_item_key_to_cpu(eb: leaf, cpu_key: &found_key, nr: slot);
1038
1039	ptr = (unsigned long)(ei + `1`);
1040	end = (unsigned long)ei + item_size;
1041
1042	if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
1043	flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
1044	struct btrfs_tree_block_info *info;
1045
1046	info = (struct btrfs_tree_block_info *)ptr;
1047	*info_level = btrfs_tree_block_level(eb: leaf, s: info);
1048	ptr += sizeof(struct btrfs_tree_block_info);
1049	BUG_ON(ptr > end);
1050	} else if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
1051	*info_level = found_key.offset;
1052	} else {
1053	BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
1054	}
1055
1056	while (ptr < end) {
1057	struct btrfs_extent_inline_ref *iref;
1058	u64 offset;
1059	int type;
1060
1061	iref = (struct btrfs_extent_inline_ref *)ptr;
1062	type = btrfs_get_extent_inline_ref_type(eb: leaf, iref,
1063	is_data: BTRFS_REF_TYPE_ANY);
1064	if (unlikely(type == BTRFS_REF_TYPE_INVALID))
1065	return -EUCLEAN;
1066
1067	offset = btrfs_extent_inline_ref_offset(eb: leaf, s: iref);
1068
1069	switch (type) {
1070	case BTRFS_SHARED_BLOCK_REF_KEY:
1071	ret = add_direct_ref(fs_info: ctx->fs_info, preftrees,
1072	level: *info_level + `1`, parent: offset,
1073	wanted_disk_byte: ctx->bytenr, count: `1`, NULL, GFP_NOFS);
1074	break;
1075	case BTRFS_SHARED_DATA_REF_KEY: {
1076	struct btrfs_shared_data_ref *sdref;
1077	int count;
1078
1079	sdref = (struct btrfs_shared_data_ref *)(iref + `1`);
1080	count = btrfs_shared_data_ref_count(eb: leaf, s: sdref);
1081
1082	ret = add_direct_ref(fs_info: ctx->fs_info, preftrees, level: `0`, parent: offset,
1083	wanted_disk_byte: ctx->bytenr, count, sc, GFP_NOFS);
1084	break;
1085	}
1086	case BTRFS_TREE_BLOCK_REF_KEY:
1087	ret = add_indirect_ref(fs_info: ctx->fs_info, preftrees, root_id: offset,
1088	NULL, level: *info_level + `1`,
1089	wanted_disk_byte: ctx->bytenr, count: `1`, NULL, GFP_NOFS);
1090	break;
1091	case BTRFS_EXTENT_DATA_REF_KEY: {
1092	struct btrfs_extent_data_ref *dref;
1093	int count;
1094	u64 root;
1095
1096	dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1097	count = btrfs_extent_data_ref_count(eb: leaf, s: dref);
1098	key.objectid = btrfs_extent_data_ref_objectid(eb: leaf,
1099	s: dref);
1100	key.type = BTRFS_EXTENT_DATA_KEY;
1101	key.offset = btrfs_extent_data_ref_offset(eb: leaf, s: dref);
1102
1103	if (sc && key.objectid != sc->inum &&
1104	!sc->have_delayed_delete_refs) {
1105	ret = BACKREF_FOUND_SHARED;
1106	break;
1107	}
1108
1109	root = btrfs_extent_data_ref_root(eb: leaf, s: dref);
1110
1111	if (!ctx->skip_data_ref \|\|
1112	!ctx->skip_data_ref(root, key.objectid, key.offset,
1113	ctx->user_ctx))
1114	ret = add_indirect_ref(fs_info: ctx->fs_info, preftrees,
1115	root_id: root, key: &key, level: `0`, wanted_disk_byte: ctx->bytenr,
1116	count, sc, GFP_NOFS);
1117	break;
1118	}
1119	case BTRFS_EXTENT_OWNER_REF_KEY:
1120	ASSERT(btrfs_fs_incompat(ctx->fs_info, SIMPLE_QUOTA));
1121	break;
1122	default:
1123	WARN_ON(`1`);
1124	}
1125	if (ret)
1126	return ret;
1127	ptr += btrfs_extent_inline_ref_size(type);
1128	}
1129
1130	return `0`;
1131	}
1132
1133	/*
1134	* add all non-inline backrefs for bytenr to the list
1135	*
1136	* Returns 0 on success, <0 on error, or BACKREF_FOUND_SHARED.
1137	*/
1138	static int add_keyed_refs(struct btrfs_backref_walk_ctx *ctx,
1139	struct btrfs_root *extent_root,
1140	struct btrfs_path *path,
1141	int info_level, struct preftrees *preftrees,
1142	struct share_check *sc)
1143	{
1144	struct btrfs_fs_info *fs_info = extent_root->fs_info;
1145	int ret;
1146	int slot;
1147	struct extent_buffer *leaf;
1148	struct btrfs_key key;
1149
1150	while (`1`) {
1151	ret = btrfs_next_item(root: extent_root, p: path);
1152	if (ret < `0`)
1153	break;
1154	if (ret) {
1155	ret = `0`;
1156	break;
1157	}
1158
1159	slot = path->slots[`0`];
1160	leaf = path->nodes[`0`];
1161	btrfs_item_key_to_cpu(eb: leaf, cpu_key: &key, nr: slot);
1162
1163	if (key.objectid != ctx->bytenr)
1164	break;
1165	if (key.type < BTRFS_TREE_BLOCK_REF_KEY)
1166	continue;
1167	if (key.type > BTRFS_SHARED_DATA_REF_KEY)
1168	break;
1169
1170	switch (key.type) {
1171	case BTRFS_SHARED_BLOCK_REF_KEY:
1172	/ SHARED DIRECT METADATA backref /
1173	ret = add_direct_ref(fs_info, preftrees,
1174	level: info_level + `1`, parent: key.offset,
1175	wanted_disk_byte: ctx->bytenr, count: `1`, NULL, GFP_NOFS);
1176	break;
1177	case BTRFS_SHARED_DATA_REF_KEY: {
1178	/ SHARED DIRECT FULL backref /
1179	struct btrfs_shared_data_ref *sdref;
1180	int count;
1181
1182	sdref = btrfs_item_ptr(leaf, slot,
1183	struct btrfs_shared_data_ref);
1184	count = btrfs_shared_data_ref_count(eb: leaf, s: sdref);
1185	ret = add_direct_ref(fs_info, preftrees, level: `0`,
1186	parent: key.offset, wanted_disk_byte: ctx->bytenr, count,
1187	sc, GFP_NOFS);
1188	break;
1189	}
1190	case BTRFS_TREE_BLOCK_REF_KEY:
1191	/ NORMAL INDIRECT METADATA backref /
1192	ret = add_indirect_ref(fs_info, preftrees, root_id: key.offset,
1193	NULL, level: info_level + `1`, wanted_disk_byte: ctx->bytenr,
1194	count: `1`, NULL, GFP_NOFS);
1195	break;
1196	case BTRFS_EXTENT_DATA_REF_KEY: {
1197	/ NORMAL INDIRECT DATA backref /
1198	struct btrfs_extent_data_ref *dref;
1199	int count;
1200	u64 root;
1201
1202	dref = btrfs_item_ptr(leaf, slot,
1203	struct btrfs_extent_data_ref);
1204	count = btrfs_extent_data_ref_count(eb: leaf, s: dref);
1205	key.objectid = btrfs_extent_data_ref_objectid(eb: leaf,
1206	s: dref);
1207	key.type = BTRFS_EXTENT_DATA_KEY;
1208	key.offset = btrfs_extent_data_ref_offset(eb: leaf, s: dref);
1209
1210	if (sc && key.objectid != sc->inum &&
1211	!sc->have_delayed_delete_refs) {
1212	ret = BACKREF_FOUND_SHARED;
1213	break;
1214	}
1215
1216	root = btrfs_extent_data_ref_root(eb: leaf, s: dref);
1217
1218	if (!ctx->skip_data_ref \|\|
1219	!ctx->skip_data_ref(root, key.objectid, key.offset,
1220	ctx->user_ctx))
1221	ret = add_indirect_ref(fs_info, preftrees, root_id: root,
1222	key: &key, level: `0`, wanted_disk_byte: ctx->bytenr,
1223	count, sc, GFP_NOFS);
1224	break;
1225	}
1226	default:
1227	WARN_ON(`1`);
1228	}
1229	if (ret)
1230	return ret;
1231
1232	}
1233
1234	return ret;
1235	}
1236
1237	/*
1238	* The caller has joined a transaction or is holding a read lock on the
1239	* fs_info->commit_root_sem semaphore, so no need to worry about the root's last
1240	* snapshot field changing while updating or checking the cache.
1241	*/
1242	static bool lookup_backref_shared_cache(struct btrfs_backref_share_check_ctx *ctx,
1243	struct btrfs_root *root,
1244	u64 bytenr, int level, bool *is_shared)
1245	{
1246	const struct btrfs_fs_info *fs_info = root->fs_info;
1247	struct btrfs_backref_shared_cache_entry *entry;
1248
1249	if (!current->journal_info)
1250	lockdep_assert_held(&fs_info->commit_root_sem);
1251
1252	if (!ctx->use_path_cache)
1253	return false;
1254
1255	if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL))
1256	return false;
1257
1258	/*
1259	* Level -1 is used for the data extent, which is not reliable to cache
1260	* because its reference count can increase or decrease without us
1261	* realizing. We cache results only for extent buffers that lead from
1262	* the root node down to the leaf with the file extent item.
1263	*/
1264	ASSERT(level >= `0`);
1265
1266	entry = &ctx->path_cache_entries[level];
1267
1268	/ Unused cache entry or being used for some other extent buffer. /
1269	if (entry->bytenr != bytenr)
1270	return false;
1271
1272	/*
1273	* We cached a false result, but the last snapshot generation of the
1274	* root changed, so we now have a snapshot. Don't trust the result.
1275	*/
1276	if (!entry->is_shared &&
1277	entry->gen != btrfs_root_last_snapshot(s: &root->root_item))
1278	return false;
1279
1280	/*
1281	* If we cached a true result and the last generation used for dropping
1282	* a root changed, we can not trust the result, because the dropped root
1283	* could be a snapshot sharing this extent buffer.
1284	*/
1285	if (entry->is_shared &&
1286	entry->gen != btrfs_get_last_root_drop_gen(fs_info))
1287	return false;
1288
1289	*is_shared = entry->is_shared;
1290	/*
1291	* If the node at this level is shared, than all nodes below are also
1292	* shared. Currently some of the nodes below may be marked as not shared
1293	* because we have just switched from one leaf to another, and switched
1294	* also other nodes above the leaf and below the current level, so mark
1295	* them as shared.
1296	*/
1297	if (*is_shared) {
1298	for (int i = `0`; i < level; i++) {
1299	ctx->path_cache_entries[i].is_shared = true;
1300	ctx->path_cache_entries[i].gen = entry->gen;
1301	}
1302	}
1303
1304	return true;
1305	}
1306
1307	/*
1308	* The caller has joined a transaction or is holding a read lock on the
1309	* fs_info->commit_root_sem semaphore, so no need to worry about the root's last
1310	* snapshot field changing while updating or checking the cache.
1311	*/
1312	static void store_backref_shared_cache(struct btrfs_backref_share_check_ctx *ctx,
1313	struct btrfs_root *root,
1314	u64 bytenr, int level, bool is_shared)
1315	{
1316	const struct btrfs_fs_info *fs_info = root->fs_info;
1317	struct btrfs_backref_shared_cache_entry *entry;
1318	u64 gen;
1319
1320	if (!current->journal_info)
1321	lockdep_assert_held(&fs_info->commit_root_sem);
1322
1323	if (!ctx->use_path_cache)
1324	return;
1325
1326	if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL))
1327	return;
1328
1329	/*
1330	* Level -1 is used for the data extent, which is not reliable to cache
1331	* because its reference count can increase or decrease without us
1332	* realizing. We cache results only for extent buffers that lead from
1333	* the root node down to the leaf with the file extent item.
1334	*/
1335	ASSERT(level >= `0`);
1336
1337	if (is_shared)
1338	gen = btrfs_get_last_root_drop_gen(fs_info);
1339	else
1340	gen = btrfs_root_last_snapshot(s: &root->root_item);
1341
1342	entry = &ctx->path_cache_entries[level];
1343	entry->bytenr = bytenr;
1344	entry->is_shared = is_shared;
1345	entry->gen = gen;
1346
1347	/*
1348	* If we found an extent buffer is shared, set the cache result for all
1349	* extent buffers below it to true. As nodes in the path are COWed,
1350	* their sharedness is moved to their children, and if a leaf is COWed,
1351	* then the sharedness of a data extent becomes direct, the refcount of
1352	* data extent is increased in the extent item at the extent tree.
1353	*/
1354	if (is_shared) {
1355	for (int i = `0`; i < level; i++) {
1356	entry = &ctx->path_cache_entries[i];
1357	entry->is_shared = is_shared;
1358	entry->gen = gen;
1359	}
1360	}
1361	}
1362
1363	/*
1364	* this adds all existing backrefs (inline backrefs, backrefs and delayed
1365	* refs) for the given bytenr to the refs list, merges duplicates and resolves
1366	* indirect refs to their parent bytenr.
1367	* When roots are found, they're added to the roots list
1368	*
1369	* @ctx: Backref walking context object, must be not NULL.
1370	* @sc: If !NULL, then immediately return BACKREF_FOUND_SHARED when a
1371	* shared extent is detected.
1372	*
1373	* Otherwise this returns 0 for success and <0 for an error.
1374	*
1375	* FIXME some caching might speed things up
1376	*/
1377	static int find_parent_nodes(struct btrfs_backref_walk_ctx *ctx,
1378	struct share_check *sc)
1379	{
1380	struct btrfs_root *root = btrfs_extent_root(fs_info: ctx->fs_info, bytenr: ctx->bytenr);
1381	struct btrfs_key key;
1382	struct btrfs_path *path;
1383	struct btrfs_delayed_ref_root *delayed_refs = NULL;
1384	struct btrfs_delayed_ref_head *head;
1385	int info_level = `0`;
1386	int ret;
1387	struct prelim_ref *ref;
1388	struct rb_node *node;
1389	struct extent_inode_elem *eie = NULL;
1390	struct preftrees preftrees = {
1391	.direct = PREFTREE_INIT,
1392	.indirect = PREFTREE_INIT,
1393	.indirect_missing_keys = PREFTREE_INIT
1394	};
1395
1396	/ Roots ulist is not needed when using a sharedness check context. /
1397	if (sc)
1398	ASSERT(ctx->roots == NULL);
1399
1400	key.objectid = ctx->bytenr;
1401	if (btrfs_fs_incompat(ctx->fs_info, SKINNY_METADATA))
1402	key.type = BTRFS_METADATA_ITEM_KEY;
1403	else
1404	key.type = BTRFS_EXTENT_ITEM_KEY;
1405	key.offset = (u64)-`1`;
1406
1407	path = btrfs_alloc_path();
1408	if (!path)
1409	return -ENOMEM;
1410	if (!ctx->trans) {
1411	path->search_commit_root = true;
1412	path->skip_locking = true;
1413	}
1414
1415	if (ctx->time_seq == BTRFS_SEQ_LAST)
1416	path->skip_locking = true;
1417
1418	again:
1419	head = NULL;
1420
1421	ret = btrfs_search_slot(NULL, root, key: &key, p: path, ins_len: `0`, cow: `0`);
1422	if (ret < `0`)
1423	goto out;
1424	if (unlikely(ret == `0`)) {
1425	/*
1426	* Key with offset -1 found, there would have to exist an extent
1427	* item with such offset, but this is out of the valid range.
1428	*/
1429	ret = -EUCLEAN;
1430	goto out;
1431	}
1432
1433	if (ctx->trans && likely(ctx->trans->type != __TRANS_DUMMY) &&
1434	ctx->time_seq != BTRFS_SEQ_LAST) {
1435	/*
1436	* We have a specific time_seq we care about and trans which
1437	* means we have the path lock, we need to grab the ref head and
1438	* lock it so we have a consistent view of the refs at the given
1439	* time.
1440	*/
1441	delayed_refs = &ctx->trans->transaction->delayed_refs;
1442	spin_lock(lock: &delayed_refs->lock);
1443	head = btrfs_find_delayed_ref_head(fs_info: ctx->fs_info, delayed_refs,
1444	bytenr: ctx->bytenr);
1445	if (head) {
1446	if (!mutex_trylock(&head->mutex)) {
1447	refcount_inc(r: &head->refs);
1448	spin_unlock(lock: &delayed_refs->lock);
1449
1450	btrfs_release_path(p: path);
1451
1452	/*
1453	* Mutex was contended, block until it's
1454	* released and try again
1455	*/
1456	mutex_lock(&head->mutex);
1457	mutex_unlock(lock: &head->mutex);
1458	btrfs_put_delayed_ref_head(head);
1459	goto again;
1460	}
1461	spin_unlock(lock: &delayed_refs->lock);
1462	ret = add_delayed_refs(fs_info: ctx->fs_info, head, seq: ctx->time_seq,
1463	preftrees: &preftrees, sc);
1464	mutex_unlock(lock: &head->mutex);
1465	if (ret)
1466	goto out;
1467	} else {
1468	spin_unlock(lock: &delayed_refs->lock);
1469	}
1470	}
1471
1472	if (path->slots[`0`]) {
1473	struct extent_buffer *leaf;
1474	int slot;
1475
1476	path->slots[`0`]--;
1477	leaf = path->nodes[`0`];
1478	slot = path->slots[`0`];
1479	btrfs_item_key_to_cpu(eb: leaf, cpu_key: &key, nr: slot);
1480	if (key.objectid == ctx->bytenr &&
1481	(key.type == BTRFS_EXTENT_ITEM_KEY \|\|
1482	key.type == BTRFS_METADATA_ITEM_KEY)) {
1483	ret = add_inline_refs(ctx, path, info_level: &info_level,
1484	preftrees: &preftrees, sc);
1485	if (ret)
1486	goto out;
1487	ret = add_keyed_refs(ctx, extent_root: root, path, info_level,
1488	preftrees: &preftrees, sc);
1489	if (ret)
1490	goto out;
1491	}
1492	}
1493
1494	/*
1495	* If we have a share context and we reached here, it means the extent
1496	* is not directly shared (no multiple reference items for it),
1497	* otherwise we would have exited earlier with a return value of
1498	* BACKREF_FOUND_SHARED after processing delayed references or while
1499	* processing inline or keyed references from the extent tree.
1500	* The extent may however be indirectly shared through shared subtrees
1501	* as a result from creating snapshots, so we determine below what is
1502	* its parent node, in case we are dealing with a metadata extent, or
1503	* what's the leaf (or leaves), from a fs tree, that has a file extent
1504	* item pointing to it in case we are dealing with a data extent.
1505	*/
1506	ASSERT(extent_is_shared(sc) == `0`);
1507
1508	/*
1509	* If we are here for a data extent and we have a share_check structure
1510	* it means the data extent is not directly shared (does not have
1511	* multiple reference items), so we have to check if a path in the fs
1512	* tree (going from the root node down to the leaf that has the file
1513	* extent item pointing to the data extent) is shared, that is, if any
1514	* of the extent buffers in the path is referenced by other trees.
1515	*/
1516	if (sc && ctx->bytenr == sc->data_bytenr) {
1517	/*
1518	* If our data extent is from a generation more recent than the
1519	* last generation used to snapshot the root, then we know that
1520	* it can not be shared through subtrees, so we can skip
1521	* resolving indirect references, there's no point in
1522	* determining the extent buffers for the path from the fs tree
1523	* root node down to the leaf that has the file extent item that
1524	* points to the data extent.
1525	*/
1526	if (sc->data_extent_gen >
1527	btrfs_root_last_snapshot(s: &sc->root->root_item)) {
1528	ret = BACKREF_FOUND_NOT_SHARED;
1529	goto out;
1530	}
1531
1532	/*
1533	* If we are only determining if a data extent is shared or not
1534	* and the corresponding file extent item is located in the same
1535	* leaf as the previous file extent item, we can skip resolving
1536	* indirect references for a data extent, since the fs tree path
1537	* is the same (same leaf, so same path). We skip as long as the
1538	* cached result for the leaf is valid and only if there's only
1539	* one file extent item pointing to the data extent, because in
1540	* the case of multiple file extent items, they may be located
1541	* in different leaves and therefore we have multiple paths.
1542	*/
1543	if (sc->ctx->curr_leaf_bytenr == sc->ctx->prev_leaf_bytenr &&
1544	sc->self_ref_count == `1`) {
1545	bool cached;
1546	bool is_shared;
1547
1548	cached = lookup_backref_shared_cache(ctx: sc->ctx, root: sc->root,
1549	bytenr: sc->ctx->curr_leaf_bytenr,
1550	level: `0`, is_shared: &is_shared);
1551	if (cached) {
1552	if (is_shared)
1553	ret = BACKREF_FOUND_SHARED;
1554	else
1555	ret = BACKREF_FOUND_NOT_SHARED;
1556	goto out;
1557	}
1558	}
1559	}
1560
1561	btrfs_release_path(p: path);
1562
1563	ret = add_missing_keys(fs_info: ctx->fs_info, preftrees: &preftrees, lock: !path->skip_locking);
1564	if (ret)
1565	goto out;
1566
1567	WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect_missing_keys.root.rb_root));
1568
1569	ret = resolve_indirect_refs(ctx, path, preftrees: &preftrees, sc);
1570	if (ret)
1571	goto out;
1572
1573	WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect.root.rb_root));
1574
1575	/*
1576	* This walks the tree of merged and resolved refs. Tree blocks are
1577	* read in as needed. Unique entries are added to the ulist, and
1578	* the list of found roots is updated.
1579	*
1580	* We release the entire tree in one go before returning.
1581	*/
1582	node = rb_first_cached(&preftrees.direct.root);
1583	while (node) {
1584	ref = rb_entry(node, struct prelim_ref, rbnode);
1585	node = rb_next(&ref->rbnode);
1586	/*
1587	* ref->count < 0 can happen here if there are delayed
1588	* refs with a node->action of BTRFS_DROP_DELAYED_REF.
1589	* prelim_ref_insert() relies on this when merging
1590	* identical refs to keep the overall count correct.
1591	* prelim_ref_insert() will merge only those refs
1592	* which compare identically. Any refs having
1593	* e.g. different offsets would not be merged,
1594	* and would retain their original ref->count < 0.
1595	*/
1596	if (ctx->roots && ref->count && ref->root_id && ref->parent == `0`) {
1597	/ no parent == root of tree /
1598	ret = ulist_add(ulist: ctx->roots, val: ref->root_id, aux: `0`, GFP_NOFS);
1599	if (ret < `0`)
1600	goto out;
1601	}
1602	if (ref->count && ref->parent) {
1603	if (!ctx->skip_inode_ref_list && !ref->inode_list &&
1604	ref->level == `0`) {
1605	struct btrfs_tree_parent_check check = { `0` };
1606	struct extent_buffer *eb;
1607
1608	check.level = ref->level;
1609
1610	eb = read_tree_block(fs_info: ctx->fs_info, bytenr: ref->parent,
1611	check: &check);
1612	if (IS_ERR(ptr: eb)) {
1613	ret = PTR_ERR(ptr: eb);
1614	goto out;
1615	}
1616	if (unlikely(!extent_buffer_uptodate(eb))) {
1617	free_extent_buffer(eb);
1618	ret = -EIO;
1619	goto out;
1620	}
1621
1622	if (!path->skip_locking)
1623	btrfs_tree_read_lock(eb);
1624	ret = find_extent_in_eb(ctx, eb, eie: &eie);
1625	if (!path->skip_locking)
1626	btrfs_tree_read_unlock(eb);
1627	free_extent_buffer(eb);
1628	if (ret == BTRFS_ITERATE_EXTENT_INODES_STOP \|\|
1629	ret < `0`)
1630	goto out;
1631	ref->inode_list = eie;
1632	/*
1633	* We transferred the list ownership to the ref,
1634	* so set to NULL to avoid a double free in case
1635	* an error happens after this.
1636	*/
1637	eie = NULL;
1638	}
1639	ret = ulist_add_merge_ptr(ulist: ctx->refs, val: ref->parent,
1640	aux: ref->inode_list,
1641	old_aux: (void **)&eie, GFP_NOFS);
1642	if (ret < `0`)
1643	goto out;
1644	if (!ret && !ctx->skip_inode_ref_list) {
1645	/*
1646	* We've recorded that parent, so we must extend
1647	* its inode list here.
1648	*
1649	* However if there was corruption we may not
1650	* have found an eie, return an error in this
1651	* case.
1652	*/
1653	ASSERT(eie);
1654	if (unlikely(!eie)) {
1655	ret = -EUCLEAN;
1656	goto out;
1657	}
1658	while (eie->next)
1659	eie = eie->next;
1660	eie->next = ref->inode_list;
1661	}
1662	eie = NULL;
1663	/*
1664	* We have transferred the inode list ownership from
1665	* this ref to the ref we added to the 'refs' ulist.
1666	* So set this ref's inode list to NULL to avoid
1667	* use-after-free when our caller uses it or double
1668	* frees in case an error happens before we return.
1669	*/
1670	ref->inode_list = NULL;
1671	}
1672	cond_resched();
1673	}
1674
1675	out:
1676	btrfs_free_path(p: path);
1677
1678	prelim_release(preftree: &preftrees.direct);
1679	prelim_release(preftree: &preftrees.indirect);
1680	prelim_release(preftree: &preftrees.indirect_missing_keys);
1681
1682	if (ret == BTRFS_ITERATE_EXTENT_INODES_STOP \|\| ret < `0`)
1683	free_inode_elem_list(eie);
1684	return ret;
1685	}
1686
1687	/*
1688	* Finds all leaves with a reference to the specified combination of
1689	* @ctx->bytenr and @ctx->extent_item_pos. The bytenr of the found leaves are
1690	* added to the ulist at @ctx->refs, and that ulist is allocated by this
1691	* function. The caller should free the ulist with free_leaf_list() if
1692	* @ctx->ignore_extent_item_pos is false, otherwise a simple ulist_free() is
1693	* enough.
1694	*
1695	* Returns 0 on success and < 0 on error. On error @ctx->refs is not allocated.
1696	*/
1697	int btrfs_find_all_leafs(struct btrfs_backref_walk_ctx *ctx)
1698	{
1699	int ret;
1700
1701	ASSERT(ctx->refs == NULL);
1702
1703	ctx->refs = ulist_alloc(GFP_NOFS);
1704	if (!ctx->refs)
1705	return -ENOMEM;
1706
1707	ret = find_parent_nodes(ctx, NULL);
1708	if (ret == BTRFS_ITERATE_EXTENT_INODES_STOP \|\|
1709	(ret < `0` && ret != -ENOENT)) {
1710	free_leaf_list(ulist: ctx->refs);
1711	ctx->refs = NULL;
1712	return ret;
1713	}
1714
1715	return `0`;
1716	}
1717
1718	/*
1719	* Walk all backrefs for a given extent to find all roots that reference this
1720	* extent. Walking a backref means finding all extents that reference this
1721	* extent and in turn walk the backrefs of those, too. Naturally this is a
1722	* recursive process, but here it is implemented in an iterative fashion: We
1723	* find all referencing extents for the extent in question and put them on a
1724	* list. In turn, we find all referencing extents for those, further appending
1725	* to the list. The way we iterate the list allows adding more elements after
1726	* the current while iterating. The process stops when we reach the end of the
1727	* list.
1728	*
1729	* Found roots are added to @ctx->roots, which is allocated by this function if
1730	* it points to NULL, in which case the caller is responsible for freeing it
1731	* after it's not needed anymore.
1732	* This function requires @ctx->refs to be NULL, as it uses it for allocating a
1733	* ulist to do temporary work, and frees it before returning.
1734	*
1735	* Returns 0 on success, < 0 on error.
1736	*/
1737	static int btrfs_find_all_roots_safe(struct btrfs_backref_walk_ctx *ctx)
1738	{
1739	const u64 orig_bytenr = ctx->bytenr;
1740	const bool orig_skip_inode_ref_list = ctx->skip_inode_ref_list;
1741	bool roots_ulist_allocated = false;
1742	struct ulist_iterator uiter;
1743	int ret = `0`;
1744
1745	ASSERT(ctx->refs == NULL);
1746
1747	ctx->refs = ulist_alloc(GFP_NOFS);
1748	if (!ctx->refs)
1749	return -ENOMEM;
1750
1751	if (!ctx->roots) {
1752	ctx->roots = ulist_alloc(GFP_NOFS);
1753	if (!ctx->roots) {
1754	ulist_free(ulist: ctx->refs);
1755	ctx->refs = NULL;
1756	return -ENOMEM;
1757	}
1758	roots_ulist_allocated = true;
1759	}
1760
1761	ctx->skip_inode_ref_list = true;
1762
1763	ULIST_ITER_INIT(&uiter);
1764	while (`1`) {
1765	struct ulist_node *node;
1766
1767	ret = find_parent_nodes(ctx, NULL);
1768	if (ret < `0` && ret != -ENOENT) {
1769	if (roots_ulist_allocated) {
1770	ulist_free(ulist: ctx->roots);
1771	ctx->roots = NULL;
1772	}
1773	break;
1774	}
1775	ret = `0`;
1776	node = ulist_next(ulist: ctx->refs, uiter: &uiter);
1777	if (!node)
1778	break;
1779	ctx->bytenr = node->val;
1780	cond_resched();
1781	}
1782
1783	ulist_free(ulist: ctx->refs);
1784	ctx->refs = NULL;
1785	ctx->bytenr = orig_bytenr;
1786	ctx->skip_inode_ref_list = orig_skip_inode_ref_list;
1787
1788	return ret;
1789	}
1790
1791	int btrfs_find_all_roots(struct btrfs_backref_walk_ctx *ctx,
1792	bool skip_commit_root_sem)
1793	{
1794	int ret;
1795
1796	if (!ctx->trans && !skip_commit_root_sem)
1797	down_read(sem: &ctx->fs_info->commit_root_sem);
1798	ret = btrfs_find_all_roots_safe(ctx);
1799	if (!ctx->trans && !skip_commit_root_sem)
1800	up_read(sem: &ctx->fs_info->commit_root_sem);
1801	return ret;
1802	}
1803
1804	struct btrfs_backref_share_check_ctx btrfs_alloc_backref_share_check_ctx(void*)
1805	{
1806	struct btrfs_backref_share_check_ctx *ctx;
1807
1808	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
1809	if (!ctx)
1810	return NULL;
1811
1812	ulist_init(ulist: &ctx->refs);
1813
1814	return ctx;
1815	}
1816
1817	void btrfs_free_backref_share_ctx(struct btrfs_backref_share_check_ctx *ctx)
1818	{
1819	if (!ctx)
1820	return;
1821
1822	ulist_release(ulist: &ctx->refs);
1823	kfree(objp: ctx);
1824	}
1825
1826	/*
1827	* Check if a data extent is shared or not.
1828	*
1829	* @inode: The inode whose extent we are checking.
1830	* @bytenr: Logical bytenr of the extent we are checking.
1831	* @extent_gen: Generation of the extent (file extent item) or 0 if it is
1832	* not known.
1833	* @ctx: A backref sharedness check context.
1834	*
1835	* btrfs_is_data_extent_shared uses the backref walking code but will short
1836	* circuit as soon as it finds a root or inode that doesn't match the
1837	* one passed in. This provides a significant performance benefit for
1838	* callers (such as fiemap) which want to know whether the extent is
1839	* shared but do not need a ref count.
1840	*
1841	* This attempts to attach to the running transaction in order to account for
1842	* delayed refs, but continues on even when no running transaction exists.
1843	*
1844	* Return: 0 if extent is not shared, 1 if it is shared, < 0 on error.
1845	*/
1846	int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr,
1847	u64 extent_gen,
1848	struct btrfs_backref_share_check_ctx *ctx)
1849	{
1850	struct btrfs_backref_walk_ctx walk_ctx = { `0` };
1851	struct btrfs_root *root = inode->root;
1852	struct btrfs_fs_info *fs_info = root->fs_info;
1853	struct btrfs_trans_handle *trans;
1854	struct ulist_iterator uiter;
1855	struct ulist_node *node;
1856	struct btrfs_seq_list elem = BTRFS_SEQ_LIST_INIT(elem);
1857	int ret = `0`;
1858	struct share_check shared = {
1859	.ctx = ctx,
1860	.root = root,
1861	.inum = btrfs_ino(inode),
1862	.data_bytenr = bytenr,
1863	.data_extent_gen = extent_gen,
1864	.share_count = `0`,
1865	.self_ref_count = `0`,
1866	.have_delayed_delete_refs = false,
1867	};
1868	int level;
1869	bool leaf_cached;
1870	bool leaf_is_shared;
1871
1872	for (int i = `0`; i < BTRFS_BACKREF_CTX_PREV_EXTENTS_SIZE; i++) {
1873	if (ctx->prev_extents_cache[i].bytenr == bytenr)
1874	return ctx->prev_extents_cache[i].is_shared;
1875	}
1876
1877	ulist_init(ulist: &ctx->refs);
1878
1879	trans = btrfs_join_transaction_nostart(root);
1880	if (IS_ERR(ptr: trans)) {
1881	if (PTR_ERR(ptr: trans) != -ENOENT && PTR_ERR(ptr: trans) != -EROFS) {
1882	ret = PTR_ERR(ptr: trans);
1883	goto out;
1884	}
1885	trans = NULL;
1886	down_read(sem: &fs_info->commit_root_sem);
1887	} else {
1888	btrfs_get_tree_mod_seq(fs_info, elem: &elem);
1889	walk_ctx.time_seq = elem.seq;
1890	}
1891
1892	ctx->use_path_cache = true;
1893
1894	/*
1895	* We may have previously determined that the current leaf is shared.
1896	* If it is, then we have a data extent that is shared due to a shared
1897	* subtree (caused by snapshotting) and we don't need to check for data
1898	* backrefs. If the leaf is not shared, then we must do backref walking
1899	* to determine if the data extent is shared through reflinks.
1900	*/
1901	leaf_cached = lookup_backref_shared_cache(ctx, root,
1902	bytenr: ctx->curr_leaf_bytenr, level: `0`,
1903	is_shared: &leaf_is_shared);
1904	if (leaf_cached && leaf_is_shared) {
1905	ret = `1`;
1906	goto out_trans;
1907	}
1908
1909	walk_ctx.skip_inode_ref_list = true;
1910	walk_ctx.trans = trans;
1911	walk_ctx.fs_info = fs_info;
1912	walk_ctx.refs = &ctx->refs;
1913
1914	/ -1 means we are in the bytenr of the data extent. /
1915	level = -`1`;
1916	ULIST_ITER_INIT(&uiter);
1917	while (`1`) {
1918	const unsigned long prev_ref_count = ctx->refs.nnodes;
1919
1920	walk_ctx.bytenr = bytenr;
1921	ret = find_parent_nodes(ctx: &walk_ctx, sc: &shared);
1922	if (ret == BACKREF_FOUND_SHARED \|\|
1923	ret == BACKREF_FOUND_NOT_SHARED) {
1924	/ If shared must return 1, otherwise return 0. /
1925	ret = (ret == BACKREF_FOUND_SHARED) ? `1` : `0`;
1926	if (level >= `0`)
1927	store_backref_shared_cache(ctx, root, bytenr,
1928	level, is_shared: ret == `1`);
1929	break;
1930	}
1931	if (ret < `0` && ret != -ENOENT)
1932	break;
1933	ret = `0`;
1934
1935	/*
1936	* More than one extent buffer (bytenr) may have been added to
1937	* the ctx->refs ulist, in which case we have to check multiple
1938	* tree paths in case the first one is not shared, so we can not
1939	* use the path cache which is made for a single path. Multiple
1940	* extent buffers at the current level happen when:
1941	*
1942	* 1) level -1, the data extent: If our data extent was not
1943	* directly shared (without multiple reference items), then
1944	* it might have a single reference item with a count > 1 for
1945	* the same offset, which means there are 2 (or more) file
1946	* extent items that point to the data extent - this happens
1947	* when a file extent item needs to be split and then one
1948	* item gets moved to another leaf due to a b+tree leaf split
1949	* when inserting some item. In this case the file extent
1950	* items may be located in different leaves and therefore
1951	* some of the leaves may be referenced through shared
1952	* subtrees while others are not. Since our extent buffer
1953	* cache only works for a single path (by far the most common
1954	* case and simpler to deal with), we can not use it if we
1955	* have multiple leaves (which implies multiple paths).
1956	*
1957	* 2) level >= 0, a tree node/leaf: We can have a mix of direct
1958	* and indirect references on a b+tree node/leaf, so we have
1959	* to check multiple paths, and the extent buffer (the
1960	* current bytenr) may be shared or not. One example is
1961	* during relocation as we may get a shared tree block ref
1962	* (direct ref) and a non-shared tree block ref (indirect
1963	* ref) for the same node/leaf.
1964	*/
1965	if ((ctx->refs.nnodes - prev_ref_count) > `1`)
1966	ctx->use_path_cache = false;
1967
1968	if (level >= `0`)
1969	store_backref_shared_cache(ctx, root, bytenr,
1970	level, is_shared: false);
1971	node = ulist_next(ulist: &ctx->refs, uiter: &uiter);
1972	if (!node)
1973	break;
1974	bytenr = node->val;
1975	if (ctx->use_path_cache) {
1976	bool is_shared;
1977	bool cached;
1978
1979	level++;
1980	cached = lookup_backref_shared_cache(ctx, root, bytenr,
1981	level, is_shared: &is_shared);
1982	if (cached) {
1983	ret = (is_shared ? `1` : `0`);
1984	break;
1985	}
1986	}
1987	shared.share_count = `0`;
1988	shared.have_delayed_delete_refs = false;
1989	cond_resched();
1990	}
1991
1992	/*
1993	* If the path cache is disabled, then it means at some tree level we
1994	* got multiple parents due to a mix of direct and indirect backrefs or
1995	* multiple leaves with file extent items pointing to the same data
1996	* extent. We have to invalidate the cache and cache only the sharedness
1997	* result for the levels where we got only one node/reference.
1998	*/
1999	if (!ctx->use_path_cache) {
2000	int i = `0`;
2001
2002	level--;
2003	if (ret >= `0` && level >= `0`) {
2004	bytenr = ctx->path_cache_entries[level].bytenr;
2005	ctx->use_path_cache = true;
2006	store_backref_shared_cache(ctx, root, bytenr, level, is_shared: ret);
2007	i = level + `1`;
2008	}
2009
2010	for ( ; i < BTRFS_MAX_LEVEL; i++)
2011	ctx->path_cache_entries[i].bytenr = `0`;
2012	}
2013
2014	/*
2015	* Cache the sharedness result for the data extent if we know our inode
2016	* has more than 1 file extent item that refers to the data extent.
2017	*/
2018	if (ret >= `0` && shared.self_ref_count > `1`) {
2019	int slot = ctx->prev_extents_cache_slot;
2020
2021	ctx->prev_extents_cache[slot].bytenr = shared.data_bytenr;
2022	ctx->prev_extents_cache[slot].is_shared = (ret == `1`);
2023
2024	slot = (slot + `1`) % BTRFS_BACKREF_CTX_PREV_EXTENTS_SIZE;
2025	ctx->prev_extents_cache_slot = slot;
2026	}
2027
2028	out_trans:
2029	if (trans) {
2030	btrfs_put_tree_mod_seq(fs_info, elem: &elem);
2031	btrfs_end_transaction(trans);
2032	} else {
2033	up_read(sem: &fs_info->commit_root_sem);
2034	}
2035	out:
2036	ulist_release(ulist: &ctx->refs);
2037	ctx->prev_leaf_bytenr = ctx->curr_leaf_bytenr;
2038
2039	return ret;
2040	}
2041
2042	int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
2043	u64 start_off, struct btrfs_path *path,
2044	struct btrfs_inode_extref **ret_extref,
2045	u64 *found_off)
2046	{
2047	int ret, slot;
2048	struct btrfs_key key;
2049	struct btrfs_key found_key;
2050	struct btrfs_inode_extref *extref;
2051	const struct extent_buffer *leaf;
2052	unsigned long ptr;
2053
2054	key.objectid = inode_objectid;
2055	key.type = BTRFS_INODE_EXTREF_KEY;
2056	key.offset = start_off;
2057
2058	ret = btrfs_search_slot(NULL, root, key: &key, p: path, ins_len: `0`, cow: `0`);
2059	if (ret < `0`)
2060	return ret;
2061
2062	while (`1`) {
2063	leaf = path->nodes[`0`];
2064	slot = path->slots[`0`];
2065	if (slot >= btrfs_header_nritems(eb: leaf)) {
2066	/*
2067	* If the item at offset is not found,
2068	* btrfs_search_slot will point us to the slot
2069	* where it should be inserted. In our case
2070	* that will be the slot directly before the
2071	* next INODE_REF_KEY_V2 item. In the case
2072	* that we're pointing to the last slot in a
2073	* leaf, we must move one leaf over.
2074	*/
2075	ret = btrfs_next_leaf(root, path);
2076	if (ret) {
2077	if (ret >= `1`)
2078	ret = -ENOENT;
2079	break;
2080	}
2081	continue;
2082	}
2083
2084	btrfs_item_key_to_cpu(eb: leaf, cpu_key: &found_key, nr: slot);
2085
2086	/*
2087	* Check that we're still looking at an extended ref key for
2088	* this particular objectid. If we have different
2089	* objectid or type then there are no more to be found
2090	* in the tree and we can exit.
2091	*/
2092	ret = -ENOENT;
2093	if (found_key.objectid != inode_objectid)
2094	break;
2095	if (found_key.type != BTRFS_INODE_EXTREF_KEY)
2096	break;
2097
2098	ret = `0`;
2099	ptr = btrfs_item_ptr_offset(leaf, path->slots[`0`]);
2100	extref = (struct btrfs_inode_extref *)ptr;
2101	*ret_extref = extref;
2102	if (found_off)
2103	*found_off = found_key.offset;
2104	break;
2105	}
2106
2107	return ret;
2108	}
2109
2110	/*
2111	* this iterates to turn a name (from iref/extref) into a full filesystem path.
2112	* Elements of the path are separated by '/' and the path is guaranteed to be
2113	* 0-terminated. the path is only given within the current file system.
2114	* Therefore, it never starts with a '/'. the caller is responsible to provide
2115	* "size" bytes in "dest". the dest buffer will be filled backwards. finally,
2116	* the start point of the resulting string is returned. this pointer is within
2117	* dest, normally.
2118	* in case the path buffer would overflow, the pointer is decremented further
2119	* as if output was written to the buffer, though no more output is actually
2120	* generated. that way, the caller can determine how much space would be
2121	* required for the path to fit into the buffer. in that case, the returned
2122	* value will be smaller than dest. callers must check this!
2123	*/
2124	char btrfs_ref_to_path(struct* btrfs_root fs_root, struct* btrfs_path *path,
2125	u32 name_len, unsigned long name_off,
2126	struct extent_buffer *eb_in, u64 parent,
2127	char *dest, u32 size)
2128	{
2129	int slot;
2130	u64 next_inum;
2131	int ret;
2132	s64 bytes_left = ((s64)size) - `1`;
2133	struct extent_buffer *eb = eb_in;
2134	struct btrfs_key found_key;
2135	struct btrfs_inode_ref *iref;
2136
2137	if (bytes_left >= `0`)
2138	dest[bytes_left] = `'\0'`;
2139
2140	while (`1`) {
2141	bytes_left -= name_len;
2142	if (bytes_left >= `0`)
2143	read_extent_buffer(eb, dst: dest + bytes_left,
2144	start: name_off, len: name_len);
2145	if (eb != eb_in) {
2146	if (!path->skip_locking)
2147	btrfs_tree_read_unlock(eb);
2148	free_extent_buffer(eb);
2149	}
2150	ret = btrfs_find_item(fs_root, path, inum: parent, ioff: `0`,
2151	BTRFS_INODE_REF_KEY, found_key: &found_key);
2152	if (ret > `0`)
2153	ret = -ENOENT;
2154	if (ret)
2155	break;
2156
2157	next_inum = found_key.offset;
2158
2159	/ regular exit ahead /
2160	if (parent == next_inum)
2161	break;
2162
2163	slot = path->slots[`0`];
2164	eb = path->nodes[`0`];
2165	/ make sure we can use eb after releasing the path /
2166	if (eb != eb_in) {
2167	path->nodes[`0`] = NULL;
2168	path->locks[`0`] = `0`;
2169	}
2170	btrfs_release_path(p: path);
2171	iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
2172
2173	name_len = btrfs_inode_ref_name_len(eb, s: iref);
2174	name_off = (unsigned long)(iref + `1`);
2175
2176	parent = next_inum;
2177	--bytes_left;
2178	if (bytes_left >= `0`)
2179	dest[bytes_left] = `'/'`;
2180	}
2181
2182	btrfs_release_path(p: path);
2183
2184	if (ret)
2185	return ERR_PTR(error: ret);
2186
2187	return dest + bytes_left;
2188	}
2189
2190	/*
2191	* this makes the path point to (logical EXTENT_ITEM *)
2192	* returns BTRFS_EXTENT_FLAG_DATA for data, BTRFS_EXTENT_FLAG_TREE_BLOCK for
2193	* tree blocks and <0 on error.
2194	*/
2195	int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
2196	struct btrfs_path path, struct* btrfs_key *found_key,
2197	u64 *flags_ret)
2198	{
2199	struct btrfs_root *extent_root = btrfs_extent_root(fs_info, bytenr: logical);
2200	int ret;
2201	u64 flags;
2202	u64 size = `0`;
2203	const struct extent_buffer *eb;
2204	struct btrfs_extent_item *ei;
2205	struct btrfs_key key;
2206
2207	key.objectid = logical;
2208	if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
2209	key.type = BTRFS_METADATA_ITEM_KEY;
2210	else
2211	key.type = BTRFS_EXTENT_ITEM_KEY;
2212	key.offset = (u64)-`1`;
2213
2214	ret = btrfs_search_slot(NULL, root: extent_root, key: &key, p: path, ins_len: `0`, cow: `0`);
2215	if (ret < `0`)
2216	return ret;
2217	if (unlikely(ret == `0`)) {
2218	/*
2219	* Key with offset -1 found, there would have to exist an extent
2220	* item with such offset, but this is out of the valid range.
2221	*/
2222	return -EUCLEAN;
2223	}
2224
2225	ret = btrfs_previous_extent_item(root: extent_root, path, min_objectid: `0`);
2226	if (ret) {
2227	if (ret > `0`)
2228	ret = -ENOENT;
2229	return ret;
2230	}
2231	btrfs_item_key_to_cpu(eb: path->nodes[`0`], cpu_key: found_key, nr: path->slots[`0`]);
2232	if (found_key->type == BTRFS_METADATA_ITEM_KEY)
2233	size = fs_info->nodesize;
2234	else if (found_key->type == BTRFS_EXTENT_ITEM_KEY)
2235	size = found_key->offset;
2236
2237	if (found_key->objectid > logical \|\|
2238	found_key->objectid + size <= logical) {
2239	btrfs_debug(fs_info,
2240	"logical %llu is not within any extent", logical);
2241	return -ENOENT;
2242	}
2243
2244	eb = path->nodes[`0`];
2245
2246	ei = btrfs_item_ptr(eb, path->slots[`0`], struct btrfs_extent_item);
2247	flags = btrfs_extent_flags(eb, s: ei);
2248
2249	btrfs_debug(fs_info,
2250	"logical %llu is at position %llu within the extent (%llu EXTENT_ITEM %llu) flags %#llx size %u",
2251	logical, logical - found_key->objectid, found_key->objectid,
2252	found_key->offset, flags, btrfs_item_size(eb, path->slots[`0`]));
2253
2254	WARN_ON(!flags_ret);
2255	if (flags_ret) {
2256	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
2257	*flags_ret = BTRFS_EXTENT_FLAG_TREE_BLOCK;
2258	else if (flags & BTRFS_EXTENT_FLAG_DATA)
2259	*flags_ret = BTRFS_EXTENT_FLAG_DATA;
2260	else
2261	BUG();
2262	return `0`;
2263	}
2264
2265	return -EIO;
2266	}
2267
2268	/*
2269	* helper function to iterate extent inline refs. ptr must point to a 0 value
2270	* for the first call and may be modified. it is used to track state.
2271	* if more refs exist, 0 is returned and the next call to
2272	* get_extent_inline_ref must pass the modified ptr parameter to get the
2273	* next ref. after the last ref was processed, 1 is returned.
2274	* returns <0 on error
2275	*/
2276	static int get_extent_inline_ref(unsigned long *ptr,
2277	const struct extent_buffer *eb,
2278	const struct btrfs_key *key,
2279	const struct btrfs_extent_item *ei,
2280	u32 item_size,
2281	struct btrfs_extent_inline_ref **out_eiref,
2282	int *out_type)
2283	{
2284	unsigned long end;
2285	u64 flags;
2286	struct btrfs_tree_block_info *info;
2287
2288	if (!*ptr) {
2289	/ first call /
2290	flags = btrfs_extent_flags(eb, s: ei);
2291	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
2292	if (key->type == BTRFS_METADATA_ITEM_KEY) {
2293	/ a skinny metadata extent /
2294	*out_eiref =
2295	(struct btrfs_extent_inline_ref *)(ei + `1`);
2296	} else {
2297	WARN_ON(key->type != BTRFS_EXTENT_ITEM_KEY);
2298	info = (struct btrfs_tree_block_info *)(ei + `1`);
2299	*out_eiref =
2300	(struct btrfs_extent_inline_ref *)(info + `1`);
2301	}
2302	} else {
2303	out_eiref = (struct* btrfs_extent_inline_ref *)(ei + `1`);
2304	}
2305	ptr = (unsigned* long)*out_eiref;
2306	if ((unsigned long)(ptr) >= (unsigned* long)ei + item_size)
2307	return -ENOENT;
2308	}
2309
2310	end = (unsigned long)ei + item_size;
2311	out_eiref = (struct* btrfs_extent_inline_ref )(ptr);
2312	out_type = btrfs_get_extent_inline_ref_type(eb, iref: out_eiref,
2313	is_data: BTRFS_REF_TYPE_ANY);
2314	if (unlikely(*out_type == BTRFS_REF_TYPE_INVALID))
2315	return -EUCLEAN;
2316
2317	ptr += btrfs_extent_inline_ref_size(type: out_type);
2318	WARN_ON(*ptr > end);
2319	if (*ptr == end)
2320	return `1`; / last /
2321
2322	return `0`;
2323	}
2324
2325	/*
2326	* reads the tree block backref for an extent. tree level and root are returned
2327	* through out_level and out_root. ptr must point to a 0 value for the first
2328	* call and may be modified (see get_extent_inline_ref comment).
2329	* returns 0 if data was provided, 1 if there was no more data to provide or
2330	* <0 on error.
2331	*/
2332	int tree_backref_for_extent(unsigned long ptr, struct* extent_buffer *eb,
2333	struct btrfs_key key, struct* btrfs_extent_item *ei,
2334	u32 item_size, u64 out_root, u8 out_level)
2335	{
2336	int ret;
2337	int type;
2338	struct btrfs_extent_inline_ref *eiref;
2339
2340	if (ptr == (unsigned* long)-`1`)
2341	return `1`;
2342
2343	while (`1`) {
2344	ret = get_extent_inline_ref(ptr, eb, key, ei, item_size,
2345	out_eiref: &eiref, out_type: &type);
2346	if (ret < `0`)
2347	return ret;
2348
2349	if (type == BTRFS_TREE_BLOCK_REF_KEY \|\|
2350	type == BTRFS_SHARED_BLOCK_REF_KEY)
2351	break;
2352
2353	if (ret == `1`)
2354	return `1`;
2355	}
2356
2357	/ we can treat both ref types equally here /
2358	*out_root = btrfs_extent_inline_ref_offset(eb, s: eiref);
2359
2360	if (key->type == BTRFS_EXTENT_ITEM_KEY) {
2361	struct btrfs_tree_block_info *info;
2362
2363	info = (struct btrfs_tree_block_info *)(ei + `1`);
2364	*out_level = btrfs_tree_block_level(eb, s: info);
2365	} else {
2366	ASSERT(key->type == BTRFS_METADATA_ITEM_KEY);
2367	*out_level = (u8)key->offset;
2368	}
2369
2370	if (ret == `1`)
2371	ptr = (unsigned* long)-`1`;
2372
2373	return `0`;
2374	}
2375
2376	static int iterate_leaf_refs(struct btrfs_fs_info *fs_info,
2377	struct extent_inode_elem *inode_list,
2378	u64 root, u64 extent_item_objectid,
2379	iterate_extent_inodes_t iterate, void* *ctx)
2380	{
2381	struct extent_inode_elem *eie;
2382	int ret = `0`;
2383
2384	for (eie = inode_list; eie; eie = eie->next) {
2385	btrfs_debug(fs_info,
2386	"ref for %llu resolved, key (%llu EXTEND_DATA %llu), root %llu",
2387	extent_item_objectid, eie->inum,
2388	eie->offset, root);
2389	ret = iterate(eie->inum, eie->offset, eie->num_bytes, root, ctx);
2390	if (ret) {
2391	btrfs_debug(fs_info,
2392	"stopping iteration for %llu due to ret=%d",
2393	extent_item_objectid, ret);
2394	break;
2395	}
2396	}
2397
2398	return ret;
2399	}
2400
2401	/*
2402	* calls iterate() for every inode that references the extent identified by
2403	* the given parameters.
2404	* when the iterator function returns a non-zero value, iteration stops.
2405	*/
2406	int iterate_extent_inodes(struct btrfs_backref_walk_ctx *ctx,
2407	bool search_commit_root,
2408	iterate_extent_inodes_t iterate, void* *user_ctx)
2409	{
2410	int ret;
2411	struct ulist *refs;
2412	struct ulist_node *ref_node;
2413	struct btrfs_seq_list seq_elem = BTRFS_SEQ_LIST_INIT(seq_elem);
2414	struct ulist_iterator ref_uiter;
2415
2416	btrfs_debug(ctx->fs_info, "resolving all inodes for extent %llu",
2417	ctx->bytenr);
2418
2419	ASSERT(ctx->trans == NULL);
2420	ASSERT(ctx->roots == NULL);
2421
2422	if (!search_commit_root) {
2423	struct btrfs_trans_handle *trans;
2424
2425	trans = btrfs_attach_transaction(root: ctx->fs_info->tree_root);
2426	if (IS_ERR(ptr: trans)) {
2427	if (PTR_ERR(ptr: trans) != -ENOENT &&
2428	PTR_ERR(ptr: trans) != -EROFS)
2429	return PTR_ERR(ptr: trans);
2430	trans = NULL;
2431	}
2432	ctx->trans = trans;
2433	}
2434
2435	if (ctx->trans) {
2436	btrfs_get_tree_mod_seq(fs_info: ctx->fs_info, elem: &seq_elem);
2437	ctx->time_seq = seq_elem.seq;
2438	} else {
2439	down_read(sem: &ctx->fs_info->commit_root_sem);
2440	}
2441
2442	ret = btrfs_find_all_leafs(ctx);
2443	if (ret)
2444	goto out;
2445	refs = ctx->refs;
2446	ctx->refs = NULL;
2447
2448	ULIST_ITER_INIT(&ref_uiter);
2449	while (!ret && (ref_node = ulist_next(ulist: refs, uiter: &ref_uiter))) {
2450	const u64 leaf_bytenr = ref_node->val;
2451	struct ulist_node *root_node;
2452	struct ulist_iterator root_uiter;
2453	struct extent_inode_elem *inode_list;
2454
2455	inode_list = (struct extent_inode_elem *)(uintptr_t)ref_node->aux;
2456
2457	if (ctx->cache_lookup) {
2458	const u64 *root_ids;
2459	int root_count;
2460	bool cached;
2461
2462	cached = ctx->cache_lookup(leaf_bytenr, ctx->user_ctx,
2463	&root_ids, &root_count);
2464	if (cached) {
2465	for (int i = `0`; i < root_count; i++) {
2466	ret = iterate_leaf_refs(fs_info: ctx->fs_info,
2467	inode_list,
2468	root: root_ids[i],
2469	extent_item_objectid: leaf_bytenr,
2470	iterate,
2471	ctx: user_ctx);
2472	if (ret)
2473	break;
2474	}
2475	continue;
2476	}
2477	}
2478
2479	if (!ctx->roots) {
2480	ctx->roots = ulist_alloc(GFP_NOFS);
2481	if (!ctx->roots) {
2482	ret = -ENOMEM;
2483	break;
2484	}
2485	}
2486
2487	ctx->bytenr = leaf_bytenr;
2488	ret = btrfs_find_all_roots_safe(ctx);
2489	if (ret)
2490	break;
2491
2492	if (ctx->cache_store)
2493	ctx->cache_store(leaf_bytenr, ctx->roots, ctx->user_ctx);
2494
2495	ULIST_ITER_INIT(&root_uiter);
2496	while (!ret && (root_node = ulist_next(ulist: ctx->roots, uiter: &root_uiter))) {
2497	btrfs_debug(ctx->fs_info,
2498	"root %llu references leaf %llu, data list %#llx",
2499	root_node->val, ref_node->val,
2500	ref_node->aux);
2501	ret = iterate_leaf_refs(fs_info: ctx->fs_info, inode_list,
2502	root: root_node->val, extent_item_objectid: ctx->bytenr,
2503	iterate, ctx: user_ctx);
2504	}
2505	ulist_reinit(ulist: ctx->roots);
2506	}
2507
2508	free_leaf_list(ulist: refs);
2509	out:
2510	if (ctx->trans) {
2511	btrfs_put_tree_mod_seq(fs_info: ctx->fs_info, elem: &seq_elem);
2512	btrfs_end_transaction(trans: ctx->trans);
2513	ctx->trans = NULL;
2514	} else {
2515	up_read(sem: &ctx->fs_info->commit_root_sem);
2516	}
2517
2518	ulist_free(ulist: ctx->roots);
2519	ctx->roots = NULL;
2520
2521	if (ret == BTRFS_ITERATE_EXTENT_INODES_STOP)
2522	ret = `0`;
2523
2524	return ret;
2525	}
2526
2527	static int build_ino_list(u64 inum, u64 offset, u64 num_bytes, u64 root, void *ctx)
2528	{
2529	struct btrfs_data_container *inodes = ctx;
2530	const size_t c = `3` * sizeof(u64);
2531
2532	if (inodes->bytes_left >= c) {
2533	inodes->bytes_left -= c;
2534	inodes->val[inodes->elem_cnt] = inum;
2535	inodes->val[inodes->elem_cnt + `1`] = offset;
2536	inodes->val[inodes->elem_cnt + `2`] = root;
2537	inodes->elem_cnt += `3`;
2538	} else {
2539	inodes->bytes_missing += c - inodes->bytes_left;
2540	inodes->bytes_left = `0`;
2541	inodes->elem_missed += `3`;
2542	}
2543
2544	return `0`;
2545	}
2546
2547	int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
2548	void *ctx, bool ignore_offset)
2549	{
2550	struct btrfs_backref_walk_ctx walk_ctx = { `0` };
2551	int ret;
2552	u64 flags = `0`;
2553	struct btrfs_key found_key;
2554	struct btrfs_path *path;
2555
2556	path = btrfs_alloc_path();
2557	if (!path)
2558	return -ENOMEM;
2559
2560	ret = extent_from_logical(fs_info, logical, path, found_key: &found_key, flags_ret: &flags);
2561	btrfs_free_path(p: path);
2562	if (ret < `0`)
2563	return ret;
2564	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
2565	return -EINVAL;
2566
2567	walk_ctx.bytenr = found_key.objectid;
2568	if (ignore_offset)
2569	walk_ctx.ignore_extent_item_pos = true;
2570	else
2571	walk_ctx.extent_item_pos = logical - found_key.objectid;
2572	walk_ctx.fs_info = fs_info;
2573
2574	return iterate_extent_inodes(ctx: &walk_ctx, search_commit_root: false, iterate: build_ino_list, user_ctx: ctx);
2575	}
2576
2577	static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off,
2578	struct extent_buffer eb, struct* inode_fs_paths *ipath);
2579
2580	static int iterate_inode_refs(u64 inum, struct inode_fs_paths *ipath)
2581	{
2582	int ret = `0`;
2583	int slot;
2584	u32 cur;
2585	u32 len;
2586	u32 name_len;
2587	u64 parent = `0`;
2588	int found = `0`;
2589	struct btrfs_root *fs_root = ipath->fs_root;
2590	struct btrfs_path *path = ipath->btrfs_path;
2591	struct extent_buffer *eb;
2592	struct btrfs_inode_ref *iref;
2593	struct btrfs_key found_key;
2594
2595	while (!ret) {
2596	ret = btrfs_find_item(fs_root, path, inum,
2597	ioff: parent ? parent + `1` : `0`, BTRFS_INODE_REF_KEY,
2598	found_key: &found_key);
2599
2600	if (ret < `0`)
2601	break;
2602	if (ret) {
2603	ret = found ? `0` : -ENOENT;
2604	break;
2605	}
2606	++found;
2607
2608	parent = found_key.offset;
2609	slot = path->slots[`0`];
2610	eb = btrfs_clone_extent_buffer(src: path->nodes[`0`]);
2611	if (!eb) {
2612	ret = -ENOMEM;
2613	break;
2614	}
2615	btrfs_release_path(p: path);
2616
2617	iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
2618
2619	for (cur = `0`; cur < btrfs_item_size(eb, slot); cur += len) {
2620	name_len = btrfs_inode_ref_name_len(eb, s: iref);
2621	/ path must be released before calling iterate()! /
2622	btrfs_debug(fs_root->fs_info,
2623	"following ref at offset %u for inode %llu in tree %llu",
2624	cur, found_key.objectid,
2625	btrfs_root_id(fs_root));
2626	ret = inode_to_path(inum: parent, name_len,
2627	name_off: (unsigned long)(iref + `1`), eb, ipath);
2628	if (ret)
2629	break;
2630	len = sizeof(*iref) + name_len;
2631	iref = (struct btrfs_inode_ref )((char* *)iref + len);
2632	}
2633	free_extent_buffer(eb);
2634	}
2635
2636	btrfs_release_path(p: path);
2637
2638	return ret;
2639	}
2640
2641	static int iterate_inode_extrefs(u64 inum, struct inode_fs_paths *ipath)
2642	{
2643	int ret;
2644	int slot;
2645	u64 offset = `0`;
2646	u64 parent;
2647	int found = `0`;
2648	struct btrfs_root *fs_root = ipath->fs_root;
2649	struct btrfs_path *path = ipath->btrfs_path;
2650	struct extent_buffer *eb;
2651	struct btrfs_inode_extref *extref;
2652	u32 item_size;
2653	u32 cur_offset;
2654	unsigned long ptr;
2655
2656	while (`1`) {
2657	ret = btrfs_find_one_extref(root: fs_root, inode_objectid: inum, start_off: offset, path, ret_extref: &extref,
2658	found_off: &offset);
2659	if (ret < `0`)
2660	break;
2661	if (ret) {
2662	ret = found ? `0` : -ENOENT;
2663	break;
2664	}
2665	++found;
2666
2667	slot = path->slots[`0`];
2668	eb = btrfs_clone_extent_buffer(src: path->nodes[`0`]);
2669	if (!eb) {
2670	ret = -ENOMEM;
2671	break;
2672	}
2673	btrfs_release_path(p: path);
2674
2675	item_size = btrfs_item_size(eb, slot);
2676	ptr = btrfs_item_ptr_offset(eb, slot);
2677	cur_offset = `0`;
2678
2679	while (cur_offset < item_size) {
2680	u32 name_len;
2681
2682	extref = (struct btrfs_inode_extref *)(ptr + cur_offset);
2683	parent = btrfs_inode_extref_parent(eb, s: extref);
2684	name_len = btrfs_inode_extref_name_len(eb, s: extref);
2685	ret = inode_to_path(inum: parent, name_len,
2686	name_off: (unsigned long)&extref->name, eb, ipath);
2687	if (ret)
2688	break;
2689
2690	cur_offset += btrfs_inode_extref_name_len(eb, s: extref);
2691	cur_offset += sizeof(*extref);
2692	}
2693	free_extent_buffer(eb);
2694
2695	offset++;
2696	}
2697
2698	btrfs_release_path(p: path);
2699
2700	return ret;
2701	}
2702
2703	/*
2704	* returns 0 if the path could be dumped (probably truncated)
2705	* returns <0 in case of an error
2706	*/
2707	static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off,
2708	struct extent_buffer eb, struct* inode_fs_paths *ipath)
2709	{
2710	char *fspath;
2711	char *fspath_min;
2712	int i = ipath->fspath->elem_cnt;
2713	const int s_ptr = sizeof(char *);
2714	u32 bytes_left;
2715
2716	bytes_left = ipath->fspath->bytes_left > s_ptr ?
2717	ipath->fspath->bytes_left - s_ptr : `0`;
2718
2719	fspath_min = (char )ipath->fspath->val + (i + `1`) s_ptr;
2720	fspath = btrfs_ref_to_path(fs_root: ipath->fs_root, path: ipath->btrfs_path, name_len,
2721	name_off, eb_in: eb, parent: inum, dest: fspath_min, size: bytes_left);
2722	if (IS_ERR(ptr: fspath))
2723	return PTR_ERR(ptr: fspath);
2724
2725	if (fspath > fspath_min) {
2726	ipath->fspath->val[i] = (u64)(unsigned long)fspath;
2727	++ipath->fspath->elem_cnt;
2728	ipath->fspath->bytes_left = fspath - fspath_min;
2729	} else {
2730	++ipath->fspath->elem_missed;
2731	ipath->fspath->bytes_missing += fspath_min - fspath;
2732	ipath->fspath->bytes_left = `0`;
2733	}
2734
2735	return `0`;
2736	}
2737
2738	/*
2739	* this dumps all file system paths to the inode into the ipath struct, provided
2740	* is has been created large enough. each path is zero-terminated and accessed
2741	* from ipath->fspath->val[i].
2742	* when it returns, there are ipath->fspath->elem_cnt number of paths available
2743	* in ipath->fspath->val[]. when the allocated space wasn't sufficient, the
2744	* number of missed paths is recorded in ipath->fspath->elem_missed, otherwise,
2745	* it's zero. ipath->fspath->bytes_missing holds the number of bytes that would
2746	* have been needed to return all paths.
2747	*/
2748	int paths_from_inode(u64 inum, struct inode_fs_paths *ipath)
2749	{
2750	int ret;
2751	int found_refs = `0`;
2752
2753	ret = iterate_inode_refs(inum, ipath);
2754	if (!ret)
2755	++found_refs;
2756	else if (ret != -ENOENT)
2757	return ret;
2758
2759	ret = iterate_inode_extrefs(inum, ipath);
2760	if (ret == -ENOENT && found_refs)
2761	return `0`;
2762
2763	return ret;
2764	}
2765
2766	struct btrfs_data_container *init_data_container(u32 total_bytes)
2767	{
2768	struct btrfs_data_container *data;
2769	size_t alloc_bytes;
2770
2771	alloc_bytes = max_t(size_t, total_bytes, sizeof(*data));
2772	data = kvzalloc(alloc_bytes, GFP_KERNEL);
2773	if (!data)
2774	return ERR_PTR(error: -ENOMEM);
2775
2776	if (total_bytes >= sizeof(*data))
2777	data->bytes_left = total_bytes - sizeof(*data);
2778	else
2779	data->bytes_missing = sizeof(*data) - total_bytes;
2780
2781	return data;
2782	}
2783
2784	/*
2785	* allocates space to return multiple file system paths for an inode.
2786	* total_bytes to allocate are passed, note that space usable for actual path
2787	* information will be total_bytes - sizeof(struct inode_fs_paths).
2788	* the returned pointer must be freed with __free_inode_fs_paths() in the end.
2789	*/
2790	struct inode_fs_paths init_ipath(s32 total_bytes, struct* btrfs_root *fs_root,
2791	struct btrfs_path *path)
2792	{
2793	struct inode_fs_paths *ifp;
2794	struct btrfs_data_container *fspath;
2795
2796	fspath = init_data_container(total_bytes);
2797	if (IS_ERR(ptr: fspath))
2798	return ERR_CAST(ptr: fspath);
2799
2800	ifp = kmalloc(sizeof(*ifp), GFP_KERNEL);
2801	if (!ifp) {
2802	kvfree(addr: fspath);
2803	return ERR_PTR(error: -ENOMEM);
2804	}
2805
2806	ifp->btrfs_path = path;
2807	ifp->fspath = fspath;
2808	ifp->fs_root = fs_root;
2809
2810	return ifp;
2811	}
2812
2813	struct btrfs_backref_iter btrfs_backref_iter_alloc(struct* btrfs_fs_info *fs_info)
2814	{
2815	struct btrfs_backref_iter *ret;
2816
2817	ret = kzalloc(sizeof(*ret), GFP_NOFS);
2818	if (!ret)
2819	return NULL;
2820
2821	ret->path = btrfs_alloc_path();
2822	if (!ret->path) {
2823	kfree(objp: ret);
2824	return NULL;
2825	}
2826
2827	/ Current backref iterator only supports iteration in commit root /
2828	ret->path->search_commit_root = true;
2829	ret->path->skip_locking = true;
2830	ret->fs_info = fs_info;
2831
2832	return ret;
2833	}
2834
2835	static void btrfs_backref_iter_release(struct btrfs_backref_iter *iter)
2836	{
2837	iter->bytenr = `0`;
2838	iter->item_ptr = `0`;
2839	iter->cur_ptr = `0`;
2840	iter->end_ptr = `0`;
2841	btrfs_release_path(p: iter->path);
2842	memset(&iter->cur_key, `0`, sizeof(iter->cur_key));
2843	}
2844
2845	int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr)
2846	{
2847	struct btrfs_fs_info *fs_info = iter->fs_info;
2848	struct btrfs_root *extent_root = btrfs_extent_root(fs_info, bytenr);
2849	struct btrfs_path *path = iter->path;
2850	struct btrfs_extent_item *ei;
2851	struct btrfs_key key;
2852	int ret;
2853
2854	key.objectid = bytenr;
2855	key.type = BTRFS_METADATA_ITEM_KEY;
2856	key.offset = (u64)-`1`;
2857	iter->bytenr = bytenr;
2858
2859	ret = btrfs_search_slot(NULL, root: extent_root, key: &key, p: path, ins_len: `0`, cow: `0`);
2860	if (ret < `0`)
2861	return ret;
2862	if (unlikely(ret == `0`)) {
2863	/*
2864	* Key with offset -1 found, there would have to exist an extent
2865	* item with such offset, but this is out of the valid range.
2866	*/
2867	ret = -EUCLEAN;
2868	goto release;
2869	}
2870	if (unlikely(path->slots[`0`] == `0`)) {
2871	DEBUG_WARN();
2872	ret = -EUCLEAN;
2873	goto release;
2874	}
2875	path->slots[`0`]--;
2876
2877	btrfs_item_key_to_cpu(eb: path->nodes[`0`], cpu_key: &key, nr: path->slots[`0`]);
2878	if ((key.type != BTRFS_EXTENT_ITEM_KEY &&
2879	key.type != BTRFS_METADATA_ITEM_KEY) \|\| key.objectid != bytenr) {
2880	ret = -ENOENT;
2881	goto release;
2882	}
2883	memcpy(&iter->cur_key, &key, sizeof(key));
2884	iter->item_ptr = (u32)btrfs_item_ptr_offset(path->nodes[`0`],
2885	path->slots[`0`]);
2886	iter->end_ptr = (u32)(iter->item_ptr +
2887	btrfs_item_size(eb: path->nodes[`0`], slot: path->slots[`0`]));
2888	ei = btrfs_item_ptr(path->nodes[`0`], path->slots[`0`],
2889	struct btrfs_extent_item);
2890
2891	/*
2892	* Only support iteration on tree backref yet.
2893	*
2894	* This is an extra precaution for non skinny-metadata, where
2895	* EXTENT_ITEM is also used for tree blocks, that we can only use
2896	* extent flags to determine if it's a tree block.
2897	*/
2898	if (btrfs_extent_flags(eb: path->nodes[`0`], s: ei) & BTRFS_EXTENT_FLAG_DATA) {
2899	ret = -ENOTSUPP;
2900	goto release;
2901	}
2902	iter->cur_ptr = (u32)(iter->item_ptr + sizeof(*ei));
2903
2904	/ If there is no inline backref, go search for keyed backref /
2905	if (iter->cur_ptr >= iter->end_ptr) {
2906	ret = btrfs_next_item(root: extent_root, p: path);
2907
2908	/ No inline nor keyed ref /
2909	if (ret > `0`) {
2910	ret = -ENOENT;
2911	goto release;
2912	}
2913	if (ret < `0`)
2914	goto release;
2915
2916	btrfs_item_key_to_cpu(eb: path->nodes[`0`], cpu_key: &iter->cur_key,
2917	nr: path->slots[`0`]);
2918	if (iter->cur_key.objectid != bytenr \|\|
2919	(iter->cur_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
2920	iter->cur_key.type != BTRFS_TREE_BLOCK_REF_KEY)) {
2921	ret = -ENOENT;
2922	goto release;
2923	}
2924	iter->cur_ptr = (u32)btrfs_item_ptr_offset(path->nodes[`0`],
2925	path->slots[`0`]);
2926	iter->item_ptr = iter->cur_ptr;
2927	iter->end_ptr = (u32)(iter->item_ptr + btrfs_item_size(
2928	eb: path->nodes[`0`], slot: path->slots[`0`]));
2929	}
2930
2931	return `0`;
2932	release:
2933	btrfs_backref_iter_release(iter);
2934	return ret;
2935	}
2936
2937	static bool btrfs_backref_iter_is_inline_ref(struct btrfs_backref_iter *iter)
2938	{
2939	if (iter->cur_key.type == BTRFS_EXTENT_ITEM_KEY \|\|
2940	iter->cur_key.type == BTRFS_METADATA_ITEM_KEY)
2941	return true;
2942	return false;
2943	}
2944
2945	/*
2946	* Go to the next backref item of current bytenr, can be either inlined or
2947	* keyed.
2948	*
2949	* Caller needs to check whether it's inline ref or not by iter->cur_key.
2950	*
2951	* Return 0 if we get next backref without problem.
2952	* Return >0 if there is no extra backref for this bytenr.
2953	* Return <0 if there is something wrong happened.
2954	*/
2955	int btrfs_backref_iter_next(struct btrfs_backref_iter *iter)
2956	{
2957	struct extent_buffer *eb = iter->path->nodes[`0`];
2958	struct btrfs_root *extent_root;
2959	struct btrfs_path *path = iter->path;
2960	struct btrfs_extent_inline_ref *iref;
2961	int ret;
2962	u32 size;
2963
2964	if (btrfs_backref_iter_is_inline_ref(iter)) {
2965	/ We're still inside the inline refs /
2966	ASSERT(iter->cur_ptr < iter->end_ptr);
2967
2968	if (btrfs_backref_has_tree_block_info(iter)) {
2969	/ First tree block info /
2970	size = sizeof(struct btrfs_tree_block_info);
2971	} else {
2972	/ Use inline ref type to determine the size /
2973	int type;
2974
2975	iref = (struct btrfs_extent_inline_ref *)
2976	((unsigned long)iter->cur_ptr);
2977	type = btrfs_extent_inline_ref_type(eb, s: iref);
2978
2979	size = btrfs_extent_inline_ref_size(type);
2980	}
2981	iter->cur_ptr += size;
2982	if (iter->cur_ptr < iter->end_ptr)
2983	return `0`;
2984
2985	/ All inline items iterated, fall through /
2986	}
2987
2988	/ We're at keyed items, there is no inline item, go to the next one /
2989	extent_root = btrfs_extent_root(fs_info: iter->fs_info, bytenr: iter->bytenr);
2990	ret = btrfs_next_item(root: extent_root, p: iter->path);
2991	if (ret)
2992	return ret;
2993
2994	btrfs_item_key_to_cpu(eb: path->nodes[`0`], cpu_key: &iter->cur_key, nr: path->slots[`0`]);
2995	if (iter->cur_key.objectid != iter->bytenr \|\|
2996	(iter->cur_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
2997	iter->cur_key.type != BTRFS_SHARED_BLOCK_REF_KEY))
2998	return `1`;
2999	iter->item_ptr = (u32)btrfs_item_ptr_offset(path->nodes[`0`],
3000	path->slots[`0`]);
3001	iter->cur_ptr = iter->item_ptr;
3002	iter->end_ptr = iter->item_ptr + (u32)btrfs_item_size(eb: path->nodes[`0`],
3003	slot: path->slots[`0`]);
3004	return `0`;
3005	}
3006
3007	void btrfs_backref_init_cache(struct btrfs_fs_info *fs_info,
3008	struct btrfs_backref_cache *cache, bool is_reloc)
3009	{
3010	int i;
3011
3012	cache->rb_root = RB_ROOT;
3013	for (i = `0`; i < BTRFS_MAX_LEVEL; i++)
3014	INIT_LIST_HEAD(list: &cache->pending[i]);
3015	INIT_LIST_HEAD(list: &cache->pending_edge);
3016	INIT_LIST_HEAD(list: &cache->useless_node);
3017	cache->fs_info = fs_info;
3018	cache->is_reloc = is_reloc;
3019	}
3020
3021	struct btrfs_backref_node *btrfs_backref_alloc_node(
3022	struct btrfs_backref_cache cache, u64 bytenr, int* level)
3023	{
3024	struct btrfs_backref_node *node;
3025
3026	ASSERT(level >= `0` && level < BTRFS_MAX_LEVEL);
3027	node = kzalloc(sizeof(*node), GFP_NOFS);
3028	if (!node)
3029	return node;
3030
3031	INIT_LIST_HEAD(list: &node->list);
3032	INIT_LIST_HEAD(list: &node->upper);
3033	INIT_LIST_HEAD(list: &node->lower);
3034	RB_CLEAR_NODE(&node->rb_node);
3035	cache->nr_nodes++;
3036	node->level = level;
3037	node->bytenr = bytenr;
3038
3039	return node;
3040	}
3041
3042	void btrfs_backref_free_node(struct btrfs_backref_cache *cache,
3043	struct btrfs_backref_node *node)
3044	{
3045	if (node) {
3046	ASSERT(list_empty(&node->list));
3047	ASSERT(list_empty(&node->lower));
3048	ASSERT(node->eb == NULL);
3049	cache->nr_nodes--;
3050	btrfs_put_root(root: node->root);
3051	kfree(objp: node);
3052	}
3053	}
3054
3055	struct btrfs_backref_edge *btrfs_backref_alloc_edge(
3056	struct btrfs_backref_cache *cache)
3057	{
3058	struct btrfs_backref_edge *edge;
3059
3060	edge = kzalloc(sizeof(*edge), GFP_NOFS);
3061	if (edge)
3062	cache->nr_edges++;
3063	return edge;
3064	}
3065
3066	void btrfs_backref_free_edge(struct btrfs_backref_cache *cache,
3067	struct btrfs_backref_edge *edge)
3068	{
3069	if (edge) {
3070	cache->nr_edges--;
3071	kfree(objp: edge);
3072	}
3073	}
3074
3075	void btrfs_backref_unlock_node_buffer(struct btrfs_backref_node *node)
3076	{
3077	if (node->locked) {
3078	btrfs_tree_unlock(eb: node->eb);
3079	node->locked = `0`;
3080	}
3081	}
3082
3083	void btrfs_backref_drop_node_buffer(struct btrfs_backref_node *node)
3084	{
3085	if (node->eb) {
3086	btrfs_backref_unlock_node_buffer(node);
3087	free_extent_buffer(eb: node->eb);
3088	node->eb = NULL;
3089	}
3090	}
3091
3092	/*
3093	* Drop the backref node from cache without cleaning up its children
3094	* edges.
3095	*
3096	* This can only be called on node without parent edges.
3097	* The children edges are still kept as is.
3098	*/
3099	void btrfs_backref_drop_node(struct btrfs_backref_cache *tree,
3100	struct btrfs_backref_node *node)
3101	{
3102	ASSERT(list_empty(&node->upper));
3103
3104	btrfs_backref_drop_node_buffer(node);
3105	list_del_init(entry: &node->list);
3106	list_del_init(entry: &node->lower);
3107	if (!RB_EMPTY_NODE(&node->rb_node))
3108	rb_erase(&node->rb_node, &tree->rb_root);
3109	btrfs_backref_free_node(cache: tree, node);
3110	}
3111
3112	/*
3113	* Drop the backref node from cache, also cleaning up all its
3114	* upper edges and any uncached nodes in the path.
3115	*
3116	* This cleanup happens bottom up, thus the node should either
3117	* be the lowest node in the cache or a detached node.
3118	*/
3119	void btrfs_backref_cleanup_node(struct btrfs_backref_cache *cache,
3120	struct btrfs_backref_node *node)
3121	{
3122	struct btrfs_backref_edge *edge;
3123
3124	if (!node)
3125	return;
3126
3127	while (!list_empty(head: &node->upper)) {
3128	edge = list_first_entry(&node->upper, struct btrfs_backref_edge,
3129	list[LOWER]);
3130	list_del(entry: &edge->list[LOWER]);
3131	list_del(entry: &edge->list[UPPER]);
3132	btrfs_backref_free_edge(cache, edge);
3133	}
3134
3135	btrfs_backref_drop_node(tree: cache, node);
3136	}
3137
3138	/*
3139	* Release all nodes/edges from current cache
3140	*/
3141	void btrfs_backref_release_cache(struct btrfs_backref_cache *cache)
3142	{
3143	struct btrfs_backref_node *node;
3144
3145	while ((node = rb_entry_safe(rb_first(&cache->rb_root),
3146	struct btrfs_backref_node, rb_node)))
3147	btrfs_backref_cleanup_node(cache, node);
3148
3149	ASSERT(list_empty(&cache->pending_edge));
3150	ASSERT(list_empty(&cache->useless_node));
3151	ASSERT(!cache->nr_nodes);
3152	ASSERT(!cache->nr_edges);
3153	}
3154
3155	static void btrfs_backref_link_edge(struct btrfs_backref_edge *edge,
3156	struct btrfs_backref_node *lower,
3157	struct btrfs_backref_node *upper)
3158	{
3159	ASSERT(upper && lower && upper->level == lower->level + `1`);
3160	edge->node[LOWER] = lower;
3161	edge->node[UPPER] = upper;
3162	list_add_tail(new: &edge->list[LOWER], head: &lower->upper);
3163	}
3164	/*
3165	* Handle direct tree backref
3166	*
3167	* Direct tree backref means, the backref item shows its parent bytenr
3168	* directly. This is for SHARED_BLOCK_REF backref (keyed or inlined).
3169	*
3170	* @ref_key: The converted backref key.
3171	* For keyed backref, it's the item key.
3172	* For inlined backref, objectid is the bytenr,
3173	* type is btrfs_inline_ref_type, offset is
3174	* btrfs_inline_ref_offset.
3175	*/
3176	static int handle_direct_tree_backref(struct btrfs_backref_cache *cache,
3177	struct btrfs_key *ref_key,
3178	struct btrfs_backref_node *cur)
3179	{
3180	struct btrfs_backref_edge *edge;
3181	struct btrfs_backref_node *upper;
3182	struct rb_node *rb_node;
3183
3184	ASSERT(ref_key->type == BTRFS_SHARED_BLOCK_REF_KEY);
3185
3186	/ Only reloc root uses backref pointing to itself /
3187	if (ref_key->objectid == ref_key->offset) {
3188	struct btrfs_root *root;
3189
3190	cur->is_reloc_root = `1`;
3191	/ Only reloc backref cache cares about a specific root /
3192	if (cache->is_reloc) {
3193	root = find_reloc_root(fs_info: cache->fs_info, bytenr: cur->bytenr);
3194	if (!root)
3195	return -ENOENT;
3196	cur->root = root;
3197	} else {
3198	/*
3199	* For generic purpose backref cache, reloc root node
3200	* is useless.
3201	*/
3202	list_add(new: &cur->list, head: &cache->useless_node);
3203	}
3204	return `0`;
3205	}
3206
3207	edge = btrfs_backref_alloc_edge(cache);
3208	if (!edge)
3209	return -ENOMEM;
3210
3211	rb_node = rb_simple_search(root: &cache->rb_root, bytenr: ref_key->offset);
3212	if (!rb_node) {
3213	/ Parent node not yet cached /
3214	upper = btrfs_backref_alloc_node(cache, bytenr: ref_key->offset,
3215	level: cur->level + `1`);
3216	if (!upper) {
3217	btrfs_backref_free_edge(cache, edge);
3218	return -ENOMEM;
3219	}
3220
3221	/*
3222	* Backrefs for the upper level block isn't cached, add the
3223	* block to pending list
3224	*/
3225	list_add_tail(new: &edge->list[UPPER], head: &cache->pending_edge);
3226	} else {
3227	/ Parent node already cached /
3228	upper = rb_entry(rb_node, struct btrfs_backref_node, rb_node);
3229	ASSERT(upper->checked);
3230	INIT_LIST_HEAD(list: &edge->list[UPPER]);
3231	}
3232	btrfs_backref_link_edge(edge, lower: cur, upper);
3233	return `0`;
3234	}
3235
3236	/*
3237	* Handle indirect tree backref
3238	*
3239	* Indirect tree backref means, we only know which tree the node belongs to.
3240	* We still need to do a tree search to find out the parents. This is for
3241	* TREE_BLOCK_REF backref (keyed or inlined).
3242	*
3243	* @trans: Transaction handle.
3244	* @ref_key: The same as @ref_key in handle_direct_tree_backref()
3245	* @tree_key: The first key of this tree block.
3246	* @path: A clean (released) path, to avoid allocating path every time
3247	* the function get called.
3248	*/
3249	static int handle_indirect_tree_backref(struct btrfs_trans_handle *trans,
3250	struct btrfs_backref_cache *cache,
3251	struct btrfs_path *path,
3252	struct btrfs_key *ref_key,
3253	struct btrfs_key *tree_key,
3254	struct btrfs_backref_node *cur)
3255	{
3256	struct btrfs_fs_info *fs_info = cache->fs_info;
3257	struct btrfs_backref_node *upper;
3258	struct btrfs_backref_node *lower;
3259	struct btrfs_backref_edge *edge;
3260	struct extent_buffer *eb;
3261	struct btrfs_root *root;
3262	struct rb_node *rb_node;
3263	int level;
3264	bool need_check = true;
3265	int ret;
3266
3267	root = btrfs_get_fs_root(fs_info, objectid: ref_key->offset, check_ref: false);
3268	if (IS_ERR(ptr: root))
3269	return PTR_ERR(ptr: root);
3270
3271	/ We shouldn't be using backref cache for non-shareable roots. /
3272	if (unlikely(!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))) {
3273	btrfs_put_root(root);
3274	return -EUCLEAN;
3275	}
3276
3277	if (btrfs_root_level(s: &root->root_item) == cur->level) {
3278	/ Tree root /
3279	ASSERT(btrfs_root_bytenr(&root->root_item) == cur->bytenr);
3280	/*
3281	* For reloc backref cache, we may ignore reloc root. But for
3282	* general purpose backref cache, we can't rely on
3283	* btrfs_should_ignore_reloc_root() as it may conflict with
3284	* current running relocation and lead to missing root.
3285	*
3286	* For general purpose backref cache, reloc root detection is
3287	* completely relying on direct backref (key->offset is parent
3288	* bytenr), thus only do such check for reloc cache.
3289	*/
3290	if (btrfs_should_ignore_reloc_root(root) && cache->is_reloc) {
3291	btrfs_put_root(root);
3292	list_add(new: &cur->list, head: &cache->useless_node);
3293	} else {
3294	cur->root = root;
3295	}
3296	return `0`;
3297	}
3298
3299	level = cur->level + `1`;
3300
3301	/ Search the tree to find parent blocks referring to the block /
3302	path->search_commit_root = true;
3303	path->skip_locking = true;
3304	path->lowest_level = level;
3305	ret = btrfs_search_slot(NULL, root, key: tree_key, p: path, ins_len: `0`, cow: `0`);
3306	path->lowest_level = `0`;
3307	if (ret < `0`) {
3308	btrfs_put_root(root);
3309	return ret;
3310	}
3311	if (ret > `0` && path->slots[level] > `0`)
3312	path->slots[level]--;
3313
3314	eb = path->nodes[level];
3315	if (btrfs_node_blockptr(eb, nr: path->slots[level]) != cur->bytenr) {
3316	btrfs_err(fs_info,
3317	"couldn't find block (%llu) (level %d) in tree (%llu) with key " BTRFS_KEY_FMT,
3318	cur->bytenr, level - `1`, btrfs_root_id(root),
3319	BTRFS_KEY_FMT_VALUE(tree_key));
3320	btrfs_put_root(root);
3321	ret = -ENOENT;
3322	goto out;
3323	}
3324	lower = cur;
3325
3326	/ Add all nodes and edges in the path /
3327	for (; level < BTRFS_MAX_LEVEL; level++) {
3328	if (!path->nodes[level]) {
3329	ASSERT(btrfs_root_bytenr(&root->root_item) ==
3330	lower->bytenr);
3331	/ Same as previous should_ignore_reloc_root() call /
3332	if (btrfs_should_ignore_reloc_root(root) &&
3333	cache->is_reloc) {
3334	btrfs_put_root(root);
3335	list_add(new: &lower->list, head: &cache->useless_node);
3336	} else {
3337	lower->root = root;
3338	}
3339	break;
3340	}
3341
3342	edge = btrfs_backref_alloc_edge(cache);
3343	if (!edge) {
3344	btrfs_put_root(root);
3345	ret = -ENOMEM;
3346	goto out;
3347	}
3348
3349	eb = path->nodes[level];
3350	rb_node = rb_simple_search(root: &cache->rb_root, bytenr: eb->start);
3351	if (!rb_node) {
3352	upper = btrfs_backref_alloc_node(cache, bytenr: eb->start,
3353	level: lower->level + `1`);
3354	if (!upper) {
3355	btrfs_put_root(root);
3356	btrfs_backref_free_edge(cache, edge);
3357	ret = -ENOMEM;
3358	goto out;
3359	}
3360	upper->owner = btrfs_header_owner(eb);
3361
3362	/ We shouldn't be using backref cache for non shareable roots. /
3363	if (unlikely(!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))) {
3364	btrfs_put_root(root);
3365	btrfs_backref_free_edge(cache, edge);
3366	btrfs_backref_free_node(cache, node: upper);
3367	ret = -EUCLEAN;
3368	goto out;
3369	}
3370
3371	/*
3372	* If we know the block isn't shared we can avoid
3373	* checking its backrefs.
3374	*/
3375	if (btrfs_block_can_be_shared(trans, root, buf: eb))
3376	upper->checked = `0`;
3377	else
3378	upper->checked = `1`;
3379
3380	/*
3381	* Add the block to pending list if we need to check its
3382	* backrefs, we only do this once while walking up a
3383	* tree as we will catch anything else later on.
3384	*/
3385	if (!upper->checked && need_check) {
3386	need_check = false;
3387	list_add_tail(new: &edge->list[UPPER],
3388	head: &cache->pending_edge);
3389	} else {
3390	if (upper->checked)
3391	need_check = true;
3392	INIT_LIST_HEAD(list: &edge->list[UPPER]);
3393	}
3394	} else {
3395	upper = rb_entry(rb_node, struct btrfs_backref_node,
3396	rb_node);
3397	ASSERT(upper->checked);
3398	INIT_LIST_HEAD(list: &edge->list[UPPER]);
3399	if (!upper->owner)
3400	upper->owner = btrfs_header_owner(eb);
3401	}
3402	btrfs_backref_link_edge(edge, lower, upper);
3403
3404	if (rb_node) {
3405	btrfs_put_root(root);
3406	break;
3407	}
3408	lower = upper;
3409	upper = NULL;
3410	}
3411	out:
3412	btrfs_release_path(p: path);
3413	return ret;
3414	}
3415
3416	/*
3417	* Add backref node @cur into @cache.
3418	*
3419	* NOTE: Even if the function returned 0, @cur is not yet cached as its upper
3420	* links aren't yet bi-directional. Needs to finish such links.
3421	* Use btrfs_backref_finish_upper_links() to finish such linkage.
3422	*
3423	* @trans: Transaction handle.
3424	* @path: Released path for indirect tree backref lookup
3425	* @iter: Released backref iter for extent tree search
3426	* @node_key: The first key of the tree block
3427	*/
3428	int btrfs_backref_add_tree_node(struct btrfs_trans_handle *trans,
3429	struct btrfs_backref_cache *cache,
3430	struct btrfs_path *path,
3431	struct btrfs_backref_iter *iter,
3432	struct btrfs_key *node_key,
3433	struct btrfs_backref_node *cur)
3434	{
3435	struct btrfs_backref_edge *edge;
3436	struct btrfs_backref_node *exist;
3437	int ret;
3438
3439	ret = btrfs_backref_iter_start(iter, bytenr: cur->bytenr);
3440	if (ret < `0`)
3441	return ret;
3442	/*
3443	* We skip the first btrfs_tree_block_info, as we don't use the key
3444	* stored in it, but fetch it from the tree block
3445	*/
3446	if (btrfs_backref_has_tree_block_info(iter)) {
3447	ret = btrfs_backref_iter_next(iter);
3448	if (ret < `0`)
3449	goto out;
3450	/ No extra backref? This means the tree block is corrupted /
3451	if (unlikely(ret > `0`)) {
3452	ret = -EUCLEAN;
3453	goto out;
3454	}
3455	}
3456	WARN_ON(cur->checked);
3457	if (!list_empty(head: &cur->upper)) {
3458	/*
3459	* The backref was added previously when processing backref of
3460	* type BTRFS_TREE_BLOCK_REF_KEY
3461	*/
3462	ASSERT(list_is_singular(&cur->upper));
3463	edge = list_first_entry(&cur->upper, struct btrfs_backref_edge,
3464	list[LOWER]);
3465	ASSERT(list_empty(&edge->list[UPPER]));
3466	exist = edge->node[UPPER];
3467	/*
3468	* Add the upper level block to pending list if we need check
3469	* its backrefs
3470	*/
3471	if (!exist->checked)
3472	list_add_tail(new: &edge->list[UPPER], head: &cache->pending_edge);
3473	} else {
3474	exist = NULL;
3475	}
3476
3477	for (; ret == `0`; ret = btrfs_backref_iter_next(iter)) {
3478	struct extent_buffer *eb;
3479	struct btrfs_key key;
3480	int type;
3481
3482	cond_resched();
3483	eb = iter->path->nodes[`0`];
3484
3485	key.objectid = iter->bytenr;
3486	if (btrfs_backref_iter_is_inline_ref(iter)) {
3487	struct btrfs_extent_inline_ref *iref;
3488
3489	/ Update key for inline backref /
3490	iref = (struct btrfs_extent_inline_ref *)
3491	((unsigned long)iter->cur_ptr);
3492	type = btrfs_get_extent_inline_ref_type(eb, iref,
3493	is_data: BTRFS_REF_TYPE_BLOCK);
3494	if (unlikely(type == BTRFS_REF_TYPE_INVALID)) {
3495	ret = -EUCLEAN;
3496	goto out;
3497	}
3498	key.type = type;
3499	key.offset = btrfs_extent_inline_ref_offset(eb, s: iref);
3500	} else {
3501	key.type = iter->cur_key.type;
3502	key.offset = iter->cur_key.offset;
3503	}
3504
3505	/*
3506	* Parent node found and matches current inline ref, no need to
3507	* rebuild this node for this inline ref
3508	*/
3509	if (exist &&
3510	((key.type == BTRFS_TREE_BLOCK_REF_KEY &&
3511	exist->owner == key.offset) \|\|
3512	(key.type == BTRFS_SHARED_BLOCK_REF_KEY &&
3513	exist->bytenr == key.offset))) {
3514	exist = NULL;
3515	continue;
3516	}
3517
3518	/ SHARED_BLOCK_REF means key.offset is the parent bytenr /
3519	if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
3520	ret = handle_direct_tree_backref(cache, ref_key: &key, cur);
3521	if (ret < `0`)
3522	goto out;
3523	} else if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
3524	/*
3525	* key.type == BTRFS_TREE_BLOCK_REF_KEY, inline ref
3526	* offset means the root objectid. We need to search
3527	* the tree to get its parent bytenr.
3528	*/
3529	ret = handle_indirect_tree_backref(trans, cache, path,
3530	ref_key: &key, tree_key: node_key, cur);
3531	if (ret < `0`)
3532	goto out;
3533	}
3534	/*
3535	* Unrecognized tree backref items (if it can pass tree-checker)
3536	* would be ignored.
3537	*/
3538	}
3539	ret = `0`;
3540	cur->checked = `1`;
3541	WARN_ON(exist);
3542	out:
3543	btrfs_backref_iter_release(iter);
3544	return ret;
3545	}
3546
3547	/*
3548	* Finish the upwards linkage created by btrfs_backref_add_tree_node()
3549	*/
3550	int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache,
3551	struct btrfs_backref_node *start)
3552	{
3553	struct list_head *useless_node = &cache->useless_node;
3554	struct btrfs_backref_edge *edge;
3555	struct rb_node *rb_node;
3556	LIST_HEAD(pending_edge);
3557
3558	ASSERT(start->checked);
3559
3560	rb_node = rb_simple_insert(root: &cache->rb_root, simple_node: &start->simple_node);
3561	if (rb_node)
3562	btrfs_backref_panic(fs_info: cache->fs_info, bytenr: start->bytenr, error: -EEXIST);
3563
3564	/*
3565	* Use breadth first search to iterate all related edges.
3566	*
3567	* The starting points are all the edges of this node
3568	*/
3569	list_for_each_entry(edge, &start->upper, list[LOWER])
3570	list_add_tail(new: &edge->list[UPPER], head: &pending_edge);
3571
3572	while (!list_empty(head: &pending_edge)) {
3573	struct btrfs_backref_node *upper;
3574	struct btrfs_backref_node *lower;
3575
3576	edge = list_first_entry(&pending_edge,
3577	struct btrfs_backref_edge, list[UPPER]);
3578	list_del_init(entry: &edge->list[UPPER]);
3579	upper = edge->node[UPPER];
3580	lower = edge->node[LOWER];
3581
3582	/ Parent is detached, no need to keep any edges /
3583	if (upper->detached) {
3584	list_del(entry: &edge->list[LOWER]);
3585	btrfs_backref_free_edge(cache, edge);
3586
3587	/ Lower node is orphan, queue for cleanup /
3588	if (list_empty(head: &lower->upper))
3589	list_add(new: &lower->list, head: useless_node);
3590	continue;
3591	}
3592
3593	/*
3594	* All new nodes added in current build_backref_tree() haven't
3595	* been linked to the cache rb tree.
3596	* So if we have upper->rb_node populated, this means a cache
3597	* hit. We only need to link the edge, as @upper and all its
3598	* parents have already been linked.
3599	*/
3600	if (!RB_EMPTY_NODE(&upper->rb_node)) {
3601	list_add_tail(new: &edge->list[UPPER], head: &upper->lower);
3602	continue;
3603	}
3604
3605	/ Sanity check, we shouldn't have any unchecked nodes /
3606	if (unlikely(!upper->checked)) {
3607	DEBUG_WARN("we should not have any unchecked nodes");
3608	return -EUCLEAN;
3609	}
3610
3611	rb_node = rb_simple_insert(root: &cache->rb_root, simple_node: &upper->simple_node);
3612	if (unlikely(rb_node)) {
3613	btrfs_backref_panic(fs_info: cache->fs_info, bytenr: upper->bytenr, error: -EEXIST);
3614	return -EUCLEAN;
3615	}
3616
3617	list_add_tail(new: &edge->list[UPPER], head: &upper->lower);
3618
3619	/*
3620	* Also queue all the parent edges of this uncached node
3621	* to finish the upper linkage
3622	*/
3623	list_for_each_entry(edge, &upper->upper, list[LOWER])
3624	list_add_tail(new: &edge->list[UPPER], head: &pending_edge);
3625	}
3626	return `0`;
3627	}
3628
3629	void btrfs_backref_error_cleanup(struct btrfs_backref_cache *cache,
3630	struct btrfs_backref_node *node)
3631	{
3632	struct btrfs_backref_node *lower;
3633	struct btrfs_backref_node *upper;
3634	struct btrfs_backref_edge *edge;
3635
3636	while (!list_empty(head: &cache->useless_node)) {
3637	lower = list_first_entry(&cache->useless_node,
3638	struct btrfs_backref_node, list);
3639	list_del_init(entry: &lower->list);
3640	}
3641	while (!list_empty(head: &cache->pending_edge)) {
3642	edge = list_first_entry(&cache->pending_edge,
3643	struct btrfs_backref_edge, list[UPPER]);
3644	list_del(entry: &edge->list[UPPER]);
3645	list_del(entry: &edge->list[LOWER]);
3646	lower = edge->node[LOWER];
3647	upper = edge->node[UPPER];
3648	btrfs_backref_free_edge(cache, edge);
3649
3650	/*
3651	* Lower is no longer linked to any upper backref nodes and
3652	* isn't in the cache, we can free it ourselves.
3653	*/
3654	if (list_empty(head: &lower->upper) &&
3655	RB_EMPTY_NODE(&lower->rb_node))
3656	list_add(new: &lower->list, head: &cache->useless_node);
3657
3658	if (!RB_EMPTY_NODE(&upper->rb_node))
3659	continue;
3660
3661	/ Add this guy's upper edges to the list to process /
3662	list_for_each_entry(edge, &upper->upper, list[LOWER])
3663	list_add_tail(new: &edge->list[UPPER],
3664	head: &cache->pending_edge);
3665	if (list_empty(head: &upper->upper))
3666	list_add(new: &upper->list, head: &cache->useless_node);
3667	}
3668
3669	while (!list_empty(head: &cache->useless_node)) {
3670	lower = list_first_entry(&cache->useless_node,
3671	struct btrfs_backref_node, list);
3672	list_del_init(entry: &lower->list);
3673	if (lower == node)
3674	node = NULL;
3675	btrfs_backref_drop_node(tree: cache, node: lower);
3676	}
3677
3678	btrfs_backref_cleanup_node(cache, node);
3679	ASSERT(list_empty(&cache->useless_node) &&
3680	list_empty(&cache->pending_edge));
3681	}
3682

source code of linux/fs/btrfs/backref.c