ioctl.c source code [linux/fs/btrfs/ioctl.c]

1	// SPDX-License-Identifier: GPL-2.0
2	/*
3	* Copyright (C) 2007 Oracle. All rights reserved.
4	*/
5
6	#include <linux/kernel.h>
7	#include <linux/bio.h>
8	#include <linux/file.h>
9	#include <linux/fs.h>
10	#include <linux/fsnotify.h>
11	#include <linux/pagemap.h>
12	#include <linux/highmem.h>
13	#include <linux/time.h>
14	#include <linux/string.h>
15	#include <linux/backing-dev.h>
16	#include <linux/mount.h>
17	#include <linux/namei.h>
18	#include <linux/writeback.h>
19	#include <linux/compat.h>
20	#include <linux/security.h>
21	#include <linux/xattr.h>
22	#include <linux/mm.h>
23	#include <linux/slab.h>
24	#include <linux/blkdev.h>
25	#include <linux/uuid.h>
26	#include <linux/btrfs.h>
27	#include <linux/uaccess.h>
28	#include <linux/iversion.h>
29	#include <linux/fileattr.h>
30	#include <linux/fsverity.h>
31	#include <linux/sched/xacct.h>
32	#include <linux/io_uring/cmd.h>
33	#include "ctree.h"
34	#include "disk-io.h"
35	#include "export.h"
36	#include "transaction.h"
37	#include "btrfs_inode.h"
38	#include "volumes.h"
39	#include "locking.h"
40	#include "backref.h"
41	#include "send.h"
42	#include "dev-replace.h"
43	#include "props.h"
44	#include "sysfs.h"
45	#include "qgroup.h"
46	#include "tree-log.h"
47	#include "compression.h"
48	#include "space-info.h"
49	#include "block-group.h"
50	#include "fs.h"
51	#include "accessors.h"
52	#include "extent-tree.h"
53	#include "root-tree.h"
54	#include "defrag.h"
55	#include "dir-item.h"
56	#include "uuid-tree.h"
57	#include "ioctl.h"
58	#include "file.h"
59	#include "scrub.h"
60	#include "super.h"
61
#ifdef CONFIG_64BIT
/*
 * With a 32-bit userspace on a 64-bit kernel the UAPI structures do not
 * match, because the timespec structure coming from userspace is 4 bytes
 * too small. The alternative definitions below teach the kernel about the
 * 32-bit struct packing.
 */
struct btrfs_ioctl_timespec_32 {
	__u64 sec;
	__u32 nsec;
} __attribute__ ((__packed__));

struct btrfs_ioctl_received_subvol_args_32 {
	char	uuid[BTRFS_UUID_SIZE];			/* in */
	__u64	stransid;				/* in */
	__u64	rtransid;				/* out */
	struct btrfs_ioctl_timespec_32 stime;		/* in */
	struct btrfs_ioctl_timespec_32 rtime;		/* out */
	__u64	flags;					/* in */
	__u64	reserved[16];				/* in */
} __attribute__ ((__packed__));

/* Ioctl number of the 32-bit layout variant of SET_RECEIVED_SUBVOL. */
#define BTRFS_IOC_SET_RECEIVED_SUBVOL_32 _IOWR(BTRFS_IOCTL_MAGIC, 37, \
				struct btrfs_ioctl_received_subvol_args_32)
#endif
86
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
/* Send arguments as laid out by a compat caller (compat_uptr_t pointer). */
struct btrfs_ioctl_send_args_32 {
	__s64		send_fd;		/* in */
	__u64		clone_sources_count;	/* in */
	compat_uptr_t	clone_sources;		/* in */
	__u64		parent_root;		/* in */
	__u64		flags;			/* in */
	__u32		version;		/* in */
	__u8		reserved[28];		/* in */
} __attribute__ ((__packed__));

#define BTRFS_IOC_SEND_32 _IOW(BTRFS_IOCTL_MAGIC, 38, \
			       struct btrfs_ioctl_send_args_32)

/* Encoded I/O arguments as laid out by a compat caller. */
struct btrfs_ioctl_encoded_io_args_32 {
	compat_uptr_t	iov;
	compat_ulong_t	iovcnt;
	__s64		offset;
	__u64		flags;
	__u64		len;
	__u64		unencoded_len;
	__u64		unencoded_offset;
	__u32		compression;
	__u32		encryption;
	__u8		reserved[64];
};

#define BTRFS_IOC_ENCODED_READ_32 _IOR(BTRFS_IOCTL_MAGIC, 64, \
				       struct btrfs_ioctl_encoded_io_args_32)
#define BTRFS_IOC_ENCODED_WRITE_32 _IOW(BTRFS_IOCTL_MAGIC, 64, \
					struct btrfs_ioctl_encoded_io_args_32)
#endif
119
120	/ Mask out flags that are inappropriate for the given type of inode. /
121	static unsigned int btrfs_mask_fsflags_for_type(const struct inode *inode,
122	unsigned int flags)
123	{
124	if (S_ISDIR(inode->i_mode))
125	return flags;
126	else if (S_ISREG(inode->i_mode))
127	return flags & ~FS_DIRSYNC_FL;
128	else
129	return flags & (FS_NODUMP_FL \| FS_NOATIME_FL);
130	}
131
132	/*
133	* Export internal inode flags to the format expected by the FS_IOC_GETFLAGS
134	* ioctl.
135	*/
136	static unsigned int btrfs_inode_flags_to_fsflags(const struct btrfs_inode *inode)
137	{
138	unsigned int iflags = `0`;
139	u32 flags = inode->flags;
140	u32 ro_flags = inode->ro_flags;
141
142	if (flags & BTRFS_INODE_SYNC)
143	iflags \|= FS_SYNC_FL;
144	if (flags & BTRFS_INODE_IMMUTABLE)
145	iflags \|= FS_IMMUTABLE_FL;
146	if (flags & BTRFS_INODE_APPEND)
147	iflags \|= FS_APPEND_FL;
148	if (flags & BTRFS_INODE_NODUMP)
149	iflags \|= FS_NODUMP_FL;
150	if (flags & BTRFS_INODE_NOATIME)
151	iflags \|= FS_NOATIME_FL;
152	if (flags & BTRFS_INODE_DIRSYNC)
153	iflags \|= FS_DIRSYNC_FL;
154	if (flags & BTRFS_INODE_NODATACOW)
155	iflags \|= FS_NOCOW_FL;
156	if (ro_flags & BTRFS_INODE_RO_VERITY)
157	iflags \|= FS_VERITY_FL;
158
159	if (flags & BTRFS_INODE_NOCOMPRESS)
160	iflags \|= FS_NOCOMP_FL;
161	else if (flags & BTRFS_INODE_COMPRESS)
162	iflags \|= FS_COMPR_FL;
163
164	return iflags;
165	}
166
167	/*
168	* Update inode->i_flags based on the btrfs internal flags.
169	*/
170	void btrfs_sync_inode_flags_to_i_flags(struct btrfs_inode *inode)
171	{
172	unsigned int new_fl = `0`;
173
174	if (inode->flags & BTRFS_INODE_SYNC)
175	new_fl \|= S_SYNC;
176	if (inode->flags & BTRFS_INODE_IMMUTABLE)
177	new_fl \|= S_IMMUTABLE;
178	if (inode->flags & BTRFS_INODE_APPEND)
179	new_fl \|= S_APPEND;
180	if (inode->flags & BTRFS_INODE_NOATIME)
181	new_fl \|= S_NOATIME;
182	if (inode->flags & BTRFS_INODE_DIRSYNC)
183	new_fl \|= S_DIRSYNC;
184	if (inode->ro_flags & BTRFS_INODE_RO_VERITY)
185	new_fl \|= S_VERITY;
186
187	set_mask_bits(&inode->vfs_inode.i_flags,
188	S_SYNC \| S_APPEND \| S_IMMUTABLE \| S_NOATIME \| S_DIRSYNC \|
189	S_VERITY, new_fl);
190	}
191
192	/*
193	* Check if @flags are a supported and valid set of FS_*_FL flags and that
194	* the old and new flags are not conflicting
195	*/
196	static int check_fsflags(unsigned int old_flags, unsigned int flags)
197	{
198	if (flags & ~(FS_IMMUTABLE_FL \| FS_APPEND_FL \| \
199	FS_NOATIME_FL \| FS_NODUMP_FL \| \
200	FS_SYNC_FL \| FS_DIRSYNC_FL \| \
201	FS_NOCOMP_FL \| FS_COMPR_FL \|
202	FS_NOCOW_FL))
203	return -EOPNOTSUPP;
204
205	/ COMPR and NOCOMP on new/old are valid /
206	if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
207	return -EINVAL;
208
209	if ((flags & FS_COMPR_FL) && (flags & FS_NOCOW_FL))
210	return -EINVAL;
211
212	/ NOCOW and compression options are mutually exclusive /
213	if ((old_flags & FS_NOCOW_FL) && (flags & (FS_COMPR_FL \| FS_NOCOMP_FL)))
214	return -EINVAL;
215	if ((flags & FS_NOCOW_FL) && (old_flags & (FS_COMPR_FL \| FS_NOCOMP_FL)))
216	return -EINVAL;
217
218	return `0`;
219	}
220
221	static int check_fsflags_compatible(const struct btrfs_fs_info *fs_info,
222	unsigned int flags)
223	{
224	if (btrfs_is_zoned(fs_info) && (flags & FS_NOCOW_FL))
225	return -EPERM;
226
227	return `0`;
228	}
229
230	int btrfs_check_ioctl_vol_args_path(const struct btrfs_ioctl_vol_args *vol_args)
231	{
232	if (memchr(p: vol_args->name, c: `0`, size: sizeof(vol_args->name)) == NULL)
233	return -ENAMETOOLONG;
234	return `0`;
235	}
236
237	static int btrfs_check_ioctl_vol_args2_subvol_name(const struct btrfs_ioctl_vol_args_v2 *vol_args2)
238	{
239	if (memchr(p: vol_args2->name, c: `0`, size: sizeof(vol_args2->name)) == NULL)
240	return -ENAMETOOLONG;
241	return `0`;
242	}
243
/*
 * Fill flags/xflags of @fa from the internal flags of the inode behind
 * @dentry. The remaining items of fsxattr are zeroed. Always returns 0.
 */
int btrfs_fileattr_get(struct dentry *dentry, struct file_kattr *fa)
{
	const struct btrfs_inode *binode = BTRFS_I(d_inode(dentry));
	const unsigned int fsflags = btrfs_inode_flags_to_fsflags(binode);

	fileattr_fill_flags(fa, fsflags);

	return 0;
}
255
256	int btrfs_fileattr_set(struct mnt_idmap *idmap,
257	struct dentry dentry, struct* file_kattr *fa)
258	{
259	struct btrfs_inode *inode = BTRFS_I(d_inode(dentry));
260	struct btrfs_root *root = inode->root;
261	struct btrfs_fs_info *fs_info = root->fs_info;
262	struct btrfs_trans_handle *trans;
263	unsigned int fsflags, old_fsflags;
264	int ret;
265	const char *comp = NULL;
266	u32 inode_flags;
267
268	if (btrfs_root_readonly(root))
269	return -EROFS;
270
271	if (fileattr_has_fsx(fa))
272	return -EOPNOTSUPP;
273
274	fsflags = btrfs_mask_fsflags_for_type(inode: &inode->vfs_inode, flags: fa->flags);
275	old_fsflags = btrfs_inode_flags_to_fsflags(inode);
276	ret = check_fsflags(old_flags: old_fsflags, flags: fsflags);
277	if (ret)
278	return ret;
279
280	ret = check_fsflags_compatible(fs_info, flags: fsflags);
281	if (ret)
282	return ret;
283
284	inode_flags = inode->flags;
285	if (fsflags & FS_SYNC_FL)
286	inode_flags \|= BTRFS_INODE_SYNC;
287	else
288	inode_flags &= ~BTRFS_INODE_SYNC;
289	if (fsflags & FS_IMMUTABLE_FL)
290	inode_flags \|= BTRFS_INODE_IMMUTABLE;
291	else
292	inode_flags &= ~BTRFS_INODE_IMMUTABLE;
293	if (fsflags & FS_APPEND_FL)
294	inode_flags \|= BTRFS_INODE_APPEND;
295	else
296	inode_flags &= ~BTRFS_INODE_APPEND;
297	if (fsflags & FS_NODUMP_FL)
298	inode_flags \|= BTRFS_INODE_NODUMP;
299	else
300	inode_flags &= ~BTRFS_INODE_NODUMP;
301	if (fsflags & FS_NOATIME_FL)
302	inode_flags \|= BTRFS_INODE_NOATIME;
303	else
304	inode_flags &= ~BTRFS_INODE_NOATIME;
305
306	/ If coming from FS_IOC_FSSETXATTR then skip unconverted flags /
307	if (!fa->flags_valid) {
308	/ 1 item for the inode /
309	trans = btrfs_start_transaction(root, num_items: `1`);
310	if (IS_ERR(ptr: trans))
311	return PTR_ERR(ptr: trans);
312	goto update_flags;
313	}
314
315	if (fsflags & FS_DIRSYNC_FL)
316	inode_flags \|= BTRFS_INODE_DIRSYNC;
317	else
318	inode_flags &= ~BTRFS_INODE_DIRSYNC;
319	if (fsflags & FS_NOCOW_FL) {
320	if (S_ISREG(inode->vfs_inode.i_mode)) {
321	/*
322	* It's safe to turn csums off here, no extents exist.
323	* Otherwise we want the flag to reflect the real COW
324	* status of the file and will not set it.
325	*/
326	if (inode->vfs_inode.i_size == `0`)
327	inode_flags \|= BTRFS_INODE_NODATACOW \|
328	BTRFS_INODE_NODATASUM;
329	} else {
330	inode_flags \|= BTRFS_INODE_NODATACOW;
331	}
332	} else {
333	/*
334	* Revert back under same assumptions as above
335	*/
336	if (S_ISREG(inode->vfs_inode.i_mode)) {
337	if (inode->vfs_inode.i_size == `0`)
338	inode_flags &= ~(BTRFS_INODE_NODATACOW \|
339	BTRFS_INODE_NODATASUM);
340	} else {
341	inode_flags &= ~BTRFS_INODE_NODATACOW;
342	}
343	}
344
345	/*
346	* The COMPRESS flag can only be changed by users, while the NOCOMPRESS
347	* flag may be changed automatically if compression code won't make
348	* things smaller.
349	*/
350	if (fsflags & FS_NOCOMP_FL) {
351	inode_flags &= ~BTRFS_INODE_COMPRESS;
352	inode_flags \|= BTRFS_INODE_NOCOMPRESS;
353	} else if (fsflags & FS_COMPR_FL) {
354
355	if (IS_SWAPFILE(&inode->vfs_inode))
356	return -ETXTBSY;
357
358	inode_flags \|= BTRFS_INODE_COMPRESS;
359	inode_flags &= ~BTRFS_INODE_NOCOMPRESS;
360
361	comp = btrfs_compress_type2str(type: fs_info->compress_type);
362	if (!comp \|\| comp[`0`] == `0`)
363	comp = btrfs_compress_type2str(type: BTRFS_COMPRESS_ZLIB);
364	} else {
365	inode_flags &= ~(BTRFS_INODE_COMPRESS \| BTRFS_INODE_NOCOMPRESS);
366	}
367
368	/*
369	* 1 for inode item
370	* 2 for properties
371	*/
372	trans = btrfs_start_transaction(root, num_items: `3`);
373	if (IS_ERR(ptr: trans))
374	return PTR_ERR(ptr: trans);
375
376	if (comp) {
377	ret = btrfs_set_prop(trans, inode, name: "btrfs.compression",
378	value: comp, strlen(comp), flags: `0`);
379	if (unlikely(ret)) {
380	btrfs_abort_transaction(trans, ret);
381	goto out_end_trans;
382	}
383	} else {
384	ret = btrfs_set_prop(trans, inode, name: "btrfs.compression", NULL, value_len: `0`, flags: `0`);
385	if (unlikely(ret && ret != -ENODATA)) {
386	btrfs_abort_transaction(trans, ret);
387	goto out_end_trans;
388	}
389	}
390
391	update_flags:
392	inode->flags = inode_flags;
393	btrfs_update_inode_mapping_flags(inode);
394	btrfs_sync_inode_flags_to_i_flags(inode);
395	inode_inc_iversion(inode: &inode->vfs_inode);
396	inode_set_ctime_current(inode: &inode->vfs_inode);
397	ret = btrfs_update_inode(trans, inode);
398
399	out_end_trans:
400	btrfs_end_transaction(trans);
401	return ret;
402	}
403
404	static int btrfs_ioctl_getversion(const struct inode inode, int* __user *arg)
405	{
406	return put_user(inode->i_generation, arg);
407	}
408
409	static noinline int btrfs_ioctl_fitrim(struct btrfs_fs_info *fs_info,
410	void __user *arg)
411	{
412	struct btrfs_device *device;
413	struct fstrim_range range;
414	u64 minlen = ULLONG_MAX;
415	u64 num_devices = `0`;
416	int ret;
417
418	if (!capable(CAP_SYS_ADMIN))
419	return -EPERM;
420
421	/*
422	* btrfs_trim_block_group() depends on space cache, which is not
423	* available in zoned filesystem. So, disallow fitrim on a zoned
424	* filesystem for now.
425	*/
426	if (btrfs_is_zoned(fs_info))
427	return -EOPNOTSUPP;
428
429	/*
430	* If the fs is mounted with nologreplay, which requires it to be
431	* mounted in RO mode as well, we can not allow discard on free space
432	* inside block groups, because log trees refer to extents that are not
433	* pinned in a block group's free space cache (pinning the extents is
434	* precisely the first phase of replaying a log tree).
435	*/
436	if (btrfs_test_opt(fs_info, NOLOGREPLAY))
437	return -EROFS;
438
439	rcu_read_lock();
440	list_for_each_entry_rcu(device, &fs_info->fs_devices->devices,
441	dev_list) {
442	if (!device->bdev \|\| !bdev_max_discard_sectors(bdev: device->bdev))
443	continue;
444	num_devices++;
445	minlen = min_t(u64, bdev_discard_granularity(device->bdev),
446	minlen);
447	}
448	rcu_read_unlock();
449
450	if (!num_devices)
451	return -EOPNOTSUPP;
452	if (copy_from_user(to: &range, from: arg, n: sizeof(range)))
453	return -EFAULT;
454
455	/*
456	* NOTE: Don't truncate the range using super->total_bytes. Bytenr of
457	* block group is in the logical address space, which can be any
458	* sectorsize aligned bytenr in the range [0, U64_MAX].
459	*/
460	if (range.len < fs_info->sectorsize)
461	return -EINVAL;
462
463	range.minlen = max(range.minlen, minlen);
464	ret = btrfs_trim_fs(fs_info, range: &range);
465
466	if (copy_to_user(to: arg, from: &range, n: sizeof(range)))
467	return -EFAULT;
468
469	return ret;
470	}
471
472	/*
473	* Calculate the number of transaction items to reserve for creating a subvolume
474	* or snapshot, not including the inode, directory entries, or parent directory.
475	*/
476	static unsigned int create_subvol_num_items(const struct btrfs_qgroup_inherit *inherit)
477	{
478	/*
479	* 1 to add root block
480	* 1 to add root item
481	* 1 to add root ref
482	* 1 to add root backref
483	* 1 to add UUID item
484	* 1 to add qgroup info
485	* 1 to add qgroup limit
486	*
487	* Ideally the last two would only be accounted if qgroups are enabled,
488	* but that can change between now and the time we would insert them.
489	*/
490	unsigned int num_items = `7`;
491
492	if (inherit) {
493	/ 2 to add qgroup relations for each inherited qgroup /
494	num_items += `2` * inherit->num_qgroups;
495	}
496	return num_items;
497	}
498
499	static noinline int create_subvol(struct mnt_idmap *idmap,
500	struct inode dir, struct* dentry *dentry,
501	struct btrfs_qgroup_inherit *inherit)
502	{
503	struct btrfs_fs_info *fs_info = inode_to_fs_info(dir);
504	struct btrfs_trans_handle *trans;
505	struct btrfs_key key;
506	struct btrfs_root_item AUTO_KFREE(root_item);
507	struct btrfs_inode_item *inode_item;
508	struct extent_buffer *leaf;
509	struct btrfs_root *root = BTRFS_I(dir)->root;
510	struct btrfs_root *new_root;
511	struct btrfs_block_rsv block_rsv;
512	struct timespec64 cur_time = current_time(inode: dir);
513	struct btrfs_new_inode_args new_inode_args = {
514	.dir = dir,
515	.dentry = dentry,
516	.subvol = true,
517	};
518	unsigned int trans_num_items;
519	int ret;
520	dev_t anon_dev;
521	u64 objectid;
522	u64 qgroup_reserved = `0`;
523
524	root_item = kzalloc(sizeof(*root_item), GFP_KERNEL);
525	if (!root_item)
526	return -ENOMEM;
527
528	ret = btrfs_get_free_objectid(root: fs_info->tree_root, objectid: &objectid);
529	if (ret)
530	return ret;
531
532	/*
533	* Don't create subvolume whose level is not zero. Or qgroup will be
534	* screwed up since it assumes subvolume qgroup's level to be 0.
535	*/
536	if (btrfs_qgroup_level(qgroupid: objectid))
537	return -ENOSPC;
538
539	ret = get_anon_bdev(&anon_dev);
540	if (ret < `0`)
541	return ret;
542
543	new_inode_args.inode = btrfs_new_subvol_inode(idmap, dir);
544	if (!new_inode_args.inode) {
545	ret = -ENOMEM;
546	goto out_anon_dev;
547	}
548	ret = btrfs_new_inode_prepare(args: &new_inode_args, trans_num_items: &trans_num_items);
549	if (ret)
550	goto out_inode;
551	trans_num_items += create_subvol_num_items(inherit);
552
553	btrfs_init_block_rsv(rsv: &block_rsv, type: BTRFS_BLOCK_RSV_TEMP);
554	ret = btrfs_subvolume_reserve_metadata(root, rsv: &block_rsv,
555	nitems: trans_num_items, use_global_rsv: false);
556	if (ret)
557	goto out_new_inode_args;
558	qgroup_reserved = block_rsv.qgroup_rsv_reserved;
559
560	trans = btrfs_start_transaction(root, num_items: `0`);
561	if (IS_ERR(ptr: trans)) {
562	ret = PTR_ERR(ptr: trans);
563	goto out_release_rsv;
564	}
565	btrfs_qgroup_convert_reserved_meta(root, num_bytes: qgroup_reserved);
566	qgroup_reserved = `0`;
567	trans->block_rsv = &block_rsv;
568	trans->bytes_reserved = block_rsv.size;
569
570	ret = btrfs_qgroup_inherit(trans, srcid: `0`, objectid, inode_rootid: btrfs_root_id(root), inherit);
571	if (ret)
572	goto out;
573
574	leaf = btrfs_alloc_tree_block(trans, root, parent: `0`, root_objectid: objectid, NULL, level: `0`, hint: `0`, empty_size: `0`,
575	reloc_src_root: `0`, nest: BTRFS_NESTING_NORMAL);
576	if (IS_ERR(ptr: leaf)) {
577	ret = PTR_ERR(ptr: leaf);
578	goto out;
579	}
580
581	btrfs_mark_buffer_dirty(trans, buf: leaf);
582
583	inode_item = &root_item->inode;
584	btrfs_set_stack_inode_generation(s: inode_item, val: `1`);
585	btrfs_set_stack_inode_size(s: inode_item, val: `3`);
586	btrfs_set_stack_inode_nlink(s: inode_item, val: `1`);
587	btrfs_set_stack_inode_nbytes(s: inode_item,
588	val: fs_info->nodesize);
589	btrfs_set_stack_inode_mode(s: inode_item, S_IFDIR \| `0755`);
590
591	btrfs_set_root_flags(s: root_item, val: `0`);
592	btrfs_set_root_limit(s: root_item, val: `0`);
593	btrfs_set_stack_inode_flags(s: inode_item, BTRFS_INODE_ROOT_ITEM_INIT);
594
595	btrfs_set_root_bytenr(s: root_item, val: leaf->start);
596	btrfs_set_root_generation(s: root_item, val: trans->transid);
597	btrfs_set_root_level(s: root_item, val: `0`);
598	btrfs_set_root_refs(s: root_item, val: `1`);
599	btrfs_set_root_used(s: root_item, val: leaf->len);
600	btrfs_set_root_last_snapshot(s: root_item, val: `0`);
601
602	btrfs_set_root_generation_v2(s: root_item,
603	val: btrfs_root_generation(s: root_item));
604	generate_random_guid(guid: root_item->uuid);
605	btrfs_set_stack_timespec_sec(s: &root_item->otime, val: cur_time.tv_sec);
606	btrfs_set_stack_timespec_nsec(s: &root_item->otime, val: cur_time.tv_nsec);
607	root_item->ctime = root_item->otime;
608	btrfs_set_root_ctransid(s: root_item, val: trans->transid);
609	btrfs_set_root_otransid(s: root_item, val: trans->transid);
610
611	btrfs_tree_unlock(eb: leaf);
612
613	btrfs_set_root_dirid(s: root_item, BTRFS_FIRST_FREE_OBJECTID);
614
615	key.objectid = objectid;
616	key.type = BTRFS_ROOT_ITEM_KEY;
617	key.offset = `0`;
618	ret = btrfs_insert_root(trans, root: fs_info->tree_root, key: &key,
619	item: root_item);
620	if (ret) {
621	int ret2;
622
623	/*
624	* Since we don't abort the transaction in this case, free the
625	* tree block so that we don't leak space and leave the
626	* filesystem in an inconsistent state (an extent item in the
627	* extent tree with a backreference for a root that does not
628	* exists).
629	*/
630	btrfs_tree_lock(eb: leaf);
631	btrfs_clear_buffer_dirty(trans, buf: leaf);
632	btrfs_tree_unlock(eb: leaf);
633	ret2 = btrfs_free_tree_block(trans, root_id: objectid, buf: leaf, parent: `0`, last_ref: `1`);
634	if (unlikely(ret2 < `0`))
635	btrfs_abort_transaction(trans, ret2);
636	free_extent_buffer(eb: leaf);
637	goto out;
638	}
639
640	free_extent_buffer(eb: leaf);
641	leaf = NULL;
642
643	new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev: &anon_dev);
644	if (IS_ERR(ptr: new_root)) {
645	ret = PTR_ERR(ptr: new_root);
646	btrfs_abort_transaction(trans, ret);
647	goto out;
648	}
649	/ anon_dev is owned by new_root now. /
650	anon_dev = `0`;
651	BTRFS_I(new_inode_args.inode)->root = new_root;
652	/ ... and new_root is owned by new_inode_args.inode now. /
653
654	ret = btrfs_record_root_in_trans(trans, root: new_root);
655	if (unlikely(ret)) {
656	btrfs_abort_transaction(trans, ret);
657	goto out;
658	}
659
660	ret = btrfs_uuid_tree_add(trans, uuid: root_item->uuid,
661	BTRFS_UUID_KEY_SUBVOL, subid: objectid);
662	if (unlikely(ret)) {
663	btrfs_abort_transaction(trans, ret);
664	goto out;
665	}
666
667	btrfs_record_new_subvolume(trans, BTRFS_I(dir));
668
669	ret = btrfs_create_new_inode(trans, args: &new_inode_args);
670	if (unlikely(ret)) {
671	btrfs_abort_transaction(trans, ret);
672	goto out;
673	}
674
675	d_instantiate_new(dentry, new_inode_args.inode);
676	new_inode_args.inode = NULL;
677
678	out:
679	trans->block_rsv = NULL;
680	trans->bytes_reserved = `0`;
681	btrfs_end_transaction(trans);
682	out_release_rsv:
683	btrfs_block_rsv_release(fs_info, block_rsv: &block_rsv, num_bytes: (u64)-`1`, NULL);
684	if (qgroup_reserved)
685	btrfs_qgroup_free_meta_prealloc(root, num_bytes: qgroup_reserved);
686	out_new_inode_args:
687	btrfs_new_inode_args_destroy(args: &new_inode_args);
688	out_inode:
689	iput(new_inode_args.inode);
690	out_anon_dev:
691	if (anon_dev)
692	free_anon_bdev(anon_dev);
693
694	return ret;
695	}
696
697	static int create_snapshot(struct btrfs_root root, struct* inode *dir,
698	struct dentry *dentry, bool readonly,
699	struct btrfs_qgroup_inherit *inherit)
700	{
701	struct btrfs_fs_info *fs_info = inode_to_fs_info(dir);
702	struct inode *inode;
703	struct btrfs_pending_snapshot *pending_snapshot;
704	unsigned int trans_num_items;
705	struct btrfs_trans_handle *trans;
706	struct btrfs_block_rsv *block_rsv;
707	u64 qgroup_reserved = `0`;
708	int ret;
709
710	/ We do not support snapshotting right now. /
711	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
712	btrfs_warn(fs_info,
713	"extent tree v2 doesn't support snapshotting yet");
714	return -EOPNOTSUPP;
715	}
716
717	if (btrfs_root_refs(s: &root->root_item) == `0`)
718	return -ENOENT;
719
720	if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
721	return -EINVAL;
722
723	if (atomic_read(v: &root->nr_swapfiles)) {
724	btrfs_warn(fs_info,
725	"cannot snapshot subvolume with active swapfile");
726	return -ETXTBSY;
727	}
728
729	pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_KERNEL);
730	if (!pending_snapshot)
731	return -ENOMEM;
732
733	ret = get_anon_bdev(&pending_snapshot->anon_dev);
734	if (ret < `0`)
735	goto free_pending;
736	pending_snapshot->root_item = kzalloc(sizeof(struct btrfs_root_item),
737	GFP_KERNEL);
738	pending_snapshot->path = btrfs_alloc_path();
739	if (!pending_snapshot->root_item \|\| !pending_snapshot->path) {
740	ret = -ENOMEM;
741	goto free_pending;
742	}
743
744	block_rsv = &pending_snapshot->block_rsv;
745	btrfs_init_block_rsv(rsv: block_rsv, type: BTRFS_BLOCK_RSV_TEMP);
746	/*
747	* 1 to add dir item
748	* 1 to add dir index
749	* 1 to update parent inode item
750	*/
751	trans_num_items = create_subvol_num_items(inherit) + `3`;
752	ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root, rsv: block_rsv,
753	nitems: trans_num_items, use_global_rsv: false);
754	if (ret)
755	goto free_pending;
756	qgroup_reserved = block_rsv->qgroup_rsv_reserved;
757
758	pending_snapshot->dentry = dentry;
759	pending_snapshot->root = root;
760	pending_snapshot->readonly = readonly;
761	pending_snapshot->dir = BTRFS_I(dir);
762	pending_snapshot->inherit = inherit;
763
764	trans = btrfs_start_transaction(root, num_items: `0`);
765	if (IS_ERR(ptr: trans)) {
766	ret = PTR_ERR(ptr: trans);
767	goto fail;
768	}
769	ret = btrfs_record_root_in_trans(trans, BTRFS_I(dir)->root);
770	if (ret) {
771	btrfs_end_transaction(trans);
772	goto fail;
773	}
774	btrfs_qgroup_convert_reserved_meta(root, num_bytes: qgroup_reserved);
775	qgroup_reserved = `0`;
776
777	trans->pending_snapshot = pending_snapshot;
778
779	ret = btrfs_commit_transaction(trans);
780	if (ret)
781	goto fail;
782
783	ret = pending_snapshot->error;
784	if (ret)
785	goto fail;
786
787	ret = btrfs_orphan_cleanup(root: pending_snapshot->snap);
788	if (ret)
789	goto fail;
790
791	inode = btrfs_lookup_dentry(dir: d_inode(dentry: dentry->d_parent), dentry);
792	if (IS_ERR(ptr: inode)) {
793	ret = PTR_ERR(ptr: inode);
794	goto fail;
795	}
796
797	d_instantiate(dentry, inode);
798	ret = `0`;
799	pending_snapshot->anon_dev = `0`;
800	fail:
801	/ Prevent double freeing of anon_dev /
802	if (ret && pending_snapshot->snap)
803	pending_snapshot->snap->anon_dev = `0`;
804	btrfs_put_root(root: pending_snapshot->snap);
805	btrfs_block_rsv_release(fs_info, block_rsv, num_bytes: (u64)-`1`, NULL);
806	if (qgroup_reserved)
807	btrfs_qgroup_free_meta_prealloc(root, num_bytes: qgroup_reserved);
808	free_pending:
809	if (pending_snapshot->anon_dev)
810	free_anon_bdev(pending_snapshot->anon_dev);
811	kfree(objp: pending_snapshot->root_item);
812	btrfs_free_path(p: pending_snapshot->path);
813	kfree(objp: pending_snapshot);
814
815	return ret;
816	}
817
818	/ copy of may_delete in fs/namei.c()*
819	* Check whether we can remove a link victim from directory dir, check
820	* whether the type of victim is right.
821	* 1. We can't do it if dir is read-only (done in permission())
822	* 2. We should have write and exec permissions on dir
823	* 3. We can't remove anything from append-only dir
824	* 4. We can't do anything with immutable dir (done in permission())
825	* 5. If the sticky bit on dir is set we should either
826	* a. be owner of dir, or
827	* b. be owner of victim, or
828	* c. have CAP_FOWNER capability
829	* 6. If the victim is append-only or immutable we can't do anything with
830	* links pointing to it.
831	* 7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
832	* 8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
833	* 9. We can't remove a root or mountpoint.
834	* 10. We don't allow removal of NFS sillyrenamed files; it's handled by
835	* nfs_async_unlink().
836	*/
837
838	static int btrfs_may_delete(struct mnt_idmap *idmap,
839	struct inode dir, struct* dentry victim, int* isdir)
840	{
841	int ret;
842
843	if (d_really_is_negative(dentry: victim))
844	return -ENOENT;
845
846	/ The @victim is not inside @dir. /
847	if (d_inode(dentry: victim->d_parent) != dir)
848	return -EINVAL;
849	audit_inode_child(parent: dir, dentry: victim, AUDIT_TYPE_CHILD_DELETE);
850
851	ret = inode_permission(idmap, dir, MAY_WRITE \| MAY_EXEC);
852	if (ret)
853	return ret;
854	if (IS_APPEND(dir))
855	return -EPERM;
856	if (check_sticky(idmap, dir, inode: d_inode(dentry: victim)) \|\|
857	IS_APPEND(d_inode(victim)) \|\| IS_IMMUTABLE(d_inode(victim)) \|\|
858	IS_SWAPFILE(d_inode(victim)))
859	return -EPERM;
860	if (isdir) {
861	if (!d_is_dir(dentry: victim))
862	return -ENOTDIR;
863	if (IS_ROOT(victim))
864	return -EBUSY;
865	} else if (d_is_dir(dentry: victim))
866	return -EISDIR;
867	if (IS_DEADDIR(dir))
868	return -ENOENT;
869	if (victim->d_flags & DCACHE_NFSFS_RENAMED)
870	return -EBUSY;
871	return `0`;
872	}
873
874	/ copy of may_create in fs/namei.c() /
875	static inline int btrfs_may_create(struct mnt_idmap *idmap,
876	struct inode dir, const* struct dentry *child)
877	{
878	if (d_really_is_positive(dentry: child))
879	return -EEXIST;
880	if (IS_DEADDIR(dir))
881	return -ENOENT;
882	if (!fsuidgid_has_mapping(sb: dir->i_sb, idmap))
883	return -EOVERFLOW;
884	return inode_permission(idmap, dir, MAY_WRITE \| MAY_EXEC);
885	}
886
887	/*
888	* Create a new subvolume below @parent. This is largely modeled after
889	* sys_mkdirat and vfs_mkdir, but we only do a single component lookup
890	* inside this filesystem so it's quite a bit simpler.
891	*/
892	static noinline int btrfs_mksubvol(struct dentry *parent,
893	struct mnt_idmap *idmap,
894	struct qstr qname, struct* btrfs_root *snap_src,
895	bool readonly,
896	struct btrfs_qgroup_inherit *inherit)
897	{
898	struct inode *dir = d_inode(dentry: parent);
899	struct btrfs_fs_info *fs_info = inode_to_fs_info(dir);
900	struct dentry *dentry;
901	struct fscrypt_str name_str = FSTR_INIT((char *)qname->name, qname->len);
902	int ret;
903
904	dentry = start_creating_killable(idmap, parent, name: qname);
905	if (IS_ERR(ptr: dentry))
906	return PTR_ERR(ptr: dentry);
907
908	ret = btrfs_may_create(idmap, dir, child: dentry);
909	if (ret)
910	goto out_dput;
911
912	/*
913	* even if this name doesn't exist, we may get hash collisions.
914	* check for them now when we can safely fail
915	*/
916	ret = btrfs_check_dir_item_collision(BTRFS_I(dir)->root, dir_ino: dir->i_ino, name: &name_str);
917	if (ret)
918	goto out_dput;
919
920	down_read(sem: &fs_info->subvol_sem);
921
922	if (btrfs_root_refs(s: &BTRFS_I(dir)->root->root_item) == `0`)
923	goto out_up_read;
924
925	if (snap_src)
926	ret = create_snapshot(root: snap_src, dir, dentry, readonly, inherit);
927	else
928	ret = create_subvol(idmap, dir, dentry, inherit);
929
930	if (!ret)
931	fsnotify_mkdir(dir, dentry);
932	out_up_read:
933	up_read(sem: &fs_info->subvol_sem);
934	out_dput:
935	end_creating(child: dentry);
936	return ret;
937	}
938
939	static noinline int btrfs_mksnapshot(struct dentry *parent,
940	struct mnt_idmap *idmap,
941	struct qstr *qname,
942	struct btrfs_root *root,
943	bool readonly,
944	struct btrfs_qgroup_inherit *inherit)
945	{
946	int ret;
947
948	/*
949	* Force new buffered writes to reserve space even when NOCOW is
950	* possible. This is to avoid later writeback (running delalloc) to
951	* fallback to COW mode and unexpectedly fail with ENOSPC.
952	*/
953	btrfs_drew_read_lock(lock: &root->snapshot_lock);
954
955	ret = btrfs_start_delalloc_snapshot(root, in_reclaim_context: false);
956	if (ret)
957	goto out;
958
959	/*
960	* All previous writes have started writeback in NOCOW mode, so now
961	* we force future writes to fallback to COW mode during snapshot
962	* creation.
963	*/
964	atomic_inc(v: &root->snapshot_force_cow);
965
966	btrfs_wait_ordered_extents(root, U64_MAX, NULL);
967
968	ret = btrfs_mksubvol(parent, idmap, qname, snap_src: root, readonly, inherit);
969
970	atomic_dec(v: &root->snapshot_force_cow);
971	out:
972	btrfs_drew_read_unlock(lock: &root->snapshot_lock);
973	return ret;
974	}
975
976	/*
977	* Try to start exclusive operation @type or cancel it if it's running.
978	*
979	* Return:
980	* 0 - normal mode, newly claimed op started
981	* >0 - normal mode, something else is running,
982	* return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS to user space
983	* ECANCELED - cancel mode, successful cancel
984	* ENOTCONN - cancel mode, operation not running anymore
985	*/
986	static int exclop_start_or_cancel_reloc(struct btrfs_fs_info *fs_info,
987	enum btrfs_exclusive_operation type, bool cancel)
988	{
989	if (!cancel) {
990	/ Start normal op /
991	if (!btrfs_exclop_start(fs_info, type))
992	return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
993	/ Exclusive operation is now claimed /
994	return `0`;
995	}
996
997	/ Cancel running op /
998	if (btrfs_exclop_start_try_lock(fs_info, type)) {
999	/*
1000	* This blocks any exclop finish from setting it to NONE, so we
1001	* request cancellation. Either it runs and we will wait for it,
1002	* or it has finished and no waiting will happen.
1003	*/
1004	atomic_inc(v: &fs_info->reloc_cancel_req);
1005	btrfs_exclop_start_unlock(fs_info);
1006
1007	if (test_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags))
1008	wait_on_bit(word: &fs_info->flags, bit: BTRFS_FS_RELOC_RUNNING,
1009	TASK_INTERRUPTIBLE);
1010
1011	return -ECANCELED;
1012	}
1013
1014	/ Something else is running or none /
1015	return -ENOTCONN;
1016	}
1017
1018	static noinline int btrfs_ioctl_resize(struct file *file,
1019	void __user *arg)
1020	{
1021	BTRFS_DEV_LOOKUP_ARGS(args);
1022	struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
1023	struct btrfs_fs_info *fs_info = root->fs_info;
1024	u64 new_size;
1025	u64 old_size;
1026	u64 devid = `1`;
1027	struct btrfs_ioctl_vol_args *vol_args;
1028	struct btrfs_device *device = NULL;
1029	char *sizestr;
1030	char *devstr = NULL;
1031	int ret = `0`;
1032	int mod = `0`;
1033	bool cancel;
1034
1035	if (!capable(CAP_SYS_ADMIN))
1036	return -EPERM;
1037
1038	ret = mnt_want_write_file(file);
1039	if (ret)
1040	return ret;
1041
1042	/*
1043	* Read the arguments before checking exclusivity to be able to
1044	* distinguish regular resize and cancel
1045	*/
1046	vol_args = memdup_user(arg, sizeof(*vol_args));
1047	if (IS_ERR(ptr: vol_args)) {
1048	ret = PTR_ERR(ptr: vol_args);
1049	goto out_drop;
1050	}
1051	ret = btrfs_check_ioctl_vol_args_path(vol_args);
1052	if (ret < `0`)
1053	goto out_free;
1054
1055	sizestr = vol_args->name;
1056	cancel = (strcmp("cancel", sizestr) == `0`);
1057	ret = exclop_start_or_cancel_reloc(fs_info, type: BTRFS_EXCLOP_RESIZE, cancel);
1058	if (ret)
1059	goto out_free;
1060	/ Exclusive operation is now claimed /
1061
1062	devstr = strchr(sizestr, `':'`);
1063	if (devstr) {
1064	sizestr = devstr + `1`;
1065	*devstr = `'\0'`;
1066	devstr = vol_args->name;
1067	ret = kstrtoull(s: devstr, base: `10`, res: &devid);
1068	if (ret)
1069	goto out_finish;
1070	if (!devid) {
1071	ret = -EINVAL;
1072	goto out_finish;
1073	}
1074	btrfs_info(fs_info, "resizing devid %llu", devid);
1075	}
1076
1077	args.devid = devid;
1078	device = btrfs_find_device(fs_devices: fs_info->fs_devices, args: &args);
1079	if (!device) {
1080	btrfs_info(fs_info, "resizer unable to find device %llu",
1081	devid);
1082	ret = -ENODEV;
1083	goto out_finish;
1084	}
1085
1086	if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
1087	btrfs_info(fs_info,
1088	"resizer unable to apply on readonly device %llu",
1089	devid);
1090	ret = -EPERM;
1091	goto out_finish;
1092	}
1093
1094	if (!strcmp(sizestr, "max"))
1095	new_size = bdev_nr_bytes(bdev: device->bdev);
1096	else {
1097	char *retptr;
1098
1099	if (sizestr[`0`] == `'-'`) {
1100	mod = -`1`;
1101	sizestr++;
1102	} else if (sizestr[`0`] == `'+'`) {
1103	mod = `1`;
1104	sizestr++;
1105	}
1106	new_size = memparse(ptr: sizestr, retptr: &retptr);
1107	if (*retptr != `'\0'` \|\| new_size == `0`) {
1108	ret = -EINVAL;
1109	goto out_finish;
1110	}
1111	}
1112
1113	if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
1114	ret = -EPERM;
1115	goto out_finish;
1116	}
1117
1118	old_size = btrfs_device_get_total_bytes(dev: device);
1119
1120	if (mod < `0`) {
1121	if (new_size > old_size) {
1122	ret = -EINVAL;
1123	goto out_finish;
1124	}
1125	new_size = old_size - new_size;
1126	} else if (mod > `0`) {
1127	if (new_size > ULLONG_MAX - old_size) {
1128	ret = -ERANGE;
1129	goto out_finish;
1130	}
1131	new_size = old_size + new_size;
1132	}
1133
1134	if (new_size < SZ_256M) {
1135	ret = -EINVAL;
1136	goto out_finish;
1137	}
1138	if (new_size > bdev_nr_bytes(bdev: device->bdev)) {
1139	ret = -EFBIG;
1140	goto out_finish;
1141	}
1142
1143	new_size = round_down(new_size, fs_info->sectorsize);
1144
1145	if (new_size > old_size) {
1146	struct btrfs_trans_handle *trans;
1147
1148	trans = btrfs_start_transaction(root, num_items: `0`);
1149	if (IS_ERR(ptr: trans)) {
1150	ret = PTR_ERR(ptr: trans);
1151	goto out_finish;
1152	}
1153	ret = btrfs_grow_device(trans, device, new_size);
1154	btrfs_commit_transaction(trans);
1155	} else if (new_size < old_size) {
1156	ret = btrfs_shrink_device(device, new_size);
1157	} / equal, nothing need to do /
1158
1159	if (ret == `0` && new_size != old_size)
1160	btrfs_info(fs_info,
1161	"resize device %s (devid %llu) from %llu to %llu",
1162	btrfs_dev_name(device), device->devid,
1163	old_size, new_size);
1164	out_finish:
1165	btrfs_exclop_finish(fs_info);
1166	out_free:
1167	kfree(objp: vol_args);
1168	out_drop:
1169	mnt_drop_write_file(file);
1170	return ret;
1171	}
1172
1173	static noinline int __btrfs_ioctl_snap_create(struct file *file,
1174	struct mnt_idmap *idmap,
1175	const char name, unsigned* long fd, bool subvol,
1176	bool readonly,
1177	struct btrfs_qgroup_inherit *inherit)
1178	{
1179	int ret = `0`;
1180	struct qstr qname = QSTR_INIT(name, strlen(name));
1181
1182	if (!S_ISDIR(file_inode(file)->i_mode))
1183	return -ENOTDIR;
1184
1185	ret = mnt_want_write_file(file);
1186	if (ret)
1187	goto out;
1188
1189	if (strchr(name, `'/'`)) {
1190	ret = -EINVAL;
1191	goto out_drop_write;
1192	}
1193
1194	if (qname.name[`0`] == `'.'` &&
1195	(qname.len == `1` \|\| (qname.name[`1`] == `'.'` && qname.len == `2`))) {
1196	ret = -EEXIST;
1197	goto out_drop_write;
1198	}
1199
1200	if (subvol) {
1201	ret = btrfs_mksubvol(parent: file_dentry(file), idmap, qname: &qname, NULL,
1202	readonly, inherit);
1203	} else {
1204	CLASS(fd, src)(fd);
1205	struct inode *src_inode;
1206	if (fd_empty(f: src)) {
1207	ret = -EINVAL;
1208	goto out_drop_write;
1209	}
1210
1211	src_inode = file_inode(fd_file(src));
1212	if (src_inode->i_sb != file_inode(f: file)->i_sb) {
1213	btrfs_info(BTRFS_I(file_inode(file))->root->fs_info,
1214	"Snapshot src from another FS");
1215	ret = -EXDEV;
1216	} else if (!inode_owner_or_capable(idmap, inode: src_inode)) {
1217	/*
1218	* Subvolume creation is not restricted, but snapshots
1219	* are limited to own subvolumes only
1220	*/
1221	ret = -EPERM;
1222	} else if (btrfs_ino(BTRFS_I(src_inode)) != BTRFS_FIRST_FREE_OBJECTID) {
1223	/*
1224	* Snapshots must be made with the src_inode referring
1225	* to the subvolume inode, otherwise the permission
1226	* checking above is useless because we may have
1227	* permission on a lower directory but not the subvol
1228	* itself.
1229	*/
1230	ret = -EINVAL;
1231	} else {
1232	ret = btrfs_mksnapshot(parent: file_dentry(file), idmap, qname: &qname,
1233	BTRFS_I(src_inode)->root,
1234	readonly, inherit);
1235	}
1236	}
1237	out_drop_write:
1238	mnt_drop_write_file(file);
1239	out:
1240	return ret;
1241	}
1242
1243	static noinline int btrfs_ioctl_snap_create(struct file *file,
1244	void __user *arg, bool subvol)
1245	{
1246	struct btrfs_ioctl_vol_args *vol_args;
1247	int ret;
1248
1249	if (!S_ISDIR(file_inode(file)->i_mode))
1250	return -ENOTDIR;
1251
1252	vol_args = memdup_user(arg, sizeof(*vol_args));
1253	if (IS_ERR(ptr: vol_args))
1254	return PTR_ERR(ptr: vol_args);
1255	ret = btrfs_check_ioctl_vol_args_path(vol_args);
1256	if (ret < `0`)
1257	goto out;
1258
1259	ret = __btrfs_ioctl_snap_create(file, idmap: file_mnt_idmap(file),
1260	name: vol_args->name, fd: vol_args->fd, subvol,
1261	readonly: false, NULL);
1262
1263	out:
1264	kfree(objp: vol_args);
1265	return ret;
1266	}
1267
1268	static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
1269	void __user *arg, bool subvol)
1270	{
1271	struct btrfs_ioctl_vol_args_v2 *vol_args;
1272	int ret;
1273	bool readonly = false;
1274	struct btrfs_qgroup_inherit *inherit = NULL;
1275
1276	if (!S_ISDIR(file_inode(file)->i_mode))
1277	return -ENOTDIR;
1278
1279	vol_args = memdup_user(arg, sizeof(*vol_args));
1280	if (IS_ERR(ptr: vol_args))
1281	return PTR_ERR(ptr: vol_args);
1282	ret = btrfs_check_ioctl_vol_args2_subvol_name(vol_args2: vol_args);
1283	if (ret < `0`)
1284	goto free_args;
1285
1286	if (vol_args->flags & ~BTRFS_SUBVOL_CREATE_ARGS_MASK) {
1287	ret = -EOPNOTSUPP;
1288	goto free_args;
1289	}
1290
1291	if (vol_args->flags & BTRFS_SUBVOL_RDONLY)
1292	readonly = true;
1293	if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) {
1294	struct btrfs_fs_info *fs_info = inode_to_fs_info(file_inode(file));
1295
1296	if (vol_args->size < sizeof(*inherit) \|\|
1297	vol_args->size > PAGE_SIZE) {
1298	ret = -EINVAL;
1299	goto free_args;
1300	}
1301	inherit = memdup_user(vol_args->qgroup_inherit, vol_args->size);
1302	if (IS_ERR(ptr: inherit)) {
1303	ret = PTR_ERR(ptr: inherit);
1304	goto free_args;
1305	}
1306
1307	ret = btrfs_qgroup_check_inherit(fs_info, inherit, size: vol_args->size);
1308	if (ret < `0`)
1309	goto free_inherit;
1310	}
1311
1312	ret = __btrfs_ioctl_snap_create(file, idmap: file_mnt_idmap(file),
1313	name: vol_args->name, fd: vol_args->fd, subvol,
1314	readonly, inherit);
1315	if (ret)
1316	goto free_inherit;
1317	free_inherit:
1318	kfree(objp: inherit);
1319	free_args:
1320	kfree(objp: vol_args);
1321	return ret;
1322	}
1323
1324	static noinline int btrfs_ioctl_subvol_getflags(struct btrfs_inode *inode,
1325	void __user *arg)
1326	{
1327	struct btrfs_root *root = inode->root;
1328	struct btrfs_fs_info *fs_info = root->fs_info;
1329	int ret = `0`;
1330	u64 flags = `0`;
1331
1332	if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID)
1333	return -EINVAL;
1334
1335	down_read(sem: &fs_info->subvol_sem);
1336	if (btrfs_root_readonly(root))
1337	flags \|= BTRFS_SUBVOL_RDONLY;
1338	up_read(sem: &fs_info->subvol_sem);
1339
1340	if (copy_to_user(to: arg, from: &flags, n: sizeof(flags)))
1341	ret = -EFAULT;
1342
1343	return ret;
1344	}
1345
1346	static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1347	void __user *arg)
1348	{
1349	struct inode *inode = file_inode(f: file);
1350	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
1351	struct btrfs_root *root = BTRFS_I(inode)->root;
1352	struct btrfs_trans_handle *trans;
1353	u64 root_flags;
1354	u64 flags;
1355	int ret = `0`;
1356
1357	if (!inode_owner_or_capable(idmap: file_mnt_idmap(file), inode))
1358	return -EPERM;
1359
1360	ret = mnt_want_write_file(file);
1361	if (ret)
1362	goto out;
1363
1364	if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
1365	ret = -EINVAL;
1366	goto out_drop_write;
1367	}
1368
1369	if (copy_from_user(to: &flags, from: arg, n: sizeof(flags))) {
1370	ret = -EFAULT;
1371	goto out_drop_write;
1372	}
1373
1374	if (flags & ~BTRFS_SUBVOL_RDONLY) {
1375	ret = -EOPNOTSUPP;
1376	goto out_drop_write;
1377	}
1378
1379	down_write(sem: &fs_info->subvol_sem);
1380
1381	/ nothing to do /
1382	if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root))
1383	goto out_drop_sem;
1384
1385	root_flags = btrfs_root_flags(s: &root->root_item);
1386	if (flags & BTRFS_SUBVOL_RDONLY) {
1387	btrfs_set_root_flags(s: &root->root_item,
1388	val: root_flags \| BTRFS_ROOT_SUBVOL_RDONLY);
1389	} else {
1390	/*
1391	* Block RO -> RW transition if this subvolume is involved in
1392	* send
1393	*/
1394	spin_lock(lock: &root->root_item_lock);
1395	if (root->send_in_progress == `0`) {
1396	btrfs_set_root_flags(s: &root->root_item,
1397	val: root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY);
1398	spin_unlock(lock: &root->root_item_lock);
1399	} else {
1400	spin_unlock(lock: &root->root_item_lock);
1401	btrfs_warn(fs_info,
1402	"Attempt to set subvolume %llu read-write during send",
1403	btrfs_root_id(root));
1404	ret = -EPERM;
1405	goto out_drop_sem;
1406	}
1407	}
1408
1409	trans = btrfs_start_transaction(root, num_items: `1`);
1410	if (IS_ERR(ptr: trans)) {
1411	ret = PTR_ERR(ptr: trans);
1412	goto out_reset;
1413	}
1414
1415	ret = btrfs_update_root(trans, root: fs_info->tree_root,
1416	key: &root->root_key, item: &root->root_item);
1417	if (ret < `0`) {
1418	btrfs_end_transaction(trans);
1419	goto out_reset;
1420	}
1421
1422	ret = btrfs_commit_transaction(trans);
1423
1424	out_reset:
1425	if (ret)
1426	btrfs_set_root_flags(s: &root->root_item, val: root_flags);
1427	out_drop_sem:
1428	up_write(sem: &fs_info->subvol_sem);
1429	out_drop_write:
1430	mnt_drop_write_file(file);
1431	out:
1432	return ret;
1433	}
1434
1435	static noinline bool key_in_sk(const struct btrfs_key *key,
1436	const struct btrfs_ioctl_search_key *sk)
1437	{
1438	struct btrfs_key test;
1439	int ret;
1440
1441	test.objectid = sk->min_objectid;
1442	test.type = sk->min_type;
1443	test.offset = sk->min_offset;
1444
1445	ret = btrfs_comp_cpu_keys(k1: key, k2: &test);
1446	if (ret < `0`)
1447	return false;
1448
1449	test.objectid = sk->max_objectid;
1450	test.type = sk->max_type;
1451	test.offset = sk->max_offset;
1452
1453	ret = btrfs_comp_cpu_keys(k1: key, k2: &test);
1454	if (ret > `0`)
1455	return false;
1456	return true;
1457	}
1458
1459	static noinline int copy_to_sk(struct btrfs_path *path,
1460	struct btrfs_key *key,
1461	const struct btrfs_ioctl_search_key *sk,
1462	u64 *buf_size,
1463	char __user *ubuf,
1464	unsigned long *sk_offset,
1465	int *num_found)
1466	{
1467	u64 found_transid;
1468	struct extent_buffer *leaf;
1469	struct btrfs_ioctl_search_header sh;
1470	struct btrfs_key test;
1471	unsigned long item_off;
1472	unsigned long item_len;
1473	int nritems;
1474	int i;
1475	int slot;
1476	int ret = `0`;
1477
1478	leaf = path->nodes[`0`];
1479	slot = path->slots[`0`];
1480	nritems = btrfs_header_nritems(eb: leaf);
1481
1482	if (btrfs_header_generation(eb: leaf) > sk->max_transid) {
1483	i = nritems;
1484	goto advance_key;
1485	}
1486	found_transid = btrfs_header_generation(eb: leaf);
1487
1488	for (i = slot; i < nritems; i++) {
1489	item_off = btrfs_item_ptr_offset(leaf, i);
1490	item_len = btrfs_item_size(eb: leaf, slot: i);
1491
1492	btrfs_item_key_to_cpu(eb: leaf, cpu_key: key, nr: i);
1493	if (!key_in_sk(key, sk))
1494	continue;
1495
1496	if (sizeof(sh) + item_len > *buf_size) {
1497	if (*num_found) {
1498	ret = `1`;
1499	goto out;
1500	}
1501
1502	/*
1503	* return one empty item back for v1, which does not
1504	* handle -EOVERFLOW
1505	*/
1506
1507	buf_size = sizeof*(sh) + item_len;
1508	item_len = `0`;
1509	ret = -EOVERFLOW;
1510	}
1511
1512	if (sizeof(sh) + item_len + sk_offset > buf_size) {
1513	ret = `1`;
1514	goto out;
1515	}
1516
1517	sh.objectid = key->objectid;
1518	sh.type = key->type;
1519	sh.offset = key->offset;
1520	sh.len = item_len;
1521	sh.transid = found_transid;
1522
1523	/*
1524	* Copy search result header. If we fault then loop again so we
1525	* can fault in the pages and -EFAULT there if there's a
1526	* problem. Otherwise we'll fault and then copy the buffer in
1527	* properly this next time through
1528	*/
1529	if (copy_to_user_nofault(dst: ubuf + sk_offset, src: &sh, size: sizeof*(sh))) {
1530	ret = `0`;
1531	goto out;
1532	}
1533
1534	sk_offset += sizeof*(sh);
1535
1536	if (item_len) {
1537	char __user up = ubuf + sk_offset;
1538	/*
1539	* Copy the item, same behavior as above, but reset the
1540	* * sk_offset so we copy the full thing again.
1541	*/
1542	if (read_extent_buffer_to_user_nofault(eb: leaf, dst: up,
1543	start: item_off, len: item_len)) {
1544	ret = `0`;
1545	sk_offset -= sizeof*(sh);
1546	goto out;
1547	}
1548
1549	*sk_offset += item_len;
1550	}
1551	(*num_found)++;
1552
1553	if (ret) / -EOVERFLOW from above /
1554	goto out;
1555
1556	if (*num_found >= sk->nr_items) {
1557	ret = `1`;
1558	goto out;
1559	}
1560	}
1561	advance_key:
1562	ret = `0`;
1563	test.objectid = sk->max_objectid;
1564	test.type = sk->max_type;
1565	test.offset = sk->max_offset;
1566	if (btrfs_comp_cpu_keys(k1: key, k2: &test) >= `0`)
1567	ret = `1`;
1568	else if (key->offset < (u64)-`1`)
1569	key->offset++;
1570	else if (key->type < (u8)-`1`) {
1571	key->offset = `0`;
1572	key->type++;
1573	} else if (key->objectid < (u64)-`1`) {
1574	key->offset = `0`;
1575	key->type = `0`;
1576	key->objectid++;
1577	} else
1578	ret = `1`;
1579	out:
1580	/*
1581	* 0: all items from this leaf copied, continue with next
1582	* 1: * more items can be copied, but unused buffer is too small
1583	* * all items were found
1584	* Either way, it will stops the loop which iterates to the next
1585	* leaf
1586	* -EOVERFLOW: item was to large for buffer
1587	* -EFAULT: could not copy extent buffer back to userspace
1588	*/
1589	return ret;
1590	}
1591
1592	static noinline int search_ioctl(struct btrfs_root *root,
1593	struct btrfs_ioctl_search_key *sk,
1594	u64 *buf_size,
1595	char __user *ubuf)
1596	{
1597	struct btrfs_fs_info *info = root->fs_info;
1598	struct btrfs_key key;
1599	BTRFS_PATH_AUTO_FREE(path);
1600	int ret;
1601	int num_found = `0`;
1602	unsigned long sk_offset = `0`;
1603
1604	if (buf_size < sizeof(struct* btrfs_ioctl_search_header)) {
1605	buf_size = sizeof(struct* btrfs_ioctl_search_header);
1606	return -EOVERFLOW;
1607	}
1608
1609	path = btrfs_alloc_path();
1610	if (!path)
1611	return -ENOMEM;
1612
1613	if (sk->tree_id == `0`) {
1614	/ Search the root that we got passed. /
1615	root = btrfs_grab_root(root);
1616	} else {
1617	/ Look up the root from the arguments. /
1618	root = btrfs_get_fs_root(fs_info: info, objectid: sk->tree_id, check_ref: true);
1619	if (IS_ERR(ptr: root))
1620	return PTR_ERR(ptr: root);
1621	}
1622
1623	key.objectid = sk->min_objectid;
1624	key.type = sk->min_type;
1625	key.offset = sk->min_offset;
1626
1627	while (`1`) {
1628	/*
1629	* Ensure that the whole user buffer is faulted in at sub-page
1630	* granularity, otherwise the loop may live-lock.
1631	*/
1632	if (fault_in_subpage_writeable(uaddr: ubuf + sk_offset, size: *buf_size - sk_offset)) {
1633	ret = -EFAULT;
1634	break;
1635	}
1636
1637	ret = btrfs_search_forward(root, min_key: &key, path, min_trans: sk->min_transid);
1638	if (ret)
1639	break;
1640
1641	ret = copy_to_sk(path, key: &key, sk, buf_size, ubuf,
1642	sk_offset: &sk_offset, num_found: &num_found);
1643	btrfs_release_path(p: path);
1644	if (ret)
1645	break;
1646
1647	}
1648	/ Normalize return values from btrfs_search_forward() and copy_to_sk(). /
1649	if (ret > `0`)
1650	ret = `0`;
1651
1652	sk->nr_items = num_found;
1653	btrfs_put_root(root);
1654	return ret;
1655	}
1656
1657	static noinline int btrfs_ioctl_tree_search(struct btrfs_root *root,
1658	void __user *argp)
1659	{
1660	struct btrfs_ioctl_search_args __user *uargs = argp;
1661	struct btrfs_ioctl_search_key sk;
1662	int ret;
1663	u64 buf_size;
1664
1665	if (!capable(CAP_SYS_ADMIN))
1666	return -EPERM;
1667
1668	if (copy_from_user(to: &sk, from: &uargs->key, n: sizeof(sk)))
1669	return -EFAULT;
1670
1671	buf_size = sizeof(uargs->buf);
1672
1673	ret = search_ioctl(root, sk: &sk, buf_size: &buf_size, ubuf: uargs->buf);
1674
1675	/*
1676	* In the origin implementation an overflow is handled by returning a
1677	* search header with a len of zero, so reset ret.
1678	*/
1679	if (ret == -EOVERFLOW)
1680	ret = `0`;
1681
1682	if (ret == `0` && copy_to_user(to: &uargs->key, from: &sk, n: sizeof(sk)))
1683	ret = -EFAULT;
1684	return ret;
1685	}
1686
1687	static noinline int btrfs_ioctl_tree_search_v2(struct btrfs_root *root,
1688	void __user *argp)
1689	{
1690	struct btrfs_ioctl_search_args_v2 __user *uarg = argp;
1691	struct btrfs_ioctl_search_args_v2 args;
1692	int ret;
1693	u64 buf_size;
1694	const u64 buf_limit = SZ_16M;
1695
1696	if (!capable(CAP_SYS_ADMIN))
1697	return -EPERM;
1698
1699	/ copy search header and buffer size /
1700	if (copy_from_user(to: &args, from: uarg, n: sizeof(args)))
1701	return -EFAULT;
1702
1703	buf_size = args.buf_size;
1704
1705	/ limit result size to 16MB /
1706	if (buf_size > buf_limit)
1707	buf_size = buf_limit;
1708
1709	ret = search_ioctl(root, sk: &args.key, buf_size: &buf_size,
1710	ubuf: (char __user *)(&uarg->buf[`0`]));
1711	if (ret == `0` && copy_to_user(to: &uarg->key, from: &args.key, n: sizeof(args.key)))
1712	ret = -EFAULT;
1713	else if (ret == -EOVERFLOW &&
1714	copy_to_user(to: &uarg->buf_size, from: &buf_size, n: sizeof(buf_size)))
1715	ret = -EFAULT;
1716
1717	return ret;
1718	}
1719
1720	/*
1721	* Search INODE_REFs to identify path name of 'dirid' directory
1722	* in a 'tree_id' tree. and sets path name to 'name'.
1723	*/
1724	static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
1725	u64 tree_id, u64 dirid, char *name)
1726	{
1727	struct btrfs_root *root;
1728	struct btrfs_key key;
1729	char *ptr;
1730	int ret = -`1`;
1731	int slot;
1732	int len;
1733	int total_len = `0`;
1734	struct btrfs_inode_ref *iref;
1735	struct extent_buffer *l;
1736	BTRFS_PATH_AUTO_FREE(path);
1737
1738	if (dirid == BTRFS_FIRST_FREE_OBJECTID) {
1739	name[`0`]=`'\0'`;
1740	return `0`;
1741	}
1742
1743	path = btrfs_alloc_path();
1744	if (!path)
1745	return -ENOMEM;
1746
1747	ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX - `1`];
1748
1749	root = btrfs_get_fs_root(fs_info: info, objectid: tree_id, check_ref: true);
1750	if (IS_ERR(ptr: root)) {
1751	ret = PTR_ERR(ptr: root);
1752	root = NULL;
1753	goto out;
1754	}
1755
1756	key.objectid = dirid;
1757	key.type = BTRFS_INODE_REF_KEY;
1758	key.offset = (u64)-`1`;
1759
1760	while (`1`) {
1761	ret = btrfs_search_backwards(root, key: &key, path);
1762	if (ret < `0`)
1763	goto out;
1764	else if (ret > `0`) {
1765	ret = -ENOENT;
1766	goto out;
1767	}
1768
1769	l = path->nodes[`0`];
1770	slot = path->slots[`0`];
1771
1772	iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref);
1773	len = btrfs_inode_ref_name_len(eb: l, s: iref);
1774	ptr -= len + `1`;
1775	total_len += len + `1`;
1776	if (ptr < name) {
1777	ret = -ENAMETOOLONG;
1778	goto out;
1779	}
1780
1781	*(ptr + len) = `'/'`;
1782	read_extent_buffer(eb: l, dst: ptr, start: (unsigned long)(iref + `1`), len);
1783
1784	if (key.offset == BTRFS_FIRST_FREE_OBJECTID)
1785	break;
1786
1787	btrfs_release_path(p: path);
1788	key.objectid = key.offset;
1789	key.offset = (u64)-`1`;
1790	dirid = key.objectid;
1791	}
1792	memmove(name, ptr, total_len);
1793	name[total_len] = `'\0'`;
1794	ret = `0`;
1795	out:
1796	btrfs_put_root(root);
1797	return ret;
1798	}
1799
1800	static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap,
1801	struct inode *inode,
1802	struct btrfs_ioctl_ino_lookup_user_args *args)
1803	{
1804	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
1805	u64 upper_limit = btrfs_ino(BTRFS_I(inode));
1806	u64 treeid = btrfs_root_id(BTRFS_I(inode)->root);
1807	u64 dirid = args->dirid;
1808	unsigned long item_off;
1809	unsigned long item_len;
1810	struct btrfs_inode_ref *iref;
1811	struct btrfs_root_ref *rref;
1812	struct btrfs_root *root = NULL;
1813	BTRFS_PATH_AUTO_FREE(path);
1814	struct btrfs_key key;
1815	struct extent_buffer *leaf;
1816	char *ptr;
1817	int slot;
1818	int len;
1819	int total_len = `0`;
1820	int ret;
1821
1822	path = btrfs_alloc_path();
1823	if (!path)
1824	return -ENOMEM;
1825
1826	/*
1827	* If the bottom subvolume does not exist directly under upper_limit,
1828	* construct the path in from the bottom up.
1829	*/
1830	if (dirid != upper_limit) {
1831	ptr = &args->path[BTRFS_INO_LOOKUP_USER_PATH_MAX - `1`];
1832
1833	root = btrfs_get_fs_root(fs_info, objectid: treeid, check_ref: true);
1834	if (IS_ERR(ptr: root))
1835	return PTR_ERR(ptr: root);
1836
1837	key.objectid = dirid;
1838	key.type = BTRFS_INODE_REF_KEY;
1839	key.offset = (u64)-`1`;
1840	while (`1`) {
1841	struct btrfs_inode *temp_inode;
1842
1843	ret = btrfs_search_backwards(root, key: &key, path);
1844	if (ret < `0`)
1845	goto out_put;
1846	else if (ret > `0`) {
1847	ret = -ENOENT;
1848	goto out_put;
1849	}
1850
1851	leaf = path->nodes[`0`];
1852	slot = path->slots[`0`];
1853
1854	iref = btrfs_item_ptr(leaf, slot, struct btrfs_inode_ref);
1855	len = btrfs_inode_ref_name_len(eb: leaf, s: iref);
1856	ptr -= len + `1`;
1857	total_len += len + `1`;
1858	if (ptr < args->path) {
1859	ret = -ENAMETOOLONG;
1860	goto out_put;
1861	}
1862
1863	*(ptr + len) = `'/'`;
1864	read_extent_buffer(eb: leaf, dst: ptr,
1865	start: (unsigned long)(iref + `1`), len);
1866
1867	/*
1868	* We don't need the path anymore, so release it and
1869	* avoid deadlocks and lockdep warnings in case
1870	* btrfs_iget() needs to lookup the inode from its root
1871	* btree and lock the same leaf.
1872	*/
1873	btrfs_release_path(p: path);
1874	temp_inode = btrfs_iget(ino: key.offset, root);
1875	if (IS_ERR(ptr: temp_inode)) {
1876	ret = PTR_ERR(ptr: temp_inode);
1877	goto out_put;
1878	}
1879	/ Check the read+exec permission of this directory. /
1880	ret = inode_permission(idmap, &temp_inode->vfs_inode,
1881	MAY_READ \| MAY_EXEC);
1882	iput(&temp_inode->vfs_inode);
1883	if (ret)
1884	goto out_put;
1885
1886	if (key.offset == upper_limit)
1887	break;
1888	if (key.objectid == BTRFS_FIRST_FREE_OBJECTID) {
1889	ret = -EACCES;
1890	goto out_put;
1891	}
1892
1893	key.objectid = key.offset;
1894	key.offset = (u64)-`1`;
1895	dirid = key.objectid;
1896	}
1897
1898	memmove(args->path, ptr, total_len);
1899	args->path[total_len] = `'\0'`;
1900	btrfs_put_root(root);
1901	root = NULL;
1902	btrfs_release_path(p: path);
1903	}
1904
1905	/ Get the bottom subvolume's name from ROOT_REF /
1906	key.objectid = treeid;
1907	key.type = BTRFS_ROOT_REF_KEY;
1908	key.offset = args->treeid;
1909	ret = btrfs_search_slot(NULL, root: fs_info->tree_root, key: &key, p: path, ins_len: `0`, cow: `0`);
1910	if (ret < `0`)
1911	return ret;
1912	else if (ret > `0`)
1913	return -ENOENT;
1914
1915	leaf = path->nodes[`0`];
1916	slot = path->slots[`0`];
1917	btrfs_item_key_to_cpu(eb: leaf, cpu_key: &key, nr: slot);
1918
1919	item_off = btrfs_item_ptr_offset(leaf, slot);
1920	item_len = btrfs_item_size(eb: leaf, slot);
1921	/ Check if dirid in ROOT_REF corresponds to passed dirid /
1922	rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref);
1923	if (args->dirid != btrfs_root_ref_dirid(eb: leaf, s: rref))
1924	return -EINVAL;
1925
1926	/ Copy subvolume's name /
1927	item_off += sizeof(struct btrfs_root_ref);
1928	item_len -= sizeof(struct btrfs_root_ref);
1929	read_extent_buffer(eb: leaf, dst: args->name, start: item_off, len: item_len);
1930	args->name[item_len] = `0`;
1931
1932	out_put:
1933	btrfs_put_root(root);
1934
1935	return ret;
1936	}
1937
1938	static noinline int btrfs_ioctl_ino_lookup(struct btrfs_root *root,
1939	void __user *argp)
1940	{
1941	struct btrfs_ioctl_ino_lookup_args *args;
1942	int ret = `0`;
1943
1944	args = memdup_user(argp, sizeof(*args));
1945	if (IS_ERR(ptr: args))
1946	return PTR_ERR(ptr: args);
1947
1948	/*
1949	* Unprivileged query to obtain the containing subvolume root id. The
1950	* path is reset so it's consistent with btrfs_search_path_in_tree.
1951	*/
1952	if (args->treeid == `0`)
1953	args->treeid = btrfs_root_id(root);
1954
1955	if (args->objectid == BTRFS_FIRST_FREE_OBJECTID) {
1956	args->name[`0`] = `0`;
1957	goto out;
1958	}
1959
1960	if (!capable(CAP_SYS_ADMIN)) {
1961	ret = -EPERM;
1962	goto out;
1963	}
1964
1965	ret = btrfs_search_path_in_tree(info: root->fs_info,
1966	tree_id: args->treeid, dirid: args->objectid,
1967	name: args->name);
1968
1969	out:
1970	if (ret == `0` && copy_to_user(to: argp, from: args, n: sizeof(*args)))
1971	ret = -EFAULT;
1972
1973	kfree(objp: args);
1974	return ret;
1975	}
1976
1977	/*
1978	* Version of ino_lookup ioctl (unprivileged)
1979	*
1980	* The main differences from ino_lookup ioctl are:
1981	*
1982	* 1. Read + Exec permission will be checked using inode_permission() during
1983	* path construction. -EACCES will be returned in case of failure.
1984	* 2. Path construction will be stopped at the inode number which corresponds
1985	* to the fd with which this ioctl is called. If constructed path does not
1986	* exist under fd's inode, -EACCES will be returned.
1987	* 3. The name of bottom subvolume is also searched and filled.
1988	*/
1989	static int btrfs_ioctl_ino_lookup_user(struct file file, void* __user *argp)
1990	{
1991	struct btrfs_ioctl_ino_lookup_user_args *args;
1992	struct inode *inode;
1993	int ret;
1994
1995	args = memdup_user(argp, sizeof(*args));
1996	if (IS_ERR(ptr: args))
1997	return PTR_ERR(ptr: args);
1998
1999	inode = file_inode(f: file);
2000
2001	if (args->dirid == BTRFS_FIRST_FREE_OBJECTID &&
2002	btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
2003	/*
2004	* The subvolume does not exist under fd with which this is
2005	* called
2006	*/
2007	kfree(objp: args);
2008	return -EACCES;
2009	}
2010
2011	ret = btrfs_search_path_in_tree_user(idmap: file_mnt_idmap(file), inode, args);
2012
2013	if (ret == `0` && copy_to_user(to: argp, from: args, n: sizeof(*args)))
2014	ret = -EFAULT;
2015
2016	kfree(objp: args);
2017	return ret;
2018	}
2019
2020	/ Get the subvolume information in BTRFS_ROOT_ITEM and BTRFS_ROOT_BACKREF /
2021	static int btrfs_ioctl_get_subvol_info(struct inode inode, void* __user *argp)
2022	{
2023	struct btrfs_ioctl_get_subvol_info_args *subvol_info;
2024	struct btrfs_fs_info *fs_info;
2025	struct btrfs_root *root;
2026	struct btrfs_path *path;
2027	struct btrfs_key key;
2028	struct btrfs_root_item *root_item;
2029	struct btrfs_root_ref *rref;
2030	struct extent_buffer *leaf;
2031	unsigned long item_off;
2032	unsigned long item_len;
2033	int slot;
2034	int ret = `0`;
2035
2036	path = btrfs_alloc_path();
2037	if (!path)
2038	return -ENOMEM;
2039
2040	subvol_info = kzalloc(sizeof(*subvol_info), GFP_KERNEL);
2041	if (!subvol_info) {
2042	btrfs_free_path(p: path);
2043	return -ENOMEM;
2044	}
2045
2046	fs_info = BTRFS_I(inode)->root->fs_info;
2047
2048	/ Get root_item of inode's subvolume /
2049	key.objectid = btrfs_root_id(BTRFS_I(inode)->root);
2050	root = btrfs_get_fs_root(fs_info, objectid: key.objectid, check_ref: true);
2051	if (IS_ERR(ptr: root)) {
2052	ret = PTR_ERR(ptr: root);
2053	goto out_free;
2054	}
2055	root_item = &root->root_item;
2056
2057	subvol_info->treeid = key.objectid;
2058
2059	subvol_info->generation = btrfs_root_generation(s: root_item);
2060	subvol_info->flags = btrfs_root_flags(s: root_item);
2061
2062	memcpy(subvol_info->uuid, root_item->uuid, BTRFS_UUID_SIZE);
2063	memcpy(subvol_info->parent_uuid, root_item->parent_uuid,
2064	BTRFS_UUID_SIZE);
2065	memcpy(subvol_info->received_uuid, root_item->received_uuid,
2066	BTRFS_UUID_SIZE);
2067
2068	subvol_info->ctransid = btrfs_root_ctransid(s: root_item);
2069	subvol_info->ctime.sec = btrfs_stack_timespec_sec(s: &root_item->ctime);
2070	subvol_info->ctime.nsec = btrfs_stack_timespec_nsec(s: &root_item->ctime);
2071
2072	subvol_info->otransid = btrfs_root_otransid(s: root_item);
2073	subvol_info->otime.sec = btrfs_stack_timespec_sec(s: &root_item->otime);
2074	subvol_info->otime.nsec = btrfs_stack_timespec_nsec(s: &root_item->otime);
2075
2076	subvol_info->stransid = btrfs_root_stransid(s: root_item);
2077	subvol_info->stime.sec = btrfs_stack_timespec_sec(s: &root_item->stime);
2078	subvol_info->stime.nsec = btrfs_stack_timespec_nsec(s: &root_item->stime);
2079
2080	subvol_info->rtransid = btrfs_root_rtransid(s: root_item);
2081	subvol_info->rtime.sec = btrfs_stack_timespec_sec(s: &root_item->rtime);
2082	subvol_info->rtime.nsec = btrfs_stack_timespec_nsec(s: &root_item->rtime);
2083
2084	if (key.objectid != BTRFS_FS_TREE_OBJECTID) {
2085	/ Search root tree for ROOT_BACKREF of this subvolume /
2086	key.type = BTRFS_ROOT_BACKREF_KEY;
2087	key.offset = `0`;
2088	ret = btrfs_search_slot(NULL, root: fs_info->tree_root, key: &key, p: path, ins_len: `0`, cow: `0`);
2089	if (ret < `0`) {
2090	goto out;
2091	} else if (path->slots[`0`] >=
2092	btrfs_header_nritems(eb: path->nodes[`0`])) {
2093	ret = btrfs_next_leaf(root: fs_info->tree_root, path);
2094	if (ret < `0`) {
2095	goto out;
2096	} else if (unlikely(ret > `0`)) {
2097	ret = -EUCLEAN;
2098	goto out;
2099	}
2100	}
2101
2102	leaf = path->nodes[`0`];
2103	slot = path->slots[`0`];
2104	btrfs_item_key_to_cpu(eb: leaf, cpu_key: &key, nr: slot);
2105	if (key.objectid == subvol_info->treeid &&
2106	key.type == BTRFS_ROOT_BACKREF_KEY) {
2107	subvol_info->parent_id = key.offset;
2108
2109	rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref);
2110	subvol_info->dirid = btrfs_root_ref_dirid(eb: leaf, s: rref);
2111
2112	item_off = btrfs_item_ptr_offset(leaf, slot)
2113	+ sizeof(struct btrfs_root_ref);
2114	item_len = btrfs_item_size(eb: leaf, slot)
2115	- sizeof(struct btrfs_root_ref);
2116	read_extent_buffer(eb: leaf, dst: subvol_info->name,
2117	start: item_off, len: item_len);
2118	} else {
2119	ret = -ENOENT;
2120	goto out;
2121	}
2122	}
2123
2124	btrfs_free_path(p: path);
2125	path = NULL;
2126	if (copy_to_user(to: argp, from: subvol_info, n: sizeof(*subvol_info)))
2127	ret = -EFAULT;
2128
2129	out:
2130	btrfs_put_root(root);
2131	out_free:
2132	btrfs_free_path(p: path);
2133	kfree(objp: subvol_info);
2134	return ret;
2135	}
2136
2137	/*
2138	* Return ROOT_REF information of the subvolume containing this inode
2139	* except the subvolume name.
2140	*/
2141	static int btrfs_ioctl_get_subvol_rootref(struct btrfs_root *root,
2142	void __user *argp)
2143	{
2144	struct btrfs_ioctl_get_subvol_rootref_args *rootrefs;
2145	struct btrfs_root_ref *rref;
2146	struct btrfs_path *path;
2147	struct btrfs_key key;
2148	struct extent_buffer *leaf;
2149	u64 objectid;
2150	int slot;
2151	int ret;
2152	u8 found;
2153
2154	path = btrfs_alloc_path();
2155	if (!path)
2156	return -ENOMEM;
2157
2158	rootrefs = memdup_user(argp, sizeof(*rootrefs));
2159	if (IS_ERR(ptr: rootrefs)) {
2160	btrfs_free_path(p: path);
2161	return PTR_ERR(ptr: rootrefs);
2162	}
2163
2164	objectid = btrfs_root_id(root);
2165	key.objectid = objectid;
2166	key.type = BTRFS_ROOT_REF_KEY;
2167	key.offset = rootrefs->min_treeid;
2168	found = `0`;
2169
2170	root = root->fs_info->tree_root;
2171	ret = btrfs_search_slot(NULL, root, key: &key, p: path, ins_len: `0`, cow: `0`);
2172	if (ret < `0`) {
2173	goto out;
2174	} else if (path->slots[`0`] >=
2175	btrfs_header_nritems(eb: path->nodes[`0`])) {
2176	ret = btrfs_next_leaf(root, path);
2177	if (ret < `0`) {
2178	goto out;
2179	} else if (unlikely(ret > `0`)) {
2180	ret = -EUCLEAN;
2181	goto out;
2182	}
2183	}
2184	while (`1`) {
2185	leaf = path->nodes[`0`];
2186	slot = path->slots[`0`];
2187
2188	btrfs_item_key_to_cpu(eb: leaf, cpu_key: &key, nr: slot);
2189	if (key.objectid != objectid \|\| key.type != BTRFS_ROOT_REF_KEY) {
2190	ret = `0`;
2191	goto out;
2192	}
2193
2194	if (found == BTRFS_MAX_ROOTREF_BUFFER_NUM) {
2195	ret = -EOVERFLOW;
2196	goto out;
2197	}
2198
2199	rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref);
2200	rootrefs->rootref[found].treeid = key.offset;
2201	rootrefs->rootref[found].dirid =
2202	btrfs_root_ref_dirid(eb: leaf, s: rref);
2203	found++;
2204
2205	ret = btrfs_next_item(root, p: path);
2206	if (ret < `0`) {
2207	goto out;
2208	} else if (unlikely(ret > `0`)) {
2209	ret = -EUCLEAN;
2210	goto out;
2211	}
2212	}
2213
2214	out:
2215	btrfs_free_path(p: path);
2216
2217	if (!ret \|\| ret == -EOVERFLOW) {
2218	rootrefs->num_items = found;
2219	/ update min_treeid for next search /
2220	if (found)
2221	rootrefs->min_treeid =
2222	rootrefs->rootref[found - `1`].treeid + `1`;
2223	if (copy_to_user(to: argp, from: rootrefs, n: sizeof(*rootrefs)))
2224	ret = -EFAULT;
2225	}
2226
2227	kfree(objp: rootrefs);
2228
2229	return ret;
2230	}
2231
2232	static noinline int btrfs_ioctl_snap_destroy(struct file *file,
2233	void __user *arg,
2234	bool destroy_v2)
2235	{
2236	struct dentry *parent = file->f_path.dentry;
2237	struct dentry *dentry;
2238	struct inode *dir = d_inode(dentry: parent);
2239	struct btrfs_fs_info *fs_info = inode_to_fs_info(dir);
2240	struct inode *inode;
2241	struct btrfs_root *root = BTRFS_I(dir)->root;
2242	struct btrfs_root *dest = NULL;
2243	struct btrfs_ioctl_vol_args *vol_args = NULL;
2244	struct btrfs_ioctl_vol_args_v2 *vol_args2 = NULL;
2245	struct mnt_idmap *idmap = file_mnt_idmap(file);
2246	char subvol_name, subvol_name_ptr = NULL;
2247	int ret = `0`;
2248	bool destroy_parent = false;
2249
2250	/ We don't support snapshots with extent tree v2 yet. /
2251	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
2252	btrfs_err(fs_info,
2253	"extent tree v2 doesn't support snapshot deletion yet");
2254	return -EOPNOTSUPP;
2255	}
2256
2257	if (destroy_v2) {
2258	vol_args2 = memdup_user(arg, sizeof(*vol_args2));
2259	if (IS_ERR(ptr: vol_args2))
2260	return PTR_ERR(ptr: vol_args2);
2261
2262	if (vol_args2->flags & ~BTRFS_SUBVOL_DELETE_ARGS_MASK) {
2263	ret = -EOPNOTSUPP;
2264	goto out;
2265	}
2266
2267	/*
2268	* If SPEC_BY_ID is not set, we are looking for the subvolume by
2269	* name, same as v1 currently does.
2270	*/
2271	if (!(vol_args2->flags & BTRFS_SUBVOL_SPEC_BY_ID)) {
2272	ret = btrfs_check_ioctl_vol_args2_subvol_name(vol_args2);
2273	if (ret < `0`)
2274	goto out;
2275	subvol_name = vol_args2->name;
2276
2277	ret = mnt_want_write_file(file);
2278	if (ret)
2279	goto out;
2280	} else {
2281	struct inode *old_dir;
2282
2283	if (vol_args2->subvolid < BTRFS_FIRST_FREE_OBJECTID) {
2284	ret = -EINVAL;
2285	goto out;
2286	}
2287
2288	ret = mnt_want_write_file(file);
2289	if (ret)
2290	goto out;
2291
2292	dentry = btrfs_get_dentry(sb: fs_info->sb,
2293	BTRFS_FIRST_FREE_OBJECTID,
2294	root_objectid: vol_args2->subvolid, generation: `0`);
2295	if (IS_ERR(ptr: dentry)) {
2296	ret = PTR_ERR(ptr: dentry);
2297	goto out_drop_write;
2298	}
2299
2300	/*
2301	* Change the default parent since the subvolume being
2302	* deleted can be outside of the current mount point.
2303	*/
2304	parent = btrfs_get_parent(child: dentry);
2305
2306	/*
2307	* At this point dentry->d_name can point to '/' if the
2308	* subvolume we want to destroy is outsite of the
2309	* current mount point, so we need to release the
2310	* current dentry and execute the lookup to return a new
2311	* one with ->d_name pointing to the
2312	* <mount point>/subvol_name.
2313	*/
2314	dput(dentry);
2315	if (IS_ERR(ptr: parent)) {
2316	ret = PTR_ERR(ptr: parent);
2317	goto out_drop_write;
2318	}
2319	old_dir = dir;
2320	dir = d_inode(dentry: parent);
2321
2322	/*
2323	* If v2 was used with SPEC_BY_ID, a new parent was
2324	* allocated since the subvolume can be outside of the
2325	* current mount point. Later on we need to release this
2326	* new parent dentry.
2327	*/
2328	destroy_parent = true;
2329
2330	/*
2331	* On idmapped mounts, deletion via subvolid is
2332	* restricted to subvolumes that are immediate
2333	* ancestors of the inode referenced by the file
2334	* descriptor in the ioctl. Otherwise the idmapping
2335	* could potentially be abused to delete subvolumes
2336	* anywhere in the filesystem the user wouldn't be able
2337	* to delete without an idmapped mount.
2338	*/
2339	if (old_dir != dir && idmap != &nop_mnt_idmap) {
2340	ret = -EOPNOTSUPP;
2341	goto free_parent;
2342	}
2343
2344	subvol_name_ptr = btrfs_get_subvol_name_from_objectid(
2345	fs_info, subvol_objectid: vol_args2->subvolid);
2346	if (IS_ERR(ptr: subvol_name_ptr)) {
2347	ret = PTR_ERR(ptr: subvol_name_ptr);
2348	goto free_parent;
2349	}
2350	/ subvol_name_ptr is already nul terminated /
2351	subvol_name = (char *)kbasename(path: subvol_name_ptr);
2352	}
2353	} else {
2354	vol_args = memdup_user(arg, sizeof(*vol_args));
2355	if (IS_ERR(ptr: vol_args))
2356	return PTR_ERR(ptr: vol_args);
2357
2358	ret = btrfs_check_ioctl_vol_args_path(vol_args);
2359	if (ret < `0`)
2360	goto out;
2361
2362	subvol_name = vol_args->name;
2363
2364	ret = mnt_want_write_file(file);
2365	if (ret)
2366	goto out;
2367	}
2368
2369	if (strchr(subvol_name, `'/'`) \|\|
2370	strcmp(subvol_name, "..") == `0`) {
2371	ret = -EINVAL;
2372	goto free_subvol_name;
2373	}
2374
2375	if (!S_ISDIR(dir->i_mode)) {
2376	ret = -ENOTDIR;
2377	goto free_subvol_name;
2378	}
2379
2380	dentry = start_removing_killable(idmap, parent, name: &QSTR(subvol_name));
2381	if (IS_ERR(ptr: dentry)) {
2382	ret = PTR_ERR(ptr: dentry);
2383	goto out_end_removing;
2384	}
2385
2386	inode = d_inode(dentry);
2387	dest = BTRFS_I(inode)->root;
2388	if (!capable(CAP_SYS_ADMIN)) {
2389	/*
2390	* Regular user. Only allow this with a special mount
2391	* option, when the user has write+exec access to the
2392	* subvol root, and when rmdir(2) would have been
2393	* allowed.
2394	*
2395	* Note that this is _not_ check that the subvol is
2396	* empty or doesn't contain data that we wouldn't
2397	* otherwise be able to delete.
2398	*
2399	* Users who want to delete empty subvols should try
2400	* rmdir(2).
2401	*/
2402	ret = -EPERM;
2403	if (!btrfs_test_opt(fs_info, USER_SUBVOL_RM_ALLOWED))
2404	goto out_end_removing;
2405
2406	/*
2407	* Do not allow deletion if the parent dir is the same
2408	* as the dir to be deleted. That means the ioctl
2409	* must be called on the dentry referencing the root
2410	* of the subvol, not a random directory contained
2411	* within it.
2412	*/
2413	ret = -EINVAL;
2414	if (root == dest)
2415	goto out_end_removing;
2416
2417	ret = inode_permission(idmap, inode, MAY_WRITE \| MAY_EXEC);
2418	if (ret)
2419	goto out_end_removing;
2420	}
2421
2422	/ check if subvolume may be deleted by a user /
2423	ret = btrfs_may_delete(idmap, dir, victim: dentry, isdir: `1`);
2424	if (ret)
2425	goto out_end_removing;
2426
2427	if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
2428	ret = -EINVAL;
2429	goto out_end_removing;
2430	}
2431
2432	btrfs_inode_lock(BTRFS_I(inode), ilock_flags: `0`);
2433	ret = btrfs_delete_subvolume(BTRFS_I(dir), dentry);
2434	btrfs_inode_unlock(BTRFS_I(inode), ilock_flags: `0`);
2435	if (!ret)
2436	d_delete_notify(dir, dentry);
2437
2438	out_end_removing:
2439	end_removing(child: dentry);
2440	free_subvol_name:
2441	kfree(objp: subvol_name_ptr);
2442	free_parent:
2443	if (destroy_parent)
2444	dput(parent);
2445	out_drop_write:
2446	mnt_drop_write_file(file);
2447	out:
2448	kfree(objp: vol_args2);
2449	kfree(objp: vol_args);
2450	return ret;
2451	}
2452
2453	static int btrfs_ioctl_defrag(struct file file, void* __user *argp)
2454	{
2455	struct inode *inode = file_inode(f: file);
2456	struct btrfs_root *root = BTRFS_I(inode)->root;
2457	struct btrfs_ioctl_defrag_range_args range = {`0`};
2458	int ret;
2459
2460	ret = mnt_want_write_file(file);
2461	if (ret)
2462	return ret;
2463
2464	if (btrfs_root_readonly(root)) {
2465	ret = -EROFS;
2466	goto out;
2467	}
2468
2469	switch (inode->i_mode & S_IFMT) {
2470	case S_IFDIR:
2471	if (!capable(CAP_SYS_ADMIN)) {
2472	ret = -EPERM;
2473	goto out;
2474	}
2475	ret = btrfs_defrag_root(root);
2476	break;
2477	case S_IFREG:
2478	/*
2479	* Note that this does not check the file descriptor for write
2480	* access. This prevents defragmenting executables that are
2481	* running and allows defrag on files open in read-only mode.
2482	*/
2483	if (!capable(CAP_SYS_ADMIN) &&
2484	inode_permission(&nop_mnt_idmap, inode, MAY_WRITE)) {
2485	ret = -EPERM;
2486	goto out;
2487	}
2488
2489	/*
2490	* Don't allow defrag on pre-content watched files, as it could
2491	* populate the page cache with 0's via readahead.
2492	*/
2493	if (unlikely(FMODE_FSNOTIFY_HSM(file->f_mode))) {
2494	ret = -EINVAL;
2495	goto out;
2496	}
2497
2498	if (argp) {
2499	if (copy_from_user(to: &range, from: argp, n: sizeof(range))) {
2500	ret = -EFAULT;
2501	goto out;
2502	}
2503	if (range.flags & ~BTRFS_DEFRAG_RANGE_FLAGS_SUPP) {
2504	ret = -EOPNOTSUPP;
2505	goto out;
2506	}
2507	if ((range.flags & BTRFS_DEFRAG_RANGE_COMPRESS) &&
2508	(range.flags & BTRFS_DEFRAG_RANGE_NOCOMPRESS)) {
2509	ret = -EINVAL;
2510	goto out;
2511	}
2512	/ Compression or no-compression require to start the IO. /
2513	if ((range.flags & BTRFS_DEFRAG_RANGE_COMPRESS) \|\|
2514	(range.flags & BTRFS_DEFRAG_RANGE_NOCOMPRESS)) {
2515	range.flags \|= BTRFS_DEFRAG_RANGE_START_IO;
2516	range.extent_thresh = (u32)-`1`;
2517	}
2518	} else {
2519	/ the rest are all set to zero by kzalloc /
2520	range.len = (u64)-`1`;
2521	}
2522	ret = btrfs_defrag_file(BTRFS_I(file_inode(file)), ra: &file->f_ra,
2523	range: &range, BTRFS_OLDEST_GENERATION, max_to_defrag: `0`);
2524	if (ret > `0`)
2525	ret = `0`;
2526	break;
2527	default:
2528	ret = -EINVAL;
2529	}
2530	out:
2531	mnt_drop_write_file(file);
2532	return ret;
2533	}
2534
2535	static long btrfs_ioctl_add_dev(struct btrfs_fs_info fs_info, void* __user *arg)
2536	{
2537	struct btrfs_ioctl_vol_args *vol_args;
2538	bool restore_op = false;
2539	int ret;
2540
2541	if (!capable(CAP_SYS_ADMIN))
2542	return -EPERM;
2543
2544	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
2545	btrfs_err(fs_info, "device add not supported on extent tree v2 yet");
2546	return -EINVAL;
2547	}
2548
2549	if (fs_info->fs_devices->temp_fsid) {
2550	btrfs_err(fs_info,
2551	"device add not supported on cloned temp-fsid mount");
2552	return -EINVAL;
2553	}
2554
2555	if (!btrfs_exclop_start(fs_info, type: BTRFS_EXCLOP_DEV_ADD)) {
2556	if (!btrfs_exclop_start_try_lock(fs_info, type: BTRFS_EXCLOP_DEV_ADD))
2557	return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
2558
2559	/*
2560	* We can do the device add because we have a paused balanced,
2561	* change the exclusive op type and remember we should bring
2562	* back the paused balance
2563	*/
2564	fs_info->exclusive_operation = BTRFS_EXCLOP_DEV_ADD;
2565	btrfs_exclop_start_unlock(fs_info);
2566	restore_op = true;
2567	}
2568
2569	vol_args = memdup_user(arg, sizeof(*vol_args));
2570	if (IS_ERR(ptr: vol_args)) {
2571	ret = PTR_ERR(ptr: vol_args);
2572	goto out;
2573	}
2574
2575	ret = btrfs_check_ioctl_vol_args_path(vol_args);
2576	if (ret < `0`)
2577	goto out_free;
2578
2579	ret = btrfs_init_new_device(fs_info, path: vol_args->name);
2580
2581	if (!ret)
2582	btrfs_info(fs_info, "disk added %s", vol_args->name);
2583
2584	out_free:
2585	kfree(objp: vol_args);
2586	out:
2587	if (restore_op)
2588	btrfs_exclop_balance(fs_info, op: BTRFS_EXCLOP_BALANCE_PAUSED);
2589	else
2590	btrfs_exclop_finish(fs_info);
2591	return ret;
2592	}
2593
2594	static long btrfs_ioctl_rm_dev_v2(struct file file, void* __user *arg)
2595	{
2596	BTRFS_DEV_LOOKUP_ARGS(args);
2597	struct inode *inode = file_inode(f: file);
2598	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
2599	struct btrfs_ioctl_vol_args_v2 *vol_args;
2600	struct file *bdev_file = NULL;
2601	int ret;
2602	bool cancel = false;
2603
2604	if (!capable(CAP_SYS_ADMIN))
2605	return -EPERM;
2606
2607	vol_args = memdup_user(arg, sizeof(*vol_args));
2608	if (IS_ERR(ptr: vol_args))
2609	return PTR_ERR(ptr: vol_args);
2610
2611	if (vol_args->flags & ~BTRFS_DEVICE_REMOVE_ARGS_MASK) {
2612	ret = -EOPNOTSUPP;
2613	goto out;
2614	}
2615
2616	ret = btrfs_check_ioctl_vol_args2_subvol_name(vol_args2: vol_args);
2617	if (ret < `0`)
2618	goto out;
2619
2620	if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) {
2621	args.devid = vol_args->devid;
2622	} else if (!strcmp("cancel", vol_args->name)) {
2623	cancel = true;
2624	} else {
2625	ret = btrfs_get_dev_args_from_path(fs_info, args: &args, path: vol_args->name);
2626	if (ret)
2627	goto out;
2628	}
2629
2630	ret = mnt_want_write_file(file);
2631	if (ret)
2632	goto out;
2633
2634	ret = exclop_start_or_cancel_reloc(fs_info, type: BTRFS_EXCLOP_DEV_REMOVE,
2635	cancel);
2636	if (ret)
2637	goto err_drop;
2638
2639	/ Exclusive operation is now claimed /
2640	ret = btrfs_rm_device(fs_info, args: &args, bdev_file: &bdev_file);
2641
2642	btrfs_exclop_finish(fs_info);
2643
2644	if (!ret) {
2645	if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID)
2646	btrfs_info(fs_info, "device deleted: id %llu",
2647	vol_args->devid);
2648	else
2649	btrfs_info(fs_info, "device deleted: %s",
2650	vol_args->name);
2651	}
2652	err_drop:
2653	mnt_drop_write_file(file);
2654	if (bdev_file)
2655	bdev_fput(bdev_file);
2656	out:
2657	btrfs_put_dev_args_from_path(args: &args);
2658	kfree(objp: vol_args);
2659	return ret;
2660	}
2661
2662	static long btrfs_ioctl_rm_dev(struct file file, void* __user *arg)
2663	{
2664	BTRFS_DEV_LOOKUP_ARGS(args);
2665	struct inode *inode = file_inode(f: file);
2666	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
2667	struct btrfs_ioctl_vol_args *vol_args;
2668	struct file *bdev_file = NULL;
2669	int ret;
2670	bool cancel = false;
2671
2672	if (!capable(CAP_SYS_ADMIN))
2673	return -EPERM;
2674
2675	vol_args = memdup_user(arg, sizeof(*vol_args));
2676	if (IS_ERR(ptr: vol_args))
2677	return PTR_ERR(ptr: vol_args);
2678
2679	ret = btrfs_check_ioctl_vol_args_path(vol_args);
2680	if (ret < `0`)
2681	goto out_free;
2682
2683	if (!strcmp("cancel", vol_args->name)) {
2684	cancel = true;
2685	} else {
2686	ret = btrfs_get_dev_args_from_path(fs_info, args: &args, path: vol_args->name);
2687	if (ret)
2688	goto out;
2689	}
2690
2691	ret = mnt_want_write_file(file);
2692	if (ret)
2693	goto out;
2694
2695	ret = exclop_start_or_cancel_reloc(fs_info, type: BTRFS_EXCLOP_DEV_REMOVE,
2696	cancel);
2697	if (ret == `0`) {
2698	ret = btrfs_rm_device(fs_info, args: &args, bdev_file: &bdev_file);
2699	if (!ret)
2700	btrfs_info(fs_info, "disk deleted %s", vol_args->name);
2701	btrfs_exclop_finish(fs_info);
2702	}
2703
2704	mnt_drop_write_file(file);
2705	if (bdev_file)
2706	bdev_fput(bdev_file);
2707	out:
2708	btrfs_put_dev_args_from_path(args: &args);
2709	out_free:
2710	kfree(objp: vol_args);
2711	return ret;
2712	}
2713
2714	static long btrfs_ioctl_fs_info(const struct btrfs_fs_info *fs_info,
2715	void __user *arg)
2716	{
2717	struct btrfs_ioctl_fs_info_args *fi_args;
2718	struct btrfs_device *device;
2719	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
2720	u64 flags_in;
2721	int ret = `0`;
2722
2723	fi_args = memdup_user(arg, sizeof(*fi_args));
2724	if (IS_ERR(ptr: fi_args))
2725	return PTR_ERR(ptr: fi_args);
2726
2727	flags_in = fi_args->flags;
2728	memset(fi_args, `0`, sizeof(*fi_args));
2729
2730	rcu_read_lock();
2731	fi_args->num_devices = fs_devices->num_devices;
2732
2733	list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
2734	if (device->devid > fi_args->max_id)
2735	fi_args->max_id = device->devid;
2736	}
2737	rcu_read_unlock();
2738
2739	memcpy(&fi_args->fsid, fs_devices->fsid, sizeof(fi_args->fsid));
2740	fi_args->nodesize = fs_info->nodesize;
2741	fi_args->sectorsize = fs_info->sectorsize;
2742	fi_args->clone_alignment = fs_info->sectorsize;
2743
2744	if (flags_in & BTRFS_FS_INFO_FLAG_CSUM_INFO) {
2745	fi_args->csum_type = btrfs_super_csum_type(s: fs_info->super_copy);
2746	fi_args->csum_size = btrfs_super_csum_size(s: fs_info->super_copy);
2747	fi_args->flags \|= BTRFS_FS_INFO_FLAG_CSUM_INFO;
2748	}
2749
2750	if (flags_in & BTRFS_FS_INFO_FLAG_GENERATION) {
2751	fi_args->generation = btrfs_get_fs_generation(fs_info);
2752	fi_args->flags \|= BTRFS_FS_INFO_FLAG_GENERATION;
2753	}
2754
2755	if (flags_in & BTRFS_FS_INFO_FLAG_METADATA_UUID) {
2756	memcpy(&fi_args->metadata_uuid, fs_devices->metadata_uuid,
2757	sizeof(fi_args->metadata_uuid));
2758	fi_args->flags \|= BTRFS_FS_INFO_FLAG_METADATA_UUID;
2759	}
2760
2761	if (copy_to_user(to: arg, from: fi_args, n: sizeof(*fi_args)))
2762	ret = -EFAULT;
2763
2764	kfree(objp: fi_args);
2765	return ret;
2766	}
2767
2768	static long btrfs_ioctl_dev_info(const struct btrfs_fs_info *fs_info,
2769	void __user *arg)
2770	{
2771	BTRFS_DEV_LOOKUP_ARGS(args);
2772	struct btrfs_ioctl_dev_info_args *di_args;
2773	struct btrfs_device *dev;
2774	int ret = `0`;
2775
2776	di_args = memdup_user(arg, sizeof(*di_args));
2777	if (IS_ERR(ptr: di_args))
2778	return PTR_ERR(ptr: di_args);
2779
2780	args.devid = di_args->devid;
2781	if (!btrfs_is_empty_uuid(uuid: di_args->uuid))
2782	args.uuid = di_args->uuid;
2783
2784	rcu_read_lock();
2785	dev = btrfs_find_device(fs_devices: fs_info->fs_devices, args: &args);
2786	if (!dev) {
2787	ret = -ENODEV;
2788	goto out;
2789	}
2790
2791	di_args->devid = dev->devid;
2792	di_args->bytes_used = btrfs_device_get_bytes_used(dev);
2793	di_args->total_bytes = btrfs_device_get_total_bytes(dev);
2794	memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
2795	memcpy(di_args->fsid, dev->fs_devices->fsid, BTRFS_UUID_SIZE);
2796	if (dev->name)
2797	strscpy(di_args->path, btrfs_dev_name(dev), sizeof(di_args->path));
2798	else
2799	di_args->path[`0`] = `'\0'`;
2800
2801	out:
2802	rcu_read_unlock();
2803	if (ret == `0` && copy_to_user(to: arg, from: di_args, n: sizeof(*di_args)))
2804	ret = -EFAULT;
2805
2806	kfree(objp: di_args);
2807	return ret;
2808	}
2809
2810	static long btrfs_ioctl_default_subvol(struct file file, void* __user *argp)
2811	{
2812	struct inode *inode = file_inode(f: file);
2813	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
2814	struct btrfs_root *root = BTRFS_I(inode)->root;
2815	struct btrfs_root *new_root;
2816	struct btrfs_dir_item *di;
2817	struct btrfs_trans_handle *trans;
2818	struct btrfs_path *path = NULL;
2819	struct btrfs_disk_key disk_key;
2820	struct fscrypt_str name = FSTR_INIT("default", `7`);
2821	u64 objectid = `0`;
2822	u64 dir_id;
2823	int ret;
2824
2825	if (!capable(CAP_SYS_ADMIN))
2826	return -EPERM;
2827
2828	ret = mnt_want_write_file(file);
2829	if (ret)
2830	return ret;
2831
2832	if (copy_from_user(to: &objectid, from: argp, n: sizeof(objectid))) {
2833	ret = -EFAULT;
2834	goto out;
2835	}
2836
2837	if (!objectid)
2838	objectid = BTRFS_FS_TREE_OBJECTID;
2839
2840	new_root = btrfs_get_fs_root(fs_info, objectid, check_ref: true);
2841	if (IS_ERR(ptr: new_root)) {
2842	ret = PTR_ERR(ptr: new_root);
2843	goto out;
2844	}
2845	if (!btrfs_is_fstree(rootid: btrfs_root_id(root: new_root))) {
2846	ret = -ENOENT;
2847	goto out_free;
2848	}
2849
2850	path = btrfs_alloc_path();
2851	if (!path) {
2852	ret = -ENOMEM;
2853	goto out_free;
2854	}
2855
2856	trans = btrfs_start_transaction(root, num_items: `1`);
2857	if (IS_ERR(ptr: trans)) {
2858	ret = PTR_ERR(ptr: trans);
2859	goto out_free;
2860	}
2861
2862	dir_id = btrfs_super_root_dir(s: fs_info->super_copy);
2863	di = btrfs_lookup_dir_item(trans, root: fs_info->tree_root, path,
2864	dir: dir_id, name: &name, mod: `1`);
2865	if (IS_ERR_OR_NULL(ptr: di)) {
2866	btrfs_release_path(p: path);
2867	btrfs_end_transaction(trans);
2868	btrfs_err(fs_info,
2869	"Umm, you don't have the default diritem, this isn't going to work");
2870	ret = -ENOENT;
2871	goto out_free;
2872	}
2873
2874	btrfs_cpu_key_to_disk(disk_key: &disk_key, cpu_key: &new_root->root_key);
2875	btrfs_set_dir_item_key(eb: path->nodes[`0`], item: di, key: &disk_key);
2876	btrfs_release_path(p: path);
2877
2878	btrfs_set_fs_incompat(fs_info, DEFAULT_SUBVOL);
2879	btrfs_end_transaction(trans);
2880	out_free:
2881	btrfs_put_root(root: new_root);
2882	btrfs_free_path(p: path);
2883	out:
2884	mnt_drop_write_file(file);
2885	return ret;
2886	}
2887
2888	static void get_block_group_info(struct list_head *groups_list,
2889	struct btrfs_ioctl_space_info *space)
2890	{
2891	struct btrfs_block_group *block_group;
2892
2893	space->total_bytes = `0`;
2894	space->used_bytes = `0`;
2895	space->flags = `0`;
2896	list_for_each_entry(block_group, groups_list, list) {
2897	space->flags = block_group->flags;
2898	space->total_bytes += block_group->length;
2899	space->used_bytes += block_group->used;
2900	}
2901	}
2902
2903	static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
2904	void __user *arg)
2905	{
2906	struct btrfs_ioctl_space_args space_args = { `0` };
2907	struct btrfs_ioctl_space_info space;
2908	struct btrfs_ioctl_space_info *dest;
2909	struct btrfs_ioctl_space_info AUTO_KFREE(dest_orig);
2910	struct btrfs_ioctl_space_info __user *user_dest;
2911	struct btrfs_space_info *info;
2912	static const u64 types[] = {
2913	BTRFS_BLOCK_GROUP_DATA,
2914	BTRFS_BLOCK_GROUP_SYSTEM,
2915	BTRFS_BLOCK_GROUP_METADATA,
2916	BTRFS_BLOCK_GROUP_DATA \| BTRFS_BLOCK_GROUP_METADATA
2917	};
2918	int num_types = `4`;
2919	int alloc_size;
2920	int ret = `0`;
2921	u64 slot_count = `0`;
2922	int i, c;
2923
2924	if (copy_from_user(to: &space_args,
2925	from: (struct btrfs_ioctl_space_args __user *)arg,
2926	n: sizeof(space_args)))
2927	return -EFAULT;
2928
2929	for (i = `0`; i < num_types; i++) {
2930	struct btrfs_space_info *tmp;
2931
2932	info = NULL;
2933	list_for_each_entry(tmp, &fs_info->space_info, list) {
2934	if (tmp->flags == types[i]) {
2935	info = tmp;
2936	break;
2937	}
2938	}
2939
2940	if (!info)
2941	continue;
2942
2943	down_read(sem: &info->groups_sem);
2944	for (c = `0`; c < BTRFS_NR_RAID_TYPES; c++) {
2945	if (!list_empty(head: &info->block_groups[c]))
2946	slot_count++;
2947	}
2948	up_read(sem: &info->groups_sem);
2949	}
2950
2951	/*
2952	* Global block reserve, exported as a space_info
2953	*/
2954	slot_count++;
2955
2956	/ space_slots == 0 means they are asking for a count /
2957	if (space_args.space_slots == `0`) {
2958	space_args.total_spaces = slot_count;
2959	goto out;
2960	}
2961
2962	slot_count = min_t(u64, space_args.space_slots, slot_count);
2963
2964	alloc_size = sizeof(dest) slot_count;
2965
2966	/ we generally have at most 6 or so space infos, one for each raid*
2967	* level. So, a whole page should be more than enough for everyone
2968	*/
2969	if (alloc_size > PAGE_SIZE)
2970	return -ENOMEM;
2971
2972	space_args.total_spaces = `0`;
2973	dest = kmalloc(alloc_size, GFP_KERNEL);
2974	if (!dest)
2975	return -ENOMEM;
2976	dest_orig = dest;
2977
2978	/ now we have a buffer to copy into /
2979	for (i = `0`; i < num_types; i++) {
2980	struct btrfs_space_info *tmp;
2981
2982	if (!slot_count)
2983	break;
2984
2985	info = NULL;
2986	list_for_each_entry(tmp, &fs_info->space_info, list) {
2987	if (tmp->flags == types[i]) {
2988	info = tmp;
2989	break;
2990	}
2991	}
2992
2993	if (!info)
2994	continue;
2995	down_read(sem: &info->groups_sem);
2996	for (c = `0`; c < BTRFS_NR_RAID_TYPES; c++) {
2997	if (!list_empty(head: &info->block_groups[c])) {
2998	get_block_group_info(groups_list: &info->block_groups[c],
2999	space: &space);
3000	memcpy(dest, &space, sizeof(space));
3001	dest++;
3002	space_args.total_spaces++;
3003	slot_count--;
3004	}
3005	if (!slot_count)
3006	break;
3007	}
3008	up_read(sem: &info->groups_sem);
3009	}
3010
3011	/*
3012	* Add global block reserve
3013	*/
3014	if (slot_count) {
3015	struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
3016
3017	spin_lock(lock: &block_rsv->lock);
3018	space.total_bytes = block_rsv->size;
3019	space.used_bytes = block_rsv->size - block_rsv->reserved;
3020	spin_unlock(lock: &block_rsv->lock);
3021	space.flags = BTRFS_SPACE_INFO_GLOBAL_RSV;
3022	memcpy(dest, &space, sizeof(space));
3023	space_args.total_spaces++;
3024	}
3025
3026	user_dest = (struct btrfs_ioctl_space_info __user *)
3027	(arg + sizeof(struct btrfs_ioctl_space_args));
3028
3029	if (copy_to_user(to: user_dest, from: dest_orig, n: alloc_size))
3030	return -EFAULT;
3031
3032	out:
3033	if (ret == `0` && copy_to_user(to: arg, from: &space_args, n: sizeof(space_args)))
3034	ret = -EFAULT;
3035
3036	return ret;
3037	}
3038
3039	static noinline long btrfs_ioctl_start_sync(struct btrfs_root *root,
3040	void __user *argp)
3041	{
3042	struct btrfs_trans_handle *trans;
3043	u64 transid;
3044
3045	/*
3046	* Start orphan cleanup here for the given root in case it hasn't been
3047	* started already by other means. Errors are handled in the other
3048	* functions during transaction commit.
3049	*/
3050	btrfs_orphan_cleanup(root);
3051
3052	trans = btrfs_attach_transaction_barrier(root);
3053	if (IS_ERR(ptr: trans)) {
3054	if (PTR_ERR(ptr: trans) != -ENOENT)
3055	return PTR_ERR(ptr: trans);
3056
3057	/ No running transaction, don't bother /
3058	transid = btrfs_get_last_trans_committed(fs_info: root->fs_info);
3059	goto out;
3060	}
3061	transid = trans->transid;
3062	btrfs_commit_transaction_async(trans);
3063	out:
3064	if (argp)
3065	if (copy_to_user(to: argp, from: &transid, n: sizeof(transid)))
3066	return -EFAULT;
3067	return `0`;
3068	}
3069
3070	static noinline long btrfs_ioctl_wait_sync(struct btrfs_fs_info *fs_info,
3071	void __user *argp)
3072	{
3073	/ By default wait for the current transaction. /
3074	u64 transid = `0`;
3075
3076	if (argp)
3077	if (copy_from_user(to: &transid, from: argp, n: sizeof(transid)))
3078	return -EFAULT;
3079
3080	return btrfs_wait_for_commit(fs_info, transid);
3081	}
3082
3083	static long btrfs_ioctl_scrub(struct file file, void* __user *arg)
3084	{
3085	struct btrfs_fs_info *fs_info = inode_to_fs_info(file_inode(file));
3086	struct btrfs_ioctl_scrub_args *sa;
3087	int ret;
3088
3089	if (!capable(CAP_SYS_ADMIN))
3090	return -EPERM;
3091
3092	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
3093	btrfs_err(fs_info, "scrub: extent tree v2 not yet supported");
3094	return -EINVAL;
3095	}
3096
3097	sa = memdup_user(arg, sizeof(*sa));
3098	if (IS_ERR(ptr: sa))
3099	return PTR_ERR(ptr: sa);
3100
3101	if (sa->flags & ~BTRFS_SCRUB_SUPPORTED_FLAGS) {
3102	ret = -EOPNOTSUPP;
3103	goto out;
3104	}
3105
3106	if (!(sa->flags & BTRFS_SCRUB_READONLY)) {
3107	ret = mnt_want_write_file(file);
3108	if (ret)
3109	goto out;
3110	}
3111
3112	ret = btrfs_scrub_dev(fs_info, devid: sa->devid, start: sa->start, end: sa->end,
3113	progress: &sa->progress, readonly: sa->flags & BTRFS_SCRUB_READONLY,
3114	is_dev_replace: `0`);
3115
3116	/*
3117	* Copy scrub args to user space even if btrfs_scrub_dev() returned an
3118	* error. This is important as it allows user space to know how much
3119	* progress scrub has done. For example, if scrub is canceled we get
3120	* -ECANCELED from btrfs_scrub_dev() and return that error back to user
3121	* space. Later user space can inspect the progress from the structure
3122	* btrfs_ioctl_scrub_args and resume scrub from where it left off
3123	* previously (btrfs-progs does this).
3124	* If we fail to copy the btrfs_ioctl_scrub_args structure to user space
3125	* then return -EFAULT to signal the structure was not copied or it may
3126	* be corrupt and unreliable due to a partial copy.
3127	*/
3128	if (copy_to_user(to: arg, from: sa, n: sizeof(*sa)))
3129	ret = -EFAULT;
3130
3131	if (!(sa->flags & BTRFS_SCRUB_READONLY))
3132	mnt_drop_write_file(file);
3133	out:
3134	kfree(objp: sa);
3135	return ret;
3136	}
3137
3138	static long btrfs_ioctl_scrub_cancel(struct btrfs_fs_info *fs_info)
3139	{
3140	if (!capable(CAP_SYS_ADMIN))
3141	return -EPERM;
3142
3143	return btrfs_scrub_cancel(info: fs_info);
3144	}
3145
3146	static long btrfs_ioctl_scrub_progress(struct btrfs_fs_info *fs_info,
3147	void __user *arg)
3148	{
3149	struct btrfs_ioctl_scrub_args *sa;
3150	int ret;
3151
3152	if (!capable(CAP_SYS_ADMIN))
3153	return -EPERM;
3154
3155	sa = memdup_user(arg, sizeof(*sa));
3156	if (IS_ERR(ptr: sa))
3157	return PTR_ERR(ptr: sa);
3158
3159	ret = btrfs_scrub_progress(fs_info, devid: sa->devid, progress: &sa->progress);
3160
3161	if (ret == `0` && copy_to_user(to: arg, from: sa, n: sizeof(*sa)))
3162	ret = -EFAULT;
3163
3164	kfree(objp: sa);
3165	return ret;
3166	}
3167
3168	static long btrfs_ioctl_get_dev_stats(struct btrfs_fs_info *fs_info,
3169	void __user *arg)
3170	{
3171	struct btrfs_ioctl_get_dev_stats *sa;
3172	int ret;
3173
3174	sa = memdup_user(arg, sizeof(*sa));
3175	if (IS_ERR(ptr: sa))
3176	return PTR_ERR(ptr: sa);
3177
3178	if ((sa->flags & BTRFS_DEV_STATS_RESET) && !capable(CAP_SYS_ADMIN)) {
3179	kfree(objp: sa);
3180	return -EPERM;
3181	}
3182
3183	ret = btrfs_get_dev_stats(fs_info, stats: sa);
3184
3185	if (ret == `0` && copy_to_user(to: arg, from: sa, n: sizeof(*sa)))
3186	ret = -EFAULT;
3187
3188	kfree(objp: sa);
3189	return ret;
3190	}
3191
3192	static long btrfs_ioctl_dev_replace(struct btrfs_fs_info *fs_info,
3193	void __user *arg)
3194	{
3195	struct btrfs_ioctl_dev_replace_args *p;
3196	int ret;
3197
3198	if (!capable(CAP_SYS_ADMIN))
3199	return -EPERM;
3200
3201	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
3202	btrfs_err(fs_info, "device replace not supported on extent tree v2 yet");
3203	return -EINVAL;
3204	}
3205
3206	p = memdup_user(arg, sizeof(*p));
3207	if (IS_ERR(ptr: p))
3208	return PTR_ERR(ptr: p);
3209
3210	switch (p->cmd) {
3211	case BTRFS_IOCTL_DEV_REPLACE_CMD_START:
3212	if (sb_rdonly(sb: fs_info->sb)) {
3213	ret = -EROFS;
3214	goto out;
3215	}
3216	if (!btrfs_exclop_start(fs_info, type: BTRFS_EXCLOP_DEV_REPLACE)) {
3217	ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
3218	} else {
3219	ret = btrfs_dev_replace_by_ioctl(fs_info, args: p);
3220	btrfs_exclop_finish(fs_info);
3221	}
3222	break;
3223	case BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS:
3224	btrfs_dev_replace_status(fs_info, args: p);
3225	ret = `0`;
3226	break;
3227	case BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL:
3228	p->result = btrfs_dev_replace_cancel(fs_info);
3229	ret = `0`;
3230	break;
3231	default:
3232	ret = -EINVAL;
3233	break;
3234	}
3235
3236	if ((ret == `0` \|\| ret == -ECANCELED) && copy_to_user(to: arg, from: p, n: sizeof(*p)))
3237	ret = -EFAULT;
3238	out:
3239	kfree(objp: p);
3240	return ret;
3241	}
3242
3243	static long btrfs_ioctl_ino_to_path(struct btrfs_root root, void* __user *arg)
3244	{
3245	int ret = `0`;
3246	int i;
3247	u64 rel_ptr;
3248	int size;
3249	struct btrfs_ioctl_ino_path_args *ipa = NULL;
3250	struct inode_fs_paths *ipath __free(inode_fs_paths) = NULL;
3251	struct btrfs_path *path;
3252
3253	if (!capable(CAP_DAC_READ_SEARCH))
3254	return -EPERM;
3255
3256	path = btrfs_alloc_path();
3257	if (!path) {
3258	ret = -ENOMEM;
3259	goto out;
3260	}
3261
3262	ipa = memdup_user(arg, sizeof(*ipa));
3263	if (IS_ERR(ptr: ipa)) {
3264	ret = PTR_ERR(ptr: ipa);
3265	ipa = NULL;
3266	goto out;
3267	}
3268
3269	size = min_t(u32, ipa->size, `4096`);
3270	ipath = init_ipath(total_bytes: size, fs_root: root, path);
3271	if (IS_ERR(ptr: ipath)) {
3272	ret = PTR_ERR(ptr: ipath);
3273	ipath = NULL;
3274	goto out;
3275	}
3276
3277	ret = paths_from_inode(inum: ipa->inum, ipath);
3278	if (ret < `0`)
3279	goto out;
3280
3281	for (i = `0`; i < ipath->fspath->elem_cnt; ++i) {
3282	rel_ptr = ipath->fspath->val[i] -
3283	(u64)(unsigned long)ipath->fspath->val;
3284	ipath->fspath->val[i] = rel_ptr;
3285	}
3286
3287	btrfs_free_path(p: path);
3288	path = NULL;
3289	ret = copy_to_user(to: (void __user )(unsigned* long)ipa->fspath,
3290	from: ipath->fspath, n: size);
3291	if (ret) {
3292	ret = -EFAULT;
3293	goto out;
3294	}
3295
3296	out:
3297	btrfs_free_path(p: path);
3298	kfree(objp: ipa);
3299
3300	return ret;
3301	}
3302
3303	static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
3304	void __user arg, int* version)
3305	{
3306	int ret = `0`;
3307	int size;
3308	struct btrfs_ioctl_logical_ino_args *loi;
3309	struct btrfs_data_container *inodes = NULL;
3310	bool ignore_offset;
3311
3312	if (!capable(CAP_SYS_ADMIN))
3313	return -EPERM;
3314
3315	loi = memdup_user(arg, sizeof(*loi));
3316	if (IS_ERR(ptr: loi))
3317	return PTR_ERR(ptr: loi);
3318
3319	if (version == `1`) {
3320	ignore_offset = false;
3321	size = min_t(u32, loi->size, SZ_64K);
3322	} else {
3323	/ All reserved bits must be 0 for now /
3324	if (memchr_inv(p: loi->reserved, c: `0`, size: sizeof(loi->reserved))) {
3325	ret = -EINVAL;
3326	goto out_loi;
3327	}
3328	/ Only accept flags we have defined so far /
3329	if (loi->flags & ~(BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET)) {
3330	ret = -EINVAL;
3331	goto out_loi;
3332	}
3333	ignore_offset = loi->flags & BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET;
3334	size = min_t(u32, loi->size, SZ_16M);
3335	}
3336
3337	inodes = init_data_container(total_bytes: size);
3338	if (IS_ERR(ptr: inodes)) {
3339	ret = PTR_ERR(ptr: inodes);
3340	goto out_loi;
3341	}
3342
3343	ret = iterate_inodes_from_logical(logical: loi->logical, fs_info, ctx: inodes, ignore_offset);
3344	if (ret == -EINVAL)
3345	ret = -ENOENT;
3346	if (ret < `0`)
3347	goto out;
3348
3349	ret = copy_to_user(to: (void __user )(unsigned* long)loi->inodes, from: inodes,
3350	n: size);
3351	if (ret)
3352	ret = -EFAULT;
3353
3354	out:
3355	kvfree(addr: inodes);
3356	out_loi:
3357	kfree(objp: loi);
3358
3359	return ret;
3360	}
3361
3362	void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
3363	struct btrfs_ioctl_balance_args *bargs)
3364	{
3365	struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3366
3367	bargs->flags = bctl->flags;
3368
3369	if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags))
3370	bargs->state \|= BTRFS_BALANCE_STATE_RUNNING;
3371	if (atomic_read(v: &fs_info->balance_pause_req))
3372	bargs->state \|= BTRFS_BALANCE_STATE_PAUSE_REQ;
3373	if (atomic_read(v: &fs_info->balance_cancel_req))
3374	bargs->state \|= BTRFS_BALANCE_STATE_CANCEL_REQ;
3375
3376	memcpy(&bargs->data, &bctl->data, sizeof(bargs->data));
3377	memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta));
3378	memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys));
3379
3380	spin_lock(lock: &fs_info->balance_lock);
3381	memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
3382	spin_unlock(lock: &fs_info->balance_lock);
3383	}
3384
3385	/*
3386	* Try to acquire fs_info::balance_mutex as well as set BTRFS_EXLCOP_BALANCE as
3387	* required.
3388	*
3389	* @fs_info: the filesystem
3390	* @excl_acquired: ptr to boolean value which is set to false in case balance
3391	* is being resumed
3392	*
3393	* Return 0 on success in which case both fs_info::balance is acquired as well
3394	* as exclusive ops are blocked. In case of failure return an error code.
3395	*/
3396	static int btrfs_try_lock_balance(struct btrfs_fs_info fs_info, bool excl_acquired)
3397	{
3398	int ret;
3399
3400	/*
3401	* Exclusive operation is locked. Three possibilities:
3402	* (1) some other op is running
3403	* (2) balance is running
3404	* (3) balance is paused -- special case (think resume)
3405	*/
3406	while (`1`) {
3407	if (btrfs_exclop_start(fs_info, type: BTRFS_EXCLOP_BALANCE)) {
3408	*excl_acquired = true;
3409	mutex_lock(&fs_info->balance_mutex);
3410	return `0`;
3411	}
3412
3413	mutex_lock(&fs_info->balance_mutex);
3414	if (fs_info->balance_ctl) {
3415	/ This is either (2) or (3) /
3416	if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
3417	/ This is (2) /
3418	ret = -EINPROGRESS;
3419	goto out_failure;
3420
3421	} else {
3422	mutex_unlock(lock: &fs_info->balance_mutex);
3423	/*
3424	* Lock released to allow other waiters to
3425	* continue, we'll reexamine the status again.
3426	*/
3427	mutex_lock(&fs_info->balance_mutex);
3428
3429	if (fs_info->balance_ctl &&
3430	!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
3431	/ This is (3) /
3432	*excl_acquired = false;
3433	return `0`;
3434	}
3435	}
3436	} else {
3437	/ This is (1) /
3438	ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
3439	goto out_failure;
3440	}
3441
3442	mutex_unlock(lock: &fs_info->balance_mutex);
3443	}
3444
3445	out_failure:
3446	mutex_unlock(lock: &fs_info->balance_mutex);
3447	*excl_acquired = false;
3448	return ret;
3449	}
3450
3451	static long btrfs_ioctl_balance(struct file file, void* __user *arg)
3452	{
3453	struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
3454	struct btrfs_fs_info *fs_info = root->fs_info;
3455	struct btrfs_ioctl_balance_args *bargs;
3456	struct btrfs_balance_control *bctl;
3457	bool need_unlock = true;
3458	int ret;
3459
3460	if (!capable(CAP_SYS_ADMIN))
3461	return -EPERM;
3462
3463	ret = mnt_want_write_file(file);
3464	if (ret)
3465	return ret;
3466
3467	bargs = memdup_user(arg, sizeof(*bargs));
3468	if (IS_ERR(ptr: bargs)) {
3469	ret = PTR_ERR(ptr: bargs);
3470	bargs = NULL;
3471	goto out;
3472	}
3473
3474	ret = btrfs_try_lock_balance(fs_info, excl_acquired: &need_unlock);
3475	if (ret)
3476	goto out;
3477
3478	lockdep_assert_held(&fs_info->balance_mutex);
3479
3480	if (bargs->flags & BTRFS_BALANCE_RESUME) {
3481	if (!fs_info->balance_ctl) {
3482	ret = -ENOTCONN;
3483	goto out_unlock;
3484	}
3485
3486	bctl = fs_info->balance_ctl;
3487	spin_lock(lock: &fs_info->balance_lock);
3488	bctl->flags \|= BTRFS_BALANCE_RESUME;
3489	spin_unlock(lock: &fs_info->balance_lock);
3490	btrfs_exclop_balance(fs_info, op: BTRFS_EXCLOP_BALANCE);
3491
3492	goto do_balance;
3493	}
3494
3495	if (bargs->flags & ~(BTRFS_BALANCE_ARGS_MASK \| BTRFS_BALANCE_TYPE_MASK)) {
3496	ret = -EINVAL;
3497	goto out_unlock;
3498	}
3499
3500	if (fs_info->balance_ctl) {
3501	ret = -EINPROGRESS;
3502	goto out_unlock;
3503	}
3504
3505	bctl = kzalloc(sizeof(*bctl), GFP_KERNEL);
3506	if (!bctl) {
3507	ret = -ENOMEM;
3508	goto out_unlock;
3509	}
3510
3511	memcpy(&bctl->data, &bargs->data, sizeof(bctl->data));
3512	memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta));
3513	memcpy(&bctl->sys, &bargs->sys, sizeof(bctl->sys));
3514
3515	bctl->flags = bargs->flags;
3516	do_balance:
3517	/*
3518	* Ownership of bctl and exclusive operation goes to btrfs_balance.
3519	* bctl is freed in reset_balance_state, or, if restriper was paused
3520	* all the way until unmount, in free_fs_info. The flag should be
3521	* cleared after reset_balance_state.
3522	*/
3523	need_unlock = false;
3524
3525	ret = btrfs_balance(fs_info, bctl, bargs);
3526	bctl = NULL;
3527
3528	if (ret == `0` \|\| ret == -ECANCELED) {
3529	if (copy_to_user(to: arg, from: bargs, n: sizeof(*bargs)))
3530	ret = -EFAULT;
3531	}
3532
3533	kfree(objp: bctl);
3534	out_unlock:
3535	mutex_unlock(lock: &fs_info->balance_mutex);
3536	if (need_unlock)
3537	btrfs_exclop_finish(fs_info);
3538	out:
3539	mnt_drop_write_file(file);
3540	kfree(objp: bargs);
3541	return ret;
3542	}
3543
3544	static long btrfs_ioctl_balance_ctl(struct btrfs_fs_info fs_info, int* cmd)
3545	{
3546	if (!capable(CAP_SYS_ADMIN))
3547	return -EPERM;
3548
3549	switch (cmd) {
3550	case BTRFS_BALANCE_CTL_PAUSE:
3551	return btrfs_pause_balance(fs_info);
3552	case BTRFS_BALANCE_CTL_CANCEL:
3553	return btrfs_cancel_balance(fs_info);
3554	}
3555
3556	return -EINVAL;
3557	}
3558
3559	static long btrfs_ioctl_balance_progress(struct btrfs_fs_info *fs_info,
3560	void __user *arg)
3561	{
3562	struct btrfs_ioctl_balance_args AUTO_KFREE(bargs);
3563	int ret = `0`;
3564
3565	if (!capable(CAP_SYS_ADMIN))
3566	return -EPERM;
3567
3568	mutex_lock(&fs_info->balance_mutex);
3569	if (!fs_info->balance_ctl) {
3570	ret = -ENOTCONN;
3571	goto out;
3572	}
3573
3574	bargs = kzalloc(sizeof(*bargs), GFP_KERNEL);
3575	if (!bargs) {
3576	ret = -ENOMEM;
3577	goto out;
3578	}
3579
3580	btrfs_update_ioctl_balance_args(fs_info, bargs);
3581
3582	if (copy_to_user(to: arg, from: bargs, n: sizeof(*bargs)))
3583	ret = -EFAULT;
3584	out:
3585	mutex_unlock(lock: &fs_info->balance_mutex);
3586	return ret;
3587	}
3588
3589	static long btrfs_ioctl_quota_ctl(struct file file, void* __user *arg)
3590	{
3591	struct inode *inode = file_inode(f: file);
3592	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
3593	struct btrfs_ioctl_quota_ctl_args *sa;
3594	int ret;
3595
3596	if (!capable(CAP_SYS_ADMIN))
3597	return -EPERM;
3598
3599	ret = mnt_want_write_file(file);
3600	if (ret)
3601	return ret;
3602
3603	sa = memdup_user(arg, sizeof(*sa));
3604	if (IS_ERR(ptr: sa)) {
3605	ret = PTR_ERR(ptr: sa);
3606	goto drop_write;
3607	}
3608
3609	switch (sa->cmd) {
3610	case BTRFS_QUOTA_CTL_ENABLE:
3611	case BTRFS_QUOTA_CTL_ENABLE_SIMPLE_QUOTA:
3612	down_write(sem: &fs_info->subvol_sem);
3613	ret = btrfs_quota_enable(fs_info, quota_ctl_args: sa);
3614	up_write(sem: &fs_info->subvol_sem);
3615	break;
3616	case BTRFS_QUOTA_CTL_DISABLE:
3617	/*
3618	* Lock the cleaner mutex to prevent races with concurrent
3619	* relocation, because relocation may be building backrefs for
3620	* blocks of the quota root while we are deleting the root. This
3621	* is like dropping fs roots of deleted snapshots/subvolumes, we
3622	* need the same protection.
3623	*
3624	* This also prevents races between concurrent tasks trying to
3625	* disable quotas, because we will unlock and relock
3626	* qgroup_ioctl_lock across BTRFS_FS_QUOTA_ENABLED changes.
3627	*
3628	* We take this here because we have the dependency of
3629	*
3630	* inode_lock -> subvol_sem
3631	*
3632	* because of rename. With relocation we can prealloc extents,
3633	* so that makes the dependency chain
3634	*
3635	* cleaner_mutex -> inode_lock -> subvol_sem
3636	*
3637	* so we must take the cleaner_mutex here before we take the
3638	* subvol_sem. The deadlock can't actually happen, but this
3639	* quiets lockdep.
3640	*/
3641	mutex_lock(&fs_info->cleaner_mutex);
3642	down_write(sem: &fs_info->subvol_sem);
3643	ret = btrfs_quota_disable(fs_info);
3644	up_write(sem: &fs_info->subvol_sem);
3645	mutex_unlock(lock: &fs_info->cleaner_mutex);
3646	break;
3647	default:
3648	ret = -EINVAL;
3649	break;
3650	}
3651
3652	kfree(objp: sa);
3653	drop_write:
3654	mnt_drop_write_file(file);
3655	return ret;
3656	}
3657
3658	static long btrfs_ioctl_qgroup_assign(struct file file, void* __user *arg)
3659	{
3660	struct inode *inode = file_inode(f: file);
3661	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
3662	struct btrfs_root *root = BTRFS_I(inode)->root;
3663	struct btrfs_ioctl_qgroup_assign_args *sa;
3664	struct btrfs_qgroup_list *prealloc = NULL;
3665	struct btrfs_trans_handle *trans;
3666	int ret;
3667	int err;
3668
3669	if (!capable(CAP_SYS_ADMIN))
3670	return -EPERM;
3671
3672	if (!btrfs_qgroup_enabled(fs_info))
3673	return -ENOTCONN;
3674
3675	ret = mnt_want_write_file(file);
3676	if (ret)
3677	return ret;
3678
3679	sa = memdup_user(arg, sizeof(*sa));
3680	if (IS_ERR(ptr: sa)) {
3681	ret = PTR_ERR(ptr: sa);
3682	goto drop_write;
3683	}
3684
3685	if (sa->assign) {
3686	prealloc = kzalloc(sizeof(*prealloc), GFP_KERNEL);
3687	if (!prealloc) {
3688	ret = -ENOMEM;
3689	goto out;
3690	}
3691	}
3692
3693	trans = btrfs_join_transaction(root);
3694	if (IS_ERR(ptr: trans)) {
3695	ret = PTR_ERR(ptr: trans);
3696	goto out;
3697	}
3698
3699	/*
3700	* Prealloc ownership is moved to the relation handler, there it's used
3701	* or freed on error.
3702	*/
3703	if (sa->assign) {
3704	ret = btrfs_add_qgroup_relation(trans, src: sa->src, dst: sa->dst, prealloc);
3705	prealloc = NULL;
3706	} else {
3707	ret = btrfs_del_qgroup_relation(trans, src: sa->src, dst: sa->dst);
3708	}
3709
3710	/ update qgroup status and info /
3711	mutex_lock(&fs_info->qgroup_ioctl_lock);
3712	err = btrfs_run_qgroups(trans);
3713	mutex_unlock(lock: &fs_info->qgroup_ioctl_lock);
3714	if (err < `0`)
3715	btrfs_warn(fs_info,
3716	"qgroup status update failed after %s relation, marked as inconsistent",
3717	sa->assign ? "adding" : "deleting");
3718	err = btrfs_end_transaction(trans);
3719	if (err && !ret)
3720	ret = err;
3721
3722	out:
3723	kfree(objp: prealloc);
3724	kfree(objp: sa);
3725	drop_write:
3726	mnt_drop_write_file(file);
3727	return ret;
3728	}
3729
3730	static long btrfs_ioctl_qgroup_create(struct file file, void* __user *arg)
3731	{
3732	struct inode *inode = file_inode(f: file);
3733	struct btrfs_root *root = BTRFS_I(inode)->root;
3734	struct btrfs_ioctl_qgroup_create_args *sa;
3735	struct btrfs_trans_handle *trans;
3736	int ret;
3737	int err;
3738
3739	if (!capable(CAP_SYS_ADMIN))
3740	return -EPERM;
3741
3742	if (!btrfs_qgroup_enabled(fs_info: root->fs_info))
3743	return -ENOTCONN;
3744
3745	ret = mnt_want_write_file(file);
3746	if (ret)
3747	return ret;
3748
3749	sa = memdup_user(arg, sizeof(*sa));
3750	if (IS_ERR(ptr: sa)) {
3751	ret = PTR_ERR(ptr: sa);
3752	goto drop_write;
3753	}
3754
3755	if (!sa->qgroupid) {
3756	ret = -EINVAL;
3757	goto out;
3758	}
3759
3760	if (sa->create && btrfs_is_fstree(rootid: sa->qgroupid)) {
3761	ret = -EINVAL;
3762	goto out;
3763	}
3764
3765	trans = btrfs_join_transaction(root);
3766	if (IS_ERR(ptr: trans)) {
3767	ret = PTR_ERR(ptr: trans);
3768	goto out;
3769	}
3770
3771	if (sa->create) {
3772	ret = btrfs_create_qgroup(trans, qgroupid: sa->qgroupid);
3773	} else {
3774	ret = btrfs_remove_qgroup(trans, qgroupid: sa->qgroupid);
3775	}
3776
3777	err = btrfs_end_transaction(trans);
3778	if (err && !ret)
3779	ret = err;
3780
3781	out:
3782	kfree(objp: sa);
3783	drop_write:
3784	mnt_drop_write_file(file);
3785	return ret;
3786	}
3787
3788	static long btrfs_ioctl_qgroup_limit(struct file file, void* __user *arg)
3789	{
3790	struct inode *inode = file_inode(f: file);
3791	struct btrfs_root *root = BTRFS_I(inode)->root;
3792	struct btrfs_ioctl_qgroup_limit_args *sa;
3793	struct btrfs_trans_handle *trans;
3794	int ret;
3795	int err;
3796	u64 qgroupid;
3797
3798	if (!capable(CAP_SYS_ADMIN))
3799	return -EPERM;
3800
3801	if (!btrfs_qgroup_enabled(fs_info: root->fs_info))
3802	return -ENOTCONN;
3803
3804	ret = mnt_want_write_file(file);
3805	if (ret)
3806	return ret;
3807
3808	sa = memdup_user(arg, sizeof(*sa));
3809	if (IS_ERR(ptr: sa)) {
3810	ret = PTR_ERR(ptr: sa);
3811	goto drop_write;
3812	}
3813
3814	trans = btrfs_join_transaction(root);
3815	if (IS_ERR(ptr: trans)) {
3816	ret = PTR_ERR(ptr: trans);
3817	goto out;
3818	}
3819
3820	qgroupid = sa->qgroupid;
3821	if (!qgroupid) {
3822	/ take the current subvol as qgroup /
3823	qgroupid = btrfs_root_id(root);
3824	}
3825
3826	ret = btrfs_limit_qgroup(trans, qgroupid, limit: &sa->lim);
3827
3828	err = btrfs_end_transaction(trans);
3829	if (err && !ret)
3830	ret = err;
3831
3832	out:
3833	kfree(objp: sa);
3834	drop_write:
3835	mnt_drop_write_file(file);
3836	return ret;
3837	}
3838
3839	static long btrfs_ioctl_quota_rescan(struct file file, void* __user *arg)
3840	{
3841	struct inode *inode = file_inode(f: file);
3842	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
3843	struct btrfs_ioctl_quota_rescan_args *qsa;
3844	int ret;
3845
3846	if (!capable(CAP_SYS_ADMIN))
3847	return -EPERM;
3848
3849	if (!btrfs_qgroup_enabled(fs_info))
3850	return -ENOTCONN;
3851
3852	ret = mnt_want_write_file(file);
3853	if (ret)
3854	return ret;
3855
3856	qsa = memdup_user(arg, sizeof(*qsa));
3857	if (IS_ERR(ptr: qsa)) {
3858	ret = PTR_ERR(ptr: qsa);
3859	goto drop_write;
3860	}
3861
3862	if (qsa->flags) {
3863	ret = -EINVAL;
3864	goto out;
3865	}
3866
3867	ret = btrfs_qgroup_rescan(fs_info);
3868
3869	out:
3870	kfree(objp: qsa);
3871	drop_write:
3872	mnt_drop_write_file(file);
3873	return ret;
3874	}
3875
3876	static long btrfs_ioctl_quota_rescan_status(struct btrfs_fs_info *fs_info,
3877	void __user *arg)
3878	{
3879	struct btrfs_ioctl_quota_rescan_args qsa = {`0`};
3880
3881	if (!capable(CAP_SYS_ADMIN))
3882	return -EPERM;
3883
3884	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
3885	qsa.flags = `1`;
3886	qsa.progress = fs_info->qgroup_rescan_progress.objectid;
3887	}
3888
3889	if (copy_to_user(to: arg, from: &qsa, n: sizeof(qsa)))
3890	return -EFAULT;
3891
3892	return `0`;
3893	}
3894
3895	static long btrfs_ioctl_quota_rescan_wait(struct btrfs_fs_info *fs_info)
3896	{
3897	if (!capable(CAP_SYS_ADMIN))
3898	return -EPERM;
3899
3900	return btrfs_qgroup_wait_for_completion(fs_info, interruptible: true);
3901	}
3902
3903	static long _btrfs_ioctl_set_received_subvol(struct file *file,
3904	struct mnt_idmap *idmap,
3905	struct btrfs_ioctl_received_subvol_args *sa)
3906	{
3907	struct inode *inode = file_inode(f: file);
3908	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
3909	struct btrfs_root *root = BTRFS_I(inode)->root;
3910	struct btrfs_root_item *root_item = &root->root_item;
3911	struct btrfs_trans_handle *trans;
3912	struct timespec64 ct = current_time(inode);
3913	int ret = `0`;
3914	int received_uuid_changed;
3915
3916	if (!inode_owner_or_capable(idmap, inode))
3917	return -EPERM;
3918
3919	ret = mnt_want_write_file(file);
3920	if (ret < `0`)
3921	return ret;
3922
3923	down_write(sem: &fs_info->subvol_sem);
3924
3925	if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
3926	ret = -EINVAL;
3927	goto out;
3928	}
3929
3930	if (btrfs_root_readonly(root)) {
3931	ret = -EROFS;
3932	goto out;
3933	}
3934
3935	/*
3936	* 1 - root item
3937	* 2 - uuid items (received uuid + subvol uuid)
3938	*/
3939	trans = btrfs_start_transaction(root, num_items: `3`);
3940	if (IS_ERR(ptr: trans)) {
3941	ret = PTR_ERR(ptr: trans);
3942	trans = NULL;
3943	goto out;
3944	}
3945
3946	sa->rtransid = trans->transid;
3947	sa->rtime.sec = ct.tv_sec;
3948	sa->rtime.nsec = ct.tv_nsec;
3949
3950	received_uuid_changed = memcmp(p: root_item->received_uuid, q: sa->uuid,
3951	BTRFS_UUID_SIZE);
3952	if (received_uuid_changed &&
3953	!btrfs_is_empty_uuid(uuid: root_item->received_uuid)) {
3954	ret = btrfs_uuid_tree_remove(trans, uuid: root_item->received_uuid,
3955	BTRFS_UUID_KEY_RECEIVED_SUBVOL,
3956	subid: btrfs_root_id(root));
3957	if (unlikely(ret && ret != -ENOENT)) {
3958	btrfs_abort_transaction(trans, ret);
3959	btrfs_end_transaction(trans);
3960	goto out;
3961	}
3962	}
3963	memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE);
3964	btrfs_set_root_stransid(s: root_item, val: sa->stransid);
3965	btrfs_set_root_rtransid(s: root_item, val: sa->rtransid);
3966	btrfs_set_stack_timespec_sec(s: &root_item->stime, val: sa->stime.sec);
3967	btrfs_set_stack_timespec_nsec(s: &root_item->stime, val: sa->stime.nsec);
3968	btrfs_set_stack_timespec_sec(s: &root_item->rtime, val: sa->rtime.sec);
3969	btrfs_set_stack_timespec_nsec(s: &root_item->rtime, val: sa->rtime.nsec);
3970
3971	ret = btrfs_update_root(trans, root: fs_info->tree_root,
3972	key: &root->root_key, item: &root->root_item);
3973	if (ret < `0`) {
3974	btrfs_end_transaction(trans);
3975	goto out;
3976	}
3977	if (received_uuid_changed && !btrfs_is_empty_uuid(uuid: sa->uuid)) {
3978	ret = btrfs_uuid_tree_add(trans, uuid: sa->uuid,
3979	BTRFS_UUID_KEY_RECEIVED_SUBVOL,
3980	subid: btrfs_root_id(root));
3981	if (unlikely(ret < `0` && ret != -EEXIST)) {
3982	btrfs_abort_transaction(trans, ret);
3983	btrfs_end_transaction(trans);
3984	goto out;
3985	}
3986	}
3987	ret = btrfs_commit_transaction(trans);
3988	out:
3989	up_write(sem: &fs_info->subvol_sem);
3990	mnt_drop_write_file(file);
3991	return ret;
3992	}
3993
#ifdef CONFIG_64BIT
/*
 * Variant of the "set received subvolume" ioctl taking
 * struct btrfs_ioctl_received_subvol_args_32.
 *
 * The fields are copied one by one into the regular args structure, the
 * common handler is run, and on success the (updated) fields are copied
 * back into the 32-bit layout and returned to userspace.
 */
static long btrfs_ioctl_set_received_subvol_32(struct file *file,
						void __user *arg)
{
	struct btrfs_ioctl_received_subvol_args_32 *args32 = NULL;
	struct btrfs_ioctl_received_subvol_args *args64 = NULL;
	int ret = 0;

	args32 = memdup_user(arg, sizeof(*args32));
	if (IS_ERR(args32))
		return PTR_ERR(args32);

	args64 = kmalloc(sizeof(*args64), GFP_KERNEL);
	if (!args64) {
		ret = -ENOMEM;
		goto out;
	}

	memcpy(args64->uuid, args32->uuid, BTRFS_UUID_SIZE);
	args64->stransid = args32->stransid;
	args64->rtransid = args32->rtransid;
	args64->stime.sec = args32->stime.sec;
	args64->stime.nsec = args32->stime.nsec;
	args64->rtime.sec = args32->rtime.sec;
	args64->rtime.nsec = args32->rtime.nsec;
	args64->flags = args32->flags;

	ret = _btrfs_ioctl_set_received_subvol(file, file_mnt_idmap(file), args64);
	if (ret)
		goto out;

	memcpy(args32->uuid, args64->uuid, BTRFS_UUID_SIZE);
	args32->stransid = args64->stransid;
	args32->rtransid = args64->rtransid;
	args32->stime.sec = args64->stime.sec;
	args32->stime.nsec = args64->stime.nsec;
	args32->rtime.sec = args64->rtime.sec;
	args32->rtime.nsec = args64->rtime.nsec;
	args32->flags = args64->flags;

	ret = copy_to_user(arg, args32, sizeof(*args32));
	if (ret)
		ret = -EFAULT;

out:
	kfree(args32);
	kfree(args64);
	return ret;
}
#endif
4044
4045	static long btrfs_ioctl_set_received_subvol(struct file *file,
4046	void __user *arg)
4047	{
4048	struct btrfs_ioctl_received_subvol_args *sa = NULL;
4049	int ret = `0`;
4050
4051	sa = memdup_user(arg, sizeof(*sa));
4052	if (IS_ERR(ptr: sa))
4053	return PTR_ERR(ptr: sa);
4054
4055	ret = _btrfs_ioctl_set_received_subvol(file, idmap: file_mnt_idmap(file), sa);
4056
4057	if (ret)
4058	goto out;
4059
4060	ret = copy_to_user(to: arg, from: sa, n: sizeof(*sa));
4061	if (ret)
4062	ret = -EFAULT;
4063
4064	out:
4065	kfree(objp: sa);
4066	return ret;
4067	}
4068
4069	static int btrfs_ioctl_get_fslabel(struct btrfs_fs_info *fs_info,
4070	void __user *arg)
4071	{
4072	size_t len;
4073	int ret;
4074	char label[BTRFS_LABEL_SIZE];
4075
4076	spin_lock(lock: &fs_info->super_lock);
4077	memcpy(label, fs_info->super_copy->label, BTRFS_LABEL_SIZE);
4078	spin_unlock(lock: &fs_info->super_lock);
4079
4080	len = strnlen(p: label, BTRFS_LABEL_SIZE);
4081
4082	if (len == BTRFS_LABEL_SIZE) {
4083	btrfs_warn(fs_info,
4084	"label is too long, return the first %zu bytes",
4085	--len);
4086	}
4087
4088	ret = copy_to_user(to: arg, from: label, n: len);
4089
4090	return ret ? -EFAULT : `0`;
4091	}
4092
4093	static int btrfs_ioctl_set_fslabel(struct file file, void* __user *arg)
4094	{
4095	struct inode *inode = file_inode(f: file);
4096	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
4097	struct btrfs_root *root = BTRFS_I(inode)->root;
4098	struct btrfs_super_block *super_block = fs_info->super_copy;
4099	struct btrfs_trans_handle *trans;
4100	char label[BTRFS_LABEL_SIZE];
4101	int ret;
4102
4103	if (!capable(CAP_SYS_ADMIN))
4104	return -EPERM;
4105
4106	if (copy_from_user(to: label, from: arg, n: sizeof(label)))
4107	return -EFAULT;
4108
4109	if (strnlen(p: label, BTRFS_LABEL_SIZE) == BTRFS_LABEL_SIZE) {
4110	btrfs_err(fs_info,
4111	"unable to set label with more than %d bytes",
4112	BTRFS_LABEL_SIZE - `1`);
4113	return -EINVAL;
4114	}
4115
4116	ret = mnt_want_write_file(file);
4117	if (ret)
4118	return ret;
4119
4120	trans = btrfs_start_transaction(root, num_items: `0`);
4121	if (IS_ERR(ptr: trans)) {
4122	ret = PTR_ERR(ptr: trans);
4123	goto out_unlock;
4124	}
4125
4126	spin_lock(lock: &fs_info->super_lock);
4127	strscpy(super_block->label, label);
4128	spin_unlock(lock: &fs_info->super_lock);
4129	ret = btrfs_commit_transaction(trans);
4130
4131	out_unlock:
4132	mnt_drop_write_file(file);
4133	return ret;
4134	}
4135
/*
 * Initializer for a struct with compat_flags, compat_ro_flags and
 * incompat_flags members, taking the values from the
 * BTRFS_FEATURE_{COMPAT,COMPAT_RO,INCOMPAT}_<suffix> constants.
 */
#define INIT_FEATURE_FLAGS(suffix) \
	{ .compat_flags = BTRFS_FEATURE_COMPAT_##suffix, \
	  .compat_ro_flags = BTRFS_FEATURE_COMPAT_RO_##suffix, \
	  .incompat_flags = BTRFS_FEATURE_INCOMPAT_##suffix }
4140
4141	int btrfs_ioctl_get_supported_features(void __user *arg)
4142	{
4143	static const struct btrfs_ioctl_feature_flags features[`3`] = {
4144	INIT_FEATURE_FLAGS(SUPP),
4145	INIT_FEATURE_FLAGS(SAFE_SET),
4146	INIT_FEATURE_FLAGS(SAFE_CLEAR)
4147	};
4148
4149	if (copy_to_user(to: arg, from: &features, n: sizeof(features)))
4150	return -EFAULT;
4151
4152	return `0`;
4153	}
4154
4155	static int btrfs_ioctl_get_features(struct btrfs_fs_info *fs_info,
4156	void __user *arg)
4157	{
4158	struct btrfs_super_block *super_block = fs_info->super_copy;
4159	struct btrfs_ioctl_feature_flags features;
4160
4161	features.compat_flags = btrfs_super_compat_flags(s: super_block);
4162	features.compat_ro_flags = btrfs_super_compat_ro_flags(s: super_block);
4163	features.incompat_flags = btrfs_super_incompat_flags(s: super_block);
4164
4165	if (copy_to_user(to: arg, from: &features, n: sizeof(features)))
4166	return -EFAULT;
4167
4168	return `0`;
4169	}
4170
4171	static int check_feature_bits(const struct btrfs_fs_info *fs_info,
4172	enum btrfs_feature_set set,
4173	u64 change_mask, u64 flags, u64 supported_flags,
4174	u64 safe_set, u64 safe_clear)
4175	{
4176	const char *type = btrfs_feature_set_name(set);
4177	const char AUTO_KFREE(names);
4178	u64 disallowed, unsupported;
4179	u64 set_mask = flags & change_mask;
4180	u64 clear_mask = ~flags & change_mask;
4181
4182	unsupported = set_mask & ~supported_flags;
4183	if (unsupported) {
4184	names = btrfs_printable_features(set, flags: unsupported);
4185	if (names)
4186	btrfs_warn(fs_info,
4187	"this kernel does not support the %s feature bit%s",
4188	names, strchr(names, `','`) ? "s" : "");
4189	else
4190	btrfs_warn(fs_info,
4191	"this kernel does not support %s bits 0x%llx",
4192	type, unsupported);
4193	return -EOPNOTSUPP;
4194	}
4195
4196	disallowed = set_mask & ~safe_set;
4197	if (disallowed) {
4198	names = btrfs_printable_features(set, flags: disallowed);
4199	if (names)
4200	btrfs_warn(fs_info,
4201	"can't set the %s feature bit%s while mounted",
4202	names, strchr(names, `','`) ? "s" : "");
4203	else
4204	btrfs_warn(fs_info,
4205	"can't set %s bits 0x%llx while mounted",
4206	type, disallowed);
4207	return -EPERM;
4208	}
4209
4210	disallowed = clear_mask & ~safe_clear;
4211	if (disallowed) {
4212	names = btrfs_printable_features(set, flags: disallowed);
4213	if (names)
4214	btrfs_warn(fs_info,
4215	"can't clear the %s feature bit%s while mounted",
4216	names, strchr(names, `','`) ? "s" : "");
4217	else
4218	btrfs_warn(fs_info,
4219	"can't clear %s bits 0x%llx while mounted",
4220	type, disallowed);
4221	return -EPERM;
4222	}
4223
4224	return `0`;
4225	}
4226
/*
 * Call check_feature_bits() for the feature set named by mask_base, passing
 * FEAT_<mask_base> and the matching BTRFS_FEATURE_<mask_base>_SUPP,
 * _SAFE_SET and _SAFE_CLEAR masks.
 */
#define check_feature(fs_info, change_mask, flags, mask_base)	\
check_feature_bits(fs_info, FEAT_##mask_base, change_mask, flags,	\
		   BTRFS_FEATURE_ ## mask_base ## _SUPP,	\
		   BTRFS_FEATURE_ ## mask_base ## _SAFE_SET,	\
		   BTRFS_FEATURE_ ## mask_base ## _SAFE_CLEAR)
4232
4233	static int btrfs_ioctl_set_features(struct file file, void* __user *arg)
4234	{
4235	struct inode *inode = file_inode(f: file);
4236	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
4237	struct btrfs_root *root = BTRFS_I(inode)->root;
4238	struct btrfs_super_block *super_block = fs_info->super_copy;
4239	struct btrfs_ioctl_feature_flags flags[`2`];
4240	struct btrfs_trans_handle *trans;
4241	u64 newflags;
4242	int ret;
4243
4244	if (!capable(CAP_SYS_ADMIN))
4245	return -EPERM;
4246
4247	if (copy_from_user(to: flags, from: arg, n: sizeof(flags)))
4248	return -EFAULT;
4249
4250	/ Nothing to do /
4251	if (!flags[`0`].compat_flags && !flags[`0`].compat_ro_flags &&
4252	!flags[`0`].incompat_flags)
4253	return `0`;
4254
4255	ret = check_feature(fs_info, flags[`0`].compat_flags,
4256	flags[`1`].compat_flags, COMPAT);
4257	if (ret)
4258	return ret;
4259
4260	ret = check_feature(fs_info, flags[`0`].compat_ro_flags,
4261	flags[`1`].compat_ro_flags, COMPAT_RO);
4262	if (ret)
4263	return ret;
4264
4265	ret = check_feature(fs_info, flags[`0`].incompat_flags,
4266	flags[`1`].incompat_flags, INCOMPAT);
4267	if (ret)
4268	return ret;
4269
4270	ret = mnt_want_write_file(file);
4271	if (ret)
4272	return ret;
4273
4274	trans = btrfs_start_transaction(root, num_items: `0`);
4275	if (IS_ERR(ptr: trans)) {
4276	ret = PTR_ERR(ptr: trans);
4277	goto out_drop_write;
4278	}
4279
4280	spin_lock(lock: &fs_info->super_lock);
4281	newflags = btrfs_super_compat_flags(s: super_block);
4282	newflags \|= flags[`0`].compat_flags & flags[`1`].compat_flags;
4283	newflags &= ~(flags[`0`].compat_flags & ~flags[`1`].compat_flags);
4284	btrfs_set_super_compat_flags(s: super_block, val: newflags);
4285
4286	newflags = btrfs_super_compat_ro_flags(s: super_block);
4287	newflags \|= flags[`0`].compat_ro_flags & flags[`1`].compat_ro_flags;
4288	newflags &= ~(flags[`0`].compat_ro_flags & ~flags[`1`].compat_ro_flags);
4289	btrfs_set_super_compat_ro_flags(s: super_block, val: newflags);
4290
4291	newflags = btrfs_super_incompat_flags(s: super_block);
4292	newflags \|= flags[`0`].incompat_flags & flags[`1`].incompat_flags;
4293	newflags &= ~(flags[`0`].incompat_flags & ~flags[`1`].incompat_flags);
4294	btrfs_set_super_incompat_flags(s: super_block, val: newflags);
4295	spin_unlock(lock: &fs_info->super_lock);
4296
4297	ret = btrfs_commit_transaction(trans);
4298	out_drop_write:
4299	mnt_drop_write_file(file);
4300
4301	return ret;
4302	}
4303
4304	static int _btrfs_ioctl_send(struct btrfs_root root, void* __user *argp, bool compat)
4305	{
4306	struct btrfs_ioctl_send_args *arg;
4307	int ret;
4308
4309	if (compat) {
4310	#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
4311	struct btrfs_ioctl_send_args_32 args32 = { `0` };
4312
4313	ret = copy_from_user(to: &args32, from: argp, n: sizeof(args32));
4314	if (ret)
4315	return -EFAULT;
4316	arg = kzalloc(sizeof(*arg), GFP_KERNEL);
4317	if (!arg)
4318	return -ENOMEM;
4319	arg->send_fd = args32.send_fd;
4320	arg->clone_sources_count = args32.clone_sources_count;
4321	arg->clone_sources = compat_ptr(uptr: args32.clone_sources);
4322	arg->parent_root = args32.parent_root;
4323	arg->flags = args32.flags;
4324	arg->version = args32.version;
4325	memcpy(arg->reserved, args32.reserved,
4326	sizeof(args32.reserved));
4327	#else
4328	return -ENOTTY;
4329	#endif
4330	} else {
4331	arg = memdup_user(argp, sizeof(*arg));
4332	if (IS_ERR(ptr: arg))
4333	return PTR_ERR(ptr: arg);
4334	}
4335	ret = btrfs_ioctl_send(send_root: root, arg);
4336	kfree(objp: arg);
4337	return ret;
4338	}
4339
4340	static int btrfs_ioctl_encoded_read(struct file file, void* __user *argp,
4341	bool compat)
4342	{
4343	struct btrfs_ioctl_encoded_io_args args = { `0` };
4344	size_t copy_end_kernel = offsetofend(struct btrfs_ioctl_encoded_io_args,
4345	flags);
4346	size_t copy_end;
4347	struct btrfs_inode *inode = BTRFS_I(file_inode(file));
4348	struct btrfs_fs_info *fs_info = inode->root->fs_info;
4349	struct extent_io_tree *io_tree = &inode->io_tree;
4350	struct iovec iovstack[UIO_FASTIOV];
4351	struct iovec *iov = iovstack;
4352	struct iov_iter iter;
4353	loff_t pos;
4354	struct kiocb kiocb;
4355	ssize_t ret;
4356	u64 disk_bytenr, disk_io_size;
4357	struct extent_state *cached_state = NULL;
4358
4359	if (!capable(CAP_SYS_ADMIN)) {
4360	ret = -EPERM;
4361	goto out_acct;
4362	}
4363
4364	if (compat) {
4365	#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
4366	struct btrfs_ioctl_encoded_io_args_32 args32;
4367
4368	copy_end = offsetofend(struct btrfs_ioctl_encoded_io_args_32,
4369	flags);
4370	if (copy_from_user(to: &args32, from: argp, n: copy_end)) {
4371	ret = -EFAULT;
4372	goto out_acct;
4373	}
4374	args.iov = compat_ptr(uptr: args32.iov);
4375	args.iovcnt = args32.iovcnt;
4376	args.offset = args32.offset;
4377	args.flags = args32.flags;
4378	#else
4379	return -ENOTTY;
4380	#endif
4381	} else {
4382	copy_end = copy_end_kernel;
4383	if (copy_from_user(to: &args, from: argp, n: copy_end)) {
4384	ret = -EFAULT;
4385	goto out_acct;
4386	}
4387	}
4388	if (args.flags != `0`) {
4389	ret = -EINVAL;
4390	goto out_acct;
4391	}
4392
4393	ret = import_iovec(ITER_DEST, uvec: args.iov, nr_segs: args.iovcnt, ARRAY_SIZE(iovstack),
4394	iovp: &iov, i: &iter);
4395	if (ret < `0`)
4396	goto out_acct;
4397
4398	if (iov_iter_count(i: &iter) == `0`) {
4399	ret = `0`;
4400	goto out_iov;
4401	}
4402	pos = args.offset;
4403	ret = rw_verify_area(READ, file, &pos, args.len);
4404	if (ret < `0`)
4405	goto out_iov;
4406
4407	init_sync_kiocb(kiocb: &kiocb, filp: file);
4408	kiocb.ki_pos = pos;
4409
4410	ret = btrfs_encoded_read(iocb: &kiocb, iter: &iter, encoded: &args, cached_state: &cached_state,
4411	disk_bytenr: &disk_bytenr, disk_io_size: &disk_io_size);
4412
4413	if (ret == -EIOCBQUEUED) {
4414	bool unlocked = false;
4415	u64 start, lockend, count;
4416
4417	start = ALIGN_DOWN(kiocb.ki_pos, fs_info->sectorsize);
4418	lockend = start + BTRFS_MAX_UNCOMPRESSED - `1`;
4419
4420	if (args.compression)
4421	count = disk_io_size;
4422	else
4423	count = args.len;
4424
4425	ret = btrfs_encoded_read_regular(iocb: &kiocb, iter: &iter, start, lockend,
4426	cached_state: &cached_state, disk_bytenr,
4427	disk_io_size, count,
4428	compressed: args.compression, unlocked: &unlocked);
4429
4430	if (!unlocked) {
4431	btrfs_unlock_extent(tree: io_tree, start, end: lockend, cached: &cached_state);
4432	btrfs_inode_unlock(inode, ilock_flags: BTRFS_ILOCK_SHARED);
4433	}
4434	}
4435
4436	if (ret >= `0`) {
4437	fsnotify_access(file);
4438	if (copy_to_user(to: argp + copy_end,
4439	from: (char *)&args + copy_end_kernel,
4440	n: sizeof(args) - copy_end_kernel))
4441	ret = -EFAULT;
4442	}
4443
4444	out_iov:
4445	kfree(objp: iov);
4446	out_acct:
4447	if (ret > `0`)
4448	add_rchar(current, amt: ret);
4449	inc_syscr(current);
4450	return ret;
4451	}
4452
4453	static int btrfs_ioctl_encoded_write(struct file file, void* __user *argp, bool compat)
4454	{
4455	struct btrfs_ioctl_encoded_io_args args;
4456	struct iovec iovstack[UIO_FASTIOV];
4457	struct iovec *iov = iovstack;
4458	struct iov_iter iter;
4459	loff_t pos;
4460	struct kiocb kiocb;
4461	ssize_t ret;
4462
4463	if (!capable(CAP_SYS_ADMIN)) {
4464	ret = -EPERM;
4465	goto out_acct;
4466	}
4467
4468	if (!(file->f_mode & FMODE_WRITE)) {
4469	ret = -EBADF;
4470	goto out_acct;
4471	}
4472
4473	if (compat) {
4474	#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
4475	struct btrfs_ioctl_encoded_io_args_32 args32;
4476
4477	if (copy_from_user(to: &args32, from: argp, n: sizeof(args32))) {
4478	ret = -EFAULT;
4479	goto out_acct;
4480	}
4481	args.iov = compat_ptr(uptr: args32.iov);
4482	args.iovcnt = args32.iovcnt;
4483	args.offset = args32.offset;
4484	args.flags = args32.flags;
4485	args.len = args32.len;
4486	args.unencoded_len = args32.unencoded_len;
4487	args.unencoded_offset = args32.unencoded_offset;
4488	args.compression = args32.compression;
4489	args.encryption = args32.encryption;
4490	memcpy(args.reserved, args32.reserved, sizeof(args.reserved));
4491	#else
4492	return -ENOTTY;
4493	#endif
4494	} else {
4495	if (copy_from_user(to: &args, from: argp, n: sizeof(args))) {
4496	ret = -EFAULT;
4497	goto out_acct;
4498	}
4499	}
4500
4501	ret = -EINVAL;
4502	if (args.flags != `0`)
4503	goto out_acct;
4504	if (memchr_inv(p: args.reserved, c: `0`, size: sizeof(args.reserved)))
4505	goto out_acct;
4506	if (args.compression == BTRFS_ENCODED_IO_COMPRESSION_NONE &&
4507	args.encryption == BTRFS_ENCODED_IO_ENCRYPTION_NONE)
4508	goto out_acct;
4509	if (args.compression >= BTRFS_ENCODED_IO_COMPRESSION_TYPES \|\|
4510	args.encryption >= BTRFS_ENCODED_IO_ENCRYPTION_TYPES)
4511	goto out_acct;
4512	if (args.unencoded_offset > args.unencoded_len)
4513	goto out_acct;
4514	if (args.len > args.unencoded_len - args.unencoded_offset)
4515	goto out_acct;
4516
4517	ret = import_iovec(ITER_SOURCE, uvec: args.iov, nr_segs: args.iovcnt, ARRAY_SIZE(iovstack),
4518	iovp: &iov, i: &iter);
4519	if (ret < `0`)
4520	goto out_acct;
4521
4522	if (iov_iter_count(i: &iter) == `0`) {
4523	ret = `0`;
4524	goto out_iov;
4525	}
4526	pos = args.offset;
4527	ret = rw_verify_area(WRITE, file, &pos, args.len);
4528	if (ret < `0`)
4529	goto out_iov;
4530
4531	init_sync_kiocb(kiocb: &kiocb, filp: file);
4532	ret = kiocb_set_rw_flags(ki: &kiocb, flags: `0`, WRITE);
4533	if (ret)
4534	goto out_iov;
4535	kiocb.ki_pos = pos;
4536
4537	file_start_write(file);
4538
4539	ret = btrfs_do_write_iter(iocb: &kiocb, from: &iter, encoded: &args);
4540	if (ret > `0`)
4541	fsnotify_modify(file);
4542
4543	file_end_write(file);
4544	out_iov:
4545	kfree(objp: iov);
4546	out_acct:
4547	if (ret > `0`)
4548	add_wchar(current, amt: ret);
4549	inc_syscw(current);
4550	return ret;
4551	}
4552
4553	struct btrfs_uring_encoded_data {
4554	struct btrfs_ioctl_encoded_io_args args;
4555	struct iovec iovstack[UIO_FASTIOV];
4556	struct iovec *iov;
4557	struct iov_iter iter;
4558	};
4559
4560	/*
4561	* Context that's attached to an encoded read io_uring command, in cmd->pdu. It
4562	* contains the fields in btrfs_uring_read_extent that are necessary to finish
4563	* off and cleanup the I/O in btrfs_uring_read_finished.
4564	*/
4565	struct btrfs_uring_priv {
4566	struct io_uring_cmd *cmd;
4567	struct page **pages;
4568	unsigned long nr_pages;
4569	struct kiocb iocb;
4570	struct iovec *iov;
4571	struct iov_iter iter;
4572	struct extent_state *cached_state;
4573	u64 count;
4574	u64 start;
4575	u64 lockend;
4576	int err;
4577	bool compressed;
4578	};
4579
/* Layout of the io_uring command pdu as used by the btrfs uring commands. */
struct io_btrfs_cmd {
	/* Argument state kept across reissues of the command. */
	struct btrfs_uring_encoded_data *data;
	/* Read completion context, set by btrfs_uring_read_extent_endio(). */
	struct btrfs_uring_priv *priv;
};
4584
4585	static void btrfs_uring_read_finished(struct io_tw_req tw_req, io_tw_token_t tw)
4586	{
4587	struct io_uring_cmd *cmd = io_uring_cmd_from_tw(tw_req);
4588	struct io_btrfs_cmd bc = io_uring_cmd_to_pdu(cmd, struct* io_btrfs_cmd);
4589	struct btrfs_uring_priv *priv = bc->priv;
4590	struct btrfs_inode *inode = BTRFS_I(file_inode(priv->iocb.ki_filp));
4591	struct extent_io_tree *io_tree = &inode->io_tree;
4592	pgoff_t index;
4593	u64 cur;
4594	size_t page_offset;
4595	ssize_t ret;
4596
4597	/ The inode lock has already been acquired in btrfs_uring_read_extent. /
4598	btrfs_lockdep_inode_acquire(inode, i_rwsem);
4599
4600	if (priv->err) {
4601	ret = priv->err;
4602	goto out;
4603	}
4604
4605	if (priv->compressed) {
4606	index = `0`;
4607	page_offset = `0`;
4608	} else {
4609	index = (priv->iocb.ki_pos - priv->start) >> PAGE_SHIFT;
4610	page_offset = offset_in_page(priv->iocb.ki_pos - priv->start);
4611	}
4612	cur = `0`;
4613	while (cur < priv->count) {
4614	size_t bytes = min_t(size_t, priv->count - cur, PAGE_SIZE - page_offset);
4615
4616	if (copy_page_to_iter(page: priv->pages[index], offset: page_offset, bytes,
4617	i: &priv->iter) != bytes) {
4618	ret = -EFAULT;
4619	goto out;
4620	}
4621
4622	index++;
4623	cur += bytes;
4624	page_offset = `0`;
4625	}
4626	ret = priv->count;
4627
4628	out:
4629	btrfs_unlock_extent(tree: io_tree, start: priv->start, end: priv->lockend, cached: &priv->cached_state);
4630	btrfs_inode_unlock(inode, ilock_flags: BTRFS_ILOCK_SHARED);
4631
4632	io_uring_cmd_done(ioucmd: cmd, ret, IO_URING_CMD_TASK_WORK_ISSUE_FLAGS);
4633	add_rchar(current, amt: ret);
4634
4635	for (index = `0`; index < priv->nr_pages; index++)
4636	__free_page(priv->pages[index]);
4637
4638	kfree(objp: priv->pages);
4639	kfree(objp: priv->iov);
4640	kfree(objp: priv);
4641	kfree(objp: bc->data);
4642	}
4643
4644	void btrfs_uring_read_extent_endio(void ctx, int* err)
4645	{
4646	struct btrfs_uring_priv *priv = ctx;
4647	struct io_btrfs_cmd bc = io_uring_cmd_to_pdu(priv->cmd, struct* io_btrfs_cmd);
4648
4649	priv->err = err;
4650	bc->priv = priv;
4651
4652	io_uring_cmd_complete_in_task(ioucmd: priv->cmd, task_work_cb: btrfs_uring_read_finished);
4653	}
4654
4655	static int btrfs_uring_read_extent(struct kiocb iocb, struct* iov_iter *iter,
4656	u64 start, u64 lockend,
4657	struct extent_state *cached_state,
4658	u64 disk_bytenr, u64 disk_io_size,
4659	size_t count, bool compressed,
4660	struct iovec iov, struct* io_uring_cmd *cmd)
4661	{
4662	struct btrfs_inode *inode = BTRFS_I(file_inode(iocb->ki_filp));
4663	struct extent_io_tree *io_tree = &inode->io_tree;
4664	struct page **pages;
4665	struct btrfs_uring_priv *priv = NULL;
4666	unsigned long nr_pages;
4667	int ret;
4668
4669	nr_pages = DIV_ROUND_UP(disk_io_size, PAGE_SIZE);
4670	pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
4671	if (!pages)
4672	return -ENOMEM;
4673	ret = btrfs_alloc_page_array(nr_pages, page_array: pages, nofail: `0`);
4674	if (ret) {
4675	ret = -ENOMEM;
4676	goto out_fail;
4677	}
4678
4679	priv = kmalloc(sizeof(*priv), GFP_NOFS);
4680	if (!priv) {
4681	ret = -ENOMEM;
4682	goto out_fail;
4683	}
4684
4685	priv->iocb = *iocb;
4686	priv->iov = iov;
4687	priv->iter = *iter;
4688	priv->count = count;
4689	priv->cmd = cmd;
4690	priv->cached_state = cached_state;
4691	priv->compressed = compressed;
4692	priv->nr_pages = nr_pages;
4693	priv->pages = pages;
4694	priv->start = start;
4695	priv->lockend = lockend;
4696	priv->err = `0`;
4697
4698	ret = btrfs_encoded_read_regular_fill_pages(inode, disk_bytenr,
4699	disk_io_size, pages, uring_ctx: priv);
4700	if (ret && ret != -EIOCBQUEUED)
4701	goto out_fail;
4702
4703	/*
4704	* If we return -EIOCBQUEUED, we're deferring the cleanup to
4705	* btrfs_uring_read_finished(), which will handle unlocking the extent
4706	* and inode and freeing the allocations.
4707	*/
4708
4709	/*
4710	* We're returning to userspace with the inode lock held, and that's
4711	* okay - it'll get unlocked in a worker thread. Call
4712	* btrfs_lockdep_inode_release() to avoid confusing lockdep.
4713	*/
4714	btrfs_lockdep_inode_release(inode, i_rwsem);
4715
4716	return -EIOCBQUEUED;
4717
4718	out_fail:
4719	btrfs_unlock_extent(tree: io_tree, start, end: lockend, cached: &cached_state);
4720	btrfs_inode_unlock(inode, ilock_flags: BTRFS_ILOCK_SHARED);
4721	kfree(objp: priv);
4722	return ret;
4723	}
4724
4725	static int btrfs_uring_encoded_read(struct io_uring_cmd cmd, unsigned* int issue_flags)
4726	{
4727	struct file *file = cmd->file;
4728	struct btrfs_inode *inode = BTRFS_I(file->f_inode);
4729	struct extent_io_tree *io_tree = &inode->io_tree;
4730	struct btrfs_fs_info *fs_info = inode->root->fs_info;
4731	size_t copy_end_kernel = offsetofend(struct btrfs_ioctl_encoded_io_args, flags);
4732	size_t copy_end;
4733	int ret;
4734	u64 disk_bytenr, disk_io_size;
4735	loff_t pos;
4736	struct kiocb kiocb;
4737	struct extent_state *cached_state = NULL;
4738	u64 start, lockend;
4739	void __user *sqe_addr;
4740	struct io_btrfs_cmd bc = io_uring_cmd_to_pdu(cmd, struct* io_btrfs_cmd);
4741	struct btrfs_uring_encoded_data *data = NULL;
4742
4743	if (cmd->flags & IORING_URING_CMD_REISSUE)
4744	data = bc->data;
4745
4746	if (!capable(CAP_SYS_ADMIN)) {
4747	ret = -EPERM;
4748	goto out_acct;
4749	}
4750	sqe_addr = u64_to_user_ptr(READ_ONCE(cmd->sqe->addr));
4751
4752	if (issue_flags & IO_URING_F_COMPAT) {
4753	#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
4754	copy_end = offsetofend(struct btrfs_ioctl_encoded_io_args_32, flags);
4755	#else
4756	ret = -ENOTTY;
4757	goto out_acct;
4758	#endif
4759	} else {
4760	copy_end = copy_end_kernel;
4761	}
4762
4763	if (!data) {
4764	data = kzalloc(sizeof(*data), GFP_NOFS);
4765	if (!data) {
4766	ret = -ENOMEM;
4767	goto out_acct;
4768	}
4769
4770	bc->data = data;
4771
4772	if (issue_flags & IO_URING_F_COMPAT) {
4773	#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
4774	struct btrfs_ioctl_encoded_io_args_32 args32;
4775
4776	if (copy_from_user(to: &args32, from: sqe_addr, n: copy_end)) {
4777	ret = -EFAULT;
4778	goto out_acct;
4779	}
4780
4781	data->args.iov = compat_ptr(uptr: args32.iov);
4782	data->args.iovcnt = args32.iovcnt;
4783	data->args.offset = args32.offset;
4784	data->args.flags = args32.flags;
4785	#endif
4786	} else {
4787	if (copy_from_user(to: &data->args, from: sqe_addr, n: copy_end)) {
4788	ret = -EFAULT;
4789	goto out_acct;
4790	}
4791	}
4792
4793	if (data->args.flags != `0`) {
4794	ret = -EINVAL;
4795	goto out_acct;
4796	}
4797
4798	data->iov = data->iovstack;
4799	ret = import_iovec(ITER_DEST, uvec: data->args.iov, nr_segs: data->args.iovcnt,
4800	ARRAY_SIZE(data->iovstack), iovp: &data->iov,
4801	i: &data->iter);
4802	if (ret < `0`)
4803	goto out_acct;
4804
4805	if (iov_iter_count(i: &data->iter) == `0`) {
4806	ret = `0`;
4807	goto out_free;
4808	}
4809	}
4810
4811	pos = data->args.offset;
4812	ret = rw_verify_area(READ, file, &pos, data->args.len);
4813	if (ret < `0`)
4814	goto out_free;
4815
4816	init_sync_kiocb(kiocb: &kiocb, filp: file);
4817	kiocb.ki_pos = pos;
4818
4819	if (issue_flags & IO_URING_F_NONBLOCK)
4820	kiocb.ki_flags \|= IOCB_NOWAIT;
4821
4822	start = ALIGN_DOWN(pos, fs_info->sectorsize);
4823	lockend = start + BTRFS_MAX_UNCOMPRESSED - `1`;
4824
4825	ret = btrfs_encoded_read(iocb: &kiocb, iter: &data->iter, encoded: &data->args, cached_state: &cached_state,
4826	disk_bytenr: &disk_bytenr, disk_io_size: &disk_io_size);
4827	if (ret == -EAGAIN)
4828	goto out_acct;
4829	if (ret < `0` && ret != -EIOCBQUEUED)
4830	goto out_free;
4831
4832	file_accessed(file);
4833
4834	if (copy_to_user(to: sqe_addr + copy_end,
4835	from: (const char *)&data->args + copy_end_kernel,
4836	n: sizeof(data->args) - copy_end_kernel)) {
4837	if (ret == -EIOCBQUEUED) {
4838	btrfs_unlock_extent(tree: io_tree, start, end: lockend, cached: &cached_state);
4839	btrfs_inode_unlock(inode, ilock_flags: BTRFS_ILOCK_SHARED);
4840	}
4841	ret = -EFAULT;
4842	goto out_free;
4843	}
4844
4845	if (ret == -EIOCBQUEUED) {
4846	u64 count = min_t(u64, iov_iter_count(&data->iter), disk_io_size);
4847
4848	/ Match ioctl by not returning past EOF if uncompressed. /
4849	if (!data->args.compression)
4850	count = min_t(u64, count, data->args.len);
4851
4852	ret = btrfs_uring_read_extent(iocb: &kiocb, iter: &data->iter, start, lockend,
4853	cached_state, disk_bytenr, disk_io_size,
4854	count, compressed: data->args.compression,
4855	iov: data->iov, cmd);
4856
4857	goto out_acct;
4858	}
4859
4860	out_free:
4861	kfree(objp: data->iov);
4862
4863	out_acct:
4864	if (ret > `0`)
4865	add_rchar(current, amt: ret);
4866	inc_syscr(current);
4867
4868	if (ret != -EIOCBQUEUED && ret != -EAGAIN)
4869	kfree(objp: data);
4870
4871	return ret;
4872	}
4873
4874	static int btrfs_uring_encoded_write(struct io_uring_cmd cmd, unsigned* int issue_flags)
4875	{
4876	struct file *file = cmd->file;
4877	loff_t pos;
4878	struct kiocb kiocb;
4879	ssize_t ret;
4880	void __user *sqe_addr;
4881	struct io_btrfs_cmd bc = io_uring_cmd_to_pdu(cmd, struct* io_btrfs_cmd);
4882	struct btrfs_uring_encoded_data *data = NULL;
4883
4884	if (cmd->flags & IORING_URING_CMD_REISSUE)
4885	data = bc->data;
4886
4887	if (!capable(CAP_SYS_ADMIN)) {
4888	ret = -EPERM;
4889	goto out_acct;
4890	}
4891	sqe_addr = u64_to_user_ptr(READ_ONCE(cmd->sqe->addr));
4892
4893	if (!(file->f_mode & FMODE_WRITE)) {
4894	ret = -EBADF;
4895	goto out_acct;
4896	}
4897
4898	if (!data) {
4899	data = kzalloc(sizeof(*data), GFP_NOFS);
4900	if (!data) {
4901	ret = -ENOMEM;
4902	goto out_acct;
4903	}
4904
4905	bc->data = data;
4906
4907	if (issue_flags & IO_URING_F_COMPAT) {
4908	#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
4909	struct btrfs_ioctl_encoded_io_args_32 args32;
4910
4911	if (copy_from_user(to: &args32, from: sqe_addr, n: sizeof(args32))) {
4912	ret = -EFAULT;
4913	goto out_acct;
4914	}
4915	data->args.iov = compat_ptr(uptr: args32.iov);
4916	data->args.iovcnt = args32.iovcnt;
4917	data->args.offset = args32.offset;
4918	data->args.flags = args32.flags;
4919	data->args.len = args32.len;
4920	data->args.unencoded_len = args32.unencoded_len;
4921	data->args.unencoded_offset = args32.unencoded_offset;
4922	data->args.compression = args32.compression;
4923	data->args.encryption = args32.encryption;
4924	memcpy(data->args.reserved, args32.reserved,
4925	sizeof(data->args.reserved));
4926	#else
4927	ret = -ENOTTY;
4928	goto out_acct;
4929	#endif
4930	} else {
4931	if (copy_from_user(to: &data->args, from: sqe_addr, n: sizeof(data->args))) {
4932	ret = -EFAULT;
4933	goto out_acct;
4934	}
4935	}
4936
4937	ret = -EINVAL;
4938	if (data->args.flags != `0`)
4939	goto out_acct;
4940	if (memchr_inv(p: data->args.reserved, c: `0`, size: sizeof(data->args.reserved)))
4941	goto out_acct;
4942	if (data->args.compression == BTRFS_ENCODED_IO_COMPRESSION_NONE &&
4943	data->args.encryption == BTRFS_ENCODED_IO_ENCRYPTION_NONE)
4944	goto out_acct;
4945	if (data->args.compression >= BTRFS_ENCODED_IO_COMPRESSION_TYPES \|\|
4946	data->args.encryption >= BTRFS_ENCODED_IO_ENCRYPTION_TYPES)
4947	goto out_acct;
4948	if (data->args.unencoded_offset > data->args.unencoded_len)
4949	goto out_acct;
4950	if (data->args.len > data->args.unencoded_len - data->args.unencoded_offset)
4951	goto out_acct;
4952
4953	data->iov = data->iovstack;
4954	ret = import_iovec(ITER_SOURCE, uvec: data->args.iov, nr_segs: data->args.iovcnt,
4955	ARRAY_SIZE(data->iovstack), iovp: &data->iov,
4956	i: &data->iter);
4957	if (ret < `0`)
4958	goto out_acct;
4959
4960	if (iov_iter_count(i: &data->iter) == `0`) {
4961	ret = `0`;
4962	goto out_iov;
4963	}
4964	}
4965
4966	if (issue_flags & IO_URING_F_NONBLOCK) {
4967	ret = -EAGAIN;
4968	goto out_acct;
4969	}
4970
4971	pos = data->args.offset;
4972	ret = rw_verify_area(WRITE, file, &pos, data->args.len);
4973	if (ret < `0`)
4974	goto out_iov;
4975
4976	init_sync_kiocb(kiocb: &kiocb, filp: file);
4977	ret = kiocb_set_rw_flags(ki: &kiocb, flags: `0`, WRITE);
4978	if (ret)
4979	goto out_iov;
4980	kiocb.ki_pos = pos;
4981
4982	file_start_write(file);
4983
4984	ret = btrfs_do_write_iter(iocb: &kiocb, from: &data->iter, encoded: &data->args);
4985	if (ret > `0`)
4986	fsnotify_modify(file);
4987
4988	file_end_write(file);
4989	out_iov:
4990	kfree(objp: data->iov);
4991	out_acct:
4992	if (ret > `0`)
4993	add_wchar(current, amt: ret);
4994	inc_syscw(current);
4995
4996	if (ret != -EAGAIN)
4997	kfree(objp: data);
4998	return ret;
4999	}
5000
5001	int btrfs_uring_cmd(struct io_uring_cmd cmd, unsigned* int issue_flags)
5002	{
5003	if (unlikely(btrfs_is_shutdown(inode_to_fs_info(file_inode(cmd->file)))))
5004	return -EIO;
5005
5006	switch (cmd->cmd_op) {
5007	case BTRFS_IOC_ENCODED_READ:
5008	#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
5009	case BTRFS_IOC_ENCODED_READ_32:
5010	#endif
5011	return btrfs_uring_encoded_read(cmd, issue_flags);
5012
5013	case BTRFS_IOC_ENCODED_WRITE:
5014	#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
5015	case BTRFS_IOC_ENCODED_WRITE_32:
5016	#endif
5017	return btrfs_uring_encoded_write(cmd, issue_flags);
5018	}
5019
5020	return -EINVAL;
5021	}
5022
5023	static int btrfs_ioctl_subvol_sync(struct btrfs_fs_info fs_info, void* __user *argp)
5024	{
5025	struct btrfs_root *root;
5026	struct btrfs_ioctl_subvol_wait args = { `0` };
5027	signed long sched_ret;
5028	int refs;
5029	u64 root_flags;
5030	bool wait_for_deletion = false;
5031	bool found = false;
5032
5033	if (copy_from_user(to: &args, from: argp, n: sizeof(args)))
5034	return -EFAULT;
5035
5036	switch (args.mode) {
5037	case BTRFS_SUBVOL_SYNC_WAIT_FOR_QUEUED:
5038	/*
5039	* Wait for the first one deleted that waits until all previous
5040	* are cleaned.
5041	*/
5042	spin_lock(lock: &fs_info->trans_lock);
5043	if (!list_empty(head: &fs_info->dead_roots)) {
5044	root = list_last_entry(&fs_info->dead_roots,
5045	struct btrfs_root, root_list);
5046	args.subvolid = btrfs_root_id(root);
5047	found = true;
5048	}
5049	spin_unlock(lock: &fs_info->trans_lock);
5050	if (!found)
5051	return -ENOENT;
5052
5053	fallthrough;
5054	case BTRFS_SUBVOL_SYNC_WAIT_FOR_ONE:
5055	if ((`0` < args.subvolid && args.subvolid < BTRFS_FIRST_FREE_OBJECTID) \|\|
5056	BTRFS_LAST_FREE_OBJECTID < args.subvolid)
5057	return -EINVAL;
5058	break;
5059	case BTRFS_SUBVOL_SYNC_COUNT:
5060	spin_lock(lock: &fs_info->trans_lock);
5061	args.count = list_count_nodes(head: &fs_info->dead_roots);
5062	spin_unlock(lock: &fs_info->trans_lock);
5063	if (copy_to_user(to: argp, from: &args, n: sizeof(args)))
5064	return -EFAULT;
5065	return `0`;
5066	case BTRFS_SUBVOL_SYNC_PEEK_FIRST:
5067	spin_lock(lock: &fs_info->trans_lock);
5068	/ Last in the list was deleted first. /
5069	if (!list_empty(head: &fs_info->dead_roots)) {
5070	root = list_last_entry(&fs_info->dead_roots,
5071	struct btrfs_root, root_list);
5072	args.subvolid = btrfs_root_id(root);
5073	} else {
5074	args.subvolid = `0`;
5075	}
5076	spin_unlock(lock: &fs_info->trans_lock);
5077	if (copy_to_user(to: argp, from: &args, n: sizeof(args)))
5078	return -EFAULT;
5079	return `0`;
5080	case BTRFS_SUBVOL_SYNC_PEEK_LAST:
5081	spin_lock(lock: &fs_info->trans_lock);
5082	/ First in the list was deleted last. /
5083	if (!list_empty(head: &fs_info->dead_roots)) {
5084	root = list_first_entry(&fs_info->dead_roots,
5085	struct btrfs_root, root_list);
5086	args.subvolid = btrfs_root_id(root);
5087	} else {
5088	args.subvolid = `0`;
5089	}
5090	spin_unlock(lock: &fs_info->trans_lock);
5091	if (copy_to_user(to: argp, from: &args, n: sizeof(args)))
5092	return -EFAULT;
5093	return `0`;
5094	default:
5095	return -EINVAL;
5096	}
5097
5098	/ 32bit limitation: fs_roots_radix key is not wide enough. /
5099	if (sizeof(unsigned long) != sizeof(u64) && args.subvolid > U32_MAX)
5100	return -EOVERFLOW;
5101
5102	while (`1`) {
5103	/ Wait for the specific one. /
5104	if (down_read_interruptible(sem: &fs_info->subvol_sem) == -EINTR)
5105	return -EINTR;
5106	refs = -`1`;
5107	spin_lock(lock: &fs_info->fs_roots_radix_lock);
5108	root = radix_tree_lookup(&fs_info->fs_roots_radix,
5109	(unsigned long)args.subvolid);
5110	if (root) {
5111	spin_lock(lock: &root->root_item_lock);
5112	refs = btrfs_root_refs(s: &root->root_item);
5113	root_flags = btrfs_root_flags(s: &root->root_item);
5114	spin_unlock(lock: &root->root_item_lock);
5115	}
5116	spin_unlock(lock: &fs_info->fs_roots_radix_lock);
5117	up_read(sem: &fs_info->subvol_sem);
5118
5119	/ Subvolume does not exist. /
5120	if (!root)
5121	return -ENOENT;
5122
5123	/ Subvolume not deleted at all. /
5124	if (refs > `0`)
5125	return -EEXIST;
5126	/ We've waited and now the subvolume is gone. /
5127	if (wait_for_deletion && refs == -`1`) {
5128	/ Return the one we waited for as the last one. /
5129	if (copy_to_user(to: argp, from: &args, n: sizeof(args)))
5130	return -EFAULT;
5131	return `0`;
5132	}
5133
5134	/ Subvolume not found on the first try (deleted or never existed). /
5135	if (refs == -`1`)
5136	return -ENOENT;
5137
5138	wait_for_deletion = true;
5139	ASSERT(root_flags & BTRFS_ROOT_SUBVOL_DEAD);
5140	sched_ret = schedule_timeout_interruptible(HZ);
5141	/ Early wake up or error. /
5142	if (sched_ret != `0`)
5143	return -EINTR;
5144	}
5145
5146	return `0`;
5147	}
5148
#ifdef CONFIG_BTRFS_EXPERIMENTAL
/*
 * Handle BTRFS_IOC_SHUTDOWN.
 *
 * @fs_info: filesystem to shut down
 * @arg:     userspace pointer to a u32 holding the shutdown flags
 *
 * Requires CAP_SYS_ADMIN. For the DEFAULT and LOGFLUSH flags the superblock
 * is frozen around the forced shutdown; for NOLOGFLUSH the shutdown is forced
 * directly. Returns 0 if the filesystem is already shut down, otherwise 0 or
 * a negative errno.
 */
static int btrfs_ioctl_shutdown(struct btrfs_fs_info *fs_info, unsigned long arg)
{
	u32 flags;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (get_user(flags, (u32 __user *)arg))
		return -EFAULT;

	if (flags >= BTRFS_SHUTDOWN_FLAGS_LAST)
		return -EINVAL;

	if (btrfs_is_shutdown(fs_info))
		return 0;

	if (flags == BTRFS_SHUTDOWN_FLAGS_NOLOGFLUSH) {
		btrfs_force_shutdown(fs_info);
		return 0;
	}

	/* Any other value below the limit is accepted as a no-op. */
	if (flags != BTRFS_SHUTDOWN_FLAGS_LOGFLUSH &&
	    flags != BTRFS_SHUTDOWN_FLAGS_DEFAULT)
		return 0;

	ret = freeze_super(fs_info->sb, FREEZE_HOLDER_KERNEL, NULL);
	if (ret)
		return ret;
	btrfs_force_shutdown(fs_info);
	return thaw_super(fs_info->sb, FREEZE_HOLDER_KERNEL, NULL);
}
#endif
5185
5186	long btrfs_ioctl(struct file file, unsigned* int
5187	cmd, unsigned long arg)
5188	{
5189	struct inode *inode = file_inode(f: file);
5190	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
5191	struct btrfs_root *root = BTRFS_I(inode)->root;
5192	void __user argp = (void* __user *)arg;
5193
5194	switch (cmd) {
5195	case FS_IOC_GETVERSION:
5196	return btrfs_ioctl_getversion(inode, arg: argp);
5197	case FS_IOC_GETFSLABEL:
5198	return btrfs_ioctl_get_fslabel(fs_info, arg: argp);
5199	case FS_IOC_SETFSLABEL:
5200	return btrfs_ioctl_set_fslabel(file, arg: argp);
5201	case FITRIM:
5202	return btrfs_ioctl_fitrim(fs_info, arg: argp);
5203	case BTRFS_IOC_SNAP_CREATE:
5204	return btrfs_ioctl_snap_create(file, arg: argp, subvol: false);
5205	case BTRFS_IOC_SNAP_CREATE_V2:
5206	return btrfs_ioctl_snap_create_v2(file, arg: argp, subvol: false);
5207	case BTRFS_IOC_SUBVOL_CREATE:
5208	return btrfs_ioctl_snap_create(file, arg: argp, subvol: true);
5209	case BTRFS_IOC_SUBVOL_CREATE_V2:
5210	return btrfs_ioctl_snap_create_v2(file, arg: argp, subvol: true);
5211	case BTRFS_IOC_SNAP_DESTROY:
5212	return btrfs_ioctl_snap_destroy(file, arg: argp, destroy_v2: false);
5213	case BTRFS_IOC_SNAP_DESTROY_V2:
5214	return btrfs_ioctl_snap_destroy(file, arg: argp, destroy_v2: true);
5215	case BTRFS_IOC_SUBVOL_GETFLAGS:
5216	return btrfs_ioctl_subvol_getflags(BTRFS_I(inode), arg: argp);
5217	case BTRFS_IOC_SUBVOL_SETFLAGS:
5218	return btrfs_ioctl_subvol_setflags(file, arg: argp);
5219	case BTRFS_IOC_DEFAULT_SUBVOL:
5220	return btrfs_ioctl_default_subvol(file, argp);
5221	case BTRFS_IOC_DEFRAG:
5222	return btrfs_ioctl_defrag(file, NULL);
5223	case BTRFS_IOC_DEFRAG_RANGE:
5224	return btrfs_ioctl_defrag(file, argp);
5225	case BTRFS_IOC_RESIZE:
5226	return btrfs_ioctl_resize(file, arg: argp);
5227	case BTRFS_IOC_ADD_DEV:
5228	return btrfs_ioctl_add_dev(fs_info, arg: argp);
5229	case BTRFS_IOC_RM_DEV:
5230	return btrfs_ioctl_rm_dev(file, arg: argp);
5231	case BTRFS_IOC_RM_DEV_V2:
5232	return btrfs_ioctl_rm_dev_v2(file, arg: argp);
5233	case BTRFS_IOC_FS_INFO:
5234	return btrfs_ioctl_fs_info(fs_info, arg: argp);
5235	case BTRFS_IOC_DEV_INFO:
5236	return btrfs_ioctl_dev_info(fs_info, arg: argp);
5237	case BTRFS_IOC_TREE_SEARCH:
5238	return btrfs_ioctl_tree_search(root, argp);
5239	case BTRFS_IOC_TREE_SEARCH_V2:
5240	return btrfs_ioctl_tree_search_v2(root, argp);
5241	case BTRFS_IOC_INO_LOOKUP:
5242	return btrfs_ioctl_ino_lookup(root, argp);
5243	case BTRFS_IOC_INO_PATHS:
5244	return btrfs_ioctl_ino_to_path(root, arg: argp);
5245	case BTRFS_IOC_LOGICAL_INO:
5246	return btrfs_ioctl_logical_to_ino(fs_info, arg: argp, version: `1`);
5247	case BTRFS_IOC_LOGICAL_INO_V2:
5248	return btrfs_ioctl_logical_to_ino(fs_info, arg: argp, version: `2`);
5249	case BTRFS_IOC_SPACE_INFO:
5250	return btrfs_ioctl_space_info(fs_info, arg: argp);
5251	case BTRFS_IOC_SYNC: {
5252	int ret;
5253
5254	ret = btrfs_start_delalloc_roots(fs_info, LONG_MAX, in_reclaim_context: false);
5255	if (ret)
5256	return ret;
5257	ret = btrfs_sync_fs(sb: inode->i_sb, wait: `1`);
5258	/*
5259	* There may be work for the cleaner kthread to do (subvolume
5260	* deletion, delayed iputs, defrag inodes, etc), so wake it up.
5261	*/
5262	wake_up_process(tsk: fs_info->cleaner_kthread);
5263	return ret;
5264	}
5265	case BTRFS_IOC_START_SYNC:
5266	return btrfs_ioctl_start_sync(root, argp);
5267	case BTRFS_IOC_WAIT_SYNC:
5268	return btrfs_ioctl_wait_sync(fs_info, argp);
5269	case BTRFS_IOC_SCRUB:
5270	return btrfs_ioctl_scrub(file, arg: argp);
5271	case BTRFS_IOC_SCRUB_CANCEL:
5272	return btrfs_ioctl_scrub_cancel(fs_info);
5273	case BTRFS_IOC_SCRUB_PROGRESS:
5274	return btrfs_ioctl_scrub_progress(fs_info, arg: argp);
5275	case BTRFS_IOC_BALANCE_V2:
5276	return btrfs_ioctl_balance(file, arg: argp);
5277	case BTRFS_IOC_BALANCE_CTL:
5278	return btrfs_ioctl_balance_ctl(fs_info, cmd: arg);
5279	case BTRFS_IOC_BALANCE_PROGRESS:
5280	return btrfs_ioctl_balance_progress(fs_info, arg: argp);
5281	case BTRFS_IOC_SET_RECEIVED_SUBVOL:
5282	return btrfs_ioctl_set_received_subvol(file, arg: argp);
5283	#ifdef CONFIG_64BIT
5284	case BTRFS_IOC_SET_RECEIVED_SUBVOL_32:
5285	return btrfs_ioctl_set_received_subvol_32(file, arg: argp);
5286	#endif
5287	case BTRFS_IOC_SEND:
5288	return _btrfs_ioctl_send(root, argp, compat: false);
5289	#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
5290	case BTRFS_IOC_SEND_32:
5291	return _btrfs_ioctl_send(root, argp, compat: true);
5292	#endif
5293	case BTRFS_IOC_GET_DEV_STATS:
5294	return btrfs_ioctl_get_dev_stats(fs_info, arg: argp);
5295	case BTRFS_IOC_QUOTA_CTL:
5296	return btrfs_ioctl_quota_ctl(file, arg: argp);
5297	case BTRFS_IOC_QGROUP_ASSIGN:
5298	return btrfs_ioctl_qgroup_assign(file, arg: argp);
5299	case BTRFS_IOC_QGROUP_CREATE:
5300	return btrfs_ioctl_qgroup_create(file, arg: argp);
5301	case BTRFS_IOC_QGROUP_LIMIT:
5302	return btrfs_ioctl_qgroup_limit(file, arg: argp);
5303	case BTRFS_IOC_QUOTA_RESCAN:
5304	return btrfs_ioctl_quota_rescan(file, arg: argp);
5305	case BTRFS_IOC_QUOTA_RESCAN_STATUS:
5306	return btrfs_ioctl_quota_rescan_status(fs_info, arg: argp);
5307	case BTRFS_IOC_QUOTA_RESCAN_WAIT:
5308	return btrfs_ioctl_quota_rescan_wait(fs_info);
5309	case BTRFS_IOC_DEV_REPLACE:
5310	return btrfs_ioctl_dev_replace(fs_info, arg: argp);
5311	case BTRFS_IOC_GET_SUPPORTED_FEATURES:
5312	return btrfs_ioctl_get_supported_features(arg: argp);
5313	case BTRFS_IOC_GET_FEATURES:
5314	return btrfs_ioctl_get_features(fs_info, arg: argp);
5315	case BTRFS_IOC_SET_FEATURES:
5316	return btrfs_ioctl_set_features(file, arg: argp);
5317	case BTRFS_IOC_GET_SUBVOL_INFO:
5318	return btrfs_ioctl_get_subvol_info(inode, argp);
5319	case BTRFS_IOC_GET_SUBVOL_ROOTREF:
5320	return btrfs_ioctl_get_subvol_rootref(root, argp);
5321	case BTRFS_IOC_INO_LOOKUP_USER:
5322	return btrfs_ioctl_ino_lookup_user(file, argp);
5323	case FS_IOC_ENABLE_VERITY:
5324	return fsverity_ioctl_enable(filp: file, arg: (const void __user *)argp);
5325	case FS_IOC_MEASURE_VERITY:
5326	return fsverity_ioctl_measure(filp: file, arg: argp);
5327	case FS_IOC_READ_VERITY_METADATA:
5328	return fsverity_ioctl_read_metadata(filp: file, uarg: argp);
5329	case BTRFS_IOC_ENCODED_READ:
5330	return btrfs_ioctl_encoded_read(file, argp, compat: false);
5331	case BTRFS_IOC_ENCODED_WRITE:
5332	return btrfs_ioctl_encoded_write(file, argp, compat: false);
5333	#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
5334	case BTRFS_IOC_ENCODED_READ_32:
5335	return btrfs_ioctl_encoded_read(file, argp, compat: true);
5336	case BTRFS_IOC_ENCODED_WRITE_32:
5337	return btrfs_ioctl_encoded_write(file, argp, compat: true);
5338	#endif
5339	case BTRFS_IOC_SUBVOL_SYNC_WAIT:
5340	return btrfs_ioctl_subvol_sync(fs_info, argp);
5341	#ifdef CONFIG_BTRFS_EXPERIMENTAL
5342	case BTRFS_IOC_SHUTDOWN:
5343	return btrfs_ioctl_shutdown(fs_info, arg);
5344	#endif
5345	}
5346
5347	return -ENOTTY;
5348	}
5349
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry point. Translates the one command number that differs
 * for 32-bit callers, converts the argument with compat_ptr() and forwards
 * everything to btrfs_ioctl().
 */
long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	/*
	 * This command accesses a 32-bit value anyway, so remapping the
	 * number is all the handling it needs.
	 */
	if (cmd == FS_IOC32_GETVERSION)
		cmd = FS_IOC_GETVERSION;

	return btrfs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
}
#endif
5366

source code of linux/fs/btrfs/ioctl.c