ioctl.c source code [linux/fs/btrfs/ioctl.c]

1	// SPDX-License-Identifier: GPL-2.0
2	/*
3	* Copyright (C) 2007 Oracle. All rights reserved.
4	*/
5
6	#include <linux/kernel.h>
7	#include <linux/bio.h>
8	#include <linux/file.h>
9	#include <linux/fs.h>
10	#include <linux/fsnotify.h>
11	#include <linux/pagemap.h>
12	#include <linux/highmem.h>
13	#include <linux/time.h>
14	#include <linux/string.h>
15	#include <linux/backing-dev.h>
16	#include <linux/mount.h>
17	#include <linux/namei.h>
18	#include <linux/writeback.h>
19	#include <linux/compat.h>
20	#include <linux/security.h>
21	#include <linux/xattr.h>
22	#include <linux/mm.h>
23	#include <linux/slab.h>
24	#include <linux/blkdev.h>
25	#include <linux/uuid.h>
26	#include <linux/btrfs.h>
27	#include <linux/uaccess.h>
28	#include <linux/iversion.h>
29	#include <linux/fileattr.h>
30	#include <linux/fsverity.h>
31	#include <linux/sched/xacct.h>
32	#include "ctree.h"
33	#include "disk-io.h"
34	#include "export.h"
35	#include "transaction.h"
36	#include "btrfs_inode.h"
37	#include "volumes.h"
38	#include "locking.h"
39	#include "backref.h"
40	#include "send.h"
41	#include "dev-replace.h"
42	#include "props.h"
43	#include "sysfs.h"
44	#include "qgroup.h"
45	#include "tree-log.h"
46	#include "compression.h"
47	#include "space-info.h"
48	#include "block-group.h"
49	#include "fs.h"
50	#include "accessors.h"
51	#include "extent-tree.h"
52	#include "root-tree.h"
53	#include "defrag.h"
54	#include "dir-item.h"
55	#include "uuid-tree.h"
56	#include "ioctl.h"
57	#include "file.h"
58	#include "scrub.h"
59	#include "super.h"
60
#ifdef CONFIG_64BIT
/*
 * If we have a 32-bit userspace and 64-bit kernel, then the UAPI
 * structures are incorrect, as the timespec structure from userspace
 * is 4 bytes too small. We define these alternatives here to teach
 * the kernel about the 32-bit struct packing.
 */
struct btrfs_ioctl_timespec_32 {
	__u64 sec;
	__u32 nsec;
} __attribute__ ((__packed__));

/* Packed 32-bit-userspace layout of the "set received subvol" arguments. */
struct btrfs_ioctl_received_subvol_args_32 {
	char	uuid[BTRFS_UUID_SIZE];	/* in */
	__u64	stransid;		/* in */
	__u64	rtransid;		/* out */
	struct btrfs_ioctl_timespec_32 stime; /* in */
	struct btrfs_ioctl_timespec_32 rtime; /* out */
	__u64	flags;			/* in */
	__u64	reserved[16];		/* in */
} __attribute__ ((__packed__));

/* Same ioctl number (37) as the native variant, sized for the packed struct. */
#define BTRFS_IOC_SET_RECEIVED_SUBVOL_32 _IOWR(BTRFS_IOCTL_MAGIC, 37, \
				struct btrfs_ioctl_received_subvol_args_32)
#endif
85
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
/* Packed compat layout of the send arguments; the pointer is a compat_uptr_t. */
struct btrfs_ioctl_send_args_32 {
	__s64 send_fd;			/* in */
	__u64 clone_sources_count;	/* in */
	compat_uptr_t clone_sources;	/* in */
	__u64 parent_root;		/* in */
	__u64 flags;			/* in */
	__u32 version;			/* in */
	__u8  reserved[28];		/* in */
} __attribute__ ((__packed__));

#define BTRFS_IOC_SEND_32 _IOW(BTRFS_IOCTL_MAGIC, 38, \
			       struct btrfs_ioctl_send_args_32)

/* Compat layout of the encoded I/O arguments (not packed). */
struct btrfs_ioctl_encoded_io_args_32 {
	compat_uptr_t iov;
	compat_ulong_t iovcnt;
	__s64 offset;
	__u64 flags;
	__u64 len;
	__u64 unencoded_len;
	__u64 unencoded_offset;
	__u32 compression;
	__u32 encryption;
	__u8 reserved[64];
};

/* Read and write share command number 64 and differ only in direction. */
#define BTRFS_IOC_ENCODED_READ_32 _IOR(BTRFS_IOCTL_MAGIC, 64, \
				       struct btrfs_ioctl_encoded_io_args_32)
#define BTRFS_IOC_ENCODED_WRITE_32 _IOW(BTRFS_IOCTL_MAGIC, 64, \
					struct btrfs_ioctl_encoded_io_args_32)
#endif
118
119	/ Mask out flags that are inappropriate for the given type of inode. /
120	static unsigned int btrfs_mask_fsflags_for_type(struct inode *inode,
121	unsigned int flags)
122	{
123	if (S_ISDIR(inode->i_mode))
124	return flags;
125	else if (S_ISREG(inode->i_mode))
126	return flags & ~FS_DIRSYNC_FL;
127	else
128	return flags & (FS_NODUMP_FL \| FS_NOATIME_FL);
129	}
130
131	/*
132	* Export internal inode flags to the format expected by the FS_IOC_GETFLAGS
133	* ioctl.
134	*/
135	static unsigned int btrfs_inode_flags_to_fsflags(struct btrfs_inode *binode)
136	{
137	unsigned int iflags = `0`;
138	u32 flags = binode->flags;
139	u32 ro_flags = binode->ro_flags;
140
141	if (flags & BTRFS_INODE_SYNC)
142	iflags \|= FS_SYNC_FL;
143	if (flags & BTRFS_INODE_IMMUTABLE)
144	iflags \|= FS_IMMUTABLE_FL;
145	if (flags & BTRFS_INODE_APPEND)
146	iflags \|= FS_APPEND_FL;
147	if (flags & BTRFS_INODE_NODUMP)
148	iflags \|= FS_NODUMP_FL;
149	if (flags & BTRFS_INODE_NOATIME)
150	iflags \|= FS_NOATIME_FL;
151	if (flags & BTRFS_INODE_DIRSYNC)
152	iflags \|= FS_DIRSYNC_FL;
153	if (flags & BTRFS_INODE_NODATACOW)
154	iflags \|= FS_NOCOW_FL;
155	if (ro_flags & BTRFS_INODE_RO_VERITY)
156	iflags \|= FS_VERITY_FL;
157
158	if (flags & BTRFS_INODE_NOCOMPRESS)
159	iflags \|= FS_NOCOMP_FL;
160	else if (flags & BTRFS_INODE_COMPRESS)
161	iflags \|= FS_COMPR_FL;
162
163	return iflags;
164	}
165
166	/*
167	* Update inode->i_flags based on the btrfs internal flags.
168	*/
169	void btrfs_sync_inode_flags_to_i_flags(struct inode *inode)
170	{
171	struct btrfs_inode *binode = BTRFS_I(inode);
172	unsigned int new_fl = `0`;
173
174	if (binode->flags & BTRFS_INODE_SYNC)
175	new_fl \|= S_SYNC;
176	if (binode->flags & BTRFS_INODE_IMMUTABLE)
177	new_fl \|= S_IMMUTABLE;
178	if (binode->flags & BTRFS_INODE_APPEND)
179	new_fl \|= S_APPEND;
180	if (binode->flags & BTRFS_INODE_NOATIME)
181	new_fl \|= S_NOATIME;
182	if (binode->flags & BTRFS_INODE_DIRSYNC)
183	new_fl \|= S_DIRSYNC;
184	if (binode->ro_flags & BTRFS_INODE_RO_VERITY)
185	new_fl \|= S_VERITY;
186
187	set_mask_bits(&inode->i_flags,
188	S_SYNC \| S_APPEND \| S_IMMUTABLE \| S_NOATIME \| S_DIRSYNC \|
189	S_VERITY, new_fl);
190	}
191
/*
 * Check if @flags are a supported and valid set of FS_*_FL flags and that
 * the old and new flags are not conflicting.
 *
 * @old_flags: FS_*_FL flags currently set on the inode
 * @flags:     FS_*_FL flags being requested
 *
 * Returns 0 if the request is acceptable, -EOPNOTSUPP if @flags contains a
 * bit outside the supported set, or -EINVAL for a conflicting combination.
 */
static int check_fsflags(unsigned int old_flags, unsigned int flags)
{
	if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL |
		      FS_NOATIME_FL | FS_NODUMP_FL |
		      FS_SYNC_FL | FS_DIRSYNC_FL |
		      FS_NOCOMP_FL | FS_COMPR_FL |
		      FS_NOCOW_FL))
		return -EOPNOTSUPP;

	/*
	 * Switching between COMPR and NOCOMP across old/new is valid; only
	 * requesting both at the same time is rejected.
	 */
	if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
		return -EINVAL;

	if ((flags & FS_COMPR_FL) && (flags & FS_NOCOW_FL))
		return -EINVAL;

	/* NOCOW and compression options are mutually exclusive */
	if ((old_flags & FS_NOCOW_FL) && (flags & (FS_COMPR_FL | FS_NOCOMP_FL)))
		return -EINVAL;
	if ((flags & FS_NOCOW_FL) && (old_flags & (FS_COMPR_FL | FS_NOCOMP_FL)))
		return -EINVAL;

	return 0;
}
220
221	static int check_fsflags_compatible(struct btrfs_fs_info *fs_info,
222	unsigned int flags)
223	{
224	if (btrfs_is_zoned(fs_info) && (flags & FS_NOCOW_FL))
225	return -EPERM;
226
227	return `0`;
228	}
229
/*
 * Verify that the name buffer of @vol_args contains a NUL terminator.
 *
 * Returns 0 if a NUL byte is found anywhere in vol_args->name, or
 * -ENAMETOOLONG if the whole buffer is filled without one.
 */
int btrfs_check_ioctl_vol_args_path(const struct btrfs_ioctl_vol_args *vol_args)
{
	if (memchr(vol_args->name, 0, sizeof(vol_args->name)) == NULL)
		return -ENAMETOOLONG;
	return 0;
}
236
/*
 * Verify that the name buffer of @vol_args2 contains a NUL terminator.
 *
 * Returns 0 if a NUL byte is found anywhere in vol_args2->name, or
 * -ENAMETOOLONG if the whole buffer is filled without one.
 */
static int btrfs_check_ioctl_vol_args2_subvol_name(const struct btrfs_ioctl_vol_args_v2 *vol_args2)
{
	if (memchr(vol_args2->name, 0, sizeof(vol_args2->name)) == NULL)
		return -ENAMETOOLONG;
	return 0;
}
243
/*
 * Set flags/xflags from the internal inode flags. The remaining items of
 * fsxattr are zeroed.
 *
 * Always returns 0.
 */
int btrfs_fileattr_get(struct dentry *dentry, struct fileattr *fa)
{
	struct btrfs_inode *binode = BTRFS_I(d_inode(dentry));

	fileattr_fill_flags(fa, btrfs_inode_flags_to_fsflags(binode));
	return 0;
}
255
256	int btrfs_fileattr_set(struct mnt_idmap *idmap,
257	struct dentry dentry, struct* fileattr *fa)
258	{
259	struct inode *inode = d_inode(dentry);
260	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
261	struct btrfs_inode *binode = BTRFS_I(inode);
262	struct btrfs_root *root = binode->root;
263	struct btrfs_trans_handle *trans;
264	unsigned int fsflags, old_fsflags;
265	int ret;
266	const char *comp = NULL;
267	u32 binode_flags;
268
269	if (btrfs_root_readonly(root))
270	return -EROFS;
271
272	if (fileattr_has_fsx(fa))
273	return -EOPNOTSUPP;
274
275	fsflags = btrfs_mask_fsflags_for_type(inode, flags: fa->flags);
276	old_fsflags = btrfs_inode_flags_to_fsflags(binode);
277	ret = check_fsflags(old_flags: old_fsflags, flags: fsflags);
278	if (ret)
279	return ret;
280
281	ret = check_fsflags_compatible(fs_info, flags: fsflags);
282	if (ret)
283	return ret;
284
285	binode_flags = binode->flags;
286	if (fsflags & FS_SYNC_FL)
287	binode_flags \|= BTRFS_INODE_SYNC;
288	else
289	binode_flags &= ~BTRFS_INODE_SYNC;
290	if (fsflags & FS_IMMUTABLE_FL)
291	binode_flags \|= BTRFS_INODE_IMMUTABLE;
292	else
293	binode_flags &= ~BTRFS_INODE_IMMUTABLE;
294	if (fsflags & FS_APPEND_FL)
295	binode_flags \|= BTRFS_INODE_APPEND;
296	else
297	binode_flags &= ~BTRFS_INODE_APPEND;
298	if (fsflags & FS_NODUMP_FL)
299	binode_flags \|= BTRFS_INODE_NODUMP;
300	else
301	binode_flags &= ~BTRFS_INODE_NODUMP;
302	if (fsflags & FS_NOATIME_FL)
303	binode_flags \|= BTRFS_INODE_NOATIME;
304	else
305	binode_flags &= ~BTRFS_INODE_NOATIME;
306
307	/ If coming from FS_IOC_FSSETXATTR then skip unconverted flags /
308	if (!fa->flags_valid) {
309	/ 1 item for the inode /
310	trans = btrfs_start_transaction(root, num_items: `1`);
311	if (IS_ERR(ptr: trans))
312	return PTR_ERR(ptr: trans);
313	goto update_flags;
314	}
315
316	if (fsflags & FS_DIRSYNC_FL)
317	binode_flags \|= BTRFS_INODE_DIRSYNC;
318	else
319	binode_flags &= ~BTRFS_INODE_DIRSYNC;
320	if (fsflags & FS_NOCOW_FL) {
321	if (S_ISREG(inode->i_mode)) {
322	/*
323	* It's safe to turn csums off here, no extents exist.
324	* Otherwise we want the flag to reflect the real COW
325	* status of the file and will not set it.
326	*/
327	if (inode->i_size == `0`)
328	binode_flags \|= BTRFS_INODE_NODATACOW \|
329	BTRFS_INODE_NODATASUM;
330	} else {
331	binode_flags \|= BTRFS_INODE_NODATACOW;
332	}
333	} else {
334	/*
335	* Revert back under same assumptions as above
336	*/
337	if (S_ISREG(inode->i_mode)) {
338	if (inode->i_size == `0`)
339	binode_flags &= ~(BTRFS_INODE_NODATACOW \|
340	BTRFS_INODE_NODATASUM);
341	} else {
342	binode_flags &= ~BTRFS_INODE_NODATACOW;
343	}
344	}
345
346	/*
347	* The COMPRESS flag can only be changed by users, while the NOCOMPRESS
348	* flag may be changed automatically if compression code won't make
349	* things smaller.
350	*/
351	if (fsflags & FS_NOCOMP_FL) {
352	binode_flags &= ~BTRFS_INODE_COMPRESS;
353	binode_flags \|= BTRFS_INODE_NOCOMPRESS;
354	} else if (fsflags & FS_COMPR_FL) {
355
356	if (IS_SWAPFILE(inode))
357	return -ETXTBSY;
358
359	binode_flags \|= BTRFS_INODE_COMPRESS;
360	binode_flags &= ~BTRFS_INODE_NOCOMPRESS;
361
362	comp = btrfs_compress_type2str(type: fs_info->compress_type);
363	if (!comp \|\| comp[`0`] == `0`)
364	comp = btrfs_compress_type2str(type: BTRFS_COMPRESS_ZLIB);
365	} else {
366	binode_flags &= ~(BTRFS_INODE_COMPRESS \| BTRFS_INODE_NOCOMPRESS);
367	}
368
369	/*
370	* 1 for inode item
371	* 2 for properties
372	*/
373	trans = btrfs_start_transaction(root, num_items: `3`);
374	if (IS_ERR(ptr: trans))
375	return PTR_ERR(ptr: trans);
376
377	if (comp) {
378	ret = btrfs_set_prop(trans, inode, name: "btrfs.compression", value: comp,
379	strlen(comp), flags: `0`);
380	if (ret) {
381	btrfs_abort_transaction(trans, ret);
382	goto out_end_trans;
383	}
384	} else {
385	ret = btrfs_set_prop(trans, inode, name: "btrfs.compression", NULL,
386	value_len: `0`, flags: `0`);
387	if (ret && ret != -ENODATA) {
388	btrfs_abort_transaction(trans, ret);
389	goto out_end_trans;
390	}
391	}
392
393	update_flags:
394	binode->flags = binode_flags;
395	btrfs_sync_inode_flags_to_i_flags(inode);
396	inode_inc_iversion(inode);
397	inode_set_ctime_current(inode);
398	ret = btrfs_update_inode(trans, inode: BTRFS_I(inode));
399
400	out_end_trans:
401	btrfs_end_transaction(trans);
402	return ret;
403	}
404
405	/*
406	* Start exclusive operation @type, return true on success
407	*/
408	bool btrfs_exclop_start(struct btrfs_fs_info *fs_info,
409	enum btrfs_exclusive_operation type)
410	{
411	bool ret = false;
412
413	spin_lock(lock: &fs_info->super_lock);
414	if (fs_info->exclusive_operation == BTRFS_EXCLOP_NONE) {
415	fs_info->exclusive_operation = type;
416	ret = true;
417	}
418	spin_unlock(lock: &fs_info->super_lock);
419
420	return ret;
421	}
422
423	/*
424	* Conditionally allow to enter the exclusive operation in case it's compatible
425	* with the running one. This must be paired with btrfs_exclop_start_unlock and
426	* btrfs_exclop_finish.
427	*
428	* Compatibility:
429	* - the same type is already running
430	* - when trying to add a device and balance has been paused
431	* - not BTRFS_EXCLOP_NONE - this is intentionally incompatible and the caller
432	* must check the condition first that would allow none -> @type
433	*/
434	bool btrfs_exclop_start_try_lock(struct btrfs_fs_info *fs_info,
435	enum btrfs_exclusive_operation type)
436	{
437	spin_lock(lock: &fs_info->super_lock);
438	if (fs_info->exclusive_operation == type \|\|
439	(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED &&
440	type == BTRFS_EXCLOP_DEV_ADD))
441	return true;
442
443	spin_unlock(lock: &fs_info->super_lock);
444	return false;
445	}
446
447	void btrfs_exclop_start_unlock(struct btrfs_fs_info *fs_info)
448	{
449	spin_unlock(lock: &fs_info->super_lock);
450	}
451
452	void btrfs_exclop_finish(struct btrfs_fs_info *fs_info)
453	{
454	spin_lock(lock: &fs_info->super_lock);
455	WRITE_ONCE(fs_info->exclusive_operation, BTRFS_EXCLOP_NONE);
456	spin_unlock(lock: &fs_info->super_lock);
457	sysfs_notify(kobj: &fs_info->fs_devices->fsid_kobj, NULL, attr: "exclusive_operation");
458	}
459
460	void btrfs_exclop_balance(struct btrfs_fs_info *fs_info,
461	enum btrfs_exclusive_operation op)
462	{
463	switch (op) {
464	case BTRFS_EXCLOP_BALANCE_PAUSED:
465	spin_lock(lock: &fs_info->super_lock);
466	ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE \|\|
467	fs_info->exclusive_operation == BTRFS_EXCLOP_DEV_ADD \|\|
468	fs_info->exclusive_operation == BTRFS_EXCLOP_NONE \|\|
469	fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED);
470	fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE_PAUSED;
471	spin_unlock(lock: &fs_info->super_lock);
472	break;
473	case BTRFS_EXCLOP_BALANCE:
474	spin_lock(lock: &fs_info->super_lock);
475	ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED);
476	fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE;
477	spin_unlock(lock: &fs_info->super_lock);
478	break;
479	default:
480	btrfs_warn(fs_info,
481	"invalid exclop balance operation %d requested", op);
482	}
483	}
484
485	static int btrfs_ioctl_getversion(struct inode inode, int* __user *arg)
486	{
487	return put_user(inode->i_generation, arg);
488	}
489
490	static noinline int btrfs_ioctl_fitrim(struct btrfs_fs_info *fs_info,
491	void __user *arg)
492	{
493	struct btrfs_device *device;
494	struct fstrim_range range;
495	u64 minlen = ULLONG_MAX;
496	u64 num_devices = `0`;
497	int ret;
498
499	if (!capable(CAP_SYS_ADMIN))
500	return -EPERM;
501
502	/*
503	* btrfs_trim_block_group() depends on space cache, which is not
504	* available in zoned filesystem. So, disallow fitrim on a zoned
505	* filesystem for now.
506	*/
507	if (btrfs_is_zoned(fs_info))
508	return -EOPNOTSUPP;
509
510	/*
511	* If the fs is mounted with nologreplay, which requires it to be
512	* mounted in RO mode as well, we can not allow discard on free space
513	* inside block groups, because log trees refer to extents that are not
514	* pinned in a block group's free space cache (pinning the extents is
515	* precisely the first phase of replaying a log tree).
516	*/
517	if (btrfs_test_opt(fs_info, NOLOGREPLAY))
518	return -EROFS;
519
520	rcu_read_lock();
521	list_for_each_entry_rcu(device, &fs_info->fs_devices->devices,
522	dev_list) {
523	if (!device->bdev \|\| !bdev_max_discard_sectors(bdev: device->bdev))
524	continue;
525	num_devices++;
526	minlen = min_t(u64, bdev_discard_granularity(device->bdev),
527	minlen);
528	}
529	rcu_read_unlock();
530
531	if (!num_devices)
532	return -EOPNOTSUPP;
533	if (copy_from_user(to: &range, from: arg, n: sizeof(range)))
534	return -EFAULT;
535
536	/*
537	* NOTE: Don't truncate the range using super->total_bytes. Bytenr of
538	* block group is in the logical address space, which can be any
539	* sectorsize aligned bytenr in the range [0, U64_MAX].
540	*/
541	if (range.len < fs_info->sectorsize)
542	return -EINVAL;
543
544	range.minlen = max(range.minlen, minlen);
545	ret = btrfs_trim_fs(fs_info, range: &range);
546	if (ret < `0`)
547	return ret;
548
549	if (copy_to_user(to: arg, from: &range, n: sizeof(range)))
550	return -EFAULT;
551
552	return `0`;
553	}
554
555	int __pure btrfs_is_empty_uuid(u8 *uuid)
556	{
557	int i;
558
559	for (i = `0`; i < BTRFS_UUID_SIZE; i++) {
560	if (uuid[i])
561	return `0`;
562	}
563	return `1`;
564	}
565
/*
 * Calculate the number of transaction items to reserve for creating a subvolume
 * or snapshot, not including the inode, directory entries, or parent directory.
 *
 * @inherit may be NULL; when set, two extra items are counted per inherited
 * qgroup.
 */
static unsigned int create_subvol_num_items(struct btrfs_qgroup_inherit *inherit)
{
	/*
	 * 1 to add root block
	 * 1 to add root item
	 * 1 to add root ref
	 * 1 to add root backref
	 * 1 to add UUID item
	 * 1 to add qgroup info
	 * 1 to add qgroup limit
	 *
	 * Ideally the last two would only be accounted if qgroups are enabled,
	 * but that can change between now and the time we would insert them.
	 */
	unsigned int num_items = 7;

	if (inherit) {
		/* 2 to add qgroup relations for each inherited qgroup */
		num_items += 2 * inherit->num_qgroups;
	}
	return num_items;
}
592
593	static noinline int create_subvol(struct mnt_idmap *idmap,
594	struct inode dir, struct* dentry *dentry,
595	struct btrfs_qgroup_inherit *inherit)
596	{
597	struct btrfs_fs_info *fs_info = inode_to_fs_info(dir);
598	struct btrfs_trans_handle *trans;
599	struct btrfs_key key;
600	struct btrfs_root_item *root_item;
601	struct btrfs_inode_item *inode_item;
602	struct extent_buffer *leaf;
603	struct btrfs_root *root = BTRFS_I(inode: dir)->root;
604	struct btrfs_root *new_root;
605	struct btrfs_block_rsv block_rsv;
606	struct timespec64 cur_time = current_time(inode: dir);
607	struct btrfs_new_inode_args new_inode_args = {
608	.dir = dir,
609	.dentry = dentry,
610	.subvol = true,
611	};
612	unsigned int trans_num_items;
613	int ret;
614	dev_t anon_dev;
615	u64 objectid;
616	u64 qgroup_reserved = `0`;
617
618	root_item = kzalloc(size: sizeof(*root_item), GFP_KERNEL);
619	if (!root_item)
620	return -ENOMEM;
621
622	ret = btrfs_get_free_objectid(root: fs_info->tree_root, objectid: &objectid);
623	if (ret)
624	goto out_root_item;
625
626	/*
627	* Don't create subvolume whose level is not zero. Or qgroup will be
628	* screwed up since it assumes subvolume qgroup's level to be 0.
629	*/
630	if (btrfs_qgroup_level(qgroupid: objectid)) {
631	ret = -ENOSPC;
632	goto out_root_item;
633	}
634
635	ret = get_anon_bdev(&anon_dev);
636	if (ret < `0`)
637	goto out_root_item;
638
639	new_inode_args.inode = btrfs_new_subvol_inode(idmap, dir);
640	if (!new_inode_args.inode) {
641	ret = -ENOMEM;
642	goto out_anon_dev;
643	}
644	ret = btrfs_new_inode_prepare(args: &new_inode_args, trans_num_items: &trans_num_items);
645	if (ret)
646	goto out_inode;
647	trans_num_items += create_subvol_num_items(inherit);
648
649	btrfs_init_block_rsv(rsv: &block_rsv, type: BTRFS_BLOCK_RSV_TEMP);
650	ret = btrfs_subvolume_reserve_metadata(root, rsv: &block_rsv,
651	nitems: trans_num_items, use_global_rsv: false);
652	if (ret)
653	goto out_new_inode_args;
654	qgroup_reserved = block_rsv.qgroup_rsv_reserved;
655
656	trans = btrfs_start_transaction(root, num_items: `0`);
657	if (IS_ERR(ptr: trans)) {
658	ret = PTR_ERR(ptr: trans);
659	goto out_release_rsv;
660	}
661	ret = btrfs_record_root_in_trans(trans, root: BTRFS_I(inode: dir)->root);
662	if (ret)
663	goto out;
664	btrfs_qgroup_convert_reserved_meta(root, num_bytes: qgroup_reserved);
665	qgroup_reserved = `0`;
666	trans->block_rsv = &block_rsv;
667	trans->bytes_reserved = block_rsv.size;
668	/ Tree log can't currently deal with an inode which is a new root. /
669	btrfs_set_log_full_commit(trans);
670
671	ret = btrfs_qgroup_inherit(trans, srcid: `0`, objectid, inode_rootid: root->root_key.objectid, inherit);
672	if (ret)
673	goto out;
674
675	leaf = btrfs_alloc_tree_block(trans, root, parent: `0`, root_objectid: objectid, NULL, level: `0`, hint: `0`, empty_size: `0`,
676	reloc_src_root: `0`, nest: BTRFS_NESTING_NORMAL);
677	if (IS_ERR(ptr: leaf)) {
678	ret = PTR_ERR(ptr: leaf);
679	goto out;
680	}
681
682	btrfs_mark_buffer_dirty(trans, buf: leaf);
683
684	inode_item = &root_item->inode;
685	btrfs_set_stack_inode_generation(s: inode_item, val: `1`);
686	btrfs_set_stack_inode_size(s: inode_item, val: `3`);
687	btrfs_set_stack_inode_nlink(s: inode_item, val: `1`);
688	btrfs_set_stack_inode_nbytes(s: inode_item,
689	val: fs_info->nodesize);
690	btrfs_set_stack_inode_mode(s: inode_item, S_IFDIR \| `0755`);
691
692	btrfs_set_root_flags(s: root_item, val: `0`);
693	btrfs_set_root_limit(s: root_item, val: `0`);
694	btrfs_set_stack_inode_flags(s: inode_item, BTRFS_INODE_ROOT_ITEM_INIT);
695
696	btrfs_set_root_bytenr(s: root_item, val: leaf->start);
697	btrfs_set_root_generation(s: root_item, val: trans->transid);
698	btrfs_set_root_level(s: root_item, val: `0`);
699	btrfs_set_root_refs(s: root_item, val: `1`);
700	btrfs_set_root_used(s: root_item, val: leaf->len);
701	btrfs_set_root_last_snapshot(s: root_item, val: `0`);
702
703	btrfs_set_root_generation_v2(s: root_item,
704	val: btrfs_root_generation(s: root_item));
705	generate_random_guid(guid: root_item->uuid);
706	btrfs_set_stack_timespec_sec(s: &root_item->otime, val: cur_time.tv_sec);
707	btrfs_set_stack_timespec_nsec(s: &root_item->otime, val: cur_time.tv_nsec);
708	root_item->ctime = root_item->otime;
709	btrfs_set_root_ctransid(s: root_item, val: trans->transid);
710	btrfs_set_root_otransid(s: root_item, val: trans->transid);
711
712	btrfs_tree_unlock(eb: leaf);
713
714	btrfs_set_root_dirid(s: root_item, BTRFS_FIRST_FREE_OBJECTID);
715
716	key.objectid = objectid;
717	key.offset = `0`;
718	key.type = BTRFS_ROOT_ITEM_KEY;
719	ret = btrfs_insert_root(trans, root: fs_info->tree_root, key: &key,
720	item: root_item);
721	if (ret) {
722	/*
723	* Since we don't abort the transaction in this case, free the
724	* tree block so that we don't leak space and leave the
725	* filesystem in an inconsistent state (an extent item in the
726	* extent tree with a backreference for a root that does not
727	* exists).
728	*/
729	btrfs_tree_lock(eb: leaf);
730	btrfs_clear_buffer_dirty(trans, buf: leaf);
731	btrfs_tree_unlock(eb: leaf);
732	btrfs_free_tree_block(trans, root_id: objectid, buf: leaf, parent: `0`, last_ref: `1`);
733	free_extent_buffer(eb: leaf);
734	goto out;
735	}
736
737	free_extent_buffer(eb: leaf);
738	leaf = NULL;
739
740	new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev: &anon_dev);
741	if (IS_ERR(ptr: new_root)) {
742	ret = PTR_ERR(ptr: new_root);
743	btrfs_abort_transaction(trans, ret);
744	goto out;
745	}
746	/ anon_dev is owned by new_root now. /
747	anon_dev = `0`;
748	BTRFS_I(inode: new_inode_args.inode)->root = new_root;
749	/ ... and new_root is owned by new_inode_args.inode now. /
750
751	ret = btrfs_record_root_in_trans(trans, root: new_root);
752	if (ret) {
753	btrfs_abort_transaction(trans, ret);
754	goto out;
755	}
756
757	ret = btrfs_uuid_tree_add(trans, uuid: root_item->uuid,
758	BTRFS_UUID_KEY_SUBVOL, subid: objectid);
759	if (ret) {
760	btrfs_abort_transaction(trans, ret);
761	goto out;
762	}
763
764	ret = btrfs_create_new_inode(trans, args: &new_inode_args);
765	if (ret) {
766	btrfs_abort_transaction(trans, ret);
767	goto out;
768	}
769
770	d_instantiate_new(dentry, new_inode_args.inode);
771	new_inode_args.inode = NULL;
772
773	out:
774	trans->block_rsv = NULL;
775	trans->bytes_reserved = `0`;
776	btrfs_end_transaction(trans);
777	out_release_rsv:
778	btrfs_block_rsv_release(fs_info, block_rsv: &block_rsv, num_bytes: (u64)-`1`, NULL);
779	if (qgroup_reserved)
780	btrfs_qgroup_free_meta_prealloc(root, num_bytes: qgroup_reserved);
781	out_new_inode_args:
782	btrfs_new_inode_args_destroy(args: &new_inode_args);
783	out_inode:
784	iput(new_inode_args.inode);
785	out_anon_dev:
786	if (anon_dev)
787	free_anon_bdev(anon_dev);
788	out_root_item:
789	kfree(objp: root_item);
790	return ret;
791	}
792
793	static int create_snapshot(struct btrfs_root root, struct* inode *dir,
794	struct dentry *dentry, bool readonly,
795	struct btrfs_qgroup_inherit *inherit)
796	{
797	struct btrfs_fs_info *fs_info = inode_to_fs_info(dir);
798	struct inode *inode;
799	struct btrfs_pending_snapshot *pending_snapshot;
800	unsigned int trans_num_items;
801	struct btrfs_trans_handle *trans;
802	struct btrfs_block_rsv *block_rsv;
803	u64 qgroup_reserved = `0`;
804	int ret;
805
806	/ We do not support snapshotting right now. /
807	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
808	btrfs_warn(fs_info,
809	"extent tree v2 doesn't support snapshotting yet");
810	return -EOPNOTSUPP;
811	}
812
813	if (btrfs_root_refs(s: &root->root_item) == `0`)
814	return -ENOENT;
815
816	if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
817	return -EINVAL;
818
819	if (atomic_read(v: &root->nr_swapfiles)) {
820	btrfs_warn(fs_info,
821	"cannot snapshot subvolume with active swapfile");
822	return -ETXTBSY;
823	}
824
825	pending_snapshot = kzalloc(size: sizeof(*pending_snapshot), GFP_KERNEL);
826	if (!pending_snapshot)
827	return -ENOMEM;
828
829	ret = get_anon_bdev(&pending_snapshot->anon_dev);
830	if (ret < `0`)
831	goto free_pending;
832	pending_snapshot->root_item = kzalloc(size: sizeof(struct btrfs_root_item),
833	GFP_KERNEL);
834	pending_snapshot->path = btrfs_alloc_path();
835	if (!pending_snapshot->root_item \|\| !pending_snapshot->path) {
836	ret = -ENOMEM;
837	goto free_pending;
838	}
839
840	block_rsv = &pending_snapshot->block_rsv;
841	btrfs_init_block_rsv(rsv: block_rsv, type: BTRFS_BLOCK_RSV_TEMP);
842	/*
843	* 1 to add dir item
844	* 1 to add dir index
845	* 1 to update parent inode item
846	*/
847	trans_num_items = create_subvol_num_items(inherit) + `3`;
848	ret = btrfs_subvolume_reserve_metadata(root: BTRFS_I(inode: dir)->root, rsv: block_rsv,
849	nitems: trans_num_items, use_global_rsv: false);
850	if (ret)
851	goto free_pending;
852	qgroup_reserved = block_rsv->qgroup_rsv_reserved;
853
854	pending_snapshot->dentry = dentry;
855	pending_snapshot->root = root;
856	pending_snapshot->readonly = readonly;
857	pending_snapshot->dir = dir;
858	pending_snapshot->inherit = inherit;
859
860	trans = btrfs_start_transaction(root, num_items: `0`);
861	if (IS_ERR(ptr: trans)) {
862	ret = PTR_ERR(ptr: trans);
863	goto fail;
864	}
865	ret = btrfs_record_root_in_trans(trans, root: BTRFS_I(inode: dir)->root);
866	if (ret) {
867	btrfs_end_transaction(trans);
868	goto fail;
869	}
870	btrfs_qgroup_convert_reserved_meta(root, num_bytes: qgroup_reserved);
871	qgroup_reserved = `0`;
872
873	trans->pending_snapshot = pending_snapshot;
874
875	ret = btrfs_commit_transaction(trans);
876	if (ret)
877	goto fail;
878
879	ret = pending_snapshot->error;
880	if (ret)
881	goto fail;
882
883	ret = btrfs_orphan_cleanup(root: pending_snapshot->snap);
884	if (ret)
885	goto fail;
886
887	inode = btrfs_lookup_dentry(dir: d_inode(dentry: dentry->d_parent), dentry);
888	if (IS_ERR(ptr: inode)) {
889	ret = PTR_ERR(ptr: inode);
890	goto fail;
891	}
892
893	d_instantiate(dentry, inode);
894	ret = `0`;
895	pending_snapshot->anon_dev = `0`;
896	fail:
897	/ Prevent double freeing of anon_dev /
898	if (ret && pending_snapshot->snap)
899	pending_snapshot->snap->anon_dev = `0`;
900	btrfs_put_root(root: pending_snapshot->snap);
901	btrfs_block_rsv_release(fs_info, block_rsv, num_bytes: (u64)-`1`, NULL);
902	if (qgroup_reserved)
903	btrfs_qgroup_free_meta_prealloc(root, num_bytes: qgroup_reserved);
904	free_pending:
905	if (pending_snapshot->anon_dev)
906	free_anon_bdev(pending_snapshot->anon_dev);
907	kfree(objp: pending_snapshot->root_item);
908	btrfs_free_path(p: pending_snapshot->path);
909	kfree(objp: pending_snapshot);
910
911	return ret;
912	}
913
914	/ copy of may_delete in fs/namei.c()*
915	* Check whether we can remove a link victim from directory dir, check
916	* whether the type of victim is right.
917	* 1. We can't do it if dir is read-only (done in permission())
918	* 2. We should have write and exec permissions on dir
919	* 3. We can't remove anything from append-only dir
920	* 4. We can't do anything with immutable dir (done in permission())
921	* 5. If the sticky bit on dir is set we should either
922	* a. be owner of dir, or
923	* b. be owner of victim, or
924	* c. have CAP_FOWNER capability
925	* 6. If the victim is append-only or immutable we can't do anything with
926	* links pointing to it.
927	* 7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
928	* 8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
929	* 9. We can't remove a root or mountpoint.
930	* 10. We don't allow removal of NFS sillyrenamed files; it's handled by
931	* nfs_async_unlink().
932	*/
933
934	static int btrfs_may_delete(struct mnt_idmap *idmap,
935	struct inode dir, struct* dentry victim, int* isdir)
936	{
937	int error;
938
939	if (d_really_is_negative(dentry: victim))
940	return -ENOENT;
941
942	/ The @victim is not inside @dir. /
943	if (d_inode(dentry: victim->d_parent) != dir)
944	return -EINVAL;
945	audit_inode_child(parent: dir, dentry: victim, AUDIT_TYPE_CHILD_DELETE);
946
947	error = inode_permission(idmap, dir, MAY_WRITE \| MAY_EXEC);
948	if (error)
949	return error;
950	if (IS_APPEND(dir))
951	return -EPERM;
952	if (check_sticky(idmap, dir, inode: d_inode(dentry: victim)) \|\|
953	IS_APPEND(d_inode(victim)) \|\| IS_IMMUTABLE(d_inode(victim)) \|\|
954	IS_SWAPFILE(d_inode(victim)))
955	return -EPERM;
956	if (isdir) {
957	if (!d_is_dir(dentry: victim))
958	return -ENOTDIR;
959	if (IS_ROOT(victim))
960	return -EBUSY;
961	} else if (d_is_dir(dentry: victim))
962	return -EISDIR;
963	if (IS_DEADDIR(dir))
964	return -ENOENT;
965	if (victim->d_flags & DCACHE_NFSFS_RENAMED)
966	return -EBUSY;
967	return `0`;
968	}
969
970	/ copy of may_create in fs/namei.c() /
971	static inline int btrfs_may_create(struct mnt_idmap *idmap,
972	struct inode dir, struct* dentry *child)
973	{
974	if (d_really_is_positive(dentry: child))
975	return -EEXIST;
976	if (IS_DEADDIR(dir))
977	return -ENOENT;
978	if (!fsuidgid_has_mapping(sb: dir->i_sb, idmap))
979	return -EOVERFLOW;
980	return inode_permission(idmap, dir, MAY_WRITE \| MAY_EXEC);
981	}
982
983	/*
984	* Create a new subvolume below @parent. This is largely modeled after
985	* sys_mkdirat and vfs_mkdir, but we only do a single component lookup
986	* inside this filesystem so it's quite a bit simpler.
987	*/
988	static noinline int btrfs_mksubvol(const struct path *parent,
989	struct mnt_idmap *idmap,
990	const char name, int* namelen,
991	struct btrfs_root *snap_src,
992	bool readonly,
993	struct btrfs_qgroup_inherit *inherit)
994	{
995	struct inode *dir = d_inode(dentry: parent->dentry);
996	struct btrfs_fs_info *fs_info = inode_to_fs_info(dir);
997	struct dentry *dentry;
998	struct fscrypt_str name_str = FSTR_INIT((char *)name, namelen);
999	int error;
1000
1001	error = down_write_killable_nested(sem: &dir->i_rwsem, subclass: I_MUTEX_PARENT);
1002	if (error == -EINTR)
1003	return error;
1004
1005	dentry = lookup_one(idmap, name, parent->dentry, namelen);
1006	error = PTR_ERR(ptr: dentry);
1007	if (IS_ERR(ptr: dentry))
1008	goto out_unlock;
1009
1010	error = btrfs_may_create(idmap, dir, child: dentry);
1011	if (error)
1012	goto out_dput;
1013
1014	/*
1015	* even if this name doesn't exist, we may get hash collisions.
1016	* check for them now when we can safely fail
1017	*/
1018	error = btrfs_check_dir_item_collision(root: BTRFS_I(inode: dir)->root,
1019	dir: dir->i_ino, name: &name_str);
1020	if (error)
1021	goto out_dput;
1022
1023	down_read(sem: &fs_info->subvol_sem);
1024
1025	if (btrfs_root_refs(s: &BTRFS_I(inode: dir)->root->root_item) == `0`)
1026	goto out_up_read;
1027
1028	if (snap_src)
1029	error = create_snapshot(root: snap_src, dir, dentry, readonly, inherit);
1030	else
1031	error = create_subvol(idmap, dir, dentry, inherit);
1032
1033	if (!error)
1034	fsnotify_mkdir(dir, dentry);
1035	out_up_read:
1036	up_read(sem: &fs_info->subvol_sem);
1037	out_dput:
1038	dput(dentry);
1039	out_unlock:
1040	btrfs_inode_unlock(inode: BTRFS_I(inode: dir), ilock_flags: `0`);
1041	return error;
1042	}
1043
1044	static noinline int btrfs_mksnapshot(const struct path *parent,
1045	struct mnt_idmap *idmap,
1046	const char name, int* namelen,
1047	struct btrfs_root *root,
1048	bool readonly,
1049	struct btrfs_qgroup_inherit *inherit)
1050	{
1051	int ret;
1052	bool snapshot_force_cow = false;
1053
1054	/*
1055	* Force new buffered writes to reserve space even when NOCOW is
1056	* possible. This is to avoid later writeback (running dealloc) to
1057	* fallback to COW mode and unexpectedly fail with ENOSPC.
1058	*/
1059	btrfs_drew_read_lock(lock: &root->snapshot_lock);
1060
1061	ret = btrfs_start_delalloc_snapshot(root, in_reclaim_context: false);
1062	if (ret)
1063	goto out;
1064
1065	/*
1066	* All previous writes have started writeback in NOCOW mode, so now
1067	* we force future writes to fallback to COW mode during snapshot
1068	* creation.
1069	*/
1070	atomic_inc(v: &root->snapshot_force_cow);
1071	snapshot_force_cow = true;
1072
1073	btrfs_wait_ordered_extents(root, U64_MAX, range_start: `0`, range_len: (u64)-`1`);
1074
1075	ret = btrfs_mksubvol(parent, idmap, name, namelen,
1076	snap_src: root, readonly, inherit);
1077	out:
1078	if (snapshot_force_cow)
1079	atomic_dec(v: &root->snapshot_force_cow);
1080	btrfs_drew_read_unlock(lock: &root->snapshot_lock);
1081	return ret;
1082	}
1083
1084	/*
1085	* Try to start exclusive operation @type or cancel it if it's running.
1086	*
1087	* Return:
1088	* 0 - normal mode, newly claimed op started
1089	* >0 - normal mode, something else is running,
1090	* return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS to user space
1091	* ECANCELED - cancel mode, successful cancel
1092	* ENOTCONN - cancel mode, operation not running anymore
1093	*/
1094	static int exclop_start_or_cancel_reloc(struct btrfs_fs_info *fs_info,
1095	enum btrfs_exclusive_operation type, bool cancel)
1096	{
1097	if (!cancel) {
1098	/ Start normal op /
1099	if (!btrfs_exclop_start(fs_info, type))
1100	return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
1101	/ Exclusive operation is now claimed /
1102	return `0`;
1103	}
1104
1105	/ Cancel running op /
1106	if (btrfs_exclop_start_try_lock(fs_info, type)) {
1107	/*
1108	* This blocks any exclop finish from setting it to NONE, so we
1109	* request cancellation. Either it runs and we will wait for it,
1110	* or it has finished and no waiting will happen.
1111	*/
1112	atomic_inc(v: &fs_info->reloc_cancel_req);
1113	btrfs_exclop_start_unlock(fs_info);
1114
1115	if (test_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags))
1116	wait_on_bit(word: &fs_info->flags, bit: BTRFS_FS_RELOC_RUNNING,
1117	TASK_INTERRUPTIBLE);
1118
1119	return -ECANCELED;
1120	}
1121
1122	/ Something else is running or none /
1123	return -ENOTCONN;
1124	}
1125
1126	static noinline int btrfs_ioctl_resize(struct file *file,
1127	void __user *arg)
1128	{
1129	BTRFS_DEV_LOOKUP_ARGS(args);
1130	struct inode *inode = file_inode(f: file);
1131	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
1132	u64 new_size;
1133	u64 old_size;
1134	u64 devid = `1`;
1135	struct btrfs_root *root = BTRFS_I(inode)->root;
1136	struct btrfs_ioctl_vol_args *vol_args;
1137	struct btrfs_trans_handle *trans;
1138	struct btrfs_device *device = NULL;
1139	char *sizestr;
1140	char *retptr;
1141	char *devstr = NULL;
1142	int ret = `0`;
1143	int mod = `0`;
1144	bool cancel;
1145
1146	if (!capable(CAP_SYS_ADMIN))
1147	return -EPERM;
1148
1149	ret = mnt_want_write_file(file);
1150	if (ret)
1151	return ret;
1152
1153	/*
1154	* Read the arguments before checking exclusivity to be able to
1155	* distinguish regular resize and cancel
1156	*/
1157	vol_args = memdup_user(arg, sizeof(*vol_args));
1158	if (IS_ERR(ptr: vol_args)) {
1159	ret = PTR_ERR(ptr: vol_args);
1160	goto out_drop;
1161	}
1162	ret = btrfs_check_ioctl_vol_args_path(vol_args);
1163	if (ret < `0`)
1164	goto out_free;
1165
1166	sizestr = vol_args->name;
1167	cancel = (strcmp("cancel", sizestr) == `0`);
1168	ret = exclop_start_or_cancel_reloc(fs_info, type: BTRFS_EXCLOP_RESIZE, cancel);
1169	if (ret)
1170	goto out_free;
1171	/ Exclusive operation is now claimed /
1172
1173	devstr = strchr(sizestr, `':'`);
1174	if (devstr) {
1175	sizestr = devstr + `1`;
1176	*devstr = `'\0'`;
1177	devstr = vol_args->name;
1178	ret = kstrtoull(s: devstr, base: `10`, res: &devid);
1179	if (ret)
1180	goto out_finish;
1181	if (!devid) {
1182	ret = -EINVAL;
1183	goto out_finish;
1184	}
1185	btrfs_info(fs_info, "resizing devid %llu", devid);
1186	}
1187
1188	args.devid = devid;
1189	device = btrfs_find_device(fs_devices: fs_info->fs_devices, args: &args);
1190	if (!device) {
1191	btrfs_info(fs_info, "resizer unable to find device %llu",
1192	devid);
1193	ret = -ENODEV;
1194	goto out_finish;
1195	}
1196
1197	if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
1198	btrfs_info(fs_info,
1199	"resizer unable to apply on readonly device %llu",
1200	devid);
1201	ret = -EPERM;
1202	goto out_finish;
1203	}
1204
1205	if (!strcmp(sizestr, "max"))
1206	new_size = bdev_nr_bytes(bdev: device->bdev);
1207	else {
1208	if (sizestr[`0`] == `'-'`) {
1209	mod = -`1`;
1210	sizestr++;
1211	} else if (sizestr[`0`] == `'+'`) {
1212	mod = `1`;
1213	sizestr++;
1214	}
1215	new_size = memparse(ptr: sizestr, retptr: &retptr);
1216	if (*retptr != `'\0'` \|\| new_size == `0`) {
1217	ret = -EINVAL;
1218	goto out_finish;
1219	}
1220	}
1221
1222	if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
1223	ret = -EPERM;
1224	goto out_finish;
1225	}
1226
1227	old_size = btrfs_device_get_total_bytes(dev: device);
1228
1229	if (mod < `0`) {
1230	if (new_size > old_size) {
1231	ret = -EINVAL;
1232	goto out_finish;
1233	}
1234	new_size = old_size - new_size;
1235	} else if (mod > `0`) {
1236	if (new_size > ULLONG_MAX - old_size) {
1237	ret = -ERANGE;
1238	goto out_finish;
1239	}
1240	new_size = old_size + new_size;
1241	}
1242
1243	if (new_size < SZ_256M) {
1244	ret = -EINVAL;
1245	goto out_finish;
1246	}
1247	if (new_size > bdev_nr_bytes(bdev: device->bdev)) {
1248	ret = -EFBIG;
1249	goto out_finish;
1250	}
1251
1252	new_size = round_down(new_size, fs_info->sectorsize);
1253
1254	if (new_size > old_size) {
1255	trans = btrfs_start_transaction(root, num_items: `0`);
1256	if (IS_ERR(ptr: trans)) {
1257	ret = PTR_ERR(ptr: trans);
1258	goto out_finish;
1259	}
1260	ret = btrfs_grow_device(trans, device, new_size);
1261	btrfs_commit_transaction(trans);
1262	} else if (new_size < old_size) {
1263	ret = btrfs_shrink_device(device, new_size);
1264	} / equal, nothing need to do /
1265
1266	if (ret == `0` && new_size != old_size)
1267	btrfs_info_in_rcu(fs_info,
1268	"resize device %s (devid %llu) from %llu to %llu",
1269	btrfs_dev_name(device), device->devid,
1270	old_size, new_size);
1271	out_finish:
1272	btrfs_exclop_finish(fs_info);
1273	out_free:
1274	kfree(objp: vol_args);
1275	out_drop:
1276	mnt_drop_write_file(file);
1277	return ret;
1278	}
1279
1280	static noinline int __btrfs_ioctl_snap_create(struct file *file,
1281	struct mnt_idmap *idmap,
1282	const char name, unsigned* long fd, int subvol,
1283	bool readonly,
1284	struct btrfs_qgroup_inherit *inherit)
1285	{
1286	int namelen;
1287	int ret = `0`;
1288
1289	if (!S_ISDIR(file_inode(file)->i_mode))
1290	return -ENOTDIR;
1291
1292	ret = mnt_want_write_file(file);
1293	if (ret)
1294	goto out;
1295
1296	namelen = strlen(name);
1297	if (strchr(name, `'/'`)) {
1298	ret = -EINVAL;
1299	goto out_drop_write;
1300	}
1301
1302	if (name[`0`] == `'.'` &&
1303	(namelen == `1` \|\| (name[`1`] == `'.'` && namelen == `2`))) {
1304	ret = -EEXIST;
1305	goto out_drop_write;
1306	}
1307
1308	if (subvol) {
1309	ret = btrfs_mksubvol(parent: &file->f_path, idmap, name,
1310	namelen, NULL, readonly, inherit);
1311	} else {
1312	struct fd src = fdget(fd);
1313	struct inode *src_inode;
1314	if (!src.file) {
1315	ret = -EINVAL;
1316	goto out_drop_write;
1317	}
1318
1319	src_inode = file_inode(f: src.file);
1320	if (src_inode->i_sb != file_inode(f: file)->i_sb) {
1321	btrfs_info(BTRFS_I(file_inode(file))->root->fs_info,
1322	"Snapshot src from another FS");
1323	ret = -EXDEV;
1324	} else if (!inode_owner_or_capable(idmap, inode: src_inode)) {
1325	/*
1326	* Subvolume creation is not restricted, but snapshots
1327	* are limited to own subvolumes only
1328	*/
1329	ret = -EPERM;
1330	} else if (btrfs_ino(inode: BTRFS_I(inode: src_inode)) != BTRFS_FIRST_FREE_OBJECTID) {
1331	/*
1332	* Snapshots must be made with the src_inode referring
1333	* to the subvolume inode, otherwise the permission
1334	* checking above is useless because we may have
1335	* permission on a lower directory but not the subvol
1336	* itself.
1337	*/
1338	ret = -EINVAL;
1339	} else {
1340	ret = btrfs_mksnapshot(parent: &file->f_path, idmap,
1341	name, namelen,
1342	root: BTRFS_I(inode: src_inode)->root,
1343	readonly, inherit);
1344	}
1345	fdput(fd: src);
1346	}
1347	out_drop_write:
1348	mnt_drop_write_file(file);
1349	out:
1350	return ret;
1351	}
1352
1353	static noinline int btrfs_ioctl_snap_create(struct file *file,
1354	void __user arg, int* subvol)
1355	{
1356	struct btrfs_ioctl_vol_args *vol_args;
1357	int ret;
1358
1359	if (!S_ISDIR(file_inode(file)->i_mode))
1360	return -ENOTDIR;
1361
1362	vol_args = memdup_user(arg, sizeof(*vol_args));
1363	if (IS_ERR(ptr: vol_args))
1364	return PTR_ERR(ptr: vol_args);
1365	ret = btrfs_check_ioctl_vol_args_path(vol_args);
1366	if (ret < `0`)
1367	goto out;
1368
1369	ret = __btrfs_ioctl_snap_create(file, idmap: file_mnt_idmap(file),
1370	name: vol_args->name, fd: vol_args->fd, subvol,
1371	readonly: false, NULL);
1372
1373	out:
1374	kfree(objp: vol_args);
1375	return ret;
1376	}
1377
1378	static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
1379	void __user arg, int* subvol)
1380	{
1381	struct btrfs_ioctl_vol_args_v2 *vol_args;
1382	int ret;
1383	bool readonly = false;
1384	struct btrfs_qgroup_inherit *inherit = NULL;
1385
1386	if (!S_ISDIR(file_inode(file)->i_mode))
1387	return -ENOTDIR;
1388
1389	vol_args = memdup_user(arg, sizeof(*vol_args));
1390	if (IS_ERR(ptr: vol_args))
1391	return PTR_ERR(ptr: vol_args);
1392	ret = btrfs_check_ioctl_vol_args2_subvol_name(vol_args2: vol_args);
1393	if (ret < `0`)
1394	goto free_args;
1395
1396	if (vol_args->flags & ~BTRFS_SUBVOL_CREATE_ARGS_MASK) {
1397	ret = -EOPNOTSUPP;
1398	goto free_args;
1399	}
1400
1401	if (vol_args->flags & BTRFS_SUBVOL_RDONLY)
1402	readonly = true;
1403	if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) {
1404	struct btrfs_fs_info *fs_info = inode_to_fs_info(file_inode(file));
1405
1406	if (vol_args->size < sizeof(*inherit) \|\|
1407	vol_args->size > PAGE_SIZE) {
1408	ret = -EINVAL;
1409	goto free_args;
1410	}
1411	inherit = memdup_user(vol_args->qgroup_inherit, vol_args->size);
1412	if (IS_ERR(ptr: inherit)) {
1413	ret = PTR_ERR(ptr: inherit);
1414	goto free_args;
1415	}
1416
1417	ret = btrfs_qgroup_check_inherit(fs_info, inherit, size: vol_args->size);
1418	if (ret < `0`)
1419	goto free_inherit;
1420	}
1421
1422	ret = __btrfs_ioctl_snap_create(file, idmap: file_mnt_idmap(file),
1423	name: vol_args->name, fd: vol_args->fd, subvol,
1424	readonly, inherit);
1425	if (ret)
1426	goto free_inherit;
1427	free_inherit:
1428	kfree(objp: inherit);
1429	free_args:
1430	kfree(objp: vol_args);
1431	return ret;
1432	}
1433
1434	static noinline int btrfs_ioctl_subvol_getflags(struct inode *inode,
1435	void __user *arg)
1436	{
1437	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
1438	struct btrfs_root *root = BTRFS_I(inode)->root;
1439	int ret = `0`;
1440	u64 flags = `0`;
1441
1442	if (btrfs_ino(inode: BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID)
1443	return -EINVAL;
1444
1445	down_read(sem: &fs_info->subvol_sem);
1446	if (btrfs_root_readonly(root))
1447	flags \|= BTRFS_SUBVOL_RDONLY;
1448	up_read(sem: &fs_info->subvol_sem);
1449
1450	if (copy_to_user(to: arg, from: &flags, n: sizeof(flags)))
1451	ret = -EFAULT;
1452
1453	return ret;
1454	}
1455
1456	static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1457	void __user *arg)
1458	{
1459	struct inode *inode = file_inode(f: file);
1460	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
1461	struct btrfs_root *root = BTRFS_I(inode)->root;
1462	struct btrfs_trans_handle *trans;
1463	u64 root_flags;
1464	u64 flags;
1465	int ret = `0`;
1466
1467	if (!inode_owner_or_capable(idmap: file_mnt_idmap(file), inode))
1468	return -EPERM;
1469
1470	ret = mnt_want_write_file(file);
1471	if (ret)
1472	goto out;
1473
1474	if (btrfs_ino(inode: BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
1475	ret = -EINVAL;
1476	goto out_drop_write;
1477	}
1478
1479	if (copy_from_user(to: &flags, from: arg, n: sizeof(flags))) {
1480	ret = -EFAULT;
1481	goto out_drop_write;
1482	}
1483
1484	if (flags & ~BTRFS_SUBVOL_RDONLY) {
1485	ret = -EOPNOTSUPP;
1486	goto out_drop_write;
1487	}
1488
1489	down_write(sem: &fs_info->subvol_sem);
1490
1491	/ nothing to do /
1492	if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root))
1493	goto out_drop_sem;
1494
1495	root_flags = btrfs_root_flags(s: &root->root_item);
1496	if (flags & BTRFS_SUBVOL_RDONLY) {
1497	btrfs_set_root_flags(s: &root->root_item,
1498	val: root_flags \| BTRFS_ROOT_SUBVOL_RDONLY);
1499	} else {
1500	/*
1501	* Block RO -> RW transition if this subvolume is involved in
1502	* send
1503	*/
1504	spin_lock(lock: &root->root_item_lock);
1505	if (root->send_in_progress == `0`) {
1506	btrfs_set_root_flags(s: &root->root_item,
1507	val: root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY);
1508	spin_unlock(lock: &root->root_item_lock);
1509	} else {
1510	spin_unlock(lock: &root->root_item_lock);
1511	btrfs_warn(fs_info,
1512	"Attempt to set subvolume %llu read-write during send",
1513	root->root_key.objectid);
1514	ret = -EPERM;
1515	goto out_drop_sem;
1516	}
1517	}
1518
1519	trans = btrfs_start_transaction(root, num_items: `1`);
1520	if (IS_ERR(ptr: trans)) {
1521	ret = PTR_ERR(ptr: trans);
1522	goto out_reset;
1523	}
1524
1525	ret = btrfs_update_root(trans, root: fs_info->tree_root,
1526	key: &root->root_key, item: &root->root_item);
1527	if (ret < `0`) {
1528	btrfs_end_transaction(trans);
1529	goto out_reset;
1530	}
1531
1532	ret = btrfs_commit_transaction(trans);
1533
1534	out_reset:
1535	if (ret)
1536	btrfs_set_root_flags(s: &root->root_item, val: root_flags);
1537	out_drop_sem:
1538	up_write(sem: &fs_info->subvol_sem);
1539	out_drop_write:
1540	mnt_drop_write_file(file);
1541	out:
1542	return ret;
1543	}
1544
1545	static noinline int key_in_sk(struct btrfs_key *key,
1546	struct btrfs_ioctl_search_key *sk)
1547	{
1548	struct btrfs_key test;
1549	int ret;
1550
1551	test.objectid = sk->min_objectid;
1552	test.type = sk->min_type;
1553	test.offset = sk->min_offset;
1554
1555	ret = btrfs_comp_cpu_keys(k1: key, k2: &test);
1556	if (ret < `0`)
1557	return `0`;
1558
1559	test.objectid = sk->max_objectid;
1560	test.type = sk->max_type;
1561	test.offset = sk->max_offset;
1562
1563	ret = btrfs_comp_cpu_keys(k1: key, k2: &test);
1564	if (ret > `0`)
1565	return `0`;
1566	return `1`;
1567	}
1568
1569	static noinline int copy_to_sk(struct btrfs_path *path,
1570	struct btrfs_key *key,
1571	struct btrfs_ioctl_search_key *sk,
1572	u64 *buf_size,
1573	char __user *ubuf,
1574	unsigned long *sk_offset,
1575	int *num_found)
1576	{
1577	u64 found_transid;
1578	struct extent_buffer *leaf;
1579	struct btrfs_ioctl_search_header sh;
1580	struct btrfs_key test;
1581	unsigned long item_off;
1582	unsigned long item_len;
1583	int nritems;
1584	int i;
1585	int slot;
1586	int ret = `0`;
1587
1588	leaf = path->nodes[`0`];
1589	slot = path->slots[`0`];
1590	nritems = btrfs_header_nritems(eb: leaf);
1591
1592	if (btrfs_header_generation(eb: leaf) > sk->max_transid) {
1593	i = nritems;
1594	goto advance_key;
1595	}
1596	found_transid = btrfs_header_generation(eb: leaf);
1597
1598	for (i = slot; i < nritems; i++) {
1599	item_off = btrfs_item_ptr_offset(leaf, i);
1600	item_len = btrfs_item_size(eb: leaf, slot: i);
1601
1602	btrfs_item_key_to_cpu(eb: leaf, cpu_key: key, nr: i);
1603	if (!key_in_sk(key, sk))
1604	continue;
1605
1606	if (sizeof(sh) + item_len > *buf_size) {
1607	if (*num_found) {
1608	ret = `1`;
1609	goto out;
1610	}
1611
1612	/*
1613	* return one empty item back for v1, which does not
1614	* handle -EOVERFLOW
1615	*/
1616
1617	buf_size = sizeof*(sh) + item_len;
1618	item_len = `0`;
1619	ret = -EOVERFLOW;
1620	}
1621
1622	if (sizeof(sh) + item_len + sk_offset > buf_size) {
1623	ret = `1`;
1624	goto out;
1625	}
1626
1627	sh.objectid = key->objectid;
1628	sh.offset = key->offset;
1629	sh.type = key->type;
1630	sh.len = item_len;
1631	sh.transid = found_transid;
1632
1633	/*
1634	* Copy search result header. If we fault then loop again so we
1635	* can fault in the pages and -EFAULT there if there's a
1636	* problem. Otherwise we'll fault and then copy the buffer in
1637	* properly this next time through
1638	*/
1639	if (copy_to_user_nofault(dst: ubuf + sk_offset, src: &sh, size: sizeof*(sh))) {
1640	ret = `0`;
1641	goto out;
1642	}
1643
1644	sk_offset += sizeof*(sh);
1645
1646	if (item_len) {
1647	char __user up = ubuf + sk_offset;
1648	/*
1649	* Copy the item, same behavior as above, but reset the
1650	* * sk_offset so we copy the full thing again.
1651	*/
1652	if (read_extent_buffer_to_user_nofault(eb: leaf, dst: up,
1653	start: item_off, len: item_len)) {
1654	ret = `0`;
1655	sk_offset -= sizeof*(sh);
1656	goto out;
1657	}
1658
1659	*sk_offset += item_len;
1660	}
1661	(*num_found)++;
1662
1663	if (ret) / -EOVERFLOW from above /
1664	goto out;
1665
1666	if (*num_found >= sk->nr_items) {
1667	ret = `1`;
1668	goto out;
1669	}
1670	}
1671	advance_key:
1672	ret = `0`;
1673	test.objectid = sk->max_objectid;
1674	test.type = sk->max_type;
1675	test.offset = sk->max_offset;
1676	if (btrfs_comp_cpu_keys(k1: key, k2: &test) >= `0`)
1677	ret = `1`;
1678	else if (key->offset < (u64)-`1`)
1679	key->offset++;
1680	else if (key->type < (u8)-`1`) {
1681	key->offset = `0`;
1682	key->type++;
1683	} else if (key->objectid < (u64)-`1`) {
1684	key->offset = `0`;
1685	key->type = `0`;
1686	key->objectid++;
1687	} else
1688	ret = `1`;
1689	out:
1690	/*
1691	* 0: all items from this leaf copied, continue with next
1692	* 1: * more items can be copied, but unused buffer is too small
1693	* * all items were found
1694	* Either way, it will stops the loop which iterates to the next
1695	* leaf
1696	* -EOVERFLOW: item was to large for buffer
1697	* -EFAULT: could not copy extent buffer back to userspace
1698	*/
1699	return ret;
1700	}
1701
1702	static noinline int search_ioctl(struct inode *inode,
1703	struct btrfs_ioctl_search_key *sk,
1704	u64 *buf_size,
1705	char __user *ubuf)
1706	{
1707	struct btrfs_fs_info *info = inode_to_fs_info(inode);
1708	struct btrfs_root *root;
1709	struct btrfs_key key;
1710	struct btrfs_path *path;
1711	int ret;
1712	int num_found = `0`;
1713	unsigned long sk_offset = `0`;
1714
1715	if (buf_size < sizeof(struct* btrfs_ioctl_search_header)) {
1716	buf_size = sizeof(struct* btrfs_ioctl_search_header);
1717	return -EOVERFLOW;
1718	}
1719
1720	path = btrfs_alloc_path();
1721	if (!path)
1722	return -ENOMEM;
1723
1724	if (sk->tree_id == `0`) {
1725	/ search the root of the inode that was passed /
1726	root = btrfs_grab_root(root: BTRFS_I(inode)->root);
1727	} else {
1728	root = btrfs_get_fs_root(fs_info: info, objectid: sk->tree_id, check_ref: true);
1729	if (IS_ERR(ptr: root)) {
1730	btrfs_free_path(p: path);
1731	return PTR_ERR(ptr: root);
1732	}
1733	}
1734
1735	key.objectid = sk->min_objectid;
1736	key.type = sk->min_type;
1737	key.offset = sk->min_offset;
1738
1739	while (`1`) {
1740	ret = -EFAULT;
1741	/*
1742	* Ensure that the whole user buffer is faulted in at sub-page
1743	* granularity, otherwise the loop may live-lock.
1744	*/
1745	if (fault_in_subpage_writeable(uaddr: ubuf + sk_offset,
1746	size: *buf_size - sk_offset))
1747	break;
1748
1749	ret = btrfs_search_forward(root, min_key: &key, path, min_trans: sk->min_transid);
1750	if (ret != `0`) {
1751	if (ret > `0`)
1752	ret = `0`;
1753	goto err;
1754	}
1755	ret = copy_to_sk(path, key: &key, sk, buf_size, ubuf,
1756	sk_offset: &sk_offset, num_found: &num_found);
1757	btrfs_release_path(p: path);
1758	if (ret)
1759	break;
1760
1761	}
1762	if (ret > `0`)
1763	ret = `0`;
1764	err:
1765	sk->nr_items = num_found;
1766	btrfs_put_root(root);
1767	btrfs_free_path(p: path);
1768	return ret;
1769	}
1770
1771	static noinline int btrfs_ioctl_tree_search(struct inode *inode,
1772	void __user *argp)
1773	{
1774	struct btrfs_ioctl_search_args __user *uargs = argp;
1775	struct btrfs_ioctl_search_key sk;
1776	int ret;
1777	u64 buf_size;
1778
1779	if (!capable(CAP_SYS_ADMIN))
1780	return -EPERM;
1781
1782	if (copy_from_user(to: &sk, from: &uargs->key, n: sizeof(sk)))
1783	return -EFAULT;
1784
1785	buf_size = sizeof(uargs->buf);
1786
1787	ret = search_ioctl(inode, sk: &sk, buf_size: &buf_size, ubuf: uargs->buf);
1788
1789	/*
1790	* In the origin implementation an overflow is handled by returning a
1791	* search header with a len of zero, so reset ret.
1792	*/
1793	if (ret == -EOVERFLOW)
1794	ret = `0`;
1795
1796	if (ret == `0` && copy_to_user(to: &uargs->key, from: &sk, n: sizeof(sk)))
1797	ret = -EFAULT;
1798	return ret;
1799	}
1800
1801	static noinline int btrfs_ioctl_tree_search_v2(struct inode *inode,
1802	void __user *argp)
1803	{
1804	struct btrfs_ioctl_search_args_v2 __user *uarg = argp;
1805	struct btrfs_ioctl_search_args_v2 args;
1806	int ret;
1807	u64 buf_size;
1808	const u64 buf_limit = SZ_16M;
1809
1810	if (!capable(CAP_SYS_ADMIN))
1811	return -EPERM;
1812
1813	/ copy search header and buffer size /
1814	if (copy_from_user(to: &args, from: uarg, n: sizeof(args)))
1815	return -EFAULT;
1816
1817	buf_size = args.buf_size;
1818
1819	/ limit result size to 16MB /
1820	if (buf_size > buf_limit)
1821	buf_size = buf_limit;
1822
1823	ret = search_ioctl(inode, sk: &args.key, buf_size: &buf_size,
1824	ubuf: (char __user *)(&uarg->buf[`0`]));
1825	if (ret == `0` && copy_to_user(to: &uarg->key, from: &args.key, n: sizeof(args.key)))
1826	ret = -EFAULT;
1827	else if (ret == -EOVERFLOW &&
1828	copy_to_user(to: &uarg->buf_size, from: &buf_size, n: sizeof(buf_size)))
1829	ret = -EFAULT;
1830
1831	return ret;
1832	}
1833
1834	/*
1835	* Search INODE_REFs to identify path name of 'dirid' directory
1836	* in a 'tree_id' tree. and sets path name to 'name'.
1837	*/
1838	static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
1839	u64 tree_id, u64 dirid, char *name)
1840	{
1841	struct btrfs_root *root;
1842	struct btrfs_key key;
1843	char *ptr;
1844	int ret = -`1`;
1845	int slot;
1846	int len;
1847	int total_len = `0`;
1848	struct btrfs_inode_ref *iref;
1849	struct extent_buffer *l;
1850	struct btrfs_path *path;
1851
1852	if (dirid == BTRFS_FIRST_FREE_OBJECTID) {
1853	name[`0`]=`'\0'`;
1854	return `0`;
1855	}
1856
1857	path = btrfs_alloc_path();
1858	if (!path)
1859	return -ENOMEM;
1860
1861	ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX - `1`];
1862
1863	root = btrfs_get_fs_root(fs_info: info, objectid: tree_id, check_ref: true);
1864	if (IS_ERR(ptr: root)) {
1865	ret = PTR_ERR(ptr: root);
1866	root = NULL;
1867	goto out;
1868	}
1869
1870	key.objectid = dirid;
1871	key.type = BTRFS_INODE_REF_KEY;
1872	key.offset = (u64)-`1`;
1873
1874	while (`1`) {
1875	ret = btrfs_search_backwards(root, key: &key, path);
1876	if (ret < `0`)
1877	goto out;
1878	else if (ret > `0`) {
1879	ret = -ENOENT;
1880	goto out;
1881	}
1882
1883	l = path->nodes[`0`];
1884	slot = path->slots[`0`];
1885
1886	iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref);
1887	len = btrfs_inode_ref_name_len(eb: l, s: iref);
1888	ptr -= len + `1`;
1889	total_len += len + `1`;
1890	if (ptr < name) {
1891	ret = -ENAMETOOLONG;
1892	goto out;
1893	}
1894
1895	*(ptr + len) = `'/'`;
1896	read_extent_buffer(eb: l, dst: ptr, start: (unsigned long)(iref + `1`), len);
1897
1898	if (key.offset == BTRFS_FIRST_FREE_OBJECTID)
1899	break;
1900
1901	btrfs_release_path(p: path);
1902	key.objectid = key.offset;
1903	key.offset = (u64)-`1`;
1904	dirid = key.objectid;
1905	}
1906	memmove(name, ptr, total_len);
1907	name[total_len] = `'\0'`;
1908	ret = `0`;
1909	out:
1910	btrfs_put_root(root);
1911	btrfs_free_path(p: path);
1912	return ret;
1913	}
1914
1915	static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap,
1916	struct inode *inode,
1917	struct btrfs_ioctl_ino_lookup_user_args *args)
1918	{
1919	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
1920	struct super_block *sb = inode->i_sb;
1921	struct btrfs_key upper_limit = BTRFS_I(inode)->location;
1922	u64 treeid = BTRFS_I(inode)->root->root_key.objectid;
1923	u64 dirid = args->dirid;
1924	unsigned long item_off;
1925	unsigned long item_len;
1926	struct btrfs_inode_ref *iref;
1927	struct btrfs_root_ref *rref;
1928	struct btrfs_root *root = NULL;
1929	struct btrfs_path *path;
1930	struct btrfs_key key, key2;
1931	struct extent_buffer *leaf;
1932	struct inode *temp_inode;
1933	char *ptr;
1934	int slot;
1935	int len;
1936	int total_len = `0`;
1937	int ret;
1938
1939	path = btrfs_alloc_path();
1940	if (!path)
1941	return -ENOMEM;
1942
1943	/*
1944	* If the bottom subvolume does not exist directly under upper_limit,
1945	* construct the path in from the bottom up.
1946	*/
1947	if (dirid != upper_limit.objectid) {
1948	ptr = &args->path[BTRFS_INO_LOOKUP_USER_PATH_MAX - `1`];
1949
1950	root = btrfs_get_fs_root(fs_info, objectid: treeid, check_ref: true);
1951	if (IS_ERR(ptr: root)) {
1952	ret = PTR_ERR(ptr: root);
1953	goto out;
1954	}
1955
1956	key.objectid = dirid;
1957	key.type = BTRFS_INODE_REF_KEY;
1958	key.offset = (u64)-`1`;
1959	while (`1`) {
1960	ret = btrfs_search_backwards(root, key: &key, path);
1961	if (ret < `0`)
1962	goto out_put;
1963	else if (ret > `0`) {
1964	ret = -ENOENT;
1965	goto out_put;
1966	}
1967
1968	leaf = path->nodes[`0`];
1969	slot = path->slots[`0`];
1970
1971	iref = btrfs_item_ptr(leaf, slot, struct btrfs_inode_ref);
1972	len = btrfs_inode_ref_name_len(eb: leaf, s: iref);
1973	ptr -= len + `1`;
1974	total_len += len + `1`;
1975	if (ptr < args->path) {
1976	ret = -ENAMETOOLONG;
1977	goto out_put;
1978	}
1979
1980	*(ptr + len) = `'/'`;
1981	read_extent_buffer(eb: leaf, dst: ptr,
1982	start: (unsigned long)(iref + `1`), len);
1983
1984	/ Check the read+exec permission of this directory /
1985	ret = btrfs_previous_item(root, path, min_objectid: dirid,
1986	BTRFS_INODE_ITEM_KEY);
1987	if (ret < `0`) {
1988	goto out_put;
1989	} else if (ret > `0`) {
1990	ret = -ENOENT;
1991	goto out_put;
1992	}
1993
1994	leaf = path->nodes[`0`];
1995	slot = path->slots[`0`];
1996	btrfs_item_key_to_cpu(eb: leaf, cpu_key: &key2, nr: slot);
1997	if (key2.objectid != dirid) {
1998	ret = -ENOENT;
1999	goto out_put;
2000	}
2001
2002	/*
2003	* We don't need the path anymore, so release it and
2004	* avoid deadlocks and lockdep warnings in case
2005	* btrfs_iget() needs to lookup the inode from its root
2006	* btree and lock the same leaf.
2007	*/
2008	btrfs_release_path(p: path);
2009	temp_inode = btrfs_iget(s: sb, ino: key2.objectid, root);
2010	if (IS_ERR(ptr: temp_inode)) {
2011	ret = PTR_ERR(ptr: temp_inode);
2012	goto out_put;
2013	}
2014	ret = inode_permission(idmap, temp_inode,
2015	MAY_READ \| MAY_EXEC);
2016	iput(temp_inode);
2017	if (ret) {
2018	ret = -EACCES;
2019	goto out_put;
2020	}
2021
2022	if (key.offset == upper_limit.objectid)
2023	break;
2024	if (key.objectid == BTRFS_FIRST_FREE_OBJECTID) {
2025	ret = -EACCES;
2026	goto out_put;
2027	}
2028
2029	key.objectid = key.offset;
2030	key.offset = (u64)-`1`;
2031	dirid = key.objectid;
2032	}
2033
2034	memmove(args->path, ptr, total_len);
2035	args->path[total_len] = `'\0'`;
2036	btrfs_put_root(root);
2037	root = NULL;
2038	btrfs_release_path(p: path);
2039	}
2040
2041	/ Get the bottom subvolume's name from ROOT_REF /
2042	key.objectid = treeid;
2043	key.type = BTRFS_ROOT_REF_KEY;
2044	key.offset = args->treeid;
2045	ret = btrfs_search_slot(NULL, root: fs_info->tree_root, key: &key, p: path, ins_len: `0`, cow: `0`);
2046	if (ret < `0`) {
2047	goto out;
2048	} else if (ret > `0`) {
2049	ret = -ENOENT;
2050	goto out;
2051	}
2052
2053	leaf = path->nodes[`0`];
2054	slot = path->slots[`0`];
2055	btrfs_item_key_to_cpu(eb: leaf, cpu_key: &key, nr: slot);
2056
2057	item_off = btrfs_item_ptr_offset(leaf, slot);
2058	item_len = btrfs_item_size(eb: leaf, slot);
2059	/ Check if dirid in ROOT_REF corresponds to passed dirid /
2060	rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref);
2061	if (args->dirid != btrfs_root_ref_dirid(eb: leaf, s: rref)) {
2062	ret = -EINVAL;
2063	goto out;
2064	}
2065
2066	/ Copy subvolume's name /
2067	item_off += sizeof(struct btrfs_root_ref);
2068	item_len -= sizeof(struct btrfs_root_ref);
2069	read_extent_buffer(eb: leaf, dst: args->name, start: item_off, len: item_len);
2070	args->name[item_len] = `0`;
2071
2072	out_put:
2073	btrfs_put_root(root);
2074	out:
2075	btrfs_free_path(p: path);
2076	return ret;
2077	}
2078
2079	static noinline int btrfs_ioctl_ino_lookup(struct btrfs_root *root,
2080	void __user *argp)
2081	{
2082	struct btrfs_ioctl_ino_lookup_args *args;
2083	int ret = `0`;
2084
2085	args = memdup_user(argp, sizeof(*args));
2086	if (IS_ERR(ptr: args))
2087	return PTR_ERR(ptr: args);
2088
2089	/*
2090	* Unprivileged query to obtain the containing subvolume root id. The
2091	* path is reset so it's consistent with btrfs_search_path_in_tree.
2092	*/
2093	if (args->treeid == `0`)
2094	args->treeid = root->root_key.objectid;
2095
2096	if (args->objectid == BTRFS_FIRST_FREE_OBJECTID) {
2097	args->name[`0`] = `0`;
2098	goto out;
2099	}
2100
2101	if (!capable(CAP_SYS_ADMIN)) {
2102	ret = -EPERM;
2103	goto out;
2104	}
2105
2106	ret = btrfs_search_path_in_tree(info: root->fs_info,
2107	tree_id: args->treeid, dirid: args->objectid,
2108	name: args->name);
2109
2110	out:
2111	if (ret == `0` && copy_to_user(to: argp, from: args, n: sizeof(*args)))
2112	ret = -EFAULT;
2113
2114	kfree(objp: args);
2115	return ret;
2116	}
2117
2118	/*
2119	* Version of ino_lookup ioctl (unprivileged)
2120	*
2121	* The main differences from ino_lookup ioctl are:
2122	*
2123	* 1. Read + Exec permission will be checked using inode_permission() during
2124	* path construction. -EACCES will be returned in case of failure.
2125	* 2. Path construction will be stopped at the inode number which corresponds
2126	* to the fd with which this ioctl is called. If constructed path does not
2127	* exist under fd's inode, -EACCES will be returned.
2128	* 3. The name of bottom subvolume is also searched and filled.
2129	*/
2130	static int btrfs_ioctl_ino_lookup_user(struct file file, void* __user *argp)
2131	{
2132	struct btrfs_ioctl_ino_lookup_user_args *args;
2133	struct inode *inode;
2134	int ret;
2135
2136	args = memdup_user(argp, sizeof(*args));
2137	if (IS_ERR(ptr: args))
2138	return PTR_ERR(ptr: args);
2139
2140	inode = file_inode(f: file);
2141
2142	if (args->dirid == BTRFS_FIRST_FREE_OBJECTID &&
2143	BTRFS_I(inode)->location.objectid != BTRFS_FIRST_FREE_OBJECTID) {
2144	/*
2145	* The subvolume does not exist under fd with which this is
2146	* called
2147	*/
2148	kfree(objp: args);
2149	return -EACCES;
2150	}
2151
2152	ret = btrfs_search_path_in_tree_user(idmap: file_mnt_idmap(file), inode, args);
2153
2154	if (ret == `0` && copy_to_user(to: argp, from: args, n: sizeof(*args)))
2155	ret = -EFAULT;
2156
2157	kfree(objp: args);
2158	return ret;
2159	}
2160
2161	/ Get the subvolume information in BTRFS_ROOT_ITEM and BTRFS_ROOT_BACKREF /
2162	static int btrfs_ioctl_get_subvol_info(struct inode inode, void* __user *argp)
2163	{
2164	struct btrfs_ioctl_get_subvol_info_args *subvol_info;
2165	struct btrfs_fs_info *fs_info;
2166	struct btrfs_root *root;
2167	struct btrfs_path *path;
2168	struct btrfs_key key;
2169	struct btrfs_root_item *root_item;
2170	struct btrfs_root_ref *rref;
2171	struct extent_buffer *leaf;
2172	unsigned long item_off;
2173	unsigned long item_len;
2174	int slot;
2175	int ret = `0`;
2176
2177	path = btrfs_alloc_path();
2178	if (!path)
2179	return -ENOMEM;
2180
2181	subvol_info = kzalloc(size: sizeof(*subvol_info), GFP_KERNEL);
2182	if (!subvol_info) {
2183	btrfs_free_path(p: path);
2184	return -ENOMEM;
2185	}
2186
2187	fs_info = BTRFS_I(inode)->root->fs_info;
2188
2189	/ Get root_item of inode's subvolume /
2190	key.objectid = BTRFS_I(inode)->root->root_key.objectid;
2191	root = btrfs_get_fs_root(fs_info, objectid: key.objectid, check_ref: true);
2192	if (IS_ERR(ptr: root)) {
2193	ret = PTR_ERR(ptr: root);
2194	goto out_free;
2195	}
2196	root_item = &root->root_item;
2197
2198	subvol_info->treeid = key.objectid;
2199
2200	subvol_info->generation = btrfs_root_generation(s: root_item);
2201	subvol_info->flags = btrfs_root_flags(s: root_item);
2202
2203	memcpy(subvol_info->uuid, root_item->uuid, BTRFS_UUID_SIZE);
2204	memcpy(subvol_info->parent_uuid, root_item->parent_uuid,
2205	BTRFS_UUID_SIZE);
2206	memcpy(subvol_info->received_uuid, root_item->received_uuid,
2207	BTRFS_UUID_SIZE);
2208
2209	subvol_info->ctransid = btrfs_root_ctransid(s: root_item);
2210	subvol_info->ctime.sec = btrfs_stack_timespec_sec(s: &root_item->ctime);
2211	subvol_info->ctime.nsec = btrfs_stack_timespec_nsec(s: &root_item->ctime);
2212
2213	subvol_info->otransid = btrfs_root_otransid(s: root_item);
2214	subvol_info->otime.sec = btrfs_stack_timespec_sec(s: &root_item->otime);
2215	subvol_info->otime.nsec = btrfs_stack_timespec_nsec(s: &root_item->otime);
2216
2217	subvol_info->stransid = btrfs_root_stransid(s: root_item);
2218	subvol_info->stime.sec = btrfs_stack_timespec_sec(s: &root_item->stime);
2219	subvol_info->stime.nsec = btrfs_stack_timespec_nsec(s: &root_item->stime);
2220
2221	subvol_info->rtransid = btrfs_root_rtransid(s: root_item);
2222	subvol_info->rtime.sec = btrfs_stack_timespec_sec(s: &root_item->rtime);
2223	subvol_info->rtime.nsec = btrfs_stack_timespec_nsec(s: &root_item->rtime);
2224
2225	if (key.objectid != BTRFS_FS_TREE_OBJECTID) {
2226	/ Search root tree for ROOT_BACKREF of this subvolume /
2227	key.type = BTRFS_ROOT_BACKREF_KEY;
2228	key.offset = `0`;
2229	ret = btrfs_search_slot(NULL, root: fs_info->tree_root, key: &key, p: path, ins_len: `0`, cow: `0`);
2230	if (ret < `0`) {
2231	goto out;
2232	} else if (path->slots[`0`] >=
2233	btrfs_header_nritems(eb: path->nodes[`0`])) {
2234	ret = btrfs_next_leaf(root: fs_info->tree_root, path);
2235	if (ret < `0`) {
2236	goto out;
2237	} else if (ret > `0`) {
2238	ret = -EUCLEAN;
2239	goto out;
2240	}
2241	}
2242
2243	leaf = path->nodes[`0`];
2244	slot = path->slots[`0`];
2245	btrfs_item_key_to_cpu(eb: leaf, cpu_key: &key, nr: slot);
2246	if (key.objectid == subvol_info->treeid &&
2247	key.type == BTRFS_ROOT_BACKREF_KEY) {
2248	subvol_info->parent_id = key.offset;
2249
2250	rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref);
2251	subvol_info->dirid = btrfs_root_ref_dirid(eb: leaf, s: rref);
2252
2253	item_off = btrfs_item_ptr_offset(leaf, slot)
2254	+ sizeof(struct btrfs_root_ref);
2255	item_len = btrfs_item_size(eb: leaf, slot)
2256	- sizeof(struct btrfs_root_ref);
2257	read_extent_buffer(eb: leaf, dst: subvol_info->name,
2258	start: item_off, len: item_len);
2259	} else {
2260	ret = -ENOENT;
2261	goto out;
2262	}
2263	}
2264
2265	btrfs_free_path(p: path);
2266	path = NULL;
2267	if (copy_to_user(to: argp, from: subvol_info, n: sizeof(*subvol_info)))
2268	ret = -EFAULT;
2269
2270	out:
2271	btrfs_put_root(root);
2272	out_free:
2273	btrfs_free_path(p: path);
2274	kfree(objp: subvol_info);
2275	return ret;
2276	}
2277
2278	/*
2279	* Return ROOT_REF information of the subvolume containing this inode
2280	* except the subvolume name.
2281	*/
2282	static int btrfs_ioctl_get_subvol_rootref(struct btrfs_root *root,
2283	void __user *argp)
2284	{
2285	struct btrfs_ioctl_get_subvol_rootref_args *rootrefs;
2286	struct btrfs_root_ref *rref;
2287	struct btrfs_path *path;
2288	struct btrfs_key key;
2289	struct extent_buffer *leaf;
2290	u64 objectid;
2291	int slot;
2292	int ret;
2293	u8 found;
2294
2295	path = btrfs_alloc_path();
2296	if (!path)
2297	return -ENOMEM;
2298
2299	rootrefs = memdup_user(argp, sizeof(*rootrefs));
2300	if (IS_ERR(ptr: rootrefs)) {
2301	btrfs_free_path(p: path);
2302	return PTR_ERR(ptr: rootrefs);
2303	}
2304
2305	objectid = root->root_key.objectid;
2306	key.objectid = objectid;
2307	key.type = BTRFS_ROOT_REF_KEY;
2308	key.offset = rootrefs->min_treeid;
2309	found = `0`;
2310
2311	root = root->fs_info->tree_root;
2312	ret = btrfs_search_slot(NULL, root, key: &key, p: path, ins_len: `0`, cow: `0`);
2313	if (ret < `0`) {
2314	goto out;
2315	} else if (path->slots[`0`] >=
2316	btrfs_header_nritems(eb: path->nodes[`0`])) {
2317	ret = btrfs_next_leaf(root, path);
2318	if (ret < `0`) {
2319	goto out;
2320	} else if (ret > `0`) {
2321	ret = -EUCLEAN;
2322	goto out;
2323	}
2324	}
2325	while (`1`) {
2326	leaf = path->nodes[`0`];
2327	slot = path->slots[`0`];
2328
2329	btrfs_item_key_to_cpu(eb: leaf, cpu_key: &key, nr: slot);
2330	if (key.objectid != objectid \|\| key.type != BTRFS_ROOT_REF_KEY) {
2331	ret = `0`;
2332	goto out;
2333	}
2334
2335	if (found == BTRFS_MAX_ROOTREF_BUFFER_NUM) {
2336	ret = -EOVERFLOW;
2337	goto out;
2338	}
2339
2340	rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref);
2341	rootrefs->rootref[found].treeid = key.offset;
2342	rootrefs->rootref[found].dirid =
2343	btrfs_root_ref_dirid(eb: leaf, s: rref);
2344	found++;
2345
2346	ret = btrfs_next_item(root, p: path);
2347	if (ret < `0`) {
2348	goto out;
2349	} else if (ret > `0`) {
2350	ret = -EUCLEAN;
2351	goto out;
2352	}
2353	}
2354
2355	out:
2356	btrfs_free_path(p: path);
2357
2358	if (!ret \|\| ret == -EOVERFLOW) {
2359	rootrefs->num_items = found;
2360	/ update min_treeid for next search /
2361	if (found)
2362	rootrefs->min_treeid =
2363	rootrefs->rootref[found - `1`].treeid + `1`;
2364	if (copy_to_user(to: argp, from: rootrefs, n: sizeof(*rootrefs)))
2365	ret = -EFAULT;
2366	}
2367
2368	kfree(objp: rootrefs);
2369
2370	return ret;
2371	}
2372
2373	static noinline int btrfs_ioctl_snap_destroy(struct file *file,
2374	void __user *arg,
2375	bool destroy_v2)
2376	{
2377	struct dentry *parent = file->f_path.dentry;
2378	struct dentry *dentry;
2379	struct inode *dir = d_inode(dentry: parent);
2380	struct btrfs_fs_info *fs_info = inode_to_fs_info(dir);
2381	struct inode *inode;
2382	struct btrfs_root *root = BTRFS_I(inode: dir)->root;
2383	struct btrfs_root *dest = NULL;
2384	struct btrfs_ioctl_vol_args *vol_args = NULL;
2385	struct btrfs_ioctl_vol_args_v2 *vol_args2 = NULL;
2386	struct mnt_idmap *idmap = file_mnt_idmap(file);
2387	char subvol_name, subvol_name_ptr = NULL;
2388	int subvol_namelen;
2389	int err = `0`;
2390	bool destroy_parent = false;
2391
2392	/ We don't support snapshots with extent tree v2 yet. /
2393	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
2394	btrfs_err(fs_info,
2395	"extent tree v2 doesn't support snapshot deletion yet");
2396	return -EOPNOTSUPP;
2397	}
2398
2399	if (destroy_v2) {
2400	vol_args2 = memdup_user(arg, sizeof(*vol_args2));
2401	if (IS_ERR(ptr: vol_args2))
2402	return PTR_ERR(ptr: vol_args2);
2403
2404	if (vol_args2->flags & ~BTRFS_SUBVOL_DELETE_ARGS_MASK) {
2405	err = -EOPNOTSUPP;
2406	goto out;
2407	}
2408
2409	/*
2410	* If SPEC_BY_ID is not set, we are looking for the subvolume by
2411	* name, same as v1 currently does.
2412	*/
2413	if (!(vol_args2->flags & BTRFS_SUBVOL_SPEC_BY_ID)) {
2414	err = btrfs_check_ioctl_vol_args2_subvol_name(vol_args2);
2415	if (err < `0`)
2416	goto out;
2417	subvol_name = vol_args2->name;
2418
2419	err = mnt_want_write_file(file);
2420	if (err)
2421	goto out;
2422	} else {
2423	struct inode *old_dir;
2424
2425	if (vol_args2->subvolid < BTRFS_FIRST_FREE_OBJECTID) {
2426	err = -EINVAL;
2427	goto out;
2428	}
2429
2430	err = mnt_want_write_file(file);
2431	if (err)
2432	goto out;
2433
2434	dentry = btrfs_get_dentry(sb: fs_info->sb,
2435	BTRFS_FIRST_FREE_OBJECTID,
2436	root_objectid: vol_args2->subvolid, generation: `0`);
2437	if (IS_ERR(ptr: dentry)) {
2438	err = PTR_ERR(ptr: dentry);
2439	goto out_drop_write;
2440	}
2441
2442	/*
2443	* Change the default parent since the subvolume being
2444	* deleted can be outside of the current mount point.
2445	*/
2446	parent = btrfs_get_parent(child: dentry);
2447
2448	/*
2449	* At this point dentry->d_name can point to '/' if the
2450	* subvolume we want to destroy is outsite of the
2451	* current mount point, so we need to release the
2452	* current dentry and execute the lookup to return a new
2453	* one with ->d_name pointing to the
2454	* <mount point>/subvol_name.
2455	*/
2456	dput(dentry);
2457	if (IS_ERR(ptr: parent)) {
2458	err = PTR_ERR(ptr: parent);
2459	goto out_drop_write;
2460	}
2461	old_dir = dir;
2462	dir = d_inode(dentry: parent);
2463
2464	/*
2465	* If v2 was used with SPEC_BY_ID, a new parent was
2466	* allocated since the subvolume can be outside of the
2467	* current mount point. Later on we need to release this
2468	* new parent dentry.
2469	*/
2470	destroy_parent = true;
2471
2472	/*
2473	* On idmapped mounts, deletion via subvolid is
2474	* restricted to subvolumes that are immediate
2475	* ancestors of the inode referenced by the file
2476	* descriptor in the ioctl. Otherwise the idmapping
2477	* could potentially be abused to delete subvolumes
2478	* anywhere in the filesystem the user wouldn't be able
2479	* to delete without an idmapped mount.
2480	*/
2481	if (old_dir != dir && idmap != &nop_mnt_idmap) {
2482	err = -EOPNOTSUPP;
2483	goto free_parent;
2484	}
2485
2486	subvol_name_ptr = btrfs_get_subvol_name_from_objectid(
2487	fs_info, subvol_objectid: vol_args2->subvolid);
2488	if (IS_ERR(ptr: subvol_name_ptr)) {
2489	err = PTR_ERR(ptr: subvol_name_ptr);
2490	goto free_parent;
2491	}
2492	/ subvol_name_ptr is already nul terminated /
2493	subvol_name = (char *)kbasename(path: subvol_name_ptr);
2494	}
2495	} else {
2496	vol_args = memdup_user(arg, sizeof(*vol_args));
2497	if (IS_ERR(ptr: vol_args))
2498	return PTR_ERR(ptr: vol_args);
2499
2500	err = btrfs_check_ioctl_vol_args_path(vol_args);
2501	if (err < `0`)
2502	goto out;
2503
2504	subvol_name = vol_args->name;
2505
2506	err = mnt_want_write_file(file);
2507	if (err)
2508	goto out;
2509	}
2510
2511	subvol_namelen = strlen(subvol_name);
2512
2513	if (strchr(subvol_name, `'/'`) \|\|
2514	strncmp(subvol_name, "..", subvol_namelen) == `0`) {
2515	err = -EINVAL;
2516	goto free_subvol_name;
2517	}
2518
2519	if (!S_ISDIR(dir->i_mode)) {
2520	err = -ENOTDIR;
2521	goto free_subvol_name;
2522	}
2523
2524	err = down_write_killable_nested(sem: &dir->i_rwsem, subclass: I_MUTEX_PARENT);
2525	if (err == -EINTR)
2526	goto free_subvol_name;
2527	dentry = lookup_one(idmap, subvol_name, parent, subvol_namelen);
2528	if (IS_ERR(ptr: dentry)) {
2529	err = PTR_ERR(ptr: dentry);
2530	goto out_unlock_dir;
2531	}
2532
2533	if (d_really_is_negative(dentry)) {
2534	err = -ENOENT;
2535	goto out_dput;
2536	}
2537
2538	inode = d_inode(dentry);
2539	dest = BTRFS_I(inode)->root;
2540	if (!capable(CAP_SYS_ADMIN)) {
2541	/*
2542	* Regular user. Only allow this with a special mount
2543	* option, when the user has write+exec access to the
2544	* subvol root, and when rmdir(2) would have been
2545	* allowed.
2546	*
2547	* Note that this is _not_ check that the subvol is
2548	* empty or doesn't contain data that we wouldn't
2549	* otherwise be able to delete.
2550	*
2551	* Users who want to delete empty subvols should try
2552	* rmdir(2).
2553	*/
2554	err = -EPERM;
2555	if (!btrfs_test_opt(fs_info, USER_SUBVOL_RM_ALLOWED))
2556	goto out_dput;
2557
2558	/*
2559	* Do not allow deletion if the parent dir is the same
2560	* as the dir to be deleted. That means the ioctl
2561	* must be called on the dentry referencing the root
2562	* of the subvol, not a random directory contained
2563	* within it.
2564	*/
2565	err = -EINVAL;
2566	if (root == dest)
2567	goto out_dput;
2568
2569	err = inode_permission(idmap, inode, MAY_WRITE \| MAY_EXEC);
2570	if (err)
2571	goto out_dput;
2572	}
2573
2574	/ check if subvolume may be deleted by a user /
2575	err = btrfs_may_delete(idmap, dir, victim: dentry, isdir: `1`);
2576	if (err)
2577	goto out_dput;
2578
2579	if (btrfs_ino(inode: BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
2580	err = -EINVAL;
2581	goto out_dput;
2582	}
2583
2584	btrfs_inode_lock(inode: BTRFS_I(inode), ilock_flags: `0`);
2585	err = btrfs_delete_subvolume(dir: BTRFS_I(inode: dir), dentry);
2586	btrfs_inode_unlock(inode: BTRFS_I(inode), ilock_flags: `0`);
2587	if (!err)
2588	d_delete_notify(dir, dentry);
2589
2590	out_dput:
2591	dput(dentry);
2592	out_unlock_dir:
2593	btrfs_inode_unlock(inode: BTRFS_I(inode: dir), ilock_flags: `0`);
2594	free_subvol_name:
2595	kfree(objp: subvol_name_ptr);
2596	free_parent:
2597	if (destroy_parent)
2598	dput(parent);
2599	out_drop_write:
2600	mnt_drop_write_file(file);
2601	out:
2602	kfree(objp: vol_args2);
2603	kfree(objp: vol_args);
2604	return err;
2605	}
2606
2607	static int btrfs_ioctl_defrag(struct file file, void* __user *argp)
2608	{
2609	struct inode *inode = file_inode(f: file);
2610	struct btrfs_root *root = BTRFS_I(inode)->root;
2611	struct btrfs_ioctl_defrag_range_args range = {`0`};
2612	int ret;
2613
2614	ret = mnt_want_write_file(file);
2615	if (ret)
2616	return ret;
2617
2618	if (btrfs_root_readonly(root)) {
2619	ret = -EROFS;
2620	goto out;
2621	}
2622
2623	switch (inode->i_mode & S_IFMT) {
2624	case S_IFDIR:
2625	if (!capable(CAP_SYS_ADMIN)) {
2626	ret = -EPERM;
2627	goto out;
2628	}
2629	ret = btrfs_defrag_root(root);
2630	break;
2631	case S_IFREG:
2632	/*
2633	* Note that this does not check the file descriptor for write
2634	* access. This prevents defragmenting executables that are
2635	* running and allows defrag on files open in read-only mode.
2636	*/
2637	if (!capable(CAP_SYS_ADMIN) &&
2638	inode_permission(&nop_mnt_idmap, inode, MAY_WRITE)) {
2639	ret = -EPERM;
2640	goto out;
2641	}
2642
2643	if (argp) {
2644	if (copy_from_user(to: &range, from: argp, n: sizeof(range))) {
2645	ret = -EFAULT;
2646	goto out;
2647	}
2648	if (range.flags & ~BTRFS_DEFRAG_RANGE_FLAGS_SUPP) {
2649	ret = -EOPNOTSUPP;
2650	goto out;
2651	}
2652	/ compression requires us to start the IO /
2653	if ((range.flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
2654	range.flags \|= BTRFS_DEFRAG_RANGE_START_IO;
2655	range.extent_thresh = (u32)-`1`;
2656	}
2657	} else {
2658	/ the rest are all set to zero by kzalloc /
2659	range.len = (u64)-`1`;
2660	}
2661	ret = btrfs_defrag_file(inode: file_inode(f: file), ra: &file->f_ra,
2662	range: &range, BTRFS_OLDEST_GENERATION, max_to_defrag: `0`);
2663	if (ret > `0`)
2664	ret = `0`;
2665	break;
2666	default:
2667	ret = -EINVAL;
2668	}
2669	out:
2670	mnt_drop_write_file(file);
2671	return ret;
2672	}
2673
2674	static long btrfs_ioctl_add_dev(struct btrfs_fs_info fs_info, void* __user *arg)
2675	{
2676	struct btrfs_ioctl_vol_args *vol_args;
2677	bool restore_op = false;
2678	int ret;
2679
2680	if (!capable(CAP_SYS_ADMIN))
2681	return -EPERM;
2682
2683	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
2684	btrfs_err(fs_info, "device add not supported on extent tree v2 yet");
2685	return -EINVAL;
2686	}
2687
2688	if (fs_info->fs_devices->temp_fsid) {
2689	btrfs_err(fs_info,
2690	"device add not supported on cloned temp-fsid mount");
2691	return -EINVAL;
2692	}
2693
2694	if (!btrfs_exclop_start(fs_info, type: BTRFS_EXCLOP_DEV_ADD)) {
2695	if (!btrfs_exclop_start_try_lock(fs_info, type: BTRFS_EXCLOP_DEV_ADD))
2696	return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
2697
2698	/*
2699	* We can do the device add because we have a paused balanced,
2700	* change the exclusive op type and remember we should bring
2701	* back the paused balance
2702	*/
2703	fs_info->exclusive_operation = BTRFS_EXCLOP_DEV_ADD;
2704	btrfs_exclop_start_unlock(fs_info);
2705	restore_op = true;
2706	}
2707
2708	vol_args = memdup_user(arg, sizeof(*vol_args));
2709	if (IS_ERR(ptr: vol_args)) {
2710	ret = PTR_ERR(ptr: vol_args);
2711	goto out;
2712	}
2713
2714	ret = btrfs_check_ioctl_vol_args_path(vol_args);
2715	if (ret < `0`)
2716	goto out_free;
2717
2718	ret = btrfs_init_new_device(fs_info, path: vol_args->name);
2719
2720	if (!ret)
2721	btrfs_info(fs_info, "disk added %s", vol_args->name);
2722
2723	out_free:
2724	kfree(objp: vol_args);
2725	out:
2726	if (restore_op)
2727	btrfs_exclop_balance(fs_info, op: BTRFS_EXCLOP_BALANCE_PAUSED);
2728	else
2729	btrfs_exclop_finish(fs_info);
2730	return ret;
2731	}
2732
2733	static long btrfs_ioctl_rm_dev_v2(struct file file, void* __user *arg)
2734	{
2735	BTRFS_DEV_LOOKUP_ARGS(args);
2736	struct inode *inode = file_inode(f: file);
2737	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
2738	struct btrfs_ioctl_vol_args_v2 *vol_args;
2739	struct file *bdev_file = NULL;
2740	int ret;
2741	bool cancel = false;
2742
2743	if (!capable(CAP_SYS_ADMIN))
2744	return -EPERM;
2745
2746	vol_args = memdup_user(arg, sizeof(*vol_args));
2747	if (IS_ERR(ptr: vol_args))
2748	return PTR_ERR(ptr: vol_args);
2749
2750	if (vol_args->flags & ~BTRFS_DEVICE_REMOVE_ARGS_MASK) {
2751	ret = -EOPNOTSUPP;
2752	goto out;
2753	}
2754
2755	ret = btrfs_check_ioctl_vol_args2_subvol_name(vol_args2: vol_args);
2756	if (ret < `0`)
2757	goto out;
2758
2759	if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) {
2760	args.devid = vol_args->devid;
2761	} else if (!strcmp("cancel", vol_args->name)) {
2762	cancel = true;
2763	} else {
2764	ret = btrfs_get_dev_args_from_path(fs_info, args: &args, path: vol_args->name);
2765	if (ret)
2766	goto out;
2767	}
2768
2769	ret = mnt_want_write_file(file);
2770	if (ret)
2771	goto out;
2772
2773	ret = exclop_start_or_cancel_reloc(fs_info, type: BTRFS_EXCLOP_DEV_REMOVE,
2774	cancel);
2775	if (ret)
2776	goto err_drop;
2777
2778	/ Exclusive operation is now claimed /
2779	ret = btrfs_rm_device(fs_info, args: &args, bdev_file: &bdev_file);
2780
2781	btrfs_exclop_finish(fs_info);
2782
2783	if (!ret) {
2784	if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID)
2785	btrfs_info(fs_info, "device deleted: id %llu",
2786	vol_args->devid);
2787	else
2788	btrfs_info(fs_info, "device deleted: %s",
2789	vol_args->name);
2790	}
2791	err_drop:
2792	mnt_drop_write_file(file);
2793	if (bdev_file)
2794	fput(bdev_file);
2795	out:
2796	btrfs_put_dev_args_from_path(args: &args);
2797	kfree(objp: vol_args);
2798	return ret;
2799	}
2800
2801	static long btrfs_ioctl_rm_dev(struct file file, void* __user *arg)
2802	{
2803	BTRFS_DEV_LOOKUP_ARGS(args);
2804	struct inode *inode = file_inode(f: file);
2805	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
2806	struct btrfs_ioctl_vol_args *vol_args;
2807	struct file *bdev_file = NULL;
2808	int ret;
2809	bool cancel = false;
2810
2811	if (!capable(CAP_SYS_ADMIN))
2812	return -EPERM;
2813
2814	vol_args = memdup_user(arg, sizeof(*vol_args));
2815	if (IS_ERR(ptr: vol_args))
2816	return PTR_ERR(ptr: vol_args);
2817
2818	ret = btrfs_check_ioctl_vol_args_path(vol_args);
2819	if (ret < `0`)
2820	goto out_free;
2821
2822	if (!strcmp("cancel", vol_args->name)) {
2823	cancel = true;
2824	} else {
2825	ret = btrfs_get_dev_args_from_path(fs_info, args: &args, path: vol_args->name);
2826	if (ret)
2827	goto out;
2828	}
2829
2830	ret = mnt_want_write_file(file);
2831	if (ret)
2832	goto out;
2833
2834	ret = exclop_start_or_cancel_reloc(fs_info, type: BTRFS_EXCLOP_DEV_REMOVE,
2835	cancel);
2836	if (ret == `0`) {
2837	ret = btrfs_rm_device(fs_info, args: &args, bdev_file: &bdev_file);
2838	if (!ret)
2839	btrfs_info(fs_info, "disk deleted %s", vol_args->name);
2840	btrfs_exclop_finish(fs_info);
2841	}
2842
2843	mnt_drop_write_file(file);
2844	if (bdev_file)
2845	fput(bdev_file);
2846	out:
2847	btrfs_put_dev_args_from_path(args: &args);
2848	out_free:
2849	kfree(objp: vol_args);
2850	return ret;
2851	}
2852
2853	static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info,
2854	void __user *arg)
2855	{
2856	struct btrfs_ioctl_fs_info_args *fi_args;
2857	struct btrfs_device *device;
2858	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
2859	u64 flags_in;
2860	int ret = `0`;
2861
2862	fi_args = memdup_user(arg, sizeof(*fi_args));
2863	if (IS_ERR(ptr: fi_args))
2864	return PTR_ERR(ptr: fi_args);
2865
2866	flags_in = fi_args->flags;
2867	memset(fi_args, `0`, sizeof(*fi_args));
2868
2869	rcu_read_lock();
2870	fi_args->num_devices = fs_devices->num_devices;
2871
2872	list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
2873	if (device->devid > fi_args->max_id)
2874	fi_args->max_id = device->devid;
2875	}
2876	rcu_read_unlock();
2877
2878	memcpy(&fi_args->fsid, fs_devices->fsid, sizeof(fi_args->fsid));
2879	fi_args->nodesize = fs_info->nodesize;
2880	fi_args->sectorsize = fs_info->sectorsize;
2881	fi_args->clone_alignment = fs_info->sectorsize;
2882
2883	if (flags_in & BTRFS_FS_INFO_FLAG_CSUM_INFO) {
2884	fi_args->csum_type = btrfs_super_csum_type(s: fs_info->super_copy);
2885	fi_args->csum_size = btrfs_super_csum_size(s: fs_info->super_copy);
2886	fi_args->flags \|= BTRFS_FS_INFO_FLAG_CSUM_INFO;
2887	}
2888
2889	if (flags_in & BTRFS_FS_INFO_FLAG_GENERATION) {
2890	fi_args->generation = btrfs_get_fs_generation(fs_info);
2891	fi_args->flags \|= BTRFS_FS_INFO_FLAG_GENERATION;
2892	}
2893
2894	if (flags_in & BTRFS_FS_INFO_FLAG_METADATA_UUID) {
2895	memcpy(&fi_args->metadata_uuid, fs_devices->metadata_uuid,
2896	sizeof(fi_args->metadata_uuid));
2897	fi_args->flags \|= BTRFS_FS_INFO_FLAG_METADATA_UUID;
2898	}
2899
2900	if (copy_to_user(to: arg, from: fi_args, n: sizeof(*fi_args)))
2901	ret = -EFAULT;
2902
2903	kfree(objp: fi_args);
2904	return ret;
2905	}
2906
2907	static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info,
2908	void __user *arg)
2909	{
2910	BTRFS_DEV_LOOKUP_ARGS(args);
2911	struct btrfs_ioctl_dev_info_args *di_args;
2912	struct btrfs_device *dev;
2913	int ret = `0`;
2914
2915	di_args = memdup_user(arg, sizeof(*di_args));
2916	if (IS_ERR(ptr: di_args))
2917	return PTR_ERR(ptr: di_args);
2918
2919	args.devid = di_args->devid;
2920	if (!btrfs_is_empty_uuid(uuid: di_args->uuid))
2921	args.uuid = di_args->uuid;
2922
2923	rcu_read_lock();
2924	dev = btrfs_find_device(fs_devices: fs_info->fs_devices, args: &args);
2925	if (!dev) {
2926	ret = -ENODEV;
2927	goto out;
2928	}
2929
2930	di_args->devid = dev->devid;
2931	di_args->bytes_used = btrfs_device_get_bytes_used(dev);
2932	di_args->total_bytes = btrfs_device_get_total_bytes(dev);
2933	memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
2934	memcpy(di_args->fsid, dev->fs_devices->fsid, BTRFS_UUID_SIZE);
2935	if (dev->name)
2936	strscpy(di_args->path, btrfs_dev_name(dev), sizeof(di_args->path));
2937	else
2938	di_args->path[`0`] = `'\0'`;
2939
2940	out:
2941	rcu_read_unlock();
2942	if (ret == `0` && copy_to_user(to: arg, from: di_args, n: sizeof(*di_args)))
2943	ret = -EFAULT;
2944
2945	kfree(objp: di_args);
2946	return ret;
2947	}
2948
2949	static long btrfs_ioctl_default_subvol(struct file file, void* __user *argp)
2950	{
2951	struct inode *inode = file_inode(f: file);
2952	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
2953	struct btrfs_root *root = BTRFS_I(inode)->root;
2954	struct btrfs_root *new_root;
2955	struct btrfs_dir_item *di;
2956	struct btrfs_trans_handle *trans;
2957	struct btrfs_path *path = NULL;
2958	struct btrfs_disk_key disk_key;
2959	struct fscrypt_str name = FSTR_INIT("default", `7`);
2960	u64 objectid = `0`;
2961	u64 dir_id;
2962	int ret;
2963
2964	if (!capable(CAP_SYS_ADMIN))
2965	return -EPERM;
2966
2967	ret = mnt_want_write_file(file);
2968	if (ret)
2969	return ret;
2970
2971	if (copy_from_user(to: &objectid, from: argp, n: sizeof(objectid))) {
2972	ret = -EFAULT;
2973	goto out;
2974	}
2975
2976	if (!objectid)
2977	objectid = BTRFS_FS_TREE_OBJECTID;
2978
2979	new_root = btrfs_get_fs_root(fs_info, objectid, check_ref: true);
2980	if (IS_ERR(ptr: new_root)) {
2981	ret = PTR_ERR(ptr: new_root);
2982	goto out;
2983	}
2984	if (!is_fstree(rootid: new_root->root_key.objectid)) {
2985	ret = -ENOENT;
2986	goto out_free;
2987	}
2988
2989	path = btrfs_alloc_path();
2990	if (!path) {
2991	ret = -ENOMEM;
2992	goto out_free;
2993	}
2994
2995	trans = btrfs_start_transaction(root, num_items: `1`);
2996	if (IS_ERR(ptr: trans)) {
2997	ret = PTR_ERR(ptr: trans);
2998	goto out_free;
2999	}
3000
3001	dir_id = btrfs_super_root_dir(s: fs_info->super_copy);
3002	di = btrfs_lookup_dir_item(trans, root: fs_info->tree_root, path,
3003	dir: dir_id, name: &name, mod: `1`);
3004	if (IS_ERR_OR_NULL(ptr: di)) {
3005	btrfs_release_path(p: path);
3006	btrfs_end_transaction(trans);
3007	btrfs_err(fs_info,
3008	"Umm, you don't have the default diritem, this isn't going to work");
3009	ret = -ENOENT;
3010	goto out_free;
3011	}
3012
3013	btrfs_cpu_key_to_disk(disk_key: &disk_key, cpu_key: &new_root->root_key);
3014	btrfs_set_dir_item_key(eb: path->nodes[`0`], item: di, key: &disk_key);
3015	btrfs_mark_buffer_dirty(trans, buf: path->nodes[`0`]);
3016	btrfs_release_path(p: path);
3017
3018	btrfs_set_fs_incompat(fs_info, DEFAULT_SUBVOL);
3019	btrfs_end_transaction(trans);
3020	out_free:
3021	btrfs_put_root(root: new_root);
3022	btrfs_free_path(p: path);
3023	out:
3024	mnt_drop_write_file(file);
3025	return ret;
3026	}
3027
3028	static void get_block_group_info(struct list_head *groups_list,
3029	struct btrfs_ioctl_space_info *space)
3030	{
3031	struct btrfs_block_group *block_group;
3032
3033	space->total_bytes = `0`;
3034	space->used_bytes = `0`;
3035	space->flags = `0`;
3036	list_for_each_entry(block_group, groups_list, list) {
3037	space->flags = block_group->flags;
3038	space->total_bytes += block_group->length;
3039	space->used_bytes += block_group->used;
3040	}
3041	}
3042
3043	static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
3044	void __user *arg)
3045	{
3046	struct btrfs_ioctl_space_args space_args = { `0` };
3047	struct btrfs_ioctl_space_info space;
3048	struct btrfs_ioctl_space_info *dest;
3049	struct btrfs_ioctl_space_info *dest_orig;
3050	struct btrfs_ioctl_space_info __user *user_dest;
3051	struct btrfs_space_info *info;
3052	static const u64 types[] = {
3053	BTRFS_BLOCK_GROUP_DATA,
3054	BTRFS_BLOCK_GROUP_SYSTEM,
3055	BTRFS_BLOCK_GROUP_METADATA,
3056	BTRFS_BLOCK_GROUP_DATA \| BTRFS_BLOCK_GROUP_METADATA
3057	};
3058	int num_types = `4`;
3059	int alloc_size;
3060	int ret = `0`;
3061	u64 slot_count = `0`;
3062	int i, c;
3063
3064	if (copy_from_user(to: &space_args,
3065	from: (struct btrfs_ioctl_space_args __user *)arg,
3066	n: sizeof(space_args)))
3067	return -EFAULT;
3068
3069	for (i = `0`; i < num_types; i++) {
3070	struct btrfs_space_info *tmp;
3071
3072	info = NULL;
3073	list_for_each_entry(tmp, &fs_info->space_info, list) {
3074	if (tmp->flags == types[i]) {
3075	info = tmp;
3076	break;
3077	}
3078	}
3079
3080	if (!info)
3081	continue;
3082
3083	down_read(sem: &info->groups_sem);
3084	for (c = `0`; c < BTRFS_NR_RAID_TYPES; c++) {
3085	if (!list_empty(head: &info->block_groups[c]))
3086	slot_count++;
3087	}
3088	up_read(sem: &info->groups_sem);
3089	}
3090
3091	/*
3092	* Global block reserve, exported as a space_info
3093	*/
3094	slot_count++;
3095
3096	/ space_slots == 0 means they are asking for a count /
3097	if (space_args.space_slots == `0`) {
3098	space_args.total_spaces = slot_count;
3099	goto out;
3100	}
3101
3102	slot_count = min_t(u64, space_args.space_slots, slot_count);
3103
3104	alloc_size = sizeof(dest) slot_count;
3105
3106	/ we generally have at most 6 or so space infos, one for each raid*
3107	* level. So, a whole page should be more than enough for everyone
3108	*/
3109	if (alloc_size > PAGE_SIZE)
3110	return -ENOMEM;
3111
3112	space_args.total_spaces = `0`;
3113	dest = kmalloc(size: alloc_size, GFP_KERNEL);
3114	if (!dest)
3115	return -ENOMEM;
3116	dest_orig = dest;
3117
3118	/ now we have a buffer to copy into /
3119	for (i = `0`; i < num_types; i++) {
3120	struct btrfs_space_info *tmp;
3121
3122	if (!slot_count)
3123	break;
3124
3125	info = NULL;
3126	list_for_each_entry(tmp, &fs_info->space_info, list) {
3127	if (tmp->flags == types[i]) {
3128	info = tmp;
3129	break;
3130	}
3131	}
3132
3133	if (!info)
3134	continue;
3135	down_read(sem: &info->groups_sem);
3136	for (c = `0`; c < BTRFS_NR_RAID_TYPES; c++) {
3137	if (!list_empty(head: &info->block_groups[c])) {
3138	get_block_group_info(groups_list: &info->block_groups[c],
3139	space: &space);
3140	memcpy(dest, &space, sizeof(space));
3141	dest++;
3142	space_args.total_spaces++;
3143	slot_count--;
3144	}
3145	if (!slot_count)
3146	break;
3147	}
3148	up_read(sem: &info->groups_sem);
3149	}
3150
3151	/*
3152	* Add global block reserve
3153	*/
3154	if (slot_count) {
3155	struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
3156
3157	spin_lock(lock: &block_rsv->lock);
3158	space.total_bytes = block_rsv->size;
3159	space.used_bytes = block_rsv->size - block_rsv->reserved;
3160	spin_unlock(lock: &block_rsv->lock);
3161	space.flags = BTRFS_SPACE_INFO_GLOBAL_RSV;
3162	memcpy(dest, &space, sizeof(space));
3163	space_args.total_spaces++;
3164	}
3165
3166	user_dest = (struct btrfs_ioctl_space_info __user *)
3167	(arg + sizeof(struct btrfs_ioctl_space_args));
3168
3169	if (copy_to_user(to: user_dest, from: dest_orig, n: alloc_size))
3170	ret = -EFAULT;
3171
3172	kfree(objp: dest_orig);
3173	out:
3174	if (ret == `0` && copy_to_user(to: arg, from: &space_args, n: sizeof(space_args)))
3175	ret = -EFAULT;
3176
3177	return ret;
3178	}
3179
3180	static noinline long btrfs_ioctl_start_sync(struct btrfs_root *root,
3181	void __user *argp)
3182	{
3183	struct btrfs_trans_handle *trans;
3184	u64 transid;
3185
3186	/*
3187	* Start orphan cleanup here for the given root in case it hasn't been
3188	* started already by other means. Errors are handled in the other
3189	* functions during transaction commit.
3190	*/
3191	btrfs_orphan_cleanup(root);
3192
3193	trans = btrfs_attach_transaction_barrier(root);
3194	if (IS_ERR(ptr: trans)) {
3195	if (PTR_ERR(ptr: trans) != -ENOENT)
3196	return PTR_ERR(ptr: trans);
3197
3198	/ No running transaction, don't bother /
3199	transid = btrfs_get_last_trans_committed(fs_info: root->fs_info);
3200	goto out;
3201	}
3202	transid = trans->transid;
3203	btrfs_commit_transaction_async(trans);
3204	out:
3205	if (argp)
3206	if (copy_to_user(to: argp, from: &transid, n: sizeof(transid)))
3207	return -EFAULT;
3208	return `0`;
3209	}
3210
3211	static noinline long btrfs_ioctl_wait_sync(struct btrfs_fs_info *fs_info,
3212	void __user *argp)
3213	{
3214	/ By default wait for the current transaction. /
3215	u64 transid = `0`;
3216
3217	if (argp)
3218	if (copy_from_user(to: &transid, from: argp, n: sizeof(transid)))
3219	return -EFAULT;
3220
3221	return btrfs_wait_for_commit(fs_info, transid);
3222	}
3223
3224	static long btrfs_ioctl_scrub(struct file file, void* __user *arg)
3225	{
3226	struct btrfs_fs_info *fs_info = inode_to_fs_info(file_inode(file));
3227	struct btrfs_ioctl_scrub_args *sa;
3228	int ret;
3229
3230	if (!capable(CAP_SYS_ADMIN))
3231	return -EPERM;
3232
3233	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
3234	btrfs_err(fs_info, "scrub is not supported on extent tree v2 yet");
3235	return -EINVAL;
3236	}
3237
3238	sa = memdup_user(arg, sizeof(*sa));
3239	if (IS_ERR(ptr: sa))
3240	return PTR_ERR(ptr: sa);
3241
3242	if (sa->flags & ~BTRFS_SCRUB_SUPPORTED_FLAGS) {
3243	ret = -EOPNOTSUPP;
3244	goto out;
3245	}
3246
3247	if (!(sa->flags & BTRFS_SCRUB_READONLY)) {
3248	ret = mnt_want_write_file(file);
3249	if (ret)
3250	goto out;
3251	}
3252
3253	ret = btrfs_scrub_dev(fs_info, devid: sa->devid, start: sa->start, end: sa->end,
3254	progress: &sa->progress, readonly: sa->flags & BTRFS_SCRUB_READONLY,
3255	is_dev_replace: `0`);
3256
3257	/*
3258	* Copy scrub args to user space even if btrfs_scrub_dev() returned an
3259	* error. This is important as it allows user space to know how much
3260	* progress scrub has done. For example, if scrub is canceled we get
3261	* -ECANCELED from btrfs_scrub_dev() and return that error back to user
3262	* space. Later user space can inspect the progress from the structure
3263	* btrfs_ioctl_scrub_args and resume scrub from where it left off
3264	* previously (btrfs-progs does this).
3265	* If we fail to copy the btrfs_ioctl_scrub_args structure to user space
3266	* then return -EFAULT to signal the structure was not copied or it may
3267	* be corrupt and unreliable due to a partial copy.
3268	*/
3269	if (copy_to_user(to: arg, from: sa, n: sizeof(*sa)))
3270	ret = -EFAULT;
3271
3272	if (!(sa->flags & BTRFS_SCRUB_READONLY))
3273	mnt_drop_write_file(file);
3274	out:
3275	kfree(objp: sa);
3276	return ret;
3277	}
3278
3279	static long btrfs_ioctl_scrub_cancel(struct btrfs_fs_info *fs_info)
3280	{
3281	if (!capable(CAP_SYS_ADMIN))
3282	return -EPERM;
3283
3284	return btrfs_scrub_cancel(info: fs_info);
3285	}
3286
3287	static long btrfs_ioctl_scrub_progress(struct btrfs_fs_info *fs_info,
3288	void __user *arg)
3289	{
3290	struct btrfs_ioctl_scrub_args *sa;
3291	int ret;
3292
3293	if (!capable(CAP_SYS_ADMIN))
3294	return -EPERM;
3295
3296	sa = memdup_user(arg, sizeof(*sa));
3297	if (IS_ERR(ptr: sa))
3298	return PTR_ERR(ptr: sa);
3299
3300	ret = btrfs_scrub_progress(fs_info, devid: sa->devid, progress: &sa->progress);
3301
3302	if (ret == `0` && copy_to_user(to: arg, from: sa, n: sizeof(*sa)))
3303	ret = -EFAULT;
3304
3305	kfree(objp: sa);
3306	return ret;
3307	}
3308
3309	static long btrfs_ioctl_get_dev_stats(struct btrfs_fs_info *fs_info,
3310	void __user *arg)
3311	{
3312	struct btrfs_ioctl_get_dev_stats *sa;
3313	int ret;
3314
3315	sa = memdup_user(arg, sizeof(*sa));
3316	if (IS_ERR(ptr: sa))
3317	return PTR_ERR(ptr: sa);
3318
3319	if ((sa->flags & BTRFS_DEV_STATS_RESET) && !capable(CAP_SYS_ADMIN)) {
3320	kfree(objp: sa);
3321	return -EPERM;
3322	}
3323
3324	ret = btrfs_get_dev_stats(fs_info, stats: sa);
3325
3326	if (ret == `0` && copy_to_user(to: arg, from: sa, n: sizeof(*sa)))
3327	ret = -EFAULT;
3328
3329	kfree(objp: sa);
3330	return ret;
3331	}
3332
3333	static long btrfs_ioctl_dev_replace(struct btrfs_fs_info *fs_info,
3334	void __user *arg)
3335	{
3336	struct btrfs_ioctl_dev_replace_args *p;
3337	int ret;
3338
3339	if (!capable(CAP_SYS_ADMIN))
3340	return -EPERM;
3341
3342	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
3343	btrfs_err(fs_info, "device replace not supported on extent tree v2 yet");
3344	return -EINVAL;
3345	}
3346
3347	p = memdup_user(arg, sizeof(*p));
3348	if (IS_ERR(ptr: p))
3349	return PTR_ERR(ptr: p);
3350
3351	switch (p->cmd) {
3352	case BTRFS_IOCTL_DEV_REPLACE_CMD_START:
3353	if (sb_rdonly(sb: fs_info->sb)) {
3354	ret = -EROFS;
3355	goto out;
3356	}
3357	if (!btrfs_exclop_start(fs_info, type: BTRFS_EXCLOP_DEV_REPLACE)) {
3358	ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
3359	} else {
3360	ret = btrfs_dev_replace_by_ioctl(fs_info, args: p);
3361	btrfs_exclop_finish(fs_info);
3362	}
3363	break;
3364	case BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS:
3365	btrfs_dev_replace_status(fs_info, args: p);
3366	ret = `0`;
3367	break;
3368	case BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL:
3369	p->result = btrfs_dev_replace_cancel(fs_info);
3370	ret = `0`;
3371	break;
3372	default:
3373	ret = -EINVAL;
3374	break;
3375	}
3376
3377	if ((ret == `0` \|\| ret == -ECANCELED) && copy_to_user(to: arg, from: p, n: sizeof(*p)))
3378	ret = -EFAULT;
3379	out:
3380	kfree(objp: p);
3381	return ret;
3382	}
3383
3384	static long btrfs_ioctl_ino_to_path(struct btrfs_root root, void* __user *arg)
3385	{
3386	int ret = `0`;
3387	int i;
3388	u64 rel_ptr;
3389	int size;
3390	struct btrfs_ioctl_ino_path_args *ipa = NULL;
3391	struct inode_fs_paths *ipath = NULL;
3392	struct btrfs_path *path;
3393
3394	if (!capable(CAP_DAC_READ_SEARCH))
3395	return -EPERM;
3396
3397	path = btrfs_alloc_path();
3398	if (!path) {
3399	ret = -ENOMEM;
3400	goto out;
3401	}
3402
3403	ipa = memdup_user(arg, sizeof(*ipa));
3404	if (IS_ERR(ptr: ipa)) {
3405	ret = PTR_ERR(ptr: ipa);
3406	ipa = NULL;
3407	goto out;
3408	}
3409
3410	size = min_t(u32, ipa->size, `4096`);
3411	ipath = init_ipath(total_bytes: size, fs_root: root, path);
3412	if (IS_ERR(ptr: ipath)) {
3413	ret = PTR_ERR(ptr: ipath);
3414	ipath = NULL;
3415	goto out;
3416	}
3417
3418	ret = paths_from_inode(inum: ipa->inum, ipath);
3419	if (ret < `0`)
3420	goto out;
3421
3422	for (i = `0`; i < ipath->fspath->elem_cnt; ++i) {
3423	rel_ptr = ipath->fspath->val[i] -
3424	(u64)(unsigned long)ipath->fspath->val;
3425	ipath->fspath->val[i] = rel_ptr;
3426	}
3427
3428	btrfs_free_path(p: path);
3429	path = NULL;
3430	ret = copy_to_user(to: (void __user )(unsigned* long)ipa->fspath,
3431	from: ipath->fspath, n: size);
3432	if (ret) {
3433	ret = -EFAULT;
3434	goto out;
3435	}
3436
3437	out:
3438	btrfs_free_path(p: path);
3439	free_ipath(ipath);
3440	kfree(objp: ipa);
3441
3442	return ret;
3443	}
3444
3445	static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
3446	void __user arg, int* version)
3447	{
3448	int ret = `0`;
3449	int size;
3450	struct btrfs_ioctl_logical_ino_args *loi;
3451	struct btrfs_data_container *inodes = NULL;
3452	struct btrfs_path *path = NULL;
3453	bool ignore_offset;
3454
3455	if (!capable(CAP_SYS_ADMIN))
3456	return -EPERM;
3457
3458	loi = memdup_user(arg, sizeof(*loi));
3459	if (IS_ERR(ptr: loi))
3460	return PTR_ERR(ptr: loi);
3461
3462	if (version == `1`) {
3463	ignore_offset = false;
3464	size = min_t(u32, loi->size, SZ_64K);
3465	} else {
3466	/ All reserved bits must be 0 for now /
3467	if (memchr_inv(p: loi->reserved, c: `0`, size: sizeof(loi->reserved))) {
3468	ret = -EINVAL;
3469	goto out_loi;
3470	}
3471	/ Only accept flags we have defined so far /
3472	if (loi->flags & ~(BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET)) {
3473	ret = -EINVAL;
3474	goto out_loi;
3475	}
3476	ignore_offset = loi->flags & BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET;
3477	size = min_t(u32, loi->size, SZ_16M);
3478	}
3479
3480	inodes = init_data_container(total_bytes: size);
3481	if (IS_ERR(ptr: inodes)) {
3482	ret = PTR_ERR(ptr: inodes);
3483	goto out_loi;
3484	}
3485
3486	path = btrfs_alloc_path();
3487	if (!path) {
3488	ret = -ENOMEM;
3489	goto out;
3490	}
3491	ret = iterate_inodes_from_logical(logical: loi->logical, fs_info, path,
3492	ctx: inodes, ignore_offset);
3493	btrfs_free_path(p: path);
3494	if (ret == -EINVAL)
3495	ret = -ENOENT;
3496	if (ret < `0`)
3497	goto out;
3498
3499	ret = copy_to_user(to: (void __user )(unsigned* long)loi->inodes, from: inodes,
3500	n: size);
3501	if (ret)
3502	ret = -EFAULT;
3503
3504	out:
3505	kvfree(addr: inodes);
3506	out_loi:
3507	kfree(objp: loi);
3508
3509	return ret;
3510	}
3511
3512	void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
3513	struct btrfs_ioctl_balance_args *bargs)
3514	{
3515	struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3516
3517	bargs->flags = bctl->flags;
3518
3519	if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags))
3520	bargs->state \|= BTRFS_BALANCE_STATE_RUNNING;
3521	if (atomic_read(v: &fs_info->balance_pause_req))
3522	bargs->state \|= BTRFS_BALANCE_STATE_PAUSE_REQ;
3523	if (atomic_read(v: &fs_info->balance_cancel_req))
3524	bargs->state \|= BTRFS_BALANCE_STATE_CANCEL_REQ;
3525
3526	memcpy(&bargs->data, &bctl->data, sizeof(bargs->data));
3527	memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta));
3528	memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys));
3529
3530	spin_lock(lock: &fs_info->balance_lock);
3531	memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
3532	spin_unlock(lock: &fs_info->balance_lock);
3533	}
3534
3535	/*
3536	* Try to acquire fs_info::balance_mutex as well as set BTRFS_EXLCOP_BALANCE as
3537	* required.
3538	*
3539	* @fs_info: the filesystem
3540	* @excl_acquired: ptr to boolean value which is set to false in case balance
3541	* is being resumed
3542	*
3543	* Return 0 on success in which case both fs_info::balance is acquired as well
3544	* as exclusive ops are blocked. In case of failure return an error code.
3545	*/
3546	static int btrfs_try_lock_balance(struct btrfs_fs_info fs_info, bool excl_acquired)
3547	{
3548	int ret;
3549
3550	/*
3551	* Exclusive operation is locked. Three possibilities:
3552	* (1) some other op is running
3553	* (2) balance is running
3554	* (3) balance is paused -- special case (think resume)
3555	*/
3556	while (`1`) {
3557	if (btrfs_exclop_start(fs_info, type: BTRFS_EXCLOP_BALANCE)) {
3558	*excl_acquired = true;
3559	mutex_lock(&fs_info->balance_mutex);
3560	return `0`;
3561	}
3562
3563	mutex_lock(&fs_info->balance_mutex);
3564	if (fs_info->balance_ctl) {
3565	/ This is either (2) or (3) /
3566	if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
3567	/ This is (2) /
3568	ret = -EINPROGRESS;
3569	goto out_failure;
3570
3571	} else {
3572	mutex_unlock(lock: &fs_info->balance_mutex);
3573	/*
3574	* Lock released to allow other waiters to
3575	* continue, we'll reexamine the status again.
3576	*/
3577	mutex_lock(&fs_info->balance_mutex);
3578
3579	if (fs_info->balance_ctl &&
3580	!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
3581	/ This is (3) /
3582	*excl_acquired = false;
3583	return `0`;
3584	}
3585	}
3586	} else {
3587	/ This is (1) /
3588	ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
3589	goto out_failure;
3590	}
3591
3592	mutex_unlock(lock: &fs_info->balance_mutex);
3593	}
3594
3595	out_failure:
3596	mutex_unlock(lock: &fs_info->balance_mutex);
3597	*excl_acquired = false;
3598	return ret;
3599	}
3600
3601	static long btrfs_ioctl_balance(struct file file, void* __user *arg)
3602	{
3603	struct btrfs_root *root = BTRFS_I(inode: file_inode(f: file))->root;
3604	struct btrfs_fs_info *fs_info = root->fs_info;
3605	struct btrfs_ioctl_balance_args *bargs;
3606	struct btrfs_balance_control *bctl;
3607	bool need_unlock = true;
3608	int ret;
3609
3610	if (!capable(CAP_SYS_ADMIN))
3611	return -EPERM;
3612
3613	ret = mnt_want_write_file(file);
3614	if (ret)
3615	return ret;
3616
3617	bargs = memdup_user(arg, sizeof(*bargs));
3618	if (IS_ERR(ptr: bargs)) {
3619	ret = PTR_ERR(ptr: bargs);
3620	bargs = NULL;
3621	goto out;
3622	}
3623
3624	ret = btrfs_try_lock_balance(fs_info, excl_acquired: &need_unlock);
3625	if (ret)
3626	goto out;
3627
3628	lockdep_assert_held(&fs_info->balance_mutex);
3629
3630	if (bargs->flags & BTRFS_BALANCE_RESUME) {
3631	if (!fs_info->balance_ctl) {
3632	ret = -ENOTCONN;
3633	goto out_unlock;
3634	}
3635
3636	bctl = fs_info->balance_ctl;
3637	spin_lock(lock: &fs_info->balance_lock);
3638	bctl->flags \|= BTRFS_BALANCE_RESUME;
3639	spin_unlock(lock: &fs_info->balance_lock);
3640	btrfs_exclop_balance(fs_info, op: BTRFS_EXCLOP_BALANCE);
3641
3642	goto do_balance;
3643	}
3644
3645	if (bargs->flags & ~(BTRFS_BALANCE_ARGS_MASK \| BTRFS_BALANCE_TYPE_MASK)) {
3646	ret = -EINVAL;
3647	goto out_unlock;
3648	}
3649
3650	if (fs_info->balance_ctl) {
3651	ret = -EINPROGRESS;
3652	goto out_unlock;
3653	}
3654
3655	bctl = kzalloc(size: sizeof(*bctl), GFP_KERNEL);
3656	if (!bctl) {
3657	ret = -ENOMEM;
3658	goto out_unlock;
3659	}
3660
3661	memcpy(&bctl->data, &bargs->data, sizeof(bctl->data));
3662	memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta));
3663	memcpy(&bctl->sys, &bargs->sys, sizeof(bctl->sys));
3664
3665	bctl->flags = bargs->flags;
3666	do_balance:
3667	/*
3668	* Ownership of bctl and exclusive operation goes to btrfs_balance.
3669	* bctl is freed in reset_balance_state, or, if restriper was paused
3670	* all the way until unmount, in free_fs_info. The flag should be
3671	* cleared after reset_balance_state.
3672	*/
3673	need_unlock = false;
3674
3675	ret = btrfs_balance(fs_info, bctl, bargs);
3676	bctl = NULL;
3677
3678	if (ret == `0` \|\| ret == -ECANCELED) {
3679	if (copy_to_user(to: arg, from: bargs, n: sizeof(*bargs)))
3680	ret = -EFAULT;
3681	}
3682
3683	kfree(objp: bctl);
3684	out_unlock:
3685	mutex_unlock(lock: &fs_info->balance_mutex);
3686	if (need_unlock)
3687	btrfs_exclop_finish(fs_info);
3688	out:
3689	mnt_drop_write_file(file);
3690	kfree(objp: bargs);
3691	return ret;
3692	}
3693
3694	static long btrfs_ioctl_balance_ctl(struct btrfs_fs_info fs_info, int* cmd)
3695	{
3696	if (!capable(CAP_SYS_ADMIN))
3697	return -EPERM;
3698
3699	switch (cmd) {
3700	case BTRFS_BALANCE_CTL_PAUSE:
3701	return btrfs_pause_balance(fs_info);
3702	case BTRFS_BALANCE_CTL_CANCEL:
3703	return btrfs_cancel_balance(fs_info);
3704	}
3705
3706	return -EINVAL;
3707	}
3708
3709	static long btrfs_ioctl_balance_progress(struct btrfs_fs_info *fs_info,
3710	void __user *arg)
3711	{
3712	struct btrfs_ioctl_balance_args *bargs;
3713	int ret = `0`;
3714
3715	if (!capable(CAP_SYS_ADMIN))
3716	return -EPERM;
3717
3718	mutex_lock(&fs_info->balance_mutex);
3719	if (!fs_info->balance_ctl) {
3720	ret = -ENOTCONN;
3721	goto out;
3722	}
3723
3724	bargs = kzalloc(size: sizeof(*bargs), GFP_KERNEL);
3725	if (!bargs) {
3726	ret = -ENOMEM;
3727	goto out;
3728	}
3729
3730	btrfs_update_ioctl_balance_args(fs_info, bargs);
3731
3732	if (copy_to_user(to: arg, from: bargs, n: sizeof(*bargs)))
3733	ret = -EFAULT;
3734
3735	kfree(objp: bargs);
3736	out:
3737	mutex_unlock(lock: &fs_info->balance_mutex);
3738	return ret;
3739	}
3740
3741	static long btrfs_ioctl_quota_ctl(struct file file, void* __user *arg)
3742	{
3743	struct inode *inode = file_inode(f: file);
3744	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
3745	struct btrfs_ioctl_quota_ctl_args *sa;
3746	int ret;
3747
3748	if (!capable(CAP_SYS_ADMIN))
3749	return -EPERM;
3750
3751	ret = mnt_want_write_file(file);
3752	if (ret)
3753	return ret;
3754
3755	sa = memdup_user(arg, sizeof(*sa));
3756	if (IS_ERR(ptr: sa)) {
3757	ret = PTR_ERR(ptr: sa);
3758	goto drop_write;
3759	}
3760
3761	down_write(sem: &fs_info->subvol_sem);
3762
3763	switch (sa->cmd) {
3764	case BTRFS_QUOTA_CTL_ENABLE:
3765	case BTRFS_QUOTA_CTL_ENABLE_SIMPLE_QUOTA:
3766	ret = btrfs_quota_enable(fs_info, quota_ctl_args: sa);
3767	break;
3768	case BTRFS_QUOTA_CTL_DISABLE:
3769	ret = btrfs_quota_disable(fs_info);
3770	break;
3771	default:
3772	ret = -EINVAL;
3773	break;
3774	}
3775
3776	kfree(objp: sa);
3777	up_write(sem: &fs_info->subvol_sem);
3778	drop_write:
3779	mnt_drop_write_file(file);
3780	return ret;
3781	}
3782
3783	static long btrfs_ioctl_qgroup_assign(struct file file, void* __user *arg)
3784	{
3785	struct inode *inode = file_inode(f: file);
3786	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
3787	struct btrfs_root *root = BTRFS_I(inode)->root;
3788	struct btrfs_ioctl_qgroup_assign_args *sa;
3789	struct btrfs_trans_handle *trans;
3790	int ret;
3791	int err;
3792
3793	if (!capable(CAP_SYS_ADMIN))
3794	return -EPERM;
3795
3796	ret = mnt_want_write_file(file);
3797	if (ret)
3798	return ret;
3799
3800	sa = memdup_user(arg, sizeof(*sa));
3801	if (IS_ERR(ptr: sa)) {
3802	ret = PTR_ERR(ptr: sa);
3803	goto drop_write;
3804	}
3805
3806	trans = btrfs_join_transaction(root);
3807	if (IS_ERR(ptr: trans)) {
3808	ret = PTR_ERR(ptr: trans);
3809	goto out;
3810	}
3811
3812	if (sa->assign) {
3813	ret = btrfs_add_qgroup_relation(trans, src: sa->src, dst: sa->dst);
3814	} else {
3815	ret = btrfs_del_qgroup_relation(trans, src: sa->src, dst: sa->dst);
3816	}
3817
3818	/ update qgroup status and info /
3819	mutex_lock(&fs_info->qgroup_ioctl_lock);
3820	err = btrfs_run_qgroups(trans);
3821	mutex_unlock(lock: &fs_info->qgroup_ioctl_lock);
3822	if (err < `0`)
3823	btrfs_handle_fs_error(fs_info, err,
3824	"failed to update qgroup status and info");
3825	err = btrfs_end_transaction(trans);
3826	if (err && !ret)
3827	ret = err;
3828
3829	out:
3830	kfree(objp: sa);
3831	drop_write:
3832	mnt_drop_write_file(file);
3833	return ret;
3834	}
3835
3836	static long btrfs_ioctl_qgroup_create(struct file file, void* __user *arg)
3837	{
3838	struct inode *inode = file_inode(f: file);
3839	struct btrfs_root *root = BTRFS_I(inode)->root;
3840	struct btrfs_ioctl_qgroup_create_args *sa;
3841	struct btrfs_trans_handle *trans;
3842	int ret;
3843	int err;
3844
3845	if (!capable(CAP_SYS_ADMIN))
3846	return -EPERM;
3847
3848	ret = mnt_want_write_file(file);
3849	if (ret)
3850	return ret;
3851
3852	sa = memdup_user(arg, sizeof(*sa));
3853	if (IS_ERR(ptr: sa)) {
3854	ret = PTR_ERR(ptr: sa);
3855	goto drop_write;
3856	}
3857
3858	if (!sa->qgroupid) {
3859	ret = -EINVAL;
3860	goto out;
3861	}
3862
3863	if (sa->create && is_fstree(rootid: sa->qgroupid)) {
3864	ret = -EINVAL;
3865	goto out;
3866	}
3867
3868	trans = btrfs_join_transaction(root);
3869	if (IS_ERR(ptr: trans)) {
3870	ret = PTR_ERR(ptr: trans);
3871	goto out;
3872	}
3873
3874	if (sa->create) {
3875	ret = btrfs_create_qgroup(trans, qgroupid: sa->qgroupid);
3876	} else {
3877	ret = btrfs_remove_qgroup(trans, qgroupid: sa->qgroupid);
3878	}
3879
3880	err = btrfs_end_transaction(trans);
3881	if (err && !ret)
3882	ret = err;
3883
3884	out:
3885	kfree(objp: sa);
3886	drop_write:
3887	mnt_drop_write_file(file);
3888	return ret;
3889	}
3890
3891	static long btrfs_ioctl_qgroup_limit(struct file file, void* __user *arg)
3892	{
3893	struct inode *inode = file_inode(f: file);
3894	struct btrfs_root *root = BTRFS_I(inode)->root;
3895	struct btrfs_ioctl_qgroup_limit_args *sa;
3896	struct btrfs_trans_handle *trans;
3897	int ret;
3898	int err;
3899	u64 qgroupid;
3900
3901	if (!capable(CAP_SYS_ADMIN))
3902	return -EPERM;
3903
3904	ret = mnt_want_write_file(file);
3905	if (ret)
3906	return ret;
3907
3908	sa = memdup_user(arg, sizeof(*sa));
3909	if (IS_ERR(ptr: sa)) {
3910	ret = PTR_ERR(ptr: sa);
3911	goto drop_write;
3912	}
3913
3914	trans = btrfs_join_transaction(root);
3915	if (IS_ERR(ptr: trans)) {
3916	ret = PTR_ERR(ptr: trans);
3917	goto out;
3918	}
3919
3920	qgroupid = sa->qgroupid;
3921	if (!qgroupid) {
3922	/ take the current subvol as qgroup /
3923	qgroupid = root->root_key.objectid;
3924	}
3925
3926	ret = btrfs_limit_qgroup(trans, qgroupid, limit: &sa->lim);
3927
3928	err = btrfs_end_transaction(trans);
3929	if (err && !ret)
3930	ret = err;
3931
3932	out:
3933	kfree(objp: sa);
3934	drop_write:
3935	mnt_drop_write_file(file);
3936	return ret;
3937	}
3938
3939	static long btrfs_ioctl_quota_rescan(struct file file, void* __user *arg)
3940	{
3941	struct inode *inode = file_inode(f: file);
3942	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
3943	struct btrfs_ioctl_quota_rescan_args *qsa;
3944	int ret;
3945
3946	if (!capable(CAP_SYS_ADMIN))
3947	return -EPERM;
3948
3949	ret = mnt_want_write_file(file);
3950	if (ret)
3951	return ret;
3952
3953	qsa = memdup_user(arg, sizeof(*qsa));
3954	if (IS_ERR(ptr: qsa)) {
3955	ret = PTR_ERR(ptr: qsa);
3956	goto drop_write;
3957	}
3958
3959	if (qsa->flags) {
3960	ret = -EINVAL;
3961	goto out;
3962	}
3963
3964	ret = btrfs_qgroup_rescan(fs_info);
3965
3966	out:
3967	kfree(objp: qsa);
3968	drop_write:
3969	mnt_drop_write_file(file);
3970	return ret;
3971	}
3972
3973	static long btrfs_ioctl_quota_rescan_status(struct btrfs_fs_info *fs_info,
3974	void __user *arg)
3975	{
3976	struct btrfs_ioctl_quota_rescan_args qsa = {`0`};
3977
3978	if (!capable(CAP_SYS_ADMIN))
3979	return -EPERM;
3980
3981	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
3982	qsa.flags = `1`;
3983	qsa.progress = fs_info->qgroup_rescan_progress.objectid;
3984	}
3985
3986	if (copy_to_user(to: arg, from: &qsa, n: sizeof(qsa)))
3987	return -EFAULT;
3988
3989	return `0`;
3990	}
3991
3992	static long btrfs_ioctl_quota_rescan_wait(struct btrfs_fs_info *fs_info,
3993	void __user *arg)
3994	{
3995	if (!capable(CAP_SYS_ADMIN))
3996	return -EPERM;
3997
3998	return btrfs_qgroup_wait_for_completion(fs_info, interruptible: true);
3999	}
4000
4001	static long _btrfs_ioctl_set_received_subvol(struct file *file,
4002	struct mnt_idmap *idmap,
4003	struct btrfs_ioctl_received_subvol_args *sa)
4004	{
4005	struct inode *inode = file_inode(f: file);
4006	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
4007	struct btrfs_root *root = BTRFS_I(inode)->root;
4008	struct btrfs_root_item *root_item = &root->root_item;
4009	struct btrfs_trans_handle *trans;
4010	struct timespec64 ct = current_time(inode);
4011	int ret = `0`;
4012	int received_uuid_changed;
4013
4014	if (!inode_owner_or_capable(idmap, inode))
4015	return -EPERM;
4016
4017	ret = mnt_want_write_file(file);
4018	if (ret < `0`)
4019	return ret;
4020
4021	down_write(sem: &fs_info->subvol_sem);
4022
4023	if (btrfs_ino(inode: BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
4024	ret = -EINVAL;
4025	goto out;
4026	}
4027
4028	if (btrfs_root_readonly(root)) {
4029	ret = -EROFS;
4030	goto out;
4031	}
4032
4033	/*
4034	* 1 - root item
4035	* 2 - uuid items (received uuid + subvol uuid)
4036	*/
4037	trans = btrfs_start_transaction(root, num_items: `3`);
4038	if (IS_ERR(ptr: trans)) {
4039	ret = PTR_ERR(ptr: trans);
4040	trans = NULL;
4041	goto out;
4042	}
4043
4044	sa->rtransid = trans->transid;
4045	sa->rtime.sec = ct.tv_sec;
4046	sa->rtime.nsec = ct.tv_nsec;
4047
4048	received_uuid_changed = memcmp(p: root_item->received_uuid, q: sa->uuid,
4049	BTRFS_UUID_SIZE);
4050	if (received_uuid_changed &&
4051	!btrfs_is_empty_uuid(uuid: root_item->received_uuid)) {
4052	ret = btrfs_uuid_tree_remove(trans, uuid: root_item->received_uuid,
4053	BTRFS_UUID_KEY_RECEIVED_SUBVOL,
4054	subid: root->root_key.objectid);
4055	if (ret && ret != -ENOENT) {
4056	btrfs_abort_transaction(trans, ret);
4057	btrfs_end_transaction(trans);
4058	goto out;
4059	}
4060	}
4061	memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE);
4062	btrfs_set_root_stransid(s: root_item, val: sa->stransid);
4063	btrfs_set_root_rtransid(s: root_item, val: sa->rtransid);
4064	btrfs_set_stack_timespec_sec(s: &root_item->stime, val: sa->stime.sec);
4065	btrfs_set_stack_timespec_nsec(s: &root_item->stime, val: sa->stime.nsec);
4066	btrfs_set_stack_timespec_sec(s: &root_item->rtime, val: sa->rtime.sec);
4067	btrfs_set_stack_timespec_nsec(s: &root_item->rtime, val: sa->rtime.nsec);
4068
4069	ret = btrfs_update_root(trans, root: fs_info->tree_root,
4070	key: &root->root_key, item: &root->root_item);
4071	if (ret < `0`) {
4072	btrfs_end_transaction(trans);
4073	goto out;
4074	}
4075	if (received_uuid_changed && !btrfs_is_empty_uuid(uuid: sa->uuid)) {
4076	ret = btrfs_uuid_tree_add(trans, uuid: sa->uuid,
4077	BTRFS_UUID_KEY_RECEIVED_SUBVOL,
4078	subid: root->root_key.objectid);
4079	if (ret < `0` && ret != -EEXIST) {
4080	btrfs_abort_transaction(trans, ret);
4081	btrfs_end_transaction(trans);
4082	goto out;
4083	}
4084	}
4085	ret = btrfs_commit_transaction(trans);
4086	out:
4087	up_write(sem: &fs_info->subvol_sem);
4088	mnt_drop_write_file(file);
4089	return ret;
4090	}
4091
#ifdef CONFIG_64BIT
/*
 * "Set received subvol" ioctl handler for the 32-bit argument layout.
 *
 * @file: file the ioctl was issued on
 * @arg:  user pointer to struct btrfs_ioctl_received_subvol_args_32
 *
 * The 32-bit arguments are converted field by field into a native
 * struct btrfs_ioctl_received_subvol_args, handed to
 * _btrfs_ioctl_set_received_subvol() and, on success, converted back and
 * copied to user space.
 *
 * Return -ENOMEM on allocation failure, -EFAULT if the copy back fails,
 * otherwise the memdup_user() error or the result of
 * _btrfs_ioctl_set_received_subvol().
 */
static long btrfs_ioctl_set_received_subvol_32(struct file *file,
						void __user *arg)
{
	struct btrfs_ioctl_received_subvol_args_32 *args32 = NULL;
	struct btrfs_ioctl_received_subvol_args *args64 = NULL;
	int ret = 0;

	args32 = memdup_user(arg, sizeof(*args32));
	if (IS_ERR(args32))
		return PTR_ERR(args32);

	args64 = kmalloc(sizeof(*args64), GFP_KERNEL);
	if (!args64) {
		ret = -ENOMEM;
		goto out;
	}

	memcpy(args64->uuid, args32->uuid, BTRFS_UUID_SIZE);
	args64->stransid = args32->stransid;
	args64->rtransid = args32->rtransid;
	args64->stime.sec = args32->stime.sec;
	args64->stime.nsec = args32->stime.nsec;
	args64->rtime.sec = args32->rtime.sec;
	args64->rtime.nsec = args32->rtime.nsec;
	args64->flags = args32->flags;

	ret = _btrfs_ioctl_set_received_subvol(file, file_mnt_idmap(file), args64);
	if (ret)
		goto out;

	memcpy(args32->uuid, args64->uuid, BTRFS_UUID_SIZE);
	args32->stransid = args64->stransid;
	args32->rtransid = args64->rtransid;
	args32->stime.sec = args64->stime.sec;
	args32->stime.nsec = args64->stime.nsec;
	args32->rtime.sec = args64->rtime.sec;
	args32->rtime.nsec = args64->rtime.nsec;
	args32->flags = args64->flags;

	ret = copy_to_user(arg, args32, sizeof(*args32));
	if (ret)
		ret = -EFAULT;

out:
	kfree(args32);
	kfree(args64);
	return ret;
}
#endif
4142
4143	static long btrfs_ioctl_set_received_subvol(struct file *file,
4144	void __user *arg)
4145	{
4146	struct btrfs_ioctl_received_subvol_args *sa = NULL;
4147	int ret = `0`;
4148
4149	sa = memdup_user(arg, sizeof(*sa));
4150	if (IS_ERR(ptr: sa))
4151	return PTR_ERR(ptr: sa);
4152
4153	ret = _btrfs_ioctl_set_received_subvol(file, idmap: file_mnt_idmap(file), sa);
4154
4155	if (ret)
4156	goto out;
4157
4158	ret = copy_to_user(to: arg, from: sa, n: sizeof(*sa));
4159	if (ret)
4160	ret = -EFAULT;
4161
4162	out:
4163	kfree(objp: sa);
4164	return ret;
4165	}
4166
4167	static int btrfs_ioctl_get_fslabel(struct btrfs_fs_info *fs_info,
4168	void __user *arg)
4169	{
4170	size_t len;
4171	int ret;
4172	char label[BTRFS_LABEL_SIZE];
4173
4174	spin_lock(lock: &fs_info->super_lock);
4175	memcpy(label, fs_info->super_copy->label, BTRFS_LABEL_SIZE);
4176	spin_unlock(lock: &fs_info->super_lock);
4177
4178	len = strnlen(p: label, BTRFS_LABEL_SIZE);
4179
4180	if (len == BTRFS_LABEL_SIZE) {
4181	btrfs_warn(fs_info,
4182	"label is too long, return the first %zu bytes",
4183	--len);
4184	}
4185
4186	ret = copy_to_user(to: arg, from: label, n: len);
4187
4188	return ret ? -EFAULT : `0`;
4189	}
4190
4191	static int btrfs_ioctl_set_fslabel(struct file file, void* __user *arg)
4192	{
4193	struct inode *inode = file_inode(f: file);
4194	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
4195	struct btrfs_root *root = BTRFS_I(inode)->root;
4196	struct btrfs_super_block *super_block = fs_info->super_copy;
4197	struct btrfs_trans_handle *trans;
4198	char label[BTRFS_LABEL_SIZE];
4199	int ret;
4200
4201	if (!capable(CAP_SYS_ADMIN))
4202	return -EPERM;
4203
4204	if (copy_from_user(to: label, from: arg, n: sizeof(label)))
4205	return -EFAULT;
4206
4207	if (strnlen(p: label, BTRFS_LABEL_SIZE) == BTRFS_LABEL_SIZE) {
4208	btrfs_err(fs_info,
4209	"unable to set label with more than %d bytes",
4210	BTRFS_LABEL_SIZE - `1`);
4211	return -EINVAL;
4212	}
4213
4214	ret = mnt_want_write_file(file);
4215	if (ret)
4216	return ret;
4217
4218	trans = btrfs_start_transaction(root, num_items: `0`);
4219	if (IS_ERR(ptr: trans)) {
4220	ret = PTR_ERR(ptr: trans);
4221	goto out_unlock;
4222	}
4223
4224	spin_lock(lock: &fs_info->super_lock);
4225	strcpy(p: super_block->label, q: label);
4226	spin_unlock(lock: &fs_info->super_lock);
4227	ret = btrfs_commit_transaction(trans);
4228
4229	out_unlock:
4230	mnt_drop_write_file(file);
4231	return ret;
4232	}
4233
4234	#define INIT_FEATURE_FLAGS(suffix) \
4235	{ .compat_flags = BTRFS_FEATURE_COMPAT_##suffix, \
4236	.compat_ro_flags = BTRFS_FEATURE_COMPAT_RO_##suffix, \
4237	.incompat_flags = BTRFS_FEATURE_INCOMPAT_##suffix }
4238
4239	int btrfs_ioctl_get_supported_features(void __user *arg)
4240	{
4241	static const struct btrfs_ioctl_feature_flags features[`3`] = {
4242	INIT_FEATURE_FLAGS(SUPP),
4243	INIT_FEATURE_FLAGS(SAFE_SET),
4244	INIT_FEATURE_FLAGS(SAFE_CLEAR)
4245	};
4246
4247	if (copy_to_user(to: arg, from: &features, n: sizeof(features)))
4248	return -EFAULT;
4249
4250	return `0`;
4251	}
4252
4253	static int btrfs_ioctl_get_features(struct btrfs_fs_info *fs_info,
4254	void __user *arg)
4255	{
4256	struct btrfs_super_block *super_block = fs_info->super_copy;
4257	struct btrfs_ioctl_feature_flags features;
4258
4259	features.compat_flags = btrfs_super_compat_flags(s: super_block);
4260	features.compat_ro_flags = btrfs_super_compat_ro_flags(s: super_block);
4261	features.incompat_flags = btrfs_super_incompat_flags(s: super_block);
4262
4263	if (copy_to_user(to: arg, from: &features, n: sizeof(features)))
4264	return -EFAULT;
4265
4266	return `0`;
4267	}
4268
4269	static int check_feature_bits(struct btrfs_fs_info *fs_info,
4270	enum btrfs_feature_set set,
4271	u64 change_mask, u64 flags, u64 supported_flags,
4272	u64 safe_set, u64 safe_clear)
4273	{
4274	const char *type = btrfs_feature_set_name(set);
4275	char *names;
4276	u64 disallowed, unsupported;
4277	u64 set_mask = flags & change_mask;
4278	u64 clear_mask = ~flags & change_mask;
4279
4280	unsupported = set_mask & ~supported_flags;
4281	if (unsupported) {
4282	names = btrfs_printable_features(set, flags: unsupported);
4283	if (names) {
4284	btrfs_warn(fs_info,
4285	"this kernel does not support the %s feature bit%s",
4286	names, strchr(names, `','`) ? "s" : "");
4287	kfree(objp: names);
4288	} else
4289	btrfs_warn(fs_info,
4290	"this kernel does not support %s bits 0x%llx",
4291	type, unsupported);
4292	return -EOPNOTSUPP;
4293	}
4294
4295	disallowed = set_mask & ~safe_set;
4296	if (disallowed) {
4297	names = btrfs_printable_features(set, flags: disallowed);
4298	if (names) {
4299	btrfs_warn(fs_info,
4300	"can't set the %s feature bit%s while mounted",
4301	names, strchr(names, `','`) ? "s" : "");
4302	kfree(objp: names);
4303	} else
4304	btrfs_warn(fs_info,
4305	"can't set %s bits 0x%llx while mounted",
4306	type, disallowed);
4307	return -EPERM;
4308	}
4309
4310	disallowed = clear_mask & ~safe_clear;
4311	if (disallowed) {
4312	names = btrfs_printable_features(set, flags: disallowed);
4313	if (names) {
4314	btrfs_warn(fs_info,
4315	"can't clear the %s feature bit%s while mounted",
4316	names, strchr(names, `','`) ? "s" : "");
4317	kfree(objp: names);
4318	} else
4319	btrfs_warn(fs_info,
4320	"can't clear %s bits 0x%llx while mounted",
4321	type, disallowed);
4322	return -EPERM;
4323	}
4324
4325	return `0`;
4326	}
4327
/*
 * Call check_feature_bits() for the feature set named by @mask_base
 * (COMPAT, COMPAT_RO or INCOMPAT), filling in the matching FEAT_* id and
 * the BTRFS_FEATURE_<mask_base>_{SUPP,SAFE_SET,SAFE_CLEAR} masks.
 */
#define check_feature(fs_info, change_mask, flags, mask_base)	\
check_feature_bits(fs_info, FEAT_##mask_base, change_mask, flags,	\
		   BTRFS_FEATURE_ ## mask_base ## _SUPP,	\
		   BTRFS_FEATURE_ ## mask_base ## _SAFE_SET,	\
		   BTRFS_FEATURE_ ## mask_base ## _SAFE_CLEAR)
4333
4334	static int btrfs_ioctl_set_features(struct file file, void* __user *arg)
4335	{
4336	struct inode *inode = file_inode(f: file);
4337	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
4338	struct btrfs_root *root = BTRFS_I(inode)->root;
4339	struct btrfs_super_block *super_block = fs_info->super_copy;
4340	struct btrfs_ioctl_feature_flags flags[`2`];
4341	struct btrfs_trans_handle *trans;
4342	u64 newflags;
4343	int ret;
4344
4345	if (!capable(CAP_SYS_ADMIN))
4346	return -EPERM;
4347
4348	if (copy_from_user(to: flags, from: arg, n: sizeof(flags)))
4349	return -EFAULT;
4350
4351	/ Nothing to do /
4352	if (!flags[`0`].compat_flags && !flags[`0`].compat_ro_flags &&
4353	!flags[`0`].incompat_flags)
4354	return `0`;
4355
4356	ret = check_feature(fs_info, flags[`0`].compat_flags,
4357	flags[`1`].compat_flags, COMPAT);
4358	if (ret)
4359	return ret;
4360
4361	ret = check_feature(fs_info, flags[`0`].compat_ro_flags,
4362	flags[`1`].compat_ro_flags, COMPAT_RO);
4363	if (ret)
4364	return ret;
4365
4366	ret = check_feature(fs_info, flags[`0`].incompat_flags,
4367	flags[`1`].incompat_flags, INCOMPAT);
4368	if (ret)
4369	return ret;
4370
4371	ret = mnt_want_write_file(file);
4372	if (ret)
4373	return ret;
4374
4375	trans = btrfs_start_transaction(root, num_items: `0`);
4376	if (IS_ERR(ptr: trans)) {
4377	ret = PTR_ERR(ptr: trans);
4378	goto out_drop_write;
4379	}
4380
4381	spin_lock(lock: &fs_info->super_lock);
4382	newflags = btrfs_super_compat_flags(s: super_block);
4383	newflags \|= flags[`0`].compat_flags & flags[`1`].compat_flags;
4384	newflags &= ~(flags[`0`].compat_flags & ~flags[`1`].compat_flags);
4385	btrfs_set_super_compat_flags(s: super_block, val: newflags);
4386
4387	newflags = btrfs_super_compat_ro_flags(s: super_block);
4388	newflags \|= flags[`0`].compat_ro_flags & flags[`1`].compat_ro_flags;
4389	newflags &= ~(flags[`0`].compat_ro_flags & ~flags[`1`].compat_ro_flags);
4390	btrfs_set_super_compat_ro_flags(s: super_block, val: newflags);
4391
4392	newflags = btrfs_super_incompat_flags(s: super_block);
4393	newflags \|= flags[`0`].incompat_flags & flags[`1`].incompat_flags;
4394	newflags &= ~(flags[`0`].incompat_flags & ~flags[`1`].incompat_flags);
4395	btrfs_set_super_incompat_flags(s: super_block, val: newflags);
4396	spin_unlock(lock: &fs_info->super_lock);
4397
4398	ret = btrfs_commit_transaction(trans);
4399	out_drop_write:
4400	mnt_drop_write_file(file);
4401
4402	return ret;
4403	}
4404
4405	static int _btrfs_ioctl_send(struct inode inode, void* __user *argp, bool compat)
4406	{
4407	struct btrfs_ioctl_send_args *arg;
4408	int ret;
4409
4410	if (compat) {
4411	#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
4412	struct btrfs_ioctl_send_args_32 args32 = { `0` };
4413
4414	ret = copy_from_user(to: &args32, from: argp, n: sizeof(args32));
4415	if (ret)
4416	return -EFAULT;
4417	arg = kzalloc(size: sizeof(*arg), GFP_KERNEL);
4418	if (!arg)
4419	return -ENOMEM;
4420	arg->send_fd = args32.send_fd;
4421	arg->clone_sources_count = args32.clone_sources_count;
4422	arg->clone_sources = compat_ptr(uptr: args32.clone_sources);
4423	arg->parent_root = args32.parent_root;
4424	arg->flags = args32.flags;
4425	arg->version = args32.version;
4426	memcpy(arg->reserved, args32.reserved,
4427	sizeof(args32.reserved));
4428	#else
4429	return -ENOTTY;
4430	#endif
4431	} else {
4432	arg = memdup_user(argp, sizeof(*arg));
4433	if (IS_ERR(ptr: arg))
4434	return PTR_ERR(ptr: arg);
4435	}
4436	ret = btrfs_ioctl_send(inode, arg);
4437	kfree(objp: arg);
4438	return ret;
4439	}
4440
4441	static int btrfs_ioctl_encoded_read(struct file file, void* __user *argp,
4442	bool compat)
4443	{
4444	struct btrfs_ioctl_encoded_io_args args = { `0` };
4445	size_t copy_end_kernel = offsetofend(struct btrfs_ioctl_encoded_io_args,
4446	flags);
4447	size_t copy_end;
4448	struct iovec iovstack[UIO_FASTIOV];
4449	struct iovec *iov = iovstack;
4450	struct iov_iter iter;
4451	loff_t pos;
4452	struct kiocb kiocb;
4453	ssize_t ret;
4454
4455	if (!capable(CAP_SYS_ADMIN)) {
4456	ret = -EPERM;
4457	goto out_acct;
4458	}
4459
4460	if (compat) {
4461	#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
4462	struct btrfs_ioctl_encoded_io_args_32 args32;
4463
4464	copy_end = offsetofend(struct btrfs_ioctl_encoded_io_args_32,
4465	flags);
4466	if (copy_from_user(to: &args32, from: argp, n: copy_end)) {
4467	ret = -EFAULT;
4468	goto out_acct;
4469	}
4470	args.iov = compat_ptr(uptr: args32.iov);
4471	args.iovcnt = args32.iovcnt;
4472	args.offset = args32.offset;
4473	args.flags = args32.flags;
4474	#else
4475	return -ENOTTY;
4476	#endif
4477	} else {
4478	copy_end = copy_end_kernel;
4479	if (copy_from_user(to: &args, from: argp, n: copy_end)) {
4480	ret = -EFAULT;
4481	goto out_acct;
4482	}
4483	}
4484	if (args.flags != `0`) {
4485	ret = -EINVAL;
4486	goto out_acct;
4487	}
4488
4489	ret = import_iovec(ITER_DEST, uvec: args.iov, nr_segs: args.iovcnt, ARRAY_SIZE(iovstack),
4490	iovp: &iov, i: &iter);
4491	if (ret < `0`)
4492	goto out_acct;
4493
4494	if (iov_iter_count(i: &iter) == `0`) {
4495	ret = `0`;
4496	goto out_iov;
4497	}
4498	pos = args.offset;
4499	ret = rw_verify_area(READ, file, &pos, args.len);
4500	if (ret < `0`)
4501	goto out_iov;
4502
4503	init_sync_kiocb(kiocb: &kiocb, filp: file);
4504	kiocb.ki_pos = pos;
4505
4506	ret = btrfs_encoded_read(iocb: &kiocb, iter: &iter, encoded: &args);
4507	if (ret >= `0`) {
4508	fsnotify_access(file);
4509	if (copy_to_user(to: argp + copy_end,
4510	from: (char *)&args + copy_end_kernel,
4511	n: sizeof(args) - copy_end_kernel))
4512	ret = -EFAULT;
4513	}
4514
4515	out_iov:
4516	kfree(objp: iov);
4517	out_acct:
4518	if (ret > `0`)
4519	add_rchar(current, amt: ret);
4520	inc_syscr(current);
4521	return ret;
4522	}
4523
4524	static int btrfs_ioctl_encoded_write(struct file file, void* __user *argp, bool compat)
4525	{
4526	struct btrfs_ioctl_encoded_io_args args;
4527	struct iovec iovstack[UIO_FASTIOV];
4528	struct iovec *iov = iovstack;
4529	struct iov_iter iter;
4530	loff_t pos;
4531	struct kiocb kiocb;
4532	ssize_t ret;
4533
4534	if (!capable(CAP_SYS_ADMIN)) {
4535	ret = -EPERM;
4536	goto out_acct;
4537	}
4538
4539	if (!(file->f_mode & FMODE_WRITE)) {
4540	ret = -EBADF;
4541	goto out_acct;
4542	}
4543
4544	if (compat) {
4545	#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
4546	struct btrfs_ioctl_encoded_io_args_32 args32;
4547
4548	if (copy_from_user(to: &args32, from: argp, n: sizeof(args32))) {
4549	ret = -EFAULT;
4550	goto out_acct;
4551	}
4552	args.iov = compat_ptr(uptr: args32.iov);
4553	args.iovcnt = args32.iovcnt;
4554	args.offset = args32.offset;
4555	args.flags = args32.flags;
4556	args.len = args32.len;
4557	args.unencoded_len = args32.unencoded_len;
4558	args.unencoded_offset = args32.unencoded_offset;
4559	args.compression = args32.compression;
4560	args.encryption = args32.encryption;
4561	memcpy(args.reserved, args32.reserved, sizeof(args.reserved));
4562	#else
4563	return -ENOTTY;
4564	#endif
4565	} else {
4566	if (copy_from_user(to: &args, from: argp, n: sizeof(args))) {
4567	ret = -EFAULT;
4568	goto out_acct;
4569	}
4570	}
4571
4572	ret = -EINVAL;
4573	if (args.flags != `0`)
4574	goto out_acct;
4575	if (memchr_inv(p: args.reserved, c: `0`, size: sizeof(args.reserved)))
4576	goto out_acct;
4577	if (args.compression == BTRFS_ENCODED_IO_COMPRESSION_NONE &&
4578	args.encryption == BTRFS_ENCODED_IO_ENCRYPTION_NONE)
4579	goto out_acct;
4580	if (args.compression >= BTRFS_ENCODED_IO_COMPRESSION_TYPES \|\|
4581	args.encryption >= BTRFS_ENCODED_IO_ENCRYPTION_TYPES)
4582	goto out_acct;
4583	if (args.unencoded_offset > args.unencoded_len)
4584	goto out_acct;
4585	if (args.len > args.unencoded_len - args.unencoded_offset)
4586	goto out_acct;
4587
4588	ret = import_iovec(ITER_SOURCE, uvec: args.iov, nr_segs: args.iovcnt, ARRAY_SIZE(iovstack),
4589	iovp: &iov, i: &iter);
4590	if (ret < `0`)
4591	goto out_acct;
4592
4593	if (iov_iter_count(i: &iter) == `0`) {
4594	ret = `0`;
4595	goto out_iov;
4596	}
4597	pos = args.offset;
4598	ret = rw_verify_area(WRITE, file, &pos, args.len);
4599	if (ret < `0`)
4600	goto out_iov;
4601
4602	init_sync_kiocb(kiocb: &kiocb, filp: file);
4603	ret = kiocb_set_rw_flags(ki: &kiocb, flags: `0`);
4604	if (ret)
4605	goto out_iov;
4606	kiocb.ki_pos = pos;
4607
4608	file_start_write(file);
4609
4610	ret = btrfs_do_write_iter(iocb: &kiocb, from: &iter, encoded: &args);
4611	if (ret > `0`)
4612	fsnotify_modify(file);
4613
4614	file_end_write(file);
4615	out_iov:
4616	kfree(objp: iov);
4617	out_acct:
4618	if (ret > `0`)
4619	add_wchar(current, amt: ret);
4620	inc_syscw(current);
4621	return ret;
4622	}
4623
4624	long btrfs_ioctl(struct file file, unsigned* int
4625	cmd, unsigned long arg)
4626	{
4627	struct inode *inode = file_inode(f: file);
4628	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
4629	struct btrfs_root *root = BTRFS_I(inode)->root;
4630	void __user argp = (void* __user *)arg;
4631
4632	switch (cmd) {
4633	case FS_IOC_GETVERSION:
4634	return btrfs_ioctl_getversion(inode, arg: argp);
4635	case FS_IOC_GETFSLABEL:
4636	return btrfs_ioctl_get_fslabel(fs_info, arg: argp);
4637	case FS_IOC_SETFSLABEL:
4638	return btrfs_ioctl_set_fslabel(file, arg: argp);
4639	case FITRIM:
4640	return btrfs_ioctl_fitrim(fs_info, arg: argp);
4641	case BTRFS_IOC_SNAP_CREATE:
4642	return btrfs_ioctl_snap_create(file, arg: argp, subvol: `0`);
4643	case BTRFS_IOC_SNAP_CREATE_V2:
4644	return btrfs_ioctl_snap_create_v2(file, arg: argp, subvol: `0`);
4645	case BTRFS_IOC_SUBVOL_CREATE:
4646	return btrfs_ioctl_snap_create(file, arg: argp, subvol: `1`);
4647	case BTRFS_IOC_SUBVOL_CREATE_V2:
4648	return btrfs_ioctl_snap_create_v2(file, arg: argp, subvol: `1`);
4649	case BTRFS_IOC_SNAP_DESTROY:
4650	return btrfs_ioctl_snap_destroy(file, arg: argp, destroy_v2: false);
4651	case BTRFS_IOC_SNAP_DESTROY_V2:
4652	return btrfs_ioctl_snap_destroy(file, arg: argp, destroy_v2: true);
4653	case BTRFS_IOC_SUBVOL_GETFLAGS:
4654	return btrfs_ioctl_subvol_getflags(inode, arg: argp);
4655	case BTRFS_IOC_SUBVOL_SETFLAGS:
4656	return btrfs_ioctl_subvol_setflags(file, arg: argp);
4657	case BTRFS_IOC_DEFAULT_SUBVOL:
4658	return btrfs_ioctl_default_subvol(file, argp);
4659	case BTRFS_IOC_DEFRAG:
4660	return btrfs_ioctl_defrag(file, NULL);
4661	case BTRFS_IOC_DEFRAG_RANGE:
4662	return btrfs_ioctl_defrag(file, argp);
4663	case BTRFS_IOC_RESIZE:
4664	return btrfs_ioctl_resize(file, arg: argp);
4665	case BTRFS_IOC_ADD_DEV:
4666	return btrfs_ioctl_add_dev(fs_info, arg: argp);
4667	case BTRFS_IOC_RM_DEV:
4668	return btrfs_ioctl_rm_dev(file, arg: argp);
4669	case BTRFS_IOC_RM_DEV_V2:
4670	return btrfs_ioctl_rm_dev_v2(file, arg: argp);
4671	case BTRFS_IOC_FS_INFO:
4672	return btrfs_ioctl_fs_info(fs_info, arg: argp);
4673	case BTRFS_IOC_DEV_INFO:
4674	return btrfs_ioctl_dev_info(fs_info, arg: argp);
4675	case BTRFS_IOC_TREE_SEARCH:
4676	return btrfs_ioctl_tree_search(inode, argp);
4677	case BTRFS_IOC_TREE_SEARCH_V2:
4678	return btrfs_ioctl_tree_search_v2(inode, argp);
4679	case BTRFS_IOC_INO_LOOKUP:
4680	return btrfs_ioctl_ino_lookup(root, argp);
4681	case BTRFS_IOC_INO_PATHS:
4682	return btrfs_ioctl_ino_to_path(root, arg: argp);
4683	case BTRFS_IOC_LOGICAL_INO:
4684	return btrfs_ioctl_logical_to_ino(fs_info, arg: argp, version: `1`);
4685	case BTRFS_IOC_LOGICAL_INO_V2:
4686	return btrfs_ioctl_logical_to_ino(fs_info, arg: argp, version: `2`);
4687	case BTRFS_IOC_SPACE_INFO:
4688	return btrfs_ioctl_space_info(fs_info, arg: argp);
4689	case BTRFS_IOC_SYNC: {
4690	int ret;
4691
4692	ret = btrfs_start_delalloc_roots(fs_info, LONG_MAX, in_reclaim_context: false);
4693	if (ret)
4694	return ret;
4695	ret = btrfs_sync_fs(sb: inode->i_sb, wait: `1`);
4696	/*
4697	* The transaction thread may want to do more work,
4698	* namely it pokes the cleaner kthread that will start
4699	* processing uncleaned subvols.
4700	*/
4701	wake_up_process(tsk: fs_info->transaction_kthread);
4702	return ret;
4703	}
4704	case BTRFS_IOC_START_SYNC:
4705	return btrfs_ioctl_start_sync(root, argp);
4706	case BTRFS_IOC_WAIT_SYNC:
4707	return btrfs_ioctl_wait_sync(fs_info, argp);
4708	case BTRFS_IOC_SCRUB:
4709	return btrfs_ioctl_scrub(file, arg: argp);
4710	case BTRFS_IOC_SCRUB_CANCEL:
4711	return btrfs_ioctl_scrub_cancel(fs_info);
4712	case BTRFS_IOC_SCRUB_PROGRESS:
4713	return btrfs_ioctl_scrub_progress(fs_info, arg: argp);
4714	case BTRFS_IOC_BALANCE_V2:
4715	return btrfs_ioctl_balance(file, arg: argp);
4716	case BTRFS_IOC_BALANCE_CTL:
4717	return btrfs_ioctl_balance_ctl(fs_info, cmd: arg);
4718	case BTRFS_IOC_BALANCE_PROGRESS:
4719	return btrfs_ioctl_balance_progress(fs_info, arg: argp);
4720	case BTRFS_IOC_SET_RECEIVED_SUBVOL:
4721	return btrfs_ioctl_set_received_subvol(file, arg: argp);
4722	#ifdef CONFIG_64BIT
4723	case BTRFS_IOC_SET_RECEIVED_SUBVOL_32:
4724	return btrfs_ioctl_set_received_subvol_32(file, arg: argp);
4725	#endif
4726	case BTRFS_IOC_SEND:
4727	return _btrfs_ioctl_send(inode, argp, compat: false);
4728	#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
4729	case BTRFS_IOC_SEND_32:
4730	return _btrfs_ioctl_send(inode, argp, compat: true);
4731	#endif
4732	case BTRFS_IOC_GET_DEV_STATS:
4733	return btrfs_ioctl_get_dev_stats(fs_info, arg: argp);
4734	case BTRFS_IOC_QUOTA_CTL:
4735	return btrfs_ioctl_quota_ctl(file, arg: argp);
4736	case BTRFS_IOC_QGROUP_ASSIGN:
4737	return btrfs_ioctl_qgroup_assign(file, arg: argp);
4738	case BTRFS_IOC_QGROUP_CREATE:
4739	return btrfs_ioctl_qgroup_create(file, arg: argp);
4740	case BTRFS_IOC_QGROUP_LIMIT:
4741	return btrfs_ioctl_qgroup_limit(file, arg: argp);
4742	case BTRFS_IOC_QUOTA_RESCAN:
4743	return btrfs_ioctl_quota_rescan(file, arg: argp);
4744	case BTRFS_IOC_QUOTA_RESCAN_STATUS:
4745	return btrfs_ioctl_quota_rescan_status(fs_info, arg: argp);
4746	case BTRFS_IOC_QUOTA_RESCAN_WAIT:
4747	return btrfs_ioctl_quota_rescan_wait(fs_info, arg: argp);
4748	case BTRFS_IOC_DEV_REPLACE:
4749	return btrfs_ioctl_dev_replace(fs_info, arg: argp);
4750	case BTRFS_IOC_GET_SUPPORTED_FEATURES:
4751	return btrfs_ioctl_get_supported_features(arg: argp);
4752	case BTRFS_IOC_GET_FEATURES:
4753	return btrfs_ioctl_get_features(fs_info, arg: argp);
4754	case BTRFS_IOC_SET_FEATURES:
4755	return btrfs_ioctl_set_features(file, arg: argp);
4756	case BTRFS_IOC_GET_SUBVOL_INFO:
4757	return btrfs_ioctl_get_subvol_info(inode, argp);
4758	case BTRFS_IOC_GET_SUBVOL_ROOTREF:
4759	return btrfs_ioctl_get_subvol_rootref(root, argp);
4760	case BTRFS_IOC_INO_LOOKUP_USER:
4761	return btrfs_ioctl_ino_lookup_user(file, argp);
4762	case FS_IOC_ENABLE_VERITY:
4763	return fsverity_ioctl_enable(filp: file, arg: (const void __user *)argp);
4764	case FS_IOC_MEASURE_VERITY:
4765	return fsverity_ioctl_measure(filp: file, arg: argp);
4766	case BTRFS_IOC_ENCODED_READ:
4767	return btrfs_ioctl_encoded_read(file, argp, compat: false);
4768	case BTRFS_IOC_ENCODED_WRITE:
4769	return btrfs_ioctl_encoded_write(file, argp, compat: false);
4770	#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
4771	case BTRFS_IOC_ENCODED_READ_32:
4772	return btrfs_ioctl_encoded_read(file, argp, compat: true);
4773	case BTRFS_IOC_ENCODED_WRITE_32:
4774	return btrfs_ioctl_encoded_write(file, argp, compat: true);
4775	#endif
4776	}
4777
4778	return -ENOTTY;
4779	}
4780
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry point.
 *
 * @file: file the ioctl was issued on
 * @cmd:  ioctl command number as issued by the caller
 * @arg:  command argument as issued by the caller
 *
 * Translates FS_IOC32_GETVERSION to FS_IOC_GETVERSION, converts @arg with
 * compat_ptr() and forwards everything to btrfs_ioctl(), whose result is
 * returned.
 */
long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	/*
	 * These all access 32-bit values anyway so no further
	 * handling is necessary.
	 */
	switch (cmd) {
	case FS_IOC32_GETVERSION:
		cmd = FS_IOC_GETVERSION;
		break;
	}

	return btrfs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
}
#endif
4797

/* source code of linux/fs/btrfs/ioctl.c */