diff mbox series

[PATCHv2] btrfs: Introduce new BTRFS_IOC_SNAP_DESTROY_V2 ioctl

Message ID 20200127024817.15587-1-marcos.souza.org@gmail.com (mailing list archive)
State New, archived
Headers show
Series [PATCHv2] btrfs: Introduce new BTRFS_IOC_SNAP_DESTROY_V2 ioctl | expand

Commit Message

Marcos Paulo de Souza Jan. 27, 2020, 2:48 a.m. UTC
From: Marcos Paulo de Souza <mpdesouza@suse.com>

This ioctl will be responsible for deleting a subvolume using it's id.
This can be used when a system has a file system mounted from a
subvolume, rather than the root file system, like below:

/
|- @subvol1
|- @subvol2
\- @subvol_default
If only @subvol_default is mounted, we have no path to reach
@subvol1 and @subvol2, thus no way to delete them.
This patch introduces a new flag to allow BTRFS_IOC_SNAP_DESTORY_V2
to delete subvolume using subvolid.

Also in this patch, export some functions, add BTRFS_SUBVOL_BY_ID flag
and add subvolid as a union member of name in struct btrfs_ioctl_vol_args_v2.

Signed-off-by: Marcos Paulo de Souza <mpdesouza@suse.com>
---
 Changes from v1:
 * make btrfs_ioctl_snap_destroy handle both SNAP_DESTROY and SNAP_DESTROY_V2
   (suggested by Josef)
 * Change BTRFS_SUBVOL_DELETE_BY_ID to BTRFS_SUBVOL_BY_ID (David)
 * Send patches for btrfs-progs and xfstests along this change

 fs/btrfs/ctree.h           |  2 +
 fs/btrfs/export.c          |  4 +-
 fs/btrfs/export.h          |  5 ++
 fs/btrfs/ioctl.c           | 97 +++++++++++++++++++++++++++++++-------
 fs/btrfs/super.c           |  4 +-
 include/uapi/linux/btrfs.h | 12 ++++-
 6 files changed, 101 insertions(+), 23 deletions(-)

Comments

David Sterba Jan. 28, 2020, 5:02 p.m. UTC | #1
On Sun, Jan 26, 2020 at 11:48:17PM -0300, Marcos Paulo de Souza wrote:
> From: Marcos Paulo de Souza <mpdesouza@suse.com>
> 
> This ioctl will be responsible for deleting a subvolume using it's id.
> This can be used when a system has a file system mounted from a
> subvolume, rather than the root file system, like below:
> 
> /
> |- @subvol1
> |- @subvol2
> \- @subvol_default
> If only @subvol_default is mounted, we have no path to reach
> @subvol1 and @subvol2, thus no way to delete them.

Here I'd expect a brief overview how is the "we have no path to reach"
problem solved. This is the "invisible" part,

> This patch introduces a new flag to allow BTRFS_IOC_SNAP_DESTORY_V2
> to delete subvolume using subvolid.

and this is the visible interface part, so some details are lacking.

> Also in this patch, export some functions, add BTRFS_SUBVOL_BY_ID flag
> and add subvolid as a union member of name in struct btrfs_ioctl_vol_args_v2.
> 
> Signed-off-by: Marcos Paulo de Souza <mpdesouza@suse.com>
> ---
>  Changes from v1:
>  * make btrfs_ioctl_snap_destroy handle both SNAP_DESTROY and SNAP_DESTROY_V2
>    (suggested by Josef)
>  * Change BTRFS_SUBVOL_DELETE_BY_ID to BTRFS_SUBVOL_BY_ID (David)
>  * Send patches for btrfs-progs and xfstests along this change
> 
>  fs/btrfs/ctree.h           |  2 +
>  fs/btrfs/export.c          |  4 +-
>  fs/btrfs/export.h          |  5 ++
>  fs/btrfs/ioctl.c           | 97 +++++++++++++++++++++++++++++++-------
>  fs/btrfs/super.c           |  4 +-
>  include/uapi/linux/btrfs.h | 12 ++++-
>  6 files changed, 101 insertions(+), 23 deletions(-)
> 
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index f90b82050d2d..5847a34b5146 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -3010,6 +3010,8 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
>  int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
>  			unsigned long new_flags);
>  int btrfs_sync_fs(struct super_block *sb, int wait);
> +char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
> +					   u64 subvol_objectid);
>  
>  static inline __printf(2, 3) __cold
>  void btrfs_no_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
> diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
> index 72e312cae69d..027411cdbae7 100644
> --- a/fs/btrfs/export.c
> +++ b/fs/btrfs/export.c
> @@ -57,7 +57,7 @@ static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
>  	return type;
>  }
>  
> -static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
> +struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
>  				       u64 root_objectid, u32 generation,
>  				       int check_generation)
>  {
> @@ -152,7 +152,7 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
>  	return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1);
>  }
>  
> -static struct dentry *btrfs_get_parent(struct dentry *child)
> +struct dentry *btrfs_get_parent(struct dentry *child)
>  {
>  	struct inode *dir = d_inode(child);
>  	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
> diff --git a/fs/btrfs/export.h b/fs/btrfs/export.h
> index 57488ecd7d4e..f981e8103d64 100644
> --- a/fs/btrfs/export.h
> +++ b/fs/btrfs/export.h
> @@ -18,4 +18,9 @@ struct btrfs_fid {
>  	u64 parent_root_objectid;
>  } __attribute__ ((packed));
>  
> +struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
> +				       u64 root_objectid, u32 generation,
> +				       int check_generation);
> +struct dentry *btrfs_get_parent(struct dentry *child);
> +
>  #endif
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index 1b1b6ff855aa..889cb43149f9 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -28,6 +28,7 @@
>  #include <linux/iversion.h>
>  #include "ctree.h"
>  #include "disk-io.h"
> +#include "export.h"
>  #include "transaction.h"
>  #include "btrfs_inode.h"
>  #include "print-tree.h"
> @@ -2836,7 +2837,8 @@ static int btrfs_ioctl_get_subvol_rootref(struct file *file, void __user *argp)
>  }
>  
>  static noinline int btrfs_ioctl_snap_destroy(struct file *file,
> -					     void __user *arg)
> +					     void __user *arg,
> +					     bool destroy_v2)
>  {
>  	struct dentry *parent = file->f_path.dentry;
>  	struct btrfs_fs_info *fs_info = btrfs_sb(parent->d_sb);
> @@ -2845,34 +2847,87 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
>  	struct inode *inode;
>  	struct btrfs_root *root = BTRFS_I(dir)->root;
>  	struct btrfs_root *dest = NULL;
> -	struct btrfs_ioctl_vol_args *vol_args;
> +	struct btrfs_ioctl_vol_args *vol_args = NULL;
> +	struct btrfs_ioctl_vol_args_v2 *vol_args2 = NULL;
> +	char *name, *name_ptr = NULL;
>  	int namelen;
>  	int err = 0;
>  
> -	if (!S_ISDIR(dir->i_mode))
> -		return -ENOTDIR;
> +	if (destroy_v2) {
> +		vol_args2 = memdup_user(arg, sizeof(*vol_args2));
> +		if (IS_ERR(vol_args2))
> +			return PTR_ERR(vol_args2);
>  
> -	vol_args = memdup_user(arg, sizeof(*vol_args));
> -	if (IS_ERR(vol_args))
> -		return PTR_ERR(vol_args);
> +		if (vol_args2->subvolid == 0) {
> +			err = -EINVAL;
> +			goto out;
> +		}
>  
> -	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
> -	namelen = strlen(vol_args->name);
> -	if (strchr(vol_args->name, '/') ||
> -	    strncmp(vol_args->name, "..", namelen) == 0) {
> -		err = -EINVAL;
> -		goto out;
> +		if (!(vol_args2->flags & BTRFS_SUBVOL_BY_ID)) {
> +			err = -EINVAL;
> +			goto out;
> +		}
> +
> +		dentry = btrfs_get_dentry(fs_info->sb, BTRFS_FIRST_FREE_OBJECTID,
> +					vol_args2->subvolid, 0, 0);
> +		if (IS_ERR(dentry)) {
> +			err = PTR_ERR(dentry);
> +			goto out;
> +		}
> +
> +		/* 
> +		 * change the default parent since the subvolume being deleted
> +		 * can be outside of the current mount point
> +		 */
> +		parent = btrfs_get_parent(dentry);
> +
> +		/* 
> +		 * the only use of dentry was to get the parent, so we can
> +		 * release it now. Later on the dentry will be queried again to
> +		 * make sure the dentry will reside in the dentry cache
> +		 */
> +		dput(dentry);
> +		if (IS_ERR(parent)) {
> +			err = PTR_ERR(parent);
> +			goto out;
> +		}
> +		dir = d_inode(parent);
> +
> +		name_ptr = btrfs_get_subvol_name_from_objectid(fs_info, vol_args2->subvolid);
> +		if (IS_ERR(name_ptr)) {
> +			err = PTR_ERR(name_ptr);
> +			goto free_parent;
> +		}
> +		name = (char *)kbasename(name_ptr);
> +		namelen = strlen(name);
> +	} else {
> +		vol_args = memdup_user(arg, sizeof(*vol_args));
> +		if (IS_ERR(vol_args))
> +			return PTR_ERR(vol_args);
> +
> +		vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
> +		namelen = strlen(vol_args->name);
> +		if (strchr(vol_args->name, '/') ||
> +		    strncmp(vol_args->name, "..", namelen) == 0) {
> +			err = -EINVAL;
> +			goto out;
> +		}
> +		name = vol_args->name;
> +	}
> +
> +	if (!S_ISDIR(dir->i_mode)) {
> +		err = -ENOTDIR;
> +		goto free_subvol_name;
>  	}
>  
>  	err = mnt_want_write_file(file);
>  	if (err)
> -		goto out;
> -
> +		goto free_subvol_name;
>  
>  	err = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
>  	if (err == -EINTR)
>  		goto out_drop_write;
> -	dentry = lookup_one_len(vol_args->name, parent, namelen);
> +	dentry = lookup_one_len(name, parent, namelen);
>  	if (IS_ERR(dentry)) {
>  		err = PTR_ERR(dentry);
>  		goto out_unlock_dir;
> @@ -2943,7 +2998,13 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
>  	inode_unlock(dir);
>  out_drop_write:
>  	mnt_drop_write_file(file);
> +free_subvol_name:
> +	kfree(name_ptr);
> +free_parent:
> +	if (destroy_v2)
> +		dput(parent);
>  out:
> +	kfree(vol_args2);
>  	kfree(vol_args);
>  	return err;
>  }
> @@ -5464,7 +5525,9 @@ long btrfs_ioctl(struct file *file, unsigned int
>  	case BTRFS_IOC_SUBVOL_CREATE_V2:
>  		return btrfs_ioctl_snap_create_v2(file, argp, 1);
>  	case BTRFS_IOC_SNAP_DESTROY:
> -		return btrfs_ioctl_snap_destroy(file, argp);
> +		return btrfs_ioctl_snap_destroy(file, argp, false);
> +	case BTRFS_IOC_SNAP_DESTROY_V2:
> +		return btrfs_ioctl_snap_destroy(file, argp, true);
>  	case BTRFS_IOC_SUBVOL_GETFLAGS:
>  		return btrfs_ioctl_subvol_getflags(file, argp);
>  	case BTRFS_IOC_SUBVOL_SETFLAGS:
> diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
> index a906315efd19..4a8ce475d906 100644
> --- a/fs/btrfs/super.c
> +++ b/fs/btrfs/super.c
> @@ -1024,7 +1024,7 @@ static int btrfs_parse_subvol_options(const char *options, char **subvol_name,
>  	return error;
>  }
>  
> -static char *get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
> +char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
>  					   u64 subvol_objectid)
>  {
>  	struct btrfs_root *root = fs_info->tree_root;
> @@ -1438,7 +1438,7 @@ static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
>  				goto out;
>  			}
>  		}
> -		subvol_name = get_subvol_name_from_objectid(btrfs_sb(mnt->mnt_sb),
> +		subvol_name = btrfs_get_subvol_name_from_objectid(btrfs_sb(mnt->mnt_sb),
>  							    subvol_objectid);
>  		if (IS_ERR(subvol_name)) {
>  			root = ERR_CAST(subvol_name);
> diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
> index 7a8bc8b920f5..d8619245f0dc 100644
> --- a/include/uapi/linux/btrfs.h
> +++ b/include/uapi/linux/btrfs.h
> @@ -42,11 +42,14 @@ struct btrfs_ioctl_vol_args {
>  
>  #define BTRFS_DEVICE_SPEC_BY_ID		(1ULL << 3)
>  
> +#define BTRFS_SUBVOL_BY_ID	(1ULL << 4)
> +
>  #define BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED		\
>  			(BTRFS_SUBVOL_CREATE_ASYNC |	\
>  			BTRFS_SUBVOL_RDONLY |		\
>  			BTRFS_SUBVOL_QGROUP_INHERIT |	\
> -			BTRFS_DEVICE_SPEC_BY_ID)
> +			BTRFS_DEVICE_SPEC_BY_ID |	\
> +			BTRFS_SUBVOL_BY_ID)

			BTRFS_SUBVOL_SPEC_BY_ID

> @@ -119,7 +122,10 @@ struct btrfs_ioctl_vol_args_v2 {
>  		__u64 unused[4];
>  	};
>  	union {
> -		char name[BTRFS_SUBVOL_NAME_MAX + 1];
> +		union {
> +			char name[BTRFS_SUBVOL_NAME_MAX + 1];
> +			__u64 subvolid;
> +		};

The subvolid can be added t othe first union just fine and duplicating
the name does not make sense to me.

  	union {
		char name[BTRFS_SUBVOL_NAME_MAX + 1];
		__u64 subvolid;
  		__u64 devid;
	};

The argument structure is common for device and subvolume ioctls but I'm
not aware of any context were we would need both at the same time.
Which makes the SPEC_BY_ID bits mutually exclusive.


>  		__u64 devid;
>  	};
David Sterba Jan. 28, 2020, 5:26 p.m. UTC | #2
On Sun, Jan 26, 2020 at 11:48:17PM -0300, Marcos Paulo de Souza wrote:
> From: Marcos Paulo de Souza <mpdesouza@suse.com>
> 
> This ioctl will be responsible for deleting a subvolume using it's id.
> This can be used when a system has a file system mounted from a
> subvolume, rather than the root file system, like below:
> 
> /
> |- @subvol1
> |- @subvol2
> \- @subvol_default
> If only @subvol_default is mounted, we have no path to reach
> @subvol1 and @subvol2, thus no way to delete them.
> This patch introduces a new flag to allow BTRFS_IOC_SNAP_DESTORY_V2
> to delete subvolume using subvolid.
> 
> Also in this patch, export some functions, add BTRFS_SUBVOL_BY_ID flag
> and add subvolid as a union member of name in struct btrfs_ioctl_vol_args_v2.
> 
> Signed-off-by: Marcos Paulo de Souza <mpdesouza@suse.com>
> ---
>  Changes from v1:
>  * make btrfs_ioctl_snap_destroy handle both SNAP_DESTROY and SNAP_DESTROY_V2
>    (suggested by Josef)
>  * Change BTRFS_SUBVOL_DELETE_BY_ID to BTRFS_SUBVOL_BY_ID (David)
>  * Send patches for btrfs-progs and xfstests along this change
> 
>  fs/btrfs/ctree.h           |  2 +
>  fs/btrfs/export.c          |  4 +-
>  fs/btrfs/export.h          |  5 ++
>  fs/btrfs/ioctl.c           | 97 +++++++++++++++++++++++++++++++-------
>  fs/btrfs/super.c           |  4 +-
>  include/uapi/linux/btrfs.h | 12 ++++-
>  6 files changed, 101 insertions(+), 23 deletions(-)
> 
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index f90b82050d2d..5847a34b5146 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -3010,6 +3010,8 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
>  int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
>  			unsigned long new_flags);
>  int btrfs_sync_fs(struct super_block *sb, int wait);
> +char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
> +					   u64 subvol_objectid);
>  
>  static inline __printf(2, 3) __cold
>  void btrfs_no_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
> diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
> index 72e312cae69d..027411cdbae7 100644
> --- a/fs/btrfs/export.c
> +++ b/fs/btrfs/export.c
> @@ -57,7 +57,7 @@ static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
>  	return type;
>  }
>  
> -static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
> +struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
>  				       u64 root_objectid, u32 generation,
>  				       int check_generation)
>  {
> @@ -152,7 +152,7 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
>  	return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1);
>  }
>  
> -static struct dentry *btrfs_get_parent(struct dentry *child)
> +struct dentry *btrfs_get_parent(struct dentry *child)
>  {
>  	struct inode *dir = d_inode(child);
>  	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
> diff --git a/fs/btrfs/export.h b/fs/btrfs/export.h
> index 57488ecd7d4e..f981e8103d64 100644
> --- a/fs/btrfs/export.h
> +++ b/fs/btrfs/export.h
> @@ -18,4 +18,9 @@ struct btrfs_fid {
>  	u64 parent_root_objectid;
>  } __attribute__ ((packed));
>  
> +struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
> +				       u64 root_objectid, u32 generation,
> +				       int check_generation);
> +struct dentry *btrfs_get_parent(struct dentry *child);
> +
>  #endif
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index 1b1b6ff855aa..889cb43149f9 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -28,6 +28,7 @@
>  #include <linux/iversion.h>
>  #include "ctree.h"
>  #include "disk-io.h"
> +#include "export.h"
>  #include "transaction.h"
>  #include "btrfs_inode.h"
>  #include "print-tree.h"
> @@ -2836,7 +2837,8 @@ static int btrfs_ioctl_get_subvol_rootref(struct file *file, void __user *argp)
>  }
>  
>  static noinline int btrfs_ioctl_snap_destroy(struct file *file,
> -					     void __user *arg)
> +					     void __user *arg,
> +					     bool destroy_v2)
>  {
>  	struct dentry *parent = file->f_path.dentry;
>  	struct btrfs_fs_info *fs_info = btrfs_sb(parent->d_sb);
> @@ -2845,34 +2847,87 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
>  	struct inode *inode;
>  	struct btrfs_root *root = BTRFS_I(dir)->root;
>  	struct btrfs_root *dest = NULL;
> -	struct btrfs_ioctl_vol_args *vol_args;
> +	struct btrfs_ioctl_vol_args *vol_args = NULL;
> +	struct btrfs_ioctl_vol_args_v2 *vol_args2 = NULL;
> +	char *name, *name_ptr = NULL;

The naming is confusing, name_ptr refers to the resolved subvolume name,
so I suggest to rename it to subvol_name.

>  	int namelen;
>  	int err = 0;
>  
> -	if (!S_ISDIR(dir->i_mode))
> -		return -ENOTDIR;
> +	if (destroy_v2) {
> +		vol_args2 = memdup_user(arg, sizeof(*vol_args2));
> +		if (IS_ERR(vol_args2))
> +			return PTR_ERR(vol_args2);
>  
> -	vol_args = memdup_user(arg, sizeof(*vol_args));
> -	if (IS_ERR(vol_args))
> -		return PTR_ERR(vol_args);
> +		if (vol_args2->subvolid == 0) {

This should be compared >= BTRFS_FIRST_FREE_OBJECTID
as there are no valid subvolumes with lower id. The exception is the
toplevel subvolume with id 5 that must not be deletable.

> +			err = -EINVAL;
> +			goto out;
> +		}
>  
> -	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
> -	namelen = strlen(vol_args->name);
> -	if (strchr(vol_args->name, '/') ||
> -	    strncmp(vol_args->name, "..", namelen) == 0) {
> -		err = -EINVAL;
> -		goto out;
> +		if (!(vol_args2->flags & BTRFS_SUBVOL_BY_ID)) {
> +			err = -EINVAL;
> +			goto out;

The flag validation needs to be factored out of the if. First validate,
then do the rest. For backward compatibility, the v1 ioctl must take no
flags, so if theres BTRFS_SUBVOL_BY_ID for v1, it needs to fail. For v2
the flag is optional.

> +		}
> +
> +		dentry = btrfs_get_dentry(fs_info->sb, BTRFS_FIRST_FREE_OBJECTID,
> +					vol_args2->subvolid, 0, 0);
> +		if (IS_ERR(dentry)) {
> +			err = PTR_ERR(dentry);
> +			goto out;
> +		}
> +
> +		/* 

There's a trailing space on the line, 'git am' does not allow me to
apply the patch without removing it manually. Same for the comment
below.

> +		 * change the default parent since the subvolume being deleted

Also please uppercase first letter in comments unless it's an
identifier. I fix such things but for patches that are going to have
another iteration it's better to point it out.

> +		 * can be outside of the current mount point
> +		 */
> +		parent = btrfs_get_parent(dentry);
> +
> +		/* 
> +		 * the only use of dentry was to get the parent, so we can
> +		 * release it now. Later on the dentry will be queried again to
> +		 * make sure the dentry will reside in the dentry cache

Can you please rephrase that? I'm not sure I understand.

> +		 */
> +		dput(dentry);
> +		if (IS_ERR(parent)) {
> +			err = PTR_ERR(parent);
> +			goto out;
> +		}
> +		dir = d_inode(parent);
> +
> +		name_ptr = btrfs_get_subvol_name_from_objectid(fs_info, vol_args2->subvolid);
> +		if (IS_ERR(name_ptr)) {
> +			err = PTR_ERR(name_ptr);
> +			goto free_parent;
> +		}
> +		name = (char *)kbasename(name_ptr);
> +		namelen = strlen(name);
> +	} else {
> +		vol_args = memdup_user(arg, sizeof(*vol_args));
> +		if (IS_ERR(vol_args))
> +			return PTR_ERR(vol_args);
> +
> +		vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
> +		namelen = strlen(vol_args->name);

> +		if (strchr(vol_args->name, '/') ||
> +		    strncmp(vol_args->name, "..", namelen) == 0) {
> +			err = -EINVAL;
> +			goto out;
> +		}

This sanity check can be unconditional, ie. also done for the v2 even in
the spec-by-id case.

> +		name = vol_args->name;
> +	}
> +
> +	if (!S_ISDIR(dir->i_mode)) {
> +		err = -ENOTDIR;
> +		goto free_subvol_name;
>  	}
>  
>  	err = mnt_want_write_file(file);

So this is related to separating the validation. Calling
mnt_want_write_file must be between flag validation and using the
dentries and resolving path etc.

The initial part is ordered like: argument checks, subsystem checks, the
implementation.

>  	if (err)
> -		goto out;
> -
> +		goto free_subvol_name;
>  
>  	err = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
>  	if (err == -EINTR)
>  		goto out_drop_write;
> -	dentry = lookup_one_len(vol_args->name, parent, namelen);
> +	dentry = lookup_one_len(name, parent, namelen);
>  	if (IS_ERR(dentry)) {
>  		err = PTR_ERR(dentry);
>  		goto out_unlock_dir;
> @@ -2943,7 +2998,13 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
>  	inode_unlock(dir);
>  out_drop_write:
>  	mnt_drop_write_file(file);
> +free_subvol_name:
> +	kfree(name_ptr);
> +free_parent:
> +	if (destroy_v2)
> +		dput(parent);
>  out:
> +	kfree(vol_args2);
>  	kfree(vol_args);
>  	return err;
>  }
Christoph Hellwig Jan. 28, 2020, 5:31 p.m. UTC | #3
On Sun, Jan 26, 2020 at 11:48:17PM -0300, Marcos Paulo de Souza wrote:
> From: Marcos Paulo de Souza <mpdesouza@suse.com>
> 
> This ioctl will be responsible for deleting a subvolume using it's id.
> This can be used when a system has a file system mounted from a
> subvolume, rather than the root file system, like below:

Isn't BTRFS_IOC_SNAP_DESTROY_BY_ID a better name?
David Sterba Jan. 28, 2020, 5:35 p.m. UTC | #4
On Tue, Jan 28, 2020 at 09:31:00AM -0800, Christoph Hellwig wrote:
> On Sun, Jan 26, 2020 at 11:48:17PM -0300, Marcos Paulo de Souza wrote:
> > From: Marcos Paulo de Souza <mpdesouza@suse.com>
> > 
> > This ioctl will be responsible for deleting a subvolume using it's id.
> > This can be used when a system has a file system mounted from a
> > subvolume, rather than the root file system, like below:
> 
> Isn't BTRFS_IOC_SNAP_DESTROY_BY_ID a better name?

I'd prefer to keep it V2 as a more flexible ioctl than just to delete
by id. There are other _V2 ioctls that use matching
btrfs_ioctl_vol_args_v2 structure. In case we want to implement
recursive subvolume deletion we'd have to introduce yet another ioctl
number compared to just a new flag and support both by-name and by-id.
Sounds like a lot of duplication.
Marcos Paulo de Souza Jan. 29, 2020, 3:07 p.m. UTC | #5
On Tue, 2020-01-28 at 18:26 +0100, David Sterba wrote:
> wrote:
> > From: Marcos Paulo de Souza <mpdesouza@suse.com>

...

> >  	struct dentry *parent = file->f_path.dentry;
> >  	struct btrfs_fs_info *fs_info = btrfs_sb(parent->d_sb);
> > @@ -2845,34 +2847,87 @@ static noinline int
> btrfs_ioctl_snap_destroy(struct file *file,
> >  	struct inode *inode;
> >  	struct btrfs_root *root = BTRFS_I(dir)->root;
> >  	struct btrfs_root *dest = NULL;
> > -	struct btrfs_ioctl_vol_args *vol_args;
> > +	struct btrfs_ioctl_vol_args *vol_args = NULL;
> > +	struct btrfs_ioctl_vol_args_v2 *vol_args2 = NULL;
> > +	char *name, *name_ptr = NULL;
> 
> The naming is confusing, name_ptr refers to the resolved subvolume
> name,
> so I suggest to rename it to subvol_name.

Sure, much better.

> >  	int namelen;
> >  	int err = 0;
> >  
> > -	if (!S_ISDIR(dir->i_mode))
> > -		return -ENOTDIR;
> > +	if (destroy_v2) {
> > +		vol_args2 = memdup_user(arg, sizeof(*vol_args2));
> > +		if (IS_ERR(vol_args2))
> > +			return PTR_ERR(vol_args2);
> >  
> > -	vol_args = memdup_user(arg, sizeof(*vol_args));
> > -	if (IS_ERR(vol_args))
> > -		return PTR_ERR(vol_args);
> > +		if (vol_args2->subvolid == 0) {
> 
> This should be compared >= BTRFS_FIRST_FREE_OBJECTID
> as there are no valid subvolumes with lower id. The exception is the
> toplevel subvolume with id 5 that must not be deletable.

Agreed.

> 
> > +			err = -EINVAL;
> > +			goto out;
> > +		}
> >  
> > -	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
> > -	namelen = strlen(vol_args->name);
> > -	if (strchr(vol_args->name, '/') ||
> > -	    strncmp(vol_args->name, "..", namelen) == 0) {
> > -		err = -EINVAL;
> > -		goto out;
> > +		if (!(vol_args2->flags & BTRFS_SUBVOL_BY_ID)) {
> > +			err = -EINVAL;
> > +			goto out;
> 
> The flag validation needs to be factored out of the if. First
> validate,
> then do the rest. For backward compatibility, the v1 ioctl must take
> no
> flags, so if theres BTRFS_SUBVOL_BY_ID for v1, it needs to fail. For
> v2
> the flag is optional.

Only vol_args_v2 has the flags field, so for current
BTRFS_IOC_SNAP_DESTORY there won't be any flags. If we drop the check
for BTRFS_SUBVOL_BY_ID in BTRFS_IOC_SNAP_DESTORY_V2, so won't check for
this flag at all, making it meaningless.

What do you think? Should we drop this flag at all and just rely in the
ioctl number + subvolid being informed?

> 
> > +		}
> > +
> > +		dentry = btrfs_get_dentry(fs_info->sb,
> BTRFS_FIRST_FREE_OBJECTID,
> > +					vol_args2->subvolid, 0, 0);
> > +		if (IS_ERR(dentry)) {
> > +			err = PTR_ERR(dentry);
> > +			goto out;
> > +		}
> > +
> > +		/* 
> 
> There's a trailing space on the line, 'git am' does not allow me to
> apply the patch without removing it manually. Same for the comment
> below.

Done.

> 
> > +		 * change the default parent since the subvolume being
> deleted
> 
> Also please uppercase first letter in comments unless it's an
> identifier. I fix such things but for patches that are going to have
> another iteration it's better to point it out.

Sure.

> 
> > +		 * can be outside of the current mount point
> > +		 */
> > +		parent = btrfs_get_parent(dentry);
> > +
> > +		/* 
> > +		 * the only use of dentry was to get the parent, so we
> can
> > +		 * release it now. Later on the dentry will be queried
> again to
> > +		 * make sure the dentry will reside in the dentry cache
> 
> Can you please rephrase that? I'm not sure I understand.

What do you think about:
      /*
       * At this point dentry->d_name can point to '/' if the
       * subvolume we want to destroy is outsite of the current mount
       * point, so we need to released the current dentry and execute
       * the lookup to return a new one with ->d_name pointing to the
       * <mount point>/subvol_name.
       */


> > +		if (strchr(vol_args->name, '/') ||
>  +		    strncmp(vol_args->name, "..", namelen) == 0) {
> > +			err = -EINVAL;
> > +			goto out;
> > +		}
> 
> This sanity check can be unconditional, ie. also done for the v2 even
> in
> the spec-by-id case.

Makes sense.

> 
> > +		name = vol_args->name;
> > +	}
> > +
> > +	if (!S_ISDIR(dir->i_mode)) {
> > +		err = -ENOTDIR;
> > +		goto free_subvol_name;
> >  	}
> >  
> >  	err = mnt_want_write_file(file);
> 
> So this is related to separating the validation. Calling
> mnt_want_write_file must be between flag validation and using the
> dentries and resolving path etc.
> 
> The initial part is ordered like: argument checks, subsystem checks,
> the
> implementation.

Done.

Thanks for your review David. Once I have the flag question clarified I
will sent the v3.


Thanks,
  Marcos
David Sterba Jan. 29, 2020, 4:12 p.m. UTC | #6
On Wed, Jan 29, 2020 at 12:07:40PM -0300, Marcos Paulo de Souza wrote:
> > > -	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
> > > -	namelen = strlen(vol_args->name);
> > > -	if (strchr(vol_args->name, '/') ||
> > > -	    strncmp(vol_args->name, "..", namelen) == 0) {
> > > -		err = -EINVAL;
> > > -		goto out;
> > > +		if (!(vol_args2->flags & BTRFS_SUBVOL_BY_ID)) {
> > > +			err = -EINVAL;
> > > +			goto out;
> > 
> > The flag validation needs to be factored out of the if. First
> > validate,
> > then do the rest. For backward compatibility, the v1 ioctl must take
> > no
> > flags, so if theres BTRFS_SUBVOL_BY_ID for v1, it needs to fail. For
> > v2
> > the flag is optional.
> 
> Only vol_args_v2 has the flags field, so for current
> BTRFS_IOC_SNAP_DESTORY there won't be any flags. If we drop the check
> for BTRFS_SUBVOL_BY_ID in BTRFS_IOC_SNAP_DESTORY_V2, so won't check for
> this flag at all, making it meaningless.

Oh right, so the validation applies only to v2 and in that case it's
fine to keep it in the place you have it.

> What do you think? Should we drop this flag at all and just rely in the
> ioctl number + subvolid being informed?

No, v2 should work for both deletion by name and by id. It's going to
supersede v1 that has to stay for backward compatibility, but must
provide complete functionality of v1 to keep the usability sane.
diff mbox series

Patch

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index f90b82050d2d..5847a34b5146 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3010,6 +3010,8 @@  int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
 int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
 			unsigned long new_flags);
 int btrfs_sync_fs(struct super_block *sb, int wait);
+char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
+					   u64 subvol_objectid);
 
 static inline __printf(2, 3) __cold
 void btrfs_no_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 72e312cae69d..027411cdbae7 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -57,7 +57,7 @@  static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
 	return type;
 }
 
-static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
+struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
 				       u64 root_objectid, u32 generation,
 				       int check_generation)
 {
@@ -152,7 +152,7 @@  static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
 	return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1);
 }
 
-static struct dentry *btrfs_get_parent(struct dentry *child)
+struct dentry *btrfs_get_parent(struct dentry *child)
 {
 	struct inode *dir = d_inode(child);
 	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
diff --git a/fs/btrfs/export.h b/fs/btrfs/export.h
index 57488ecd7d4e..f981e8103d64 100644
--- a/fs/btrfs/export.h
+++ b/fs/btrfs/export.h
@@ -18,4 +18,9 @@  struct btrfs_fid {
 	u64 parent_root_objectid;
 } __attribute__ ((packed));
 
+struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
+				       u64 root_objectid, u32 generation,
+				       int check_generation);
+struct dentry *btrfs_get_parent(struct dentry *child);
+
 #endif
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 1b1b6ff855aa..889cb43149f9 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -28,6 +28,7 @@ 
 #include <linux/iversion.h>
 #include "ctree.h"
 #include "disk-io.h"
+#include "export.h"
 #include "transaction.h"
 #include "btrfs_inode.h"
 #include "print-tree.h"
@@ -2836,7 +2837,8 @@  static int btrfs_ioctl_get_subvol_rootref(struct file *file, void __user *argp)
 }
 
 static noinline int btrfs_ioctl_snap_destroy(struct file *file,
-					     void __user *arg)
+					     void __user *arg,
+					     bool destroy_v2)
 {
 	struct dentry *parent = file->f_path.dentry;
 	struct btrfs_fs_info *fs_info = btrfs_sb(parent->d_sb);
@@ -2845,34 +2847,87 @@  static noinline int btrfs_ioctl_snap_destroy(struct file *file,
 	struct inode *inode;
 	struct btrfs_root *root = BTRFS_I(dir)->root;
 	struct btrfs_root *dest = NULL;
-	struct btrfs_ioctl_vol_args *vol_args;
+	struct btrfs_ioctl_vol_args *vol_args = NULL;
+	struct btrfs_ioctl_vol_args_v2 *vol_args2 = NULL;
+	char *name, *name_ptr = NULL;
 	int namelen;
 	int err = 0;
 
-	if (!S_ISDIR(dir->i_mode))
-		return -ENOTDIR;
+	if (destroy_v2) {
+		vol_args2 = memdup_user(arg, sizeof(*vol_args2));
+		if (IS_ERR(vol_args2))
+			return PTR_ERR(vol_args2);
 
-	vol_args = memdup_user(arg, sizeof(*vol_args));
-	if (IS_ERR(vol_args))
-		return PTR_ERR(vol_args);
+		if (vol_args2->subvolid == 0) {
+			err = -EINVAL;
+			goto out;
+		}
 
-	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
-	namelen = strlen(vol_args->name);
-	if (strchr(vol_args->name, '/') ||
-	    strncmp(vol_args->name, "..", namelen) == 0) {
-		err = -EINVAL;
-		goto out;
+		if (!(vol_args2->flags & BTRFS_SUBVOL_BY_ID)) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		dentry = btrfs_get_dentry(fs_info->sb, BTRFS_FIRST_FREE_OBJECTID,
+					vol_args2->subvolid, 0, 0);
+		if (IS_ERR(dentry)) {
+			err = PTR_ERR(dentry);
+			goto out;
+		}
+
+		/* 
+		 * change the default parent since the subvolume being deleted
+		 * can be outside of the current mount point
+		 */
+		parent = btrfs_get_parent(dentry);
+
+		/* 
+		 * the only use of dentry was to get the parent, so we can
+		 * release it now. Later on the dentry will be queried again to
+		 * make sure the dentry will reside in the dentry cache
+		 */
+		dput(dentry);
+		if (IS_ERR(parent)) {
+			err = PTR_ERR(parent);
+			goto out;
+		}
+		dir = d_inode(parent);
+
+		name_ptr = btrfs_get_subvol_name_from_objectid(fs_info, vol_args2->subvolid);
+		if (IS_ERR(name_ptr)) {
+			err = PTR_ERR(name_ptr);
+			goto free_parent;
+		}
+		name = (char *)kbasename(name_ptr);
+		namelen = strlen(name);
+	} else {
+		vol_args = memdup_user(arg, sizeof(*vol_args));
+		if (IS_ERR(vol_args))
+			return PTR_ERR(vol_args);
+
+		vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
+		namelen = strlen(vol_args->name);
+		if (strchr(vol_args->name, '/') ||
+		    strncmp(vol_args->name, "..", namelen) == 0) {
+			err = -EINVAL;
+			goto out;
+		}
+		name = vol_args->name;
+	}
+
+	if (!S_ISDIR(dir->i_mode)) {
+		err = -ENOTDIR;
+		goto free_subvol_name;
 	}
 
 	err = mnt_want_write_file(file);
 	if (err)
-		goto out;
-
+		goto free_subvol_name;
 
 	err = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
 	if (err == -EINTR)
 		goto out_drop_write;
-	dentry = lookup_one_len(vol_args->name, parent, namelen);
+	dentry = lookup_one_len(name, parent, namelen);
 	if (IS_ERR(dentry)) {
 		err = PTR_ERR(dentry);
 		goto out_unlock_dir;
@@ -2943,7 +2998,13 @@  static noinline int btrfs_ioctl_snap_destroy(struct file *file,
 	inode_unlock(dir);
 out_drop_write:
 	mnt_drop_write_file(file);
+free_subvol_name:
+	kfree(name_ptr);
+free_parent:
+	if (destroy_v2)
+		dput(parent);
 out:
+	kfree(vol_args2);
 	kfree(vol_args);
 	return err;
 }
@@ -5464,7 +5525,9 @@  long btrfs_ioctl(struct file *file, unsigned int
 	case BTRFS_IOC_SUBVOL_CREATE_V2:
 		return btrfs_ioctl_snap_create_v2(file, argp, 1);
 	case BTRFS_IOC_SNAP_DESTROY:
-		return btrfs_ioctl_snap_destroy(file, argp);
+		return btrfs_ioctl_snap_destroy(file, argp, false);
+	case BTRFS_IOC_SNAP_DESTROY_V2:
+		return btrfs_ioctl_snap_destroy(file, argp, true);
 	case BTRFS_IOC_SUBVOL_GETFLAGS:
 		return btrfs_ioctl_subvol_getflags(file, argp);
 	case BTRFS_IOC_SUBVOL_SETFLAGS:
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index a906315efd19..4a8ce475d906 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1024,7 +1024,7 @@  static int btrfs_parse_subvol_options(const char *options, char **subvol_name,
 	return error;
 }
 
-static char *get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
+char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
 					   u64 subvol_objectid)
 {
 	struct btrfs_root *root = fs_info->tree_root;
@@ -1438,7 +1438,7 @@  static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
 				goto out;
 			}
 		}
-		subvol_name = get_subvol_name_from_objectid(btrfs_sb(mnt->mnt_sb),
+		subvol_name = btrfs_get_subvol_name_from_objectid(btrfs_sb(mnt->mnt_sb),
 							    subvol_objectid);
 		if (IS_ERR(subvol_name)) {
 			root = ERR_CAST(subvol_name);
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 7a8bc8b920f5..d8619245f0dc 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -42,11 +42,14 @@  struct btrfs_ioctl_vol_args {
 
 #define BTRFS_DEVICE_SPEC_BY_ID		(1ULL << 3)
 
+#define BTRFS_SUBVOL_BY_ID	(1ULL << 4)
+
 #define BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED		\
 			(BTRFS_SUBVOL_CREATE_ASYNC |	\
 			BTRFS_SUBVOL_RDONLY |		\
 			BTRFS_SUBVOL_QGROUP_INHERIT |	\
-			BTRFS_DEVICE_SPEC_BY_ID)
+			BTRFS_DEVICE_SPEC_BY_ID |	\
+			BTRFS_SUBVOL_BY_ID)
 
 #define BTRFS_FSID_SIZE 16
 #define BTRFS_UUID_SIZE 16
@@ -119,7 +122,10 @@  struct btrfs_ioctl_vol_args_v2 {
 		__u64 unused[4];
 	};
 	union {
-		char name[BTRFS_SUBVOL_NAME_MAX + 1];
+		union {
+			char name[BTRFS_SUBVOL_NAME_MAX + 1];
+			__u64 subvolid;
+		};
 		__u64 devid;
 	};
 };
@@ -949,5 +955,7 @@  enum btrfs_err_code {
 				struct btrfs_ioctl_get_subvol_rootref_args)
 #define BTRFS_IOC_INO_LOOKUP_USER _IOWR(BTRFS_IOCTL_MAGIC, 62, \
 				struct btrfs_ioctl_ino_lookup_user_args)
+#define BTRFS_IOC_SNAP_DESTROY_V2 _IOW(BTRFS_IOCTL_MAGIC, 63, \
+				struct btrfs_ioctl_vol_args_v2)
 
 #endif /* _UAPI_LINUX_BTRFS_H */