
Btrfs: reduce lock contention when create snapshot

Message ID 20200514091918.30294-1-robbieko@synology.com (mailing list archive)
State New, archived
Series Btrfs: reduce lock contention when create snapshot

Commit Message

robbieko May 14, 2020, 9:19 a.m. UTC
From: Robbie Ko <robbieko@synology.com>

Creating a snapshot can take a long time because the dirty
data of the source subvolume has to be flushed first.

However, while that flush runs we are already holding two
resources:
1. the destination directory inode lock
2. the global subvol semaphore

This blocks subvolume destroy/create/setflag until the
snapshot creation finishes.

Fix this by flushing the dirty data before taking those
locks, so that the critical section only covers the actual
snapshot creation.

Signed-off-by: Robbie Ko <robbieko@synology.com>
---
 fs/btrfs/ioctl.c | 70 ++++++++++++++++++++++++++++--------------------
 1 file changed, 41 insertions(+), 29 deletions(-)
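
The reordering boils down to moving the slow flush out of the locked
region. Below is a minimal userspace C sketch of that idea (illustrative
only; the mutexes and the stub functions are hypothetical stand-ins for
the destination directory inode lock, the subvol semaphore, and the
delalloc flush / snapshot commit done in the kernel):

	/* Illustrative sketch only -- not kernel code. */
	#include <pthread.h>
	#include <stdio.h>
	#include <unistd.h>

	static pthread_mutex_t dir_lock = PTHREAD_MUTEX_INITIALIZER;   /* dest dir inode lock */
	static pthread_mutex_t subvol_sem = PTHREAD_MUTEX_INITIALIZER; /* global subvol semaphore */

	static void flush_dirty_data(void) { sleep(1); }      /* slow: writeback of dirty data */
	static void commit_snapshot(void)  { usleep(1000); }  /* fast: snapshot transaction    */

	/* Old ordering: the flush runs with both locks held, blocking other subvol ops. */
	static void snapshot_old(void)
	{
		pthread_mutex_lock(&dir_lock);
		pthread_mutex_lock(&subvol_sem);
		flush_dirty_data();                  /* long critical section */
		commit_snapshot();
		pthread_mutex_unlock(&subvol_sem);
		pthread_mutex_unlock(&dir_lock);
	}

	/* New ordering: flush first, take the locks only around the commit. */
	static void snapshot_new(void)
	{
		flush_dirty_data();                  /* outside the critical section */
		pthread_mutex_lock(&dir_lock);
		pthread_mutex_lock(&subvol_sem);
		commit_snapshot();                   /* short critical section */
		pthread_mutex_unlock(&subvol_sem);
		pthread_mutex_unlock(&dir_lock);
	}

	int main(void)
	{
		snapshot_old();
		snapshot_new();
		printf("done\n");
		return 0;
	}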

Comments

Filipe Manana May 14, 2020, 1:16 p.m. UTC | #1
On Thu, May 14, 2020 at 10:20 AM robbieko <robbieko@synology.com> wrote:
>
> From: Robbie Ko <robbieko@synology.com>
>
> Creating a snapshot can take a long time because the dirty
> data of the source subvolume has to be flushed first.
>
> However, while that flush runs we are already holding two
> resources:
> 1. the destination directory inode lock
> 2. the global subvol semaphore
>
> This blocks subvolume destroy/create/setflag until the
> snapshot creation finishes.
>
> Fix this by flushing the dirty data before taking those
> locks, so that the critical section only covers the actual
> snapshot creation.
>
> Signed-off-by: Robbie Ko <robbieko@synology.com>

Reviewed-by: Filipe Manana <fdmanana@suse.com>

Looks good, thanks.

> ---
>  fs/btrfs/ioctl.c | 70 ++++++++++++++++++++++++++++--------------------
>  1 file changed, 41 insertions(+), 29 deletions(-)
>
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index 40b729dce91c..d0c1598dc51e 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -748,7 +748,6 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
>         struct btrfs_pending_snapshot *pending_snapshot;
>         struct btrfs_trans_handle *trans;
>         int ret;
> -       bool snapshot_force_cow = false;
>
>         if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
>                 return -EINVAL;
> @@ -771,27 +770,6 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
>                 goto free_pending;
>         }
>
> -       /*
> -        * Force new buffered writes to reserve space even when NOCOW is
> -        * possible. This is to avoid later writeback (running dealloc) to
> -        * fallback to COW mode and unexpectedly fail with ENOSPC.
> -        */
> -       btrfs_drew_read_lock(&root->snapshot_lock);
> -
> -       ret = btrfs_start_delalloc_snapshot(root);
> -       if (ret)
> -               goto dec_and_free;
> -
> -       /*
> -        * All previous writes have started writeback in NOCOW mode, so now
> -        * we force future writes to fallback to COW mode during snapshot
> -        * creation.
> -        */
> -       atomic_inc(&root->snapshot_force_cow);
> -       snapshot_force_cow = true;
> -
> -       btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
> -
>         btrfs_init_block_rsv(&pending_snapshot->block_rsv,
>                              BTRFS_BLOCK_RSV_TEMP);
>         /*
> @@ -806,7 +784,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
>                                         &pending_snapshot->block_rsv, 8,
>                                         false);
>         if (ret)
> -               goto dec_and_free;
> +               goto free_pending;
>
>         pending_snapshot->dentry = dentry;
>         pending_snapshot->root = root;
> @@ -848,11 +826,6 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
>  fail:
>         btrfs_put_root(pending_snapshot->snap);
>         btrfs_subvolume_release_metadata(fs_info, &pending_snapshot->block_rsv);
> -dec_and_free:
> -       if (snapshot_force_cow)
> -               atomic_dec(&root->snapshot_force_cow);
> -       btrfs_drew_read_unlock(&root->snapshot_lock);
> -
>  free_pending:
>         kfree(pending_snapshot->root_item);
>         btrfs_free_path(pending_snapshot->path);
> @@ -983,6 +956,45 @@ static noinline int btrfs_mksubvol(const struct path *parent,
>         return error;
>  }
>
> +static noinline int btrfs_mksnapshot(const struct path *parent,
> +                                  const char *name, int namelen,
> +                                  struct btrfs_root *root,
> +                                  bool readonly,
> +                                  struct btrfs_qgroup_inherit *inherit)
> +{
> +       int ret;
> +       bool snapshot_force_cow = false;
> +
> +       /*
> +        * Force new buffered writes to reserve space even when NOCOW is
> +        * possible. This is to avoid later writeback (running dealloc) to
> +        * fallback to COW mode and unexpectedly fail with ENOSPC.
> +        */
> +       btrfs_drew_read_lock(&root->snapshot_lock);
> +
> +       ret = btrfs_start_delalloc_snapshot(root);
> +       if (ret)
> +               goto out;
> +
> +       /*
> +        * All previous writes have started writeback in NOCOW mode, so now
> +        * we force future writes to fallback to COW mode during snapshot
> +        * creation.
> +        */
> +       atomic_inc(&root->snapshot_force_cow);
> +       snapshot_force_cow = true;
> +
> +       btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
> +
> +       ret = btrfs_mksubvol(parent, name, namelen,
> +                            root, readonly, inherit);
> +out:
> +       if (snapshot_force_cow)
> +               atomic_dec(&root->snapshot_force_cow);
> +       btrfs_drew_read_unlock(&root->snapshot_lock);
> +       return ret;
> +}
> +
>  /*
>   * When we're defragging a range, we don't want to kick it off again
>   * if it is really just waiting for delalloc to send it down.
> @@ -1762,7 +1774,7 @@ static noinline int __btrfs_ioctl_snap_create(struct file *file,
>                          */
>                         ret = -EPERM;
>                 } else {
> -                       ret = btrfs_mksubvol(&file->f_path, name, namelen,
> +                       ret = btrfs_mksnapshot(&file->f_path, name, namelen,
>                                              BTRFS_I(src_inode)->root,
>                                              readonly, inherit);
>                 }
> --
> 2.17.1
>

David Sterba May 18, 2020, 5:01 p.m. UTC | #2
On Thu, May 14, 2020 at 05:19:18PM +0800, robbieko wrote:
> From: Robbie Ko <robbieko@synology.com>
> 
> Creating a snapshot can take a long time because the dirty
> data of the source subvolume has to be flushed first.
>
> However, while that flush runs we are already holding two
> resources:
> 1. the destination directory inode lock
> 2. the global subvol semaphore
>
> This blocks subvolume destroy/create/setflag until the
> snapshot creation finishes.
>
> Fix this by flushing the dirty data before taking those
> locks, so that the critical section only covers the actual
> snapshot creation.

A bit hard to follow what gets moved where but I got it in the end.
Flushing data before snapshotting does not depend on the directory lock
nor on the subvol semaphore so moving it out is fine.

This should also speed up creating new subvolumes as they don't need any
pre-flushing at all. Added to misc-next, thanks.

David Sterba May 18, 2020, 6:12 p.m. UTC | #3
On Mon, May 18, 2020 at 07:01:03PM +0200, David Sterba wrote:
> This should also speed up creating new subvolumes as they don't need any
> pre-flushing at all. Added to misc-next, thanks.

Never mind, this is not true, plain subvolumes take a different call path.
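
To make the correction concrete: only the snapshot path (where a source
subvolume fd is given) goes through the new btrfs_mksnapshot() wrapper;
plain subvolume creation calls btrfs_mksubvol() directly and never had a
pre-flush to move. A stubbed userspace sketch of that split (illustrative
only; the functions are hypothetical stand-ins, not the kernel helpers):

	#include <stdbool.h>
	#include <stdio.h>

	/* Hypothetical stand-ins for btrfs_mksubvol() and btrfs_mksnapshot(). */
	static int mksubvol_stub(void)   { puts("subvolume: nothing to flush, straight to mksubvol"); return 0; }
	static int mksnapshot_stub(void) { puts("snapshot: flush dirty data, then lock and commit");   return 0; }

	static int snap_create(bool have_source_subvol)
	{
		/* Only the snapshot path benefits from moving the flush out of
		 * the locked region; subvolume creation had nothing to flush. */
		return have_source_subvol ? mksnapshot_stub() : mksubvol_stub();
	}

	int main(void)
	{
		snap_create(false);   /* create a plain subvolume */
		snap_create(true);    /* create a snapshot        */
		return 0;
	}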

Patch

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 40b729dce91c..d0c1598dc51e 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -748,7 +748,6 @@  static int create_snapshot(struct btrfs_root *root, struct inode *dir,
 	struct btrfs_pending_snapshot *pending_snapshot;
 	struct btrfs_trans_handle *trans;
 	int ret;
-	bool snapshot_force_cow = false;
 
 	if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
 		return -EINVAL;
@@ -771,27 +770,6 @@  static int create_snapshot(struct btrfs_root *root, struct inode *dir,
 		goto free_pending;
 	}
 
-	/*
-	 * Force new buffered writes to reserve space even when NOCOW is
-	 * possible. This is to avoid later writeback (running dealloc) to
-	 * fallback to COW mode and unexpectedly fail with ENOSPC.
-	 */
-	btrfs_drew_read_lock(&root->snapshot_lock);
-
-	ret = btrfs_start_delalloc_snapshot(root);
-	if (ret)
-		goto dec_and_free;
-
-	/*
-	 * All previous writes have started writeback in NOCOW mode, so now
-	 * we force future writes to fallback to COW mode during snapshot
-	 * creation.
-	 */
-	atomic_inc(&root->snapshot_force_cow);
-	snapshot_force_cow = true;
-
-	btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
-
 	btrfs_init_block_rsv(&pending_snapshot->block_rsv,
 			     BTRFS_BLOCK_RSV_TEMP);
 	/*
@@ -806,7 +784,7 @@  static int create_snapshot(struct btrfs_root *root, struct inode *dir,
 					&pending_snapshot->block_rsv, 8,
 					false);
 	if (ret)
-		goto dec_and_free;
+		goto free_pending;
 
 	pending_snapshot->dentry = dentry;
 	pending_snapshot->root = root;
@@ -848,11 +826,6 @@  static int create_snapshot(struct btrfs_root *root, struct inode *dir,
 fail:
 	btrfs_put_root(pending_snapshot->snap);
 	btrfs_subvolume_release_metadata(fs_info, &pending_snapshot->block_rsv);
-dec_and_free:
-	if (snapshot_force_cow)
-		atomic_dec(&root->snapshot_force_cow);
-	btrfs_drew_read_unlock(&root->snapshot_lock);
-
 free_pending:
 	kfree(pending_snapshot->root_item);
 	btrfs_free_path(pending_snapshot->path);
@@ -983,6 +956,45 @@  static noinline int btrfs_mksubvol(const struct path *parent,
 	return error;
 }
 
+static noinline int btrfs_mksnapshot(const struct path *parent,
+				   const char *name, int namelen,
+				   struct btrfs_root *root,
+				   bool readonly,
+				   struct btrfs_qgroup_inherit *inherit)
+{
+	int ret;
+	bool snapshot_force_cow = false;
+
+	/*
+	 * Force new buffered writes to reserve space even when NOCOW is
+	 * possible. This is to avoid later writeback (running dealloc) to
+	 * fallback to COW mode and unexpectedly fail with ENOSPC.
+	 */
+	btrfs_drew_read_lock(&root->snapshot_lock);
+
+	ret = btrfs_start_delalloc_snapshot(root);
+	if (ret)
+		goto out;
+
+	/*
+	 * All previous writes have started writeback in NOCOW mode, so now
+	 * we force future writes to fallback to COW mode during snapshot
+	 * creation.
+	 */
+	atomic_inc(&root->snapshot_force_cow);
+	snapshot_force_cow = true;
+
+	btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
+
+	ret = btrfs_mksubvol(parent, name, namelen,
+			     root, readonly, inherit);
+out:
+	if (snapshot_force_cow)
+		atomic_dec(&root->snapshot_force_cow);
+	btrfs_drew_read_unlock(&root->snapshot_lock);
+	return ret;
+}
+
 /*
  * When we're defragging a range, we don't want to kick it off again
  * if it is really just waiting for delalloc to send it down.
@@ -1762,7 +1774,7 @@  static noinline int __btrfs_ioctl_snap_create(struct file *file,
 			 */
 			ret = -EPERM;
 		} else {
-			ret = btrfs_mksubvol(&file->f_path, name, namelen,
+			ret = btrfs_mksnapshot(&file->f_path, name, namelen,
 					     BTRFS_I(src_inode)->root,
 					     readonly, inherit);
 		}