diff mbox

[2/3] Btrfs: don't wait for unrelated IO to finish before relocation

Message ID 1461546072-11154-3-git-send-email-fdmanana@kernel.org (mailing list archive)
State Superseded, archived
Headers show

Commit Message

Filipe Manana April 25, 2016, 1:01 a.m. UTC
From: Filipe Manana <fdmanana@suse.com>

Before the relocation process of a block group starts, it sets the block
group to readonly mode, then flushes all delalloc writes and then finally
it waits for all ordered extents to complete. This last step includes
waiting for ordered extents destinated at extents allocated in other block
groups, making us waste unecessary time.

So fix this by waiting only for ordered extents that fall into the block
group's range.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
---
 fs/btrfs/dev-replace.c  |  4 ++--
 fs/btrfs/extent-tree.c  | 11 +++++++----
 fs/btrfs/ioctl.c        |  2 +-
 fs/btrfs/ordered-data.c | 26 +++++++++++++++++++-------
 fs/btrfs/ordered-data.h |  6 ++++--
 fs/btrfs/relocation.c   |  4 +++-
 fs/btrfs/super.c        |  2 +-
 fs/btrfs/transaction.c  |  2 +-
 8 files changed, 38 insertions(+), 19 deletions(-)

Comments

Liu Bo May 9, 2016, 10:59 p.m. UTC | #1
On Mon, Apr 25, 2016 at 02:01:11AM +0100, fdmanana@kernel.org wrote:
> From: Filipe Manana <fdmanana@suse.com>
> 
> Before the relocation process of a block group starts, it sets the block
> group to readonly mode, then flushes all delalloc writes and then finally
> it waits for all ordered extents to complete. This last step includes
> waiting for ordered extents destinated at extents allocated in other block
> groups, making us waste unecessary time.
> 
> So fix this by waiting only for ordered extents that fall into the block
> group's range.
> 
> Signed-off-by: Filipe Manana <fdmanana@suse.com>
> ---
>  fs/btrfs/dev-replace.c  |  4 ++--
>  fs/btrfs/extent-tree.c  | 11 +++++++----
>  fs/btrfs/ioctl.c        |  2 +-
>  fs/btrfs/ordered-data.c | 26 +++++++++++++++++++-------
>  fs/btrfs/ordered-data.h |  6 ++++--
>  fs/btrfs/relocation.c   |  4 +++-
>  fs/btrfs/super.c        |  2 +-
>  fs/btrfs/transaction.c  |  2 +-
>  8 files changed, 38 insertions(+), 19 deletions(-)
> 
> diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
> index a1d6652..a0c1016 100644
> --- a/fs/btrfs/dev-replace.c
> +++ b/fs/btrfs/dev-replace.c
> @@ -401,7 +401,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
>  	if (ret)
>  		btrfs_err(root->fs_info, "kobj add dev failed %d\n", ret);
>  
> -	btrfs_wait_ordered_roots(root->fs_info, -1);
> +	btrfs_wait_ordered_roots(root->fs_info, -1, 0, (u64)-1);
>  
>  	/* force writing the updated state information to disk */
>  	trans = btrfs_start_transaction(root, 0);
> @@ -493,7 +493,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
>  		mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
>  		return ret;
>  	}
> -	btrfs_wait_ordered_roots(root->fs_info, -1);
> +	btrfs_wait_ordered_roots(root->fs_info, -1, 0, (u64)-1);
>  
>  	trans = btrfs_start_transaction(root, 0);
>  	if (IS_ERR(trans)) {
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index 53e1297..0544f70 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -4141,7 +4141,7 @@ commit_trans:
>  
>  			if (need_commit > 0) {
>  				btrfs_start_delalloc_roots(fs_info, 0, -1);
> -				btrfs_wait_ordered_roots(fs_info, -1);
> +				btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1);
>  			}
>  
>  			trans = btrfs_join_transaction(root);
> @@ -4583,7 +4583,8 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
>  		 */
>  		btrfs_start_delalloc_roots(root->fs_info, 0, nr_items);
>  		if (!current->journal_info)
> -			btrfs_wait_ordered_roots(root->fs_info, nr_items);
> +			btrfs_wait_ordered_roots(root->fs_info, nr_items,
> +						 0, (u64)-1);
>  	}
>  }
>  
> @@ -4632,7 +4633,8 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
>  		if (trans)
>  			return;
>  		if (wait_ordered)
> -			btrfs_wait_ordered_roots(root->fs_info, items);
> +			btrfs_wait_ordered_roots(root->fs_info, items,
> +						 0, (u64)-1);
>  		return;
>  	}
>  
> @@ -4671,7 +4673,8 @@ skip_async:
>  
>  		loops++;
>  		if (wait_ordered && !trans) {
> -			btrfs_wait_ordered_roots(root->fs_info, items);
> +			btrfs_wait_ordered_roots(root->fs_info, items,
> +						 0, (u64)-1);
>  		} else {
>  			time_left = schedule_timeout_killable(1);
>  			if (time_left)
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index 053e677..4d71273 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -681,7 +681,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
>  	if (ret)
>  		goto dec_and_free;
>  
> -	btrfs_wait_ordered_extents(root, -1);
> +	btrfs_wait_ordered_extents(root, -1, 0, (u64)-1);
>  
>  	btrfs_init_block_rsv(&pending_snapshot->block_rsv,
>  			     BTRFS_BLOCK_RSV_TEMP);
> diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
> index 41101b2..86c1cbb 100644
> --- a/fs/btrfs/ordered-data.c
> +++ b/fs/btrfs/ordered-data.c
> @@ -661,14 +661,15 @@ static void btrfs_run_ordered_extent_work(struct btrfs_work *work)
>   * wait for all the ordered extents in a root.  This is done when balancing
>   * space between drives.
>   */
> -int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
> +int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
> +			       const u64 range_start, const u64 range_len)
>  {
> -	struct list_head splice, works;
> +	LIST_HEAD(splice);
> +	LIST_HEAD(skipped);
> +	LIST_HEAD(works);
>  	struct btrfs_ordered_extent *ordered, *next;
>  	int count = 0;
> -
> -	INIT_LIST_HEAD(&splice);
> -	INIT_LIST_HEAD(&works);
> +	const u64 range_end = range_start + range_len;
>  
>  	mutex_lock(&root->ordered_extent_mutex);
>  	spin_lock(&root->ordered_extent_lock);
> @@ -676,6 +677,14 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
>  	while (!list_empty(&splice) && nr) {
>  		ordered = list_first_entry(&splice, struct btrfs_ordered_extent,
>  					   root_extent_list);
> +
> +		if (range_end <= ordered->start ||
> +		    ordered->start + ordered->disk_len <= range_start) {
> +			list_move_tail(&ordered->root_extent_list, &skipped);
> +			cond_resched_lock(&root->ordered_extent_lock);
> +			continue;
> +		}
> +
>  		list_move_tail(&ordered->root_extent_list,
>  			       &root->ordered_extents);
>  		atomic_inc(&ordered->refs);
> @@ -694,6 +703,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
>  			nr--;
>  		count++;
>  	}
> +	list_splice_tail(&skipped, &root->ordered_extents);

It seems that 'skipped' is not necessary, we don't have to keep the
order or something, however it's not a big deal.

Others look sane to me.

Reviewed-by: Liu Bo <bo.li.liu@oracle.com>

Thanks,

-liubo

>  	list_splice_tail(&splice, &root->ordered_extents);
>  	spin_unlock(&root->ordered_extent_lock);
>  
> @@ -708,7 +718,8 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
>  	return count;
>  }
>  
> -int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr)
> +int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
> +			     const u64 range_start, const u64 range_len)
>  {
>  	struct btrfs_root *root;
>  	struct list_head splice;
> @@ -729,7 +740,8 @@ int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr)
>  			       &fs_info->ordered_roots);
>  		spin_unlock(&fs_info->ordered_root_lock);
>  
> -		done = btrfs_wait_ordered_extents(root, nr);
> +		done = btrfs_wait_ordered_extents(root, nr,
> +						  range_start, range_len);
>  		btrfs_put_fs_root(root);
>  		total_done += done;
>  
> diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
> index f29e08e..82c011b 100644
> --- a/fs/btrfs/ordered-data.h
> +++ b/fs/btrfs/ordered-data.h
> @@ -197,8 +197,10 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
>  				struct btrfs_ordered_extent *ordered);
>  int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
>  			   u32 *sum, int len);
> -int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr);
> -int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr);
> +int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
> +			       const u64 range_start, const u64 range_len);
> +int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
> +			     const u64 range_start, const u64 range_len);
>  void btrfs_get_logged_extents(struct inode *inode,
>  			      struct list_head *logged_list,
>  			      const loff_t start,
> diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
> index f689be2..d768364 100644
> --- a/fs/btrfs/relocation.c
> +++ b/fs/btrfs/relocation.c
> @@ -4258,7 +4258,9 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
>  		err = ret;
>  		goto out;
>  	}
> -	ret = btrfs_wait_ordered_roots(fs_info, -1);
> +	ret = btrfs_wait_ordered_roots(fs_info, -1,
> +				       rc->block_group->key.objectid,
> +				       rc->block_group->key.offset);
>  	/*
>  	 * We might have waited for an ordered extent to complete which was
>  	 * destined at an extent allocated from our block group. The extent item
> diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
> index 00b8f37..89d1347 100644
> --- a/fs/btrfs/super.c
> +++ b/fs/btrfs/super.c
> @@ -1160,7 +1160,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
>  		return 0;
>  	}
>  
> -	btrfs_wait_ordered_roots(fs_info, -1);
> +	btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1);
>  
>  	trans = btrfs_attach_transaction_barrier(root);
>  	if (IS_ERR(trans)) {
> diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
> index 43885e5..f0bb54a 100644
> --- a/fs/btrfs/transaction.c
> +++ b/fs/btrfs/transaction.c
> @@ -1821,7 +1821,7 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
>  static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
>  {
>  	if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
> -		btrfs_wait_ordered_roots(fs_info, -1);
> +		btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1);
>  }
>  
>  static inline void
> -- 
> 2.7.0.rc3
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index a1d6652..a0c1016 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -401,7 +401,7 @@  int btrfs_dev_replace_start(struct btrfs_root *root,
 	if (ret)
 		btrfs_err(root->fs_info, "kobj add dev failed %d\n", ret);
 
-	btrfs_wait_ordered_roots(root->fs_info, -1);
+	btrfs_wait_ordered_roots(root->fs_info, -1, 0, (u64)-1);
 
 	/* force writing the updated state information to disk */
 	trans = btrfs_start_transaction(root, 0);
@@ -493,7 +493,7 @@  static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 		mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
 		return ret;
 	}
-	btrfs_wait_ordered_roots(root->fs_info, -1);
+	btrfs_wait_ordered_roots(root->fs_info, -1, 0, (u64)-1);
 
 	trans = btrfs_start_transaction(root, 0);
 	if (IS_ERR(trans)) {
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 53e1297..0544f70 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4141,7 +4141,7 @@  commit_trans:
 
 			if (need_commit > 0) {
 				btrfs_start_delalloc_roots(fs_info, 0, -1);
-				btrfs_wait_ordered_roots(fs_info, -1);
+				btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1);
 			}
 
 			trans = btrfs_join_transaction(root);
@@ -4583,7 +4583,8 @@  static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
 		 */
 		btrfs_start_delalloc_roots(root->fs_info, 0, nr_items);
 		if (!current->journal_info)
-			btrfs_wait_ordered_roots(root->fs_info, nr_items);
+			btrfs_wait_ordered_roots(root->fs_info, nr_items,
+						 0, (u64)-1);
 	}
 }
 
@@ -4632,7 +4633,8 @@  static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
 		if (trans)
 			return;
 		if (wait_ordered)
-			btrfs_wait_ordered_roots(root->fs_info, items);
+			btrfs_wait_ordered_roots(root->fs_info, items,
+						 0, (u64)-1);
 		return;
 	}
 
@@ -4671,7 +4673,8 @@  skip_async:
 
 		loops++;
 		if (wait_ordered && !trans) {
-			btrfs_wait_ordered_roots(root->fs_info, items);
+			btrfs_wait_ordered_roots(root->fs_info, items,
+						 0, (u64)-1);
 		} else {
 			time_left = schedule_timeout_killable(1);
 			if (time_left)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 053e677..4d71273 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -681,7 +681,7 @@  static int create_snapshot(struct btrfs_root *root, struct inode *dir,
 	if (ret)
 		goto dec_and_free;
 
-	btrfs_wait_ordered_extents(root, -1);
+	btrfs_wait_ordered_extents(root, -1, 0, (u64)-1);
 
 	btrfs_init_block_rsv(&pending_snapshot->block_rsv,
 			     BTRFS_BLOCK_RSV_TEMP);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 41101b2..86c1cbb 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -661,14 +661,15 @@  static void btrfs_run_ordered_extent_work(struct btrfs_work *work)
  * wait for all the ordered extents in a root.  This is done when balancing
  * space between drives.
  */
-int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
+int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
+			       const u64 range_start, const u64 range_len)
 {
-	struct list_head splice, works;
+	LIST_HEAD(splice);
+	LIST_HEAD(skipped);
+	LIST_HEAD(works);
 	struct btrfs_ordered_extent *ordered, *next;
 	int count = 0;
-
-	INIT_LIST_HEAD(&splice);
-	INIT_LIST_HEAD(&works);
+	const u64 range_end = range_start + range_len;
 
 	mutex_lock(&root->ordered_extent_mutex);
 	spin_lock(&root->ordered_extent_lock);
@@ -676,6 +677,14 @@  int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
 	while (!list_empty(&splice) && nr) {
 		ordered = list_first_entry(&splice, struct btrfs_ordered_extent,
 					   root_extent_list);
+
+		if (range_end <= ordered->start ||
+		    ordered->start + ordered->disk_len <= range_start) {
+			list_move_tail(&ordered->root_extent_list, &skipped);
+			cond_resched_lock(&root->ordered_extent_lock);
+			continue;
+		}
+
 		list_move_tail(&ordered->root_extent_list,
 			       &root->ordered_extents);
 		atomic_inc(&ordered->refs);
@@ -694,6 +703,7 @@  int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
 			nr--;
 		count++;
 	}
+	list_splice_tail(&skipped, &root->ordered_extents);
 	list_splice_tail(&splice, &root->ordered_extents);
 	spin_unlock(&root->ordered_extent_lock);
 
@@ -708,7 +718,8 @@  int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
 	return count;
 }
 
-int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr)
+int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
+			     const u64 range_start, const u64 range_len)
 {
 	struct btrfs_root *root;
 	struct list_head splice;
@@ -729,7 +740,8 @@  int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr)
 			       &fs_info->ordered_roots);
 		spin_unlock(&fs_info->ordered_root_lock);
 
-		done = btrfs_wait_ordered_extents(root, nr);
+		done = btrfs_wait_ordered_extents(root, nr,
+						  range_start, range_len);
 		btrfs_put_fs_root(root);
 		total_done += done;
 
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index f29e08e..82c011b 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -197,8 +197,10 @@  int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
 				struct btrfs_ordered_extent *ordered);
 int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
 			   u32 *sum, int len);
-int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr);
-int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr);
+int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
+			       const u64 range_start, const u64 range_len);
+int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
+			     const u64 range_start, const u64 range_len);
 void btrfs_get_logged_extents(struct inode *inode,
 			      struct list_head *logged_list,
 			      const loff_t start,
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index f689be2..d768364 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -4258,7 +4258,9 @@  int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
 		err = ret;
 		goto out;
 	}
-	ret = btrfs_wait_ordered_roots(fs_info, -1);
+	ret = btrfs_wait_ordered_roots(fs_info, -1,
+				       rc->block_group->key.objectid,
+				       rc->block_group->key.offset);
 	/*
 	 * We might have waited for an ordered extent to complete which was
 	 * destined at an extent allocated from our block group. The extent item
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 00b8f37..89d1347 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1160,7 +1160,7 @@  int btrfs_sync_fs(struct super_block *sb, int wait)
 		return 0;
 	}
 
-	btrfs_wait_ordered_roots(fs_info, -1);
+	btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1);
 
 	trans = btrfs_attach_transaction_barrier(root);
 	if (IS_ERR(trans)) {
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 43885e5..f0bb54a 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1821,7 +1821,7 @@  static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
 static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
 {
 	if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
-		btrfs_wait_ordered_roots(fs_info, -1);
+		btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1);
 }
 
 static inline void