
[v3,06/10] btrfs: defrag: introduce a helper to defrag a range

Message ID 20210608025927.119169-7-wqu@suse.com (mailing list archive)
State New, archived
Series btrfs: defrag: rework to support sector perfect defrag

Commit Message

Qu Wenruo June 8, 2021, 2:59 a.m. UTC
A new helper, defrag_one_range(), is introduced to defrag one range.

This function will mostly prepare the needed pages and extent state for
defrag_one_locked_target().

As we can only get a consistent view of the extent map with the pages and
extent bits locked, we need to re-check the range passed in to build the
real target list for defrag_one_locked_target().

Since defrag_collect_targets() will call defrag_lookup_extent() and lock
the extent range, we also need to teach those two functions to skip the
extent lock.
Thus a new parameter, @locked, is introduced to skip the extent lock if
the caller has already locked the range.

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/ioctl.c | 94 ++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 87 insertions(+), 7 deletions(-)
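
To show how the new helper is meant to be driven, here is a minimal,
hypothetical sketch of a caller walking a larger range and feeding each
sector-aligned, cluster-bounded piece to defrag_one_range(). The function
name and loop are illustrative assumptions only; the real entry point is
added later in this series:

/* Hypothetical caller, for illustration only; not part of this patch. */
static int defrag_file_range_sketch(struct btrfs_inode *inode, u64 start,
				    u64 len, u32 extent_thresh,
				    u64 newer_than, bool do_compress)
{
	const u32 sectorsize = inode->root->fs_info->sectorsize;
	u64 cur = round_down(start, sectorsize);
	const u64 end = round_up(start + len, sectorsize);
	int ret = 0;

	while (cur < end) {
		/* Stay inside one cluster so the ASSERT()s in the helper hold */
		u64 cluster_end = round_down(cur, CLUSTER_SIZE) + CLUSTER_SIZE;
		u32 range_len = min(end, cluster_end) - cur;

		ret = defrag_one_range(inode, cur, range_len, extent_thresh,
				       newer_than, do_compress);
		if (ret < 0)
			break;
		cur += range_len;
	}
	return ret;
}

Each call re-checks its own sub-range under page and extent locks, so such
a caller would not need to hold any lock across iterations.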

Comments

Qu Wenruo June 9, 2021, 5:38 a.m. UTC | #1
On 2021/6/8 10:59 AM, Qu Wenruo wrote:
> A new helper, defrag_one_range(), is introduced to defrag one range.
> 
> This function will mostly prepare the needed pages and extent state for
> defrag_one_locked_target().
> 
> As we can only get a consistent view of the extent map with the pages and
> extent bits locked, we need to re-check the range passed in to build the
> real target list for defrag_one_locked_target().
> 
> Since defrag_collect_targets() will call defrag_lookup_extent() and lock
> the extent range, we also need to teach those two functions to skip the
> extent lock.
> Thus a new parameter, @locked, is introduced to skip the extent lock if
> the caller has already locked the range.
> 
> Signed-off-by: Qu Wenruo <wqu@suse.com>
> ---
>   fs/btrfs/ioctl.c | 94 ++++++++++++++++++++++++++++++++++++++++++++----
>   1 file changed, 87 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index 42e757dfdd7b..8259ad102469 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -1033,7 +1033,8 @@ static int find_new_extents(struct btrfs_root *root,
>   	return -ENOENT;
>   }
>   
> -static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start)
> +static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start,
> +					       bool locked)
>   {
>   	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
>   	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
> @@ -1053,10 +1054,12 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start)
>   		u64 end = start + sectorsize - 1;
>   
>   		/* get the big lock and read metadata off disk */
> -		lock_extent_bits(io_tree, start, end, &cached);
> +		if (!locked)
> +			lock_extent_bits(io_tree, start, end, &cached);
>   		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start,
>   				      sectorsize);
> -		unlock_extent_cached(io_tree, start, end, &cached);
> +		if (!locked)
> +			unlock_extent_cached(io_tree, start, end, &cached);
>   
>   		if (IS_ERR(em))
>   			return NULL;
> @@ -1074,7 +1077,7 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em)
>   	if (em->start + em->len >= i_size_read(inode))
>   		return false;
>   
> -	next = defrag_lookup_extent(inode, em->start + em->len);
> +	next = defrag_lookup_extent(inode, em->start + em->len, false);

Here the hardcoded false parameter causes another hang.

defrag_check_next_extent() can be called from inside
defrag_collect_targets(), which itself can be called with the extent
range already locked, so locking it again here deadlocks.

Will fix it in the next update.

Thanks,
Qu
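
For reference, one possible shape of that fix, sketched here purely for
illustration (the actual follow-up may look different), is to thread
@locked through defrag_check_next_extent() as well, so the lookup forwards
the caller's locking state instead of a hardcoded false:

/* Sketch only, not the posted fix. */
static bool defrag_check_next_extent(struct inode *inode,
				     struct extent_map *em, bool locked)
{
	struct extent_map *next;
	bool ret = true;

	/* This is the last extent in the file */
	if (em->start + em->len >= i_size_read(inode))
		return false;

	/* Forward @locked instead of a hardcoded false */
	next = defrag_lookup_extent(inode, em->start + em->len, locked);
	if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)
		ret = false;
	/* ... the remaining mergeability checks stay unchanged ... */

	free_extent_map(next);
	return ret;
}

Callers that do not hold the extent lock would keep passing false, while
defrag_collect_targets() would pass its own @locked.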

>   	if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)
>   		ret = false;
>   	else if ((em->block_start + em->block_len == next->block_start) &&
> @@ -1103,7 +1106,7 @@ static int should_defrag_range(struct inode *inode, u64 start, u32 thresh,
>   
>   	*skip = 0;
>   
> -	em = defrag_lookup_extent(inode, start);
> +	em = defrag_lookup_extent(inode, start, false);
>   	if (!em)
>   		return 0;
>   
> @@ -1390,12 +1393,13 @@ struct defrag_target_range {
>    * @do_compress:   Whether the defrag is doing compression
>    * 		   If true, @extent_thresh will be ignored and all regular
>    * 		   file extents meeting @newer_than will be targets.
> + * @locked:	   Whether the extent lock for the range is already held
>    * @target_list:   The list of targets file extents
>    */
>   static int defrag_collect_targets(struct btrfs_inode *inode,
>   				  u64 start, u64 len, u32 extent_thresh,
>   				  u64 newer_than, bool do_compress,
> -				  struct list_head *target_list)
> +				  bool locked, struct list_head *target_list)
>   {
>   	u64 cur = start;
>   	int ret = 0;
> @@ -1406,7 +1410,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
>   		bool next_mergeable = true;
>   		u64 range_len;
>   
> -		em = defrag_lookup_extent(&inode->vfs_inode, cur);
> +		em = defrag_lookup_extent(&inode->vfs_inode, cur, locked);
>   		if (!em)
>   			break;
>   
> @@ -1548,6 +1552,82 @@ static int defrag_one_locked_target(struct btrfs_inode *inode,
>   	return ret;
>   }
>   
> +static int defrag_one_range(struct btrfs_inode *inode,
> +			    u64 start, u32 len,
> +			    u32 extent_thresh, u64 newer_than,
> +			    bool do_compress)
> +{
> +	struct extent_state *cached_state = NULL;
> +	struct defrag_target_range *entry;
> +	struct defrag_target_range *tmp;
> +	LIST_HEAD(target_list);
> +	struct page **pages;
> +	const u32 sectorsize = inode->root->fs_info->sectorsize;
> +	unsigned long last_index = (start + len - 1) >> PAGE_SHIFT;
> +	unsigned long start_index = start >> PAGE_SHIFT;
> +	unsigned int nr_pages = last_index - start_index + 1;
> +	int ret = 0;
> +	int i;
> +
> +	ASSERT(nr_pages <= CLUSTER_SIZE / PAGE_SIZE);
> +	ASSERT(IS_ALIGNED(start, sectorsize) && IS_ALIGNED(len, sectorsize));
> +
> +	pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
> +	if (!pages)
> +		return -ENOMEM;
> +
> +	/* Prepare all pages */
> +	for (i = 0; i < nr_pages; i++) {
> +		pages[i] = defrag_prepare_one_page(inode, start_index + i);
> +		if (IS_ERR(pages[i])) {
> +			ret = PTR_ERR(pages[i]);
> +			pages[i] = NULL;
> +			goto free_pages;
> +		}
> +	}
> +	/* Also lock the pages range */
> +	lock_extent_bits(&inode->io_tree, start_index << PAGE_SHIFT,
> +			 (last_index << PAGE_SHIFT) + PAGE_SIZE - 1,
> +			 &cached_state);
> +	/*
> +	 * Now that we have a consistent view of the extent map, re-check
> +	 * which ranges really need to be defragged.
> +	 *
> +	 * And this time we have extent locked already, pass @locked = true
> +	 * so that we won't re-lock the extent range and cause deadlock.
> +	 */
> +	ret = defrag_collect_targets(inode, start, len, extent_thresh,
> +				     newer_than, do_compress, true,
> +				     &target_list);
> +	if (ret < 0)
> +		goto unlock_extent;
> +
> +	list_for_each_entry(entry, &target_list, list) {
> +		ret = defrag_one_locked_target(inode, entry, pages, nr_pages,
> +					       &cached_state);
> +		if (ret < 0)
> +			break;
> +	}
> +
> +	list_for_each_entry_safe(entry, tmp, &target_list, list) {
> +		list_del_init(&entry->list);
> +		kfree(entry);
> +	}
> +unlock_extent:
> +	unlock_extent_cached(&inode->io_tree, start_index << PAGE_SHIFT,
> +			     (last_index << PAGE_SHIFT) + PAGE_SIZE - 1,
> +			     &cached_state);
> +free_pages:
> +	for (i = 0; i < nr_pages; i++) {
> +		if (pages[i]) {
> +			unlock_page(pages[i]);
> +			put_page(pages[i]);
> +		}
> +	}
> +	kfree(pages);
> +	return ret;
> +}
> +
>   /*
>    * Btrfs entrace for defrag.
>    *
>
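
As a quick sanity check on the page/extent index math in defrag_one_range()
above, assuming 4 KiB pages: for start == 8192 and len == 16384, start_index
is 2 and last_index is (8192 + 16384 - 1) >> PAGE_SHIFT == 5, so nr_pages is
4 and the extent range locked alongside the pages covers bytes 8192 through
24575, exactly the bytes backed by those four pages.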

Patch

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 42e757dfdd7b..8259ad102469 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1033,7 +1033,8 @@  static int find_new_extents(struct btrfs_root *root,
 	return -ENOENT;
 }
 
-static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start)
+static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start,
+					       bool locked)
 {
 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
@@ -1053,10 +1054,12 @@  static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start)
 		u64 end = start + sectorsize - 1;
 
 		/* get the big lock and read metadata off disk */
-		lock_extent_bits(io_tree, start, end, &cached);
+		if (!locked)
+			lock_extent_bits(io_tree, start, end, &cached);
 		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start,
 				      sectorsize);
-		unlock_extent_cached(io_tree, start, end, &cached);
+		if (!locked)
+			unlock_extent_cached(io_tree, start, end, &cached);
 
 		if (IS_ERR(em))
 			return NULL;
@@ -1074,7 +1077,7 @@  static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em)
 	if (em->start + em->len >= i_size_read(inode))
 		return false;
 
-	next = defrag_lookup_extent(inode, em->start + em->len);
+	next = defrag_lookup_extent(inode, em->start + em->len, false);
 	if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)
 		ret = false;
 	else if ((em->block_start + em->block_len == next->block_start) &&
@@ -1103,7 +1106,7 @@  static int should_defrag_range(struct inode *inode, u64 start, u32 thresh,
 
 	*skip = 0;
 
-	em = defrag_lookup_extent(inode, start);
+	em = defrag_lookup_extent(inode, start, false);
 	if (!em)
 		return 0;
 
@@ -1390,12 +1393,13 @@  struct defrag_target_range {
  * @do_compress:   Whether the defrag is doing compression
  * 		   If true, @extent_thresh will be ignored and all regular
  * 		   file extents meeting @newer_than will be targets.
+ * @locked:	   Whether the extent lock for the range is already held
  * @target_list:   The list of targets file extents
  */
 static int defrag_collect_targets(struct btrfs_inode *inode,
 				  u64 start, u64 len, u32 extent_thresh,
 				  u64 newer_than, bool do_compress,
-				  struct list_head *target_list)
+				  bool locked, struct list_head *target_list)
 {
 	u64 cur = start;
 	int ret = 0;
@@ -1406,7 +1410,7 @@  static int defrag_collect_targets(struct btrfs_inode *inode,
 		bool next_mergeable = true;
 		u64 range_len;
 
-		em = defrag_lookup_extent(&inode->vfs_inode, cur);
+		em = defrag_lookup_extent(&inode->vfs_inode, cur, locked);
 		if (!em)
 			break;
 
@@ -1548,6 +1552,82 @@  static int defrag_one_locked_target(struct btrfs_inode *inode,
 	return ret;
 }
 
+static int defrag_one_range(struct btrfs_inode *inode,
+			    u64 start, u32 len,
+			    u32 extent_thresh, u64 newer_than,
+			    bool do_compress)
+{
+	struct extent_state *cached_state = NULL;
+	struct defrag_target_range *entry;
+	struct defrag_target_range *tmp;
+	LIST_HEAD(target_list);
+	struct page **pages;
+	const u32 sectorsize = inode->root->fs_info->sectorsize;
+	unsigned long last_index = (start + len - 1) >> PAGE_SHIFT;
+	unsigned long start_index = start >> PAGE_SHIFT;
+	unsigned int nr_pages = last_index - start_index + 1;
+	int ret = 0;
+	int i;
+
+	ASSERT(nr_pages <= CLUSTER_SIZE / PAGE_SIZE);
+	ASSERT(IS_ALIGNED(start, sectorsize) && IS_ALIGNED(len, sectorsize));
+
+	pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
+	if (!pages)
+		return -ENOMEM;
+
+	/* Prepare all pages */
+	for (i = 0; i < nr_pages; i++) {
+		pages[i] = defrag_prepare_one_page(inode, start_index + i);
+		if (IS_ERR(pages[i])) {
+			ret = PTR_ERR(pages[i]);
+			pages[i] = NULL;
+			goto free_pages;
+		}
+	}
+	/* Also lock the pages range */
+	lock_extent_bits(&inode->io_tree, start_index << PAGE_SHIFT,
+			 (last_index << PAGE_SHIFT) + PAGE_SIZE - 1,
+			 &cached_state);
+	/*
+	 * Now that we have a consistent view of the extent map, re-check
+	 * which ranges really need to be defragged.
+	 *
+	 * And this time we have extent locked already, pass @locked = true
+	 * so that we won't re-lock the extent range and cause deadlock.
+	 */
+	ret = defrag_collect_targets(inode, start, len, extent_thresh,
+				     newer_than, do_compress, true,
+				     &target_list);
+	if (ret < 0)
+		goto unlock_extent;
+
+	list_for_each_entry(entry, &target_list, list) {
+		ret = defrag_one_locked_target(inode, entry, pages, nr_pages,
+					       &cached_state);
+		if (ret < 0)
+			break;
+	}
+
+	list_for_each_entry_safe(entry, tmp, &target_list, list) {
+		list_del_init(&entry->list);
+		kfree(entry);
+	}
+unlock_extent:
+	unlock_extent_cached(&inode->io_tree, start_index << PAGE_SHIFT,
+			     (last_index << PAGE_SHIFT) + PAGE_SIZE - 1,
+			     &cached_state);
+free_pages:
+	for (i = 0; i < nr_pages; i++) {
+		if (pages[i]) {
+			unlock_page(pages[i]);
+			put_page(pages[i]);
+		}
+	}
+	kfree(pages);
+	return ret;
+}
+
 /*
  * Btrfs entrace for defrag.
  *