[1/4] btrfs: Refactor cow_file_range_async

Message ID 20190213155529.31500-2-nborisov@suse.com (mailing list archive)
State New, archived
Series: Cleanups around compress path

Commit Message

Nikolay Borisov Feb. 13, 2019, 3:55 p.m. UTC
This commit changes the implementation of cow_file_range_async in order
to get rid of the BUG_ON in the middle of the loop. Additionally, it
reworks the inner loop in the hope of making it more understandable.

The main change is that the number of chunks required to handle the
given range is calculated before entering the loop, and the loop then
simply iterates over the chunk count. Furthermore, the memory
allocation is reworked: the code now does a single kmalloc with enough
space for all chunks. Depending on whether compression is enabled, the
chunk count is either 1 (in the non-compress case) or the size of the
range divided by 512k.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
---
 fs/btrfs/inode.c | 75 +++++++++++++++++++++++++++++++-----------------
 1 file changed, 49 insertions(+), 26 deletions(-)
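
(For readers unfamiliar with the trick, here is a minimal, self-contained
sketch of the co-allocation scheme the patch describes -- plain userspace C
with illustrative names, not btrfs code. It uses a flexible array member to
get the same [counter][chunk 0]...[chunk N-1] layout with correct alignment;
whichever chunk finishes last frees the whole block.)

#include <stdatomic.h>
#include <stdlib.h>

struct chunk {
	atomic_int *pending;	/* points back at the shared counter */
	/* per-chunk fields (start, end, work item, ...) would follow */
};

/* One allocation holds the counter followed by all the chunks. */
struct chunk_block {
	atomic_int pending;
	struct chunk chunks[];
};

static struct chunk *alloc_chunks(size_t n)
{
	struct chunk_block *blk;
	size_t i;

	blk = malloc(sizeof(*blk) + n * sizeof(struct chunk));
	if (!blk)
		return NULL;
	atomic_init(&blk->pending, (int)n);
	for (i = 0; i < n; i++)
		blk->chunks[i].pending = &blk->pending;
	return blk->chunks;
}

/* Called once per completed chunk; the last completion frees the block. */
static void put_chunk(struct chunk *c)
{
	/* atomic_fetch_sub returns the old value: 1 means we were last. */
	if (atomic_fetch_sub(c->pending, 1) == 1)
		free(c->pending);	/* counter sits at the start of the block */
}

(In the patch, each async_cow[i].pending is pointed at the shared counter in
just this way before the work item is queued.)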

Comments

Nikolay Borisov Feb. 14, 2019, 2:30 p.m. UTC | #1
On 13.02.19 at 17:55, Nikolay Borisov wrote:
> This commit changes the implementation of cow_file_range_async in order
> to get rid of the BUG_ON in the middle of the loop. Additionally, it
> reworks the inner loop in the hope of making it more understandable.
> 
> The main change is that the number of chunks required to handle the
> given range is calculated before entering the loop, and the loop then
> simply iterates over the chunk count. Furthermore, the memory
> allocation is reworked: the code now does a single kmalloc with enough
> space for all chunks. Depending on whether compression is enabled, the
> chunk count is either 1 (in the non-compress case) or the size of the
> range divided by 512k.
> 
> Signed-off-by: Nikolay Borisov <nborisov@suse.com>
> ---
>  fs/btrfs/inode.c | 75 +++++++++++++++++++++++++++++++-----------------
>  1 file changed, 49 insertions(+), 26 deletions(-)
> 
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 05bbfd02ea49..c2180f0a5f42 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -375,6 +375,7 @@ struct async_cow {
>  	unsigned int write_flags;
>  	struct list_head extents;
>  	struct btrfs_work work;
> +	atomic_t *pending;
>  };
>  
>  static noinline int add_async_extent(struct async_cow *cow,
> @@ -1181,7 +1182,12 @@ static noinline void async_cow_free(struct btrfs_work *work)
>  	async_cow = container_of(work, struct async_cow, work);
>  	if (async_cow->inode)
>  		btrfs_add_delayed_iput(async_cow->inode);
> -	kfree(async_cow);
> +	/*
> +	 * Since the pointer to 'pending' is at the beginning of the array of
> +	 * async_cow's, freeing it ensures the whole array has been freed.
> +	 */
> +	if (atomic_dec_and_test(async_cow->pending))
> +		kfree(async_cow->pending);
>  }
>  
>  static int cow_file_range_async(struct inode *inode, struct page *locked_page,
> @@ -1193,42 +1199,59 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
>  	struct async_cow *async_cow;
>  	unsigned long nr_pages;
>  	u64 cur_end;
> +	u64 num_chunks = DIV_ROUND_UP(end - start, SZ_512K);
> +	int i;
> +	bool should_compress;
> +	atomic_t *p;
> +
>  
>  	clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED,
>  			 1, 0, NULL);

In case of error, I wonder whether the cleared range shouldn't really be
within page_start-page_end, since subsequent pages which are being written
back could follow in this whole region?
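
(For concreteness, the page-aligned bounds in question could be computed
with the usual pagemap helpers -- illustrative only, the variable names are
mine:)

	u64 page_start = page_offset(locked_page);	/* byte offset of the page */
	u64 page_end = page_start + PAGE_SIZE - 1;	/* inclusive end, btrfs-style */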

Nikolay Borisov Feb. 15, 2019, 10 a.m. UTC | #2
On 13.02.19 at 17:55, Nikolay Borisov wrote:
> This commit changes the implementation of cow_file_range_async in order
> to get rid of the BUG_ON in the middle of the loop. Additionally, it
> reworks the inner loop in the hope of making it more understandable.
> 
> The main change is that the number of chunks required to handle the
> given range is calculated before entering the loop, and the loop then
> simply iterates over the chunk count. Furthermore, the memory
> allocation is reworked: the code now does a single kmalloc with enough
> space for all chunks. Depending on whether compression is enabled, the
> chunk count is either 1 (in the non-compress case) or the size of the
> range divided by 512k.
> 
> Signed-off-by: Nikolay Borisov <nborisov@suse.com>

Scratch that patch, since it could lead to deadlocks. It's not enough to
just return an error and hope that higher levels will unlock the pages and
whatnot. Other callers (cow_file_range/run_delalloc_nocow) use
extent_clear_unlock_delalloc.
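
(Roughly, the cleanup the -ENOMEM return is missing would look something
like the following -- a sketch only; the exact clear_bits/page_ops values
and the full argument list of extent_clear_unlock_delalloc are elided here
and would have to match what cow_file_range's error path passes:)

	async_cow = kmalloc(sizeof(atomic_t) + num_chunks * sizeof(*async_cow),
			    GFP_NOFS);
	if (!async_cow) {
		/*
		 * Sketch: clear the delalloc state and unlock the pages in
		 * [start, end] before bailing out, e.g. something like
		 * extent_clear_unlock_delalloc(inode, start, end, ...,
		 *                              locked_page, clear_bits,
		 *                              page_ops);
		 */
		return -ENOMEM;
	}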

Patch

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 05bbfd02ea49..c2180f0a5f42 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -375,6 +375,7 @@ struct async_cow {
 	unsigned int write_flags;
 	struct list_head extents;
 	struct btrfs_work work;
+	atomic_t *pending;
 };
 
 static noinline int add_async_extent(struct async_cow *cow,
@@ -1181,7 +1182,12 @@ static noinline void async_cow_free(struct btrfs_work *work)
 	async_cow = container_of(work, struct async_cow, work);
 	if (async_cow->inode)
 		btrfs_add_delayed_iput(async_cow->inode);
-	kfree(async_cow);
+	/*
+	 * Since the pointer to 'pending' is at the beginning of the array of
+	 * async_cow's, freeing it ensures the whole array has been freed.
+	 */
+	if (atomic_dec_and_test(async_cow->pending))
+		kfree(async_cow->pending);
 }
 
 static int cow_file_range_async(struct inode *inode, struct page *locked_page,
@@ -1193,42 +1199,59 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
 	struct async_cow *async_cow;
 	unsigned long nr_pages;
 	u64 cur_end;
+	u64 num_chunks = DIV_ROUND_UP(end - start, SZ_512K);
+	int i;
+	bool should_compress;
+	atomic_t *p;
+
 
 	clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED,
 			 1, 0, NULL);
-	while (start < end) {
-		async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
-		BUG_ON(!async_cow); /* -ENOMEM */
-		/*
-		 * igrab is called higher up in the call chain, take only the
-		 * lightweight reference for the callback lifetime
-		 */
-		ihold(inode);
-		async_cow->inode = inode;
-		async_cow->fs_info = fs_info;
-		async_cow->locked_page = locked_page;
-		async_cow->start = start;
-		async_cow->write_flags = write_flags;
-
-		if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
-		    !btrfs_test_opt(fs_info, FORCE_COMPRESS))
-			cur_end = end;
-		else
-			cur_end = min(end, start + SZ_512K - 1);
 
-		async_cow->end = cur_end;
-		INIT_LIST_HEAD(&async_cow->extents);
+	if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
+	    !btrfs_test_opt(fs_info, FORCE_COMPRESS)) {
+		num_chunks = 1;
+		should_compress = false;
+	} else {
+		should_compress = true;
+	}
+
+	/* Layout is [atomic_t][async_cow1][async_cowN].... */
+	async_cow = kmalloc(sizeof(atomic_t) + num_chunks*sizeof(*async_cow),
+			    GFP_NOFS);
+	if (!async_cow)
+		return -ENOMEM;
+
+	p = (atomic_t *)async_cow;
+	async_cow = (struct async_cow *)((char *)async_cow + sizeof(atomic_t));
+	atomic_set(p, num_chunks);
+
+	for (i = 0; i < num_chunks; i++) {
 
-		btrfs_init_work(&async_cow->work,
+		if (should_compress)
+			cur_end = min(end, start + SZ_512K - 1);
+		else
+			cur_end = end;
+
+		ihold(inode);
+		async_cow[i].pending= p;
+		async_cow[i].inode = inode;
+		async_cow[i].start = start;
+		async_cow[i].end = cur_end;
+		async_cow[i].fs_info = fs_info;
+		async_cow[i].locked_page = locked_page;
+		async_cow[i].write_flags = write_flags;
+		INIT_LIST_HEAD(&async_cow[i].extents);
+
+		btrfs_init_work(&async_cow[i].work,
 				btrfs_delalloc_helper,
 				async_cow_start, async_cow_submit,
 				async_cow_free);
 
-		nr_pages = (cur_end - start + PAGE_SIZE) >>
-			PAGE_SHIFT;
+		nr_pages = DIV_ROUND_UP(cur_end - start, PAGE_SIZE);
 		atomic_add(nr_pages, &fs_info->async_delalloc_pages);
 
-		btrfs_queue_work(fs_info->delalloc_workers, &async_cow->work);
+		btrfs_queue_work(fs_info->delalloc_workers, &async_cow[i].work);
 
 		*nr_written += nr_pages;
 		start = cur_end + 1;
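
(A quick worked example of the chunk arithmetic above, as a standalone
program -- note that btrfs range ends are inclusive, so a range covering
exactly N * 512K bytes has end - start = N * 512K - 1:)

#include <stdio.h>

#define SZ_512K			(512 * 1024UL)
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	unsigned long start = 0;
	unsigned long end = 3 * SZ_512K - 1;	/* 1.5 MiB range, inclusive end */

	/* num_chunks as computed in the patch */
	printf("num_chunks = %lu\n", DIV_ROUND_UP(end - start, SZ_512K)); /* 3 */
	return 0;
}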