
[8/9] btrfs: make btrfs_submit_compressed_write() determine stripe boundary at bio allocation time

Message ID 20210611013114.57264-9-wqu@suse.com (mailing list archive)
State New, archived
Series btrfs: compression: refactor and enhancement preparing for subpage compression support

Commit Message

Qu Wenruo June 11, 2021, 1:31 a.m. UTC
Currently btrfs_submit_compressed_write() calls
btrfs_bio_fits_in_stripe() each time a new page is about to be added.

Even though compressed extents are small, there is no need to do that
for every page.

Align the behavior with extent_io.c by determining the stripe boundary
once, when the bio is allocated.

Unlike extent_io.c, compressed.c does not need to care about things
like different bio flags, so there is no need to reuse bio_ctrl.

Instead, introduce a new local variable, next_stripe_start, and use the
value returned from alloc_compressed_bio() as the stripe boundary.

Then, each time a page range is added to the bio, check whether the
boundary has been reached, and if so, submit the bio.

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/compression.c | 122 ++++++++++++++++++++---------------------
 1 file changed, 59 insertions(+), 63 deletions(-)
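
For orientation, the loop this patch introduces boils down to the condensed
sketch below (variable and helper names are taken from the diff further down;
error handling, checksumming and the zone append path are omitted):

	/* Condensed sketch of the new write loop, not the full patch */
	while (cur_disk_bytenr < disk_start + compressed_len) {
		u64 offset = cur_disk_bytenr - disk_start;
		struct page *page = compressed_pages[offset >> PAGE_SHIFT];
		unsigned int real_size;

		/* A fresh bio records its stripe boundary in next_stripe_start */
		if (!bio)
			bio = alloc_compressed_bio(cb, cur_disk_bytenr,
						   bio_op | write_flags,
						   end_compressed_bio_write,
						   &next_stripe_start);

		/* Clamp by stripe boundary, page boundary and compressed length */
		real_size = min_t(u64, U32_MAX, next_stripe_start - cur_disk_bytenr);
		real_size = min_t(u64, real_size, PAGE_SIZE - offset_in_page(offset));
		real_size = min_t(u64, real_size, compressed_len - offset);

		cur_disk_bytenr += bio_add_page(bio, page, real_size,
						offset_in_page(offset));

		/* Reached the stripe boundary, submit and start a fresh bio */
		if (cur_disk_bytenr == next_stripe_start) {
			submit_compressed_bio(fs_info, cb, bio, 0);
			bio = NULL;
		}
	}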

Comments

Johannes Thumshirn June 11, 2021, 7:49 a.m. UTC | #1
On 11/06/2021 03:32, Qu Wenruo wrote:
> +		/* Allocate new bio if not allocated or already submitted */
> +		if (!bio) {
> +			bio = alloc_compressed_bio(cb, cur_disk_bytenr,
> +				bio_op | write_flags,
> +				end_compressed_bio_write,
> +				&next_stripe_start);
> +			if (IS_ERR(bio)) {
> +				ret = errno_to_blk_status(PTR_ERR(bio));
> +				bio = NULL;
> +				goto finish_cb;
> +			}
> +		}
> +		/*
> +		 * We should never reach next_stripe_start, as if we reach the
> +		 * boundary we will submit the bio immediately.
> +		 */
> +		ASSERT(cur_disk_bytenr != next_stripe_start);
> +
> +		/*
> +		 * We have various limit on the real read size:
> +		 * - stripe boundary
> +		 * - page boundary
> +		 * - compressed length boundary
> +		 */
> +		real_size = min_t(u64, U32_MAX,
> +				  next_stripe_start - cur_disk_bytenr);
> +		real_size = min_t(u64, real_size,
> +				  PAGE_SIZE - offset_in_page(offset));
> +		real_size = min_t(u64, real_size,
> +				  compressed_len - offset);
> +		ASSERT(IS_ALIGNED(real_size, fs_info->sectorsize));
>  
> -	/* create and submit bios for the compressed pages */
> -	bytes_left = compressed_len;
> -	for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
> -		int submit = 0;
> -		int len;
> +		added = bio_add_page(bio, page, real_size,
> +				     offset_in_page(offset));
> +		/*
> +		 * Maximum compressed extent size is 128K, we should never
> +		 * reach bio size limit.
> +		 */
> +		ASSERT(added == real_size);
>  
> -		page = compressed_pages[pg_index];
> -		page->mapping = inode->vfs_inode.i_mapping;
> -		if (bio->bi_iter.bi_size)
> -			submit = btrfs_bio_fits_in_stripe(page, PAGE_SIZE, bio,
> -							  0);
> +		cur_disk_bytenr += added;
>  
> -		if (pg_index == 0 && use_append)
> -			len = bio_add_zone_append_page(bio, page, PAGE_SIZE, 0);
> -		else
> -			len = bio_add_page(bio, page, PAGE_SIZE, 0);

I think you still need to distinguish between normal write and zone append here,
as you are adding pages to an already created bio. Adding one page to an empty
bio will always succeed, but when adding more than one page to a zone append
bio, you have to take the device's maximum zone append limit into account, as
zone append bios can't be split. This is also the reason why we do the device
lookup/bio_set_dev() for the zone append bios, so bio_add_zone_append_page() can
look at the device's limitations when adding the pages.
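
A minimal sketch of the device setup described above, assuming the existing
use_append flag; lookup_zoned_bdev() is a hypothetical placeholder for the
device lookup, not a real helper:

	/*
	 * The bio is assumed to have been allocated with REQ_OP_ZONE_APPEND
	 * already set.  Before any page is added it must know its target
	 * device, so that bio_add_zone_append_page() can honour that
	 * device's max_zone_append_sectors limit.
	 */
	if (use_append) {
		struct block_device *bdev = lookup_zoned_bdev(fs_info, disk_start);

		bio_set_dev(bio, bdev);
	}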
Qu Wenruo June 11, 2021, 8:16 a.m. UTC | #2
On 2021/6/11 3:49 PM, Johannes Thumshirn wrote:
> On 11/06/2021 03:32, Qu Wenruo wrote:
>> +		/* Allocate new bio if not allocated or already submitted */
>> +		if (!bio) {
>> +			bio = alloc_compressed_bio(cb, cur_disk_bytenr,
>> +				bio_op | write_flags,
>> +				end_compressed_bio_write,
>> +				&next_stripe_start);
>> +			if (IS_ERR(bio)) {
>> +				ret = errno_to_blk_status(PTR_ERR(bio));
>> +				bio = NULL;
>> +				goto finish_cb;
>> +			}
>> +		}
>> +		/*
>> +		 * We should never reach next_stripe_start, as if we reach the
>> +		 * boundary we will submit the bio immediately.
>> +		 */
>> +		ASSERT(cur_disk_bytenr != next_stripe_start);
>> +
>> +		/*
>> +		 * We have various limit on the real read size:
>> +		 * - stripe boundary
>> +		 * - page boundary
>> +		 * - compressed length boundary
>> +		 */
>> +		real_size = min_t(u64, U32_MAX,
>> +				  next_stripe_start - cur_disk_bytenr);
>> +		real_size = min_t(u64, real_size,
>> +				  PAGE_SIZE - offset_in_page(offset));
>> +		real_size = min_t(u64, real_size,
>> +				  compressed_len - offset);
>> +		ASSERT(IS_ALIGNED(real_size, fs_info->sectorsize));
>>
>> -	/* create and submit bios for the compressed pages */
>> -	bytes_left = compressed_len;
>> -	for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
>> -		int submit = 0;
>> -		int len;
>> +		added = bio_add_page(bio, page, real_size,
>> +				     offset_in_page(offset));
>> +		/*
>> +		 * Maximum compressed extent size is 128K, we should never
>> +		 * reach bio size limit.
>> +		 */
>> +		ASSERT(added == real_size);
>>
>> -		page = compressed_pages[pg_index];
>> -		page->mapping = inode->vfs_inode.i_mapping;
>> -		if (bio->bi_iter.bi_size)
>> -			submit = btrfs_bio_fits_in_stripe(page, PAGE_SIZE, bio,
>> -							  0);
>> +		cur_disk_bytenr += added;
>>
>> -		if (pg_index == 0 && use_append)
>> -			len = bio_add_zone_append_page(bio, page, PAGE_SIZE, 0);
>> -		else
>> -			len = bio_add_page(bio, page, PAGE_SIZE, 0);
>
> I think you still need to distinguish between normal write and zone append here,
> as you adding pages to an already created bio.

Oh, my bad, I forgot to handle zone append differently.

> Adding one page to an empty bio
> will always succeed but when adding more than one page to a zone append bio, you
> have to take the device's maximum zone append limit into account, as zone append
> bios can't be split. This is also the reason why we do the device
> lookup/bio_set_dev() for the zone append bios, so bio_add_zone_append_page() can
> look at the device's limitations when adding the pages.

Did you mean that bio_add_zone_append_page() may return fewer bytes
than we expected?
Even if our compressed write is guaranteed to be smaller than 128K?

Thanks,
Qu
Johannes Thumshirn June 11, 2021, 8:19 a.m. UTC | #3
On 11/06/2021 10:16, Qu Wenruo wrote:
> Did you mean that for the bio_add_zone_append_page(), it may return less
> bytes than we expected?
> Even if our compressed write is ensured to be smaller than 128K?

No, it either adds the requested number of pages or it fails (I think there's
an effort going on to make bio_add_page() and friends return bool so this is
less confusing).
Qu Wenruo June 11, 2021, 8:26 a.m. UTC | #4
On 2021/6/11 4:19 PM, Johannes Thumshirn wrote:
> On 11/06/2021 10:16, Qu Wenruo wrote:
>> Did you mean that for the bio_add_zone_append_page(), it may return less
>> bytes than we expected?
>> Even if our compressed write is ensured to be smaller than 128K?
> 
> No it either adds the number of requested pages or it fails (I think there's
> and effort going on to make bio_add_page() and friends return bool so this is
> less confusing).
> 

Got it, that means we still need to check the return value and submit
if needed.

A regular bio_add_page() can never fail here, as our write size is
smaller than the bio size limit.
But bio_add_zone_append_page() has an extra limit, so that add can
still fail.

Will fix it in next version.

Thanks,
Qu
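
A rough sketch of that check (one possible shape, not necessarily what the
next version will do; checksumming before submission is omitted): since
bio_add_zone_append_page() adds either the whole range or nothing, a short
return means the append bio is full and has to be submitted before the same
range is retried.

	if (use_append)
		added = bio_add_zone_append_page(bio, page, real_size,
						 offset_in_page(offset));
	else
		added = bio_add_page(bio, page, real_size,
				     offset_in_page(offset));

	if (added < real_size) {
		/*
		 * The zone append bio hit the device limit: submit it and
		 * retry the same range with a fresh bio on the next loop
		 * iteration.
		 */
		submit_compressed_bio(fs_info, cb, bio, 0);
		bio = NULL;
		continue;
	}
	cur_disk_bytenr += added;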
Qu Wenruo June 11, 2021, 12:40 p.m. UTC | #5
On 2021/6/11 4:19 PM, Johannes Thumshirn wrote:
> On 11/06/2021 10:16, Qu Wenruo wrote:
>> Did you mean that for the bio_add_zone_append_page(), it may return less
>> bytes than we expected?
>> Even if our compressed write is ensured to be smaller than 128K?
>
> No it either adds the number of requested pages or it fails (I think there's
> and effort going on to make bio_add_page() and friends return bool so this is
> less confusing).
>
BTW, I have a question about the all-or-nothing add behavior.

One of my concerns is: if we're at the zone boundary (or whatever the
proper term is), and we want to add 4 sectors but only one sector fits
before the boundary, and we just submit the current bio, wouldn't that
cause some problem?

E.g. we would leave a one-sector gap in the current zone.
Will that be a problem?

Thanks,
Qu
Johannes Thumshirn June 11, 2021, 12:43 p.m. UTC | #6
On 11/06/2021 14:40, Qu Wenruo wrote:
> 
> 
> On 2021/6/11 4:19 PM, Johannes Thumshirn wrote:
>> On 11/06/2021 10:16, Qu Wenruo wrote:
>>> Did you mean that for the bio_add_zone_append_page(), it may return less
>>> bytes than we expected?
>>> Even if our compressed write is ensured to be smaller than 128K?
>>
>> No it either adds the number of requested pages or it fails (I think there's
>> and effort going on to make bio_add_page() and friends return bool so this is
>> less confusing).
>>
> BTW, I have a question about the add or not at all behavior.
> 
> One of my concern is, if we're at the zone boundary (or what's the
> proper term?), and we want to add 4 sectors, but the zone boundary can
> only add one sector, and if we just submit current bio, wouldn't it
> cause some problem?
> 
> E.g. we leave a one sector gap for the current zone.
> Will that be a problem?
> 

The zoned allocator will take care of it. It'll fill the zone as well as possible
or move the allocation to a new block group on a new zone.

Patch

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 71d7b1b9b4bf..de91489b92c0 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -491,10 +491,7 @@  blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	struct bio *bio = NULL;
 	struct compressed_bio *cb;
-	unsigned long bytes_left;
-	int pg_index = 0;
-	struct page *page;
-	u64 first_byte = disk_start;
+	u64 cur_disk_bytenr = disk_start;
 	u64 next_stripe_start;
 	blk_status_t ret;
 	int skip_sum = inode->flags & BTRFS_INODE_NODATASUM;
@@ -518,38 +515,58 @@  blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 	cb->orig_bio = NULL;
 	cb->nr_pages = nr_pages;
 
-	bio = alloc_compressed_bio(cb, first_byte, bio_op | write_flags,
-				   end_compressed_bio_write,
-				   &next_stripe_start);
-	if (IS_ERR(bio)) {
-		kfree(cb);
-		return errno_to_blk_status(PTR_ERR(bio));
-	}
+	while (cur_disk_bytenr < disk_start + compressed_len) {
+		u64 offset = cur_disk_bytenr - disk_start;
+		int index = offset >> PAGE_SHIFT;
+		unsigned int real_size;
+		unsigned int added;
+		struct page *page = compressed_pages[index];
 
-	if (blkcg_css) {
-		bio->bi_opf |= REQ_CGROUP_PUNT;
-		kthread_associate_blkcg(blkcg_css);
-	}
+		/* Allocate new bio if not allocated or already submitted */
+		if (!bio) {
+			bio = alloc_compressed_bio(cb, cur_disk_bytenr,
+				bio_op | write_flags,
+				end_compressed_bio_write,
+				&next_stripe_start);
+			if (IS_ERR(bio)) {
+				ret = errno_to_blk_status(PTR_ERR(bio));
+				bio = NULL;
+				goto finish_cb;
+			}
+		}
+		/*
+		 * We should never reach next_stripe_start, as if we reach the
+		 * boundary we will submit the bio immediately.
+		 */
+		ASSERT(cur_disk_bytenr != next_stripe_start);
+
+		/*
+		 * We have various limit on the real read size:
+		 * - stripe boundary
+		 * - page boundary
+		 * - compressed length boundary
+		 */
+		real_size = min_t(u64, U32_MAX,
+				  next_stripe_start - cur_disk_bytenr);
+		real_size = min_t(u64, real_size,
+				  PAGE_SIZE - offset_in_page(offset));
+		real_size = min_t(u64, real_size,
+				  compressed_len - offset);
+		ASSERT(IS_ALIGNED(real_size, fs_info->sectorsize));
 
-	/* create and submit bios for the compressed pages */
-	bytes_left = compressed_len;
-	for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
-		int submit = 0;
-		int len;
+		added = bio_add_page(bio, page, real_size,
+				     offset_in_page(offset));
+		/*
+		 * Maximum compressed extent size is 128K, we should never
+		 * reach bio size limit.
+		 */
+		ASSERT(added == real_size);
 
-		page = compressed_pages[pg_index];
-		page->mapping = inode->vfs_inode.i_mapping;
-		if (bio->bi_iter.bi_size)
-			submit = btrfs_bio_fits_in_stripe(page, PAGE_SIZE, bio,
-							  0);
+		cur_disk_bytenr += added;
 
-		if (pg_index == 0 && use_append)
-			len = bio_add_zone_append_page(bio, page, PAGE_SIZE, 0);
-		else
-			len = bio_add_page(bio, page, PAGE_SIZE, 0);
+		/* Reached boundary stripe, need to submit */
+		if (cur_disk_bytenr == next_stripe_start) {
 
-		page->mapping = NULL;
-		if (submit || len < PAGE_SIZE) {
 			if (!skip_sum) {
 				ret = btrfs_csum_one_bio(inode, bio, start, 1);
 				if (ret)
@@ -559,47 +576,26 @@  blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 			ret = submit_compressed_bio(fs_info, cb, bio, 0);
 			if (ret)
 				goto finish_cb;
-
-			bio = alloc_compressed_bio(cb, first_byte,
-					bio_op | write_flags,
-					end_compressed_bio_write,
-					&next_stripe_start);
-			if (IS_ERR(bio)) {
-				ret = errno_to_blk_status(PTR_ERR(bio));
-				bio = NULL;
-				goto finish_cb;
-			}
-			if (blkcg_css)
-				bio->bi_opf |= REQ_CGROUP_PUNT;
-			/*
-			 * Use bio_add_page() to ensure the bio has at least one
-			 * page.
-			 */
-			bio_add_page(bio, page, PAGE_SIZE, 0);
-		}
-		if (bytes_left < PAGE_SIZE) {
-			btrfs_info(fs_info,
-					"bytes left %lu compress len %lu nr %lu",
-			       bytes_left, cb->compressed_len, cb->nr_pages);
+			bio = NULL;
 		}
-		bytes_left -= PAGE_SIZE;
-		first_byte += PAGE_SIZE;
 		cond_resched();
 	}
 
-	if (!skip_sum) {
-		ret = btrfs_csum_one_bio(inode, bio, start, 1);
+	/* Submit the last bio */
+	if (bio) {
+		if (!skip_sum) {
+			ret = btrfs_csum_one_bio(inode, bio, start, 1);
+			if (ret)
+				goto last_bio;
+		}
+
+		ret = submit_compressed_bio(fs_info, cb, bio, 0);
 		if (ret)
 			goto last_bio;
-	}
-
-	ret = submit_compressed_bio(fs_info, cb, bio, 0);
-	if (ret)
-		goto last_bio;
 
+	}
 	if (blkcg_css)
 		kthread_associate_blkcg(NULL);
-
 	return 0;
 
 last_bio: