diff mbox series

[1/4] btrfs: simplify the pending I/O counting in struct compressed_bio

Message ID 20220630160130.2550001-2-hch@lst.de (mailing list archive)
State New, archived
Headers show
Series [1/4] btrfs: simplify the pending I/O counting in struct compressed_bio | expand

Commit Message

Christoph Hellwig June 30, 2022, 4:01 p.m. UTC
Instead of counting the bytes just count the bios, with an extra
reference held during submission.  This significantly simplifies the
submission side error handling.

This reverts the change commit 6ec9765d746d ("btrfs: introduce
compressed_bio::pending_sectors to trace compressed bio") that moved to
counting sectors, but unlike the state before that commit the extra
reference held during the submission actually keeps the refcounting
sane.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Boris Burkov <boris@bur.io>
---
 fs/btrfs/compression.c | 126 ++++++++++-------------------------------
 fs/btrfs/compression.h |   4 +-
 2 files changed, 33 insertions(+), 97 deletions(-)

Comments

Nikolay Borisov July 5, 2022, 2:40 p.m. UTC | #1
On 30.06.22 г. 19:01 ч., Christoph Hellwig wrote:
> Instead of counting the bytes just count the bios, with an extra
> reference held during submission.  This significantly simplifies the
> submission side error handling.
> 
> This reverts the change commit 6ec9765d746d ("btrfs: introduce
> compressed_bio::pending_sectors to trace compressed bio") that moved to
> counting sectors, but unlike the state before that commit the extra
> reference held during the submission actually keeps the refcounting
> sane.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Boris Burkov <boris@bur.io>
> ---
>   fs/btrfs/compression.c | 126 ++++++++++-------------------------------
>   fs/btrfs/compression.h |   4 +-
>   2 files changed, 33 insertions(+), 97 deletions(-)
> 
> diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
> index 907fc8a4c092c..e756da640fd7b 100644
> --- a/fs/btrfs/compression.c
> +++ b/fs/btrfs/compression.c
> @@ -191,44 +191,6 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
>   	return 0;
>   }
>   

<snip>


>   		btrfs_record_physical_zoned(cb->inode, cb->start, bio);
> @@ -476,7 +444,7 @@ static struct bio *alloc_compressed_bio(struct compressed_bio *cb, u64 disk_byte
>   		return ERR_PTR(ret);
>   	}
>   	*next_stripe_start = disk_bytenr + geom.len;
> -
> +	refcount_inc(&cb->pending_ios);
>   	return bio;
>   }
>   
> @@ -503,17 +471,17 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
>   	struct compressed_bio *cb;
>   	u64 cur_disk_bytenr = disk_start;
>   	u64 next_stripe_start;
> -	blk_status_t ret;
>   	int skip_sum = inode->flags & BTRFS_INODE_NODATASUM;
>   	const bool use_append = btrfs_use_zone_append(inode, disk_start);
>   	const unsigned int bio_op = use_append ? REQ_OP_ZONE_APPEND : REQ_OP_WRITE;
> +	blk_status_t ret = BLK_STS_OK;
>   
>   	ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
>   	       IS_ALIGNED(len, fs_info->sectorsize));
>   	cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
>   	if (!cb)
>   		return BLK_STS_RESOURCE;
> -	refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
> +	refcount_set(&cb->pending_ios, 1);
>   	cb->status = BLK_STS_OK;
>   	cb->inode = &inode->vfs_inode;
>   	cb->start = start;
> @@ -543,8 +511,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
>   				&next_stripe_start);
>   			if (IS_ERR(bio)) {
>   				ret = errno_to_blk_status(PTR_ERR(bio));
> -				bio = NULL;
> -				goto finish_cb;
> +				break;
>   			}
>   			if (blkcg_css)
>   				bio->bi_opf |= REQ_CGROUP_PUNT;
> @@ -588,8 +555,11 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
>   		if (submit) {
>   			if (!skip_sum) {
>   				ret = btrfs_csum_one_bio(inode, bio, start, true);
> -				if (ret)
> -					goto finish_cb;
> +				if (ret) {
> +					bio->bi_status = ret;
> +					bio_endio(bio);
> +					break;
> +				}
>   			}
>   
>   			ASSERT(bio->bi_iter.bi_size);
> @@ -598,33 +568,12 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
>   		}
>   		cond_resched();
>   	}
> -	if (blkcg_css)
> -		kthread_associate_blkcg(NULL);
>   
> -	return 0;
> -
> -finish_cb:
>   	if (blkcg_css)
>   		kthread_associate_blkcg(NULL);
>   
> -	if (bio) {
> -		bio->bi_status = ret;
> -		bio_endio(bio);
> -	}
> -	/* Last byte of @cb is submitted, endio will free @cb */
> -	if (cur_disk_bytenr == disk_start + compressed_len)
> -		return ret;
> -
> -	wait_var_event(cb, refcount_read(&cb->pending_sectors) ==
> -			   (disk_start + compressed_len - cur_disk_bytenr) >>
> -			   fs_info->sectorsize_bits);
> -	/*
> -	 * Even with previous bio ended, we should still have io not yet
> -	 * submitted, thus need to finish manually.
> -	 */
> -	ASSERT(refcount_read(&cb->pending_sectors));
> -	/* Now we are the only one referring @cb, can finish it safely. */
> -	finish_compressed_bio_write(cb);
> +	if (refcount_dec_and_test(&cb->pending_ios))
> +		finish_compressed_bio_write(cb);

nit: This slightly changes the semantics of the function because with 
the old code the bio could have been completed in 
submit_compressed_write iff there was an error during submission for one 
of the sub-bios. Whilst with this new code there is a chance even in the 
success case this happens (if the sub bios complete by the time we 
arrive at this code). Generally that'd be very unlikely due to io 
latency and indeed this code becomes effective iff there is an error. 
Personally I'd like such changes to be called out explicitly in the 
change log or at least with a comment. I guess this ties into the "keeps 
ref counting sane" in the changelog but what exactly do you mean by 
sane? I guess it ties into what Qu mentions in his changelog about 
ensuring the compressed bio is not freed/finished by some completing 
sub-bio _before_ the submitter had a chance to submit all sub-bios and 
that the old code was doing another subtle thing - setting the counter 
of pending bios to 1, and not incrementing it when doing the final 
submission, thus ensuring everything works out.

I agree your code is much better however I'd like to have the above 
details put (perhaps slightly reworded) in the changelog so that those 
subtle aspects are more visible to someone reading it some months down 
the line :) .

<snip>
Christoph Hellwig July 5, 2022, 5:17 p.m. UTC | #2
On Tue, Jul 05, 2022 at 05:40:22PM +0300, Nikolay Borisov wrote:
> nit: This slightly changes the semantics of the function because with the 
> old code the bio could have been completed in submit_compressed_write iff 
> there was an error during submission for one of the sub-bios. Whilst with 
> this new code there is a chance even in the success case this happens (if 
> the sub bios complete by the time we arrive at this code).

Yes.

> Generally that'd 
> be very unlikely due to io latency and indeed this code becomes effective 
> iff there is an error. Personally I'd like such changes to be called out 
> explicitly in the change log or at least with a comment.

Ok, I can spell this out a little more explicitly.
diff mbox series

Patch

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 907fc8a4c092c..e756da640fd7b 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -191,44 +191,6 @@  static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
 	return 0;
 }
 
-/*
- * Reduce bio and io accounting for a compressed_bio with its corresponding bio.
- *
- * Return true if there is no pending bio nor io.
- * Return false otherwise.
- */
-static bool dec_and_test_compressed_bio(struct compressed_bio *cb, struct bio *bio)
-{
-	struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
-	unsigned int bi_size = 0;
-	bool last_io = false;
-	struct bio_vec *bvec;
-	struct bvec_iter_all iter_all;
-
-	/*
-	 * At endio time, bi_iter.bi_size doesn't represent the real bio size.
-	 * Thus here we have to iterate through all segments to grab correct
-	 * bio size.
-	 */
-	bio_for_each_segment_all(bvec, bio, iter_all)
-		bi_size += bvec->bv_len;
-
-	if (bio->bi_status)
-		cb->status = bio->bi_status;
-
-	ASSERT(bi_size && bi_size <= cb->compressed_len);
-	last_io = refcount_sub_and_test(bi_size >> fs_info->sectorsize_bits,
-					&cb->pending_sectors);
-	/*
-	 * Here we must wake up the possible error handler after all other
-	 * operations on @cb finished, or we can race with
-	 * finish_compressed_bio_*() which may free @cb.
-	 */
-	wake_up_var(cb);
-
-	return last_io;
-}
-
 static void finish_compressed_bio_read(struct compressed_bio *cb)
 {
 	unsigned int index;
@@ -288,7 +250,10 @@  static void end_compressed_bio_read(struct bio *bio)
 	unsigned int mirror = btrfs_bio(bio)->mirror_num;
 	int ret = 0;
 
-	if (!dec_and_test_compressed_bio(cb, bio))
+	if (bio->bi_status)
+		cb->status = bio->bi_status;
+
+	if (!refcount_dec_and_test(&cb->pending_ios))
 		goto out;
 
 	/*
@@ -417,7 +382,10 @@  static void end_compressed_bio_write(struct bio *bio)
 {
 	struct compressed_bio *cb = bio->bi_private;
 
-	if (dec_and_test_compressed_bio(cb, bio)) {
+	if (bio->bi_status)
+		cb->status = bio->bi_status;
+
+	if (refcount_dec_and_test(&cb->pending_ios)) {
 		struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
 
 		btrfs_record_physical_zoned(cb->inode, cb->start, bio);
@@ -476,7 +444,7 @@  static struct bio *alloc_compressed_bio(struct compressed_bio *cb, u64 disk_byte
 		return ERR_PTR(ret);
 	}
 	*next_stripe_start = disk_bytenr + geom.len;
-
+	refcount_inc(&cb->pending_ios);
 	return bio;
 }
 
@@ -503,17 +471,17 @@  blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 	struct compressed_bio *cb;
 	u64 cur_disk_bytenr = disk_start;
 	u64 next_stripe_start;
-	blk_status_t ret;
 	int skip_sum = inode->flags & BTRFS_INODE_NODATASUM;
 	const bool use_append = btrfs_use_zone_append(inode, disk_start);
 	const unsigned int bio_op = use_append ? REQ_OP_ZONE_APPEND : REQ_OP_WRITE;
+	blk_status_t ret = BLK_STS_OK;
 
 	ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
 	       IS_ALIGNED(len, fs_info->sectorsize));
 	cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
 	if (!cb)
 		return BLK_STS_RESOURCE;
-	refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
+	refcount_set(&cb->pending_ios, 1);
 	cb->status = BLK_STS_OK;
 	cb->inode = &inode->vfs_inode;
 	cb->start = start;
@@ -543,8 +511,7 @@  blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 				&next_stripe_start);
 			if (IS_ERR(bio)) {
 				ret = errno_to_blk_status(PTR_ERR(bio));
-				bio = NULL;
-				goto finish_cb;
+				break;
 			}
 			if (blkcg_css)
 				bio->bi_opf |= REQ_CGROUP_PUNT;
@@ -588,8 +555,11 @@  blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 		if (submit) {
 			if (!skip_sum) {
 				ret = btrfs_csum_one_bio(inode, bio, start, true);
-				if (ret)
-					goto finish_cb;
+				if (ret) {
+					bio->bi_status = ret;
+					bio_endio(bio);
+					break;
+				}
 			}
 
 			ASSERT(bio->bi_iter.bi_size);
@@ -598,33 +568,12 @@  blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 		}
 		cond_resched();
 	}
-	if (blkcg_css)
-		kthread_associate_blkcg(NULL);
 
-	return 0;
-
-finish_cb:
 	if (blkcg_css)
 		kthread_associate_blkcg(NULL);
 
-	if (bio) {
-		bio->bi_status = ret;
-		bio_endio(bio);
-	}
-	/* Last byte of @cb is submitted, endio will free @cb */
-	if (cur_disk_bytenr == disk_start + compressed_len)
-		return ret;
-
-	wait_var_event(cb, refcount_read(&cb->pending_sectors) ==
-			   (disk_start + compressed_len - cur_disk_bytenr) >>
-			   fs_info->sectorsize_bits);
-	/*
-	 * Even with previous bio ended, we should still have io not yet
-	 * submitted, thus need to finish manually.
-	 */
-	ASSERT(refcount_read(&cb->pending_sectors));
-	/* Now we are the only one referring @cb, can finish it safely. */
-	finish_compressed_bio_write(cb);
+	if (refcount_dec_and_test(&cb->pending_ios))
+		finish_compressed_bio_write(cb);
 	return ret;
 }
 
@@ -830,7 +779,7 @@  void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 		goto out;
 	}
 
-	refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
+	refcount_set(&cb->pending_ios, 1);
 	cb->status = BLK_STS_OK;
 	cb->inode = inode;
 	cb->mirror_num = mirror_num;
@@ -880,9 +829,9 @@  void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 					REQ_OP_READ, end_compressed_bio_read,
 					&next_stripe_start);
 			if (IS_ERR(comp_bio)) {
-				ret = errno_to_blk_status(PTR_ERR(comp_bio));
-				comp_bio = NULL;
-				goto finish_cb;
+				cb->status =
+					errno_to_blk_status(PTR_ERR(comp_bio));
+				break;
 			}
 		}
 		/*
@@ -921,8 +870,11 @@  void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 			unsigned int nr_sectors;
 
 			ret = btrfs_lookup_bio_sums(inode, comp_bio, sums);
-			if (ret)
-				goto finish_cb;
+			if (ret) {
+				comp_bio->bi_status = ret;
+				bio_endio(comp_bio);
+				break;
+			}
 
 			nr_sectors = DIV_ROUND_UP(comp_bio->bi_iter.bi_size,
 						  fs_info->sectorsize);
@@ -933,6 +885,9 @@  void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 			comp_bio = NULL;
 		}
 	}
+
+	if (refcount_dec_and_test(&cb->pending_ios))
+		finish_compressed_bio_read(cb);
 	return;
 
 fail:
@@ -950,25 +905,6 @@  void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	bio->bi_status = ret;
 	bio_endio(bio);
 	return;
-finish_cb:
-	if (comp_bio) {
-		comp_bio->bi_status = ret;
-		bio_endio(comp_bio);
-	}
-	/* All bytes of @cb is submitted, endio will free @cb */
-	if (cur_disk_byte == disk_bytenr + compressed_len)
-		return;
-
-	wait_var_event(cb, refcount_read(&cb->pending_sectors) ==
-			   (disk_bytenr + compressed_len - cur_disk_byte) >>
-			   fs_info->sectorsize_bits);
-	/*
-	 * Even with previous bio ended, we should still have io not yet
-	 * submitted, thus need to finish @cb manually.
-	 */
-	ASSERT(refcount_read(&cb->pending_sectors));
-	/* Now we are the only one referring @cb, can finish it safely. */
-	finish_compressed_bio_read(cb);
 }
 
 /*
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 5fca7603e928a..0e4cbf04fd866 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -30,8 +30,8 @@  static_assert((BTRFS_MAX_COMPRESSED % PAGE_SIZE) == 0);
 #define	BTRFS_ZLIB_DEFAULT_LEVEL		3
 
 struct compressed_bio {
-	/* Number of sectors with unfinished IO (unsubmitted or unfinished) */
-	refcount_t pending_sectors;
+	/* Number of outstanding bios */
+	refcount_t pending_ios;
 
 	/* Number of compressed pages in the array */
 	unsigned int nr_pages;