diff mbox series

[05/11] btrfs: remove bioc->stripes_pending

Message ID 20220806080330.3823644-6-hch@lst.de (mailing list archive)
State New, archived
Headers show
Series [01/11] btrfs: don't call bioset_integrity_create for btrfs_bioset | expand

Commit Message

Christoph Hellwig Aug. 6, 2022, 8:03 a.m. UTC
The stripes_pending in the btrfs_io_context counts number of inflight
low-level bios for an upper btrfs_bio.  For reads this is generally
one as reads are never cloned, while for writes we can trivially use
the bio remaining mechanism that is used for chained bios.

To be able to make use of that mechanism, split out a separate trivial
end_io handler for the cloned bios that does a minimal amount of error
tracking and which then calls bio_endio on the original bio to transfer
control to that, with the remaining counter making sure it is completed
last.  This then allows merging btrfs_end_bioc into the original bio's
bi_end_io handler.  To make this all work, all error handling needs to
happen through the bi_end_io handler, which requires a small amount
of reshuffling in submit_stripe_bio so that the bio is cloned already
by the time the suitability of the device is checked.

This reduces the size of the btrfs_io_context and prepares splitting
the btrfs_bio at the stripe boundary.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
 fs/btrfs/volumes.c | 94 ++++++++++++++++++++++++----------------------
 fs/btrfs/volumes.h |  1 -
 2 files changed, 50 insertions(+), 45 deletions(-)

Comments

Anand Jain Aug. 18, 2022, 11:33 a.m. UTC | #1
On 8/6/22 16:03, Christoph Hellwig wrote:
> The stripes_pending in the btrfs_io_context counts number of inflight
> low-level bios for an upper btrfs_bio.  For reads this is generally
> one as reads are never cloned, while for writes we can trivially use
> the bio remaining mechanisms that is used for chained bios.
> 
> To be able to make use of that mechanism, split out a separate trivial
> end_io handler for the cloned bios that does a minimal amount of error
> tracking and which then calls bio_endio on the original bio to transfer
> control to that, with the remaining counter making sure it is completed
> last.  This then allows to merge btrfs_end_bioc into the original bio
> bi_end_io handler.  To make this all work all error handling needs to
> happen through the bi_end_io handler, which requires a small amount
> of reshuffling in submit_stripe_bio so that the bio is cloned already
> by the time the suitability of the device is checked.
> 
> This reduces the size of the btrfs_io_context and prepares splitting
> the btrfs_bio at the stripe boundary.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Nikolay Borisov <nborisov@suse.com>
> Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> ---
>   fs/btrfs/volumes.c | 94 ++++++++++++++++++++++++----------------------
>   fs/btrfs/volumes.h |  1 -
>   2 files changed, 50 insertions(+), 45 deletions(-)
> 
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 014df2e64e67b..8775f2a635919 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -5892,7 +5892,6 @@ static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_
>   		sizeof(u64) * (total_stripes),
>   		GFP_NOFS|__GFP_NOFAIL);
>   
> -	atomic_set(&bioc->error, 0);
>   	refcount_set(&bioc->refs, 1);
>   
>   	bioc->fs_info = fs_info;
> @@ -6647,7 +6646,21 @@ struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size)
>   	bio_trim(bio, offset >> 9, size >> 9);
>   	bbio->iter = bio->bi_iter;
>   	return bio;
> +}
> +
> +static void btrfs_log_dev_io_error(struct bio *bio, struct btrfs_device *dev)
> +{
> +	if (!dev || !dev->bdev)
> +		return;
> +	if (bio->bi_status != BLK_STS_IOERR && bio->bi_status != BLK_STS_TARGET)
> +		return;
>   
> +	if (btrfs_op(bio) == BTRFS_MAP_WRITE)
> +		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
> +	if (!(bio->bi_opf & REQ_RAHEAD))
> +		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
> +	if (bio->bi_opf & REQ_PREFLUSH)
> +		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_FLUSH_ERRS);
>   }
>   
>   static struct workqueue_struct *btrfs_end_io_wq(struct btrfs_io_context *bioc)
> @@ -6665,62 +6678,54 @@ static void btrfs_end_bio_work(struct work_struct *work)
>   	bio_endio(&bbio->bio);
>   }
>   
> -static void btrfs_end_bioc(struct btrfs_io_context *bioc, bool async)
> +static void btrfs_end_bio(struct bio *bio)
>   {
> -	struct bio *orig_bio = bioc->orig_bio;
> -	struct btrfs_bio *bbio = btrfs_bio(orig_bio);
> +	struct btrfs_io_stripe *stripe = bio->bi_private;
> +	struct btrfs_io_context *bioc = stripe->bioc;
> +	struct btrfs_bio *bbio = btrfs_bio(bio);
>   
>   	btrfs_bio_counter_dec(bioc->fs_info);
>   
> +	if (bio->bi_status) {
> +		atomic_inc(&bioc->error);
> +		btrfs_log_dev_io_error(bio, stripe->dev);
> +	}
> +
>   	bbio->mirror_num = bioc->mirror_num;
> -	orig_bio->bi_private = bioc->private;
> -	orig_bio->bi_end_io = bioc->end_io;
> +	bio->bi_end_io = bioc->end_io;
> +	bio->bi_private = bioc->private;



It looks like we are duplicating the copy of bi_end_io and bi_private,
which can be avoided — or I am missing something.

Here is how:

Save Orig bio in bioc:


btrfs_submit_bio()

         bioc->orig_bio = bio;
         bioc->private = bio->bi_private;
         bioc->end_io = bio->bi_end_io;



Only non-clone bio shall use the btrfs_end_bio callback and orig bio.


submit_stripe_bio()

	if (clone) {
	<snip>
	} else {
                 bio = orig_bio;
                 btrfs_bio(bio)->device = dev;
                 bio->bi_end_io = btrfs_end_bio;
	}



So again, we copy back the same content to it, which can be avoided. No?

btrfs_end_bio()

         bio->bi_end_io = bioc->end_io;
         bio->bi_private = bioc->private;


Thanks.

>   
>   	/*
>   	 * Only send an error to the higher layers if it is beyond the tolerance
>   	 * threshold.
>   	 */
>   	if (atomic_read(&bioc->error) > bioc->max_errors)
> -		orig_bio->bi_status = BLK_STS_IOERR;
> +		bio->bi_status = BLK_STS_IOERR;
>   	else
> -		orig_bio->bi_status = BLK_STS_OK;
> +		bio->bi_status = BLK_STS_OK;
>   
> -	if (btrfs_op(orig_bio) == BTRFS_MAP_READ && async) {
> +	if (btrfs_op(bio) == BTRFS_MAP_READ) {
>   		INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
>   		queue_work(btrfs_end_io_wq(bioc), &bbio->end_io_work);
>   	} else {
> -		bio_endio(orig_bio);
> +		bio_endio(bio);
>   	}
>   
>   	btrfs_put_bioc(bioc);
>   }
>   
> -static void btrfs_end_bio(struct bio *bio)
> +static void btrfs_clone_write_end_io(struct bio *bio)
>   {
>   	struct btrfs_io_stripe *stripe = bio->bi_private;
> -	struct btrfs_io_context *bioc = stripe->bioc;
>   
>   	if (bio->bi_status) {
> -		atomic_inc(&bioc->error);
> -		if (bio->bi_status == BLK_STS_IOERR ||
> -		    bio->bi_status == BLK_STS_TARGET) {
> -			if (btrfs_op(bio) == BTRFS_MAP_WRITE)
> -				btrfs_dev_stat_inc_and_print(stripe->dev,
> -						BTRFS_DEV_STAT_WRITE_ERRS);
> -			else if (!(bio->bi_opf & REQ_RAHEAD))
> -				btrfs_dev_stat_inc_and_print(stripe->dev,
> -						BTRFS_DEV_STAT_READ_ERRS);
> -			if (bio->bi_opf & REQ_PREFLUSH)
> -				btrfs_dev_stat_inc_and_print(stripe->dev,
> -						BTRFS_DEV_STAT_FLUSH_ERRS);
> -		}
> +		atomic_inc(&stripe->bioc->error);
> +		btrfs_log_dev_io_error(bio, stripe->dev);
>   	}
>   
> -	if (bio != bioc->orig_bio)
> -		bio_put(bio);
> -
> -	if (atomic_dec_and_test(&bioc->stripes_pending))
> -		btrfs_end_bioc(bioc, true);
> +	/* Pass on control to the original bio this one was cloned from */
> +	bio_endio(stripe->bioc->orig_bio);
> +	bio_put(bio);
>   }
>   
>   static void submit_stripe_bio(struct btrfs_io_context *bioc,
> @@ -6731,28 +6736,30 @@ static void submit_stripe_bio(struct btrfs_io_context *bioc,
>   	u64 physical = bioc->stripes[dev_nr].physical;
>   	struct bio *bio;
>   
> -	if (!dev || !dev->bdev ||
> -	    test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
> -	    (btrfs_op(orig_bio) == BTRFS_MAP_WRITE &&
> -	     !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
> -		atomic_inc(&bioc->error);
> -		if (atomic_dec_and_test(&bioc->stripes_pending))
> -			btrfs_end_bioc(bioc, false);
> -		return;
> -	}
> -
>   	if (clone) {
> -		bio = bio_alloc_clone(dev->bdev, orig_bio, GFP_NOFS, &fs_bio_set);
> +		bio = bio_alloc_clone(NULL, orig_bio, GFP_NOFS, &fs_bio_set);
> +		bio_inc_remaining(orig_bio);
> +		bio->bi_end_io = btrfs_clone_write_end_io;
>   	} else {
>   		bio = orig_bio;
> -		bio_set_dev(bio, dev->bdev);
>   		btrfs_bio(bio)->device = dev;
> +		bio->bi_end_io = btrfs_end_bio;
>   	}
>   
>   	bioc->stripes[dev_nr].bioc = bioc;
>   	bio->bi_private = &bioc->stripes[dev_nr];
> -	bio->bi_end_io = btrfs_end_bio;
>   	bio->bi_iter.bi_sector = physical >> 9;
> +
> +	if (!dev || !dev->bdev ||
> +	    test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
> +	    (btrfs_op(bio) == BTRFS_MAP_WRITE &&
> +	     !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
> +		bio_io_error(bio);
> +		return;
> +	}
> +
> +	bio_set_dev(bio, dev->bdev);
> +
>   	/*
>   	 * For zone append writing, bi_sector must point the beginning of the
>   	 * zone
> @@ -6801,7 +6808,6 @@ void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror
>   	bioc->orig_bio = bio;
>   	bioc->private = bio->bi_private;
>   	bioc->end_io = bio->bi_end_io;
> -	atomic_set(&bioc->stripes_pending, total_devs);
>   
>   	if ((bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
>   	    ((btrfs_op(bio) == BTRFS_MAP_WRITE) || (mirror_num > 1))) {
> diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
> index bd108c7ed1ac3..9e984a9922c59 100644
> --- a/fs/btrfs/volumes.h
> +++ b/fs/btrfs/volumes.h
> @@ -454,7 +454,6 @@ struct btrfs_discard_stripe {
>    */
>   struct btrfs_io_context {
>   	refcount_t refs;
> -	atomic_t stripes_pending;
>   	struct btrfs_fs_info *fs_info;
>   	u64 map_type; /* get from map_lookup->type */
>   	bio_end_io_t *end_io;
Anand Jain Aug. 18, 2022, 11:36 p.m. UTC | #2
On 8/18/22 19:33, Anand Jain wrote:
> On 8/6/22 16:03, Christoph Hellwig wrote:
>> The stripes_pending in the btrfs_io_context counts number of inflight
>> low-level bios for an upper btrfs_bio.  For reads this is generally
>> one as reads are never cloned, while for writes we can trivially use
>> the bio remaining mechanisms that is used for chained bios.
>>
>> To be able to make use of that mechanism, split out a separate trivial
>> end_io handler for the cloned bios that does a minimal amount of error
>> tracking and which then calls bio_endio on the original bio to transfer
>> control to that, with the remaining counter making sure it is completed
>> last.  This then allows to merge btrfs_end_bioc into the original bio
>> bi_end_io handler.  To make this all work all error handling needs to
>> happen through the bi_end_io handler, which requires a small amount
>> of reshuffling in submit_stripe_bio so that the bio is cloned already
>> by the time the suitability of the device is checked.
>>
>> This reduces the size of the btrfs_io_context and prepares splitting
>> the btrfs_bio at the stripe boundary.
>>
>> Signed-off-by: Christoph Hellwig <hch@lst.de>
>> Reviewed-by: Nikolay Borisov <nborisov@suse.com>
>> Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
>> ---
>>   fs/btrfs/volumes.c | 94 ++++++++++++++++++++++++----------------------
>>   fs/btrfs/volumes.h |  1 -
>>   2 files changed, 50 insertions(+), 45 deletions(-)
>>
>> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
>> index 014df2e64e67b..8775f2a635919 100644
>> --- a/fs/btrfs/volumes.c
>> +++ b/fs/btrfs/volumes.c
>> @@ -5892,7 +5892,6 @@ static struct btrfs_io_context 
>> *alloc_btrfs_io_context(struct btrfs_fs_info *fs_
>>           sizeof(u64) * (total_stripes),
>>           GFP_NOFS|__GFP_NOFAIL);
>> -    atomic_set(&bioc->error, 0);
>>       refcount_set(&bioc->refs, 1);
>>       bioc->fs_info = fs_info;
>> @@ -6647,7 +6646,21 @@ struct bio *btrfs_bio_clone_partial(struct bio 
>> *orig, u64 offset, u64 size)
>>       bio_trim(bio, offset >> 9, size >> 9);
>>       bbio->iter = bio->bi_iter;
>>       return bio;
>> +}
>> +
>> +static void btrfs_log_dev_io_error(struct bio *bio, struct 
>> btrfs_device *dev)
>> +{
>> +    if (!dev || !dev->bdev)
>> +        return;
>> +    if (bio->bi_status != BLK_STS_IOERR && bio->bi_status != 
>> BLK_STS_TARGET)
>> +        return;
>> +    if (btrfs_op(bio) == BTRFS_MAP_WRITE)
>> +        btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
>> +    if (!(bio->bi_opf & REQ_RAHEAD))
>> +        btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
>> +    if (bio->bi_opf & REQ_PREFLUSH)
>> +        btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_FLUSH_ERRS);
>>   }
>>   static struct workqueue_struct *btrfs_end_io_wq(struct 
>> btrfs_io_context *bioc)
>> @@ -6665,62 +6678,54 @@ static void btrfs_end_bio_work(struct 
>> work_struct *work)
>>       bio_endio(&bbio->bio);
>>   }
>> -static void btrfs_end_bioc(struct btrfs_io_context *bioc, bool async)
>> +static void btrfs_end_bio(struct bio *bio)
>>   {
>> -    struct bio *orig_bio = bioc->orig_bio;
>> -    struct btrfs_bio *bbio = btrfs_bio(orig_bio);
>> +    struct btrfs_io_stripe *stripe = bio->bi_private;
>> +    struct btrfs_io_context *bioc = stripe->bioc;
>> +    struct btrfs_bio *bbio = btrfs_bio(bio);
>>       btrfs_bio_counter_dec(bioc->fs_info);
>> +    if (bio->bi_status) {
>> +        atomic_inc(&bioc->error);
>> +        btrfs_log_dev_io_error(bio, stripe->dev);
>> +    }
>> +
>>       bbio->mirror_num = bioc->mirror_num;
>> -    orig_bio->bi_private = bioc->private;
>> -    orig_bio->bi_end_io = bioc->end_io;
>> +    bio->bi_end_io = bioc->end_io;
>> +    bio->bi_private = bioc->private;
> 
> 
> 
> It looks like we are duplicating the copy of bi_end_io and bi_private 
> which can be avoided. OR I am missing something.
> 

No, it is not duplicating. Now I know what I missed. Sorry for the noise.
Anand Jain Aug. 18, 2022, 11:37 p.m. UTC | #3
On 8/6/22 16:03, Christoph Hellwig wrote:
> The stripes_pending in the btrfs_io_context counts number of inflight
> low-level bios for an upper btrfs_bio.  For reads this is generally
> one as reads are never cloned, while for writes we can trivially use
> the bio remaining mechanisms that is used for chained bios.
> 
> To be able to make use of that mechanism, split out a separate trivial
> end_io handler for the cloned bios that does a minimal amount of error
> tracking and which then calls bio_endio on the original bio to transfer
> control to that, with the remaining counter making sure it is completed
> last.  This then allows to merge btrfs_end_bioc into the original bio
> bi_end_io handler.  To make this all work all error handling needs to
> happen through the bi_end_io handler, which requires a small amount
> of reshuffling in submit_stripe_bio so that the bio is cloned already
> by the time the suitability of the device is checked.
> 
> This reduces the size of the btrfs_io_context and prepares splitting
> the btrfs_bio at the stripe boundary.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Nikolay Borisov <nborisov@suse.com>
> Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>


Reviewed-by: Anand Jain <anand.jain@oracle.com>
diff mbox series

Patch

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 014df2e64e67b..8775f2a635919 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5892,7 +5892,6 @@  static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_
 		sizeof(u64) * (total_stripes),
 		GFP_NOFS|__GFP_NOFAIL);
 
-	atomic_set(&bioc->error, 0);
 	refcount_set(&bioc->refs, 1);
 
 	bioc->fs_info = fs_info;
@@ -6647,7 +6646,21 @@  struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size)
 	bio_trim(bio, offset >> 9, size >> 9);
 	bbio->iter = bio->bi_iter;
 	return bio;
+}
+
+static void btrfs_log_dev_io_error(struct bio *bio, struct btrfs_device *dev)
+{
+	if (!dev || !dev->bdev)
+		return;
+	if (bio->bi_status != BLK_STS_IOERR && bio->bi_status != BLK_STS_TARGET)
+		return;
 
+	if (btrfs_op(bio) == BTRFS_MAP_WRITE)
+		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
+	if (!(bio->bi_opf & REQ_RAHEAD))
+		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
+	if (bio->bi_opf & REQ_PREFLUSH)
+		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_FLUSH_ERRS);
 }
 
 static struct workqueue_struct *btrfs_end_io_wq(struct btrfs_io_context *bioc)
@@ -6665,62 +6678,54 @@  static void btrfs_end_bio_work(struct work_struct *work)
 	bio_endio(&bbio->bio);
 }
 
-static void btrfs_end_bioc(struct btrfs_io_context *bioc, bool async)
+static void btrfs_end_bio(struct bio *bio)
 {
-	struct bio *orig_bio = bioc->orig_bio;
-	struct btrfs_bio *bbio = btrfs_bio(orig_bio);
+	struct btrfs_io_stripe *stripe = bio->bi_private;
+	struct btrfs_io_context *bioc = stripe->bioc;
+	struct btrfs_bio *bbio = btrfs_bio(bio);
 
 	btrfs_bio_counter_dec(bioc->fs_info);
 
+	if (bio->bi_status) {
+		atomic_inc(&bioc->error);
+		btrfs_log_dev_io_error(bio, stripe->dev);
+	}
+
 	bbio->mirror_num = bioc->mirror_num;
-	orig_bio->bi_private = bioc->private;
-	orig_bio->bi_end_io = bioc->end_io;
+	bio->bi_end_io = bioc->end_io;
+	bio->bi_private = bioc->private;
 
 	/*
 	 * Only send an error to the higher layers if it is beyond the tolerance
 	 * threshold.
 	 */
 	if (atomic_read(&bioc->error) > bioc->max_errors)
-		orig_bio->bi_status = BLK_STS_IOERR;
+		bio->bi_status = BLK_STS_IOERR;
 	else
-		orig_bio->bi_status = BLK_STS_OK;
+		bio->bi_status = BLK_STS_OK;
 
-	if (btrfs_op(orig_bio) == BTRFS_MAP_READ && async) {
+	if (btrfs_op(bio) == BTRFS_MAP_READ) {
 		INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
 		queue_work(btrfs_end_io_wq(bioc), &bbio->end_io_work);
 	} else {
-		bio_endio(orig_bio);
+		bio_endio(bio);
 	}
 
 	btrfs_put_bioc(bioc);
 }
 
-static void btrfs_end_bio(struct bio *bio)
+static void btrfs_clone_write_end_io(struct bio *bio)
 {
 	struct btrfs_io_stripe *stripe = bio->bi_private;
-	struct btrfs_io_context *bioc = stripe->bioc;
 
 	if (bio->bi_status) {
-		atomic_inc(&bioc->error);
-		if (bio->bi_status == BLK_STS_IOERR ||
-		    bio->bi_status == BLK_STS_TARGET) {
-			if (btrfs_op(bio) == BTRFS_MAP_WRITE)
-				btrfs_dev_stat_inc_and_print(stripe->dev,
-						BTRFS_DEV_STAT_WRITE_ERRS);
-			else if (!(bio->bi_opf & REQ_RAHEAD))
-				btrfs_dev_stat_inc_and_print(stripe->dev,
-						BTRFS_DEV_STAT_READ_ERRS);
-			if (bio->bi_opf & REQ_PREFLUSH)
-				btrfs_dev_stat_inc_and_print(stripe->dev,
-						BTRFS_DEV_STAT_FLUSH_ERRS);
-		}
+		atomic_inc(&stripe->bioc->error);
+		btrfs_log_dev_io_error(bio, stripe->dev);
 	}
 
-	if (bio != bioc->orig_bio)
-		bio_put(bio);
-
-	if (atomic_dec_and_test(&bioc->stripes_pending))
-		btrfs_end_bioc(bioc, true);
+	/* Pass on control to the original bio this one was cloned from */
+	bio_endio(stripe->bioc->orig_bio);
+	bio_put(bio);
 }
 
 static void submit_stripe_bio(struct btrfs_io_context *bioc,
@@ -6731,28 +6736,30 @@  static void submit_stripe_bio(struct btrfs_io_context *bioc,
 	u64 physical = bioc->stripes[dev_nr].physical;
 	struct bio *bio;
 
-	if (!dev || !dev->bdev ||
-	    test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
-	    (btrfs_op(orig_bio) == BTRFS_MAP_WRITE &&
-	     !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
-		atomic_inc(&bioc->error);
-		if (atomic_dec_and_test(&bioc->stripes_pending))
-			btrfs_end_bioc(bioc, false);
-		return;
-	}
-
 	if (clone) {
-		bio = bio_alloc_clone(dev->bdev, orig_bio, GFP_NOFS, &fs_bio_set);
+		bio = bio_alloc_clone(NULL, orig_bio, GFP_NOFS, &fs_bio_set);
+		bio_inc_remaining(orig_bio);
+		bio->bi_end_io = btrfs_clone_write_end_io;
 	} else {
 		bio = orig_bio;
-		bio_set_dev(bio, dev->bdev);
 		btrfs_bio(bio)->device = dev;
+		bio->bi_end_io = btrfs_end_bio;
 	}
 
 	bioc->stripes[dev_nr].bioc = bioc;
 	bio->bi_private = &bioc->stripes[dev_nr];
-	bio->bi_end_io = btrfs_end_bio;
 	bio->bi_iter.bi_sector = physical >> 9;
+
+	if (!dev || !dev->bdev ||
+	    test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
+	    (btrfs_op(bio) == BTRFS_MAP_WRITE &&
+	     !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
+		bio_io_error(bio);
+		return;
+	}
+
+	bio_set_dev(bio, dev->bdev);
+
 	/*
 	 * For zone append writing, bi_sector must point the beginning of the
 	 * zone
@@ -6801,7 +6808,6 @@  void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror
 	bioc->orig_bio = bio;
 	bioc->private = bio->bi_private;
 	bioc->end_io = bio->bi_end_io;
-	atomic_set(&bioc->stripes_pending, total_devs);
 
 	if ((bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
 	    ((btrfs_op(bio) == BTRFS_MAP_WRITE) || (mirror_num > 1))) {
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index bd108c7ed1ac3..9e984a9922c59 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -454,7 +454,6 @@  struct btrfs_discard_stripe {
  */
 struct btrfs_io_context {
 	refcount_t refs;
-	atomic_t stripes_pending;
 	struct btrfs_fs_info *fs_info;
 	u64 map_type; /* get from map_lookup->type */
 	bio_end_io_t *end_io;