diff mbox series

[v2,02/28] block: Remove req_bio_endio()

Message ID 20240325044452.3125418-3-dlemoal@kernel.org (mailing list archive)
State Superseded
Headers show
Series Zone write plugging | expand

Commit Message

Damien Le Moal March 25, 2024, 4:44 a.m. UTC
Moving req_bio_endio() code into its only caller, blk_update_request(),
allows reducing accesses to and tests of bio and request fields. Also,
given that partial completions of zone append operations are not possible
possible and that zone append operations cannot be merged, the update
of the BIO sector using the request sector for these operations can be
moved directly before the call to bio_endio().

Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
---
 block/blk-mq.c | 66 ++++++++++++++++++++++++--------------------------
 1 file changed, 31 insertions(+), 35 deletions(-)

Comments

Bart Van Assche March 25, 2024, 7:39 p.m. UTC | #1
On 3/24/24 21:44, Damien Le Moal wrote:
> -		if (bio_bytes == bio->bi_iter.bi_size)
> +		if (unlikely(error))
> +			bio->bi_status = error;
> +
> +		if (bio_bytes == bio->bi_iter.bi_size) {

Why no "else" in front of the above if? I think this patch introduces a
behavior change. With the current code, if a zone append operation
fails, bio->bi_status is set to 'error'. With this patch applied, this
becomes BLK_STS_IOERR.

>   			req->bio = bio->bi_next;
> +		} else if (req_op(req) == REQ_OP_ZONE_APPEND) {
> +			/*
> +			 * Partial zone append completions cannot be supported
> +			 * as the BIO fragments may end up not being written
> +			 * sequentially. For such case, force the completed
> +			 * nbytes to be equal to the BIO size so that
> +			 * bio_advance() sets the BIO remaining size to 0 and
> +			 * we end up calling bio_endio() before returning.
> +			 */
> +			bio->bi_status = BLK_STS_IOERR;
> +			bio_bytes = bio->bi_iter.bi_size;
> +		}

Thanks,

Bart.
Damien Le Moal March 26, 2024, 1:54 a.m. UTC | #2
On 3/26/24 04:39, Bart Van Assche wrote:
> On 3/24/24 21:44, Damien Le Moal wrote:
>> -		if (bio_bytes == bio->bi_iter.bi_size)
>> +		if (unlikely(error))
>> +			bio->bi_status = error;
>> +
>> +		if (bio_bytes == bio->bi_iter.bi_size) {
> 
> Why no "else" in front of the above if? I think this patch introduces a
> behavior change. With the current code, if a zone append operation
> fails, bio->bi_status is set to 'error'. With this patch applied, this
> becomes BLK_STS_IOERR.

Adding an else would be very wrong since we still need to go to the next bio of
the request if the current BIO failed. But I get your point. Will fix it.

> 
>>   			req->bio = bio->bi_next;
>> +		} else if (req_op(req) == REQ_OP_ZONE_APPEND) {
>> +			/*
>> +			 * Partial zone append completions cannot be supported
>> +			 * as the BIO fragments may end up not being written
>> +			 * sequentially. For such case, force the completed
>> +			 * nbytes to be equal to the BIO size so that
>> +			 * bio_advance() sets the BIO remaining size to 0 and
>> +			 * we end up calling bio_endio() before returning.
>> +			 */
>> +			bio->bi_status = BLK_STS_IOERR;
>> +			bio_bytes = bio->bi_iter.bi_size;
>> +		}
> 
> Thanks,
> 
> Bart.
> 
>
diff mbox series

Patch

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 555ada922cf0..8aeb8e96f1a7 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -761,36 +761,6 @@  void blk_dump_rq_flags(struct request *rq, char *msg)
 }
 EXPORT_SYMBOL(blk_dump_rq_flags);
 
-static void req_bio_endio(struct request *rq, struct bio *bio,
-			  unsigned int nbytes, blk_status_t error)
-{
-	if (unlikely(error)) {
-		bio->bi_status = error;
-	} else if (req_op(rq) == REQ_OP_ZONE_APPEND) {
-		/*
-		 * Partial zone append completions cannot be supported as the
-		 * BIO fragments may end up not being written sequentially.
-		 * For such case, force the completed nbytes to be equal to
-		 * the BIO size so that bio_advance() sets the BIO remaining
-		 * size to 0 and we end up calling bio_endio() before returning.
-		 */
-		if (bio->bi_iter.bi_size != nbytes) {
-			bio->bi_status = BLK_STS_IOERR;
-			nbytes = bio->bi_iter.bi_size;
-		} else {
-			bio->bi_iter.bi_sector = rq->__sector;
-		}
-	}
-
-	bio_advance(bio, nbytes);
-
-	if (unlikely(rq->rq_flags & RQF_QUIET))
-		bio_set_flag(bio, BIO_QUIET);
-	/* don't actually finish bio if it's part of flush sequence */
-	if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ))
-		bio_endio(bio);
-}
-
 static void blk_account_io_completion(struct request *req, unsigned int bytes)
 {
 	if (req->part && blk_do_io_stat(req)) {
@@ -894,6 +864,8 @@  static void blk_complete_request(struct request *req)
 bool blk_update_request(struct request *req, blk_status_t error,
 		unsigned int nr_bytes)
 {
+	bool is_flush = req->rq_flags & RQF_FLUSH_SEQ;
+	bool quiet = req->rq_flags & RQF_QUIET;
 	int total_bytes;
 
 	trace_block_rq_complete(req, error, nr_bytes);
@@ -914,9 +886,8 @@  bool blk_update_request(struct request *req, blk_status_t error,
 	if (blk_crypto_rq_has_keyslot(req) && nr_bytes >= blk_rq_bytes(req))
 		__blk_crypto_rq_put_keyslot(req);
 
-	if (unlikely(error && !blk_rq_is_passthrough(req) &&
-		     !(req->rq_flags & RQF_QUIET)) &&
-		     !test_bit(GD_DEAD, &req->q->disk->state)) {
+	if (unlikely(error && !blk_rq_is_passthrough(req) && !quiet) &&
+	    !test_bit(GD_DEAD, &req->q->disk->state)) {
 		blk_print_req_error(req, error);
 		trace_block_rq_error(req, error, nr_bytes);
 	}
@@ -928,12 +899,37 @@  bool blk_update_request(struct request *req, blk_status_t error,
 		struct bio *bio = req->bio;
 		unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes);
 
-		if (bio_bytes == bio->bi_iter.bi_size)
+		if (unlikely(error))
+			bio->bi_status = error;
+
+		if (bio_bytes == bio->bi_iter.bi_size) {
 			req->bio = bio->bi_next;
+		} else if (req_op(req) == REQ_OP_ZONE_APPEND) {
+			/*
+			 * Partial zone append completions cannot be supported
+			 * as the BIO fragments may end up not being written
+			 * sequentially. For such case, force the completed
+			 * nbytes to be equal to the BIO size so that
+			 * bio_advance() sets the BIO remaining size to 0 and
+			 * we end up calling bio_endio() before returning.
+			 */
+			bio->bi_status = BLK_STS_IOERR;
+			bio_bytes = bio->bi_iter.bi_size;
+		}
 
 		/* Completion has already been traced */
 		bio_clear_flag(bio, BIO_TRACE_COMPLETION);
-		req_bio_endio(req, bio, bio_bytes, error);
+		if (unlikely(quiet))
+			bio_set_flag(bio, BIO_QUIET);
+
+		bio_advance(bio, bio_bytes);
+
+		/* Don't actually finish bio if it's part of flush sequence */
+		if (!bio->bi_iter.bi_size && !is_flush) {
+			if (req_op(req) == REQ_OP_ZONE_APPEND)
+				bio->bi_iter.bi_sector = req->__sector;
+			bio_endio(bio);
+		}
 
 		total_bytes += bio_bytes;
 		nr_bytes -= bio_bytes;