[5/9] block: add support for blk_mq_end_request_batch()

Message ID	20211013165416.985696-6-axboe@kernel.dk (mailing list archive)
State	New, archived
Headers	show Return-Path: <linux-block-owner@kernel.org> From: Jens Axboe <axboe@kernel.dk> To: linux-block@vger.kernel.org Cc: Jens Axboe <axboe@kernel.dk> Subject: [PATCH 5/9] block: add support for blk_mq_end_request_batch() Date: Wed, 13 Oct 2021 10:54:12 -0600 Message-Id: <20211013165416.985696-6-axboe@kernel.dk> In-Reply-To: <20211013165416.985696-1-axboe@kernel.dk> References: <20211013165416.985696-1-axboe@kernel.dk> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Precedence: bulk
Series	Batched completions | expand [PATCHSET,v2,0/9] Batched completions [1/9] block: define io_batch structure [2/9] block: add a struct io_batch argument to fops->iopoll() [3/9] sbitmap: add helper to clear a batch of tags [4/9] sbitmap: test bit before calling test_and_set_bit() [5/9] block: add support for blk_mq_end_request_batch() [6/9] nvme: add support for batched completion of polled IO [7/9] block: assign batch completion handler in blk_poll() [8/9] io_uring: utilize the io_batch infrastructure for more efficient polled IO [9/9] nvme: wire up completion batching for the IRQ path

Message ID

20211013165416.985696-6-axboe@kernel.dk (mailing list archive)

State

New, archived

Headers

From: Jens Axboe <axboe@kernel.dk>
To: linux-block@vger.kernel.org
Cc: Jens Axboe <axboe@kernel.dk>
Subject: [PATCH 5/9] block: add support for blk_mq_end_request_batch()
Date: Wed, 13 Oct 2021 10:54:12 -0600
Message-Id: <20211013165416.985696-6-axboe@kernel.dk>
In-Reply-To: <20211013165416.985696-1-axboe@kernel.dk>
References: <20211013165416.985696-1-axboe@kernel.dk>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Precedence: bulk

Series

Batched completions | expand

Commit Message

Jens Axboe Oct. 13, 2021, 4:54 p.m. UTC

Instead of calling blk_mq_end_request() on a single request, add a helper
that takes the new struct io_batch and completes any request stored in
there.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-tag.c     |  6 +++
 block/blk-mq-tag.h     |  1 +
 block/blk-mq.c         | 85 +++++++++++++++++++++++++++++++++++++-----
 include/linux/blk-mq.h | 13 +++++++
 4 files changed, 95 insertions(+), 10 deletions(-)

Comments

Christoph Hellwig Oct. 14, 2021, 7:32 a.m. UTC | #1

> +void blk_mq_end_request_batch(struct io_batch *iob)
> +{
> +	int tags[TAG_COMP_BATCH], nr_tags = 0, acct_tags = 0;
> +	struct blk_mq_hw_ctx *last_hctx = NULL;
> +	struct request *rq;
> +	u64 now = 0;
> +
> +	while ((rq = rq_list_pop(&iob->req_list)) != NULL) {
> +		if (!now && blk_mq_need_time_stamp(rq))
> +			now = ktime_get_ns();
> +		blk_update_request(rq, rq->status, blk_rq_bytes(rq));
> +		__blk_mq_end_request_acct(rq, rq->status, now);
> +
> +		if (rq->q->elevator) {
> +			blk_mq_free_request(rq);
> +			continue;
> +		}

So why do we even bother adding requests with an elevator to the batch
list?  

> +	/*
> +	 * csd is used for remote completions, fifo_time at scheduler time.
> +	 * They are mutually exclusive. result is used at completion time
> +	 * like csd, but for batched IO. Batched IO does not use IPI
> +	 * completions.
> +	 */
>  	union {
>  		struct __call_single_data csd;
>  		u64 fifo_time;
> +		blk_status_t status;
>  	};

The ->status field isn't needed any more now that error completions
aren't batched.

Jens Axboe Oct. 14, 2021, 3:27 p.m. UTC | #2

On 10/14/21 1:32 AM, Christoph Hellwig wrote:
>> +void blk_mq_end_request_batch(struct io_batch *iob)
>> +{
>> +	int tags[TAG_COMP_BATCH], nr_tags = 0, acct_tags = 0;
>> +	struct blk_mq_hw_ctx *last_hctx = NULL;
>> +	struct request *rq;
>> +	u64 now = 0;
>> +
>> +	while ((rq = rq_list_pop(&iob->req_list)) != NULL) {
>> +		if (!now && blk_mq_need_time_stamp(rq))
>> +			now = ktime_get_ns();
>> +		blk_update_request(rq, rq->status, blk_rq_bytes(rq));
>> +		__blk_mq_end_request_acct(rq, rq->status, now);
>> +
>> +		if (rq->q->elevator) {
>> +			blk_mq_free_request(rq);
>> +			continue;
>> +		}
> 
> So why do we even bother adding requests with an elevator to the batch
> list?  

You still get the benefit of amortized time keeping, and it's more
efficient to complete in batches rather than one at a time. It's just
not as good as the non-elevator path.

>> +	/*
>> +	 * csd is used for remote completions, fifo_time at scheduler time.
>> +	 * They are mutually exclusive. result is used at completion time
>> +	 * like csd, but for batched IO. Batched IO does not use IPI
>> +	 * completions.
>> +	 */
>>  	union {
>>  		struct __call_single_data csd;
>>  		u64 fifo_time;
>> +		blk_status_t status;
>>  	};
> 
> The ->status field isn't needed any more now that error completions
> aren't batched.

Killed.

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index c43b97201161..b94c3e8ef392 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -207,6 +207,12 @@  void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
 	}
 }
 
+void blk_mq_put_tags(struct blk_mq_tags *tags, int *tag_array, int nr_tags)
+{
+	sbitmap_queue_clear_batch(&tags->bitmap_tags, tags->nr_reserved_tags,
+					tag_array, nr_tags);
+}
+
 struct bt_iter_data {
 	struct blk_mq_hw_ctx *hctx;
 	busy_iter_fn *fn;
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 71c2f7d8e9b7..78ae2fb8e2a4 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -42,6 +42,7 @@  unsigned long blk_mq_get_tags(struct blk_mq_alloc_data *data, int nr_tags,
 			      unsigned int *offset);
 extern void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
 			   unsigned int tag);
+void blk_mq_put_tags(struct blk_mq_tags *tags, int *tag_array, int nr_tags);
 extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
 					struct blk_mq_tags **tags,
 					unsigned int depth, bool can_grow);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 6eac10fd244e..d603703cf272 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -603,19 +603,22 @@  void blk_mq_free_plug_rqs(struct blk_plug *plug)
 	}
 }
 
-inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
+static inline void __blk_mq_end_request_acct(struct request *rq,
+					     blk_status_t error, u64 now)
 {
-	if (blk_mq_need_time_stamp(rq)) {
-		u64 now = ktime_get_ns();
+	if (rq->rq_flags & RQF_STATS) {
+		blk_mq_poll_stats_start(rq->q);
+		blk_stat_add(rq, now);
+	}
 
-		if (rq->rq_flags & RQF_STATS) {
-			blk_mq_poll_stats_start(rq->q);
-			blk_stat_add(rq, now);
-		}
+	blk_mq_sched_completed_request(rq, now);
+	blk_account_io_done(rq, now);
+}
 
-		blk_mq_sched_completed_request(rq, now);
-		blk_account_io_done(rq, now);
-	}
+inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
+{
+	if (blk_mq_need_time_stamp(rq))
+		__blk_mq_end_request_acct(rq, error, ktime_get_ns());
 
 	if (rq->end_io) {
 		rq_qos_done(rq->q, rq);
@@ -848,6 +851,68 @@  void blk_mq_end_request(struct request *rq, blk_status_t error)
 }
 EXPORT_SYMBOL(blk_mq_end_request);
 
+#define TAG_COMP_BATCH		32
+#define TAG_SCHED_BATCH		(TAG_COMP_BATCH >> 1)
+
+static inline void blk_mq_flush_tag_batch(struct blk_mq_hw_ctx *hctx,
+					  int *tag_array, int nr_tags)
+{
+	struct request_queue *q = hctx->queue;
+
+	blk_mq_put_tags(hctx->tags, tag_array, nr_tags);
+	if (q->elevator)
+		blk_mq_put_tags(hctx->sched_tags, &tag_array[TAG_SCHED_BATCH],
+				nr_tags);
+	percpu_ref_put_many(&q->q_usage_counter, nr_tags);
+	blk_mq_sched_restart(hctx);
+}
+
+void blk_mq_end_request_batch(struct io_batch *iob)
+{
+	int tags[TAG_COMP_BATCH], nr_tags = 0, acct_tags = 0;
+	struct blk_mq_hw_ctx *last_hctx = NULL;
+	struct request *rq;
+	u64 now = 0;
+
+	while ((rq = rq_list_pop(&iob->req_list)) != NULL) {
+		if (!now && blk_mq_need_time_stamp(rq))
+			now = ktime_get_ns();
+		blk_update_request(rq, rq->status, blk_rq_bytes(rq));
+		__blk_mq_end_request_acct(rq, rq->status, now);
+
+		if (rq->q->elevator) {
+			blk_mq_free_request(rq);
+			continue;
+		}
+
+		if (!refcount_dec_and_test(&rq->ref))
+			continue;
+
+		blk_crypto_free_request(rq);
+		blk_pm_mark_last_busy(rq);
+		rq_qos_done(rq->q, rq);
+		WRITE_ONCE(rq->state, MQ_RQ_IDLE);
+
+		if (acct_tags == TAG_COMP_BATCH ||
+		    (last_hctx && last_hctx != rq->mq_hctx)) {
+			blk_mq_flush_tag_batch(last_hctx, tags, nr_tags);
+			acct_tags = nr_tags = 0;
+		}
+		tags[nr_tags] = rq->tag;
+		last_hctx = rq->mq_hctx;
+		if (last_hctx->queue->elevator) {
+			tags[nr_tags + TAG_SCHED_BATCH] = rq->internal_tag;
+			acct_tags++;
+		}
+		nr_tags++;
+		acct_tags++;
+	}
+
+	if (nr_tags)
+		blk_mq_flush_tag_batch(last_hctx, tags, nr_tags);
+}
+EXPORT_SYMBOL_GPL(blk_mq_end_request_batch);
+
 static void blk_complete_reqs(struct llist_head *list)
 {
 	struct llist_node *entry = llist_reverse_order(llist_del_all(list));
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 5106c4cc411a..aea7d866a34c 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -183,9 +183,16 @@  struct request {
 	unsigned int timeout;
 	unsigned long deadline;
 
+	/*
+	 * csd is used for remote completions, fifo_time at scheduler time.
+	 * They are mutually exclusive. status is used at completion time
+	 * like csd, but for batched IO. Batched IO does not use IPI
+	 * completions.
+	 */
 	union {
 		struct __call_single_data csd;
 		u64 fifo_time;
+		blk_status_t status;
 	};
 
 	/*
@@ -570,6 +577,11 @@  struct blk_mq_ops {
 	 */
 	void (*complete)(struct request *);
 
+	/**
+	 * @complete_batch: Mark list of requests as complete
+	 */
+	void (*complete_batch)(struct io_batch *);
+
 	/**
 	 * @init_hctx: Called when the block layer side of a hardware queue has
 	 * been set up, allowing the driver to allocate/init matching
@@ -759,6 +771,7 @@  static inline void blk_mq_set_request_complete(struct request *rq)
 void blk_mq_start_request(struct request *rq);
 void blk_mq_end_request(struct request *rq, blk_status_t error);
 void __blk_mq_end_request(struct request *rq, blk_status_t error);
+void blk_mq_end_request_batch(struct io_batch *ib);
 
 void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
 void blk_mq_kick_requeue_list(struct request_queue *q);

[5/9] block: add support for blk_mq_end_request_batch()

Commit Message

Comments

Patch