[07/14] block: change plugging to use a singly linked list

Message ID	20211017013748.76461-8-axboe@kernel.dk (mailing list archive)
State	New, archived
Headers	show Return-Path: <linux-block-owner@kernel.org> From: Jens Axboe <axboe@kernel.dk> To: linux-block@vger.kernel.org Cc: Jens Axboe <axboe@kernel.dk> Subject: [PATCH 07/14] block: change plugging to use a singly linked list Date: Sat, 16 Oct 2021 19:37:41 -0600 Message-Id: <20211017013748.76461-8-axboe@kernel.dk> In-Reply-To: <20211017013748.76461-1-axboe@kernel.dk> References: <20211017013748.76461-1-axboe@kernel.dk> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Precedence: bulk
Series	Various block layer optimizations \| expand [PATCHSET,0/14] Various block layer optimizations [01/14] block: inline fast path of driver tag allocation [02/14] block: don't bother iter advancing a fully done bio [03/14] block: remove useless caller argument to print_req_error() [04/14] block: move update request helpers into blk-mq.c [05/14] block: don't call blk_status_to_errno() for success status [06/14] block: store elevator state in request [07/14] block: change plugging to use a singly linked list [08/14] block: improve layout of struct request [09/14] block: only mark bio as tracked if it really is tracked [10/14] block: move blk_mq_tag_to_rq() inline [11/14] block: optimize blk_mq_rq_ctx_init() [12/14] block: align blkdev_dio inlined bio to a cacheline [13/14] block: remove debugfs blk_mq_ctx dispatched/merged/completed attributes [14/14] block: remove some blk_mq_hw_ctx debugfs entries

Message ID

20211017013748.76461-8-axboe@kernel.dk (mailing list archive)

State

New, archived

Headers

From: Jens Axboe <axboe@kernel.dk>
To: linux-block@vger.kernel.org
Cc: Jens Axboe <axboe@kernel.dk>
Subject: [PATCH 07/14] block: change plugging to use a singly linked list
Date: Sat, 16 Oct 2021 19:37:41 -0600
Message-Id: <20211017013748.76461-8-axboe@kernel.dk>
In-Reply-To: <20211017013748.76461-1-axboe@kernel.dk>
References: <20211017013748.76461-1-axboe@kernel.dk>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Precedence: bulk

Series

Various block layer optimizations | expand

Commit Message

Jens Axboe Oct. 17, 2021, 1:37 a.m. UTC

Use a singly linked list for the blk_plug. This saves 8 bytes in the
blk_plug struct, and makes for faster list manipulations than doubly
linked lists. As we don't use the doubly linked lists for anything,
singly linked is just fine.

This yields a bump in default (merging enabled) performance from 7.0
to 7.1M IOPS, and ~7.5M IOPS with merging disabled.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c       |   5 +-
 block/blk-merge.c      |   8 +--
 block/blk-mq.c         | 156 +++++++++++++++++++++++++++--------------
 block/blk.h            |   2 +-
 include/linux/blkdev.h |   6 +-
 5 files changed, 113 insertions(+), 64 deletions(-)

Comments

Christoph Hellwig Oct. 18, 2021, 9:19 a.m. UTC | #1

On Sat, Oct 16, 2021 at 07:37:41PM -0600, Jens Axboe wrote:
> Use a singly linked list for the blk_plug. This saves 8 bytes in the
> blk_plug struct, and makes for faster list manipulations than doubly
> linked lists. As we don't use the doubly linked lists for anything,
> singly linked is just fine.

This patch also does a few other thing, like changing the same_queue_rq
type and adding a has_elevator flag to the plug.  Can you please split
this out and document the changes better?

>static void blk_mq_plug_issue_direct(struct blk_plug *plug, bool from_schedule)
>  {
> +	struct blk_mq_hw_ctx *hctx = NULL;
> +	int queued = 0;
> +	int errors = 0;
> +
> +	while (!rq_list_empty(plug->mq_list)) {
> +		struct request *rq;
> +		blk_status_t ret;
> +
> +		rq = rq_list_pop(&plug->mq_list);
>  
> +		if (!hctx)
> +			hctx = rq->mq_hctx;
> +		else if (hctx != rq->mq_hctx && hctx->queue->mq_ops->commit_rqs) {
> +			trace_block_unplug(hctx->queue, queued, !from_schedule);
> +			hctx->queue->mq_ops->commit_rqs(hctx);
> +			queued = 0;
> +			hctx = rq->mq_hctx;
> +		}
> +
> +		ret = blk_mq_request_issue_directly(rq,
> +						rq_list_empty(plug->mq_list));
> +		if (ret != BLK_STS_OK) {
> +			if (ret == BLK_STS_RESOURCE ||
> +					ret == BLK_STS_DEV_RESOURCE) {
> +				blk_mq_request_bypass_insert(rq, false,
> +						rq_list_empty(plug->mq_list));
> +				break;
> +			}
> +			blk_mq_end_request(rq, ret);
> +			errors++;
> +		} else
> +			queued++;
> +	}

This all looks a bit messy to me.  I'd suggest reordering this a bit
including a new helper:

static void blk_mq_commit_rqs(struct blk_mq_hw_ctx *hctx, int *queued,
		bool from_schedule)
{
	if (hctx->queue->mq_ops->commit_rqs) {
		trace_block_unplug(hctx->queue, *queued, !from_schedule);
		hctx->queue->mq_ops->commit_rqs(hctx);
	}
	*queued = 0;
}

static void blk_mq_plug_issue_direct(struct blk_plug *plug, bool from_schedule)
{
	struct blk_mq_hw_ctx *hctx = NULL;
	int queued = 0;
	int errors = 0;

	while ((rq = rq_list_pop(&plug->mq_list))) {
		bool last = rq_list_empty(plug->mq_list);
		blk_status_t ret;

		if (hctx != rq->mq_hctx) {
			if (hctx)
				blk_mq_commit_rqs(hctx, &queued, from_schedule);
			hctx = rq->mq_hctx;
		}

		ret = blk_mq_request_issue_directly(rq, last);
		switch (ret) {
		case BLK_STS_OK:
			queued++;
			break;
		case BLK_STS_RESOURCE:
		case BLK_STS_DEV_RESOURCE:
			blk_mq_request_bypass_insert(rq, false, last);
			blk_mq_commit_rqs(hctx, &queued, from_schedule);
			return;
		default:
			blk_mq_end_request(rq, ret);
			errors++;
			break;
		}
	}

	/*
	 * If we didn't flush the entire list, we could have told the driver
	 * there was more coming, but that turned out to be a lie.
	 */
	if (errors)
		blk_mq_commit_rqs(hctx, &queued, from_schedule);
}

Pavel Begunkov Oct. 18, 2021, 12:56 p.m. UTC | #2

On 10/17/21 01:37, Jens Axboe wrote:
> Use a singly linked list for the blk_plug. This saves 8 bytes in the
> blk_plug struct, and makes for faster list manipulations than doubly
> linked lists. As we don't use the doubly linked lists for anything,
> singly linked is just fine.
> 
> This yields a bump in default (merging enabled) performance from 7.0
> to 7.1M IOPS, and ~7.5M IOPS with merging disabled.

block/blk-merge.c: In function ‘blk_attempt_plug_merge’:
block/blk-merge.c:1094:22: error: implicit declaration of function ‘rq_list_empty’; did you mean ‘bio_list_empty’? [-Werror=implicit-function-declaration]
  1094 |         if (!plug || rq_list_empty(plug->mq_list))


Also in blk-mq.c and others, and I don't see it defined anywhere.
Used just fetched for-5.16/block. Did I miss it somewhere?


> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> ---
>   block/blk-core.c       |   5 +-
>   block/blk-merge.c      |   8 +--
>   block/blk-mq.c         | 156 +++++++++++++++++++++++++++--------------
>   block/blk.h            |   2 +-
>   include/linux/blkdev.h |   6 +-
>   5 files changed, 113 insertions(+), 64 deletions(-)
> 
> diff --git a/block/blk-core.c b/block/blk-core.c
> index bdc03b80a8d0..fced71948162 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -1542,11 +1542,12 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
>   	if (tsk->plug)
>   		return;
>   
> -	INIT_LIST_HEAD(&plug->mq_list);
> +	plug->mq_list = NULL;
>   	plug->cached_rq = NULL;
>   	plug->nr_ios = min_t(unsigned short, nr_ios, BLK_MAX_REQUEST_COUNT);
>   	plug->rq_count = 0;
>   	plug->multiple_queues = false;
> +	plug->has_elevator = false;
>   	plug->nowait = false;
>   	INIT_LIST_HEAD(&plug->cb_list);
>   
> @@ -1632,7 +1633,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
>   {
>   	flush_plug_callbacks(plug, from_schedule);
>   
> -	if (!list_empty(&plug->mq_list))
> +	if (!rq_list_empty(plug->mq_list))
>   		blk_mq_flush_plug_list(plug, from_schedule);
>   	if (unlikely(!from_schedule && plug->cached_rq))
>   		blk_mq_free_plug_rqs(plug);
> diff --git a/block/blk-merge.c b/block/blk-merge.c
> index ec727234ac48..b3f3e689a5ac 100644
> --- a/block/blk-merge.c
> +++ b/block/blk-merge.c
> @@ -1085,23 +1085,23 @@ static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q,
>    * Caller must ensure !blk_queue_nomerges(q) beforehand.
>    */
>   bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
> -		unsigned int nr_segs, struct request **same_queue_rq)
> +		unsigned int nr_segs, bool *same_queue_rq)
>   {
>   	struct blk_plug *plug;
>   	struct request *rq;
>   
>   	plug = blk_mq_plug(q, bio);
> -	if (!plug || list_empty(&plug->mq_list))
> +	if (!plug || rq_list_empty(plug->mq_list))
>   		return false;
>   
>   	/* check the previously added entry for a quick merge attempt */
> -	rq = list_last_entry(&plug->mq_list, struct request, queuelist);
> +	rq = rq_list_peek(&plug->mq_list);
>   	if (rq->q == q && same_queue_rq) {
>   		/*
>   		 * Only blk-mq multiple hardware queues case checks the rq in
>   		 * the same queue, there should be only one such rq in a queue
>   		 */
> -		*same_queue_rq = rq;
> +		*same_queue_rq = true;
>   	}
>   	if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == BIO_MERGE_OK)
>   		return true;
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 5d22c228f6df..cd1249284c1f 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -19,7 +19,6 @@
>   #include <linux/smp.h>
>   #include <linux/interrupt.h>
>   #include <linux/llist.h>
> -#include <linux/list_sort.h>
>   #include <linux/cpu.h>
>   #include <linux/cache.h>
>   #include <linux/sched/sysctl.h>
> @@ -2118,54 +2117,100 @@ void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
>   	spin_unlock(&ctx->lock);
>   }
>   
> -static int plug_rq_cmp(void *priv, const struct list_head *a,
> -		       const struct list_head *b)
> +static void blk_mq_plug_issue_direct(struct blk_plug *plug, bool from_schedule)
>   {
> -	struct request *rqa = container_of(a, struct request, queuelist);
> -	struct request *rqb = container_of(b, struct request, queuelist);
> +	struct blk_mq_hw_ctx *hctx = NULL;
> +	int queued = 0;
> +	int errors = 0;
> +
> +	while (!rq_list_empty(plug->mq_list)) {
> +		struct request *rq;
> +		blk_status_t ret;
> +
> +		rq = rq_list_pop(&plug->mq_list);
>   
> -	if (rqa->mq_ctx != rqb->mq_ctx)
> -		return rqa->mq_ctx > rqb->mq_ctx;
> -	if (rqa->mq_hctx != rqb->mq_hctx)
> -		return rqa->mq_hctx > rqb->mq_hctx;
> +		if (!hctx)
> +			hctx = rq->mq_hctx;
> +		else if (hctx != rq->mq_hctx && hctx->queue->mq_ops->commit_rqs) {
> +			trace_block_unplug(hctx->queue, queued, !from_schedule);
> +			hctx->queue->mq_ops->commit_rqs(hctx);
> +			queued = 0;
> +			hctx = rq->mq_hctx;
> +		}
> +
> +		ret = blk_mq_request_issue_directly(rq,
> +						rq_list_empty(plug->mq_list));
> +		if (ret != BLK_STS_OK) {
> +			if (ret == BLK_STS_RESOURCE ||
> +					ret == BLK_STS_DEV_RESOURCE) {
> +				blk_mq_request_bypass_insert(rq, false,
> +						rq_list_empty(plug->mq_list));
> +				break;
> +			}
> +			blk_mq_end_request(rq, ret);
> +			errors++;
> +		} else
> +			queued++;
> +	}
>   
> -	return blk_rq_pos(rqa) > blk_rq_pos(rqb);
> +	/*
> +	 * If we didn't flush the entire list, we could have told
> +	 * the driver there was more coming, but that turned out to
> +	 * be a lie.
> +	 */
> +	if ((!rq_list_empty(plug->mq_list) || errors) &&
> +	     hctx->queue->mq_ops->commit_rqs && queued)
> +		hctx->queue->mq_ops->commit_rqs(hctx);
>   }
>   
>   void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
>   {
> +	struct blk_mq_hw_ctx *this_hctx;
> +	struct blk_mq_ctx *this_ctx;
> +	unsigned int depth;
>   	LIST_HEAD(list);
>   
> -	if (list_empty(&plug->mq_list))
> +	if (rq_list_empty(plug->mq_list))
>   		return;
> -	list_splice_init(&plug->mq_list, &list);
> -
> -	if (plug->rq_count > 2 && plug->multiple_queues)
> -		list_sort(NULL, &list, plug_rq_cmp);
> -
>   	plug->rq_count = 0;
>   
> +	if (!plug->multiple_queues && !plug->has_elevator) {
> +		blk_mq_plug_issue_direct(plug, from_schedule);
> +		if (rq_list_empty(plug->mq_list))
> +			return;
> +	}
> +
> +	this_hctx = NULL;
> +	this_ctx = NULL;
> +	depth = 0;
>   	do {
> -		struct list_head rq_list;
> -		struct request *rq, *head_rq = list_entry_rq(list.next);
> -		struct list_head *pos = &head_rq->queuelist; /* skip first */
> -		struct blk_mq_hw_ctx *this_hctx = head_rq->mq_hctx;
> -		struct blk_mq_ctx *this_ctx = head_rq->mq_ctx;
> -		unsigned int depth = 1;
> -
> -		list_for_each_continue(pos, &list) {
> -			rq = list_entry_rq(pos);
> -			BUG_ON(!rq->q);
> -			if (rq->mq_hctx != this_hctx || rq->mq_ctx != this_ctx)
> -				break;
> -			depth++;
> +		struct request *rq;
> +
> +		rq = rq_list_pop(&plug->mq_list);
> +
> +		if (!this_hctx) {
> +			this_hctx = rq->mq_hctx;
> +			this_ctx = rq->mq_ctx;
> +		} else if (this_hctx != rq->mq_hctx || this_ctx != rq->mq_ctx) {
> +			trace_block_unplug(this_hctx->queue, depth,
> +						!from_schedule);
> +			blk_mq_sched_insert_requests(this_hctx, this_ctx,
> +						&list, from_schedule);
> +			depth = 0;
> +			this_hctx = rq->mq_hctx;
> +			this_ctx = rq->mq_ctx;
> +
>   		}
>   
> -		list_cut_before(&rq_list, &list, pos);
> -		trace_block_unplug(head_rq->q, depth, !from_schedule);
> -		blk_mq_sched_insert_requests(this_hctx, this_ctx, &rq_list,
> +		list_add(&rq->queuelist, &list);
> +		depth++;
> +	} while (!rq_list_empty(plug->mq_list));
> +
> +	if (!list_empty(&list)) {
> +		trace_block_unplug(this_hctx->queue, depth, !from_schedule);
> +		blk_mq_sched_insert_requests(this_hctx, this_ctx, &list,
>   						from_schedule);
> -	} while(!list_empty(&list));
> +	}
>   }
>   
>   static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
> @@ -2345,16 +2390,17 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
>   
>   static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
>   {
> -	list_add_tail(&rq->queuelist, &plug->mq_list);
> -	plug->rq_count++;
> -	if (!plug->multiple_queues && !list_is_singular(&plug->mq_list)) {
> -		struct request *tmp;
> +	if (!plug->multiple_queues) {
> +		struct request *nxt = rq_list_peek(&plug->mq_list);
>   
> -		tmp = list_first_entry(&plug->mq_list, struct request,
> -						queuelist);
> -		if (tmp->q != rq->q)
> +		if (nxt && nxt->q != rq->q)
>   			plug->multiple_queues = true;
>   	}
> +	if (!plug->has_elevator && (rq->rq_flags & RQF_ELV))
> +		plug->has_elevator = true;
> +	rq->rq_next = NULL;
> +	rq_list_add(&plug->mq_list, rq);
> +	plug->rq_count++;
>   }
>   
>   /*
> @@ -2389,7 +2435,7 @@ void blk_mq_submit_bio(struct bio *bio)
>   	const int is_flush_fua = op_is_flush(bio->bi_opf);
>   	struct request *rq;
>   	struct blk_plug *plug;
> -	struct request *same_queue_rq = NULL;
> +	bool same_queue_rq = false;
>   	unsigned int nr_segs = 1;
>   	blk_status_t ret;
>   
> @@ -2463,15 +2509,17 @@ void blk_mq_submit_bio(struct bio *bio)
>   		 * IO may benefit a lot from plug merging.
>   		 */
>   		unsigned int request_count = plug->rq_count;
> -		struct request *last = NULL;
> +		struct request *tmp = NULL;
>   
> -		if (!request_count)
> +		if (!request_count) {
>   			trace_block_plug(q);
> -		else
> -			last = list_entry_rq(plug->mq_list.prev);
> +		} else if (!blk_queue_nomerges(q)) {
> +			tmp = rq_list_peek(&plug->mq_list);
> +			if (blk_rq_bytes(tmp) < BLK_PLUG_FLUSH_SIZE)
> +				tmp = NULL;
> +		}
>   
> -		if (request_count >= blk_plug_max_rq_count(plug) || (last &&
> -		    blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
> +		if (request_count >= blk_plug_max_rq_count(plug) || tmp) {
>   			blk_flush_plug_list(plug, false);
>   			trace_block_plug(q);
>   		}
> @@ -2481,6 +2529,8 @@ void blk_mq_submit_bio(struct bio *bio)
>   		/* Insert the request at the IO scheduler queue */
>   		blk_mq_sched_insert_request(rq, false, true, true);
>   	} else if (plug && !blk_queue_nomerges(q)) {
> +		struct request *first_rq = NULL;
> +
>   		/*
>   		 * We do limited plugging. If the bio can be merged, do that.
>   		 * Otherwise the existing request in the plug list will be
> @@ -2488,19 +2538,17 @@ void blk_mq_submit_bio(struct bio *bio)
>   		 * The plug list might get flushed before this. If that happens,
>   		 * the plug list is empty, and same_queue_rq is invalid.
>   		 */
> -		if (list_empty(&plug->mq_list))
> -			same_queue_rq = NULL;
> -		if (same_queue_rq) {
> -			list_del_init(&same_queue_rq->queuelist);
> +		if (!rq_list_empty(plug->mq_list) && same_queue_rq) {
> +			first_rq = rq_list_pop(&plug->mq_list);
>   			plug->rq_count--;
>   		}
>   		blk_add_rq_to_plug(plug, rq);
>   		trace_block_plug(q);
>   
> -		if (same_queue_rq) {
> +		if (first_rq) {
>   			trace_block_unplug(q, 1, true);
> -			blk_mq_try_issue_directly(same_queue_rq->mq_hctx,
> -						  same_queue_rq);
> +			blk_mq_try_issue_directly(first_rq->mq_hctx,
> +						  first_rq);
>   		}
>   	} else if ((q->nr_hw_queues > 1 && is_sync) ||
>   		   !rq->mq_hctx->dispatch_busy) {
> diff --git a/block/blk.h b/block/blk.h
> index fdfaa6896fc4..c15d1ab224b8 100644
> --- a/block/blk.h
> +++ b/block/blk.h
> @@ -216,7 +216,7 @@ void blk_add_timer(struct request *req);
>   void blk_print_req_error(struct request *req, blk_status_t status);
>   
>   bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
> -		unsigned int nr_segs, struct request **same_queue_rq);
> +		unsigned int nr_segs, bool *same_queue_rq);
>   bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
>   			struct bio *bio, unsigned int nr_segs);
>   
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 43dda725dcae..c6a6402cb1a1 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -727,7 +727,7 @@ extern void blk_set_queue_dying(struct request_queue *);
>    * schedule() where blk_schedule_flush_plug() is called.
>    */
>   struct blk_plug {
> -	struct list_head mq_list; /* blk-mq requests */
> +	struct request *mq_list; /* blk-mq requests */
>   
>   	/* if ios_left is > 1, we can batch tag/rq allocations */
>   	struct request *cached_rq;
> @@ -736,6 +736,7 @@ struct blk_plug {
>   	unsigned short rq_count;
>   
>   	bool multiple_queues;
> +	bool has_elevator;
>   	bool nowait;
>   
>   	struct list_head cb_list; /* md requires an unplug callback */
> @@ -776,8 +777,7 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk)
>   	struct blk_plug *plug = tsk->plug;
>   
>   	return plug &&
> -		 (!list_empty(&plug->mq_list) ||
> -		 !list_empty(&plug->cb_list));
> +		 (plug->mq_list || !list_empty(&plug->cb_list));
>   }
>   
>   int blkdev_issue_flush(struct block_device *bdev);
>

Jens Axboe Oct. 18, 2021, 1:34 p.m. UTC | #3

On 10/18/21 6:56 AM, Pavel Begunkov wrote:
> On 10/17/21 01:37, Jens Axboe wrote:
>> Use a singly linked list for the blk_plug. This saves 8 bytes in the
>> blk_plug struct, and makes for faster list manipulations than doubly
>> linked lists. As we don't use the doubly linked lists for anything,
>> singly linked is just fine.
>>
>> This yields a bump in default (merging enabled) performance from 7.0
>> to 7.1M IOPS, and ~7.5M IOPS with merging disabled.
> 
> block/blk-merge.c: In function ‘blk_attempt_plug_merge’:
> block/blk-merge.c:1094:22: error: implicit declaration of function ‘rq_list_empty’; did you mean ‘bio_list_empty’? [-Werror=implicit-function-declaration]
>   1094 |         if (!plug || rq_list_empty(plug->mq_list))
> 
> 
> Also in blk-mq.c and others, and I don't see it defined anywhere.
> Used just fetched for-5.16/block. Did I miss it somewhere?

I forgot to include this prep patch:

https://git.kernel.dk/cgit/linux-block/commit/?h=perf-wip&id=b3ca14988ce2eb125a8a80c3ee146f3ad55cfcf4

Jens Axboe Oct. 18, 2021, 4:10 p.m. UTC | #4

On 10/18/21 3:19 AM, Christoph Hellwig wrote:
> On Sat, Oct 16, 2021 at 07:37:41PM -0600, Jens Axboe wrote:
>> Use a singly linked list for the blk_plug. This saves 8 bytes in the
>> blk_plug struct, and makes for faster list manipulations than doubly
>> linked lists. As we don't use the doubly linked lists for anything,
>> singly linked is just fine.
> 
> This patch also does a few other thing, like changing the same_queue_rq
> type and adding a has_elevator flag to the plug.  Can you please split
> this out and document the changes better?

I'll split it, should probably be 3 patches.

> 
>> static void blk_mq_plug_issue_direct(struct blk_plug *plug, bool from_schedule)
>>  {
>> +	struct blk_mq_hw_ctx *hctx = NULL;
>> +	int queued = 0;
>> +	int errors = 0;
>> +
>> +	while (!rq_list_empty(plug->mq_list)) {
>> +		struct request *rq;
>> +		blk_status_t ret;
>> +
>> +		rq = rq_list_pop(&plug->mq_list);
>>  
>> +		if (!hctx)
>> +			hctx = rq->mq_hctx;
>> +		else if (hctx != rq->mq_hctx && hctx->queue->mq_ops->commit_rqs) {
>> +			trace_block_unplug(hctx->queue, queued, !from_schedule);
>> +			hctx->queue->mq_ops->commit_rqs(hctx);
>> +			queued = 0;
>> +			hctx = rq->mq_hctx;
>> +		}
>> +
>> +		ret = blk_mq_request_issue_directly(rq,
>> +						rq_list_empty(plug->mq_list));
>> +		if (ret != BLK_STS_OK) {
>> +			if (ret == BLK_STS_RESOURCE ||
>> +					ret == BLK_STS_DEV_RESOURCE) {
>> +				blk_mq_request_bypass_insert(rq, false,
>> +						rq_list_empty(plug->mq_list));
>> +				break;
>> +			}
>> +			blk_mq_end_request(rq, ret);
>> +			errors++;
>> +		} else
>> +			queued++;
>> +	}
> 
> This all looks a bit messy to me.  I'd suggest reordering this a bit
> including a new helper:
> 
> static void blk_mq_commit_rqs(struct blk_mq_hw_ctx *hctx, int *queued,
> 		bool from_schedule)
> {
> 	if (hctx->queue->mq_ops->commit_rqs) {
> 		trace_block_unplug(hctx->queue, *queued, !from_schedule);
> 		hctx->queue->mq_ops->commit_rqs(hctx);
> 	}
> 	*queued = 0;
> }
> 
> static void blk_mq_plug_issue_direct(struct blk_plug *plug, bool from_schedule)
> {
> 	struct blk_mq_hw_ctx *hctx = NULL;
> 	int queued = 0;
> 	int errors = 0;
> 
> 	while ((rq = rq_list_pop(&plug->mq_list))) {
> 		bool last = rq_list_empty(plug->mq_list);
> 		blk_status_t ret;
> 
> 		if (hctx != rq->mq_hctx) {
> 			if (hctx)
> 				blk_mq_commit_rqs(hctx, &queued, from_schedule);
> 			hctx = rq->mq_hctx;
> 		}
> 
> 		ret = blk_mq_request_issue_directly(rq, last);
> 		switch (ret) {
> 		case BLK_STS_OK:
> 			queued++;
> 			break;
> 		case BLK_STS_RESOURCE:
> 		case BLK_STS_DEV_RESOURCE:
> 			blk_mq_request_bypass_insert(rq, false, last);
> 			blk_mq_commit_rqs(hctx, &queued, from_schedule);
> 			return;
> 		default:
> 			blk_mq_end_request(rq, ret);
> 			errors++;
> 			break;
> 		}
> 	}
> 
> 	/*
> 	 * If we didn't flush the entire list, we could have told the driver
> 	 * there was more coming, but that turned out to be a lie.
> 	 */
> 	if (errors)
> 		blk_mq_commit_rqs(hctx, &queued, from_schedule);
> }

Good suggestion!

diff --git a/block/blk-core.c b/block/blk-core.c
index bdc03b80a8d0..fced71948162 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1542,11 +1542,12 @@  void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
 	if (tsk->plug)
 		return;
 
-	INIT_LIST_HEAD(&plug->mq_list);
+	plug->mq_list = NULL;
 	plug->cached_rq = NULL;
 	plug->nr_ios = min_t(unsigned short, nr_ios, BLK_MAX_REQUEST_COUNT);
 	plug->rq_count = 0;
 	plug->multiple_queues = false;
+	plug->has_elevator = false;
 	plug->nowait = false;
 	INIT_LIST_HEAD(&plug->cb_list);
 
@@ -1632,7 +1633,7 @@  void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 {
 	flush_plug_callbacks(plug, from_schedule);
 
-	if (!list_empty(&plug->mq_list))
+	if (!rq_list_empty(plug->mq_list))
 		blk_mq_flush_plug_list(plug, from_schedule);
 	if (unlikely(!from_schedule && plug->cached_rq))
 		blk_mq_free_plug_rqs(plug);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index ec727234ac48..b3f3e689a5ac 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -1085,23 +1085,23 @@  static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q,
  * Caller must ensure !blk_queue_nomerges(q) beforehand.
  */
 bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
-		unsigned int nr_segs, struct request **same_queue_rq)
+		unsigned int nr_segs, bool *same_queue_rq)
 {
 	struct blk_plug *plug;
 	struct request *rq;
 
 	plug = blk_mq_plug(q, bio);
-	if (!plug || list_empty(&plug->mq_list))
+	if (!plug || rq_list_empty(plug->mq_list))
 		return false;
 
 	/* check the previously added entry for a quick merge attempt */
-	rq = list_last_entry(&plug->mq_list, struct request, queuelist);
+	rq = rq_list_peek(&plug->mq_list);
 	if (rq->q == q && same_queue_rq) {
 		/*
 		 * Only blk-mq multiple hardware queues case checks the rq in
 		 * the same queue, there should be only one such rq in a queue
 		 */
-		*same_queue_rq = rq;
+		*same_queue_rq = true;
 	}
 	if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == BIO_MERGE_OK)
 		return true;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 5d22c228f6df..cd1249284c1f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -19,7 +19,6 @@ 
 #include <linux/smp.h>
 #include <linux/interrupt.h>
 #include <linux/llist.h>
-#include <linux/list_sort.h>
 #include <linux/cpu.h>
 #include <linux/cache.h>
 #include <linux/sched/sysctl.h>
@@ -2118,54 +2117,100 @@  void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
 	spin_unlock(&ctx->lock);
 }
 
-static int plug_rq_cmp(void *priv, const struct list_head *a,
-		       const struct list_head *b)
+static void blk_mq_plug_issue_direct(struct blk_plug *plug, bool from_schedule)
 {
-	struct request *rqa = container_of(a, struct request, queuelist);
-	struct request *rqb = container_of(b, struct request, queuelist);
+	struct blk_mq_hw_ctx *hctx = NULL;
+	int queued = 0;
+	int errors = 0;
+
+	while (!rq_list_empty(plug->mq_list)) {
+		struct request *rq;
+		blk_status_t ret;
+
+		rq = rq_list_pop(&plug->mq_list);
 
-	if (rqa->mq_ctx != rqb->mq_ctx)
-		return rqa->mq_ctx > rqb->mq_ctx;
-	if (rqa->mq_hctx != rqb->mq_hctx)
-		return rqa->mq_hctx > rqb->mq_hctx;
+		if (!hctx)
+			hctx = rq->mq_hctx;
+		else if (hctx != rq->mq_hctx && hctx->queue->mq_ops->commit_rqs) {
+			trace_block_unplug(hctx->queue, queued, !from_schedule);
+			hctx->queue->mq_ops->commit_rqs(hctx);
+			queued = 0;
+			hctx = rq->mq_hctx;
+		}
+
+		ret = blk_mq_request_issue_directly(rq,
+						rq_list_empty(plug->mq_list));
+		if (ret != BLK_STS_OK) {
+			if (ret == BLK_STS_RESOURCE ||
+					ret == BLK_STS_DEV_RESOURCE) {
+				blk_mq_request_bypass_insert(rq, false,
+						rq_list_empty(plug->mq_list));
+				break;
+			}
+			blk_mq_end_request(rq, ret);
+			errors++;
+		} else
+			queued++;
+	}
 
-	return blk_rq_pos(rqa) > blk_rq_pos(rqb);
+	/*
+	 * If we didn't flush the entire list, we could have told
+	 * the driver there was more coming, but that turned out to
+	 * be a lie.
+	 */
+	if ((!rq_list_empty(plug->mq_list) || errors) &&
+	     hctx->queue->mq_ops->commit_rqs && queued)
+		hctx->queue->mq_ops->commit_rqs(hctx);
 }
 
 void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 {
+	struct blk_mq_hw_ctx *this_hctx;
+	struct blk_mq_ctx *this_ctx;
+	unsigned int depth;
 	LIST_HEAD(list);
 
-	if (list_empty(&plug->mq_list))
+	if (rq_list_empty(plug->mq_list))
 		return;
-	list_splice_init(&plug->mq_list, &list);
-
-	if (plug->rq_count > 2 && plug->multiple_queues)
-		list_sort(NULL, &list, plug_rq_cmp);
-
 	plug->rq_count = 0;
 
+	if (!plug->multiple_queues && !plug->has_elevator) {
+		blk_mq_plug_issue_direct(plug, from_schedule);
+		if (rq_list_empty(plug->mq_list))
+			return;
+	}
+
+	this_hctx = NULL;
+	this_ctx = NULL;
+	depth = 0;
 	do {
-		struct list_head rq_list;
-		struct request *rq, *head_rq = list_entry_rq(list.next);
-		struct list_head *pos = &head_rq->queuelist; /* skip first */
-		struct blk_mq_hw_ctx *this_hctx = head_rq->mq_hctx;
-		struct blk_mq_ctx *this_ctx = head_rq->mq_ctx;
-		unsigned int depth = 1;
-
-		list_for_each_continue(pos, &list) {
-			rq = list_entry_rq(pos);
-			BUG_ON(!rq->q);
-			if (rq->mq_hctx != this_hctx || rq->mq_ctx != this_ctx)
-				break;
-			depth++;
+		struct request *rq;
+
+		rq = rq_list_pop(&plug->mq_list);
+
+		if (!this_hctx) {
+			this_hctx = rq->mq_hctx;
+			this_ctx = rq->mq_ctx;
+		} else if (this_hctx != rq->mq_hctx || this_ctx != rq->mq_ctx) {
+			trace_block_unplug(this_hctx->queue, depth,
+						!from_schedule);
+			blk_mq_sched_insert_requests(this_hctx, this_ctx,
+						&list, from_schedule);
+			depth = 0;
+			this_hctx = rq->mq_hctx;
+			this_ctx = rq->mq_ctx;
+
 		}
 
-		list_cut_before(&rq_list, &list, pos);
-		trace_block_unplug(head_rq->q, depth, !from_schedule);
-		blk_mq_sched_insert_requests(this_hctx, this_ctx, &rq_list,
+		list_add(&rq->queuelist, &list);
+		depth++;
+	} while (!rq_list_empty(plug->mq_list));
+
+	if (!list_empty(&list)) {
+		trace_block_unplug(this_hctx->queue, depth, !from_schedule);
+		blk_mq_sched_insert_requests(this_hctx, this_ctx, &list,
 						from_schedule);
-	} while(!list_empty(&list));
+	}
 }
 
 static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
@@ -2345,16 +2390,17 @@  void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
 
 static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
 {
-	list_add_tail(&rq->queuelist, &plug->mq_list);
-	plug->rq_count++;
-	if (!plug->multiple_queues && !list_is_singular(&plug->mq_list)) {
-		struct request *tmp;
+	if (!plug->multiple_queues) {
+		struct request *nxt = rq_list_peek(&plug->mq_list);
 
-		tmp = list_first_entry(&plug->mq_list, struct request,
-						queuelist);
-		if (tmp->q != rq->q)
+		if (nxt && nxt->q != rq->q)
 			plug->multiple_queues = true;
 	}
+	if (!plug->has_elevator && (rq->rq_flags & RQF_ELV))
+		plug->has_elevator = true;
+	rq->rq_next = NULL;
+	rq_list_add(&plug->mq_list, rq);
+	plug->rq_count++;
 }
 
 /*
@@ -2389,7 +2435,7 @@  void blk_mq_submit_bio(struct bio *bio)
 	const int is_flush_fua = op_is_flush(bio->bi_opf);
 	struct request *rq;
 	struct blk_plug *plug;
-	struct request *same_queue_rq = NULL;
+	bool same_queue_rq = false;
 	unsigned int nr_segs = 1;
 	blk_status_t ret;
 
@@ -2463,15 +2509,17 @@  void blk_mq_submit_bio(struct bio *bio)
 		 * IO may benefit a lot from plug merging.
 		 */
 		unsigned int request_count = plug->rq_count;
-		struct request *last = NULL;
+		struct request *tmp = NULL;
 
-		if (!request_count)
+		if (!request_count) {
 			trace_block_plug(q);
-		else
-			last = list_entry_rq(plug->mq_list.prev);
+		} else if (!blk_queue_nomerges(q)) {
+			tmp = rq_list_peek(&plug->mq_list);
+			if (blk_rq_bytes(tmp) < BLK_PLUG_FLUSH_SIZE)
+				tmp = NULL;
+		}
 
-		if (request_count >= blk_plug_max_rq_count(plug) || (last &&
-		    blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
+		if (request_count >= blk_plug_max_rq_count(plug) || tmp) {
 			blk_flush_plug_list(plug, false);
 			trace_block_plug(q);
 		}
@@ -2481,6 +2529,8 @@  void blk_mq_submit_bio(struct bio *bio)
 		/* Insert the request at the IO scheduler queue */
 		blk_mq_sched_insert_request(rq, false, true, true);
 	} else if (plug && !blk_queue_nomerges(q)) {
+		struct request *first_rq = NULL;
+
 		/*
 		 * We do limited plugging. If the bio can be merged, do that.
 		 * Otherwise the existing request in the plug list will be
@@ -2488,19 +2538,17 @@  void blk_mq_submit_bio(struct bio *bio)
 		 * The plug list might get flushed before this. If that happens,
 		 * the plug list is empty, and same_queue_rq is invalid.
 		 */
-		if (list_empty(&plug->mq_list))
-			same_queue_rq = NULL;
-		if (same_queue_rq) {
-			list_del_init(&same_queue_rq->queuelist);
+		if (!rq_list_empty(plug->mq_list) && same_queue_rq) {
+			first_rq = rq_list_pop(&plug->mq_list);
 			plug->rq_count--;
 		}
 		blk_add_rq_to_plug(plug, rq);
 		trace_block_plug(q);
 
-		if (same_queue_rq) {
+		if (first_rq) {
 			trace_block_unplug(q, 1, true);
-			blk_mq_try_issue_directly(same_queue_rq->mq_hctx,
-						  same_queue_rq);
+			blk_mq_try_issue_directly(first_rq->mq_hctx,
+						  first_rq);
 		}
 	} else if ((q->nr_hw_queues > 1 && is_sync) ||
 		   !rq->mq_hctx->dispatch_busy) {
diff --git a/block/blk.h b/block/blk.h
index fdfaa6896fc4..c15d1ab224b8 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -216,7 +216,7 @@  void blk_add_timer(struct request *req);
 void blk_print_req_error(struct request *req, blk_status_t status);
 
 bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
-		unsigned int nr_segs, struct request **same_queue_rq);
+		unsigned int nr_segs, bool *same_queue_rq);
 bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
 			struct bio *bio, unsigned int nr_segs);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 43dda725dcae..c6a6402cb1a1 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -727,7 +727,7 @@  extern void blk_set_queue_dying(struct request_queue *);
  * schedule() where blk_schedule_flush_plug() is called.
  */
 struct blk_plug {
-	struct list_head mq_list; /* blk-mq requests */
+	struct request *mq_list; /* blk-mq requests */
 
 	/* if ios_left is > 1, we can batch tag/rq allocations */
 	struct request *cached_rq;
@@ -736,6 +736,7 @@  struct blk_plug {
 	unsigned short rq_count;
 
 	bool multiple_queues;
+	bool has_elevator;
 	bool nowait;
 
 	struct list_head cb_list; /* md requires an unplug callback */
@@ -776,8 +777,7 @@  static inline bool blk_needs_flush_plug(struct task_struct *tsk)
 	struct blk_plug *plug = tsk->plug;
 
 	return plug &&
-		 (!list_empty(&plug->mq_list) ||
-		 !list_empty(&plug->cb_list));
+		 (plug->mq_list || !list_empty(&plug->cb_list));
 }
 
 int blkdev_issue_flush(struct block_device *bdev);

[07/14] block: change plugging to use a singly linked list

Commit Message

Comments

Patch