block/mq-deadline: Optimize request insertion

Message ID: 20240122235332.2299150-1-bvanassche@acm.org (mailing list archive)
State: New, archived
Series: block/mq-deadline: Optimize request insertion

Commit Message

Bart Van Assche Jan. 22, 2024, 11:53 p.m. UTC
Reduce lock contention on dd->lock by calling dd_insert_request() from
inside the dispatch callback instead of from the insert callback. This
patch is inspired by a patch from Jens.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 block/mq-deadline.c | 70 +++++++++++++++++++++++++++++++++++++--------
 1 file changed, 58 insertions(+), 12 deletions(-)
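
For readers who want the gist without wading through the whole diff, here is a condensed sketch of the new insertion path, taken from the patch below with priority handling, merging and zone locking omitted: the insert callback only parks requests on staging lists under the new, lightly contended insert_lock, and the dispatch callback drains those lists in one batch under dd->lock, which it already holds for dispatching.

/* Insert side: park the requests on staging lists under insert_lock;
 * dd->lock is no longer taken here. */
static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
			       struct list_head *list, blk_insert_t flags)
{
	struct deadline_data *dd = hctx->queue->elevator->elevator_data;

	spin_lock(&dd->insert_lock);
	if (flags & BLK_MQ_INSERT_AT_HEAD)
		list_splice_init(list, &dd->at_head);
	else
		list_splice_init(list, &dd->at_tail);
	spin_unlock(&dd->insert_lock);
}

/* Dispatch side: called with dd->lock already held; move the staged requests
 * onto local lists and insert them into the scheduler in one batch. */
static void dd_do_insert(struct request_queue *q, struct list_head *free)
{
	struct deadline_data *dd = q->elevator->elevator_data;
	LIST_HEAD(at_head);
	LIST_HEAD(at_tail);

	lockdep_assert_held(&dd->lock);

	spin_lock(&dd->insert_lock);
	list_splice_init(&dd->at_head, &at_head);
	list_splice_init(&dd->at_tail, &at_tail);
	spin_unlock(&dd->insert_lock);

	__dd_do_insert(q, BLK_MQ_INSERT_AT_HEAD, &at_head, free);
	__dd_do_insert(q, 0, &at_tail, free);
}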

Comments

Damien Le Moal Jan. 23, 2024, 2:30 a.m. UTC | #1
On 1/23/24 08:53, Bart Van Assche wrote:
> Reduce lock contention on dd->lock by calling dd_insert_request() from
> inside the dispatch callback instead of from the insert callback. This
> patch is inspired by a patch from Jens.

I suppose this is a follow-up to the performance discussion with Jens. If so,
can you add performance numbers here to justify the change?
Otherwise, it is hard to figure out the effect of the patch and why you are
making this change.

Jens Axboe Jan. 23, 2024, 2:52 p.m. UTC | #2
On 1/22/24 7:30 PM, Damien Le Moal wrote:
> On 1/23/24 08:53, Bart Van Assche wrote:
>> Reduce lock contention on dd->lock by calling dd_insert_request() from
>> inside the dispatch callback instead of from the insert callback. This
>> patch is inspired by a patch from Jens.
> 
> I suppose this is a follow-up to the performance discussion with Jens. If so,
> can you add performance numbers here to justify the change?
> Otherwise, it is hard to figure out the effect of the patch and why you are
> making this change.

Numbers are in here [1]; I will fold them into the commit message. It's worth
noting, as mentioned in my commit message, that this also relies on the
previous patches to get there. They leave the insertion-side contention as the
dominant one until this patch is applied.

Before:

Device		IOPS	sys	contention	diff
====================================================
null_blk	879K	89%	93.6%
nvme0n1		901K	86%	94.5%

and after this and the previous two patches:

Device		IOPS	sys	contention	diff
====================================================
null_blk	2867K	11.1%	~6.0%		+226%
nvme0n1		3162K	 9.9%	~5.0%		+250%
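
(The diff column is the IOPS gain relative to the baseline above; for example,
for null_blk, 2867K / 879K ≈ 3.26x, i.e. roughly +226%.)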

[1] https://git.kernel.dk/cgit/linux/commit/?h=block-deadline&id=678abafc92bae49e6f4a92cea05dcd5f39928054
Bart Van Assche Jan. 23, 2024, 3:09 p.m. UTC | #3
On 1/22/24 18:30, Damien Le Moal wrote:
> On 1/23/24 08:53, Bart Van Assche wrote:
>> Reduce lock contention on dd->lock by calling dd_insert_request() from
>> inside the dispatch callback instead of from the insert callback. This
>> patch is inspired by a patch from Jens.
> 
> I suppose this is a follow-up to the performance discussion with Jens. If so,
> can you add performance numbers here to justify the change?
> Otherwise, it is hard to figure out the effect of the patch and why you are
> making this change.

Hi Damien,

If I replace Jens' patch 3/4 with this patch, I achieve 20% higher IOPS
for 1..4 CPU cores (null_blk + fio in an x86 VM).

Bart.

Patch

diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 83bc21801226..d11b8604f046 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -89,11 +89,15 @@  struct deadline_data {
 	 */
 	struct {
 		spinlock_t lock;
+		spinlock_t insert_lock;
 		spinlock_t zone_lock;
 	} ____cacheline_aligned_in_smp;
 
 	unsigned long run_state;
 
+	struct list_head at_head;
+	struct list_head at_tail;
+
 	struct dd_per_prio per_prio[DD_PRIO_COUNT];
 
 	/* Data direction of latest dispatched request. */
@@ -120,6 +124,9 @@  static const enum dd_prio ioprio_class_to_prio[] = {
 	[IOPRIO_CLASS_IDLE]	= DD_IDLE_PRIO,
 };
 
+static void dd_insert_request(struct request_queue *q, struct request *rq,
+			      blk_insert_t flags, struct list_head *free);
+
 static inline struct rb_root *
 deadline_rb_root(struct dd_per_prio *per_prio, struct request *rq)
 {
@@ -592,6 +599,35 @@  static struct request *dd_dispatch_prio_aged_requests(struct deadline_data *dd,
 	return NULL;
 }
 
+static void __dd_do_insert(struct request_queue *q, blk_insert_t flags,
+			   struct list_head *list, struct list_head *free)
+{
+	while (!list_empty(list)) {
+		struct request *rq;
+
+		rq = list_first_entry(list, struct request, queuelist);
+		list_del_init(&rq->queuelist);
+		dd_insert_request(q, rq, flags, free);
+	}
+}
+
+static void dd_do_insert(struct request_queue *q, struct list_head *free)
+{
+	struct deadline_data *dd = q->elevator->elevator_data;
+	LIST_HEAD(at_head);
+	LIST_HEAD(at_tail);
+
+	lockdep_assert_held(&dd->lock);
+
+	spin_lock(&dd->insert_lock);
+	list_splice_init(&dd->at_head, &at_head);
+	list_splice_init(&dd->at_tail, &at_tail);
+	spin_unlock(&dd->insert_lock);
+
+	__dd_do_insert(q, BLK_MQ_INSERT_AT_HEAD, &at_head, free);
+	__dd_do_insert(q, 0, &at_tail, free);
+}
+
 /*
  * Called from blk_mq_run_hw_queue() -> __blk_mq_sched_dispatch_requests().
  *
@@ -606,6 +642,7 @@  static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
 	const unsigned long now = jiffies;
 	struct request *rq;
 	enum dd_prio prio;
+	LIST_HEAD(free);
 
 	/*
 	 * If someone else is already dispatching, skip this one. This will
@@ -620,6 +657,11 @@  static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
 		return NULL;
 
 	spin_lock(&dd->lock);
+	/*
+	 * Request insertion happens from inside the dispatch callback instead
+	 * of inside the insert callback to minimize contention on dd->lock.
+	 */
+	dd_do_insert(hctx->queue, &free);
 	rq = dd_dispatch_prio_aged_requests(dd, now);
 	if (rq)
 		goto unlock;
@@ -638,6 +680,8 @@  static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
 	clear_bit(DD_DISPATCHING, &dd->run_state);
 	spin_unlock(&dd->lock);
 
+	blk_mq_free_requests(&free);
+
 	return rq;
 }
 
@@ -727,8 +771,12 @@  static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
 	eq->elevator_data = dd;
 
 	spin_lock_init(&dd->lock);
+	spin_lock_init(&dd->insert_lock);
 	spin_lock_init(&dd->zone_lock);
 
+	INIT_LIST_HEAD(&dd->at_head);
+	INIT_LIST_HEAD(&dd->at_tail);
+
 	for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
 		struct dd_per_prio *per_prio = &dd->per_prio[prio];
 
@@ -899,19 +947,13 @@  static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
 {
 	struct request_queue *q = hctx->queue;
 	struct deadline_data *dd = q->elevator->elevator_data;
-	LIST_HEAD(free);
-
-	spin_lock(&dd->lock);
-	while (!list_empty(list)) {
-		struct request *rq;
 
-		rq = list_first_entry(list, struct request, queuelist);
-		list_del_init(&rq->queuelist);
-		dd_insert_request(q, rq, flags, &free);
-	}
-	spin_unlock(&dd->lock);
-
-	blk_mq_free_requests(&free);
+	spin_lock(&dd->insert_lock);
+	if (flags & BLK_MQ_INSERT_AT_HEAD)
+		list_splice_init(list, &dd->at_head);
+	else
+		list_splice_init(list, &dd->at_tail);
+	spin_unlock(&dd->insert_lock);
 }
 
 /* Callback from inside blk_mq_rq_ctx_init(). */
@@ -990,6 +1032,10 @@  static bool dd_has_work(struct blk_mq_hw_ctx *hctx)
 	struct deadline_data *dd = hctx->queue->elevator->elevator_data;
 	enum dd_prio prio;
 
+	if (!list_empty_careful(&dd->at_head) ||
+	    !list_empty_careful(&dd->at_tail))
+		return true;
+
 	for (prio = 0; prio <= DD_PRIO_MAX; prio++)
 		if (dd_has_work_for_prio(&dd->per_prio[prio]))
 			return true;