Message ID | 20240122235332.2299150-1-bvanassche@acm.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | block/mq-deadline: Optimize request insertion | expand |
On 1/23/24 08:53, Bart Van Assche wrote: > Reduce lock contention on dd->lock by calling dd_insert_request() from > inside the dispatch callback instead of from the insert callback. This > patch is inspired by a patch from Jens. I supposed this is a followup of the performance discussion with Jens. If so, can you add performance numbers here and so justifying the change ? Otherwise, it is hard to figure out the effect of the patch and so why you are making this change. > > Signed-off-by: Bart Van Assche <bvanassche@acm.org> > --- > block/mq-deadline.c | 70 +++++++++++++++++++++++++++++++++++++-------- > 1 file changed, 58 insertions(+), 12 deletions(-) > > diff --git a/block/mq-deadline.c b/block/mq-deadline.c > index 83bc21801226..d11b8604f046 100644 > --- a/block/mq-deadline.c > +++ b/block/mq-deadline.c > @@ -89,11 +89,15 @@ struct deadline_data { > */ > struct { > spinlock_t lock; > + spinlock_t insert_lock; > spinlock_t zone_lock; > } ____cacheline_aligned_in_smp; > > unsigned long run_state; > > + struct list_head at_head; > + struct list_head at_tail; > + > struct dd_per_prio per_prio[DD_PRIO_COUNT]; > > /* Data direction of latest dispatched request. */ > @@ -120,6 +124,9 @@ static const enum dd_prio ioprio_class_to_prio[] = { > [IOPRIO_CLASS_IDLE] = DD_IDLE_PRIO, > }; > > +static void dd_insert_request(struct request_queue *q, struct request *rq, > + blk_insert_t flags, struct list_head *free); > + > static inline struct rb_root * > deadline_rb_root(struct dd_per_prio *per_prio, struct request *rq) > { > @@ -592,6 +599,35 @@ static struct request *dd_dispatch_prio_aged_requests(struct deadline_data *dd, > return NULL; > } > > +static void __dd_do_insert(struct request_queue *q, blk_insert_t flags, > + struct list_head *list, struct list_head *free) > +{ > + while (!list_empty(list)) { > + struct request *rq; > + > + rq = list_first_entry(list, struct request, queuelist); > + list_del_init(&rq->queuelist); > + dd_insert_request(q, rq, flags, free); > + } > +} > + > +static void dd_do_insert(struct request_queue *q, struct list_head *free) > +{ > + struct deadline_data *dd = q->elevator->elevator_data; > + LIST_HEAD(at_head); > + LIST_HEAD(at_tail); > + > + lockdep_assert_held(&dd->lock); > + > + spin_lock(&dd->insert_lock); > + list_splice_init(&dd->at_head, &at_head); > + list_splice_init(&dd->at_tail, &at_tail); > + spin_unlock(&dd->insert_lock); > + > + __dd_do_insert(q, BLK_MQ_INSERT_AT_HEAD, &at_head, free); > + __dd_do_insert(q, 0, &at_tail, free); > +} > + > /* > * Called from blk_mq_run_hw_queue() -> __blk_mq_sched_dispatch_requests(). > * > @@ -606,6 +642,7 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx) > const unsigned long now = jiffies; > struct request *rq; > enum dd_prio prio; > + LIST_HEAD(free); > > /* > * If someone else is already dispatching, skip this one. This will > @@ -620,6 +657,11 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx) > return NULL; > > spin_lock(&dd->lock); > + /* > + * Request insertion happens from inside the dispatch callback instead > + * of inside the insert callback to minimize contention on dd->lock. > + */ > + dd_do_insert(hctx->queue, &free); > rq = dd_dispatch_prio_aged_requests(dd, now); > if (rq) > goto unlock; > @@ -638,6 +680,8 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx) > clear_bit(DD_DISPATCHING, &dd->run_state); > spin_unlock(&dd->lock); > > + blk_mq_free_requests(&free); > + > return rq; > } > > @@ -727,8 +771,12 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e) > eq->elevator_data = dd; > > spin_lock_init(&dd->lock); > + spin_lock_init(&dd->insert_lock); > spin_lock_init(&dd->zone_lock); > > + INIT_LIST_HEAD(&dd->at_head); > + INIT_LIST_HEAD(&dd->at_tail); > + > for (prio = 0; prio <= DD_PRIO_MAX; prio++) { > struct dd_per_prio *per_prio = &dd->per_prio[prio]; > > @@ -899,19 +947,13 @@ static void dd_insert_requests(struct blk_mq_hw_ctx *hctx, > { > struct request_queue *q = hctx->queue; > struct deadline_data *dd = q->elevator->elevator_data; > - LIST_HEAD(free); > - > - spin_lock(&dd->lock); > - while (!list_empty(list)) { > - struct request *rq; > > - rq = list_first_entry(list, struct request, queuelist); > - list_del_init(&rq->queuelist); > - dd_insert_request(q, rq, flags, &free); > - } > - spin_unlock(&dd->lock); > - > - blk_mq_free_requests(&free); > + spin_lock(&dd->insert_lock); > + if (flags & BLK_MQ_INSERT_AT_HEAD) > + list_splice_init(list, &dd->at_head); > + else > + list_splice_init(list, &dd->at_tail); > + spin_unlock(&dd->insert_lock); > } > > /* Callback from inside blk_mq_rq_ctx_init(). */ > @@ -990,6 +1032,10 @@ static bool dd_has_work(struct blk_mq_hw_ctx *hctx) > struct deadline_data *dd = hctx->queue->elevator->elevator_data; > enum dd_prio prio; > > + if (!list_empty_careful(&dd->at_head) || > + !list_empty_careful(&dd->at_tail)) > + return true; > + > for (prio = 0; prio <= DD_PRIO_MAX; prio++) > if (dd_has_work_for_prio(&dd->per_prio[prio])) > return true;
On 1/22/24 7:30 PM, Damien Le Moal wrote: > On 1/23/24 08:53, Bart Van Assche wrote: >> Reduce lock contention on dd->lock by calling dd_insert_request() from >> inside the dispatch callback instead of from the insert callback. This >> patch is inspired by a patch from Jens. > > I supposed this is a followup of the performance discussion with Jens. If so, > can you add performance numbers here and so justifying the change ? > Otherwise, it is hard to figure out the effect of the patch and so why you are > making this change. Numbers are in here [1], I will fold them into the commit message. It's worth, as is mentioned in my commit message, that this also relies on the previous patches to get there. It just leaves the insertion side contention as the dominant one until this patch is applied. Before: Device IOPS sys contention diff ==================================================== null_blk 879K 89% 93.6% nvme0n1 901K 86% 94.5% and after this and the previous two patches: Device IOPS sys contention diff ==================================================== null_blk 2867K 11.1% ~6.0% +226% nvme0n1 3162K 9.9% ~5.0% +250% [1] https://git.kernel.dk/cgit/linux/commit/?h=block-deadline&id=678abafc92bae49e6f4a92cea05dcd5f39928054
On 1/22/24 18:30, Damien Le Moal wrote: > On 1/23/24 08:53, Bart Van Assche wrote: >> Reduce lock contention on dd->lock by calling dd_insert_request() from >> inside the dispatch callback instead of from the insert callback. This >> patch is inspired by a patch from Jens. > > I supposed this is a followup of the performance discussion with Jens. If so, > can you add performance numbers here and so justifying the change ? > Otherwise, it is hard to figure out the effect of the patch and so why you are > making this change. Hi Damien, If I replace Jens' patch 3/4 with this patch I achieve 20% higher IOPS for 1..4 CPU cores (null_blk + fio in an x86 VM). Bart.
diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 83bc21801226..d11b8604f046 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -89,11 +89,15 @@ struct deadline_data { */ struct { spinlock_t lock; + spinlock_t insert_lock; spinlock_t zone_lock; } ____cacheline_aligned_in_smp; unsigned long run_state; + struct list_head at_head; + struct list_head at_tail; + struct dd_per_prio per_prio[DD_PRIO_COUNT]; /* Data direction of latest dispatched request. */ @@ -120,6 +124,9 @@ static const enum dd_prio ioprio_class_to_prio[] = { [IOPRIO_CLASS_IDLE] = DD_IDLE_PRIO, }; +static void dd_insert_request(struct request_queue *q, struct request *rq, + blk_insert_t flags, struct list_head *free); + static inline struct rb_root * deadline_rb_root(struct dd_per_prio *per_prio, struct request *rq) { @@ -592,6 +599,35 @@ static struct request *dd_dispatch_prio_aged_requests(struct deadline_data *dd, return NULL; } +static void __dd_do_insert(struct request_queue *q, blk_insert_t flags, + struct list_head *list, struct list_head *free) +{ + while (!list_empty(list)) { + struct request *rq; + + rq = list_first_entry(list, struct request, queuelist); + list_del_init(&rq->queuelist); + dd_insert_request(q, rq, flags, free); + } +} + +static void dd_do_insert(struct request_queue *q, struct list_head *free) +{ + struct deadline_data *dd = q->elevator->elevator_data; + LIST_HEAD(at_head); + LIST_HEAD(at_tail); + + lockdep_assert_held(&dd->lock); + + spin_lock(&dd->insert_lock); + list_splice_init(&dd->at_head, &at_head); + list_splice_init(&dd->at_tail, &at_tail); + spin_unlock(&dd->insert_lock); + + __dd_do_insert(q, BLK_MQ_INSERT_AT_HEAD, &at_head, free); + __dd_do_insert(q, 0, &at_tail, free); +} + /* * Called from blk_mq_run_hw_queue() -> __blk_mq_sched_dispatch_requests(). * @@ -606,6 +642,7 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx) const unsigned long now = jiffies; struct request *rq; enum dd_prio prio; + LIST_HEAD(free); /* * If someone else is already dispatching, skip this one. This will @@ -620,6 +657,11 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx) return NULL; spin_lock(&dd->lock); + /* + * Request insertion happens from inside the dispatch callback instead + * of inside the insert callback to minimize contention on dd->lock. + */ + dd_do_insert(hctx->queue, &free); rq = dd_dispatch_prio_aged_requests(dd, now); if (rq) goto unlock; @@ -638,6 +680,8 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx) clear_bit(DD_DISPATCHING, &dd->run_state); spin_unlock(&dd->lock); + blk_mq_free_requests(&free); + return rq; } @@ -727,8 +771,12 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e) eq->elevator_data = dd; spin_lock_init(&dd->lock); + spin_lock_init(&dd->insert_lock); spin_lock_init(&dd->zone_lock); + INIT_LIST_HEAD(&dd->at_head); + INIT_LIST_HEAD(&dd->at_tail); + for (prio = 0; prio <= DD_PRIO_MAX; prio++) { struct dd_per_prio *per_prio = &dd->per_prio[prio]; @@ -899,19 +947,13 @@ static void dd_insert_requests(struct blk_mq_hw_ctx *hctx, { struct request_queue *q = hctx->queue; struct deadline_data *dd = q->elevator->elevator_data; - LIST_HEAD(free); - - spin_lock(&dd->lock); - while (!list_empty(list)) { - struct request *rq; - rq = list_first_entry(list, struct request, queuelist); - list_del_init(&rq->queuelist); - dd_insert_request(q, rq, flags, &free); - } - spin_unlock(&dd->lock); - - blk_mq_free_requests(&free); + spin_lock(&dd->insert_lock); + if (flags & BLK_MQ_INSERT_AT_HEAD) + list_splice_init(list, &dd->at_head); + else + list_splice_init(list, &dd->at_tail); + spin_unlock(&dd->insert_lock); } /* Callback from inside blk_mq_rq_ctx_init(). */ @@ -990,6 +1032,10 @@ static bool dd_has_work(struct blk_mq_hw_ctx *hctx) struct deadline_data *dd = hctx->queue->elevator->elevator_data; enum dd_prio prio; + if (!list_empty_careful(&dd->at_head) || + !list_empty_careful(&dd->at_tail)) + return true; + for (prio = 0; prio <= DD_PRIO_MAX; prio++) if (dd_has_work_for_prio(&dd->per_prio[prio])) return true;
Reduce lock contention on dd->lock by calling dd_insert_request() from inside the dispatch callback instead of from the insert callback. This patch is inspired by a patch from Jens. Signed-off-by: Bart Van Assche <bvanassche@acm.org> --- block/mq-deadline.c | 70 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 12 deletions(-)