[v3] block: Improve IOPS by removing the fairness code

Message ID	20240403211044.434916-1-bvanassche@acm.org (mailing list archive)
State	New, archived
Headers	show Received: from 009.lax.mailroute.net (009.lax.mailroute.net [199.89.1.12]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 2995C2F24 for <linux-block@vger.kernel.org>; Wed, 3 Apr 2024 21:11:32 +0000 (UTC) sender: bvanassche@acm.org) by 009.lax.mailroute.net (Postfix) with ESMTPSA id 4V8y912ZVQzlgTHp; Wed, 3 Apr 2024 21:11:17 +0000 (UTC) From: Bart Van Assche <bvanassche@acm.org> To: Jens Axboe <axboe@kernel.dk> Cc: linux-block@vger.kernel.org, Christoph Hellwig <hch@lst.de>, Bart Van Assche <bvanassche@acm.org>, Hannes Reinecke <hare@suse.de>, Ming Lei <ming.lei@redhat.com>, Yu Kuai <yukuai3@huawei.com> Subject: [PATCH v3] block: Improve IOPS by removing the fairness code Date: Wed, 3 Apr 2024 14:10:44 -0700 Message-ID: <20240403211044.434916-1-bvanassche@acm.org> Precedence: bulk MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable
Series	[v3] block: Improve IOPS by removing the fairness code \| expand [v3] block: Improve IOPS by removing the fairness code

diff --git a/block/blk-core.c b/block/blk-core.c index a16b5abdbbf5..57dfa4612b43 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -425,8 +425,6 @@ struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id) q->node = node_id; - atomic_set(&q->nr_active_requests_shared_tags, 0); - timer_setup(&q->timeout, blk_rq_timed_out_timer, 0); INIT_WORK(&q->timeout_work, blk_timeout_work); INIT_LIST_HEAD(&q->icq_list); diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 94668e72ab09..8e0c7774d6ec 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -479,11 +479,31 @@ static int hctx_sched_tags_bitmap_show(void *data, struct seq_file *m) return res; } +struct count_active_params { + struct blk_mq_hw_ctx *hctx; + int *active; +}; + +static bool hctx_count_active(struct request *rq, void *data) +{ + const struct count_active_params *params = data; + + if (rq->mq_hctx == params->hctx) + (*params->active)++; + + return true; +} + static int hctx_active_show(void *data, struct seq_file *m) { struct blk_mq_hw_ctx *hctx = data; + int active = 0; + struct count_active_params params = { .hctx = hctx, .active = &active }; + + blk_mq_all_tag_iter(hctx->sched_tags ?: hctx->tags, hctx_count_active, + &params); - seq_printf(m, "%d\n", __blk_mq_active_requests(hctx)); + seq_printf(m, "%d\n", active); return 0; } diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index cc57e2dd9a0b..25334bfcabf8 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -105,10 +105,6 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx) static int __blk_mq_get_tag(struct blk_mq_alloc_data *data, struct sbitmap_queue *bt) { - if (!data->q->elevator && !(data->flags & BLK_MQ_REQ_RESERVED) && - !hctx_may_queue(data->hctx, bt)) - return BLK_MQ_NO_TAG; - if (data->shallow_depth) return sbitmap_queue_get_shallow(bt, data->shallow_depth); else diff --git a/block/blk-mq.c b/block/blk-mq.c index b8dbfed8b28b..34060d885c5a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -425,8 +425,6 @@ __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data) rq_list_add(data->cached_rq, rq); nr++; } - if (!(data->rq_flags & RQF_SCHED_TAGS)) - blk_mq_add_active_requests(data->hctx, nr); /* caller already holds a reference, add for remainder */ percpu_ref_get_many(&data->q->q_usage_counter, nr - 1); data->nr_tags -= nr; @@ -511,8 +509,6 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data) goto retry; } - if (!(data->rq_flags & RQF_SCHED_TAGS)) - blk_mq_inc_active_requests(data->hctx); rq = blk_mq_rq_ctx_init(data, blk_mq_tags_from_data(data), tag); blk_mq_rq_time_init(rq, alloc_time_ns); return rq; @@ -672,8 +668,6 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, tag = blk_mq_get_tag(&data); if (tag == BLK_MQ_NO_TAG) goto out_queue_exit; - if (!(data.rq_flags & RQF_SCHED_TAGS)) - blk_mq_inc_active_requests(data.hctx); rq = blk_mq_rq_ctx_init(&data, blk_mq_tags_from_data(&data), tag); blk_mq_rq_time_init(rq, alloc_time_ns); rq->__data_len = 0; @@ -713,10 +707,8 @@ static void __blk_mq_free_request(struct request *rq) blk_pm_mark_last_busy(rq); rq->mq_hctx = NULL; - if (rq->tag != BLK_MQ_NO_TAG) { - blk_mq_dec_active_requests(hctx); + if (rq->tag != BLK_MQ_NO_TAG) blk_mq_put_tag(hctx->tags, ctx, rq->tag); - } if (sched_tag != BLK_MQ_NO_TAG) blk_mq_put_tag(hctx->sched_tags, ctx, sched_tag); blk_mq_sched_restart(hctx); @@ -1065,8 +1057,6 @@ static inline void blk_mq_flush_tag_batch(struct blk_mq_hw_ctx *hctx, { struct request_queue *q = hctx->queue; - blk_mq_sub_active_requests(hctx, nr_tags); - blk_mq_put_tags(hctx->tags, tag_array, nr_tags); percpu_ref_put_many(&q->q_usage_counter, nr_tags); } @@ -1761,9 +1751,6 @@ bool __blk_mq_alloc_driver_tag(struct request *rq) if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) { bt = &rq->mq_hctx->tags->breserved_tags; tag_offset = 0; - } else { - if (!hctx_may_queue(rq->mq_hctx, bt)) - return false; } tag = __sbitmap_queue_get(bt); @@ -1771,7 +1758,6 @@ bool __blk_mq_alloc_driver_tag(struct request *rq) return false; rq->tag = tag + tag_offset; - blk_mq_inc_active_requests(rq->mq_hctx); return true; } @@ -3729,7 +3715,6 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set, if (!zalloc_cpumask_var_node(&hctx->cpumask, gfp, node)) goto free_hctx; - atomic_set(&hctx->nr_active, 0); if (node == NUMA_NO_NODE) node = set->numa_node; hctx->numa_node = node; diff --git a/block/blk-mq.h b/block/blk-mq.h index f75a9ecfebde..ac29a30e4322 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -271,70 +271,9 @@ static inline int blk_mq_get_rq_budget_token(struct request *rq) return -1; } -static inline void __blk_mq_add_active_requests(struct blk_mq_hw_ctx *hctx, - int val) -{ - if (blk_mq_is_shared_tags(hctx->flags)) - atomic_add(val, &hctx->queue->nr_active_requests_shared_tags); - else - atomic_add(val, &hctx->nr_active); -} - -static inline void __blk_mq_inc_active_requests(struct blk_mq_hw_ctx *hctx) -{ - __blk_mq_add_active_requests(hctx, 1); -} - -static inline void __blk_mq_sub_active_requests(struct blk_mq_hw_ctx *hctx, - int val) -{ - if (blk_mq_is_shared_tags(hctx->flags)) - atomic_sub(val, &hctx->queue->nr_active_requests_shared_tags); - else - atomic_sub(val, &hctx->nr_active); -} - -static inline void __blk_mq_dec_active_requests(struct blk_mq_hw_ctx *hctx) -{ - __blk_mq_sub_active_requests(hctx, 1); -} - -static inline void blk_mq_add_active_requests(struct blk_mq_hw_ctx *hctx, - int val) -{ - if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) - __blk_mq_add_active_requests(hctx, val); -} - -static inline void blk_mq_inc_active_requests(struct blk_mq_hw_ctx *hctx) -{ - if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) - __blk_mq_inc_active_requests(hctx); -} - -static inline void blk_mq_sub_active_requests(struct blk_mq_hw_ctx *hctx, - int val) -{ - if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) - __blk_mq_sub_active_requests(hctx, val); -} - -static inline void blk_mq_dec_active_requests(struct blk_mq_hw_ctx *hctx) -{ - if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) - __blk_mq_dec_active_requests(hctx); -} - -static inline int __blk_mq_active_requests(struct blk_mq_hw_ctx *hctx) -{ - if (blk_mq_is_shared_tags(hctx->flags)) - return atomic_read(&hctx->queue->nr_active_requests_shared_tags); - return atomic_read(&hctx->nr_active); -} static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, struct request *rq) { - blk_mq_dec_active_requests(hctx); blk_mq_put_tag(hctx->tags, rq->mq_ctx, rq->tag); rq->tag = BLK_MQ_NO_TAG; } @@ -407,45 +346,6 @@ static inline void blk_mq_free_requests(struct list_head *list) } } -/* - * For shared tag users, we track the number of currently active users - * and attempt to provide a fair share of the tag depth for each of them. - */ -static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx, - struct sbitmap_queue *bt) -{ - unsigned int depth, users; - - if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) - return true; - - /* - * Don't try dividing an ant - */ - if (bt->sb.depth == 1) - return true; - - if (blk_mq_is_shared_tags(hctx->flags)) { - struct request_queue *q = hctx->queue; - - if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) - return true; - } else { - if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) - return true; - } - - users = READ_ONCE(hctx->tags->active_queues); - if (!users) - return true; - - /* - * Allow at least some tags - */ - depth = max((bt->sb.depth + users - 1) / users, 4U); - return __blk_mq_active_requests(hctx) < depth; -} - /* run the code block in @dispatch_ops with rcu/srcu read lock held */ #define __blk_mq_run_dispatch_ops(q, check_sleep, dispatch_ops) \ do { \ diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index d3d8fd8e229b..a066ea77148f 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -398,12 +398,6 @@ struct blk_mq_hw_ctx { /** @queue_num: Index of this hardware queue. */ unsigned int queue_num; - /** - * @nr_active: Number of active requests. Only used when a tag set is - * shared across request queues. - */ - atomic_t nr_active; - /** @cpuhp_online: List to store request if CPU is going to die */ struct hlist_node cpuhp_online; /** @cpuhp_dead: List to store request if some CPU die. */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c3e8f7cf96be..ed1e807f42a2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -445,8 +445,6 @@ struct request_queue { struct timer_list timeout; struct work_struct timeout_work; - atomic_t nr_active_requests_shared_tags; - unsigned int required_elevator_features; struct blk_mq_tags *sched_shared_tags;

[v3] block: Improve IOPS by removing the fairness code

Commit Message

Patch