diff mbox series

[-next,RFC,v3,2/8] blk-mq: call 'bt_wait_ptr()' later in blk_mq_get_tag()

Message ID 20220415101053.554495-3-yukuai3@huawei.com (mailing list archive)
State New, archived
Headers show
Series improve tag allocation under heavy load | expand

Commit Message

Yu Kuai April 15, 2022, 10:10 a.m. UTC
bt_wait_ptr() increments 'wait_index'. However, if blk_mq_get_tag()
gets a tag successfully after bt_wait_ptr() is called and before
sbitmap_prepare_to_wait() is called, then that 'ws' is skipped. This
behavior might cause the 8 waitqueues to become unbalanced.

Moving bt_wait_ptr() later should reduce the problem when the disk is
under high I/O pressure.

In the meantime, instead of calling bt_wait_ptr() on every loop
iteration, call bt_wait_ptr() only if the destination hw queue has
changed, which should reduce the unfairness further.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 block/blk-mq-tag.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)
diff mbox series

Patch

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 68ac23d0b640..5ad85063e91e 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -131,7 +131,7 @@  unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
 {
 	struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
 	struct sbitmap_queue *bt;
-	struct sbq_wait_state *ws;
+	struct sbq_wait_state *ws = NULL;
 	DEFINE_SBQ_WAIT(wait);
 	unsigned int tag_offset;
 	int tag;
@@ -155,7 +155,6 @@  unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
 	if (data->flags & BLK_MQ_REQ_NOWAIT)
 		return BLK_MQ_NO_TAG;
 
-	ws = bt_wait_ptr(bt, data->hctx);
 	do {
 		struct sbitmap_queue *bt_prev;
 
@@ -174,6 +173,8 @@  unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
 		if (tag != BLK_MQ_NO_TAG)
 			break;
 
+		if (!ws)
+			ws = bt_wait_ptr(bt, data->hctx);
 		sbitmap_prepare_to_wait(bt, ws, &wait, TASK_UNINTERRUPTIBLE);
 
 		tag = __blk_mq_get_tag(data, bt);
@@ -199,10 +200,10 @@  unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
 		 * previous queue for compensating the wake up miss, so
 		 * other allocations on previous queue won't be starved.
 		 */
-		if (bt != bt_prev)
+		if (bt != bt_prev) {
 			sbitmap_queue_wake_up(bt_prev);
-
-		ws = bt_wait_ptr(bt, data->hctx);
+			ws = bt_wait_ptr(bt, data->hctx);
+		}
 	} while (1);
 
 	sbitmap_finish_wait(bt, ws, &wait);