
block: support changing scheduler & nr_requests together

Message ID: 20240409060633.220596-1-yang.yang@vivo.com

Commit Message

Yang Yang, April 9, 2024, 6:06 a.m. UTC
When switching the scheduler, nr_requests is reset to a default value.
If a different nr_requests is wanted after the switch, it has to be
written in a separate step, which means the nr_requests update path
runs twice. Updating nr_requests may cost dozens of milliseconds in our
test environment.

[2.091825] init: Command 'write /sys/block/sda/queue/nr_requests 96' took 24ms
[2.123248] init: Command 'write /sys/block/sdb/queue/nr_requests 96' took 31ms
[2.146620] init: Command 'write /sys/block/sdc/queue/nr_requests 96' took 22ms
[2.171127] init: Command 'write /sys/block/sdd/queue/nr_requests 96' took 24ms
[2.207286] init: Command 'write /sys/block/sde/queue/nr_requests 96' took 36ms
[2.222515] init: Command 'write /sys/block/sdf/queue/nr_requests 96' took 15ms
[2.246704] init: Command 'write /sys/block/sdg/queue/nr_requests 96' took 24ms

So provide a way to configure the scheduler and nr_requests together.
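
For example, with this change both settings can be applied in a single
write (a usage sketch; the device name below is only a placeholder):

  # switch to mq-deadline and set nr_requests to 256 in one step
  echo mq-deadline:256 > /sys/block/sda/queue/scheduler

  # writing only a scheduler name still works as before and keeps the
  # default nr_requests
  echo mq-deadline > /sys/block/sda/queue/scheduler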

Signed-off-by: Yang Yang <yang.yang@vivo.com>
---
 Documentation/ABI/stable/sysfs-block |  2 ++
 block/blk-mq-sched.c                 | 10 +++++---
 block/blk-mq-sched.h                 |  3 ++-
 block/blk-mq.c                       |  2 +-
 block/blk.h                          |  3 ++-
 block/elevator.c                     | 34 +++++++++++++++++++++++-----
 6 files changed, 42 insertions(+), 12 deletions(-)

Patch

diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block
index 1fe9a553c37b..3f554fc379da 100644
--- a/Documentation/ABI/stable/sysfs-block
+++ b/Documentation/ABI/stable/sysfs-block
@@ -566,6 +566,8 @@  Description:
 		device to that new IO scheduler. Note that writing an IO
 		scheduler name to this file will attempt to load that IO
 		scheduler module, if it isn't already present in the system.
+		The scheduler and nr_requests may also be set together,
+		e.g. echo mq-deadline:256 > scheduler
 
 
 What:		/sys/block/<disk>/queue/stable_writes
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 451a2c1f1f32..e7cd00e238db 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -438,7 +438,8 @@  static int blk_mq_init_sched_shared_tags(struct request_queue *queue)
 }
 
 /* caller must have a reference to @e, will grab another one if successful */
-int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
+int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e,
+		unsigned long nr_rqs)
 {
 	unsigned int flags = q->tag_set->flags;
 	struct blk_mq_hw_ctx *hctx;
@@ -451,8 +452,11 @@  int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 	 * since we don't split into sync/async like the old code did.
 	 * Additionally, this is a per-hw queue depth.
 	 */
-	q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
-				   BLKDEV_DEFAULT_RQ);
+	if (nr_rqs)
+		q->nr_requests = nr_rqs;
+	else
+		q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
+				BLKDEV_DEFAULT_RQ);
 
 	if (blk_mq_is_shared_tags(flags)) {
 		ret = blk_mq_init_sched_shared_tags(q);
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index 1326526bb733..11631840341e 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -18,7 +18,8 @@  void __blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx);
 
 void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
 
-int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e);
+int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e,
+		unsigned long nr_rqs);
 void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e);
 void blk_mq_sched_free_rqs(struct request_queue *q);
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 32afb87efbd0..4ecb9db62337 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -4707,7 +4707,7 @@  static void blk_mq_elv_switch_back(struct list_head *head,
 	kfree(qe);
 
 	mutex_lock(&q->sysfs_lock);
-	elevator_switch(q, t);
+	elevator_switch(q, t, 0);
 	/* drop the reference acquired in blk_mq_elv_switch_none */
 	elevator_put(t);
 	mutex_unlock(&q->sysfs_lock);
diff --git a/block/blk.h b/block/blk.h
index d9f584984bc4..d52e6e087117 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -287,7 +287,8 @@  bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
 
 bool blk_insert_flush(struct request *rq);
 
-int elevator_switch(struct request_queue *q, struct elevator_type *new_e);
+int elevator_switch(struct request_queue *q, struct elevator_type *new_e,
+		unsigned long nr_rqs);
 void elevator_disable(struct request_queue *q);
 void elevator_exit(struct request_queue *q);
 int elv_register_queue(struct request_queue *q, bool uevent);
diff --git a/block/elevator.c b/block/elevator.c
index 5ff093cb3cf8..07ed7f544ef6 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -639,7 +639,7 @@  void elevator_init_mq(struct request_queue *q)
 	blk_mq_freeze_queue(q);
 	blk_mq_cancel_work_sync(q);
 
-	err = blk_mq_init_sched(q, e);
+	err = blk_mq_init_sched(q, e, 0);
 
 	blk_mq_unfreeze_queue(q);
 
@@ -657,7 +657,8 @@  void elevator_init_mq(struct request_queue *q)
  * If switching fails, we are most likely running out of memory and not able
  * to restore the old io scheduler, so leaving the io scheduler being none.
  */
-int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
+int elevator_switch(struct request_queue *q, struct elevator_type *new_e,
+		unsigned long nr_rqs)
 {
 	int ret;
 
@@ -671,7 +672,7 @@  int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 		elevator_exit(q);
 	}
 
-	ret = blk_mq_init_sched(q, new_e);
+	ret = blk_mq_init_sched(q, new_e, nr_rqs);
 	if (ret)
 		goto out_unfreeze;
 
@@ -715,7 +716,8 @@  void elevator_disable(struct request_queue *q)
 /*
  * Switch this queue to the given IO scheduler.
  */
-static int elevator_change(struct request_queue *q, const char *elevator_name)
+static int elevator_change(struct request_queue *q, const char *elevator_name,
+		unsigned long nr_rqs)
 {
 	struct elevator_type *e;
 	int ret;
@@ -740,7 +742,7 @@  static int elevator_change(struct request_queue *q, const char *elevator_name)
 		if (!e)
 			return -EINVAL;
 	}
-	ret = elevator_switch(q, e);
+	ret = elevator_switch(q, e, nr_rqs);
 	elevator_put(e);
 	return ret;
 }
@@ -749,13 +751,33 @@  ssize_t elv_iosched_store(struct request_queue *q, const char *buf,
 			  size_t count)
 {
 	char elevator_name[ELV_NAME_MAX];
+	unsigned long nr_rqs = 0;
+	const char *num;
+	char *tmp;
 	int ret;
 
 	if (!elv_support_iosched(q))
 		return count;
 
+	/*
+	 * Support setting scheduler & nr_requests together
+	 * e.g. echo mq-deadline:256 > scheduler
+	 */
+	tmp = strchr(buf, ':');
+	if (tmp) {
+		*tmp = '\0';
+		num = tmp + 1;
+
+		ret = kstrtoul(num, 10, &nr_rqs);
+		if (ret || nr_rqs > MAX_SCHED_RQ)
+			return -EINVAL;
+
+		if (nr_rqs < BLKDEV_MIN_RQ)
+			nr_rqs = BLKDEV_MIN_RQ;
+	}
+
 	strscpy(elevator_name, buf, sizeof(elevator_name));
-	ret = elevator_change(q, strstrip(elevator_name));
+	ret = elevator_change(q, strstrip(elevator_name), nr_rqs);
 	if (!ret)
 		return count;
 	return ret;