
[05/15] block: simplify elevator reset for updating nr_hw_queues

Message ID 20250410133029.2487054-6-ming.lei@redhat.com (mailing list archive)
State New
Series block: unify elevator changing and fix lockdep warning

Commit Message

Ming Lei April 10, 2025, 1:30 p.m. UTC
In blk_mq_update_nr_hw_queues(), nr_hw_queues may change, so elevator has
to be reset after nr_hw_queues is changed.

Now elevator switch isn't allowed during blk_mq_update_nr_hw_queues(), so
we can simply call elevator_change() to reset elevator sched tags after
nr_hw_queues is updated.
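
Concretely, the per-queue handling after the update boils down to the
following (excerpted and lightly annotated from the hunk below):

	list_for_each_entry(q, &set->tag_list, tag_set_list) {
		const char *name = "none";

		mutex_lock(&q->elevator_lock);
		if (q->elevator && !blk_queue_dying(q))
			name = q->elevator->type->elevator_name;
		/* forced switch so sched tags are rebuilt for the new mapping */
		__elevator_change(q, name, true);
		mutex_unlock(&q->elevator_lock);
	}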

Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 block/blk-mq.c   | 99 +++++-------------------------------------------
 block/blk.h      |  4 +-
 block/elevator.c | 12 +++---
 3 files changed, 18 insertions(+), 97 deletions(-)

Comments

Christoph Hellwig April 10, 2025, 2:40 p.m. UTC | #1
On Thu, Apr 10, 2025 at 09:30:17PM +0800, Ming Lei wrote:
> In blk_mq_update_nr_hw_queues(), nr_hw_queues may change, so elevator has
> to be reset after nr_hw_queues is changed.

This should be past tense I guess?

> Now elevator switch isn't allowed during blk_mq_update_nr_hw_queues(), so
> we can simply call elevator_change() to reset elevator sched tags after
> nr_hw_queues is updated.

Now that elevator switches aren't allowed during
blk_mq_update_nr_hw_queues(), we can simply call elevator_change() to
reset elevator sched tags after nr_hw_queues is updated.

?

Anyway, glad to see this code go away!

Reviewed-by: Christoph Hellwig <hch@lst.de>
Christoph Hellwig April 10, 2025, 3:34 p.m. UTC | #2
On Thu, Apr 10, 2025 at 09:30:17PM +0800, Ming Lei wrote:
> @@ -5071,9 +4984,15 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
>  		blk_mq_debugfs_register_hctxs(q);
>  	}
>  
> -switch_back:
> -	list_for_each_entry(q, &set->tag_list, tag_set_list)
> -		blk_mq_elv_switch_back(&head, q);
> +	list_for_each_entry(q, &set->tag_list, tag_set_list) {
> +		const char *name = "none";
> +
> +		mutex_lock(&q->elevator_lock);
> +		if (q->elevator && !blk_queue_dying(q))
> +			name = q->elevator->type->elevator_name;
> +		__elevator_change(q, name, true);
> +		mutex_unlock(&q->elevator_lock);
> +	}

Coming back to this after looking through the next patches.

Why do we even need the __elevator_change call here?  We've not
actually disabled the elevator, and we prevent other callers
from changing it.

As you pass in the force argument this now always calls
elevator_switch and thus blk_mq_init_sched.  But why?
Ming Lei April 14, 2025, 12:58 a.m. UTC | #3
On Thu, Apr 10, 2025 at 05:34:18PM +0200, Christoph Hellwig wrote:
> On Thu, Apr 10, 2025 at 09:30:17PM +0800, Ming Lei wrote:
> > @@ -5071,9 +4984,15 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
> >  		blk_mq_debugfs_register_hctxs(q);
> >  	}
> >  
> > -switch_back:
> > -	list_for_each_entry(q, &set->tag_list, tag_set_list)
> > -		blk_mq_elv_switch_back(&head, q);
> > +	list_for_each_entry(q, &set->tag_list, tag_set_list) {
> > +		const char *name = "none";
> > +
> > +		mutex_lock(&q->elevator_lock);
> > +		if (q->elevator && !blk_queue_dying(q))
> > +			name = q->elevator->type->elevator_name;
> > +		__elevator_change(q, name, true);
> > +		mutex_unlock(&q->elevator_lock);
> > +	}
> 
> Coming back to this after looking through the next patches.
> 
> Why do we even need the __elevator_change call here?  We've not
> actually disabled the elevator, and we prevent other callers
> from changing it.
> 
> As you pass in the force argument this now always calls
> elevator_switch and thus blk_mq_init_sched.  But why?

sched tags are built per hctx and depend on ->nr_hw_queues,
so when nr_hw_queues is changed, sched tags have to be rebuilt.
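
Roughly (simplified sketch, not the exact code), blk_mq_init_sched walks
the hw queues and sets up sched tags for each of them:

	/* one set of sched tags is allocated per hw queue */
	queue_for_each_hw_ctx(q, hctx, i) {
		ret = blk_mq_sched_alloc_map_and_rqs(q, hctx, i);
		if (ret)
			goto err_free_map_and_rqs;
	}

so the allocation is shaped by nr_hw_queues and goes stale once that
changes.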


Thanks, 
Ming
Christoph Hellwig April 14, 2025, 6:09 a.m. UTC | #4
On Mon, Apr 14, 2025 at 08:58:28AM +0800, Ming Lei wrote:
> > Coming back to this after looking through the next patches.
> > 
> > Why do we even need the __elevator_change call here?  We've not
> > actually disabled the elevator, and we prevent other callers
> > from changing it.
> > 
> > As you pass in the force argument this now always calls
> > elevator_switch and thus blk_mq_init_sched.  But why?
> 
> sched tags are built per hctx and depend on ->nr_hw_queues,
> so when nr_hw_queues is changed, sched tags have to be rebuilt.

Can you add a comment explaining this?
Ming Lei April 15, 2025, 2:05 a.m. UTC | #5
On Mon, Apr 14, 2025 at 08:09:59AM +0200, Christoph Hellwig wrote:
> On Mon, Apr 14, 2025 at 08:58:28AM +0800, Ming Lei wrote:
> > > Coming back to this after looking through the next patches.
> > > 
> > > Why do we even need the __elevator_change call here?  We've not
> > > actually disabled the elevator, and we prevent other callers
> > > from changing it.
> > > 
> > > As you pass in the force argument this now always calls
> > > elevator_switch and thus blk_mq_init_sched.  But why?
> > 
> > sched tags are built per hctx and depend on ->nr_hw_queues,
> > so when nr_hw_queues is changed, sched tags have to be rebuilt.
> 
> Can you add a comment explaining this?

Sure.
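
Something like this over the new loop, for example (just a sketch of the
wording):

	/*
	 * Sched tags depend on nr_hw_queues because they are allocated
	 * per hctx, so force the elevator switch to rebuild them even
	 * though the elevator itself isn't changed here.
	 */
	__elevator_change(q, name, true);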

Thanks,
Ming

Patch

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4b0707fb7ae3..b7e3cd355e66 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -4930,88 +4930,10 @@  int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
 	return ret;
 }
 
-/*
- * request_queue and elevator_type pair.
- * It is just used by __blk_mq_update_nr_hw_queues to cache
- * the elevator_type associated with a request_queue.
- */
-struct blk_mq_qe_pair {
-	struct list_head node;
-	struct request_queue *q;
-	struct elevator_type *type;
-};
-
-/*
- * Cache the elevator_type in qe pair list and switch the
- * io scheduler to 'none'
- */
-static bool blk_mq_elv_switch_none(struct list_head *head,
-		struct request_queue *q)
-{
-	struct blk_mq_qe_pair *qe;
-
-	qe = kmalloc(sizeof(*qe), GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY);
-	if (!qe)
-		return false;
-
-	/* Accessing q->elevator needs protection from ->elevator_lock. */
-	mutex_lock(&q->elevator_lock);
-
-	if (!q->elevator) {
-		kfree(qe);
-		goto unlock;
-	}
-
-	INIT_LIST_HEAD(&qe->node);
-	qe->q = q;
-	qe->type = q->elevator->type;
-	/* keep a reference to the elevator module as we'll switch back */
-	__elevator_get(qe->type);
-	list_add(&qe->node, head);
-	elevator_disable(q);
-unlock:
-	mutex_unlock(&q->elevator_lock);
-
-	return true;
-}
-
-static struct blk_mq_qe_pair *blk_lookup_qe_pair(struct list_head *head,
-						struct request_queue *q)
-{
-	struct blk_mq_qe_pair *qe;
-
-	list_for_each_entry(qe, head, node)
-		if (qe->q == q)
-			return qe;
-
-	return NULL;
-}
-
-static void blk_mq_elv_switch_back(struct list_head *head,
-				  struct request_queue *q)
-{
-	struct blk_mq_qe_pair *qe;
-	struct elevator_type *t;
-
-	qe = blk_lookup_qe_pair(head, q);
-	if (!qe)
-		return;
-	t = qe->type;
-	list_del(&qe->node);
-	kfree(qe);
-
-	mutex_lock(&q->elevator_lock);
-	elevator_switch(q, t);
-	/* drop the reference acquired in blk_mq_elv_switch_none */
-	elevator_put(t);
-	mutex_unlock(&q->elevator_lock);
-}
-
 static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 							int nr_hw_queues)
 {
 	struct request_queue *q;
-	LIST_HEAD(head);
 	int prev_nr_hw_queues = set->nr_hw_queues;
 	unsigned int memflags;
 	int i;
@@ -5029,15 +4951,6 @@  static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 	list_for_each_entry(q, &set->tag_list, tag_set_list)
 		blk_mq_freeze_queue_nomemsave(q);
 
-	/*
-	 * Switch IO scheduler to 'none', cleaning up the data associated
-	 * with the previous scheduler. We will switch back once we are done
-	 * updating the new sw to hw queue mappings.
-	 */
-	list_for_each_entry(q, &set->tag_list, tag_set_list)
-		if (!blk_mq_elv_switch_none(&head, q))
-			goto switch_back;
-
 	list_for_each_entry(q, &set->tag_list, tag_set_list) {
 		blk_mq_debugfs_unregister_hctxs(q);
 		blk_mq_sysfs_unregister_hctxs(q);
@@ -5071,9 +4984,15 @@  static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 		blk_mq_debugfs_register_hctxs(q);
 	}
 
-switch_back:
-	list_for_each_entry(q, &set->tag_list, tag_set_list)
-		blk_mq_elv_switch_back(&head, q);
+	list_for_each_entry(q, &set->tag_list, tag_set_list) {
+		const char *name = "none";
+
+		mutex_lock(&q->elevator_lock);
+		if (q->elevator && !blk_queue_dying(q))
+			name = q->elevator->type->elevator_name;
+		__elevator_change(q, name, true);
+		mutex_unlock(&q->elevator_lock);
+	}
 
 	list_for_each_entry(q, &set->tag_list, tag_set_list)
 		blk_mq_unfreeze_queue_nomemrestore(q);
diff --git a/block/blk.h b/block/blk.h
index 006e3be433d2..0c3cc1af2525 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -319,8 +319,8 @@  bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
 
 bool blk_insert_flush(struct request *rq);
 
-int elevator_switch(struct request_queue *q, struct elevator_type *new_e);
-void elevator_disable(struct request_queue *q);
+int __elevator_change(struct request_queue *q, const char *elevator_name,
+		      bool force);
 void elevator_exit(struct request_queue *q);
 int elv_register_queue(struct request_queue *q, bool uevent);
 void elv_unregister_queue(struct request_queue *q);
diff --git a/block/elevator.c b/block/elevator.c
index 7d7b77dd4341..612fa2bdd40d 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -619,7 +619,7 @@  void elevator_init_mq(struct request_queue *q)
  * If switching fails, we are most likely running out of memory and not able
  * to restore the old io scheduler, so leaving the io scheduler being none.
  */
-int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
+static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 {
 	int ret;
 
@@ -655,7 +655,7 @@  int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 	return ret;
 }
 
-void elevator_disable(struct request_queue *q)
+static void elevator_disable(struct request_queue *q)
 {
 	WARN_ON_ONCE(q->mq_freeze_depth == 0);
 	lockdep_assert_held(&q->elevator_lock);
@@ -675,7 +675,8 @@  void elevator_disable(struct request_queue *q)
 /*
  * Switch this queue to the given IO scheduler.
  */
-static int elevator_change(struct request_queue *q, const char *elevator_name)
+int __elevator_change(struct request_queue *q, const char *elevator_name,
+		      bool force)
 {
 	struct elevator_type *e;
 	int ret;
@@ -690,7 +691,8 @@  static int elevator_change(struct request_queue *q, const char *elevator_name)
 		return 0;
 	}
 
-	if (q->elevator && elevator_match(q->elevator->type, elevator_name))
+	if (!force && q->elevator &&
+	    elevator_match(q->elevator->type, elevator_name))
 		return 0;
 
 	e = elevator_find_get(elevator_name);
@@ -742,7 +744,7 @@  ssize_t elv_iosched_store(struct gendisk *disk, const char *buf,
 
 	memflags = blk_mq_freeze_queue(q);
 	mutex_lock(&q->elevator_lock);
-	ret = elevator_change(q, name);
+	ret = __elevator_change(q, name, false);
 	if (!ret)
 		ret = count;
 	mutex_unlock(&q->elevator_lock);