--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -869,14 +869,8 @@ int blk_register_queue(struct gendisk *disk)
if (ret)
goto out_unregister_ia_ranges;
+ elevator_set_default(q);
mutex_lock(&q->elevator_lock);
- if (q->elevator) {
- ret = elv_register_queue(q, false);
- if (ret) {
- mutex_unlock(&q->elevator_lock);
- goto out_crypto_sysfs_unregister;
- }
- }
wbt_enable_default(disk);
mutex_unlock(&q->elevator_lock);
@@ -902,8 +896,6 @@ int blk_register_queue(struct gendisk *disk)
return ret;
-out_crypto_sysfs_unregister:
- blk_crypto_sysfs_unregister(disk);
out_unregister_ia_ranges:
disk_unregister_independent_access_ranges(disk);
out_debugfs_remove:
@@ -949,9 +941,11 @@ void blk_unregister_queue(struct gendisk *disk)
blk_mq_sysfs_unregister(disk);
blk_crypto_sysfs_unregister(disk);
- mutex_lock(&q->elevator_lock);
- elv_unregister_queue(q);
- mutex_unlock(&q->elevator_lock);
+ if (q->elevator) {
+ blk_mq_quiesce_queue(q);
+ elevator_set_none(q);
+ blk_mq_unquiesce_queue(q);
+ }
mutex_lock(&q->sysfs_lock);
disk_unregister_independent_access_ranges(disk);
--- a/block/blk.h
+++ b/block/blk.h
@@ -321,9 +321,8 @@ bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
bool blk_insert_flush(struct request *rq);
int __elevator_change(struct request_queue *q, struct elv_change_ctx *ctx);
-void elevator_exit(struct request_queue *q);
-int elv_register_queue(struct request_queue *q, bool uevent);
-void elv_unregister_queue(struct request_queue *q);
+void elevator_set_default(struct request_queue *q);
+void elevator_set_none(struct request_queue *q);
ssize_t part_size_show(struct device *dev, struct device_attribute *attr,
char *buf);
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -151,7 +151,7 @@ static void elevator_release(struct kobject *kobj)
kfree(e);
}
-void elevator_exit(struct request_queue *q)
+static void elevator_exit(struct request_queue *q)
{
struct elevator_queue *e = q->elevator;
@@ -455,7 +455,7 @@ static const struct kobj_type elv_ktype = {
.release = elevator_release,
};
-int elv_register_queue(struct request_queue *q, bool uevent)
+static int elv_register_queue(struct request_queue *q, bool uevent)
{
struct elevator_queue *e = q->elevator;
int error;
@@ -485,7 +485,7 @@ int elv_register_queue(struct request_queue *q, bool uevent)
return error;
}
-void elv_unregister_queue(struct request_queue *q)
+static void elv_unregister_queue(struct request_queue *q)
{
struct elevator_queue *e = q->elevator;
@@ -562,60 +562,59 @@ EXPORT_SYMBOL_GPL(elv_unregister);
* For single queue devices, default to using mq-deadline. If we have multiple
* queues or mq-deadline is not available, default to "none".
*/
-static struct elevator_type *elevator_get_default(struct request_queue *q)
+static bool use_default_elevator(struct request_queue *q)
{
if (q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT)
- return NULL;
+ return false;
if (q->nr_hw_queues != 1 &&
!blk_mq_is_shared_tags(q->tag_set->flags))
- return NULL;
+ return false;
- return elevator_find_get("mq-deadline");
+ return true;
}
/*
* Use the default elevator settings. If the chosen elevator initialization
* fails, fall back to the "none" elevator (no elevator).
*/
-void elevator_init_mq(struct request_queue *q)
+void elevator_set_default(struct request_queue *q)
{
- struct elevator_type *e;
- unsigned int memflags;
+ struct elv_change_ctx ctx = {
+ .init = true,
+ };
int err;
- WARN_ON_ONCE(blk_queue_registered(q));
-
- if (unlikely(q->elevator))
+ if (!queue_is_mq(q))
return;
- e = elevator_get_default(q);
- if (!e)
+ ctx.name = use_default_elevator(q) ? "mq-deadline" : "none";
+ if (!q->elevator && !strcmp(ctx.name, "none"))
return;
+ err = elevator_change(q, &ctx);
+ if (err < 0)
+ pr_warn("\"%s\" set elevator failed %d, "
+ "falling back to \"none\"\n", ctx.name, err);
+}
- /*
- * We are called before adding disk, when there isn't any FS I/O,
- * so freezing queue plus canceling dispatch work is enough to
- * drain any dispatch activities originated from passthrough
- * requests, then no need to quiesce queue which may add long boot
- * latency, especially when lots of disks are involved.
- *
- * Disk isn't added yet, so verifying queue lock only manually.
- */
- memflags = blk_mq_freeze_queue(q);
-
- blk_mq_cancel_work_sync(q);
-
- err = blk_mq_init_sched(q, e);
+void elevator_set_none(struct request_queue *q)
+{
+ struct elv_change_ctx ctx = {
+ .name = "none",
+ .uevent = true,
+ .init = true,
+ };
+ int err;
- blk_mq_unfreeze_queue(q, memflags);
+ if (!queue_is_mq(q))
+ return;
- if (err) {
- pr_warn("\"%s\" elevator initialization failed, "
- "falling back to \"none\"\n", e->elevator_name);
- }
+ if (!q->elevator)
+ return;
- elevator_put(e);
+ err = elevator_change(q, &ctx);
+ if (err < 0)
+ pr_warn("%s: set none elevator failed %d\n", __func__, err);
}
/*
@@ -688,7 +687,7 @@ int __elevator_change(struct request_queue *q, struct elv_change_ctx *ctx)
int ret;
/* Make sure queue is not in the middle of being removed */
- if (!blk_queue_registered(q))
+ if (!ctx->init && !blk_queue_registered(q))
return -ENOENT;
if (!strncmp(elevator_name, "none", 4)) {
@@ -723,6 +722,16 @@ static int elevator_change(struct request_queue *q,
}
memflags = blk_mq_freeze_queue(q);
+ /*
+ * May be called before adding disk, when there isn't any FS I/O,
+ * so freezing queue plus canceling dispatch work is enough to
+ * drain any dispatch activities originated from passthrough
+ * requests, then no need to quiesce queue which may add long boot
+ * latency, especially when lots of disks are involved.
+ *
+ * Disk isn't added yet, so verifying queue lock only manually.
+ */
+ blk_mq_cancel_work_sync(q);
mutex_lock(&q->elevator_lock);
ret = __elevator_change(q, ctx);
mutex_unlock(&q->elevator_lock);
--- a/block/elevator.h
+++ b/block/elevator.h
@@ -127,6 +127,7 @@ struct elv_change_ctx {
const char *name;
bool force;
bool uevent;
+ bool init;
};
/*
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -438,12 +438,6 @@ static int __add_disk_fwnode(struct gendisk_data *data)
*/
if (disk->fops->submit_bio || disk->fops->poll_bio)
return -EINVAL;
-
- /*
- * Initialize the I/O scheduler code and pick a default one if
- * needed.
- */
- elevator_init_mq(disk->queue);
} else {
if (!disk->fops->submit_bio)
return -EINVAL;
@@ -587,11 +581,7 @@ static int __add_disk_fwnode(struct gendisk_data *data)
if (disk->major == BLOCK_EXT_MAJOR)
blk_free_ext_minor(disk->first_minor);
out_exit_elevator:
- if (disk->queue->elevator) {
- mutex_lock(&disk->queue->elevator_lock);
- elevator_exit(disk->queue);
- mutex_unlock(&disk->queue->elevator_lock);
- }
+ elevator_set_none(disk->queue);
return ret;
}
@@ -771,14 +761,7 @@ static int __del_gendisk(struct gendisk_data *data)
if (queue_is_mq(q))
blk_mq_cancel_work_sync(q);
- blk_mq_quiesce_queue(q);
- if (q->elevator) {
- mutex_lock(&q->elevator_lock);
- elevator_exit(q);
- mutex_unlock(&q->elevator_lock);
- }
rq_qos_exit(q);
- blk_mq_unquiesce_queue(q);
/*
* If the disk does not own the queue, allow using passthrough requests
An elevator change is one well-defined behavior:

- tear down the current elevator if it exists
- set up the new elevator

It is supposed to cover any case of changing the elevator through a single
internal API, typically the following:

- set up the default elevator in add_disk()
- switch to none in del_disk()
- reset the elevator in blk_mq_update_nr_hw_queues()
- switch the elevator from the sysfs `store` elevator attribute

This patch uses elevator_change() to cover all of the above cases:

- every elevator switch is serialized with the others: add_disk()/del_disk()/
  the sysfs store are serialized already, and blk_mq_update_nr_hw_queues()
  uses srcu for syncing with the other three cases

- for both add_disk() and del_disk(), queue freeze works in atomic mode or
  the queue has already been frozen, so the freeze in elevator_change() won't
  add extra delay

- a `struct elv_change_ctx` instance holds all the info needed for changing
  the elevator

Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 block/blk-sysfs.c | 18 ++-------
 block/blk.h       |  5 ++-
 block/elevator.c  | 81 ++++++++++++++++++++++++++---------------------
 block/elevator.h  |  1 +
 block/genhd.c     | 19 +----------
 5 files changed, 55 insertions(+), 69 deletions(-)
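
For readers who want the overall shape without walking every hunk, the
following condensed sketch restates what the diff introduces. The struct
layout and the names (struct elv_change_ctx, elevator_change(),
__elevator_change(), elevator_set_default(), use_default_elevator()) are
taken from the hunks above; the body of elevator_change() is abbreviated
(the post-switch handling for registered queues and uevents is omitted) and
the snippet is not meant to build outside the block layer:

/* Sketch only: condensed from the hunks above. */
struct elv_change_ctx {
	const char *name;	/* target scheduler: "mq-deadline", "none", ... */
	bool force;
	bool uevent;		/* emit a change uevent after switching */
	bool init;		/* called from add_disk()/del_disk(), so the
				 * queue may not be registered in sysfs yet */
};

static int elevator_change(struct request_queue *q, struct elv_change_ctx *ctx)
{
	unsigned int memflags = blk_mq_freeze_queue(q);
	int ret;

	/*
	 * Freezing plus canceling dispatch work drains dispatch activity from
	 * passthrough requests, so no quiesce is needed even when this runs
	 * before the disk is added.
	 */
	blk_mq_cancel_work_sync(q);

	mutex_lock(&q->elevator_lock);
	ret = __elevator_change(q, ctx);	/* tear down old, set up new */
	mutex_unlock(&q->elevator_lock);

	blk_mq_unfreeze_queue(q, memflags);
	return ret;
}

/* add_disk()/blk_register_queue() call site: install the default scheduler */
void elevator_set_default(struct request_queue *q)
{
	struct elv_change_ctx ctx = { .init = true };

	if (!queue_is_mq(q))
		return;

	ctx.name = use_default_elevator(q) ? "mq-deadline" : "none";
	if (!q->elevator && !strcmp(ctx.name, "none"))
		return;

	if (elevator_change(q, &ctx) < 0)
		pr_warn("\"%s\" set elevator failed, falling back to \"none\"\n",
			ctx.name);
}

The del_disk()/blk_unregister_queue() side (elevator_set_none()) follows the
same pattern with .name = "none" and .uevent = true, so scheduler teardown and
the sysfs-visible scheduler switch share one serialized path.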