
[RFC,V4,1/6] blk: prepare to make blk-rq-qos pluggable and modular

Message ID 20220217031349.98561-2-jianchao.wan9@gmail.com (mailing list archive)
State New, archived
Series blk: make blk-rq-qos policies pluggable and modular

Commit Message

Wang Jianchao Feb. 17, 2022, 3:13 a.m. UTC
blk-rq-qos is a standalone framework, separate from the io-schedulers, which
can be used to control or observe IO progress in the block layer through
hooks. blk-rq-qos is a great design, but right now it is completely fixed and
built-in, which shuts out people who want to use it from an external module.

This patch makes blk-rq-qos policies pluggable and modular.
(1) Add code to maintain the rq_qos_ops. An rq-qos module needs to
    register itself with rq_qos_register(). The original enum
    rq_qos_id will be removed in a following patch. Policies will use
    a dynamic id maintained by rq_qos_ida instead.
(2) Add an .init callback to rq_qos_ops. It is used to initialize the
    policy's per-queue resources.
(3) Add /sys/block/x/queue/qos
    We can use '+name' or "-name" to open or close the blk-rq-qos
    policy.

This patch mainly prepares helper interfaces and makes no functional changes.
Following patches will adapt the code of wbt, iolatency, iocost and ioprio to
make them pluggable and modular one by one. After that, the
/sys/block/xxx/queue/qos interface will be exported.
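
For illustration only (not part of the patch itself), a rough sketch of how an
out-of-tree policy could plug into these interfaces; the "qos_demo" module and
all of its names are hypothetical, and only the .init/.exit callbacks are shown:

#include <linux/module.h>
#include <linux/slab.h>
#include "blk-rq-qos.h"

struct qos_demo {
	struct rq_qos rqos;
	/* policy-specific state would live here */
};

static struct rq_qos_ops qos_demo_ops;

static int qos_demo_init(struct request_queue *q)
{
	struct qos_demo *demo;

	demo = kzalloc(sizeof(*demo), GFP_KERNEL);
	if (!demo)
		return -ENOMEM;

	/* Hook the instance into q->rq_qos; this also takes a module reference. */
	rq_qos_activate(q, &demo->rqos, &qos_demo_ops);
	return 0;
}

static void qos_demo_exit(struct rq_qos *rqos)
{
	struct qos_demo *demo = container_of(rqos, struct qos_demo, rqos);

	/* Unhook from q->rq_qos and drop the module reference. */
	rq_qos_deactivate(rqos);
	kfree(demo);
}

static struct rq_qos_ops qos_demo_ops = {
	.name	= "qos_demo",
	.owner	= THIS_MODULE,
	.init	= qos_demo_init,
	.exit	= qos_demo_exit,
};

static int __init qos_demo_mod_init(void)
{
	return rq_qos_register(&qos_demo_ops);
}

static void __exit qos_demo_mod_exit(void)
{
	rq_qos_unregister(&qos_demo_ops);
}

module_init(qos_demo_mod_init);
module_exit(qos_demo_mod_exit);
MODULE_LICENSE("GPL");

With such a module registered, writing '+qos_demo' or '-qos_demo' to
/sys/block/<dev>/queue/qos would switch the policy on or off for that queue.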

Signed-off-by: Wang Jianchao (Kuaishou) <jianchao.wan9@gmail.com>
---
 block/blk-mq-debugfs.c |   9 +-
 block/blk-rq-qos.c     | 301 ++++++++++++++++++++++++++++++++++++++++-
 block/blk-rq-qos.h     |  39 +++++-
 include/linux/blkdev.h |   4 +
 4 files changed, 348 insertions(+), 5 deletions(-)

Comments

Christoph Hellwig Feb. 17, 2022, 8:48 a.m. UTC | #1
>  {
>  	struct request_queue *q = rqos->q;
> -	const char *dir_name = rq_qos_id_to_name(rqos->id);
> +	const char *dir_name;
> +
> +	dir_name = rqos->ops->name ? rqos->ops->name : rq_qos_id_to_name(rqos->id);

Overly long line here.  And it would be much more readable if you used
a good old if/else.
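
For reference, the if/else form being suggested would read:

	if (rqos->ops->name)
		dir_name = rqos->ops->name;
	else
		dir_name = rq_qos_id_to_name(rqos->id);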

> +static DEFINE_IDA(rq_qos_ida);
> +static int nr_rqos_blkcg_pols;
> +static DEFINE_MUTEX(rq_qos_mutex);
> +static LIST_HEAD(rq_qos_list);

Please use an allocating xarray instead of an IDA plus list.
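
For reference, a rough sketch of that direction (not part of the posted patch),
with an allocating xarray keyed by the policy id replacing the IDA plus list:

#include <linux/xarray.h>

static DEFINE_XARRAY_ALLOC(rq_qos_xa);

int rq_qos_register(struct rq_qos_ops *ops)
{
	u32 id;
	int ret;

	/* Allocate an id and store ops under it in one step. */
	ret = xa_alloc(&rq_qos_xa, &id, ops,
		       XA_LIMIT(RQ_QOS_IOPRIO + 1, INT_MAX), GFP_KERNEL);
	if (!ret)
		ops->id = id;
	return ret;
}

static struct rq_qos_ops *rq_qos_op_find(const char *name)
{
	struct rq_qos_ops *ops;
	unsigned long index;

	/* Lookup by name iterates the registered entries. */
	xa_for_each(&rq_qos_xa, index, ops) {
		if (!strcmp(ops->name, name))
			return ops;
	}
	return NULL;
}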

> +	/*
> +	 * queue must have been unregistered here, it is safe to iterate
> +	 * the list w/o lock
> +	 */

Please capitalize multi-line comments.

> + * After the pluggable blk-qos, rqos's life cycle become complicated,
> + * as we may modify the rqos list there. Except for the places where
> + * queue is not registered, there are following places may access rqos
> + * list concurrently:

Code comments are not the place to explain history.  Please explain the
current situation.

> +struct rq_qos *rq_qos_get(struct request_queue *q, int id)
> +{
> +	struct rq_qos *rqos;
> +
> +	spin_lock_irq(&q->queue_lock);

Please don't use the grab all queue_lock for new code.  It badly needs
to be split and documented, and new code is the best place to start
that.

Also with all the new code please add a new config option that is
selected by all rq-qos implementations so that blk-rq-qos.c only gets
built when actually needed.

> +static inline struct rq_qos *rq_qos_by_id(struct request_queue *q, int id)
> +{
> +	struct rq_qos *rqos;
> +
> +	WARN_ON(!mutex_is_locked(&q->sysfs_lock) && !spin_is_locked(&q->queue_lock));

Another overly long line.  And if in doubt, split this into two helpers
so that you can use lockdep_assert_held instead of doing the incorrect
asserts.
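
A sketch of the split being asked for (again, not part of the posted patch),
with one helper per lock so that lockdep_assert_held() can be used:

static inline struct rq_qos *__rq_qos_by_id(struct request_queue *q, int id)
{
	struct rq_qos *rqos;

	for (rqos = q->rq_qos; rqos; rqos = rqos->next) {
		if (rqos->id == id)
			break;
	}
	return rqos;
}

/* For callers holding q->queue_lock */
static inline struct rq_qos *rq_qos_by_id_locked(struct request_queue *q, int id)
{
	lockdep_assert_held(&q->queue_lock);
	return __rq_qos_by_id(q, id);
}

/* For callers holding q->sysfs_lock */
static inline struct rq_qos *rq_qos_by_id_sysfs(struct request_queue *q, int id)
{
	lockdep_assert_held(&q->sysfs_lock);
	return __rq_qos_by_id(q, id);
}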
Wang Jianchao Feb. 18, 2022, 3:34 a.m. UTC | #2
On 2022/2/17 4:48 PM, Christoph Hellwig wrote:
>>  {
>>  	struct request_queue *q = rqos->q;
>> -	const char *dir_name = rq_qos_id_to_name(rqos->id);
>> +	const char *dir_name;
>> +
>> +	dir_name = rqos->ops->name ? rqos->ops->name : rq_qos_id_to_name(rqos->id);
> 
> Overly long line here.  And it would be much more readable if you used
> a good old if/else.
> 
>> +static DEFINE_IDA(rq_qos_ida);
>> +static int nr_rqos_blkcg_pols;
>> +static DEFINE_MUTEX(rq_qos_mutex);
>> +static LIST_HEAD(rq_qos_list);
> 
> Please use an allocating xarray instead of an IDA plus list.
> 
>> +	/*
>> +	 * queue must have been unregistered here, it is safe to iterate
>> +	 * the list w/o lock
>> +	 */
> 
> Please capitalize multi-line comments.
> 
>> + * After the pluggable blk-qos, rqos's life cycle become complicated,
>> + * as we may modify the rqos list there. Except for the places where
>> + * queue is not registered, there are following places may access rqos
>> + * list concurrently:
> 
> Code comments are not the place to explain history.  Please explain the
> current situation.
> 
>> +struct rq_qos *rq_qos_get(struct request_queue *q, int id)
>> +{
>> +	struct rq_qos *rqos;
>> +
>> +	spin_lock_irq(&q->queue_lock);
> 
> Please don't use the grab all queue_lock for new code.  It badly needs
> to be split and documented, and new code is the best place to start
> that.
> 
> Also with all the new code please add a new config option that is
> selected by all rq-qos implementations so that blk-rq-qos.c only gets
> built when actually needed.
> 
>> +static inline struct rq_qos *rq_qos_by_id(struct request_queue *q, int id)
>> +{
>> +	struct rq_qos *rqos;
>> +
>> +	WARN_ON(!mutex_is_locked(&q->sysfs_lock) && !spin_is_locked(&q->queue_lock));
> 
> Another overly long line.  And if in doubt, split this into two helpers
> so that you can use lockdep_assert_held instead of doing the incorrect
> asserts.

Thanks so much for your kind comments. I'll change the code in the next version.

Regards
Jianchao
Tejun Heo Feb. 22, 2022, 5:19 p.m. UTC | #3
Hello,

On Thu, Feb 17, 2022 at 11:13:44AM +0800, Wang Jianchao (Kuaishou) wrote:
> (3) Add /sys/block/x/queue/qos
>     We can use '+name' or "-name" to open or close the blk-rq-qos
>     policy.

I don't understand why we're modularizing rq-qos in this non-standard way
instead of modprobing to enable a policy and rmmoding to disable. Why are we
building in qos names into the kernel and adding an extra module handling
interface?

Thanks.
Wang Jianchao Feb. 23, 2022, 3:08 a.m. UTC | #4
On 2022/2/23 1:19 AM, Tejun Heo wrote:
> Hello,
> 
> On Thu, Feb 17, 2022 at 11:13:44AM +0800, Wang Jianchao (Kuaishou) wrote:
>> (3) Add /sys/block/x/queue/qos
>>     We can use '+name' or "-name" to open or close the blk-rq-qos
>>     policy.
> 
> I don't understand why we're modularizing rq-qos in this non-standard way
> instead of modprobing to enable a policy and rmmoding to disable. Why are we
> building in qos names into the kernel and adding an extra module handling
> interface?

Hi Tejun

We just want to provide the flexibility for the user to open/close a policy
per device. If we need to the policy on a device, we needn't to waste cpu
cycles and memory for it.

Thanks
Jianchao
Wang Jianchao Feb. 23, 2022, 3:10 a.m. UTC | #5
On 2022/2/23 11:08 AM, Wang Jianchao wrote:
> 
> 
> On 2022/2/23 1:19 上午, Tejun Heo wrote:
>> Hello,
>>
>> On Thu, Feb 17, 2022 at 11:13:44AM +0800, Wang Jianchao (Kuaishou) wrote:
>>> (3) Add /sys/block/x/queue/qos
>>>     We can use '+name' or "-name" to open or close the blk-rq-qos
>>>     policy.
>>
>> I don't understand why we're modularizing rq-qos in this non-standard way
>> instead of modprobing to enable a policy and rmmoding to disable. Why are we
>> building in qos names into the kernel and adding an extra module handling
>> interface?
> 
> Hi Tejun
> 
> We just want to provide the flexibility for the user to open/close a policy
> per device. If we need to the policy on a device, we needn't to waste cpu
sorry, it should be "If we don't need the policy on a device" ;)

Thanks
Jianchao
> cycles and memory for it.
> 
> Thanks
> Jianchao
Tejun Heo Feb. 23, 2022, 9:37 p.m. UTC | #6
Hello,

> > We just want to provide the flexibility for the user to open/close a policy
> > per device. If we need to the policy on a device, we needn't to waste cpu
> sorry, it should be "If we don't need the policy on a device" ;)

Yeah, that's what modularization does but why does it need a separate user
interface for loading? Everything else inits on insmod and exits on rmmod
and autoloading has been delegated to udev a very long time ago. The
interface you added for loading module doesn't make sense to me.

Thanks.
Wang Jianchao Feb. 24, 2022, 1:51 a.m. UTC | #7
On 2022/2/24 5:37 AM, Tejun Heo wrote:
> Hello,
> 
>>> We just want to provide the flexibility for the user to open/close a policy
>>> per device. If we need to the policy on a device, we needn't to waste cpu
>> sorry, it should be "If we don't need the policy on a device" ;)
> 
> Yeah, that's what modularization does but why does it need a separate user
> interface for loading? Everything else inits on insmod and exits on rmmod
> and autoloading has been delegated to udev a very long time ago. The
> interface you added for loading module doesn't make sense to me.
> 

The initial version of this patchset has two targets:
(1) Add a sysfs interface to open/close a policy per device. Then we needn't
    waste cpu cycles and memory if the device doesn't need the policy.
(2) Make the policies modular, which makes it easy to maintain the policy code
    in a production environment, as we only need to close the policy and
    replace the .ko file.

Loading the module when a policy is opened via the sysfs interface is just to
avoid a manual modprobe. There is a similar operation when switching the io
scheduler.

And as Christoph suggested, the modular part has been dropped in the next version.

Thanks
Jianchao
Tejun Heo Feb. 24, 2022, 2:07 a.m. UTC | #8
Hello,

On Thu, Feb 24, 2022 at 09:51:04AM +0800, Wang Jianchao wrote:
> The initial version of this patchset has two targets:
> (1) Add a sysfs interface to open/close a policy per device. Then we needn't
>     waste cpu cycles and memory if the device doesn't need the policy.
> (2) Make the policies modular, which makes it easy to maintain the policy code
>     in a production environment, as we only need to close the policy and
>     replace the .ko file.
> 
> Loading the module when a policy is opened via the sysfs interface is just to
> avoid a manual modprobe. There is a similar operation when switching the io
> scheduler.

Each rq-qos mechanism already needs and has a way to turn off itself.
There's no reason to add another layer on top. If the current way of
disabling isn't efficient, we should improve that instead of adding a new
layer of interface on top.

And please don't add a custom interface to avoid modprobing. All it adds is
unnecessary deviation. There's no benefit to echoing a selector to a custom
sysfs file compared to explicitly modprobing it.

Thanks.
Wang Jianchao Feb. 24, 2022, 2:50 a.m. UTC | #9
On 2022/2/24 10:07 AM, Tejun Heo wrote:
> Hello,
> 
> On Thu, Feb 24, 2022 at 09:51:04AM +0800, Wang Jianchao wrote:
>> The initial version of this patchset has two targets:
>> (1) Add a sysfs interface to open/close a policy per device. Then we needn't
>>     waste cpu cycles and memory if the device doesn't need the policy.
>> (2) Make the policies modular, which makes it easy to maintain the policy code
>>     in a production environment, as we only need to close the policy and
>>     replace the .ko file.
>>
>> Loading the module when a policy is opened via the sysfs interface is just to
>> avoid a manual modprobe. There is a similar operation when switching the io
>> scheduler.
> 
> Each rq-qos mechanism already needs and has a way to turn off itself.
> There's no reason to add another layer on top. If the current way of
> disabling isn't efficient, we should improve that instead of adding a new
> layer of interface on top.

Yes, right now every policy has its own way to turn itself off, but we always
need to iterate the rqos list and enter the policy's callback to check it. And
every blkio cgroup needs to allocate memory for it even if we don't use it.

I don't think this patchset is adding a new layer; the blk-rq-qos layer is
already there, we just add a unified interface to open/close the policies.

Thanks
Jianchao
Tejun Heo Feb. 24, 2022, 4:53 p.m. UTC | #10
On Thu, Feb 24, 2022 at 10:50:22AM +0800, Wang Jianchao wrote:
> Yes, right now every policy has its own way to turn itself off, but we always
> need to iterate the rqos list and enter the policy's callback to check it. And
> every blkio cgroup needs to allocate memory for it even if we don't use it.
> 
> I don't think this patchset is adding a new layer; the blk-rq-qos layer is
> already there, we just add a unified interface to open/close the policies.

We're talking in circles. We already know when a policy is inactive. If it
sits in hot path in that state, take it off whatever gets iterated in hot
path and put it back on when it actually gets enabled. The same goes for
memory allocation. If there's a substantial amount of memory allocated while
not used, make that dynamic and trigger it when the policy starts getting
used. It makes no sense to add another enable/disable interface on top.

FWIW, please consider the series nacked on this side.

Thanks.
Wang Jianchao Feb. 25, 2022, 2:02 a.m. UTC | #11
On 2022/2/25 12:53 AM, Tejun Heo wrote:
> On Thu, Feb 24, 2022 at 10:50:22AM +0800, Wang Jianchao wrote:
>> Yes, right now every policy has its own way to turn itself off, but we always
>> need to iterate the rqos list and enter the policy's callback to check it. And
>> every blkio cgroup needs to allocate memory for it even if we don't use it.
>>
>> I don't think this patchset is adding a new layer; the blk-rq-qos layer is
>> already there, we just add a unified interface to open/close the policies.
> 
> We're talking in circles. We already know when a policy is inactive. If it
> sits in hot path in that state, take it off whatever gets iterated in hot
> path and put it back on when it actually gets enabled. The same goes for
> memory allocation. If there's a substantial amount of memory allocated while
> not used, make that dynamic and trigger it when the policy starts getting
> used. It makes no sense to add another enable/disable interface on top.
>
It can make things more complicated if we do as above...

> FWIW, please consider the series nacked on this side.

Anyway, thanks so much for all of your comments ;)

Regards
Jianchao

Patch

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 3a790eb4995c..8b6d557e1ad6 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -729,7 +729,10 @@  void blk_mq_debugfs_register(struct request_queue *q)
 
 	if (q->rq_qos) {
 		struct rq_qos *rqos = q->rq_qos;
-
+		/*
+		 * queue has not been registered right now, it is safe to
+		 * iterate the rqos w/o lock
+		 */
 		while (rqos) {
 			blk_mq_debugfs_register_rqos(rqos);
 			rqos = rqos->next;
@@ -844,7 +847,9 @@  void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos)
 void blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
 {
 	struct request_queue *q = rqos->q;
-	const char *dir_name = rq_qos_id_to_name(rqos->id);
+	const char *dir_name;
+
+	dir_name = rqos->ops->name ? rqos->ops->name : rq_qos_id_to_name(rqos->id);
 
 	if (rqos->debugfs_dir || !rqos->ops->debugfs_attrs)
 		return;
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index e83af7bc7591..db13581ae878 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -2,6 +2,11 @@ 
 
 #include "blk-rq-qos.h"
 
+static DEFINE_IDA(rq_qos_ida);
+static int nr_rqos_blkcg_pols;
+static DEFINE_MUTEX(rq_qos_mutex);
+static LIST_HEAD(rq_qos_list);
+
 /*
  * Increment 'v', if 'v' is below 'below'. Returns true if we succeeded,
  * false if 'v' + 1 would be bigger than 'below'.
@@ -294,11 +299,303 @@  void rq_qos_wait(struct rq_wait *rqw, void *private_data,
 
 void rq_qos_exit(struct request_queue *q)
 {
-	blk_mq_debugfs_unregister_queue_rqos(q);
-
+	/*
+	 * queue must have been unregistered here, it is safe to iterate
+	 * the list w/o lock
+	 */
 	while (q->rq_qos) {
 		struct rq_qos *rqos = q->rq_qos;
 		q->rq_qos = rqos->next;
 		rqos->ops->exit(rqos);
 	}
+	blk_mq_debugfs_unregister_queue_rqos(q);
+}
+
+static struct rq_qos *rq_qos_by_name(struct request_queue *q,
+		const char *name)
+{
+	struct rq_qos *rqos;
+
+	for (rqos = q->rq_qos; rqos; rqos = rqos->next) {
+		if (!rqos->ops->name)
+			continue;
+
+		if (!strncmp(rqos->ops->name, name,
+					strlen(rqos->ops->name)))
+			return rqos;
+	}
+	return NULL;
+}
+
+/*
+ * After the pluggable blk-qos, rqos's life cycle become complicated,
+ * as we may modify the rqos list there. Except for the places where
+ * queue is not registered, there are following places may access rqos
+ * list concurrently:
+ * (1) normal IO path, can be serialized by queue freezing
+ * (2) blkg_create, the .pd_init_fn() may access rqos, can be serialized
+ *     by queue_lock.
+ * (3) cgroup file, such as ioc_cost_model_write, rq_qos_get is for this
+ *     case to keep the rqos alive.
+ */
+struct rq_qos *rq_qos_get(struct request_queue *q, int id)
+{
+	struct rq_qos *rqos;
+
+	spin_lock_irq(&q->queue_lock);
+	rqos = rq_qos_by_id(q, id);
+	if (rqos && rqos->dying)
+		rqos = NULL;
+	if (rqos)
+		refcount_inc(&rqos->ref);
+	spin_unlock_irq(&q->queue_lock);
+	return rqos;
+}
+EXPORT_SYMBOL_GPL(rq_qos_get);
+
+void rq_qos_put(struct rq_qos *rqos)
+{
+	struct request_queue *q = rqos->q;
+
+	spin_lock_irq(&q->queue_lock);
+	refcount_dec(&rqos->ref);
+	if (rqos->dying)
+		wake_up(&rqos->waitq);
+	spin_unlock_irq(&q->queue_lock);
+}
+EXPORT_SYMBOL_GPL(rq_qos_put);
+
+void rq_qos_activate(struct request_queue *q,
+		struct rq_qos *rqos, const struct rq_qos_ops *ops)
+{
+	struct rq_qos *pos;
+
+	rqos->dying = false;
+	refcount_set(&rqos->ref, 1);
+	init_waitqueue_head(&rqos->waitq);
+	rqos->id = ops->id;
+	rqos->ops = ops;
+	rqos->q = q;
+	rqos->next = NULL;
+
+	spin_lock_irq(&q->queue_lock);
+	pos = q->rq_qos;
+	if (pos) {
+		while (pos->next)
+			pos = pos->next;
+		pos->next = rqos;
+	} else {
+		q->rq_qos = rqos;
+	}
+	spin_unlock_irq(&q->queue_lock);
+
+	if (rqos->ops->debugfs_attrs)
+		blk_mq_debugfs_register_rqos(rqos);
+
+	if (ops->owner)
+		__module_get(ops->owner);
+}
+EXPORT_SYMBOL_GPL(rq_qos_activate);
+
+void rq_qos_deactivate(struct rq_qos *rqos)
+{
+	struct request_queue *q = rqos->q;
+	struct rq_qos **cur;
+
+	spin_lock_irq(&q->queue_lock);
+	rqos->dying = true;
+	/*
+	 * Drain all of the usage of get/put_rqos()
+	 */
+	wait_event_lock_irq(rqos->waitq,
+		refcount_read(&rqos->ref) == 1, q->queue_lock);
+	for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) {
+		if (*cur == rqos) {
+			*cur = rqos->next;
+			break;
+		}
+	}
+	spin_unlock_irq(&q->queue_lock);
+	blk_mq_debugfs_unregister_rqos(rqos);
+
+	if (rqos->ops->owner)
+		module_put(rqos->ops->owner);
+}
+EXPORT_SYMBOL_GPL(rq_qos_deactivate);
+
+static struct rq_qos_ops *rq_qos_op_find(const char *name)
+{
+	struct rq_qos_ops *pos;
+
+	list_for_each_entry(pos, &rq_qos_list, node) {
+		if (!strncmp(pos->name, name, strlen(pos->name)))
+			return pos;
+	}
+
+	return NULL;
+}
+
+int rq_qos_register(struct rq_qos_ops *ops)
+{
+	int ret, start;
+
+	mutex_lock(&rq_qos_mutex);
+
+	if (rq_qos_op_find(ops->name)) {
+		ret = -EEXIST;
+		goto out;
+	}
+
+	if (ops->flags & RQOS_FLAG_CGRP_POL &&
+	    nr_rqos_blkcg_pols >= (BLKCG_MAX_POLS - BLKCG_NON_RQOS_POLS)) {
+		ret = -ENOSPC;
+		goto out;
+	}
+
+	start = RQ_QOS_IOPRIO + 1;
+	ret = ida_simple_get(&rq_qos_ida, start, INT_MAX, GFP_KERNEL);
+	if (ret < 0)
+		goto out;
+
+	if (ops->flags & RQOS_FLAG_CGRP_POL)
+		nr_rqos_blkcg_pols++;
+
+	ops->id = ret;
+	ret = 0;
+	INIT_LIST_HEAD(&ops->node);
+	list_add_tail(&ops->node, &rq_qos_list);
+out:
+	mutex_unlock(&rq_qos_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(rq_qos_register);
+
+void rq_qos_unregister(struct rq_qos_ops *ops)
+{
+	mutex_lock(&rq_qos_mutex);
+
+	if (ops->flags & RQOS_FLAG_CGRP_POL)
+		nr_rqos_blkcg_pols--;
+	list_del_init(&ops->node);
+	ida_simple_remove(&rq_qos_ida, ops->id);
+	mutex_unlock(&rq_qos_mutex);
+}
+EXPORT_SYMBOL_GPL(rq_qos_unregister);
+
+ssize_t queue_qos_show(struct request_queue *q, char *buf)
+{
+	struct rq_qos_ops *ops;
+	struct rq_qos *rqos;
+	int ret = 0;
+
+	mutex_lock(&rq_qos_mutex);
+	/*
+	 * Show the policies in the order of being invoked.
+	 * queue_lock is not needed here as the sysfs_lock is
+	 * protected us from the queue_qos_store()
+	 */
+	for (rqos = q->rq_qos; rqos; rqos = rqos->next) {
+		if (!rqos->ops->name)
+			continue;
+		ret += sprintf(buf + ret, "[%s] ", rqos->ops->name);
+	}
+	list_for_each_entry(ops, &rq_qos_list, node) {
+		if (!rq_qos_by_name(q, ops->name))
+			ret += sprintf(buf + ret, "%s ", ops->name);
+	}
+
+	ret--; /* overwrite the last space */
+	ret += sprintf(buf + ret, "\n");
+	mutex_unlock(&rq_qos_mutex);
+
+	return ret;
+}
+
+static int rq_qos_switch(struct request_queue *q,
+		const struct rq_qos_ops *ops,
+		struct rq_qos *rqos)
+{
+	int ret;
+
+	blk_mq_freeze_queue(q);
+	if (!rqos) {
+		ret = ops->init(q);
+	} else {
+		ops->exit(rqos);
+		ret = 0;
+	}
+	blk_mq_unfreeze_queue(q);
+
+	return ret;
+}
+
+ssize_t queue_qos_store(struct request_queue *q, const char *page,
+			  size_t count)
+{
+	const struct rq_qos_ops *ops;
+	struct rq_qos *rqos;
+	const char *qosname;
+	char *buf;
+	bool add;
+	int ret;
+
+	if (!blk_queue_registered(q))
+		return -ENOENT;
+
+	buf = kstrdup(page, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	buf = strim(buf);
+	if (buf[0] != '+' && buf[0] != '-') {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	add = buf[0] == '+';
+	qosname = buf + 1;
+
+	rqos = rq_qos_by_name(q, qosname);
+	if ((buf[0] == '+' && rqos)) {
+		ret = -EEXIST;
+		goto out;
+	}
+
+	if ((buf[0] == '-' && !rqos)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	if (add) {
+		mutex_lock(&rq_qos_mutex);
+		ops = rq_qos_op_find(qosname);
+		if (!ops) {
+			/*
+			 * module_init callback may request this mutex
+			 */
+			mutex_unlock(&rq_qos_mutex);
+			request_module("%s", qosname);
+			mutex_lock(&rq_qos_mutex);
+			ops = rq_qos_op_find(qosname);
+		}
+		if (!ops) {
+			ret = -EINVAL;
+		} else if (ops->owner && !try_module_get(ops->owner)) {
+			ops = NULL;
+			ret = -EAGAIN;
+		}
+		mutex_unlock(&rq_qos_mutex);
+		if (!ops)
+			goto out;
+	} else {
+		ops = rqos->ops;
+	}
+
+	ret = rq_qos_switch(q, ops, add ? NULL : rqos);
+
+	if (add)
+		module_put(ops->owner);
+out:
+	kfree(buf);
+	return ret ? ret : count;
 }
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
index 3cfbc8668cba..586c3f5ec152 100644
--- a/block/blk-rq-qos.h
+++ b/block/blk-rq-qos.h
@@ -26,16 +26,28 @@  struct rq_wait {
 };
 
 struct rq_qos {
-	struct rq_qos_ops *ops;
+	const struct rq_qos_ops *ops;
 	struct request_queue *q;
 	enum rq_qos_id id;
+	refcount_t ref;
+	wait_queue_head_t waitq;
+	bool dying;
 	struct rq_qos *next;
 #ifdef CONFIG_BLK_DEBUG_FS
 	struct dentry *debugfs_dir;
 #endif
 };
 
+enum {
+	RQOS_FLAG_CGRP_POL = 1 << 0,
+};
+
 struct rq_qos_ops {
+	struct list_head node;
+	struct module *owner;
+	const char *name;
+	int flags;
+	int id;
 	void (*throttle)(struct rq_qos *, struct bio *);
 	void (*track)(struct rq_qos *, struct request *, struct bio *);
 	void (*merge)(struct rq_qos *, struct request *, struct bio *);
@@ -46,6 +58,7 @@  struct rq_qos_ops {
 	void (*cleanup)(struct rq_qos *, struct bio *);
 	void (*queue_depth_changed)(struct rq_qos *);
 	void (*exit)(struct rq_qos *);
+	int (*init)(struct request_queue *);
 	const struct blk_mq_debugfs_attr *debugfs_attrs;
 };
 
@@ -70,6 +83,19 @@  static inline struct rq_qos *rq_qos_id(struct request_queue *q,
 	return rqos;
 }
 
+static inline struct rq_qos *rq_qos_by_id(struct request_queue *q, int id)
+{
+	struct rq_qos *rqos;
+
+	WARN_ON(!mutex_is_locked(&q->sysfs_lock) && !spin_is_locked(&q->queue_lock));
+
+	for (rqos = q->rq_qos; rqos; rqos = rqos->next) {
+		if (rqos->id == id)
+			break;
+	}
+	return rqos;
+}
+
 static inline struct rq_qos *wbt_rq_qos(struct request_queue *q)
 {
 	return rq_qos_id(q, RQ_QOS_WBT);
@@ -132,6 +158,17 @@  static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
 	blk_mq_debugfs_unregister_rqos(rqos);
 }
 
+int rq_qos_register(struct rq_qos_ops *ops);
+void rq_qos_unregister(struct rq_qos_ops *ops);
+void rq_qos_activate(struct request_queue *q,
+		struct rq_qos *rqos, const struct rq_qos_ops *ops);
+void rq_qos_deactivate(struct rq_qos *rqos);
+ssize_t queue_qos_show(struct request_queue *q, char *buf);
+ssize_t queue_qos_store(struct request_queue *q, const char *page,
+			  size_t count);
+struct rq_qos *rq_qos_get(struct request_queue *q, int id);
+void rq_qos_put(struct rq_qos *rqos);
+
 typedef bool (acquire_inflight_cb_t)(struct rq_wait *rqw, void *private_data);
 typedef void (cleanup_cb_t)(struct rq_wait *rqw, void *private_data);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f35aea98bc35..d5698a7cda67 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -44,6 +44,10 @@  struct blk_crypto_profile;
  * Defined here to simplify include dependency.
  */
 #define BLKCG_MAX_POLS		6
+/*
+ * Non blk-rq-qos blkcg policies include blk-throttle and bfq
+ */
+#define BLKCG_NON_RQOS_POLS		2
 
 static inline int blk_validate_block_size(unsigned long bsize)
 {