@@ -38,8 +38,6 @@ static void blk_mq_hw_sysfs_release(struct kobject *kobj)
cancel_delayed_work_sync(&hctx->run_work);
- if (hctx->flags & BLK_MQ_F_BLOCKING)
- cleanup_srcu_struct(hctx->srcu);
blk_free_flush_queue(hctx->fq);
sbitmap_free(&hctx->ctx_map);
free_cpumask_var(hctx->cpumask);
@@ -225,19 +225,23 @@ EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);
*/
void blk_mq_quiesce_queue(struct request_queue *q)
{
- struct blk_mq_hw_ctx *hctx;
- unsigned int i;
- bool rcu = false;
+ bool blocking = !!(q->tag_set->flags & BLK_MQ_F_BLOCKING);
+ bool was_quiesced = __blk_mq_quiesce_queue_nowait(q);
- __blk_mq_quiesce_queue_nowait(q);
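+ /*
+  * Killing the dispatch counter makes percpu_ref_tryget_live() in
+  * hctx_lock() fail, so no new dispatch can start while quiescing.
+  */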
+ if (!was_quiesced && blocking)
+ percpu_ref_kill(&q->dispatch_counter);
- queue_for_each_hw_ctx(q, hctx, i) {
- if (hctx->flags & BLK_MQ_F_BLOCKING)
- synchronize_srcu(hctx->srcu);
- else
- rcu = true;
- }
- if (rcu)
+ /*
+  * In the F_BLOCKING case, a driver unquiescing the queue while it is
+  * still being quiesced only makes matters worse, so warn once and
+  * return instead of hanging here.
+  */
+ if (blocking)
+ wait_event(q->mq_quiesce_wq,
+ percpu_ref_is_zero(&q->dispatch_counter) ||
+ WARN_ON_ONCE(!percpu_ref_is_dying(
+ &q->dispatch_counter)));
+ else
synchronize_rcu();
}
EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
@@ -251,7 +255,10 @@ EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
*/
void blk_mq_unquiesce_queue(struct request_queue *q)
{
- blk_queue_flag_test_and_clear(QUEUE_FLAG_QUIESCED, q);
+ if (blk_queue_flag_test_and_clear(QUEUE_FLAG_QUIESCED, q)) {
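+ /* Let hctx_lock() take dispatch references again */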
+ if (q->tag_set->flags & BLK_MQ_F_BLOCKING)
+ percpu_ref_resurrect(&q->dispatch_counter);
+ }
/* dispatch requests which are inserted during quiescing */
blk_mq_run_hw_queues(q, true);
@@ -704,24 +711,21 @@ void blk_mq_complete_request(struct request *rq)
}
EXPORT_SYMBOL(blk_mq_complete_request);
-static void hctx_unlock(struct blk_mq_hw_ctx *hctx, int srcu_idx)
- __releases(hctx->srcu)
+static void hctx_unlock(struct blk_mq_hw_ctx *hctx)
{
- if (!(hctx->flags & BLK_MQ_F_BLOCKING))
- rcu_read_unlock();
+ if (hctx->flags & BLK_MQ_F_BLOCKING)
+ percpu_ref_put(&hctx->queue->dispatch_counter);
else
- srcu_read_unlock(hctx->srcu, srcu_idx);
+ rcu_read_unlock();
}
-static void hctx_lock(struct blk_mq_hw_ctx *hctx, int *srcu_idx)
- __acquires(hctx->srcu)
+/* Returning false means that the queue is being quiesced */
+static inline bool hctx_lock(struct blk_mq_hw_ctx *hctx)
{
- if (!(hctx->flags & BLK_MQ_F_BLOCKING)) {
- /* shut up gcc false positive */
- *srcu_idx = 0;
- rcu_read_lock();
- } else
- *srcu_idx = srcu_read_lock(hctx->srcu);
+ if (hctx->flags & BLK_MQ_F_BLOCKING)
+ return percpu_ref_tryget_live(&hctx->queue->dispatch_counter);
+ rcu_read_lock();
+ return true;
}
/**
@@ -1501,8 +1505,6 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
*/
static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
{
- int srcu_idx;
-
/*
* We should be running this queue from one of the CPUs that
* are mapped to it.
@@ -1536,9 +1538,10 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
- hctx_lock(hctx, &srcu_idx);
- blk_mq_sched_dispatch_requests(hctx);
- hctx_unlock(hctx, srcu_idx);
+ if (hctx_lock(hctx)) {
+ blk_mq_sched_dispatch_requests(hctx);
+ hctx_unlock(hctx);
+ }
}
static inline int blk_mq_first_mapped_cpu(struct blk_mq_hw_ctx *hctx)
@@ -1650,7 +1653,6 @@ EXPORT_SYMBOL(blk_mq_delay_run_hw_queue);
*/
void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
{
- int srcu_idx;
bool need_run;
/*
@@ -1661,10 +1663,12 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
* And queue will be rerun in blk_mq_unquiesce_queue() if it is
* quiesced.
*/
- hctx_lock(hctx, &srcu_idx);
+ if (!hctx_lock(hctx))
+ return;
+
need_run = !blk_queue_quiesced(hctx->queue) &&
blk_mq_hctx_has_pending(hctx);
- hctx_unlock(hctx, srcu_idx);
+ hctx_unlock(hctx);
if (need_run)
__blk_mq_delay_run_hw_queue(hctx, async, 0);
@@ -2009,7 +2013,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
bool run_queue = true;
/*
- * RCU or SRCU read lock is needed before checking quiesced flag.
+ * hctx_lock() is needed before checking quiesced flag.
*
* When queue is stopped or quiesced, ignore 'bypass_insert' from
* blk_mq_request_issue_directly(), and return BLK_STS_OK to caller,
@@ -2057,11 +2061,14 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
struct request *rq, blk_qc_t *cookie)
{
blk_status_t ret;
- int srcu_idx;
might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
- hctx_lock(hctx, &srcu_idx);
+ /* The queue is being quiesced; insert the request instead of issuing it */
+ if (!hctx_lock(hctx)) {
+ blk_mq_sched_insert_request(rq, false, false, false);
+ return;
+ }
ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false, true);
if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
@@ -2069,19 +2076,22 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
else if (ret != BLK_STS_OK)
blk_mq_end_request(rq, ret);
- hctx_unlock(hctx, srcu_idx);
+ hctx_unlock(hctx);
}
blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last)
{
blk_status_t ret;
- int srcu_idx;
blk_qc_t unused_cookie;
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
- hctx_lock(hctx, &srcu_idx);
+ /* The queue is being quiesced; insert the request instead of issuing it */
+ if (!hctx_lock(hctx)) {
+ blk_mq_sched_insert_request(rq, false, false, false);
+ return BLK_STS_OK;
+ }
ret = __blk_mq_try_issue_directly(hctx, rq, &unused_cookie, true, last);
- hctx_unlock(hctx, srcu_idx);
+ hctx_unlock(hctx);
return ret;
}
@@ -2612,20 +2622,6 @@ static void blk_mq_exit_hw_queues(struct request_queue *q,
}
}
-static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set)
-{
- int hw_ctx_size = sizeof(struct blk_mq_hw_ctx);
-
- BUILD_BUG_ON(ALIGN(offsetof(struct blk_mq_hw_ctx, srcu),
- __alignof__(struct blk_mq_hw_ctx)) !=
- sizeof(struct blk_mq_hw_ctx));
-
- if (tag_set->flags & BLK_MQ_F_BLOCKING)
- hw_ctx_size += sizeof(struct srcu_struct);
-
- return hw_ctx_size;
-}
-
static int blk_mq_init_hctx(struct request_queue *q,
struct blk_mq_tag_set *set,
struct blk_mq_hw_ctx *hctx, unsigned hctx_idx)
@@ -2663,7 +2659,7 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set,
struct blk_mq_hw_ctx *hctx;
gfp_t gfp = GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY;
- hctx = kzalloc_node(blk_mq_hw_ctx_size(set), gfp, node);
+ hctx = kzalloc_node(sizeof(struct blk_mq_hw_ctx), gfp, node);
if (!hctx)
goto fail_alloc_hctx;
@@ -2706,8 +2702,6 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set,
if (!hctx->fq)
goto free_bitmap;
- if (hctx->flags & BLK_MQ_F_BLOCKING)
- init_srcu_struct(hctx->srcu);
blk_mq_hctx_kobj_init(hctx);
return hctx;
@@ -3187,6 +3181,13 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
mutex_unlock(&q->sysfs_lock);
}
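+/*
+ * Release callback for q->dispatch_counter: runs once the counter has been
+ * killed and the last dispatch reference is dropped, waking up the waiter
+ * in blk_mq_quiesce_queue().
+ */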
+static void blk_mq_dispatch_counter_release(struct percpu_ref *ref)
+{
+ struct request_queue *q = container_of(ref, struct request_queue,
+ dispatch_counter);
+ wake_up_all(&q->mq_quiesce_wq);
+}
+
struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
struct request_queue *q,
bool elevator_init)
@@ -3203,6 +3204,14 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
if (blk_mq_alloc_ctxs(q))
goto err_poll;
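+ /*
+  * For blocking drivers the dispatch counter replaces SRCU: it is
+  * killed on quiesce and resurrected on unquiesce, hence
+  * PERCPU_REF_ALLOW_REINIT.
+  */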
+ if (set->flags & BLK_MQ_F_BLOCKING) {
+ init_waitqueue_head(&q->mq_quiesce_wq);
+ if (percpu_ref_init(&q->dispatch_counter,
+ blk_mq_dispatch_counter_release,
+ PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
+ goto err_hctxs;
+ }
+
/* init q->mq_kobj and sw queues' kobjects */
blk_mq_sysfs_init(q);
@@ -3211,7 +3220,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
blk_mq_realloc_hw_ctxs(set, q);
if (!q->nr_hw_queues)
- goto err_hctxs;
+ goto err_dispatch_counter;
INIT_WORK(&q->timeout_work, blk_mq_timeout_work);
blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ);
@@ -3245,6 +3254,9 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
return q;
+err_dispatch_counter:
+ if (set->flags & BLK_MQ_F_BLOCKING)
+ percpu_ref_exit(&q->dispatch_counter);
err_hctxs:
kfree(q->queue_hw_ctx);
q->nr_hw_queues = 0;
@@ -798,9 +798,13 @@ static void blk_release_queue(struct kobject *kobj)
blk_queue_free_zone_bitmaps(q);
- if (queue_is_mq(q))
+ if (queue_is_mq(q)) {
blk_mq_release(q);
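+ /* Counterpart of percpu_ref_init() in blk_mq_init_allocated_queue() */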
+ if (q->tag_set->flags & BLK_MQ_F_BLOCKING)
+ percpu_ref_exit(&q->dispatch_counter);
+ }
+
blk_trace_shutdown(q);
mutex_lock(&q->debugfs_mutex);
debugfs_remove_recursive(q->debugfs_dir);
@@ -4,7 +4,6 @@
#include <linux/blkdev.h>
#include <linux/sbitmap.h>
-#include <linux/srcu.h>
struct blk_mq_tags;
struct blk_flush_queue;
@@ -173,13 +172,6 @@ struct blk_mq_hw_ctx {
* q->unused_hctx_list.
*/
struct list_head hctx_list;
-
- /**
- * @srcu: Sleepable RCU. Use as lock when type of the hardware queue is
- * blocking (BLK_MQ_F_BLOCKING). Must be the last member - see also
- * blk_mq_hw_ctx_size().
- */
- struct srcu_struct srcu[];
};
/**
@@ -572,6 +572,10 @@ struct request_queue {
*/
struct mutex mq_freeze_lock;
+ /* only used for BLK_MQ_F_BLOCKING */
+ struct percpu_ref dispatch_counter;
+ wait_queue_head_t mq_quiesce_wq;
+
struct blk_mq_tag_set *tag_set;
struct list_head tag_set_list;
struct bio_set bio_split;