
[V2,1/2] blk-mq: serialize queue quiesce and unquiesce by mutex

Message ID 20200825141734.115879-2-ming.lei@redhat.com (mailing list archive)
State New, archived
Series blk-mq: implement queue quiesce via percpu_ref for BLK_MQ_F_BLOCKING

Commit Message

Ming Lei Aug. 25, 2020, 2:17 p.m. UTC
Add .mq_quiesce_lock to the request queue so that queue quiesce and
unquiesce can be serialized. This also lets us avoid an unnecessary
synchronize_rcu() when the queue has already been quiesced.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Cc: Sagi Grimberg <sagi@grimberg.me>
Cc: Bart Van Assche <bvanassche@acm.org>
Cc: Johannes Thumshirn <Johannes.Thumshirn@wdc.com>
Cc: Chao Leng <lengchao@huawei.com>
---
 block/blk-core.c       |  2 ++
 block/blk-mq.c         | 11 +++++++++++
 include/linux/blkdev.h |  2 ++
 3 files changed, 15 insertions(+)

Comments

Chao Leng Aug. 26, 2020, 7:51 a.m. UTC | #1
It doesn't matter. Re-entry of queue quiesce/unquiesce is not safe and
must be avoided by some other mechanism; otherwise exceptions may occur.
Introducing mq_quiesce_lock looks like it saves possible synchronization
waits, but that case should not happen, and if it really does we need to fix it.
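
To make the hazard concrete, the kind of interleaving being discussed
(an illustrative timeline, not taken from a real trace) looks like this
without serialization:

/*
 *   CPU0: blk_mq_quiesce_queue(q)        CPU1: blk_mq_unquiesce_queue(q)
 *   -----------------------------        -------------------------------
 *   set QUEUE_FLAG_QUIESCED
 *                                        clear QUEUE_FLAG_QUIESCED
 *                                        blk_mq_run_hw_queues(q, true)
 *   synchronize_rcu() returns, but the queue is no longer quiesced, so
 *   the caller's assumption that no dispatch is in flight is now wrong.
 */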

Ming Lei Aug. 26, 2020, 8:54 a.m. UTC | #2
On Wed, Aug 26, 2020 at 03:51:25PM +0800, Chao Leng wrote:
> It doesn't matter. Re-entry of queue quiesce/unquiesce is not safe and
> must be avoided by some other mechanism; otherwise exceptions may occur.
> Introducing mq_quiesce_lock looks like it saves possible synchronization
> waits, but that case should not happen, and if it really does we need to fix it.

Sagi mentioned there may be nested queue quiesce, so I added .mq_quiesce_lock
to make that usage easy to support and to avoid the percpu_ref warning
in such a case.

Anyway, I don't see any problem with adding .mq_quiesce_lock, so I'd suggest
we move on this way.


Thanks, 
Ming
Keith Busch Aug. 26, 2020, 3:36 p.m. UTC | #3
On Wed, Aug 26, 2020 at 04:54:22PM +0800, Ming Lei wrote:
> On Wed, Aug 26, 2020 at 03:51:25PM +0800, Chao Leng wrote:
> > It doesn't matter. Re-entry of queue quiesce/unquiesce is not safe and
> > must be avoided by some other mechanism; otherwise exceptions may occur.
> > Introducing mq_quiesce_lock looks like it saves possible synchronization
> > waits, but that case should not happen, and if it really does we need to fix it.
> 
> Sagi mentioned there may be nested queue quiesce, so I added .mq_quiesce_lock
> to make that usage easy to support and to avoid the percpu_ref warning
> in such a case.
> 
> Anyway, I don't see any problem with adding .mq_quiesce_lock, so I'd suggest
> we move on this way.

I'm not sure there really are any nested queue quiesce paths, but if
there are, wouldn't we need to track the "depth" like how a queue freeze
works?
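
For illustration, a depth-counted variant along the lines Keith mentions,
modeled loosely on how queue freezing tracks mq_freeze_depth, might look
like the sketch below. The q->mq_quiesce_depth field is hypothetical and
not part of this series; the srcu/rcu loop is the one the existing
blk_mq_quiesce_queue() already runs.

void blk_mq_quiesce_queue(struct request_queue *q)
{
	mutex_lock(&q->mq_quiesce_lock);
	if (q->mq_quiesce_depth++ == 0) {	/* hypothetical counter */
		struct blk_mq_hw_ctx *hctx;
		unsigned int i;
		bool rcu = false;

		/* only the first caller pays for the grace period(s) */
		blk_mq_quiesce_queue_nowait(q);
		queue_for_each_hw_ctx(q, hctx, i) {
			if (hctx->flags & BLK_MQ_F_BLOCKING)
				synchronize_srcu(hctx->srcu);
			else
				rcu = true;
		}
		if (rcu)
			synchronize_rcu();
	}
	mutex_unlock(&q->mq_quiesce_lock);
}

void blk_mq_unquiesce_queue(struct request_queue *q)
{
	mutex_lock(&q->mq_quiesce_lock);
	WARN_ON_ONCE(q->mq_quiesce_depth <= 0);	/* unbalanced unquiesce */
	if (--q->mq_quiesce_depth == 0) {
		/* only the last unquiesce restarts dispatch */
		blk_queue_flag_clear(QUEUE_FLAG_QUIESCED, q);
		blk_mq_run_hw_queues(q, true);
	}
	mutex_unlock(&q->mq_quiesce_lock);
}

The WARN_ON_ONCE() is where the unbalanced-call concern raised later in
the thread would surface.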
Sagi Grimberg Aug. 26, 2020, 4:23 p.m. UTC | #4
>>> It doesn't matter. Re-entry of queue quiesce/unquiesce is not safe and
>>> must be avoided by some other mechanism; otherwise exceptions may occur.
>>> Introducing mq_quiesce_lock looks like it saves possible synchronization
>>> waits, but that case should not happen, and if it really does we need to fix it.
>>
>> Sagi mentioned there may be nested queue quiesce, so I added .mq_quiesce_lock
>> to make that usage easy to support and to avoid the percpu_ref warning
>> in such a case.
>>
>> Anyway, I don't see any problem with adding .mq_quiesce_lock, so I'd suggest
>> we move on this way.
> 
> I'm not sure there really are any nested queue quiesce paths, but if
> there are, wouldn't we need to track the "depth" like how a queue freeze
> works?

We might need it when async quiesce is implemented for this, because
then quiesce will only "start", and a different context will wait for
the quiesced event (where we may need to add a wakeup for waiters).

This is why I want to see this together with the async quiesce piece.
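
To make that concrete, here is a minimal sketch of the split Sagi
describes, assuming the percpu_ref direction of this series:
blk_mq_quiesce_queue_nowait() is real, but the q->dispatch_counter ref,
the q->quiesce_wq waitqueue, and the driver_* helpers are made up for
illustration.

/* context A: mark the queue quiesced, don't wait for in-flight dispatch */
static void driver_start_quiesce(struct request_queue *q)
{
	blk_mq_quiesce_queue_nowait(q);
	percpu_ref_kill(&q->dispatch_counter);	/* made-up ref, patch 2/2 style */
}

/* context B: later, wait until no dispatch is left inside the read side;
 * the ref's release callback would need to wake up q->quiesce_wq waiters */
static void driver_wait_quiesce_done(struct request_queue *q)
{
	wait_event(q->quiesce_wq,
		   percpu_ref_is_zero(&q->dispatch_counter));
}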
Ming Lei Aug. 27, 2020, 2:38 a.m. UTC | #5
On Wed, Aug 26, 2020 at 08:36:33AM -0700, Keith Busch wrote:
> On Wed, Aug 26, 2020 at 04:54:22PM +0800, Ming Lei wrote:
> > On Wed, Aug 26, 2020 at 03:51:25PM +0800, Chao Leng wrote:
> > > It doesn't matter. Re-entry of queue quiesce/unquiesce is not safe and
> > > must be avoided by some other mechanism; otherwise exceptions may occur.
> > > Introducing mq_quiesce_lock looks like it saves possible synchronization
> > > waits, but that case should not happen, and if it really does we need to fix it.
> > 
> > Sagi mentioned there may be nested queue quiesce, so I added .mq_quiesce_lock
> > to make that usage easy to support and to avoid the percpu_ref warning
> > in such a case.
> > 
> > Anyway, I don't see any problem with adding .mq_quiesce_lock, so I'd suggest
> > we move on this way.
> 
> I'm not sure there really are any nested queue quiesce paths, but if
> there are, wouldn't we need to track the "depth" like how a queue freeze
> works?

Both an atomic 'depth' and .mq_quiesce_lock can work for nested queue
quiesce, since the mutex lets us avoid an unnecessary queue quiesce.
percpu_ref_kill() / percpu_ref_kill_and_confirm() warn if the percpu_ref
has already been killed, which is why I think Sagi's suggestion is good.

But a 'depth' counter can easily cause trouble, such as unbalanced
quiesce/unquiesce, while there is no such issue with the mutex; at least
so far we don't require the two calls to be strictly paired.
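
A small usage sketch of that difference, assuming this patch's semantics
(the example_* helpers are made up): two uncoordinated paths quiesce the
same queue, and a single unquiesce restores it, whereas a strict depth
counter would leave the queue quiesced after the same sequence.

static void example_timeout_handler(struct request_queue *q)
{
	blk_mq_quiesce_queue(q);	/* first caller: really quiesces */
}

static void example_reset_work(struct request_queue *q)
{
	blk_mq_quiesce_queue(q);	/* already quiesced: returns early,
					 * no second synchronize_rcu() */

	/* ... tear down and re-establish the transport ... */

	blk_mq_unquiesce_queue(q);	/* one call restarts dispatch; a
					 * strict depth counter would still
					 * hold the queue quiesced here */
}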


Thanks,
Ming

Patch

diff --git a/block/blk-core.c b/block/blk-core.c
index d9d632639bd1..ffc57df70064 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -561,6 +561,8 @@ struct request_queue *blk_alloc_queue(int node_id)
 	init_waitqueue_head(&q->mq_freeze_wq);
 	mutex_init(&q->mq_freeze_lock);
 
+	mutex_init(&q->mq_quiesce_lock);
+
 	/*
 	 * Init percpu_ref in atomic mode so that it's faster to shutdown.
 	 * See blk_register_queue() for details.
diff --git a/block/blk-mq.c b/block/blk-mq.c
index b3d2785eefe9..817e016ef886 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -224,6 +224,11 @@ void blk_mq_quiesce_queue(struct request_queue *q)
 	unsigned int i;
 	bool rcu = false;
 
+	mutex_lock(&q->mq_quiesce_lock);
+
+	if (blk_queue_quiesced(q))
+		goto exit;
+
 	blk_mq_quiesce_queue_nowait(q);
 
 	queue_for_each_hw_ctx(q, hctx, i) {
@@ -234,6 +239,8 @@ void blk_mq_quiesce_queue(struct request_queue *q)
 	}
 	if (rcu)
 		synchronize_rcu();
+ exit:
+	mutex_unlock(&q->mq_quiesce_lock);
 }
 EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
 
@@ -246,10 +253,14 @@ EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
  */
 void blk_mq_unquiesce_queue(struct request_queue *q)
 {
+	mutex_lock(&q->mq_quiesce_lock);
+
 	blk_queue_flag_clear(QUEUE_FLAG_QUIESCED, q);
 
 	/* dispatch requests which are inserted during quiescing */
 	blk_mq_run_hw_queues(q, true);
+
+	mutex_unlock(&q->mq_quiesce_lock);
 }
 EXPORT_SYMBOL_GPL(blk_mq_unquiesce_queue);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d8dba550ecac..5ed03066b33e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -569,6 +569,8 @@ struct request_queue {
 	 */
 	struct mutex		mq_freeze_lock;
 
+	struct mutex		mq_quiesce_lock;
+
 	struct blk_mq_tag_set	*tag_set;
 	struct list_head	tag_set_list;
 	struct bio_set		bio_split;