
[7/8] nvme: use blk_mq_queue_tag_busy_iter

Message ID 1552640264-26101-8-git-send-email-jianchao.w.wang@oracle.com (mailing list archive)
State New, archived
Series: blk-mq: use static_rqs to iterate busy tags

Commit Message

jianchao.wang March 15, 2019, 8:57 a.m. UTC
blk_mq_tagset_busy_iter is not safe: it can pick up stale requests
from tags->rqs[]. Use blk_mq_queue_tag_busy_iter instead. A new helper
interface, nvme_iterate_inflight_rqs, is introduced to iterate all of
the namespaces under a ctrl.

Signed-off-by: Jianchao Wang <jianchao.w.wang@oracle.com>
---
 drivers/nvme/host/core.c   | 12 ++++++++++++
 drivers/nvme/host/fc.c     | 12 ++++++------
 drivers/nvme/host/nvme.h   |  2 ++
 drivers/nvme/host/pci.c    |  5 +++--
 drivers/nvme/host/rdma.c   |  6 +++---
 drivers/nvme/host/tcp.c    |  5 +++--
 drivers/nvme/target/loop.c |  6 +++---
 7 files changed, 32 insertions(+), 16 deletions(-)
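
The conversion at each call site is mechanical: the tag-set-wide iteration
becomes the new per-namespace helper for the I/O queues, plus a
per-request_queue iteration for the admin queue. Condensed from the pci.c
hunk below (the trailing 'true' argument to blk_mq_queue_tag_busy_iter is
presumably added by the blk-mq patches earlier in this series, since the
mainline version takes three arguments):

	/* before: walk tags->rqs[] across the whole tag set */
	blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl);
	blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_request, &dev->ctrl);

	/* after: walk only the in-flight requests of each request_queue */
	nvme_iterate_inflight_rqs(&dev->ctrl, nvme_cancel_request, &dev->ctrl);
	blk_mq_queue_tag_busy_iter(dev->ctrl.admin_q,
			nvme_cancel_request, &dev->ctrl, true);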

Comments

James Smart March 15, 2019, 4:33 p.m. UTC | #1
On 3/15/2019 1:57 AM, Jianchao Wang wrote:
> blk_mq_tagset_busy_iter is not safe: it can pick up stale requests
> from tags->rqs[]. Use blk_mq_queue_tag_busy_iter instead. A new helper
> interface, nvme_iterate_inflight_rqs, is introduced to iterate all of
> the namespaces under a ctrl.
> 
> Signed-off-by: Jianchao Wang <jianchao.w.wang@oracle.com>
> ---
>   drivers/nvme/host/core.c   | 12 ++++++++++++
>   drivers/nvme/host/fc.c     | 12 ++++++------
>   drivers/nvme/host/nvme.h   |  2 ++
>   drivers/nvme/host/pci.c    |  5 +++--
>   drivers/nvme/host/rdma.c   |  6 +++---
>   drivers/nvme/host/tcp.c    |  5 +++--
>   drivers/nvme/target/loop.c |  6 +++---
>   7 files changed, 32 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 6a9dd68..5760fad 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -3816,6 +3816,18 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
>   }
>   EXPORT_SYMBOL_GPL(nvme_start_queues);
>   
> +void nvme_iterate_inflight_rqs(struct nvme_ctrl *ctrl,
> +		busy_iter_fn *fn, void *data)
> +{
> +	struct nvme_ns *ns;
> +
> +	down_read(&ctrl->namespaces_rwsem);
> +	list_for_each_entry(ns, &ctrl->namespaces, list)
> +		blk_mq_queue_tag_busy_iter(ns->queue, fn, data, true);
> +	up_read(&ctrl->namespaces_rwsem);
> +}
> +EXPORT_SYMBOL_GPL(nvme_iterate_inflight_rqs);

Hmm... so this aborts ios for namespaces. How about ios that aren't for
a namespace but rather for the controller itself, for example anything
on the admin queue? Rather critical that those ios be terminated as well.

-- james
James Smart March 15, 2019, 4:39 p.m. UTC | #2
On 3/15/2019 9:33 AM, James Smart wrote:
> On 3/15/2019 1:57 AM, Jianchao Wang wrote:
>> blk_mq_tagset_busy_iter is not safe: it can pick up stale requests
>> from tags->rqs[]. Use blk_mq_queue_tag_busy_iter instead. A new helper
>> interface, nvme_iterate_inflight_rqs, is introduced to iterate all of
>> the namespaces under a ctrl.
>>
>> Signed-off-by: Jianchao Wang <jianchao.w.wang@oracle.com>
>> ---
>>   drivers/nvme/host/core.c   | 12 ++++++++++++
>>   drivers/nvme/host/fc.c     | 12 ++++++------
>>   drivers/nvme/host/nvme.h   |  2 ++
>>   drivers/nvme/host/pci.c    |  5 +++--
>>   drivers/nvme/host/rdma.c   |  6 +++---
>>   drivers/nvme/host/tcp.c    |  5 +++--
>>   drivers/nvme/target/loop.c |  6 +++---
>>   7 files changed, 32 insertions(+), 16 deletions(-)
>>
>> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
>> index 6a9dd68..5760fad 100644
>> --- a/drivers/nvme/host/core.c
>> +++ b/drivers/nvme/host/core.c
>> @@ -3816,6 +3816,18 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
>>   }
>>   EXPORT_SYMBOL_GPL(nvme_start_queues);
>>
>> +void nvme_iterate_inflight_rqs(struct nvme_ctrl *ctrl,
>> +        busy_iter_fn *fn, void *data)
>> +{
>> +    struct nvme_ns *ns;
>> +
>> +    down_read(&ctrl->namespaces_rwsem);
>> +    list_for_each_entry(ns, &ctrl->namespaces, list)
>> +        blk_mq_queue_tag_busy_iter(ns->queue, fn, data, true);
>> +    up_read(&ctrl->namespaces_rwsem);
>> +}
>> +EXPORT_SYMBOL_GPL(nvme_iterate_inflight_rqs);
>
> Hmm... so this aborts ios for namespaces. How about ios that aren't
> for a namespace but rather for the controller itself, for example
> anything on the admin queue? Rather critical that those ios be
> terminated as well.
NVM - I didn't look at which tag set. But it does highlight that this
doesn't cover anything issued by the core/transport on the io queues.

-- james
Hannes Reinecke March 15, 2019, 4:49 p.m. UTC | #3
On 3/15/19 5:39 PM, James Smart wrote:
> 
> 
> On 3/15/2019 9:33 AM, James Smart wrote:
>> On 3/15/2019 1:57 AM, Jianchao Wang wrote:
>>> blk_mq_tagset_busy_iter is not safe: it can pick up stale requests
>>> from tags->rqs[]. Use blk_mq_queue_tag_busy_iter instead. A new helper
>>> interface, nvme_iterate_inflight_rqs, is introduced to iterate all of
>>> the namespaces under a ctrl.
>>>
>>> Signed-off-by: Jianchao Wang <jianchao.w.wang@oracle.com>
>>> ---
>>>   drivers/nvme/host/core.c   | 12 ++++++++++++
>>>   drivers/nvme/host/fc.c     | 12 ++++++------
>>>   drivers/nvme/host/nvme.h   |  2 ++
>>>   drivers/nvme/host/pci.c    |  5 +++--
>>>   drivers/nvme/host/rdma.c   |  6 +++---
>>>   drivers/nvme/host/tcp.c    |  5 +++--
>>>   drivers/nvme/target/loop.c |  6 +++---
>>>   7 files changed, 32 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
>>> index 6a9dd68..5760fad 100644
>>> --- a/drivers/nvme/host/core.c
>>> +++ b/drivers/nvme/host/core.c
>>> @@ -3816,6 +3816,18 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
>>>   }
>>>   EXPORT_SYMBOL_GPL(nvme_start_queues);
>>>
>>> +void nvme_iterate_inflight_rqs(struct nvme_ctrl *ctrl,
>>> +        busy_iter_fn *fn, void *data)
>>> +{
>>> +    struct nvme_ns *ns;
>>> +
>>> +    down_read(&ctrl->namespaces_rwsem);
>>> +    list_for_each_entry(ns, &ctrl->namespaces, list)
>>> +        blk_mq_queue_tag_busy_iter(ns->queue, fn, data, true);
>>> +    up_read(&ctrl->namespaces_rwsem);
>>> +}
>>> +EXPORT_SYMBOL_GPL(nvme_iterate_inflight_rqs);
>>
>> Hmm... so this aborts ios for namespaces. How about ios that aren't
>> for a namespace but rather for the controller itself, for example
>> anything on the admin queue? Rather critical that those ios be
>> terminated as well.
> NVM - I didn't look at which tag set. But it does highlight that this
> doesn't cover anything issued by the core/transport on the io queues.
> 
Actually, I would consider that an error.
_All_ commands on io queues (internal or block-layer generated) should
be tracked by the sbitmap and hence by the tag iterator.
That's what we have the 'private' tags for, and why the iter callback 
has this ominous 'bool' argument...

Cheers,

Hannes
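
The 'bool' Hannes refers to is the reserved-tag flag that blk-mq passes to
the iterator callback; reserved tags back driver-internal commands rather
than normal block-layer I/O, and blk_mq_tagset_busy_iter() invokes the
callback for reserved and normal tags alike. A minimal sketch of such a
callback in the three-argument busy_tag_iter_fn shape (demo_cancel_rq is
made up for illustration and is not part of this patch):

	/*
	 * Sketch only: @reserved is true when the request came from the
	 * reserved-tag region, i.e. a driver-internal command rather than
	 * a normal block-layer I/O.
	 */
	static bool demo_cancel_rq(struct request *rq, void *data, bool reserved)
	{
		struct nvme_ctrl *ctrl = data;

		dev_dbg(ctrl->device, "cancelling %s request, tag %d\n",
			reserved ? "reserved" : "I/O", rq->tag);
		nvme_req(rq)->status = NVME_SC_ABORT_REQ;
		blk_mq_complete_request(rq);
		return true;	/* keep iterating */
	}

Whether the per-queue iterator used by this series sees those internal
commands as well is the point under discussion here.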
jianchao.wang March 18, 2019, 7 a.m. UTC | #4
Hi James

On 3/16/19 12:33 AM, James Smart wrote:
> On 3/15/2019 1:57 AM, Jianchao Wang wrote:
>> blk_mq_tagset_busy_iter is not safe: it can pick up stale requests
>> from tags->rqs[]. Use blk_mq_queue_tag_busy_iter instead. A new helper
>> interface, nvme_iterate_inflight_rqs, is introduced to iterate all of
>> the namespaces under a ctrl.
>>
>> Signed-off-by: Jianchao Wang <jianchao.w.wang@oracle.com>
>> ---
>>   drivers/nvme/host/core.c   | 12 ++++++++++++
>>   drivers/nvme/host/fc.c     | 12 ++++++------
>>   drivers/nvme/host/nvme.h   |  2 ++
>>   drivers/nvme/host/pci.c    |  5 +++--
>>   drivers/nvme/host/rdma.c   |  6 +++---
>>   drivers/nvme/host/tcp.c    |  5 +++--
>>   drivers/nvme/target/loop.c |  6 +++---
>>   7 files changed, 32 insertions(+), 16 deletions(-)
>>
>> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
>> index 6a9dd68..5760fad 100644
>> --- a/drivers/nvme/host/core.c
>> +++ b/drivers/nvme/host/core.c
>> @@ -3816,6 +3816,18 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
>>   }
>>   EXPORT_SYMBOL_GPL(nvme_start_queues);
>>
>> +void nvme_iterate_inflight_rqs(struct nvme_ctrl *ctrl,
>> +        busy_iter_fn *fn, void *data)
>> +{
>> +    struct nvme_ns *ns;
>> +
>> +    down_read(&ctrl->namespaces_rwsem);
>> +    list_for_each_entry(ns, &ctrl->namespaces, list)
>> +        blk_mq_queue_tag_busy_iter(ns->queue, fn, data, true);
>> +    up_read(&ctrl->namespaces_rwsem);
>> +}
>> +EXPORT_SYMBOL_GPL(nvme_iterate_inflight_rqs);
> 
> Hmm... so this aborts ios for namespaces. How about ios that aren't for a namespace but rather for the controller itself, for example anything on the admin queue? Rather critical that those ios be terminated as well.
> 

The blk_mq_tagset_busy_iter that nvme currently uses iterates the in-flight ios for the whole controller.
But because blk_mq_tagset_busy_iter is not safe, as discussed in patch 0/8,
this patch changes it to blk_mq_queue_tag_busy_iter, which works on a single namespace/request_queue.

Thanks
Jianchao

Patch

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 6a9dd68..5760fad 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3816,6 +3816,18 @@  void nvme_start_queues(struct nvme_ctrl *ctrl)
 }
 EXPORT_SYMBOL_GPL(nvme_start_queues);
 
+void nvme_iterate_inflight_rqs(struct nvme_ctrl *ctrl,
+		busy_iter_fn *fn, void *data)
+{
+	struct nvme_ns *ns;
+
+	down_read(&ctrl->namespaces_rwsem);
+	list_for_each_entry(ns, &ctrl->namespaces, list)
+		blk_mq_queue_tag_busy_iter(ns->queue, fn, data, true);
+	up_read(&ctrl->namespaces_rwsem);
+}
+EXPORT_SYMBOL_GPL(nvme_iterate_inflight_rqs);
+
 int __init nvme_core_init(void)
 {
 	int result = -ENOMEM;
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 89accc7..02b2de0 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2368,7 +2368,7 @@  nvme_fc_complete_rq(struct request *rq)
 /*
  * This routine is used by the transport when it needs to find active
  * io on a queue that is to be terminated. The transport uses
- * blk_mq_tagset_busy_itr() to find the busy requests, which then invoke
+ * blk_mq_queue_tag_busy_iter() to find the busy requests, which then invoke
  * this routine to kill them on a 1 by 1 basis.
  *
  * As FC allocates FC exchange for each io, the transport must contact
@@ -2729,7 +2729,7 @@  nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
 	 * If io queues are present, stop them and terminate all outstanding
 	 * ios on them. As FC allocates FC exchange for each io, the
 	 * transport must contact the LLDD to terminate the exchange,
-	 * thus releasing the FC exchange. We use blk_mq_tagset_busy_itr()
+	 * thus releasing the FC exchange. We use blk_mq_queue_tag_busy_iter
 	 * to tell us what io's are busy and invoke a transport routine
 	 * to kill them with the LLDD.  After terminating the exchange
 	 * the LLDD will call the transport's normal io done path, but it
@@ -2739,7 +2739,7 @@  nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
 	 */
 	if (ctrl->ctrl.queue_count > 1) {
 		nvme_stop_queues(&ctrl->ctrl);
-		blk_mq_tagset_busy_iter(&ctrl->tag_set,
+		nvme_iterate_inflight_rqs(&ctrl->ctrl,
 				nvme_fc_terminate_exchange, &ctrl->ctrl);
 	}
 
@@ -2757,12 +2757,12 @@  nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
 
 	/*
 	 * clean up the admin queue. Same thing as above.
-	 * use blk_mq_tagset_busy_itr() and the transport routine to
+	 * use blk_mq_queue_tag_busy_iter() and the transport routine to
 	 * terminate the exchanges.
 	 */
 	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
-	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
-				nvme_fc_terminate_exchange, &ctrl->ctrl);
+	blk_mq_queue_tag_busy_iter(ctrl->ctrl.admin_q,
+				nvme_fc_terminate_exchange, &ctrl->ctrl, true);
 
 	/* kill the aens as they are a separate path */
 	nvme_fc_abort_aen_ops(ctrl);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index c4a1bb4..6dc4ff8 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -441,6 +441,8 @@  void nvme_unfreeze(struct nvme_ctrl *ctrl);
 void nvme_wait_freeze(struct nvme_ctrl *ctrl);
 void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout);
 void nvme_start_freeze(struct nvme_ctrl *ctrl);
+void nvme_iterate_inflight_rqs(struct nvme_ctrl *ctrl,
+		busy_iter_fn *fn, void *data);
 
 #define NVME_QID_ANY -1
 struct request *nvme_alloc_request(struct request_queue *q,
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 7fee665..0f0e361 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2476,8 +2476,9 @@  static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 	nvme_suspend_queue(&dev->queues[0]);
 	nvme_pci_disable(dev);
 
-	blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl);
-	blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_request, &dev->ctrl);
+	nvme_iterate_inflight_rqs(&dev->ctrl, nvme_cancel_request, &dev->ctrl);
+	blk_mq_queue_tag_busy_iter(dev->ctrl.admin_q,
+			nvme_cancel_request, &dev->ctrl, true);
 
 	/*
 	 * The driver will not be starting up queues again if shutting down so
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 52abc3a..1be5688 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -922,8 +922,8 @@  static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
 {
 	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
 	nvme_rdma_stop_queue(&ctrl->queues[0]);
-	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_cancel_request,
-			&ctrl->ctrl);
+	blk_mq_queue_tag_busy_iter(ctrl->ctrl.admin_q, nvme_cancel_request,
+			&ctrl->ctrl, true);
 	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 	nvme_rdma_destroy_admin_queue(ctrl, remove);
 }
@@ -934,7 +934,7 @@  static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
 	if (ctrl->ctrl.queue_count > 1) {
 		nvme_stop_queues(&ctrl->ctrl);
 		nvme_rdma_stop_io_queues(ctrl);
-		blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_cancel_request,
+		nvme_iterate_inflight_rqs(&ctrl->ctrl, nvme_cancel_request,
 				&ctrl->ctrl);
 		if (remove)
 			nvme_start_queues(&ctrl->ctrl);
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 5f0a004..75eb069 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1686,7 +1686,8 @@  static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl,
 {
 	blk_mq_quiesce_queue(ctrl->admin_q);
 	nvme_tcp_stop_queue(ctrl, 0);
-	blk_mq_tagset_busy_iter(ctrl->admin_tagset, nvme_cancel_request, ctrl);
+	blk_mq_queue_tag_busy_iter(ctrl->admin_q,
+			nvme_cancel_request, ctrl, true);
 	blk_mq_unquiesce_queue(ctrl->admin_q);
 	nvme_tcp_destroy_admin_queue(ctrl, remove);
 }
@@ -1698,7 +1699,7 @@  static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
 		return;
 	nvme_stop_queues(ctrl);
 	nvme_tcp_stop_io_queues(ctrl);
-	blk_mq_tagset_busy_iter(ctrl->tagset, nvme_cancel_request, ctrl);
+	nvme_iterate_inflight_rqs(ctrl, nvme_cancel_request, ctrl);
 	if (remove)
 		nvme_start_queues(ctrl);
 	nvme_tcp_destroy_io_queues(ctrl, remove);
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 4aac1b4..0d25083 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -429,7 +429,7 @@  static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl)
 {
 	if (ctrl->ctrl.queue_count > 1) {
 		nvme_stop_queues(&ctrl->ctrl);
-		blk_mq_tagset_busy_iter(&ctrl->tag_set,
+		nvme_iterate_inflight_rqs(&ctrl->ctrl,
 					nvme_cancel_request, &ctrl->ctrl);
 		nvme_loop_destroy_io_queues(ctrl);
 	}
@@ -438,8 +438,8 @@  static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl)
 		nvme_shutdown_ctrl(&ctrl->ctrl);
 
 	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
-	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
-				nvme_cancel_request, &ctrl->ctrl);
+	blk_mq_queue_tag_busy_iter(ctrl->ctrl.admin_q,
+				nvme_cancel_request, &ctrl->ctrl, true);
 	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 	nvme_loop_destroy_admin_queue(ctrl);
 }