@@ -1919,7 +1919,6 @@ static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl,
bool remove)
{
- mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock);
blk_mq_quiesce_queue(ctrl->admin_q);
nvme_tcp_stop_queue(ctrl, 0);
if (ctrl->admin_tagset) {
@@ -1930,15 +1929,13 @@ static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl,
if (remove)
blk_mq_unquiesce_queue(ctrl->admin_q);
nvme_tcp_destroy_admin_queue(ctrl, remove);
- mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock);
}
static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
bool remove)
{
- mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock);
if (ctrl->queue_count <= 1)
- goto out;
+ return;
blk_mq_quiesce_queue(ctrl->admin_q);
nvme_start_freeze(ctrl);
nvme_stop_queues(ctrl);
@@ -1951,8 +1948,6 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
if (remove)
nvme_start_queues(ctrl);
nvme_tcp_destroy_io_queues(ctrl, remove);
-out:
- mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock);
}
static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
@@ -1971,6 +1966,10 @@ static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
ctrl->opts->reconnect_delay * HZ);
} else {
dev_info(ctrl->device, "Removing controller...\n");
+
+ /* restart the queues so that the removal path is not blocked */
+ nvme_start_queues(ctrl);
+ blk_mq_unquiesce_queue(ctrl->admin_q);
nvme_delete_ctrl(ctrl);
}
}
@@ -2063,11 +2062,11 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
nvme_stop_keep_alive(ctrl);
+
+ mutex_lock(&tcp_ctrl->teardown_lock);
nvme_tcp_teardown_io_queues(ctrl, false);
- /* unquiesce to fail fast pending requests */
- nvme_start_queues(ctrl);
nvme_tcp_teardown_admin_queue(ctrl, false);
- blk_mq_unquiesce_queue(ctrl->admin_q);
+ mutex_unlock(&tcp_ctrl->teardown_lock);
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
/* state change failure is ok if we started ctrl delete */
@@ -2084,6 +2083,7 @@ static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work);
cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work);
+ mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock);
nvme_tcp_teardown_io_queues(ctrl, shutdown);
blk_mq_quiesce_queue(ctrl->admin_q);
if (shutdown)
@@ -2091,6 +2091,7 @@ static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
else
nvme_disable_ctrl(ctrl);
nvme_tcp_teardown_admin_queue(ctrl, shutdown);
+ mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock);
}
static void nvme_tcp_delete_ctrl(struct nvme_ctrl *ctrl)
@@ -2225,22 +2226,41 @@ nvme_tcp_timeout(struct request *rq, bool reserved)
"queue %d: timeout request %#x type %d\n",
nvme_tcp_queue_id(req->queue), rq->tag, pdu->hdr.type);
- if (ctrl->state != NVME_CTRL_LIVE) {
+ /*
+ * During CONNECTING or DELETING the controller has already been shut
+ * down, so it is safe to abort the request directly; in any other
+ * state that could race the timeout against a normal completion.
+ */
+ if (ctrl->state == NVME_CTRL_CONNECTING ||
+ ctrl->state == NVME_CTRL_DELETING ||
+ ctrl->state == NVME_CTRL_DELETING_NOIO) {
/*
- * If we are resetting, connecting or deleting we should
- * complete immediately because we may block controller
- * teardown or setup sequence
+ * If we are connecting, we should complete immediately because
+ * we may otherwise block the controller setup sequence:
* - ctrl disable/shutdown fabrics requests
* - connect requests
* - initialization admin requests
- * - I/O requests that entered after unquiescing and
- * the controller stopped responding
+ */
+ if (!rq->rq_disk) {
+ nvme_tcp_complete_timed_out(rq);
+ return BLK_EH_DONE;
+ }
+
+ /*
+ * During CONNECTING, all in-flight requests have been aborted and
+ * the queues are stopped, so in theory no timed-out request should
+ * be seen here. One can still show up when an I/O timeout fires
+ * before the state changes to CONNECTING but its handler is only
+ * scheduled after the change, so it is safe to ignore this
+ * case.
*
- * All other requests should be cancelled by the error
- * recovery work, so it's fine that we fail it here.
+ * During DELETING, tear down the controller so that deletion
+ * can make forward progress.
*/
- nvme_tcp_complete_timed_out(rq);
- return BLK_EH_DONE;
+ if (ctrl->state != NVME_CTRL_CONNECTING) {
+ nvme_tcp_teardown_ctrl(ctrl, false);
+ return BLK_EH_DONE;
+ }
}
/*