nvmet-rdma: Invoke fatal error on error completion

Message ID 1466701290-10356-1-git-send-email-sagi@grimberg.me (mailing list archive)
State Not Applicable

Commit Message

Sagi Grimberg June 23, 2016, 5:01 p.m. UTC
If we get an error completion, the RDMA queue pair is in error
state, so tear down the entire controller. Note that on a recv or
read error completion we might not have a controller yet, so check
for the controller's existence.

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/target/rdma.c | 45 +++++++++++++++++++++++++++++++++++++--------
 1 file changed, 37 insertions(+), 8 deletions(-)

Comments

Christoph Hellwig June 24, 2016, 7:11 a.m. UTC | #1
On Thu, Jun 23, 2016 at 08:01:30PM +0300, Sagi Grimberg wrote:
> If we get an error completion, the RDMA queue pair is in error
> state, so tear down the entire controller. Note that on a recv or
> read error completion we might not have a controller yet, so check
> for the controller's existence.
> 
> Signed-off-by: Sagi Grimberg <sagi@grimberg.me>

This looks fine, minus a few minor coding style nitpicks that I'd be
happy to fix up:

> +static void nvmet_rdma_error_comp(struct nvmet_rdma_queue *queue)
> +{
> +	if (queue->nvme_sq.ctrl)
> +		nvmet_ctrl_fatal_error(queue->nvme_sq.ctrl);
> +	else
> +		/*
> +		 * we didn't setup the controller yet in case
> +		 * of admin connect error, just disconnect and
> +		 * cleanup the queue
> +		 */
> +		nvmet_rdma_queue_disconnect(queue);
> +}

With such a long comment I'd prefer to have curly braces just to make
the else visually more obvious.
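Something like this, say (just a sketch of the fixed-up version, no
functional change intended):

	if (queue->nvme_sq.ctrl) {
		nvmet_ctrl_fatal_error(queue->nvme_sq.ctrl);
	} else {
		/*
		 * we didn't setup the controller yet in case
		 * of admin connect error, just disconnect and
		 * cleanup the queue
		 */
		nvmet_rdma_queue_disconnect(queue);
	}
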
> +
> +	if (unlikely(wc->status != IB_WC_SUCCESS &&
> +		wc->status != IB_WC_WR_FLUSH_ERR)) {

Indenting the second line of a condition by a single tab is always wrong;
either indent it with two tabs, or align it with the first line.
The second is probably nicer here:

	if (unlikely(wc->status != IB_WC_SUCCESS &&
		     wc->status != IB_WC_WR_FLUSH_ERR)) {

Given how many '!success && !flush_err' conditionals we have in various
drivers, I wonder if we should have a helper in the RDMA core, though.
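
Something along these lines, say (ib_wc_is_real_error() is only a strawman
name here, no such helper exists in the RDMA core today):

/* anything other than success or a flush from a QP in error state */
static inline bool ib_wc_is_real_error(const struct ib_wc *wc)
{
	return wc->status != IB_WC_SUCCESS &&
	       wc->status != IB_WC_WR_FLUSH_ERR;
}

The send completion check quoted above would then reduce to
if (unlikely(ib_wc_is_real_error(wc))).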

Sagi Grimberg June 26, 2016, 3:57 p.m. UTC | #2
>> +static void nvmet_rdma_error_comp(struct nvmet_rdma_queue *queue)
>> +{
>> +	if (queue->nvme_sq.ctrl)
>> +		nvmet_ctrl_fatal_error(queue->nvme_sq.ctrl);
>> +	else
>> +		/*
>> +		 * we didn't setup the controller yet in case
>> +		 * of admin connect error, just disconnect and
>> +		 * cleanup the queue
>> +		 */
>> +		nvmet_rdma_queue_disconnect(queue);
>> +}
>
> With such a long comment I'd prefer to have curly braces just to make
> the else visually more obvious.

Sure.

>> +
>> +	if (unlikely(wc->status != IB_WC_SUCCESS &&
>> +		wc->status != IB_WC_WR_FLUSH_ERR)) {
>
> Indenting the second line of a condition by a single tab is always wrong;
> either indent it with two tabs, or align it with the first line.
> The second is probably nicer here:
>
> 	if (unlikely(wc->status != IB_WC_SUCCESS &&
> 		     wc->status != IB_WC_WR_FLUSH_ERR)) {

OK, I'll do that.

> Given how many '!success && !flush_err' conditionals we have in various
> drivers, I wonder if we should have a helper in the RDMA core, though.

They don't always come together. But I can take a look at it. Given that
it might bring some slight logic shifts, let's do it incrementally.

Patch

diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index b1c6e5bb0b70..f5986a7b776e 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -134,6 +134,7 @@  static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc);
 static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc);
 static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc);
 static void nvmet_rdma_qp_event(struct ib_event *event, void *priv);
+static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue);
 
 static struct nvmet_fabrics_ops nvmet_rdma_ops;
 
@@ -486,12 +487,32 @@  static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp)
 	nvmet_rdma_put_rsp(rsp);
 }
 
+static void nvmet_rdma_error_comp(struct nvmet_rdma_queue *queue)
+{
+	if (queue->nvme_sq.ctrl)
+		nvmet_ctrl_fatal_error(queue->nvme_sq.ctrl);
+	else
+		/*
+		 * we didn't setup the controller yet in case
+		 * of admin connect error, just disconnect and
+		 * cleanup the queue
+		 */
+		nvmet_rdma_queue_disconnect(queue);
+}
+
 static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
 {
 	struct nvmet_rdma_rsp *rsp =
 		container_of(wc->wr_cqe, struct nvmet_rdma_rsp, send_cqe);
 
 	nvmet_rdma_release_rsp(rsp);
+
+	if (unlikely(wc->status != IB_WC_SUCCESS &&
+		wc->status != IB_WC_WR_FLUSH_ERR)) {
+		pr_err("SEND for CQE 0x%p failed with status %s (%d).\n",
+			wc->wr_cqe, ib_wc_status_msg(wc->status), wc->status);
+		nvmet_rdma_error_comp(rsp->queue);
+	}
 }
 
 static void nvmet_rdma_queue_response(struct nvmet_req *req)
@@ -534,11 +555,13 @@  static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc)
 			rsp->req.sg_cnt, nvmet_data_dir(&rsp->req));
 	rsp->n_rdma = 0;
 
-	if (unlikely(wc->status != IB_WC_SUCCESS &&
-		wc->status != IB_WC_WR_FLUSH_ERR)) {
-		pr_info("RDMA READ for CQE 0x%p failed with status %s (%d).\n",
-			wc->wr_cqe, ib_wc_status_msg(wc->status), wc->status);
-		nvmet_req_complete(&rsp->req, NVME_SC_DATA_XFER_ERROR);
+	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+		nvmet_rdma_release_rsp(rsp);
+		if (wc->status != IB_WC_WR_FLUSH_ERR) {
+			pr_info("RDMA READ for CQE 0x%p failed with status %s (%d).\n",
+				wc->wr_cqe, ib_wc_status_msg(wc->status), wc->status);
+			nvmet_rdma_error_comp(queue);
+		}
 		return;
 	}
 
@@ -705,13 +728,19 @@  static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
 	struct nvmet_rdma_queue *queue = cq->cq_context;
 	struct nvmet_rdma_rsp *rsp;
 
-	if (unlikely(wc->status != IB_WC_SUCCESS))
+	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+		if (wc->status != IB_WC_WR_FLUSH_ERR) {
+			pr_err("RECV for CQE 0x%p failed with status %s (%d)\n",
+				wc->wr_cqe, ib_wc_status_msg(wc->status),
+				wc->status);
+			nvmet_rdma_error_comp(queue);
+		}
 		return;
+	}
 
 	if (unlikely(wc->byte_len < sizeof(struct nvme_command))) {
 		pr_err("Ctrl Fatal Error: capsule size less than 64 bytes\n");
-		if (queue->nvme_sq.ctrl)
-			nvmet_ctrl_fatal_error(queue->nvme_sq.ctrl);
+		nvmet_rdma_error_comp(queue);
 		return;
 	}