Message ID | c04f7c0232402294ce6fc0dd85878c0a21ed46bc.1472746379.git.swise@opengridcomputing.com (mailing list archive) |
---|---|
State | Not Applicable |
Headers | show |
> > After address resolution, the nvme_rdma_queue rdma resources are > allocated. If rdma route resolution or the connect fails, or the > controller reconnect times out and gives up, then the rdma resources > need to be freed. Otherwise, rdma resources are leaked. > > Reviewed-by: Christoph Hellwig <hch@lst.de> > Reviewed-by: Sagi Grimberg <sagi@grimberg.me> > Signed-off-by: Steve Wise <swise@opengridcomputing.com> > --- > drivers/nvme/host/rdma.c | 8 +++++++- > 1 file changed, 7 insertions(+), 1 deletion(-) > > diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c > index 15b0c1d..d97a16f 100644 > --- a/drivers/nvme/host/rdma.c > +++ b/drivers/nvme/host/rdma.c > @@ -82,6 +82,7 @@ struct nvme_rdma_request { > > enum nvme_rdma_queue_flags { > NVME_RDMA_Q_CONNECTED = (1 << 0), > + NVME_RDMA_IB_QUEUE_ALLOCATED = (1 << 1), > }; > > struct nvme_rdma_queue { > @@ -483,6 +484,8 @@ static void nvme_rdma_destroy_queue_ib(struct > nvme_rdma_queue *queue) > struct nvme_rdma_device *dev = queue->device; > struct ib_device *ibdev = dev->dev; > > + if (!test_and_clear_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, > &queue->flags)) > + return; > rdma_destroy_qp(queue->cm_id); > ib_free_cq(queue->ib_cq); > > @@ -533,6 +536,7 @@ static int nvme_rdma_create_queue_ib(struct > nvme_rdma_queue *queue, > ret = -ENOMEM; > goto out_destroy_qp; > } > + set_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags); > > return 0; > > @@ -590,6 +594,8 @@ static int nvme_rdma_init_queue(struct > nvme_rdma_ctrl *ctrl, > return 0; > > out_destroy_cm_id: > + if (test_and_clear_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &ctrl->queues[0].flags)) > + nvme_rdma_destroy_queue_ib(queue); > rdma_destroy_id(queue->cm_id); > return ret; > } The above chunk is incorrect. It is testing the ALLOCATED bit of the admin queue (ctrl->queues[0]), which is just wrong. This chunk should just call nvme_rdma_destroy_queue_ib() and let that function test/clear the ALLOCATED bit of the queue being cleaned up. 
This bug was apparently benign and it slipped through my testing. I'll be posting v5 after I fix this and retest... -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 15b0c1d..d97a16f 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -82,6 +82,7 @@ struct nvme_rdma_request { enum nvme_rdma_queue_flags { NVME_RDMA_Q_CONNECTED = (1 << 0), + NVME_RDMA_IB_QUEUE_ALLOCATED = (1 << 1), }; struct nvme_rdma_queue { @@ -483,6 +484,8 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue) struct nvme_rdma_device *dev = queue->device; struct ib_device *ibdev = dev->dev; + if (!test_and_clear_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags)) + return; rdma_destroy_qp(queue->cm_id); ib_free_cq(queue->ib_cq); @@ -533,6 +536,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue, ret = -ENOMEM; goto out_destroy_qp; } + set_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags); return 0; @@ -590,6 +594,8 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl, return 0; out_destroy_cm_id: + if (test_and_clear_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &ctrl->queues[0].flags)) + nvme_rdma_destroy_queue_ib(queue); rdma_destroy_id(queue->cm_id); return ret; } @@ -652,7 +658,7 @@ static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl) return 0; out_free_queues: - for (; i >= 1; i--) + for (i--; i >= 1; i--) nvme_rdma_stop_and_free_queue(&ctrl->queues[i]); return ret;