| Message ID | 1484597945-31143-1-git-send-email-parav@mellanox.com (mailing list archive) |
| --- | --- |
| State | Not Applicable |
On Mon, Jan 16, 2017 at 02:19:05PM -0600, Parav Pandit wrote:
> This patch performs dma sync operations on nvme_command,
> inline page(s) and nvme_completion.
>
> nvme_command and write cmd inline data is synced
> (a) on receiving of the recv queue completion for cpu access.
> (b) before posting recv wqe back to rdma adapter for device access.
>
> nvme_completion is synced
> (a) on receiving send completion for nvme_completion for cpu access.
> (b) before posting send wqe to rdma adapter for device access.
>
> This patch is generated for git://git.infradead.org/nvme-fabrics.git
> Branch: nvmf-4.10
>
> Signed-off-by: Parav Pandit <parav@mellanox.com>
> Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
> ---
>  drivers/nvme/target/rdma.c | 25 +++++++++++++++++++++++++
>  1 file changed, 25 insertions(+)
>
> diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
> index 6c1c368..fe7e257 100644
> --- a/drivers/nvme/target/rdma.c
> +++ b/drivers/nvme/target/rdma.c
> @@ -437,6 +437,14 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
>  		struct nvmet_rdma_cmd *cmd)
>  {
>  	struct ib_recv_wr *bad_wr;
> +	int i;
> +
> +	for (i = 0; i < 2; i++) {
> +		if (cmd->sge[i].length)
> +			ib_dma_sync_single_for_device(ndev->device,

Aren't we trying to get rid of all these ib_dma_* wrappers?

> +				cmd->sge[0].addr, cmd->sge[0].length,
> +				DMA_FROM_DEVICE);
> +	}
>
>  	if (ndev->srq)
>  		return ib_post_srq_recv(ndev->srq, &cmd->wr, &bad_wr);
> @@ -507,6 +515,10 @@ static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
>  	struct nvmet_rdma_rsp *rsp =
>  		container_of(wc->wr_cqe, struct nvmet_rdma_rsp, send_cqe);
>
> +	ib_dma_sync_single_for_cpu(rsp->queue->dev->device,
> +			rsp->send_sge.addr, rsp->send_sge.length,
> +			DMA_TO_DEVICE);
> +
>  	nvmet_rdma_release_rsp(rsp);
>
>  	if (unlikely(wc->status != IB_WC_SUCCESS &&
> @@ -538,6 +550,11 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req)
>  	first_wr = &rsp->send_wr;
>
>  	nvmet_rdma_post_recv(rsp->queue->dev, rsp->cmd);
> +
> +	ib_dma_sync_single_for_device(rsp->queue->dev->device,
> +			rsp->send_sge.addr, rsp->send_sge.length,
> +			DMA_TO_DEVICE);
> +
>  	if (ib_post_send(cm_id->qp, first_wr, &bad_wr)) {
> 		pr_err("sending cmd response failed\n");
> 		nvmet_rdma_release_rsp(rsp);
> @@ -692,12 +709,20 @@ static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp)
>  static void nvmet_rdma_handle_command(struct nvmet_rdma_queue *queue,
>  		struct nvmet_rdma_rsp *cmd)
>  {
> +	int i;
>  	u16 status;
>
>  	cmd->queue = queue;
>  	cmd->n_rdma = 0;
>  	cmd->req.port = queue->port;
>
> +	for (i = 0; i < 2; i++) {
> +		if (cmd->cmd->sge[i].length)
> +			ib_dma_sync_single_for_cpu(queue->dev->device,
> +				cmd->cmd->sge[i].addr, cmd->cmd->sge[i].length,
> +				DMA_FROM_DEVICE);
> +	}
> +
>  	if (!nvmet_req_init(&cmd->req, &queue->nvme_cq,
>  			&queue->nvme_sq, &nvmet_rdma_ops))
>  		return;
> --
> 1.8.3.1
Hi Yuval,

> -----Original Message-----
> From: Yuval Shaia [mailto:yuval.shaia@oracle.com]
> Sent: Monday, January 16, 2017 2:32 PM
> To: Parav Pandit <parav@mellanox.com>
> Cc: hch@lst.de; sagi@grimberg.me; linux-nvme@lists.infradead.org;
> linux-rdma@vger.kernel.org; dledford@redhat.com
> Subject: Re: [PATCHv1] nvmet-rdma: Fix missing dma sync to nvme data structures
>
> On Mon, Jan 16, 2017 at 02:19:05PM -0600, Parav Pandit wrote:
> > This patch performs dma sync operations on nvme_command, inline
> > page(s) and nvme_completion.
> >
> > nvme_command and write cmd inline data is synced
> > (a) on receiving of the recv queue completion for cpu access.
> > (b) before posting recv wqe back to rdma adapter for device access.
> >
> > nvme_completion is synced
> > (a) on receiving send completion for nvme_completion for cpu access.
> > (b) before posting send wqe to rdma adapter for device access.
> >
> > This patch is generated for git://git.infradead.org/nvme-fabrics.git
> > Branch: nvmf-4.10
> >
> > Signed-off-by: Parav Pandit <parav@mellanox.com>
> > Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
> > ---
> >  drivers/nvme/target/rdma.c | 25 +++++++++++++++++++++++++
> >  1 file changed, 25 insertions(+)
> >
> > diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
> > index 6c1c368..fe7e257 100644
> > --- a/drivers/nvme/target/rdma.c
> > +++ b/drivers/nvme/target/rdma.c
> > @@ -437,6 +437,14 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
> >  		struct nvmet_rdma_cmd *cmd)
> >  {
> >  	struct ib_recv_wr *bad_wr;
> > +	int i;
> > +
> > +	for (i = 0; i < 2; i++) {
> > +		if (cmd->sge[i].length)
> > +			ib_dma_sync_single_for_device(ndev->device,
>
> Aren't we trying to get rid of all these ib_dma_* wrappers?

Yes, we are. Bart's patch is still not merged; last I checked there was some
issue with the SDMA or hfi changes.
I have sent out a patch on top of Bart's patch for the linux-rdma tree using
the regular dma_xx APIs.

However, Sagi and Christoph acknowledged that this fix needs to go to
4.8-stable and 4.10-rc as a bug fix.
So this patch is generated from the nvme-fabrics tree, as mentioned in the
comment.
When this gets to the linux-rdma tree, Bart's new patch will translate these
additional calls to the regular dma_xx APIs.
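For reference, once the ib_dma_* wrappers are gone this sync boils down to the regular DMA API. A rough sketch of what that could look like is below; the helper name is made up for illustration, it is not from the posted patch, and it assumes the SQE buffer was mapped against the ib_device's underlying dma_device:

```c
/*
 * Rough sketch only (not the posted patch): the recv-side sync expressed
 * with the plain DMA API instead of the ib_dma_* wrappers. The helper name
 * is hypothetical, and ndev->device->dma_device is assumed to be the
 * struct device the buffer was originally dma-mapped with.
 */
static void nvmet_rdma_sync_cmd_for_device(struct nvmet_rdma_device *ndev,
		struct nvmet_rdma_cmd *cmd)
{
	struct device *dma_dev = ndev->device->dma_device;

	/* hand the SQE buffer back to the HCA before reposting the recv WQE */
	dma_sync_single_for_device(dma_dev, cmd->sge[0].addr,
			cmd->sge[0].length, DMA_FROM_DEVICE);
}
```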
Hey Parav,

> diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
> index 6c1c368..fe7e257 100644
> --- a/drivers/nvme/target/rdma.c
> +++ b/drivers/nvme/target/rdma.c
> @@ -437,6 +437,14 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
>  		struct nvmet_rdma_cmd *cmd)
>  {
>  	struct ib_recv_wr *bad_wr;
> +	int i;
> +
> +	for (i = 0; i < 2; i++) {
> +		if (cmd->sge[i].length)
> +			ib_dma_sync_single_for_device(ndev->device,
> +				cmd->sge[0].addr, cmd->sge[0].length,
> +				DMA_FROM_DEVICE);
> +	}

a. you test on sge[i] but sync sge[0].
b. I don't think we need the for statement, let's keep it open-coded for [0]
and [1].

>
>  	if (ndev->srq)
>  		return ib_post_srq_recv(ndev->srq, &cmd->wr, &bad_wr);
> @@ -507,6 +515,10 @@ static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
>  	struct nvmet_rdma_rsp *rsp =
>  		container_of(wc->wr_cqe, struct nvmet_rdma_rsp, send_cqe);
>
> +	ib_dma_sync_single_for_cpu(rsp->queue->dev->device,
> +			rsp->send_sge.addr, rsp->send_sge.length,
> +			DMA_TO_DEVICE);

Why do you need to sync_for_cpu here? You have no interest in the data at
this point.

> +
>  	nvmet_rdma_release_rsp(rsp);
>
>  	if (unlikely(wc->status != IB_WC_SUCCESS &&
> @@ -538,6 +550,11 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req)
>  	first_wr = &rsp->send_wr;
>
>  	nvmet_rdma_post_recv(rsp->queue->dev, rsp->cmd);
> +
> +	ib_dma_sync_single_for_device(rsp->queue->dev->device,
> +			rsp->send_sge.addr, rsp->send_sge.length,
> +			DMA_TO_DEVICE);
> +
>  	if (ib_post_send(cm_id->qp, first_wr, &bad_wr)) {
>  		pr_err("sending cmd response failed\n");
>  		nvmet_rdma_release_rsp(rsp);
> @@ -692,12 +709,20 @@ static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp)
>  static void nvmet_rdma_handle_command(struct nvmet_rdma_queue *queue,
>  		struct nvmet_rdma_rsp *cmd)
>  {
> +	int i;
>  	u16 status;
>
>  	cmd->queue = queue;
>  	cmd->n_rdma = 0;
>  	cmd->req.port = queue->port;
>
> +	for (i = 0; i < 2; i++) {
> +		if (cmd->cmd->sge[i].length)
> +			ib_dma_sync_single_for_cpu(queue->dev->device,
> +				cmd->cmd->sge[i].addr, cmd->cmd->sge[i].length,
> +				DMA_FROM_DEVICE);
> +	}

Again, we don't need the for statement.

Also, I think we can optimize a bit by syncing the in-capsule page only if:
1. it was posted for recv (sge has length)
2. it's a write command
3. it has in-capsule data.

So, here let's sync the sqe (sge[0]) and sync the in-capsule page in
nvmet_rdma_map_sgl_inline().
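For reference, the open-coded form asked for in (b), with the sge[i]/sge[0] mix-up from (a) fixed, would look roughly like the sketch below. The helper name is made up for illustration and this is not the posted v2; sge[0] carries the nvme_command (SQE) and sge[1] the optional in-capsule data page:

```c
/*
 * Sketch only (hypothetical helper, not the posted patch): the recv-side
 * sync open-coded for sge[0] and sge[1] as requested in the review.
 */
static void nvmet_rdma_sync_cmd_sges_for_device(struct nvmet_rdma_device *ndev,
		struct nvmet_rdma_cmd *cmd)
{
	/* sge[0] always carries the nvme_command (SQE) */
	ib_dma_sync_single_for_device(ndev->device,
			cmd->sge[0].addr, cmd->sge[0].length,
			DMA_FROM_DEVICE);

	/* sge[1] is the in-capsule data page, posted only with inline data */
	if (cmd->sge[1].length)
		ib_dma_sync_single_for_device(ndev->device,
				cmd->sge[1].addr, cmd->sge[1].length,
				DMA_FROM_DEVICE);
}
```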
Hi Sagi,

> > +	for (i = 0; i < 2; i++) {
> > +		if (cmd->sge[i].length)
> > +			ib_dma_sync_single_for_device(ndev->device,
> > +				cmd->sge[0].addr, cmd->sge[0].length,
> > +				DMA_FROM_DEVICE);
> > +	}
>
> a. you test on sge[i] but sync sge[0].

Crap code. I will fix this.

> b. I don't think we need the for statement, let's keep it open-coded for [0]
> and [1].

I put in a for loop because there was high-level agreement in the max_sge
thread between Chuck, Steve and Jason about having a generic sg_list/bounce
buffer and doing things similar to the RW APIs.
Now if we generalize at that point, my thought was that this code can
eventually move out from every ULP to that generic send() API.
So I put in a for loop to make it more ULP agnostic from the beginning.

> >
> >  	if (ndev->srq)
> >  		return ib_post_srq_recv(ndev->srq, &cmd->wr, &bad_wr);
> > @@ -507,6 +515,10 @@ static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
> >  	struct nvmet_rdma_rsp *rsp =
> >  		container_of(wc->wr_cqe, struct nvmet_rdma_rsp, send_cqe);
> >
> > +	ib_dma_sync_single_for_cpu(rsp->queue->dev->device,
> > +			rsp->send_sge.addr, rsp->send_sge.length,
> > +			DMA_TO_DEVICE);
>
> Why do you need to sync_for_cpu here? You have no interest in the data at
> this point.
>
Before a cqe can be prepared by the cpu, it needs to be synced.
So once the CQE send is completed, that region is ready for preparing a new CQE.
In the error case the cqe is prepared by the RDMA layer and sent using
nvmet_req_complete.
In the happy-path case the cqe is prepared by the core layer before invoking
the queue_response() callback of fabric_ops.

In the happy case nvmet-core needs to do the sync_to_cpu.
In the error case the rdma layer needs to do the sync_to_cpu.

Instead of messing up the code at both places, I did the sync_for_cpu in
send_done(), which is a unified place.
If there were a generic callback in fabric_ops that could be invoked by
__nvmet_req_complete(), then it would be cleaner to do it at a single place by
invoking it. I didn't think it was worth extending fabric_ops for this.
Let me know if you have a different idea to resolve this.

> > @@ -692,12 +709,20 @@ static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp)
> >  static void nvmet_rdma_handle_command(struct nvmet_rdma_queue *queue,
> >  		struct nvmet_rdma_rsp *cmd)
> >  {
> > +	int i;
> >  	u16 status;
> >
> >  	cmd->queue = queue;
> >  	cmd->n_rdma = 0;
> >  	cmd->req.port = queue->port;
> >
> > +	for (i = 0; i < 2; i++) {
> > +		if (cmd->cmd->sge[i].length)
> > +			ib_dma_sync_single_for_cpu(queue->dev->device,
> > +				cmd->cmd->sge[i].addr, cmd->cmd->sge[i].length,
> > +				DMA_FROM_DEVICE);
> > +	}
>
> Again, we don't need the for statement.
>
> Also, I think we can optimize a bit by syncing the in-capsule page only if:
> 1. it was posted for recv (sge has length)
> 2. it's a write command
> 3. it has in-capsule data.
>
> So, here let's sync the sqe (sge[0]) and sync the in-capsule page in
> nvmet_rdma_map_sgl_inline().

I agree, Sagi, that this can be deferred to _inline(). I was headed toward
doing this in generic code eventually, similar to the RW API.
And I thought, why not have the clean code now so that migration to the new
API later would be easy.
But if you think we should defer it to a later stage, I am fine with that and
will continue with open coding.

Now when I review the code of map_sgl_inline(), I am wondering whether we
should sync the INLINE data page at all, because the cpu is not going to read
it anyway.
It's only the remote device which will read/write it, and it will do the
dma_sync_to_device as part of that driver anyway.
So I should just sync the nvme_command and not the inline data.
That brings me back to open coding the sync of only entry [0].
:-)
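The back-and-forth above comes down to the usual streaming-DMA ownership rule: a buffer the CPU writes and the device then reads has to be handed back to the CPU before every reuse. A generic sketch of that rule follows; all names here are illustrative placeholders, not code from the driver:

```c
#include <linux/dma-mapping.h>
#include <linux/nvme.h>

/*
 * Illustrative sketch of the ownership rule under discussion: a completion
 * buffer that the CPU fills and the HCA then sends must bounce between CPU
 * and device ownership around every reuse. "dev", "cqe", "dma_addr" and
 * "status" are hypothetical placeholders.
 */
static void refill_and_resync_cqe(struct device *dev,
		struct nvme_completion *cqe, dma_addr_t dma_addr, u16 status)
{
	/* take the buffer back before the CPU writes a new completion */
	dma_sync_single_for_cpu(dev, dma_addr, sizeof(*cqe), DMA_TO_DEVICE);

	cqe->status = cpu_to_le16(status << 1);

	/* hand it back to the device before posting the send WR */
	dma_sync_single_for_device(dev, dma_addr, sizeof(*cqe), DMA_TO_DEVICE);
}
```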
>>> +	for (i = 0; i < 2; i++) {
>>> +		if (cmd->sge[i].length)
>>> +			ib_dma_sync_single_for_device(ndev->device,
>>> +				cmd->sge[0].addr, cmd->sge[0].length,
>>> +				DMA_FROM_DEVICE);
>>> +	}
>>
>> a. you test on sge[i] but sync sge[0].
> Crap code. I will fix this.
>
>> b. I don't think we need the for statement, let's keep it open-coded for [0]
>> and [1].
>
> I put in a for loop because there was high-level agreement in the max_sge
> thread between Chuck, Steve and Jason about having a generic sg_list/bounce
> buffer and doing things similar to the RW APIs.
> Now if we generalize at that point, my thought was that this code can
> eventually move out from every ULP to that generic send() API.
> So I put in a for loop to make it more ULP agnostic from the beginning.

Let's start simple and clear; if we indeed do this (and I should check out
this discussion) we'll move it out like all the rest of the ULPs.

>>>  	if (ndev->srq)
>>>  		return ib_post_srq_recv(ndev->srq, &cmd->wr, &bad_wr);
>>> @@ -507,6 +515,10 @@ static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
>>>  	struct nvmet_rdma_rsp *rsp =
>>>  		container_of(wc->wr_cqe, struct nvmet_rdma_rsp, send_cqe);
>>>
>>> +	ib_dma_sync_single_for_cpu(rsp->queue->dev->device,
>>> +			rsp->send_sge.addr, rsp->send_sge.length,
>>> +			DMA_TO_DEVICE);
>>
>> Why do you need to sync_for_cpu here? You have no interest in the data at
>> this point.
>>
> Before a cqe can be prepared by the cpu, it needs to be synced.
> So once the CQE send is completed, that region is ready for preparing a new CQE.
> In the error case the cqe is prepared by the RDMA layer and sent using
> nvmet_req_complete.
> In the happy-path case the cqe is prepared by the core layer before invoking
> the queue_response() callback of fabric_ops.
>
> In the happy case nvmet-core needs to do the sync_to_cpu.
> In the error case the rdma layer needs to do the sync_to_cpu.
>
> Instead of messing up the code at both places, I did the sync_for_cpu in
> send_done(), which is a unified place.
> If there were a generic callback in fabric_ops that could be invoked by
> __nvmet_req_complete(), then it would be cleaner to do it at a single place
> by invoking it. I didn't think it was worth extending fabric_ops for this.
> Let me know if you have a different idea to resolve this.

Why not sync it when you start using the cmd at nvmet_rdma_recv_done()?
--
@@ -747,6 +747,10 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
 	rsp->flags = 0;
 	rsp->req.cmd = cmd->nvme_cmd;

+	ib_dma_sync_single_for_cpu(rsp->queue->dev->device,
+			rsp->send_sge.addr, rsp->send_sge.length,
+			DMA_TO_DEVICE);
+
 	if (unlikely(queue->state != NVMET_RDMA_Q_LIVE)) {
 		unsigned long flags;
--

>>> @@ -692,12 +709,20 @@ static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp)
>>>  static void nvmet_rdma_handle_command(struct nvmet_rdma_queue *queue,
>>>  		struct nvmet_rdma_rsp *cmd)
>>>  {
>>> +	int i;
>>>  	u16 status;
>>>
>>>  	cmd->queue = queue;
>>>  	cmd->n_rdma = 0;
>>>  	cmd->req.port = queue->port;
>>>
>>> +	for (i = 0; i < 2; i++) {
>>> +		if (cmd->cmd->sge[i].length)
>>> +			ib_dma_sync_single_for_cpu(queue->dev->device,
>>> +				cmd->cmd->sge[i].addr, cmd->cmd->sge[i].length,
>>> +				DMA_FROM_DEVICE);
>>> +	}
>>
>> Again, we don't need the for statement.
>>
>> Also, I think we can optimize a bit by syncing the in-capsule page only if:
>> 1. it was posted for recv (sge has length)
>> 2. it's a write command
>> 3. it has in-capsule data.
>>
>> So, here let's sync the sqe (sge[0]) and sync the in-capsule page in
>> nvmet_rdma_map_sgl_inline().
>
> I agree, Sagi, that this can be deferred to _inline(). I was headed toward
> doing this in generic code eventually, similar to the RW API.
> And I thought, why not have the clean code now so that migration to the new
> API later would be easy.
> But if you think we should defer it to a later stage, I am fine with that and
> will continue with open coding.
>
> Now when I review the code of map_sgl_inline(), I am wondering whether we
> should sync the INLINE data page at all, because the cpu is not going to read
> it anyway.
> It's only the remote device which will read/write it, and it will do the
> dma_sync_to_device as part of that driver anyway.

Yea, you're right. Let's kill it.
Hi Sagi,

> -----Original Message-----
> From: Sagi Grimberg [mailto:sagi@grimberg.me]
> Sent: Tuesday, January 17, 2017 2:08 AM
> To: Parav Pandit <parav@mellanox.com>; hch@lst.de;
> linux-nvme@lists.infradead.org; linux-rdma@vger.kernel.org;
> dledford@redhat.com
> Subject: Re: [PATCHv1] nvmet-rdma: Fix missing dma sync to nvme data structures
>
> >>> +	for (i = 0; i < 2; i++) {
> >>> +		if (cmd->sge[i].length)
> >>> +			ib_dma_sync_single_for_device(ndev->device,
> >>> +				cmd->sge[0].addr, cmd->sge[0].length,
> >>> +				DMA_FROM_DEVICE);
> >>> +	}
> >>
> >> a. you test on sge[i] but sync sge[0].
> > Crap code. I will fix this.
> >
> >> b. I don't think we need the for statement, let's keep it open-coded
> >> for [0] and [1].
> >
> > I put in a for loop because there was high-level agreement in the max_sge
> > thread between Chuck, Steve and Jason about having a generic sg_list/bounce
> > buffer and doing things similar to the RW APIs.
> > Now if we generalize at that point, my thought was that this code can
> > eventually move out from every ULP to that generic send() API.
> > So I put in a for loop to make it more ULP agnostic from the beginning.
>
> Let's start simple and clear; if we indeed do this (and I should check out
> this discussion) we'll move it out like all the rest of the ULPs.
>
Yes. That's why I changed the code to use send_sge.length instead of
sizeof(nvme_completion).

> >>>  	if (ndev->srq)
> >>>  		return ib_post_srq_recv(ndev->srq, &cmd->wr, &bad_wr);
> >>> @@ -507,6 +515,10 @@ static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
> >>>  	struct nvmet_rdma_rsp *rsp =
> >>>  		container_of(wc->wr_cqe, struct nvmet_rdma_rsp, send_cqe);
> >>>
> >>> +	ib_dma_sync_single_for_cpu(rsp->queue->dev->device,
> >>> +			rsp->send_sge.addr, rsp->send_sge.length,
> >>> +			DMA_TO_DEVICE);
> >>
> >> Why do you need to sync_for_cpu here? You have no interest in the
> >> data at this point.
> >>
> > Before a cqe can be prepared by the cpu, it needs to be synced.
> > So once the CQE send is completed, that region is ready for preparing a new CQE.
> > In the error case the cqe is prepared by the RDMA layer and sent using
> > nvmet_req_complete.
> > In the happy-path case the cqe is prepared by the core layer before invoking
> > the queue_response() callback of fabric_ops.
> >
> > In the happy case nvmet-core needs to do the sync_to_cpu.
> > In the error case the rdma layer needs to do the sync_to_cpu.
> >
> > Instead of messing up the code at both places, I did the sync_for_cpu in
> > send_done(), which is a unified place.
> > If there were a generic callback in fabric_ops that could be invoked by
> > __nvmet_req_complete(), then it would be cleaner to do it at a single place
> > by invoking it. I didn't think it was worth extending fabric_ops for this.
> > Let me know if you have a different idea to resolve this.
>
> Why not sync it when you start using the cmd at nvmet_rdma_recv_done()?
> --

Yes, that works too.
nvmet_rdma_handle_command() seems like a better place to me, where the
sync_cpu() is done for the nvme_command as well. That makes it more readable
too.
Having it in handle_command() also covers the case of receiving RQEs before
the QP moves to the established state.

> > Now when I review the code of map_sgl_inline(), I am wondering whether we
> > should sync the INLINE data page at all, because the cpu is not going to
> > read it anyway.
> > It's only the remote device which will read/write it, and it will do the
> > dma_sync_to_device as part of that driver anyway.
>
> Yea, you're right. Let's kill it.

Ok.
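Pieced together, the placement agreed on above would look roughly like the fragment below, sitting at the top of nvmet_rdma_handle_command() before nvmet_req_init(). This is a sketch of the direction only, not the v2 that was posted, and whether the second sync lands here or in recv_done() was still an open choice at this point:

```c
	/* the CPU is about to parse the SQE the HCA just delivered */
	ib_dma_sync_single_for_cpu(queue->dev->device,
			cmd->cmd->sge[0].addr, cmd->cmd->sge[0].length,
			DMA_FROM_DEVICE);

	/* ... and will shortly start writing this request's CQE buffer */
	ib_dma_sync_single_for_cpu(queue->dev->device,
			cmd->send_sge.addr, cmd->send_sge.length,
			DMA_TO_DEVICE);
```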
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 6c1c368..fe7e257 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -437,6 +437,14 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
 		struct nvmet_rdma_cmd *cmd)
 {
 	struct ib_recv_wr *bad_wr;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		if (cmd->sge[i].length)
+			ib_dma_sync_single_for_device(ndev->device,
+				cmd->sge[0].addr, cmd->sge[0].length,
+				DMA_FROM_DEVICE);
+	}
 
 	if (ndev->srq)
 		return ib_post_srq_recv(ndev->srq, &cmd->wr, &bad_wr);
@@ -507,6 +515,10 @@ static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
 	struct nvmet_rdma_rsp *rsp =
 		container_of(wc->wr_cqe, struct nvmet_rdma_rsp, send_cqe);
 
+	ib_dma_sync_single_for_cpu(rsp->queue->dev->device,
+			rsp->send_sge.addr, rsp->send_sge.length,
+			DMA_TO_DEVICE);
+
 	nvmet_rdma_release_rsp(rsp);
 
 	if (unlikely(wc->status != IB_WC_SUCCESS &&
@@ -538,6 +550,11 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req)
 	first_wr = &rsp->send_wr;
 
 	nvmet_rdma_post_recv(rsp->queue->dev, rsp->cmd);
+
+	ib_dma_sync_single_for_device(rsp->queue->dev->device,
+			rsp->send_sge.addr, rsp->send_sge.length,
+			DMA_TO_DEVICE);
+
 	if (ib_post_send(cm_id->qp, first_wr, &bad_wr)) {
 		pr_err("sending cmd response failed\n");
 		nvmet_rdma_release_rsp(rsp);
@@ -692,12 +709,20 @@ static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp)
 static void nvmet_rdma_handle_command(struct nvmet_rdma_queue *queue,
 		struct nvmet_rdma_rsp *cmd)
 {
+	int i;
 	u16 status;
 
 	cmd->queue = queue;
 	cmd->n_rdma = 0;
 	cmd->req.port = queue->port;
 
+	for (i = 0; i < 2; i++) {
+		if (cmd->cmd->sge[i].length)
+			ib_dma_sync_single_for_cpu(queue->dev->device,
+				cmd->cmd->sge[i].addr, cmd->cmd->sge[i].length,
+				DMA_FROM_DEVICE);
+	}
+
 	if (!nvmet_req_init(&cmd->req, &queue->nvme_cq,
 			&queue->nvme_sq, &nvmet_rdma_ops))
 		return;
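Taken together, the review converges on a simpler shape than the v1 above: sync only sge[0] (the SQE), drop the for loops and the in-capsule page sync, move the sync_for_cpu to where the command starts being used instead of send_done(), and keep the sync_for_device calls before reposting the recv WQE and before posting the response. The block below is only a sketch assembled from the comments in this thread, not the v2 that was actually posted:

```c
/* Sketch of where the review seems to land (not the posted v2). */

/* in nvmet_rdma_post_recv(): hand the SQE buffer back to the HCA */
ib_dma_sync_single_for_device(ndev->device,
		cmd->sge[0].addr, cmd->sge[0].length,
		DMA_FROM_DEVICE);

/* in nvmet_rdma_handle_command(): claim the just-received SQE for the CPU */
ib_dma_sync_single_for_cpu(queue->dev->device,
		cmd->cmd->sge[0].addr, cmd->cmd->sge[0].length,
		DMA_FROM_DEVICE);

/* in nvmet_rdma_queue_response(): hand the finished CQE to the HCA */
ib_dma_sync_single_for_device(rsp->queue->dev->device,
		rsp->send_sge.addr, rsp->send_sge.length,
		DMA_TO_DEVICE);
```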