From patchwork Tue Jun 5 17:16:45 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Steve Wise X-Patchwork-Id: 10448875 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork.web.codeaurora.org (Postfix) with ESMTP id 0D7FB6024A for ; Tue, 5 Jun 2018 17:26:46 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id EF93F29A74 for ; Tue, 5 Jun 2018 17:26:45 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id E3CC129A81; Tue, 5 Jun 2018 17:26:45 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00, MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 746B329A7C for ; Tue, 5 Jun 2018 17:26:45 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752136AbeFER0m (ORCPT ); Tue, 5 Jun 2018 13:26:42 -0400 Received: from 72-48-214-68.dyn.grandenetworks.net ([72.48.214.68]:34917 "EHLO smtp.opengridcomputing.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752178AbeFER0j (ORCPT ); Tue, 5 Jun 2018 13:26:39 -0400 Received: by smtp.opengridcomputing.com (Postfix, from userid 503) id ABB082BA7F; Tue, 5 Jun 2018 12:26:38 -0500 (CDT) Message-Id: In-Reply-To: References: From: Steve Wise Date: Tue, 5 Jun 2018 10:16:45 -0700 Subject: [PATCH v4 2/3] nvme-rdma: support up to 4 segments of inline data To: axboe@kernel.dk, hch@lst.de, keith.busch@intel.com, sagi@grimberg.me, linux-nvme@lists.infradead.org Cc: parav@mellanox.com, maxg@mellanox.com, linux-rdma@vger.kernel.org Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP Allow up to 4 segments of inline data for NVMF WRITE operations. This reduces latency for small WRITEs by removing the need for the target to issue a READ WR for IB, or a REG_MR + READ WR chain for iWarp. Also cap the inline segments used based on the limitations of the device. Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Steve Wise --- drivers/nvme/host/rdma.c | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index d9f5fbe..622b13b 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -40,13 +40,14 @@ #define NVME_RDMA_MAX_SEGMENTS 256 -#define NVME_RDMA_MAX_INLINE_SEGMENTS 1 +#define NVME_RDMA_MAX_INLINE_SEGMENTS 4 struct nvme_rdma_device { struct ib_device *dev; struct ib_pd *pd; struct kref ref; struct list_head entry; + unsigned int num_inline_segments; }; struct nvme_rdma_qe { @@ -117,6 +118,7 @@ struct nvme_rdma_ctrl { struct sockaddr_storage src_addr; struct nvme_ctrl ctrl; + bool use_inline_data; }; static inline struct nvme_rdma_ctrl *to_rdma_ctrl(struct nvme_ctrl *ctrl) @@ -249,7 +251,7 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor) /* +1 for drain */ init_attr.cap.max_recv_wr = queue->queue_size + 1; init_attr.cap.max_recv_sge = 1; - init_attr.cap.max_send_sge = 1 + NVME_RDMA_MAX_INLINE_SEGMENTS; + init_attr.cap.max_send_sge = 1 + dev->num_inline_segments; init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; init_attr.qp_type = IB_QPT_RC; init_attr.send_cq = queue->ib_cq; @@ -374,6 +376,8 @@ static int nvme_rdma_dev_get(struct nvme_rdma_device *dev) goto out_free_pd; } + ndev->num_inline_segments = min(NVME_RDMA_MAX_INLINE_SEGMENTS, + ndev->dev->attrs.max_sge - 1); list_add(&ndev->entry, &device_list); out_unlock: mutex_unlock(&device_list_mutex); @@ -1086,19 +1090,27 @@ static int nvme_rdma_set_sg_null(struct nvme_command *c) } static int nvme_rdma_map_sg_inline(struct nvme_rdma_queue *queue, - struct nvme_rdma_request *req, struct nvme_command *c) + struct nvme_rdma_request *req, struct nvme_command *c, + int count) { struct nvme_sgl_desc *sg = &c->common.dptr.sgl; + struct scatterlist *sgl = req->sg_table.sgl; + struct ib_sge *sge = &req->sge[1]; + u32 len = 0; + int i; - req->sge[1].addr = sg_dma_address(req->sg_table.sgl); - req->sge[1].length = sg_dma_len(req->sg_table.sgl); - req->sge[1].lkey = queue->device->pd->local_dma_lkey; + for (i = 0; i < count; i++, sgl++, sge++) { + sge->addr = sg_dma_address(sgl); + sge->length = sg_dma_len(sgl); + sge->lkey = queue->device->pd->local_dma_lkey; + len += sge->length; + } sg->addr = cpu_to_le64(queue->ctrl->ctrl.icdoff); - sg->length = cpu_to_le32(sg_dma_len(req->sg_table.sgl)); + sg->length = cpu_to_le32(len); sg->type = (NVME_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_OFFSET; - req->num_sge++; + req->num_sge += count; return 0; } @@ -1191,13 +1203,14 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, return -EIO; } - if (count == 1) { + if (count <= dev->num_inline_segments) { if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) && + queue->ctrl->use_inline_data && blk_rq_payload_bytes(rq) <= nvme_rdma_inline_data_size(queue)) - return nvme_rdma_map_sg_inline(queue, req, c); + return nvme_rdma_map_sg_inline(queue, req, c, count); - if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) + if (count == 1 && dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) return nvme_rdma_map_sg_single(queue, req, c); } @@ -1956,6 +1969,9 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, goto out_remove_admin_queue; } + if (ctrl->ctrl.sgls & (1 << 20)) + ctrl->use_inline_data = true; + if (opts->queue_size > ctrl->ctrl.maxcmd) { /* warn if maxcmd is lower than queue_size */ dev_warn(ctrl->ctrl.device,