From patchwork Thu Apr 10 15:13:25 2014 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Steve Wise X-Patchwork-Id: 3963171 Return-Path: X-Original-To: patchwork-linux-nfs@patchwork.kernel.org Delivered-To: patchwork-parsemail@patchwork1.web.kernel.org Received: from mail.kernel.org (mail.kernel.org [198.145.19.201]) by patchwork1.web.kernel.org (Postfix) with ESMTP id DD00F9F374 for ; Thu, 10 Apr 2014 15:13:30 +0000 (UTC) Received: from mail.kernel.org (localhost [127.0.0.1]) by mail.kernel.org (Postfix) with ESMTP id DC16F2063F for ; Thu, 10 Apr 2014 15:13:29 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id AF84820625 for ; Thu, 10 Apr 2014 15:13:28 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S965236AbaDJPN1 (ORCPT ); Thu, 10 Apr 2014 11:13:27 -0400 Received: from smtp.opengridcomputing.com ([72.48.136.20]:49379 "EHLO smtp.opengridcomputing.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S965235AbaDJPN0 (ORCPT ); Thu, 10 Apr 2014 11:13:26 -0400 Received: from build.ogc.int (build.ogc.int [10.10.0.2]) by smtp.opengridcomputing.com (Postfix) with ESMTPSA id 068F42B652; Thu, 10 Apr 2014 10:13:26 -0500 (CDT) From: Steve Wise Subject: [PATCH] xprtrdma: mind the device's max fast register page list depth To: trond.myklebust@primarydata.com Cc: linux-nfs@vger.kernel.org, linux-rdma@vger.kernel.org Date: Thu, 10 Apr 2014 10:13:25 -0500 Message-ID: <20140410151325.28724.92004.stgit@build.ogc.int> User-Agent: StGIT/0.14.3 MIME-Version: 1.0 Sender: linux-nfs-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-nfs@vger.kernel.org X-Spam-Status: No, score=-7.5 required=5.0 tests=BAYES_00, RCVD_IN_DNSWL_HI, RP_MATCHES_RCVD, UNPARSEABLE_RELAY autolearn=ham version=3.3.1 X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on mail.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP Some rdma devices don't support a fast register page list depth of at least RPCRDMA_MAX_DATA_SEGS. So xprtrdma needs to chunk its fast register regions according to the minimum of the device max supported depth or RPCRDMA_MAX_DATA_SEGS. Signed-off-by: Steve Wise --- net/sunrpc/xprtrdma/rpc_rdma.c | 2 - net/sunrpc/xprtrdma/verbs.c | 64 ++++++++++++++++++++++++++++----------- net/sunrpc/xprtrdma/xprt_rdma.h | 1 + 3 files changed, 47 insertions(+), 20 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 96ead52..14d7634 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -249,8 +249,6 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, req->rl_nchunks = nchunks; BUG_ON(nchunks == 0); - BUG_ON((r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR) - && (nchunks > 3)); /* * finish off header. If write, marshal discrim and nchunks. diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 93726560..c08f193 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -539,6 +539,11 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) __func__); memreg = RPCRDMA_REGISTER; #endif + } else { + /* Mind the ia limit on FRMR page list depth */ + ia->ri_max_frmr_depth = min_t(unsigned int, + RPCRDMA_MAX_DATA_SEGS, + devattr.max_fast_reg_page_list_len); } break; } @@ -659,24 +664,42 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ep->rep_attr.srq = NULL; ep->rep_attr.cap.max_send_wr = cdata->max_requests; switch (ia->ri_memreg_strategy) { - case RPCRDMA_FRMR: + case RPCRDMA_FRMR: { + int depth = 7; + /* Add room for frmr register and invalidate WRs. * 1. FRMR reg WR for head * 2. FRMR invalidate WR for head - * 3. FRMR reg WR for pagelist - * 4. FRMR invalidate WR for pagelist + * 3. N FRMR reg WRs for pagelist + * 4. N FRMR invalidate WRs for pagelist * 5. FRMR reg WR for tail * 6. FRMR invalidate WR for tail * 7. The RDMA_SEND WR */ - ep->rep_attr.cap.max_send_wr *= 7; + + /* Calculate N if the device max FRMR depth is smaller than + * RPCRDMA_MAX_DATA_SEGS. + */ + if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) { + int delta = RPCRDMA_MAX_DATA_SEGS - + ia->ri_max_frmr_depth; + + do { + depth += 2; /* FRMR reg + invalidate */ + delta -= ia->ri_max_frmr_depth; + } while (delta > 0); + + } + ep->rep_attr.cap.max_send_wr *= depth; if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) { - cdata->max_requests = devattr.max_qp_wr / 7; + cdata->max_requests = devattr.max_qp_wr / depth; if (!cdata->max_requests) return -EINVAL; - ep->rep_attr.cap.max_send_wr = cdata->max_requests * 7; + ep->rep_attr.cap.max_send_wr = cdata->max_requests * + depth; } break; + } case RPCRDMA_MEMWINDOWS_ASYNC: case RPCRDMA_MEMWINDOWS: /* Add room for mw_binds+unbinds - overkill! */ @@ -1043,16 +1066,16 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, case RPCRDMA_FRMR: for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) { r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, - RPCRDMA_MAX_SEGS); + ia->ri_max_frmr_depth); if (IS_ERR(r->r.frmr.fr_mr)) { rc = PTR_ERR(r->r.frmr.fr_mr); dprintk("RPC: %s: ib_alloc_fast_reg_mr" " failed %i\n", __func__, rc); goto out; } - r->r.frmr.fr_pgl = - ib_alloc_fast_reg_page_list(ia->ri_id->device, - RPCRDMA_MAX_SEGS); + r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list( + ia->ri_id->device, + ia->ri_max_frmr_depth); if (IS_ERR(r->r.frmr.fr_pgl)) { rc = PTR_ERR(r->r.frmr.fr_pgl); dprintk("RPC: %s: " @@ -1498,8 +1521,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, seg1->mr_offset -= pageoff; /* start of page */ seg1->mr_len += pageoff; len = -pageoff; - if (*nsegs > RPCRDMA_MAX_DATA_SEGS) - *nsegs = RPCRDMA_MAX_DATA_SEGS; + if (*nsegs > ia->ri_max_frmr_depth) + *nsegs = ia->ri_max_frmr_depth; for (page_no = i = 0; i < *nsegs;) { rpcrdma_map_one(ia, seg, writing); pa = seg->mr_dma; @@ -1796,6 +1819,7 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg, { struct rpcrdma_ia *ia = &r_xprt->rx_ia; int rc = 0; + int tmp_nsegs = nsegs; switch (ia->ri_memreg_strategy) { @@ -1805,35 +1829,39 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg, seg->mr_rkey = ia->ri_bind_mem->rkey; seg->mr_base = seg->mr_dma; seg->mr_nsegs = 1; - nsegs = 1; + tmp_nsegs = 1; break; #endif /* Registration using frmr registration */ case RPCRDMA_FRMR: - rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt); + rc = rpcrdma_register_frmr_external(seg, &tmp_nsegs, writing, + ia, r_xprt); break; /* Registration using fmr memory registration */ case RPCRDMA_MTHCAFMR: - rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia); + rc = rpcrdma_register_fmr_external(seg, &tmp_nsegs, writing, + ia); break; /* Registration using memory windows */ case RPCRDMA_MEMWINDOWS_ASYNC: case RPCRDMA_MEMWINDOWS: - rc = rpcrdma_register_memwin_external(seg, &nsegs, writing, ia, r_xprt); + rc = rpcrdma_register_memwin_external(seg, &tmp_nsegs, writing, + ia, r_xprt); break; /* Default registration each time */ default: - rc = rpcrdma_register_default_external(seg, &nsegs, writing, ia); + rc = rpcrdma_register_default_external(seg, &tmp_nsegs, writing, + ia); break; } if (rc) return -1; - return nsegs; + return tmp_nsegs; } int diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index cc1445d..98340a3 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -66,6 +66,7 @@ struct rpcrdma_ia { struct completion ri_done; int ri_async_rc; enum rpcrdma_memreg ri_memreg_strategy; + unsigned int ri_max_frmr_depth; }; /*