[v1,09/12] xprtrdma: Prepare rpcrdma_ep_post() for RDMA_NOMSG calls

Message ID 20150709204305.26247.39173.stgit@manet.1015granger.net (mailing list archive)
State Not Applicable

Commit Message

Chuck Lever III July 9, 2015, 8:43 p.m. UTC
Only the RPC/RDMA header is sent when making an RDMA_NOMSG call.
That header resides in the first element of the iovec array
passed to rpcrdma_ep_post().

Instead of special casing the iovec element with the pad, just
sync all the elements in the send iovec. Syncing the zero pad is
not strictly necessary, but the pad is rarely if ever used these
days, and the extra cost in that case is small.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 net/sunrpc/xprtrdma/rpc_rdma.c  |    4 ++++
 net/sunrpc/xprtrdma/verbs.c     |   27 +++++++++++----------------
 net/sunrpc/xprtrdma/xprt_rdma.h |   18 ++++++++++--------
 3 files changed, 25 insertions(+), 24 deletions(-)



Comments

Devesh Sharma July 10, 2015, 11:29 a.m. UTC | #1
Don't we need to honor the device's max limits by checking
dev_attr.max_sge? A vendor may not support 4 SGEs.

On Fri, Jul 10, 2015 at 2:13 AM, Chuck Lever <chuck.lever@oracle.com> wrote:
> Only the RPC/RDMA header is sent when making an RDMA_NOMSG call.
> That header resides in the first element of the iovec array
> passed to rpcrdma_ep_post().
>
> Instead of special casing the iovec element with the pad, just
> sync all the elements in the send iovec. Syncing the zero pad is
> not strictly necessary, but the pad is rarely if ever used these
> days, and the extra cost in that case is small.
>
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Tom Talpey July 10, 2015, 12:58 p.m. UTC | #2
On 7/10/2015 7:29 AM, Devesh Sharma wrote:
> Don't we need to honor the device's max limits by checking
> dev_attr.max_sge? A vendor may not support 4 SGEs.

iWARP requires a minimum of 4 send SGEs (draft-hilland-verbs 8.1.3.2)

    An RI MUST support at least four Scatter/Gather Elements per
    Scatter/Gather List when the Scatter/Gather List refers to the Data
    Source of a Send Operation Type or the Data Sink of a Receive
    Operation. An RI is NOT REQUIRED to support more than one
    Scatter/Gather Element per Scatter/Gather List when the
    Scatter/Gather List refers to the Data Source of an RDMA Write.

I'm not certain if IB and RoCE state a similar minimum requirement,
but it seems a very bad idea to have fewer.

Devesh Sharma July 10, 2015, 2:11 p.m. UTC | #3
On Fri, Jul 10, 2015 at 6:28 PM, Tom Talpey <tom@talpey.com> wrote:
> On 7/10/2015 7:29 AM, Devesh Sharma wrote:
>>
>> Don't we need to honor the device's max limits by checking
>> dev_attr.max_sge? A vendor may not support 4 SGEs.
>
>
> iWARP requires a minimum of 4 send SGEs (draft-hilland-verbs 8.1.3.2)
>
>    An RI MUST support at least four Scatter/Gather Elements per
>    Scatter/Gather List when the Scatter/Gather List refers to the Data
>    Source of a Send Operation Type or the Data Sink of a Receive
>    Operation. An RI is NOT REQUIRED to support more than one
>    Scatter/Gather Element per Scatter/Gather List when the
>    Scatter/Gather List refers to the Data Source of an RDMA Write.
>
> I'm not certain if IB and RoCE state a similar minimum requirement,
> but it seems a very bad idea to have fewer.

To my knowledge, the IBTA spec does not impose any such minimum
requirement, and neither does RoCE. I think it's fine if xprtrdma
honors the device limits; that covers iWARP devices as well, since
all iWARP devices support a minimum of 4.

Chuck can correct me if xprtrdma has any minimum requirements of
its own.

Chuck Lever III July 10, 2015, 2:53 p.m. UTC | #4
On Jul 10, 2015, at 10:11 AM, Devesh Sharma <devesh.sharma@avagotech.com> wrote:

> On Fri, Jul 10, 2015 at 6:28 PM, Tom Talpey <tom@talpey.com> wrote:
>> On 7/10/2015 7:29 AM, Devesh Sharma wrote:
>>> 
>>> Don't we need to honor the device's max limits by checking
>>> dev_attr.max_sge? A vendor may not support 4 SGEs.
>> 
>> 
>> iWARP requires a minimum of 4 send SGEs (draft-hilland-verbs 8.1.3.2)
>> 
>>   An RI MUST support at least four Scatter/Gather Elements per
>>   Scatter/Gather List when the Scatter/Gather List refers to the Data
>>   Source of a Send Operation Type or the Data Sink of a Receive
>>   Operation. An RI is NOT REQUIRED to support more than one
>>   Scatter/Gather Element per Scatter/Gather List when the
>>   Scatter/Gather List refers to the Data Source of an RDMA Write.
>> 
>> I'm not certain if IB and RoCE state a similar minimum requirement,
>> but it seems a very bad idea to have fewer.
> 
> To my knowledge, the IBTA spec does not impose any such minimum
> requirement, and neither does RoCE. I think it's fine if xprtrdma
> honors the device limits; that covers iWARP devices as well, since
> all iWARP devices support a minimum of 4.
> 
> Chuck can correct me if xprtrdma has any minimum requirements of
> its own.

At least 2 SGEs are required for normal operation. The code in
rpcrdma_marshal_req() sets up an iovec for the RPC/RDMA header
and one for the RPC message itself. These are used in
rpcrdma_ep_post() to SEND each request.

Four SGEs are needed only for RDMA_MSGP type requests, but so far
I have never seen one of these used. I wonder if that logic can
be removed.

It is certainly possible to examine the device’s max_sge field
in rpcrdma_ep_create() and fail transport creation if the
device’s max_sge is less than RPCRDMA_MAX_IOVS.
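For illustration, a minimal sketch of such a guard in
rpcrdma_ep_create() (assuming the cached device attributes live in
ia->ri_devattr, as in this era of the code; the error value chosen
here is arbitrary):

	/* Sketch: refuse to create the endpoint if the device
	 * cannot provide enough send SGEs for a full send iovec. */
	if (ia->ri_devattr.max_sge < RPCRDMA_MAX_IOVS) {
		dprintk("RPC:       %s: insufficient SGEs available\n",
			__func__);
		return -ENOMEM;
	}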

--
Chuck Lever



Schumaker, Anna July 10, 2015, 8:43 p.m. UTC | #5
Hi Chuck,

On 07/09/2015 04:43 PM, Chuck Lever wrote:
> Only the RPC/RDMA header is sent when making an RDMA_NOMSG call.
> That header resides in the first element of the iovec array
> passed to rpcrdma_ep_post().
> 
> Instead of special casing the iovec element with the pad, just
> sync all the elements in the send iovec. Syncing the zero pad is
> not strictly necessary, but the pad is rarely if ever used these
> days, and the extra cost in that case is small.
> 
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
> index cdf5220..9199436 100644
> --- a/net/sunrpc/xprtrdma/verbs.c
> +++ b/net/sunrpc/xprtrdma/verbs.c
> @@ -1316,22 +1318,15 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
>  
>  	send_wr.next = NULL;
>  	send_wr.wr_id = RPCRDMA_IGNORE_COMPLETION;
> -	send_wr.sg_list = req->rl_send_iov;
> +	send_wr.sg_list = iov;
>  	send_wr.num_sge = req->rl_niovs;
>  	send_wr.opcode = IB_WR_SEND;
> -	if (send_wr.num_sge == 4)	/* no need to sync any pad (constant) */
> -		ib_dma_sync_single_for_device(ia->ri_device,
> -					      req->rl_send_iov[3].addr,
> -					      req->rl_send_iov[3].length,
> -					      DMA_TO_DEVICE);
> -	ib_dma_sync_single_for_device(ia->ri_device,
> -				      req->rl_send_iov[1].addr,
> -				      req->rl_send_iov[1].length,
> -				      DMA_TO_DEVICE);
> -	ib_dma_sync_single_for_device(ia->ri_device,
> -				      req->rl_send_iov[0].addr,
> -				      req->rl_send_iov[0].length,
> -				      DMA_TO_DEVICE);
> +
> +	for (i = 0; i < send_wr.num_sge; i++)
> +		ib_dma_sync_single_for_device(device, iov[i].addr,
> +					      iov[i].length, DMA_TO_DEVICE);

Two questions here:
1) Is syncing order important?  The original code went 3 - 1 - 0, but now we're going 0 - 1 - 2 - 3.
2) Is iov[2] the zero pad you mentioned in your commit message?  If not, do you know why it wasn't already being synced?

Thanks,
Anna

> +	dprintk("RPC:       %s: posting %d s/g entries\n",
> +		__func__, send_wr.num_sge);
>  
>  	if (DECR_CQCOUNT(ep) > 0)
>  		send_wr.send_flags = 0;

Chuck Lever July 10, 2015, 8:52 p.m. UTC | #6
> On Jul 10, 2015, at 4:43 PM, Anna Schumaker <Anna.Schumaker@netapp.com> wrote:
> 
> Hi Chuck,
> 
> On 07/09/2015 04:43 PM, Chuck Lever wrote:
>> Only the RPC/RDMA header is sent when making an RDMA_NOMSG call.
>> That header resides in the first element of the iovec array
>> passed to rpcrdma_ep_post().
>> 
>> Instead of special casing the iovec element with the pad, just
>> sync all the elements in the send iovec. Syncing the zero pad is
>> not strictly necessary, but the pad is rarely if ever used these
>> days, and the extra cost in that case is small.
>> 
>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
>> index cdf5220..9199436 100644
>> --- a/net/sunrpc/xprtrdma/verbs.c
>> +++ b/net/sunrpc/xprtrdma/verbs.c
>> @@ -1316,22 +1318,15 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
>> 
>> 	send_wr.next = NULL;
>> 	send_wr.wr_id = RPCRDMA_IGNORE_COMPLETION;
>> -	send_wr.sg_list = req->rl_send_iov;
>> +	send_wr.sg_list = iov;
>> 	send_wr.num_sge = req->rl_niovs;
>> 	send_wr.opcode = IB_WR_SEND;
>> -	if (send_wr.num_sge == 4)	/* no need to sync any pad (constant) */
>> -		ib_dma_sync_single_for_device(ia->ri_device,
>> -					      req->rl_send_iov[3].addr,
>> -					      req->rl_send_iov[3].length,
>> -					      DMA_TO_DEVICE);
>> -	ib_dma_sync_single_for_device(ia->ri_device,
>> -				      req->rl_send_iov[1].addr,
>> -				      req->rl_send_iov[1].length,
>> -				      DMA_TO_DEVICE);
>> -	ib_dma_sync_single_for_device(ia->ri_device,
>> -				      req->rl_send_iov[0].addr,
>> -				      req->rl_send_iov[0].length,
>> -				      DMA_TO_DEVICE);
>> +
>> +	for (i = 0; i < send_wr.num_sge; i++)
>> +		ib_dma_sync_single_for_device(device, iov[i].addr,
>> +					      iov[i].length, DMA_TO_DEVICE);
> 
> Two questions here:
> 1) Is syncing order important?  The original code went 3 - 1 - 0, but now we're going 0 - 1 - 2 - 3.

No, the syncing order isn’t important. What’s important is that
the SGEs be synced _before_ the SEND Work Request is posted so
that the HCA can see the freshly marshaled RPC.


> 2) Is iov[2] the zero pad you mentioned in your commit message?

Yes, it is.

> If not, do you know why it wasn't already being synced?

The pad always contains zeroes. Syncing it is optional, AFAICT.

v2 of this series will have some changes in this area, and the
zero pad will be gone entirely.
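Condensed from the patch above, the resulting ordering in
rpcrdma_ep_post() is: sync every SGE first, then post the Work
Request:

	/* Make the marshaled header and RPC message visible to
	 * the device... */
	for (i = 0; i < send_wr.num_sge; i++)
		ib_dma_sync_single_for_device(device, iov[i].addr,
					      iov[i].length, DMA_TO_DEVICE);
	/* ...and only then hand the WR to the hardware. */
	rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);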


> Thanks,
> Anna
> 
>> +	dprintk("RPC:       %s: posting %d s/g entries\n",
>> +		__func__, send_wr.num_sge);
>> 
>> 	if (DECR_CQCOUNT(ep) > 0)
>> 		send_wr.send_flags = 0;

--
Chuck Lever
chucklever@gmail.com



Jason Gunthorpe July 10, 2015, 10:44 p.m. UTC | #7
On Fri, Jul 10, 2015 at 10:53:40AM -0400, Chuck Lever wrote:

> It is certainly possible to examine the device’s max_sge field
> in rpcrdma_ep_create() and fail transport creation if the
> device’s max_sge is less than RPCRDMA_MAX_IOVS.

I just want to draw Sagi's attention to this problem when considering
my thoughts on an alternate API for posting.

This thread is about NFS needing 4 SGEs for a (rare?) message type.

The API direction I suggested addresses this kind of issue as well,
because under the covers the driver/core can spin up a temporary MR
for this case and thus support an infinite s/g list for all posts.

Jason

Patch

diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index cb05233..2e721f2 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -575,6 +575,10 @@  rpcrdma_marshal_req(struct rpc_rqst *rqst)
 	req->rl_send_iov[0].length = hdrlen;
 	req->rl_send_iov[0].lkey = rdmab_lkey(req->rl_rdmabuf);
 
+	req->rl_niovs = 1;
+	if (rtype == rpcrdma_areadch)
+		return 0;
+
 	req->rl_send_iov[1].addr = rdmab_addr(req->rl_sendbuf);
 	req->rl_send_iov[1].length = rpclen;
 	req->rl_send_iov[1].lkey = rdmab_lkey(req->rl_sendbuf);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index cdf5220..9199436 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -651,7 +651,7 @@  rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
 	if (rc)
 		return rc;
 	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
-	ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
+	ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_IOVS;
 	ep->rep_attr.cap.max_recv_sge = 1;
 	ep->rep_attr.cap.max_inline_data = 0;
 	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
@@ -1303,9 +1303,11 @@  rpcrdma_ep_post(struct rpcrdma_ia *ia,
 		struct rpcrdma_ep *ep,
 		struct rpcrdma_req *req)
 {
+	struct ib_device *device = ia->ri_device;
 	struct ib_send_wr send_wr, *send_wr_fail;
 	struct rpcrdma_rep *rep = req->rl_reply;
-	int rc;
+	struct ib_sge *iov = req->rl_send_iov;
+	int i, rc;
 
 	if (rep) {
 		rc = rpcrdma_ep_post_recv(ia, ep, rep);
@@ -1316,22 +1318,15 @@  rpcrdma_ep_post(struct rpcrdma_ia *ia,
 
 	send_wr.next = NULL;
 	send_wr.wr_id = RPCRDMA_IGNORE_COMPLETION;
-	send_wr.sg_list = req->rl_send_iov;
+	send_wr.sg_list = iov;
 	send_wr.num_sge = req->rl_niovs;
 	send_wr.opcode = IB_WR_SEND;
-	if (send_wr.num_sge == 4)	/* no need to sync any pad (constant) */
-		ib_dma_sync_single_for_device(ia->ri_device,
-					      req->rl_send_iov[3].addr,
-					      req->rl_send_iov[3].length,
-					      DMA_TO_DEVICE);
-	ib_dma_sync_single_for_device(ia->ri_device,
-				      req->rl_send_iov[1].addr,
-				      req->rl_send_iov[1].length,
-				      DMA_TO_DEVICE);
-	ib_dma_sync_single_for_device(ia->ri_device,
-				      req->rl_send_iov[0].addr,
-				      req->rl_send_iov[0].length,
-				      DMA_TO_DEVICE);
+
+	for (i = 0; i < send_wr.num_sge; i++)
+		ib_dma_sync_single_for_device(device, iov[i].addr,
+					      iov[i].length, DMA_TO_DEVICE);
+	dprintk("RPC:       %s: posting %d s/g entries\n",
+		__func__, send_wr.num_sge);
 
 	if (DECR_CQCOUNT(ep) > 0)
 		send_wr.send_flags = 0;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index ce4e79e..90da480 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -256,16 +256,18 @@  struct rpcrdma_mr_seg {		/* chunk descriptors */
 	char		*mr_offset;	/* kva if no page, else offset */
 };
 
+#define RPCRDMA_MAX_IOVS	(4)
+
 struct rpcrdma_req {
-	unsigned int	rl_niovs;	/* 0, 2 or 4 */
-	unsigned int	rl_nchunks;	/* non-zero if chunks */
-	unsigned int	rl_connect_cookie;	/* retry detection */
-	struct rpcrdma_buffer *rl_buffer; /* home base for this structure */
+	unsigned int		rl_niovs;
+	unsigned int		rl_nchunks;
+	unsigned int		rl_connect_cookie;
+	struct rpcrdma_buffer	*rl_buffer;
 	struct rpcrdma_rep	*rl_reply;/* holder for reply buffer */
-	struct ib_sge	rl_send_iov[4];	/* for active requests */
-	struct rpcrdma_regbuf *rl_rdmabuf;
-	struct rpcrdma_regbuf *rl_sendbuf;
-	struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];
+	struct ib_sge		rl_send_iov[RPCRDMA_MAX_IOVS];
+	struct rpcrdma_regbuf	*rl_rdmabuf;
+	struct rpcrdma_regbuf	*rl_sendbuf;
+	struct rpcrdma_mr_seg	rl_segments[RPCRDMA_MAX_SEGS];
 };
 
 static inline struct rpcrdma_req *