diff mbox

[v2,2/5] xprtrdma: Per-connection pad optimization

Message ID 20170123205254.21699.44329.stgit@manet.1015granger.net (mailing list archive)
State New, archived
Headers show

Commit Message

Chuck Lever Jan. 23, 2017, 8:52 p.m. UTC
Pad optimization is changed by echoing into
/proc/sys/sunrpc/rdma_pad_optimize. This is a global setting,
affecting all RPC-over-RDMA connections to all servers.

The marshaling code picks up that value and uses it for decisions
about how to construct each RPC-over-RDMA frame. Having it change
suddenly in mid-operation can result in unexpected failures. And
some servers a client mounts might need chunk round-up, while
others don't.

So instead, copy the pad_optimize setting into each connection's
rpcrdma_ia when the transport is created, and use the copy, which
can't change during the life of the connection, instead.

This also removes a hack: rpcrdma_convert_iovs was using
the remote-invalidation-expected flag to predict when it could leave
out Write chunk padding. This is because the Linux server handles
implicit XDR padding on Write chunks correctly, and only Linux
servers can set the connection's remote-invalidation-expected flag.

It's more sensible to use the pad optimization setting instead.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 net/sunrpc/xprtrdma/rpc_rdma.c  |   28 ++++++++++++++--------------
 net/sunrpc/xprtrdma/verbs.c     |    1 +
 net/sunrpc/xprtrdma/xprt_rdma.h |    1 +
 3 files changed, 16 insertions(+), 14 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Schumaker, Anna Jan. 24, 2017, 7:12 p.m. UTC | #1
Hi Chuck,

On 01/23/2017 03:52 PM, Chuck Lever wrote:
> Pad optimization is changed by echoing into
> /proc/sys/sunrpc/rdma_pad_optimize. This is a global setting,
> affecting all RPC-over-RDMA connections to all servers.
> 
> The marshaling code picks up that value and uses it for decisions
> about how to construct each RPC-over-RDMA frame. Having it change
> suddenly in mid-operation can result in unexpected failures. And
> some servers a client mounts might need chunk round-up, while
> others don't.
> 
> So instead, copy the pad_optimize setting into each connection's
> rpcrdma_ia when the transport is created, and use the copy, which
> can't change during the life of the connection, instead.

Does the rdma_pad_optimize option have documentation somewhere that needs to be updated to describe the new behavior?

Anna

> 
> This also removes a hack: rpcrdma_convert_iovs was using
> the remote-invalidation-expected flag to predict when it could leave
> out Write chunk padding. This is because the Linux server handles
> implicit XDR padding on Write chunks correctly, and only Linux
> servers can set the connection's remote-invalidation-expected flag.
> 
> It's more sensible to use the pad optimization setting instead.
> 
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>  net/sunrpc/xprtrdma/rpc_rdma.c  |   28 ++++++++++++++--------------
>  net/sunrpc/xprtrdma/verbs.c     |    1 +
>  net/sunrpc/xprtrdma/xprt_rdma.h |    1 +
>  3 files changed, 16 insertions(+), 14 deletions(-)
> 
> diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
> index a524d3c..c634f0f 100644
> --- a/net/sunrpc/xprtrdma/rpc_rdma.c
> +++ b/net/sunrpc/xprtrdma/rpc_rdma.c
> @@ -186,9 +186,9 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
>   */
>  
>  static int
> -rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
> -	enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg,
> -	bool reminv_expected)
> +rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
> +		     unsigned int pos, enum rpcrdma_chunktype type,
> +		     struct rpcrdma_mr_seg *seg)
>  {
>  	int len, n, p, page_base;
>  	struct page **ppages;
> @@ -229,14 +229,15 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
>  	/* When encoding a Read chunk, the tail iovec contains an
>  	 * XDR pad and may be omitted.
>  	 */
> -	if (type == rpcrdma_readch && xprt_rdma_pad_optimize)
> +	if (type == rpcrdma_readch && r_xprt->rx_ia.ri_implicit_roundup)
>  		return n;
>  
> -	/* When encoding the Write list, some servers need to see an extra
> -	 * segment for odd-length Write chunks. The upper layer provides
> -	 * space in the tail iovec for this purpose.
> +	/* When encoding a Write chunk, some servers need to see an
> +	 * extra segment for non-XDR-aligned Write chunks. The upper
> +	 * layer provides space in the tail iovec that may be used
> +	 * for this purpose.
>  	 */
> -	if (type == rpcrdma_writech && reminv_expected)
> +	if (type == rpcrdma_writech && r_xprt->rx_ia.ri_implicit_roundup)
>  		return n;
>  
>  	if (xdrbuf->tail[0].iov_len) {
> @@ -291,7 +292,8 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
>  	if (rtype == rpcrdma_areadch)
>  		pos = 0;
>  	seg = req->rl_segments;
> -	nsegs = rpcrdma_convert_iovs(&rqst->rq_snd_buf, pos, rtype, seg, false);
> +	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_snd_buf, pos,
> +				     rtype, seg);
>  	if (nsegs < 0)
>  		return ERR_PTR(nsegs);
>  
> @@ -353,10 +355,9 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
>  	}
>  
>  	seg = req->rl_segments;
> -	nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf,
> +	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf,
>  				     rqst->rq_rcv_buf.head[0].iov_len,
> -				     wtype, seg,
> -				     r_xprt->rx_ia.ri_reminv_expected);
> +				     wtype, seg);
>  	if (nsegs < 0)
>  		return ERR_PTR(nsegs);
>  
> @@ -421,8 +422,7 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
>  	}
>  
>  	seg = req->rl_segments;
> -	nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, 0, wtype, seg,
> -				     r_xprt->rx_ia.ri_reminv_expected);
> +	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg);
>  	if (nsegs < 0)
>  		return ERR_PTR(nsegs);
>  
> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
> index 11d0774..2a6a367 100644
> --- a/net/sunrpc/xprtrdma/verbs.c
> +++ b/net/sunrpc/xprtrdma/verbs.c
> @@ -208,6 +208,7 @@
>  
>  	/* Default settings for RPC-over-RDMA Version One */
>  	r_xprt->rx_ia.ri_reminv_expected = false;
> +	r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize;
>  	rsize = RPCRDMA_V1_DEF_INLINE_SIZE;
>  	wsize = RPCRDMA_V1_DEF_INLINE_SIZE;
>  
> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
> index e35efd4..c137154 100644
> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
> @@ -75,6 +75,7 @@ struct rpcrdma_ia {
>  	unsigned int		ri_max_inline_write;
>  	unsigned int		ri_max_inline_read;
>  	bool			ri_reminv_expected;
> +	bool			ri_implicit_roundup;
>  	enum ib_mr_type		ri_mrtype;
>  	struct ib_qp_attr	ri_qp_attr;
>  	struct ib_qp_init_attr	ri_qp_init_attr;
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Chuck Lever Jan. 24, 2017, 7:16 p.m. UTC | #2
> On Jan 24, 2017, at 2:12 PM, Anna Schumaker <Anna.Schumaker@Netapp.com> wrote:
> 
> Hi Chuck,
> 
> On 01/23/2017 03:52 PM, Chuck Lever wrote:
>> Pad optimization is changed by echoing into
>> /proc/sys/sunrpc/rdma_pad_optimize. This is a global setting,
>> affecting all RPC-over-RDMA connections to all servers.
>> 
>> The marshaling code picks up that value and uses it for decisions
>> about how to construct each RPC-over-RDMA frame. Having it change
>> suddenly in mid-operation can result in unexpected failures. And
>> some servers a client mounts might need chunk round-up, while
>> others don't.
>> 
>> So instead, copy the pad_optimize setting into each connection's
>> rpcrdma_ia when the transport is created, and use the copy, which
>> can't change during the life of the connection, instead.
> 
> Does the rdma_pad_optimize option have documentation somewhere that needs to be updated to describe the new behavior?

I'm not aware of any. No documentation was changed by d5440e27d3e5.


> Anna
> 
>> 
>> This also removes a hack: rpcrdma_convert_iovs was using
>> the remote-invalidation-expected flag to predict when it could leave
>> out Write chunk padding. This is because the Linux server handles
>> implicit XDR padding on Write chunks correctly, and only Linux
>> servers can set the connection's remote-invalidation-expected flag.
>> 
>> It's more sensible to use the pad optimization setting instead.
>> 
>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>> ---
>> net/sunrpc/xprtrdma/rpc_rdma.c  |   28 ++++++++++++++--------------
>> net/sunrpc/xprtrdma/verbs.c     |    1 +
>> net/sunrpc/xprtrdma/xprt_rdma.h |    1 +
>> 3 files changed, 16 insertions(+), 14 deletions(-)
>> 
>> diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
>> index a524d3c..c634f0f 100644
>> --- a/net/sunrpc/xprtrdma/rpc_rdma.c
>> +++ b/net/sunrpc/xprtrdma/rpc_rdma.c
>> @@ -186,9 +186,9 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
>>  */
>> 
>> static int
>> -rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
>> -	enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg,
>> -	bool reminv_expected)
>> +rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
>> +		     unsigned int pos, enum rpcrdma_chunktype type,
>> +		     struct rpcrdma_mr_seg *seg)
>> {
>> 	int len, n, p, page_base;
>> 	struct page **ppages;
>> @@ -229,14 +229,15 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
>> 	/* When encoding a Read chunk, the tail iovec contains an
>> 	 * XDR pad and may be omitted.
>> 	 */
>> -	if (type == rpcrdma_readch && xprt_rdma_pad_optimize)
>> +	if (type == rpcrdma_readch && r_xprt->rx_ia.ri_implicit_roundup)
>> 		return n;
>> 
>> -	/* When encoding the Write list, some servers need to see an extra
>> -	 * segment for odd-length Write chunks. The upper layer provides
>> -	 * space in the tail iovec for this purpose.
>> +	/* When encoding a Write chunk, some servers need to see an
>> +	 * extra segment for non-XDR-aligned Write chunks. The upper
>> +	 * layer provides space in the tail iovec that may be used
>> +	 * for this purpose.
>> 	 */
>> -	if (type == rpcrdma_writech && reminv_expected)
>> +	if (type == rpcrdma_writech && r_xprt->rx_ia.ri_implicit_roundup)
>> 		return n;
>> 
>> 	if (xdrbuf->tail[0].iov_len) {
>> @@ -291,7 +292,8 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
>> 	if (rtype == rpcrdma_areadch)
>> 		pos = 0;
>> 	seg = req->rl_segments;
>> -	nsegs = rpcrdma_convert_iovs(&rqst->rq_snd_buf, pos, rtype, seg, false);
>> +	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_snd_buf, pos,
>> +				     rtype, seg);
>> 	if (nsegs < 0)
>> 		return ERR_PTR(nsegs);
>> 
>> @@ -353,10 +355,9 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
>> 	}
>> 
>> 	seg = req->rl_segments;
>> -	nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf,
>> +	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf,
>> 				     rqst->rq_rcv_buf.head[0].iov_len,
>> -				     wtype, seg,
>> -				     r_xprt->rx_ia.ri_reminv_expected);
>> +				     wtype, seg);
>> 	if (nsegs < 0)
>> 		return ERR_PTR(nsegs);
>> 
>> @@ -421,8 +422,7 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
>> 	}
>> 
>> 	seg = req->rl_segments;
>> -	nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, 0, wtype, seg,
>> -				     r_xprt->rx_ia.ri_reminv_expected);
>> +	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg);
>> 	if (nsegs < 0)
>> 		return ERR_PTR(nsegs);
>> 
>> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
>> index 11d0774..2a6a367 100644
>> --- a/net/sunrpc/xprtrdma/verbs.c
>> +++ b/net/sunrpc/xprtrdma/verbs.c
>> @@ -208,6 +208,7 @@
>> 
>> 	/* Default settings for RPC-over-RDMA Version One */
>> 	r_xprt->rx_ia.ri_reminv_expected = false;
>> +	r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize;
>> 	rsize = RPCRDMA_V1_DEF_INLINE_SIZE;
>> 	wsize = RPCRDMA_V1_DEF_INLINE_SIZE;
>> 
>> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
>> index e35efd4..c137154 100644
>> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
>> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
>> @@ -75,6 +75,7 @@ struct rpcrdma_ia {
>> 	unsigned int		ri_max_inline_write;
>> 	unsigned int		ri_max_inline_read;
>> 	bool			ri_reminv_expected;
>> +	bool			ri_implicit_roundup;
>> 	enum ib_mr_type		ri_mrtype;
>> 	struct ib_qp_attr	ri_qp_attr;
>> 	struct ib_qp_init_attr	ri_qp_init_attr;
>> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
Chuck Lever



--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Schumaker, Anna Jan. 24, 2017, 7:23 p.m. UTC | #3
On 01/24/2017 02:16 PM, Chuck Lever wrote:
> 
>> On Jan 24, 2017, at 2:12 PM, Anna Schumaker <Anna.Schumaker@Netapp.com> wrote:
>>
>> Hi Chuck,
>>
>> On 01/23/2017 03:52 PM, Chuck Lever wrote:
>>> Pad optimization is changed by echoing into
>>> /proc/sys/sunrpc/rdma_pad_optimize. This is a global setting,
>>> affecting all RPC-over-RDMA connections to all servers.
>>>
>>> The marshaling code picks up that value and uses it for decisions
>>> about how to construct each RPC-over-RDMA frame. Having it change
>>> suddenly in mid-operation can result in unexpected failures. And
>>> some servers a client mounts might need chunk round-up, while
>>> others don't.
>>>
>>> So instead, copy the pad_optimize setting into each connection's
>>> rpcrdma_ia when the transport is created, and use the copy, which
>>> can't change during the life of the connection, instead.
>>
>> Does the rdma_pad_optimize option have documentation somewhere that needs to be updated to describe the new behavior?
> 
> I'm not aware of any. No documentation was changed by d5440e27d3e5.

Okay, I just wanted to make sure everything was covered :)

Thanks,
Anna

> 
> 
>> Anna
>>
>>>
>>> This also removes a hack: rpcrdma_convert_iovs was using
>>> the remote-invalidation-expected flag to predict when it could leave
>>> out Write chunk padding. This is because the Linux server handles
>>> implicit XDR padding on Write chunks correctly, and only Linux
>>> servers can set the connection's remote-invalidation-expected flag.
>>>
>>> It's more sensible to use the pad optimization setting instead.
>>>
>>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>>> ---
>>> net/sunrpc/xprtrdma/rpc_rdma.c  |   28 ++++++++++++++--------------
>>> net/sunrpc/xprtrdma/verbs.c     |    1 +
>>> net/sunrpc/xprtrdma/xprt_rdma.h |    1 +
>>> 3 files changed, 16 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
>>> index a524d3c..c634f0f 100644
>>> --- a/net/sunrpc/xprtrdma/rpc_rdma.c
>>> +++ b/net/sunrpc/xprtrdma/rpc_rdma.c
>>> @@ -186,9 +186,9 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
>>>  */
>>>
>>> static int
>>> -rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
>>> -	enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg,
>>> -	bool reminv_expected)
>>> +rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
>>> +		     unsigned int pos, enum rpcrdma_chunktype type,
>>> +		     struct rpcrdma_mr_seg *seg)
>>> {
>>> 	int len, n, p, page_base;
>>> 	struct page **ppages;
>>> @@ -229,14 +229,15 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
>>> 	/* When encoding a Read chunk, the tail iovec contains an
>>> 	 * XDR pad and may be omitted.
>>> 	 */
>>> -	if (type == rpcrdma_readch && xprt_rdma_pad_optimize)
>>> +	if (type == rpcrdma_readch && r_xprt->rx_ia.ri_implicit_roundup)
>>> 		return n;
>>>
>>> -	/* When encoding the Write list, some servers need to see an extra
>>> -	 * segment for odd-length Write chunks. The upper layer provides
>>> -	 * space in the tail iovec for this purpose.
>>> +	/* When encoding a Write chunk, some servers need to see an
>>> +	 * extra segment for non-XDR-aligned Write chunks. The upper
>>> +	 * layer provides space in the tail iovec that may be used
>>> +	 * for this purpose.
>>> 	 */
>>> -	if (type == rpcrdma_writech && reminv_expected)
>>> +	if (type == rpcrdma_writech && r_xprt->rx_ia.ri_implicit_roundup)
>>> 		return n;
>>>
>>> 	if (xdrbuf->tail[0].iov_len) {
>>> @@ -291,7 +292,8 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
>>> 	if (rtype == rpcrdma_areadch)
>>> 		pos = 0;
>>> 	seg = req->rl_segments;
>>> -	nsegs = rpcrdma_convert_iovs(&rqst->rq_snd_buf, pos, rtype, seg, false);
>>> +	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_snd_buf, pos,
>>> +				     rtype, seg);
>>> 	if (nsegs < 0)
>>> 		return ERR_PTR(nsegs);
>>>
>>> @@ -353,10 +355,9 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
>>> 	}
>>>
>>> 	seg = req->rl_segments;
>>> -	nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf,
>>> +	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf,
>>> 				     rqst->rq_rcv_buf.head[0].iov_len,
>>> -				     wtype, seg,
>>> -				     r_xprt->rx_ia.ri_reminv_expected);
>>> +				     wtype, seg);
>>> 	if (nsegs < 0)
>>> 		return ERR_PTR(nsegs);
>>>
>>> @@ -421,8 +422,7 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
>>> 	}
>>>
>>> 	seg = req->rl_segments;
>>> -	nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, 0, wtype, seg,
>>> -				     r_xprt->rx_ia.ri_reminv_expected);
>>> +	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg);
>>> 	if (nsegs < 0)
>>> 		return ERR_PTR(nsegs);
>>>
>>> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
>>> index 11d0774..2a6a367 100644
>>> --- a/net/sunrpc/xprtrdma/verbs.c
>>> +++ b/net/sunrpc/xprtrdma/verbs.c
>>> @@ -208,6 +208,7 @@
>>>
>>> 	/* Default settings for RPC-over-RDMA Version One */
>>> 	r_xprt->rx_ia.ri_reminv_expected = false;
>>> +	r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize;
>>> 	rsize = RPCRDMA_V1_DEF_INLINE_SIZE;
>>> 	wsize = RPCRDMA_V1_DEF_INLINE_SIZE;
>>>
>>> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
>>> index e35efd4..c137154 100644
>>> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
>>> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
>>> @@ -75,6 +75,7 @@ struct rpcrdma_ia {
>>> 	unsigned int		ri_max_inline_write;
>>> 	unsigned int		ri_max_inline_read;
>>> 	bool			ri_reminv_expected;
>>> +	bool			ri_implicit_roundup;
>>> 	enum ib_mr_type		ri_mrtype;
>>> 	struct ib_qp_attr	ri_qp_attr;
>>> 	struct ib_qp_init_attr	ri_qp_init_attr;
>>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> --
> Chuck Lever
> 
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index a524d3c..c634f0f 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -186,9 +186,9 @@  static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
  */
 
 static int
-rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
-	enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg,
-	bool reminv_expected)
+rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
+		     unsigned int pos, enum rpcrdma_chunktype type,
+		     struct rpcrdma_mr_seg *seg)
 {
 	int len, n, p, page_base;
 	struct page **ppages;
@@ -229,14 +229,15 @@  static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
 	/* When encoding a Read chunk, the tail iovec contains an
 	 * XDR pad and may be omitted.
 	 */
-	if (type == rpcrdma_readch && xprt_rdma_pad_optimize)
+	if (type == rpcrdma_readch && r_xprt->rx_ia.ri_implicit_roundup)
 		return n;
 
-	/* When encoding the Write list, some servers need to see an extra
-	 * segment for odd-length Write chunks. The upper layer provides
-	 * space in the tail iovec for this purpose.
+	/* When encoding a Write chunk, some servers need to see an
+	 * extra segment for non-XDR-aligned Write chunks. The upper
+	 * layer provides space in the tail iovec that may be used
+	 * for this purpose.
 	 */
-	if (type == rpcrdma_writech && reminv_expected)
+	if (type == rpcrdma_writech && r_xprt->rx_ia.ri_implicit_roundup)
 		return n;
 
 	if (xdrbuf->tail[0].iov_len) {
@@ -291,7 +292,8 @@  static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
 	if (rtype == rpcrdma_areadch)
 		pos = 0;
 	seg = req->rl_segments;
-	nsegs = rpcrdma_convert_iovs(&rqst->rq_snd_buf, pos, rtype, seg, false);
+	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_snd_buf, pos,
+				     rtype, seg);
 	if (nsegs < 0)
 		return ERR_PTR(nsegs);
 
@@ -353,10 +355,9 @@  static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
 	}
 
 	seg = req->rl_segments;
-	nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf,
+	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf,
 				     rqst->rq_rcv_buf.head[0].iov_len,
-				     wtype, seg,
-				     r_xprt->rx_ia.ri_reminv_expected);
+				     wtype, seg);
 	if (nsegs < 0)
 		return ERR_PTR(nsegs);
 
@@ -421,8 +422,7 @@  static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
 	}
 
 	seg = req->rl_segments;
-	nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, 0, wtype, seg,
-				     r_xprt->rx_ia.ri_reminv_expected);
+	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg);
 	if (nsegs < 0)
 		return ERR_PTR(nsegs);
 
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 11d0774..2a6a367 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -208,6 +208,7 @@ 
 
 	/* Default settings for RPC-over-RDMA Version One */
 	r_xprt->rx_ia.ri_reminv_expected = false;
+	r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize;
 	rsize = RPCRDMA_V1_DEF_INLINE_SIZE;
 	wsize = RPCRDMA_V1_DEF_INLINE_SIZE;
 
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index e35efd4..c137154 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -75,6 +75,7 @@  struct rpcrdma_ia {
 	unsigned int		ri_max_inline_write;
 	unsigned int		ri_max_inline_read;
 	bool			ri_reminv_expected;
+	bool			ri_implicit_roundup;
 	enum ib_mr_type		ri_mrtype;
 	struct ib_qp_attr	ri_qp_attr;
 	struct ib_qp_init_attr	ri_qp_init_attr;