diff mbox series

[v4,07/30] xprtrdma: Fix ri_max_segs and the result of ro_maxpages

Message ID 20181217163958.24133.16923.stgit@manet.1015granger.net (mailing list archive)
State New, archived
Headers show
Series NFS/RDMA client for next | expand

Commit Message

Chuck Lever Dec. 17, 2018, 4:39 p.m. UTC
With certain combinations of krb5i/p, MR size, and r/wsize, I/O can
fail with EMSGSIZE. This is because the calculated value of
ri_max_segs (the max number of MRs per RPC) exceeded
RPCRDMA_MAX_HDR_SEGS, which caused Read or Write list encoding to
walk off the end of the transport header.

Once that was addressed, the ro_maxpages result has to be corrected
to account for the number of MRs needed for Reply chunks, which is
2 MRs smaller than a normal Read or Write chunk.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 net/sunrpc/xprtrdma/fmr_ops.c   |    7 +++++--
 net/sunrpc/xprtrdma/frwr_ops.c  |    7 +++++--
 net/sunrpc/xprtrdma/transport.c |    6 ++++--
 3 files changed, 14 insertions(+), 6 deletions(-)

Comments

Anna Schumaker Dec. 18, 2018, 7:35 p.m. UTC | #1
Hi Chuck,

On Mon, 2018-12-17 at 11:39 -0500, Chuck Lever wrote:
> With certain combinations of krb5i/p, MR size, and r/wsize, I/O can
> fail with EMSGSIZE. This is because the calculated value of
> ri_max_segs (the max number of MRs per RPC) exceeded
> RPCRDMA_MAX_HDR_SEGS, which caused Read or Write list encoding to
> walk off the end of the transport header.
> 
> Once that was addressed, the ro_maxpages result has to be corrected
> to account for the number of MRs needed for Reply chunks, which is
> 2 MRs smaller than a normal Read or Write chunk.
> 
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>  net/sunrpc/xprtrdma/fmr_ops.c   |    7 +++++--
>  net/sunrpc/xprtrdma/frwr_ops.c  |    7 +++++--
>  net/sunrpc/xprtrdma/transport.c |    6 ++++--
>  3 files changed, 14 insertions(+), 6 deletions(-)
> 
> diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
> index 7f5632c..78a0224 100644
> --- a/net/sunrpc/xprtrdma/fmr_ops.c
> +++ b/net/sunrpc/xprtrdma/fmr_ops.c
> @@ -176,7 +176,10 @@ enum {
>  
>  	ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
>  				RPCRDMA_MAX_FMR_SGES);
> -	ia->ri_max_segs += 2;	/* segments for head and tail buffers */
> +	/* Reply chunks require segments for head and tail buffers */
> +	ia->ri_max_segs += 2;
> +	if (ia->ri_max_segs > RPCRDMA_MAX_HDR_SEGS)
> +		ia->ri_max_segs = RPCRDMA_MAX_HDR_SEGS;
>  	return 0;
>  }
>  
> @@ -186,7 +189,7 @@ enum {
>  fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
>  {
>  	return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
> -		     RPCRDMA_MAX_HDR_SEGS * RPCRDMA_MAX_FMR_SGES);
> +		     (ia->ri_max_segs - 2) * RPCRDMA_MAX_FMR_SGES);

ia isn't defined in this function.  Should that be r_xprt->rx_ia.ri_max_segs
instead?

Thanks,
Anna

>  }
>  
>  /* Use the ib_map_phys_fmr() verb to register a memory region
> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
> index 27222c0..f587e44 100644
> --- a/net/sunrpc/xprtrdma/frwr_ops.c
> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
> @@ -244,7 +244,10 @@
>  
>  	ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
>  				ia->ri_max_frwr_depth);
> -	ia->ri_max_segs += 2;	/* segments for head and tail buffers */
> +	/* Reply chunks require segments for head and tail buffers */
> +	ia->ri_max_segs += 2;
> +	if (ia->ri_max_segs > RPCRDMA_MAX_HDR_SEGS)
> +		ia->ri_max_segs = RPCRDMA_MAX_HDR_SEGS;
>  	return 0;
>  }
>  
> @@ -257,7 +260,7 @@
>  	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>  
>  	return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
> -		     RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frwr_depth);
> +		     (ia->ri_max_segs - 2) * ia->ri_max_frwr_depth);
>  }
>  
>  static void
> diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
> index a16296b..fbb14bf 100644
> --- a/net/sunrpc/xprtrdma/transport.c
> +++ b/net/sunrpc/xprtrdma/transport.c
> @@ -704,8 +704,10 @@
>   *	%-ENOTCONN if the caller should reconnect and call again
>   *	%-EAGAIN if the caller should call again
>   *	%-ENOBUFS if the caller should call again after a delay
> - *	%-EIO if a permanent error occurred and the request was not
> - *		sent. Do not try to send this message again.
> + *	%-EMSGSIZE if encoding ran out of buffer space. The request
> + *		was not sent. Do not try to send this message again.
> + *	%-EIO if an I/O error occurred. The request was not sent.
> + *		Do not try to send this message again.
>   */
>  static int
>  xprt_rdma_send_request(struct rpc_rqst *rqst)
>
Chuck Lever Dec. 18, 2018, 7:39 p.m. UTC | #2
> On Dec 18, 2018, at 2:35 PM, Anna Schumaker <schumaker.anna@gmail.com> wrote:
> 
> Hi Chuck,
> 
> On Mon, 2018-12-17 at 11:39 -0500, Chuck Lever wrote:
>> With certain combinations of krb5i/p, MR size, and r/wsize, I/O can
>> fail with EMSGSIZE. This is because the calculated value of
>> ri_max_segs (the max number of MRs per RPC) exceeded
>> RPCRDMA_MAX_HDR_SEGS, which caused Read or Write list encoding to
>> walk off the end of the transport header.
>> 
>> Once that was addressed, the ro_maxpages result has to be corrected
>> to account for the number of MRs needed for Reply chunks, which is
>> 2 MRs smaller than a normal Read or Write chunk.
>> 
>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>> ---
>> net/sunrpc/xprtrdma/fmr_ops.c   |    7 +++++--
>> net/sunrpc/xprtrdma/frwr_ops.c  |    7 +++++--
>> net/sunrpc/xprtrdma/transport.c |    6 ++++--
>> 3 files changed, 14 insertions(+), 6 deletions(-)
>> 
>> diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
>> index 7f5632c..78a0224 100644
>> --- a/net/sunrpc/xprtrdma/fmr_ops.c
>> +++ b/net/sunrpc/xprtrdma/fmr_ops.c
>> @@ -176,7 +176,10 @@ enum {
>> 
>> 	ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
>> 				RPCRDMA_MAX_FMR_SGES);
>> -	ia->ri_max_segs += 2;	/* segments for head and tail buffers */
>> +	/* Reply chunks require segments for head and tail buffers */
>> +	ia->ri_max_segs += 2;
>> +	if (ia->ri_max_segs > RPCRDMA_MAX_HDR_SEGS)
>> +		ia->ri_max_segs = RPCRDMA_MAX_HDR_SEGS;
>> 	return 0;
>> }
>> 
>> @@ -186,7 +189,7 @@ enum {
>> fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
>> {
>> 	return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
>> -		     RPCRDMA_MAX_HDR_SEGS * RPCRDMA_MAX_FMR_SGES);
>> +		     (ia->ri_max_segs - 2) * RPCRDMA_MAX_FMR_SGES);
> 
> ia isn't defined in this function.  Should that be r_xprt->rx_ia.ri_max_segs
> instead?

Yeah. I compile-tested the tip of the branch, and so missed
this.

I'm going to have to send you a v5, so I'll fix this up in
the next post of this series.


> Thanks,
> Anna
> 
>> }
>> 
>> /* Use the ib_map_phys_fmr() verb to register a memory region
>> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
>> index 27222c0..f587e44 100644
>> --- a/net/sunrpc/xprtrdma/frwr_ops.c
>> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
>> @@ -244,7 +244,10 @@
>> 
>> 	ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
>> 				ia->ri_max_frwr_depth);
>> -	ia->ri_max_segs += 2;	/* segments for head and tail buffers */
>> +	/* Reply chunks require segments for head and tail buffers */
>> +	ia->ri_max_segs += 2;
>> +	if (ia->ri_max_segs > RPCRDMA_MAX_HDR_SEGS)
>> +		ia->ri_max_segs = RPCRDMA_MAX_HDR_SEGS;
>> 	return 0;
>> }
>> 
>> @@ -257,7 +260,7 @@
>> 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>> 
>> 	return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
>> -		     RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frwr_depth);
>> +		     (ia->ri_max_segs - 2) * ia->ri_max_frwr_depth);
>> }
>> 
>> static void
>> diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
>> index a16296b..fbb14bf 100644
>> --- a/net/sunrpc/xprtrdma/transport.c
>> +++ b/net/sunrpc/xprtrdma/transport.c
>> @@ -704,8 +704,10 @@
>>  *	%-ENOTCONN if the caller should reconnect and call again
>>  *	%-EAGAIN if the caller should call again
>>  *	%-ENOBUFS if the caller should call again after a delay
>> - *	%-EIO if a permanent error occurred and the request was not
>> - *		sent. Do not try to send this message again.
>> + *	%-EMSGSIZE if encoding ran out of buffer space. The request
>> + *		was not sent. Do not try to send this message again.
>> + *	%-EIO if an I/O error occurred. The request was not sent.
>> + *		Do not try to send this message again.
>>  */
>> static int
>> xprt_rdma_send_request(struct rpc_rqst *rqst)

--
Chuck Lever
diff mbox series

Patch

diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 7f5632c..78a0224 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -176,7 +176,10 @@  enum {
 
 	ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
 				RPCRDMA_MAX_FMR_SGES);
-	ia->ri_max_segs += 2;	/* segments for head and tail buffers */
+	/* Reply chunks require segments for head and tail buffers */
+	ia->ri_max_segs += 2;
+	if (ia->ri_max_segs > RPCRDMA_MAX_HDR_SEGS)
+		ia->ri_max_segs = RPCRDMA_MAX_HDR_SEGS;
 	return 0;
 }
 
@@ -186,7 +189,7 @@  enum {
 fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
 {
 	return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
-		     RPCRDMA_MAX_HDR_SEGS * RPCRDMA_MAX_FMR_SGES);
+		     (ia->ri_max_segs - 2) * RPCRDMA_MAX_FMR_SGES);
 }
 
 /* Use the ib_map_phys_fmr() verb to register a memory region
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 27222c0..f587e44 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -244,7 +244,10 @@ 
 
 	ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
 				ia->ri_max_frwr_depth);
-	ia->ri_max_segs += 2;	/* segments for head and tail buffers */
+	/* Reply chunks require segments for head and tail buffers */
+	ia->ri_max_segs += 2;
+	if (ia->ri_max_segs > RPCRDMA_MAX_HDR_SEGS)
+		ia->ri_max_segs = RPCRDMA_MAX_HDR_SEGS;
 	return 0;
 }
 
@@ -257,7 +260,7 @@ 
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 
 	return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
-		     RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frwr_depth);
+		     (ia->ri_max_segs - 2) * ia->ri_max_frwr_depth);
 }
 
 static void
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index a16296b..fbb14bf 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -704,8 +704,10 @@ 
  *	%-ENOTCONN if the caller should reconnect and call again
  *	%-EAGAIN if the caller should call again
  *	%-ENOBUFS if the caller should call again after a delay
- *	%-EIO if a permanent error occurred and the request was not
- *		sent. Do not try to send this message again.
+ *	%-EMSGSIZE if encoding ran out of buffer space. The request
+ *		was not sent. Do not try to send this message again.
+ *	%-EIO if an I/O error occurred. The request was not sent.
+ *		Do not try to send this message again.
  */
 static int
 xprt_rdma_send_request(struct rpc_rqst *rqst)