diff mbox

[v1,05/12] xprtrdma: Account for RPC/RDMA header size when deciding to inline

Message ID 20150709204227.26247.51111.stgit@manet.1015granger.net (mailing list archive)
State New, archived
Headers show

Commit Message

Chuck Lever July 9, 2015, 8:42 p.m. UTC
When marshaling RPC/RDMA requests, ensure the combined size of
RPC/RDMA header and RPC header do not exceed the inline threshold.
Endpoints typically reject RPC/RDMA messages that exceed the size
of their receive buffers.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 net/sunrpc/xprtrdma/rpc_rdma.c |   29 +++++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Devesh Sharma July 10, 2015, 10:55 a.m. UTC | #1
Looks good

Reviewed-By: Devesh Sharma <devesh.sharma@avagotech.com>

On Fri, Jul 10, 2015 at 2:12 AM, Chuck Lever <chuck.lever@oracle.com> wrote:
> When marshaling RPC/RDMA requests, ensure the combined size of
> RPC/RDMA header and RPC header do not exceed the inline threshold.
> Endpoints typically reject RPC/RDMA messages that exceed the size
> of their receive buffers.
>
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>  net/sunrpc/xprtrdma/rpc_rdma.c |   29 +++++++++++++++++++++++++++--
>  1 file changed, 27 insertions(+), 2 deletions(-)
>
> diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
> index 84ea37d..8cf9402 100644
> --- a/net/sunrpc/xprtrdma/rpc_rdma.c
> +++ b/net/sunrpc/xprtrdma/rpc_rdma.c
> @@ -71,6 +71,31 @@ static const char transfertypes[][12] = {
>  };
>  #endif
>
> +/* The client can send a request inline as long as the RPCRDMA header
> + * plus the RPC call fit under the transport's inline limit. If the
> + * combined call message size exceeds that limit, the client must use
> + * the read chunk list for this operation.
> + */
> +static bool rpcrdma_args_inline(struct rpc_rqst *rqst)
> +{
> +       unsigned int callsize = RPCRDMA_HDRLEN_MIN + rqst->rq_snd_buf.len;
> +
> +       return callsize <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst);
> +}
> +
> +/* The client can’t know how large the actual reply will be. Thus it
> + * plans for the largest possible reply for that particular ULP
> + * operation. If the maximum combined reply message size exceeds that
> + * limit, the client must provide a write list or a reply chunk for
> + * this request.
> + */
> +static bool rpcrdma_results_inline(struct rpc_rqst *rqst)
> +{
> +       unsigned int repsize = RPCRDMA_HDRLEN_MIN + rqst->rq_rcv_buf.buflen;
> +
> +       return repsize <= RPCRDMA_INLINE_READ_THRESHOLD(rqst);
> +}
> +
>  /*
>   * Chunk assembly from upper layer xdr_buf.
>   *
> @@ -418,7 +443,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
>          * a READ, then use write chunks to separate the file data
>          * into pages; otherwise use reply chunks.
>          */
> -       if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst))
> +       if (rpcrdma_results_inline(rqst))
>                 wtype = rpcrdma_noch;
>         else if (rqst->rq_rcv_buf.page_len == 0)
>                 wtype = rpcrdma_replych;
> @@ -441,7 +466,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
>          * implies the op is a write.
>          * TBD check NFSv4 setacl
>          */
> -       if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst))
> +       if (rpcrdma_args_inline(rqst))
>                 rtype = rpcrdma_noch;
>         else if (rqst->rq_snd_buf.page_len == 0)
>                 rtype = rpcrdma_areadch;
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Schumaker, Anna July 10, 2015, 8:08 p.m. UTC | #2
Hi Chuck,

On 07/09/2015 04:42 PM, Chuck Lever wrote:
> When marshaling RPC/RDMA requests, ensure the combined size of
> RPC/RDMA header and RPC header do not exceed the inline threshold.
> Endpoints typically reject RPC/RDMA messages that exceed the size
> of their receive buffers.
> 
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>  net/sunrpc/xprtrdma/rpc_rdma.c |   29 +++++++++++++++++++++++++++--
>  1 file changed, 27 insertions(+), 2 deletions(-)
> 
> diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
> index 84ea37d..8cf9402 100644
> --- a/net/sunrpc/xprtrdma/rpc_rdma.c
> +++ b/net/sunrpc/xprtrdma/rpc_rdma.c
> @@ -71,6 +71,31 @@ static const char transfertypes[][12] = {
>  };
>  #endif
>  
> +/* The client can send a request inline as long as the RPCRDMA header
> + * plus the RPC call fit under the transport's inline limit. If the
> + * combined call message size exceeds that limit, the client must use
> + * the read chunk list for this operation.
> + */
> +static bool rpcrdma_args_inline(struct rpc_rqst *rqst)
> +{
> +	unsigned int callsize = RPCRDMA_HDRLEN_MIN + rqst->rq_snd_buf.len;
> +
> +	return callsize <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst);
> +}
> +
> +/* The client can’t know how large the actual reply will be. Thus it
                 ^^^^^
This is showing up as "can<80><99>t" in git-show, leading me to think that the apostrophe was replaced with a unicode-apostrophe.  Google might have made the switch, but can you double check the patch on your side just in case?

Thanks,
Anna

> + * plans for the largest possible reply for that particular ULP
> + * operation. If the maximum combined reply message size exceeds that
> + * limit, the client must provide a write list or a reply chunk for
> + * this request.
> + */
> +static bool rpcrdma_results_inline(struct rpc_rqst *rqst)
> +{
> +	unsigned int repsize = RPCRDMA_HDRLEN_MIN + rqst->rq_rcv_buf.buflen;
> +
> +	return repsize <= RPCRDMA_INLINE_READ_THRESHOLD(rqst);
> +}
> +
>  /*
>   * Chunk assembly from upper layer xdr_buf.
>   *
> @@ -418,7 +443,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
>  	 * a READ, then use write chunks to separate the file data
>  	 * into pages; otherwise use reply chunks.
>  	 */
> -	if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst))
> +	if (rpcrdma_results_inline(rqst))
>  		wtype = rpcrdma_noch;
>  	else if (rqst->rq_rcv_buf.page_len == 0)
>  		wtype = rpcrdma_replych;
> @@ -441,7 +466,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
>  	 * implies the op is a write.
>  	 * TBD check NFSv4 setacl
>  	 */
> -	if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst))
> +	if (rpcrdma_args_inline(rqst))
>  		rtype = rpcrdma_noch;
>  	else if (rqst->rq_snd_buf.page_len == 0)
>  		rtype = rpcrdma_areadch;
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Chuck Lever July 10, 2015, 8:28 p.m. UTC | #3
On Jul 10, 2015, at 4:08 PM, Anna Schumaker <Anna.Schumaker@netapp.com> wrote:

> Hi Chuck,
> 
> On 07/09/2015 04:42 PM, Chuck Lever wrote:
>> When marshaling RPC/RDMA requests, ensure the combined size of
>> RPC/RDMA header and RPC header do not exceed the inline threshold.
>> Endpoints typically reject RPC/RDMA messages that exceed the size
>> of their receive buffers.
>> 
>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>> ---
>> net/sunrpc/xprtrdma/rpc_rdma.c |   29 +++++++++++++++++++++++++++--
>> 1 file changed, 27 insertions(+), 2 deletions(-)
>> 
>> diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
>> index 84ea37d..8cf9402 100644
>> --- a/net/sunrpc/xprtrdma/rpc_rdma.c
>> +++ b/net/sunrpc/xprtrdma/rpc_rdma.c
>> @@ -71,6 +71,31 @@ static const char transfertypes[][12] = {
>> };
>> #endif
>> 
>> +/* The client can send a request inline as long as the RPCRDMA header
>> + * plus the RPC call fit under the transport's inline limit. If the
>> + * combined call message size exceeds that limit, the client must use
>> + * the read chunk list for this operation.
>> + */
>> +static bool rpcrdma_args_inline(struct rpc_rqst *rqst)
>> +{
>> +	unsigned int callsize = RPCRDMA_HDRLEN_MIN + rqst->rq_snd_buf.len;
>> +
>> +	return callsize <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst);
>> +}
>> +
>> +/* The client can’t know how large the actual reply will be. Thus it
>                 ^^^^^
> This is showing up as "can<80><99>t" in git-show, leading me to think that the apostrophe was replaced with a unicode-apostrophe.  Google might have made the switch, but can you double check the patch on your side just in case?

Fixed.


> 
> Thanks,
> Anna
> 
>> + * plans for the largest possible reply for that particular ULP
>> + * operation. If the maximum combined reply message size exceeds that
>> + * limit, the client must provide a write list or a reply chunk for
>> + * this request.
>> + */
>> +static bool rpcrdma_results_inline(struct rpc_rqst *rqst)
>> +{
>> +	unsigned int repsize = RPCRDMA_HDRLEN_MIN + rqst->rq_rcv_buf.buflen;
>> +
>> +	return repsize <= RPCRDMA_INLINE_READ_THRESHOLD(rqst);
>> +}
>> +
>> /*
>>  * Chunk assembly from upper layer xdr_buf.
>>  *
>> @@ -418,7 +443,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
>> 	 * a READ, then use write chunks to separate the file data
>> 	 * into pages; otherwise use reply chunks.
>> 	 */
>> -	if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst))
>> +	if (rpcrdma_results_inline(rqst))
>> 		wtype = rpcrdma_noch;
>> 	else if (rqst->rq_rcv_buf.page_len == 0)
>> 		wtype = rpcrdma_replych;
>> @@ -441,7 +466,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
>> 	 * implies the op is a write.
>> 	 * TBD check NFSv4 setacl
>> 	 */
>> -	if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst))
>> +	if (rpcrdma_args_inline(rqst))
>> 		rtype = rpcrdma_noch;
>> 	else if (rqst->rq_snd_buf.page_len == 0)
>> 		rtype = rpcrdma_areadch;
>> 
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> 
> 

--
Chuck Lever



--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sagi Grimberg July 12, 2015, 2:37 p.m. UTC | #4
On 7/9/2015 11:42 PM, Chuck Lever wrote:
> When marshaling RPC/RDMA requests, ensure the combined size of
> RPC/RDMA header and RPC header do not exceed the inline threshold.
> Endpoints typically reject RPC/RDMA messages that exceed the size
> of their receive buffers.

Did this solve a bug? Because it seems like it does.
Maybe it will be a good idea to describe this bug.

>
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>   net/sunrpc/xprtrdma/rpc_rdma.c |   29 +++++++++++++++++++++++++++--
>   1 file changed, 27 insertions(+), 2 deletions(-)
>
> diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
> index 84ea37d..8cf9402 100644
> --- a/net/sunrpc/xprtrdma/rpc_rdma.c
> +++ b/net/sunrpc/xprtrdma/rpc_rdma.c
> @@ -71,6 +71,31 @@ static const char transfertypes[][12] = {
>   };
>   #endif
>
> +/* The client can send a request inline as long as the RPCRDMA header
> + * plus the RPC call fit under the transport's inline limit. If the
> + * combined call message size exceeds that limit, the client must use
> + * the read chunk list for this operation.
> + */
> +static bool rpcrdma_args_inline(struct rpc_rqst *rqst)

maybe static inline?

> +{
> +	unsigned int callsize = RPCRDMA_HDRLEN_MIN + rqst->rq_snd_buf.len;
> +
> +	return callsize <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst);
> +}
> +
> +/* The client can’t know how large the actual reply will be. Thus it
> + * plans for the largest possible reply for that particular ULP
> + * operation. If the maximum combined reply message size exceeds that
> + * limit, the client must provide a write list or a reply chunk for
> + * this request.
> + */
> +static bool rpcrdma_results_inline(struct rpc_rqst *rqst)
> +{
> +	unsigned int repsize = RPCRDMA_HDRLEN_MIN + rqst->rq_rcv_buf.buflen;
> +
> +	return repsize <= RPCRDMA_INLINE_READ_THRESHOLD(rqst);
> +}
> +
>   /*
>    * Chunk assembly from upper layer xdr_buf.
>    *
> @@ -418,7 +443,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
>   	 * a READ, then use write chunks to separate the file data
>   	 * into pages; otherwise use reply chunks.
>   	 */
> -	if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst))
> +	if (rpcrdma_results_inline(rqst))
>   		wtype = rpcrdma_noch;
>   	else if (rqst->rq_rcv_buf.page_len == 0)
>   		wtype = rpcrdma_replych;
> @@ -441,7 +466,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
>   	 * implies the op is a write.
>   	 * TBD check NFSv4 setacl
>   	 */
> -	if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst))
> +	if (rpcrdma_args_inline(rqst))
>   		rtype = rpcrdma_noch;
>   	else if (rqst->rq_snd_buf.page_len == 0)
>   		rtype = rpcrdma_areadch;
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Chuck Lever July 12, 2015, 5:52 p.m. UTC | #5
Hi Sagi-


On Jul 12, 2015, at 10:37 AM, Sagi Grimberg <sagig@dev.mellanox.co.il> wrote:

> On 7/9/2015 11:42 PM, Chuck Lever wrote:
>> When marshaling RPC/RDMA requests, ensure the combined size of
>> RPC/RDMA header and RPC header do not exceed the inline threshold.
>> Endpoints typically reject RPC/RDMA messages that exceed the size
>> of their receive buffers.
> 
> Did this solve a bug? because is seems like it does.
> Maybe it will be a good idea to describe this bug.

There’s no bugzilla for this, as no issue has been encountered
in the field so far. It’s hard to trigger and servers are
forgiving.

I added some text in the patch description.


>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>> ---
>>  net/sunrpc/xprtrdma/rpc_rdma.c |   29 +++++++++++++++++++++++++++--
>>  1 file changed, 27 insertions(+), 2 deletions(-)
>> 
>> diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
>> index 84ea37d..8cf9402 100644
>> --- a/net/sunrpc/xprtrdma/rpc_rdma.c
>> +++ b/net/sunrpc/xprtrdma/rpc_rdma.c
>> @@ -71,6 +71,31 @@ static const char transfertypes[][12] = {
>>  };
>>  #endif
>> 
>> +/* The client can send a request inline as long as the RPCRDMA header
>> + * plus the RPC call fit under the transport's inline limit. If the
>> + * combined call message size exceeds that limit, the client must use
>> + * the read chunk list for this operation.
>> + */
>> +static bool rpcrdma_args_inline(struct rpc_rqst *rqst)
> 
> maybe static inline?

The final paragraph of Chapter 15 of Documentation/CodingStyle
suggests “static inline” is undesirable here. I think gcc makes
the correct inlining choice here by itself.


>> +{
>> +	unsigned int callsize = RPCRDMA_HDRLEN_MIN + rqst->rq_snd_buf.len;
>> +
>> +	return callsize <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst);
>> +}
>> +
>> +/* The client can’t know how large the actual reply will be. Thus it
>> + * plans for the largest possible reply for that particular ULP
>> + * operation. If the maximum combined reply message size exceeds that
>> + * limit, the client must provide a write list or a reply chunk for
>> + * this request.
>> + */
>> +static bool rpcrdma_results_inline(struct rpc_rqst *rqst)
>> +{
>> +	unsigned int repsize = RPCRDMA_HDRLEN_MIN + rqst->rq_rcv_buf.buflen;
>> +
>> +	return repsize <= RPCRDMA_INLINE_READ_THRESHOLD(rqst);
>> +}
>> +
>>  /*
>>   * Chunk assembly from upper layer xdr_buf.
>>   *
>> @@ -418,7 +443,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
>>  	 * a READ, then use write chunks to separate the file data
>>  	 * into pages; otherwise use reply chunks.
>>  	 */
>> -	if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst))
>> +	if (rpcrdma_results_inline(rqst))
>>  		wtype = rpcrdma_noch;
>>  	else if (rqst->rq_rcv_buf.page_len == 0)
>>  		wtype = rpcrdma_replych;
>> @@ -441,7 +466,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
>>  	 * implies the op is a write.
>>  	 * TBD check NFSv4 setacl
>>  	 */
>> -	if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst))
>> +	if (rpcrdma_args_inline(rqst))
>>  		rtype = rpcrdma_noch;
>>  	else if (rqst->rq_snd_buf.page_len == 0)
>>  		rtype = rpcrdma_areadch;
>> 

--
Chuck Lever



--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 84ea37d..8cf9402 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -71,6 +71,31 @@  static const char transfertypes[][12] = {
 };
 #endif
 
+/* The client can send a request inline as long as the RPCRDMA header
+ * plus the RPC call fit under the transport's inline limit. If the
+ * combined call message size exceeds that limit, the client must use
+ * the read chunk list for this operation.
+ */
+static bool rpcrdma_args_inline(struct rpc_rqst *rqst)
+{
+	unsigned int callsize = RPCRDMA_HDRLEN_MIN + rqst->rq_snd_buf.len;
+
+	return callsize <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst);
+}
+
+/* The client can't know how large the actual reply will be. Thus it
+ * plans for the largest possible reply for that particular ULP
+ * operation. If the maximum combined reply message size exceeds that
+ * limit, the client must provide a write list or a reply chunk for
+ * this request.
+ */
+static bool rpcrdma_results_inline(struct rpc_rqst *rqst)
+{
+	unsigned int repsize = RPCRDMA_HDRLEN_MIN + rqst->rq_rcv_buf.buflen;
+
+	return repsize <= RPCRDMA_INLINE_READ_THRESHOLD(rqst);
+}
+
 /*
  * Chunk assembly from upper layer xdr_buf.
  *
@@ -418,7 +443,7 @@  rpcrdma_marshal_req(struct rpc_rqst *rqst)
 	 * a READ, then use write chunks to separate the file data
 	 * into pages; otherwise use reply chunks.
 	 */
-	if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst))
+	if (rpcrdma_results_inline(rqst))
 		wtype = rpcrdma_noch;
 	else if (rqst->rq_rcv_buf.page_len == 0)
 		wtype = rpcrdma_replych;
@@ -441,7 +466,7 @@  rpcrdma_marshal_req(struct rpc_rqst *rqst)
 	 * implies the op is a write.
 	 * TBD check NFSv4 setacl
 	 */
-	if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst))
+	if (rpcrdma_args_inline(rqst))
 		rtype = rpcrdma_noch;
 	else if (rqst->rq_snd_buf.page_len == 0)
 		rtype = rpcrdma_areadch;