diff mbox series

[RFC,1/2] sunrpc: Replace the rq_bvec array with dynamically-allocated memory

Message ID 20250416152854.15269-2-cel@kernel.org (mailing list archive)
State RFC
Delegated to: Chuck Lever
Headers show
Series Move rq_vec[] and rq_bvec[] out of svc_rqst | expand

Commit Message

Chuck Lever April 16, 2025, 3:28 p.m. UTC
From: Chuck Lever <chuck.lever@oracle.com>

As a step towards making NFSD's maximum rsize and wsize variable,
replace the fixed-size rq_bvec[] array in struct svc_rqst with a
chunk of dynamically-allocated memory.

On a system with 8-byte pointers and 4KB pages, pahole reports that
the rq_bvec[] array is 4144 bytes. Replacing it with a single
pointer reduces the size of struct svc_rqst to about 7500 bytes.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc.h | 2 +-
 net/sunrpc/svc.c           | 6 ++++++
 net/sunrpc/svcsock.c       | 7 +++----
 3 files changed, 10 insertions(+), 5 deletions(-)

Comments

Jeff Layton April 16, 2025, 6:42 p.m. UTC | #1
On Wed, 2025-04-16 at 11:28 -0400, cel@kernel.org wrote:
> From: Chuck Lever <chuck.lever@oracle.com>
> 
> As a step towards making NFSD's maximum rsize and wsize variable,
> replace the fixed-size rq_bvec[] array in struct svc_rqst with a
> chunk of dynamically-allocated memory.
> 
> On a system with 8-byte pointers and 4KB pages, pahole reports that
> the rq_bvec[] array is 4144 bytes. Replacing it with a single
> pointer reduces the size of struct svc_rqst to about 7500 bytes.
> 
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>  include/linux/sunrpc/svc.h | 2 +-
>  net/sunrpc/svc.c           | 6 ++++++
>  net/sunrpc/svcsock.c       | 7 +++----
>  3 files changed, 10 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> index 74658cca0f38..225c385085c3 100644
> --- a/include/linux/sunrpc/svc.h
> +++ b/include/linux/sunrpc/svc.h
> @@ -195,7 +195,7 @@ struct svc_rqst {
>  
>  	struct folio_batch	rq_fbatch;
>  	struct kvec		rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */
> -	struct bio_vec		rq_bvec[RPCSVC_MAXPAGES];
> +	struct bio_vec		*rq_bvec;

It's a reasonable start.

What would also be good to do here is to replace the invocations of
RPCSVC_MAXPAGES that involve this array with a helper function that
returns the length of it.

For now it could just return RPCSVC_MAXPAGES, but eventually you could
add (e.g.) a rqstp->rq_bvec_len field and use that to indicate how many
entries there are in rq_bvec.

>  
>  	__be32			rq_xid;		/* transmission id */
>  	u32			rq_prog;	/* program number */
> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
> index e7f9c295d13c..db29819716b8 100644
> --- a/net/sunrpc/svc.c
> +++ b/net/sunrpc/svc.c
> @@ -673,6 +673,7 @@ static void
>  svc_rqst_free(struct svc_rqst *rqstp)
>  {
>  	folio_batch_release(&rqstp->rq_fbatch);
> +	kfree(rqstp->rq_bvec);
>  	svc_release_buffer(rqstp);
>  	if (rqstp->rq_scratch_page)
>  		put_page(rqstp->rq_scratch_page);
> @@ -711,6 +712,11 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
>  	if (!svc_init_buffer(rqstp, serv->sv_max_mesg, node))
>  		goto out_enomem;
>  
> +	rqstp->rq_bvec = kcalloc_node(RPCSVC_MAXPAGES, sizeof(struct bio_vec),
> +				      GFP_KERNEL, node);
> +	if (!rqstp->rq_bvec)
> +		goto out_enomem;
> +
>  	rqstp->rq_err = -EAGAIN; /* No error yet */
>  
>  	serv->sv_nrthreads += 1;
> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> index 72e5a01df3d3..671640933f18 100644
> --- a/net/sunrpc/svcsock.c
> +++ b/net/sunrpc/svcsock.c
> @@ -713,8 +713,7 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
>  	if (svc_xprt_is_dead(xprt))
>  		goto out_notconn;
>  
> -	count = xdr_buf_to_bvec(rqstp->rq_bvec,
> -				ARRAY_SIZE(rqstp->rq_bvec), xdr);
> +	count = xdr_buf_to_bvec(rqstp->rq_bvec, RPCSVC_MAXPAGES, xdr);
>  
>  	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
>  		      count, rqstp->rq_res.len);
> @@ -1219,8 +1218,8 @@ static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp,
>  	memcpy(buf, &marker, sizeof(marker));
>  	bvec_set_virt(rqstp->rq_bvec, buf, sizeof(marker));
>  
> -	count = xdr_buf_to_bvec(rqstp->rq_bvec + 1,
> -				ARRAY_SIZE(rqstp->rq_bvec) - 1, &rqstp->rq_res);
> +	count = xdr_buf_to_bvec(rqstp->rq_bvec + 1, RPCSVC_MAXPAGES,
> +				&rqstp->rq_res);
>  
>  	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
>  		      1 + count, sizeof(marker) + rqstp->rq_res.len);
Chuck Lever April 16, 2025, 6:45 p.m. UTC | #2
On 4/16/25 2:42 PM, Jeff Layton wrote:
> On Wed, 2025-04-16 at 11:28 -0400, cel@kernel.org wrote:
>> From: Chuck Lever <chuck.lever@oracle.com>
>>
>> As a step towards making NFSD's maximum rsize and wsize variable,
>> replace the fixed-size rq_bvec[] array in struct svc_rqst with a
>> chunk of dynamically-allocated memory.
>>
>> On a system with 8-byte pointers and 4KB pages, pahole reports that
>> the rq_bvec[] array is 4144 bytes. Replacing it with a single
>> pointer reduces the size of struct svc_rqst to about 7500 bytes.
>>
>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>> ---
>>  include/linux/sunrpc/svc.h | 2 +-
>>  net/sunrpc/svc.c           | 6 ++++++
>>  net/sunrpc/svcsock.c       | 7 +++----
>>  3 files changed, 10 insertions(+), 5 deletions(-)
>>
>> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
>> index 74658cca0f38..225c385085c3 100644
>> --- a/include/linux/sunrpc/svc.h
>> +++ b/include/linux/sunrpc/svc.h
>> @@ -195,7 +195,7 @@ struct svc_rqst {
>>  
>>  	struct folio_batch	rq_fbatch;
>>  	struct kvec		rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */
>> -	struct bio_vec		rq_bvec[RPCSVC_MAXPAGES];
>> +	struct bio_vec		*rq_bvec;
> 
> It's a reasonable start.
> 
> What would also be good to do here is to replace the invocations of
> RPCSVC_MAXPAGES that involve this array with a helper function that
> returns the length of it.
> 
> For now it could just return RPCSVC_MAXPAGES, but eventually you could
> add (e.g.) a rqstp->rq_bvec_len field and use that to indicate how many
> entries there are in rq_bvec.

rq_vec, rq_pages, and rq_bvec all have the same entry count (plus or
minus one) so only one new field is necessary. There are a few other
places that allocate arrays of size RPCSVC_MAXPAGES that will need
similar treatment.

Stay tuned for v2.


>>  	__be32			rq_xid;		/* transmission id */
>>  	u32			rq_prog;	/* program number */
>> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
>> index e7f9c295d13c..db29819716b8 100644
>> --- a/net/sunrpc/svc.c
>> +++ b/net/sunrpc/svc.c
>> @@ -673,6 +673,7 @@ static void
>>  svc_rqst_free(struct svc_rqst *rqstp)
>>  {
>>  	folio_batch_release(&rqstp->rq_fbatch);
>> +	kfree(rqstp->rq_bvec);
>>  	svc_release_buffer(rqstp);
>>  	if (rqstp->rq_scratch_page)
>>  		put_page(rqstp->rq_scratch_page);
>> @@ -711,6 +712,11 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
>>  	if (!svc_init_buffer(rqstp, serv->sv_max_mesg, node))
>>  		goto out_enomem;
>>  
>> +	rqstp->rq_bvec = kcalloc_node(RPCSVC_MAXPAGES, sizeof(struct bio_vec),
>> +				      GFP_KERNEL, node);
>> +	if (!rqstp->rq_bvec)
>> +		goto out_enomem;
>> +
>>  	rqstp->rq_err = -EAGAIN; /* No error yet */
>>  
>>  	serv->sv_nrthreads += 1;
>> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
>> index 72e5a01df3d3..671640933f18 100644
>> --- a/net/sunrpc/svcsock.c
>> +++ b/net/sunrpc/svcsock.c
>> @@ -713,8 +713,7 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
>>  	if (svc_xprt_is_dead(xprt))
>>  		goto out_notconn;
>>  
>> -	count = xdr_buf_to_bvec(rqstp->rq_bvec,
>> -				ARRAY_SIZE(rqstp->rq_bvec), xdr);
>> +	count = xdr_buf_to_bvec(rqstp->rq_bvec, RPCSVC_MAXPAGES, xdr);
>>  
>>  	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
>>  		      count, rqstp->rq_res.len);
>> @@ -1219,8 +1218,8 @@ static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp,
>>  	memcpy(buf, &marker, sizeof(marker));
>>  	bvec_set_virt(rqstp->rq_bvec, buf, sizeof(marker));
>>  
>> -	count = xdr_buf_to_bvec(rqstp->rq_bvec + 1,
>> -				ARRAY_SIZE(rqstp->rq_bvec) - 1, &rqstp->rq_res);
>> +	count = xdr_buf_to_bvec(rqstp->rq_bvec + 1, RPCSVC_MAXPAGES,
>> +				&rqstp->rq_res);
>>  
>>  	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
>>  		      1 + count, sizeof(marker) + rqstp->rq_res.len);
>
Jeff Layton April 16, 2025, 6:55 p.m. UTC | #3
On Wed, 2025-04-16 at 14:45 -0400, Chuck Lever wrote:
> On 4/16/25 2:42 PM, Jeff Layton wrote:
> > On Wed, 2025-04-16 at 11:28 -0400, cel@kernel.org wrote:
> > > From: Chuck Lever <chuck.lever@oracle.com>
> > > 
> > > As a step towards making NFSD's maximum rsize and wsize variable,
> > > replace the fixed-size rq_bvec[] array in struct svc_rqst with a
> > > chunk of dynamically-allocated memory.
> > > 
> > > On a system with 8-byte pointers and 4KB pages, pahole reports that
> > > the rq_bvec[] array is 4144 bytes. Replacing it with a single
> > > pointer reduces the size of struct svc_rqst to about 7500 bytes.
> > > 
> > > Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> > > ---
> > >  include/linux/sunrpc/svc.h | 2 +-
> > >  net/sunrpc/svc.c           | 6 ++++++
> > >  net/sunrpc/svcsock.c       | 7 +++----
> > >  3 files changed, 10 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> > > index 74658cca0f38..225c385085c3 100644
> > > --- a/include/linux/sunrpc/svc.h
> > > +++ b/include/linux/sunrpc/svc.h
> > > @@ -195,7 +195,7 @@ struct svc_rqst {
> > >  
> > >  	struct folio_batch	rq_fbatch;
> > >  	struct kvec		rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */
> > > -	struct bio_vec		rq_bvec[RPCSVC_MAXPAGES];
> > > +	struct bio_vec		*rq_bvec;
> > 
> > It's a reasonable start.
> > 
> > What would also be good to do here is to replace the invocations of
> > RPCSVC_MAXPAGES that involve this array with a helper function that
> > returns the length of it.
> > 
> > For now it could just return RPCSVC_MAXPAGES, but eventually you could
> > add (e.g.) a rqstp->rq_bvec_len field and use that to indicate how many
> > entries there are in rq_bvec.
> 
> rq_vec, rq_pages, and rq_bvec all have the same entry count (plus or
> minus one) so only one new field is necessary. There are a few other
> places that allocate arrays of size RPCSVC_MAXPAGES that will need
> similar treatment.
>
> Stay tuned for v2.
> 

Ok. I think I didn't articulate this well. Let me try again:

If you're looking to break the assumption that the length of these
arrays is RPCSVC_MAXPAGES, then the thing to do is to eliminate the
places where we make that assumption.

In particular, the two places where you're adding new RPCSVC_MAXPAGES
invocations would be better replaced with a helper function that we can
change the return value of later.

> 
> > >  	__be32			rq_xid;		/* transmission id */
> > >  	u32			rq_prog;	/* program number */
> > > diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
> > > index e7f9c295d13c..db29819716b8 100644
> > > --- a/net/sunrpc/svc.c
> > > +++ b/net/sunrpc/svc.c
> > > @@ -673,6 +673,7 @@ static void
> > >  svc_rqst_free(struct svc_rqst *rqstp)
> > >  {
> > >  	folio_batch_release(&rqstp->rq_fbatch);
> > > +	kfree(rqstp->rq_bvec);
> > >  	svc_release_buffer(rqstp);
> > >  	if (rqstp->rq_scratch_page)
> > >  		put_page(rqstp->rq_scratch_page);
> > > @@ -711,6 +712,11 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
> > >  	if (!svc_init_buffer(rqstp, serv->sv_max_mesg, node))
> > >  		goto out_enomem;
> > >  
> > > +	rqstp->rq_bvec = kcalloc_node(RPCSVC_MAXPAGES, sizeof(struct bio_vec),
> > > +				      GFP_KERNEL, node);
> > > +	if (!rqstp->rq_bvec)
> > > +		goto out_enomem;
> > > +
> > >  	rqstp->rq_err = -EAGAIN; /* No error yet */
> > >  
> > >  	serv->sv_nrthreads += 1;
> > > diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> > > index 72e5a01df3d3..671640933f18 100644
> > > --- a/net/sunrpc/svcsock.c
> > > +++ b/net/sunrpc/svcsock.c
> > > @@ -713,8 +713,7 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
> > >  	if (svc_xprt_is_dead(xprt))
> > >  		goto out_notconn;
> > >  
> > > -	count = xdr_buf_to_bvec(rqstp->rq_bvec,
> > > -				ARRAY_SIZE(rqstp->rq_bvec), xdr);
> > > +	count = xdr_buf_to_bvec(rqstp->rq_bvec, RPCSVC_MAXPAGES, xdr);
> > >  
> > >  	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
> > >  		      count, rqstp->rq_res.len);
> > > @@ -1219,8 +1218,8 @@ static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp,
> > >  	memcpy(buf, &marker, sizeof(marker));
> > >  	bvec_set_virt(rqstp->rq_bvec, buf, sizeof(marker));
> > >  
> > > -	count = xdr_buf_to_bvec(rqstp->rq_bvec + 1,
> > > -				ARRAY_SIZE(rqstp->rq_bvec) - 1, &rqstp->rq_res);
> > > +	count = xdr_buf_to_bvec(rqstp->rq_bvec + 1, RPCSVC_MAXPAGES,
> > > +				&rqstp->rq_res);
> > > 
>
> > >  
> > >  	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
> > >  		      1 + count, sizeof(marker) + rqstp->rq_res.len);
> > 
>
diff mbox series

Patch

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 74658cca0f38..225c385085c3 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -195,7 +195,7 @@  struct svc_rqst {
 
 	struct folio_batch	rq_fbatch;
 	struct kvec		rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */
-	struct bio_vec		rq_bvec[RPCSVC_MAXPAGES];
+	struct bio_vec		*rq_bvec;
 
 	__be32			rq_xid;		/* transmission id */
 	u32			rq_prog;	/* program number */
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index e7f9c295d13c..db29819716b8 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -673,6 +673,7 @@  static void
 svc_rqst_free(struct svc_rqst *rqstp)
 {
 	folio_batch_release(&rqstp->rq_fbatch);
+	kfree(rqstp->rq_bvec);
 	svc_release_buffer(rqstp);
 	if (rqstp->rq_scratch_page)
 		put_page(rqstp->rq_scratch_page);
@@ -711,6 +712,11 @@  svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
 	if (!svc_init_buffer(rqstp, serv->sv_max_mesg, node))
 		goto out_enomem;
 
+	rqstp->rq_bvec = kcalloc_node(RPCSVC_MAXPAGES, sizeof(struct bio_vec),
+				      GFP_KERNEL, node);
+	if (!rqstp->rq_bvec)
+		goto out_enomem;
+
 	rqstp->rq_err = -EAGAIN; /* No error yet */
 
 	serv->sv_nrthreads += 1;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 72e5a01df3d3..671640933f18 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -713,8 +713,7 @@  static int svc_udp_sendto(struct svc_rqst *rqstp)
 	if (svc_xprt_is_dead(xprt))
 		goto out_notconn;
 
-	count = xdr_buf_to_bvec(rqstp->rq_bvec,
-				ARRAY_SIZE(rqstp->rq_bvec), xdr);
+	count = xdr_buf_to_bvec(rqstp->rq_bvec, RPCSVC_MAXPAGES, xdr);
 
 	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
 		      count, rqstp->rq_res.len);
@@ -1219,8 +1218,8 @@  static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp,
 	memcpy(buf, &marker, sizeof(marker));
 	bvec_set_virt(rqstp->rq_bvec, buf, sizeof(marker));
 
-	count = xdr_buf_to_bvec(rqstp->rq_bvec + 1,
-				ARRAY_SIZE(rqstp->rq_bvec) - 1, &rqstp->rq_res);
+	count = xdr_buf_to_bvec(rqstp->rq_bvec + 1, RPCSVC_MAXPAGES,
+				&rqstp->rq_res);
 
 	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
 		      1 + count, sizeof(marker) + rqstp->rq_res.len);