diff mbox series

[v5,12/13] NFSD: Allocate an rhashtable for nfs4_file objects

Message ID 166665108857.50761.11874625810370383309.stgit@klimt.1015granger.net (mailing list archive)
State New, archived
Headers show
Series A course change, for sure | expand

Commit Message

Chuck Lever III Oct. 24, 2022, 10:38 p.m. UTC
Introduce the infrastructure for managing nfs4_file objects in an
rhashtable. This infrastructure will be used by the next patch.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4state.c |   23 ++++++++++++++++++++++-
 fs/nfsd/state.h     |    1 +
 2 files changed, 23 insertions(+), 1 deletion(-)

Comments

NeilBrown Oct. 24, 2022, 11:37 p.m. UTC | #1
On Tue, 25 Oct 2022, Chuck Lever wrote:
> Introduce the infrastructure for managing nfs4_file objects in an
> rhashtable. This infrastructure will be used by the next patch.
> 
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>  fs/nfsd/nfs4state.c |   23 ++++++++++++++++++++++-
>  fs/nfsd/state.h     |    1 +
>  2 files changed, 23 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index abed795bb4ec..681cb2daa843 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -44,7 +44,9 @@
>  #include <linux/jhash.h>
>  #include <linux/string_helpers.h>
>  #include <linux/fsnotify.h>
> +#include <linux/rhashtable.h>
>  #include <linux/nfs_ssc.h>
> +
>  #include "xdr4.h"
>  #include "xdr4cb.h"
>  #include "vfs.h"
> @@ -721,6 +723,18 @@ static unsigned int file_hashval(const struct svc_fh *fh)
>  
>  static struct hlist_head file_hashtbl[FILE_HASH_SIZE];
>  
> +static struct rhltable nfs4_file_rhltable ____cacheline_aligned_in_smp;
> +
> +static const struct rhashtable_params nfs4_file_rhash_params = {
> +	.key_len		= sizeof_field(struct nfs4_file, fi_inode),
> +	.key_offset		= offsetof(struct nfs4_file, fi_inode),
> +	.head_offset		= offsetof(struct nfs4_file, fi_rlist),
> +
> +	/* Reduce resizing churn on light workloads */
> +	.min_size		= 256,		/* buckets */

Every time I see this line I groan a little bit.  Probably I'm groaning
at rhashtable - you shouldn't need to have to worry about these sorts of
details when using an API...  but I agree that avoiding churn is likely
a good idea.

Where did 256 come from?  Would PAGE_SIZE/sizeof(void*) make more sense
(though that is 512).
How much churn is too much?  The default is 4 and we grow at >75% and
shrink at <30%, so at 4 entries the table would resize to 8, and at 2
entries it would shrink back.  That does sound like churn.
If we choose 8, then at 7 we grow to 16 and at 4 we go back to 8.
If we choose 16, then at 13 we grow to 32 and at 9 we go back to 16.

If we choose 64, then at 49 we grow to 128 and at 39 we go back to 64.

The margin seems rather narrow.  Maybe 30% is too high - 15% might be a
lot better.  But changing that might be difficult.

So I don't have a good recommendation, but I don't like magic numbers.
Maybe PAGE_SIZE/sizeof(void*) ??

But either way
  Reviewed-by: NeilBrown <neilb@suse.de>

Thanks,
NeilBrown


> +	.automatic_shrinking	= true,
> +};
> +
>  /*
>   * Check if courtesy clients have conflicting access and resolve it if possible
>   *
> @@ -8023,10 +8037,16 @@ nfs4_state_start(void)
>  {
>  	int ret;
>  
> -	ret = nfsd4_create_callback_queue();
> +	ret = rhltable_init(&nfs4_file_rhltable, &nfs4_file_rhash_params);
>  	if (ret)
>  		return ret;
>  
> +	ret = nfsd4_create_callback_queue();
> +	if (ret) {
> +		rhltable_destroy(&nfs4_file_rhltable);
> +		return ret;
> +	}
> +
>  	set_max_delegations();
>  	return 0;
>  }
> @@ -8057,6 +8077,7 @@ nfs4_state_shutdown_net(struct net *net)
>  
>  	nfsd4_client_tracking_exit(net);
>  	nfs4_state_destroy_net(net);
> +	rhltable_destroy(&nfs4_file_rhltable);
>  #ifdef CONFIG_NFSD_V4_2_INTER_SSC
>  	nfsd4_ssc_shutdown_umount(nn);
>  #endif
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index e2daef3cc003..190fc7e418a4 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -546,6 +546,7 @@ struct nfs4_file {
>  	bool			fi_aliased;
>  	spinlock_t		fi_lock;
>  	struct hlist_node       fi_hash;	/* hash on fi_fhandle */
> +	struct rhlist_head	fi_rlist;
>  	struct list_head        fi_stateids;
>  	union {
>  		struct list_head	fi_delegations;
> 
> 
>
Chuck Lever III Oct. 25, 2022, 12:13 a.m. UTC | #2
> On Oct 24, 2022, at 7:37 PM, NeilBrown <neilb@suse.de> wrote:
> 
> On Tue, 25 Oct 2022, Chuck Lever wrote:
>> Introduce the infrastructure for managing nfs4_file objects in an
>> rhashtable. This infrastructure will be used by the next patch.
>> 
>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>> ---
>> fs/nfsd/nfs4state.c |   23 ++++++++++++++++++++++-
>> fs/nfsd/state.h     |    1 +
>> 2 files changed, 23 insertions(+), 1 deletion(-)
>> 
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index abed795bb4ec..681cb2daa843 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -44,7 +44,9 @@
>> #include <linux/jhash.h>
>> #include <linux/string_helpers.h>
>> #include <linux/fsnotify.h>
>> +#include <linux/rhashtable.h>
>> #include <linux/nfs_ssc.h>
>> +
>> #include "xdr4.h"
>> #include "xdr4cb.h"
>> #include "vfs.h"
>> @@ -721,6 +723,18 @@ static unsigned int file_hashval(const struct svc_fh *fh)
>> 
>> static struct hlist_head file_hashtbl[FILE_HASH_SIZE];
>> 
>> +static struct rhltable nfs4_file_rhltable ____cacheline_aligned_in_smp;
>> +
>> +static const struct rhashtable_params nfs4_file_rhash_params = {
>> +	.key_len		= sizeof_field(struct nfs4_file, fi_inode),
>> +	.key_offset		= offsetof(struct nfs4_file, fi_inode),
>> +	.head_offset		= offsetof(struct nfs4_file, fi_rlist),
>> +
>> +	/* Reduce resizing churn on light workloads */
>> +	.min_size		= 256,		/* buckets */
> 
> Every time I see this line I groan a little bit.  Probably I'm groaning
> at rhashtable - you shouldn't need to have to worry about these sorts of
> details when using an API...  but I agree that avoiding churn is likely
> a good idea.
> 
> Where did 256 come from?

Here's the current file_hashtbl definition:

 710 /* hash table for nfs4_file */
 711 #define FILE_HASH_BITS                   8
 712 #define FILE_HASH_SIZE                  (1 << FILE_HASH_BITS)
 713 
 714 static unsigned int file_hashval(const struct svc_fh *fh)
 715 {
 716         struct inode *inode = d_inode(fh->fh_dentry);
 717 
 718         /* XXX: why not (here & in file cache) use inode? */
 719         return (unsigned int)hash_long(inode->i_ino, FILE_HASH_BITS);
 720 }
 721 
 722 static struct hlist_head file_hashtbl[FILE_HASH_SIZE];

256 buckets is the size of the existing file_hashtbl.


>  Would PAGE_SIZE/sizeof(void*) make more sense
> (though that is 512).

For rhashtable, you need to account for sizeof(struct bucket_table),
if I'm reading nested_bucket_table_alloc() correctly.

256 is 2048 bytes + sizeof(struct bucket_table). 512 buckets would
push us over a page.


> How much churn is too much?  The default is 4 and we grow at >75% and
> shrink at <30%, so at 4 entries the table would resize to 8, and at 2
> entries it would shrink back.  That does sound like churn.
> If we choose 8, then at 7 we grow to 16 and at 4 we go back to 8.
> If we choose 16, then at 13 we grow to 32 and at 9 we go back to 16.
> 
> If we choose 64, then at 49 we grow to 128 and at 39 we go back to 64.
> 
> The margin seems rather narrow.  Maybe 30% is too high - 15% might be a
> lot better.  But changing that might be difficult.

I could go a little smaller. Basically table resizing is OK when we're
talking about a lot of buckets because that overhead is very likely to
be amortized over many insertions and removals.


> So I don't have a good recommendation, but I don't like magic numbers.
> Maybe PAGE_SIZE/sizeof(void*) ??

The only thing I can think of would be

#define NFS4_FILE_HASH_SIZE  (some number or constant calculation)

which to me isn't much better than

	.size	= 256,	/* buckets */

I will ponder some more.


> But either way
>  Reviewed-by: NeilBrown <neilb@suse.de>
> 
> Thanks,
> NeilBrown
> 
> 
>> +	.automatic_shrinking	= true,
>> +};
>> +
>> /*
>>  * Check if courtesy clients have conflicting access and resolve it if possible
>>  *
>> @@ -8023,10 +8037,16 @@ nfs4_state_start(void)
>> {
>> 	int ret;
>> 
>> -	ret = nfsd4_create_callback_queue();
>> +	ret = rhltable_init(&nfs4_file_rhltable, &nfs4_file_rhash_params);
>> 	if (ret)
>> 		return ret;
>> 
>> +	ret = nfsd4_create_callback_queue();
>> +	if (ret) {
>> +		rhltable_destroy(&nfs4_file_rhltable);
>> +		return ret;
>> +	}
>> +
>> 	set_max_delegations();
>> 	return 0;
>> }
>> @@ -8057,6 +8077,7 @@ nfs4_state_shutdown_net(struct net *net)
>> 
>> 	nfsd4_client_tracking_exit(net);
>> 	nfs4_state_destroy_net(net);
>> +	rhltable_destroy(&nfs4_file_rhltable);
>> #ifdef CONFIG_NFSD_V4_2_INTER_SSC
>> 	nfsd4_ssc_shutdown_umount(nn);
>> #endif
>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
>> index e2daef3cc003..190fc7e418a4 100644
>> --- a/fs/nfsd/state.h
>> +++ b/fs/nfsd/state.h
>> @@ -546,6 +546,7 @@ struct nfs4_file {
>> 	bool			fi_aliased;
>> 	spinlock_t		fi_lock;
>> 	struct hlist_node       fi_hash;	/* hash on fi_fhandle */
>> +	struct rhlist_head	fi_rlist;
>> 	struct list_head        fi_stateids;
>> 	union {
>> 		struct list_head	fi_delegations;
>> 
>> 
>> 

--
Chuck Lever
NeilBrown Oct. 25, 2022, 2:26 a.m. UTC | #3
On Tue, 25 Oct 2022, Chuck Lever III wrote:
> 
> > On Oct 24, 2022, at 7:37 PM, NeilBrown <neilb@suse.de> wrote:
> > 
> > On Tue, 25 Oct 2022, Chuck Lever wrote:
> >> Introduce the infrastructure for managing nfs4_file objects in an
> >> rhashtable. This infrastructure will be used by the next patch.
> >> 
> >> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> >> ---
> >> fs/nfsd/nfs4state.c |   23 ++++++++++++++++++++++-
> >> fs/nfsd/state.h     |    1 +
> >> 2 files changed, 23 insertions(+), 1 deletion(-)
> >> 
> >> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> >> index abed795bb4ec..681cb2daa843 100644
> >> --- a/fs/nfsd/nfs4state.c
> >> +++ b/fs/nfsd/nfs4state.c
> >> @@ -44,7 +44,9 @@
> >> #include <linux/jhash.h>
> >> #include <linux/string_helpers.h>
> >> #include <linux/fsnotify.h>
> >> +#include <linux/rhashtable.h>
> >> #include <linux/nfs_ssc.h>
> >> +
> >> #include "xdr4.h"
> >> #include "xdr4cb.h"
> >> #include "vfs.h"
> >> @@ -721,6 +723,18 @@ static unsigned int file_hashval(const struct svc_fh *fh)
> >> 
> >> static struct hlist_head file_hashtbl[FILE_HASH_SIZE];
> >> 
> >> +static struct rhltable nfs4_file_rhltable ____cacheline_aligned_in_smp;
> >> +
> >> +static const struct rhashtable_params nfs4_file_rhash_params = {
> >> +	.key_len		= sizeof_field(struct nfs4_file, fi_inode),
> >> +	.key_offset		= offsetof(struct nfs4_file, fi_inode),
> >> +	.head_offset		= offsetof(struct nfs4_file, fi_rlist),
> >> +
> >> +	/* Reduce resizing churn on light workloads */
> >> +	.min_size		= 256,		/* buckets */
> > 
> > Every time I see this line I groan a little bit.  Probably I'm groaning
> > at rhashtable - you shouldn't need to have to worry about these sorts of
> > details when using an API...  but I agree that avoiding churn is likely
> > a good idea.
> > 
> > Where did 256 come from?
> 
> Here's the current file_hashtbl definition:
> 
>  710 /* hash table for nfs4_file */
>  711 #define FILE_HASH_BITS                   8
>  712 #define FILE_HASH_SIZE                  (1 << FILE_HASH_BITS)
>  713 
>  714 static unsigned int file_hashval(const struct svc_fh *fh)
>  715 {
>  716         struct inode *inode = d_inode(fh->fh_dentry);
>  717 
>  718         /* XXX: why not (here & in file cache) use inode? */
>  719         return (unsigned int)hash_long(inode->i_ino, FILE_HASH_BITS);
>  720 }
>  721 
>  722 static struct hlist_head file_hashtbl[FILE_HASH_SIZE];
> 
> 256 buckets is the size of the existing file_hashtbl.
> 
> 
> >  Would PAGE_SIZE/sizeof(void*) make more sense
> > (though that is 512).
> 
> For rhashtable, you need to account for sizeof(struct bucket_table),
> if I'm reading nested_bucket_table_alloc() correctly.
> 
> 256 is 2048 bytes + sizeof(struct bucket_table). 512 buckets would
> push us over a page.

Ah yes, of course.  That does suggest that 256 is a better choice.

> 
> 
> > How much churn is too much?  The default is 4 and we grow at >75% and
> > shrink at <30%, so at 4 entries the table would resize to 8, and at 2
> > entries it would shrink back.  That does sound like churn.
> > If we choose 8, then at 7 we grow to 16 and at 4 we go back to 8.
> > If we choose 16, then at 13 we grow to 32 and at 9 we go back to 16.
> > 
> > If we choose 64, then at 49 we grow to 128 and at 39 we go back to 64.
> > 
> > The margin seems rather narrow.  Maybe 30% is too high - 15% might be a
> > lot better.  But changing that might be difficult.
> 
> I could go a little smaller. Basically table resizing is OK when we're
> talking about a lot of buckets because that overhead is very likely to
> be amortized over many insertions and removals.
> 
> 
> > So I don't have a good recommendation, but I don't like magic numbers.
> > Maybe PAGE_SIZE/sizeof(void*) ??
> 
> The only thing I can think of would be
> 
> #define NFS4_FILE_HASH_SIZE  (some number or constant calculation)
> 
> which to me isn't much better than
> 
> 	.size	= 256,	/* buckets */
> 
> I will ponder some more.
> 

Maybe just a comment saying that this number will allow the
struct bucket_table to fit in one 4K page.

Thanks,
NeilBrown


> 
> > But either way
> >  Reviewed-by: NeilBrown <neilb@suse.de>
> > 
> > Thanks,
> > NeilBrown
> > 
> > 
> >> +	.automatic_shrinking	= true,
> >> +};
> >> +
> >> /*
> >>  * Check if courtesy clients have conflicting access and resolve it if possible
> >>  *
> >> @@ -8023,10 +8037,16 @@ nfs4_state_start(void)
> >> {
> >> 	int ret;
> >> 
> >> -	ret = nfsd4_create_callback_queue();
> >> +	ret = rhltable_init(&nfs4_file_rhltable, &nfs4_file_rhash_params);
> >> 	if (ret)
> >> 		return ret;
> >> 
> >> +	ret = nfsd4_create_callback_queue();
> >> +	if (ret) {
> >> +		rhltable_destroy(&nfs4_file_rhltable);
> >> +		return ret;
> >> +	}
> >> +
> >> 	set_max_delegations();
> >> 	return 0;
> >> }
> >> @@ -8057,6 +8077,7 @@ nfs4_state_shutdown_net(struct net *net)
> >> 
> >> 	nfsd4_client_tracking_exit(net);
> >> 	nfs4_state_destroy_net(net);
> >> +	rhltable_destroy(&nfs4_file_rhltable);
> >> #ifdef CONFIG_NFSD_V4_2_INTER_SSC
> >> 	nfsd4_ssc_shutdown_umount(nn);
> >> #endif
> >> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> >> index e2daef3cc003..190fc7e418a4 100644
> >> --- a/fs/nfsd/state.h
> >> +++ b/fs/nfsd/state.h
> >> @@ -546,6 +546,7 @@ struct nfs4_file {
> >> 	bool			fi_aliased;
> >> 	spinlock_t		fi_lock;
> >> 	struct hlist_node       fi_hash;	/* hash on fi_fhandle */
> >> +	struct rhlist_head	fi_rlist;
> >> 	struct list_head        fi_stateids;
> >> 	union {
> >> 		struct list_head	fi_delegations;
> >> 
> >> 
> >> 
> 
> --
> Chuck Lever
> 
> 
> 
>
diff mbox series

Patch

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index abed795bb4ec..681cb2daa843 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -44,7 +44,9 @@ 
 #include <linux/jhash.h>
 #include <linux/string_helpers.h>
 #include <linux/fsnotify.h>
+#include <linux/rhashtable.h>
 #include <linux/nfs_ssc.h>
+
 #include "xdr4.h"
 #include "xdr4cb.h"
 #include "vfs.h"
@@ -721,6 +723,18 @@  static unsigned int file_hashval(const struct svc_fh *fh)
 
 static struct hlist_head file_hashtbl[FILE_HASH_SIZE];
 
+static struct rhltable nfs4_file_rhltable ____cacheline_aligned_in_smp;
+
+static const struct rhashtable_params nfs4_file_rhash_params = {
+	.key_len		= sizeof_field(struct nfs4_file, fi_inode),
+	.key_offset		= offsetof(struct nfs4_file, fi_inode),
+	.head_offset		= offsetof(struct nfs4_file, fi_rlist),
+
+	/* Reduce resizing churn on light workloads */
+	.min_size		= 256,		/* buckets */
+	.automatic_shrinking	= true,
+};
+
 /*
  * Check if courtesy clients have conflicting access and resolve it if possible
  *
@@ -8023,10 +8037,16 @@  nfs4_state_start(void)
 {
 	int ret;
 
-	ret = nfsd4_create_callback_queue();
+	ret = rhltable_init(&nfs4_file_rhltable, &nfs4_file_rhash_params);
 	if (ret)
 		return ret;
 
+	ret = nfsd4_create_callback_queue();
+	if (ret) {
+		rhltable_destroy(&nfs4_file_rhltable);
+		return ret;
+	}
+
 	set_max_delegations();
 	return 0;
 }
@@ -8057,6 +8077,7 @@  nfs4_state_shutdown_net(struct net *net)
 
 	nfsd4_client_tracking_exit(net);
 	nfs4_state_destroy_net(net);
+	rhltable_destroy(&nfs4_file_rhltable);
 #ifdef CONFIG_NFSD_V4_2_INTER_SSC
 	nfsd4_ssc_shutdown_umount(nn);
 #endif
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index e2daef3cc003..190fc7e418a4 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -546,6 +546,7 @@  struct nfs4_file {
 	bool			fi_aliased;
 	spinlock_t		fi_lock;
 	struct hlist_node       fi_hash;	/* hash on fi_fhandle */
+	struct rhlist_head	fi_rlist;
 	struct list_head        fi_stateids;
 	union {
 		struct list_head	fi_delegations;