Message ID | 1413541275-3884-1-git-send-email-jlayton@primarydata.com (mailing list archive) |
---|---|
State | New, archived |
On Fri, Oct 17, 2014 at 06:21:15AM -0400, Jeff Layton wrote:
> The global state_lock protects the file_hashtbl, and that has the
> potential to be a scalability bottleneck.
>
> Address this by making the file_hashtbl use RCU. Add a rcu_head to the
> nfs4_file and use that when freeing ones that have been hashed.
>
> Convert find_file to use a lockless lookup. Convert find_or_add_file to
> attempt a lockless lookup first, and then fall back to doing the
> "normal" locked search and insert if that fails to find anything.
>
> Signed-off-by: Jeff Layton <jlayton@primarydata.com>
> ---
>  fs/nfsd/nfs4state.c | 36 +++++++++++++++++++++++++++---------
>  fs/nfsd/state.h     |  1 +
>  2 files changed, 28 insertions(+), 9 deletions(-)
>
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index e9c3afe4b5d3..9bd3bcfee3c2 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -280,15 +280,22 @@ static void nfsd4_free_file(struct nfs4_file *f)
>  	kmem_cache_free(file_slab, f);
>  }
>
> +static void nfsd4_free_file_rcu(struct rcu_head *rcu)
> +{
> +	struct nfs4_file *fp = container_of(rcu, struct nfs4_file, fi_rcu);
> +
> +	nfsd4_free_file(fp);

You might as well kill the pointless nfsd4_free_file wrapper while
you're at it.

> @@ -3313,12 +3320,19 @@ find_file_locked(struct knfsd_fh *fh)
>  static struct nfs4_file *
>  find_file(struct knfsd_fh *fh)
>  {
> -	struct nfs4_file *fp;
> +	struct nfs4_file *fp, *ret = NULL;
> +	unsigned int hashval = file_hashval(fh);
>
> -	spin_lock(&state_lock);
> -	fp = find_file_locked(fh);
> -	spin_unlock(&state_lock);
> -	return fp;
> +	rcu_read_lock();
> +	hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash) {
> +		if (nfsd_fh_match(&fp->fi_fhandle, fh)) {
> +			if (atomic_inc_not_zero(&fp->fi_ref))
> +				ret = fp;
> +			break;
> +		}
> +	}
> +	rcu_read_unlock();
> +	return ret;

I think it would be better to just switch find_file_locked to use
hlist_for_each_entry_rcu instead of duplicating it.

> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index 8e85e07efce6..530470a35ecd 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -490,6 +490,7 @@ struct nfs4_file {
>  	atomic_t		fi_access[2];
>  	u32			fi_share_deny;
>  	struct file		*fi_deleg_file;
> +	struct rcu_head		fi_rcu;

Can we union this over a field that's guaranteed to be unused on
a file that has been unhashed?

Also a slightly related question: Is the small fixed-size hash table
still fine for the workloads where the RCU access matters? It seems
like we should aim for a more scalable data structure to look up the
files. It also irks me a bit how this duplicates the inode cache,
which for some filesystems (e.g. XFS) already is very scalable.
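[For illustration, a minimal sketch of the merge being suggested here: one
walker that is safe both under state_lock and under rcu_read_lock(), so
find_file no longer duplicates the loop. This is not the posted patch; it
assumes the existing file_hashval() and nfsd_fh_match() helpers.]

	static struct nfs4_file *
	find_file_locked(struct knfsd_fh *fh, unsigned int hashval)
	{
		struct nfs4_file *fp;

		/* safe under either rcu_read_lock() or state_lock */
		hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash) {
			if (nfsd_fh_match(&fp->fi_fhandle, fh)) {
				/* a lockless caller can race with the final put,
				 * so only take a reference if it is still nonzero */
				if (atomic_inc_not_zero(&fp->fi_ref))
					return fp;
			}
		}
		return NULL;
	}

	static struct nfs4_file *
	find_file(struct knfsd_fh *fh)
	{
		struct nfs4_file *fp;

		rcu_read_lock();
		fp = find_file_locked(fh, file_hashval(fh));
		rcu_read_unlock();
		return fp;
	}

[Under the spinlock the atomic_inc_not_zero() cannot fail for a hashed
file, since the final put also takes state_lock, so sharing the helper
costs nothing there; as Jeff notes below.]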
On Tue, 21 Oct 2014 03:40:13 -0700
Christoph Hellwig <hch@infradead.org> wrote:

> On Fri, Oct 17, 2014 at 06:21:15AM -0400, Jeff Layton wrote:
> > The global state_lock protects the file_hashtbl, and that has the
> > potential to be a scalability bottleneck.
> >
> > Address this by making the file_hashtbl use RCU. Add a rcu_head to the
> > nfs4_file and use that when freeing ones that have been hashed.
> >
> > Convert find_file to use a lockless lookup. Convert find_or_add_file to
> > attempt a lockless lookup first, and then fall back to doing the
> > "normal" locked search and insert if that fails to find anything.
> >
> > Signed-off-by: Jeff Layton <jlayton@primarydata.com>
> > ---
> >  fs/nfsd/nfs4state.c | 36 +++++++++++++++++++++++++++---------
> >  fs/nfsd/state.h     |  1 +
> >  2 files changed, 28 insertions(+), 9 deletions(-)
> >
> > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > index e9c3afe4b5d3..9bd3bcfee3c2 100644
> > --- a/fs/nfsd/nfs4state.c
> > +++ b/fs/nfsd/nfs4state.c
> > @@ -280,15 +280,22 @@ static void nfsd4_free_file(struct nfs4_file *f)
> >  	kmem_cache_free(file_slab, f);
> >  }
> >
> > +static void nfsd4_free_file_rcu(struct rcu_head *rcu)
> > +{
> > +	struct nfs4_file *fp = container_of(rcu, struct nfs4_file, fi_rcu);
> > +
> > +	nfsd4_free_file(fp);
>
> You might as well kill the pointless nfsd4_free_file wrapper while
> you're at it.
>

Hmm, ok. There is one place that still calls it, but we can just make
that use kmem_cache_free.

> > @@ -3313,12 +3320,19 @@ find_file_locked(struct knfsd_fh *fh)
> >  static struct nfs4_file *
> >  find_file(struct knfsd_fh *fh)
> >  {
> > -	struct nfs4_file *fp;
> > +	struct nfs4_file *fp, *ret = NULL;
> > +	unsigned int hashval = file_hashval(fh);
> >
> > -	spin_lock(&state_lock);
> > -	fp = find_file_locked(fh);
> > -	spin_unlock(&state_lock);
> > -	return fp;
> > +	rcu_read_lock();
> > +	hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash) {
> > +		if (nfsd_fh_match(&fp->fi_fhandle, fh)) {
> > +			if (atomic_inc_not_zero(&fp->fi_ref))
> > +				ret = fp;
> > +			break;
> > +		}
> > +	}
> > +	rcu_read_unlock();
> > +	return ret;
>
> I think it would be better to just switch find_file_locked to use
> hlist_for_each_entry_rcu instead of duplicating it.
>

I'll have to think about that. We do have to do an atomic_inc_not_zero
if we're doing an unlocked search, but that's not really necessary if
the spinlock is held. I guess it won't hurt in that case, so we should
be able to merge the two functions. I'll respin and do that...

> > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> > index 8e85e07efce6..530470a35ecd 100644
> > --- a/fs/nfsd/state.h
> > +++ b/fs/nfsd/state.h
> > @@ -490,6 +490,7 @@ struct nfs4_file {
> >  	atomic_t		fi_access[2];
> >  	u32			fi_share_deny;
> >  	struct file		*fi_deleg_file;
> > +	struct rcu_head		fi_rcu;
>
> Can we union this over a field that's guaranteed to be unused on
> a file that has been unhashed?
>

Yeah, that's probably fine. Suggestions on what to union it with?

struct callback_head is two pointers, so maybe we can use one of the
list_heads (fi_delegations maybe?).

> Also a slightly related question: Is the small fixed-size hash table
> still fine for the workloads where the RCU access matters? It seems
> like we should aim for a more scalable data structure to look up the
> files. It also irks me a bit how this duplicates the inode cache,
> which for some filesystems (e.g. XFS) already is very scalable.
>

TBH, I haven't done any real performance measurements on this
hashtable.
The main impetus for this patch was to clear the way for some changes
that I'm doing for some pnfsd-related work (I need to be able to walk a
list of nfs4_files w/o holding a spinlock). I think it makes sense to
allow searching for nfs4_files w/o holding a lock. It's unlikely to
hurt performance, and may help it.

If we do want to change to a different type of structure I'd be fine
with that, but would prefer that it be RCU-friendly. What sort of
structure did you have in mind?

Thanks for the review so far!
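[A sketch of the overlay Jeff is floating here. Illustrative only:
whether fi_delegations is really guaranteed idle by free time is
exactly the question left open in the thread.]

	/* excerpt of how the overlay could look in struct nfs4_file: both
	 * members are two pointers wide, and fi_delegations must be idle
	 * once the file has been unhashed and the last reference dropped */
	union {
		struct list_head	fi_delegations;	/* used while the file is live */
		struct rcu_head		fi_rcu;		/* reused by call_rcu at teardown */
	};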
On Tue, Oct 21, 2014 at 07:16:06AM -0400, Jeff Layton wrote:
> Yeah, that's probably fine. Suggestions on what to union it with?
>
> struct callback_head is two pointers, so maybe we can use one of the
> list_heads (fi_delegations maybe?).

Sounds reasonable to me.

> If we do want to change to a different type of structure I'd be fine
> with that, but would prefer that it be RCU-friendly. What sort of
> structure did you have in mind?

For the XFS inode lookup we use multiple radix trees, which work very
well for how inode numbers work in XFS, with a clear allocation group
component and the usual clustering of inode numbers. A simple radix
tree might work fine for NFSd, or maybe the new resizable hash tables
from the networking folks?
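[A rough sketch of the rhashtable direction Christoph mentions, written
against the rhashtable API as it later stabilized, so it postdates this
thread. The fi_rhash member and all nfs4_file_rht_* names are
hypothetical, not existing nfsd code.]

	#include <linux/rhashtable.h>

	/* assumed: a "struct rhash_head fi_rhash;" member added to nfs4_file */
	static int nfs4_file_rht_cmp(struct rhashtable_compare_arg *arg,
				     const void *obj)
	{
		const struct nfs4_file *fp = obj;

		/* match on the significant bytes only; a file handle is
		 * variable-length, so a memcmp over all of key_len won't do */
		return nfsd_fh_match((struct knfsd_fh *)&fp->fi_fhandle,
				     (struct knfsd_fh *)arg->key) ? 0 : 1;
	}

	static const struct rhashtable_params nfs4_file_rht_params = {
		.key_len		= sizeof(struct knfsd_fh),
		.key_offset		= offsetof(struct nfs4_file, fi_fhandle),
		.head_offset		= offsetof(struct nfs4_file, fi_rhash),
		.obj_cmpfn		= nfs4_file_rht_cmp,
		.automatic_shrinking	= true,
		/* the default hash covers all key_len bytes, so this also
		 * assumes unused handle bytes are zeroed; otherwise a custom
		 * hashfn over fh_size bytes would be needed */
	};

[Lookup would then be rhashtable_lookup_fast() under rcu_read_lock(),
followed by the same atomic_inc_not_zero() step before returning the
file, and the table resizes itself instead of staying at a fixed
FILE_HASH_SIZE.]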
The global state_lock protects the file_hashtbl, and that has the
potential to be a scalability bottleneck.

Address this by making the file_hashtbl use RCU. Add a rcu_head to the
nfs4_file and use that when freeing ones that have been hashed.

Convert find_file to use a lockless lookup. Convert find_or_add_file to
attempt a lockless lookup first, and then fall back to doing the
"normal" locked search and insert if that fails to find anything.

Signed-off-by: Jeff Layton <jlayton@primarydata.com>
---
 fs/nfsd/nfs4state.c | 36 +++++++++++++++++++++++++++---------
 fs/nfsd/state.h     |  1 +
 2 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index e9c3afe4b5d3..9bd3bcfee3c2 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -280,15 +280,22 @@ static void nfsd4_free_file(struct nfs4_file *f)
 	kmem_cache_free(file_slab, f);
 }
 
+static void nfsd4_free_file_rcu(struct rcu_head *rcu)
+{
+	struct nfs4_file *fp = container_of(rcu, struct nfs4_file, fi_rcu);
+
+	nfsd4_free_file(fp);
+}
+
 static inline void
 put_nfs4_file(struct nfs4_file *fi)
 {
 	might_lock(&state_lock);
 	if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) {
-		hlist_del(&fi->fi_hash);
+		hlist_del_rcu(&fi->fi_hash);
 		spin_unlock(&state_lock);
-		nfsd4_free_file(fi);
+		call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu);
 	}
 }
 
@@ -3073,7 +3080,7 @@ static void nfsd4_init_file(struct nfs4_file *fp, struct knfsd_fh *fh)
 	fp->fi_share_deny = 0;
 	memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
 	memset(fp->fi_access, 0, sizeof(fp->fi_access));
-	hlist_add_head(&fp->fi_hash, &file_hashtbl[hashval]);
+	hlist_add_head_rcu(&fp->fi_hash, &file_hashtbl[hashval]);
 }
 
 void
@@ -3313,12 +3320,19 @@ find_file_locked(struct knfsd_fh *fh)
 static struct nfs4_file *
 find_file(struct knfsd_fh *fh)
 {
-	struct nfs4_file *fp;
+	struct nfs4_file *fp, *ret = NULL;
+	unsigned int hashval = file_hashval(fh);
 
-	spin_lock(&state_lock);
-	fp = find_file_locked(fh);
-	spin_unlock(&state_lock);
-	return fp;
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash) {
+		if (nfsd_fh_match(&fp->fi_fhandle, fh)) {
+			if (atomic_inc_not_zero(&fp->fi_ref))
+				ret = fp;
+			break;
+		}
+	}
+	rcu_read_unlock();
+	return ret;
 }
 
 static struct nfs4_file *
@@ -3326,9 +3340,13 @@ find_or_add_file(struct nfs4_file *new, struct knfsd_fh *fh)
 {
 	struct nfs4_file *fp;
 
+	fp = find_file(fh);
+	if (fp)
+		return fp;
+
 	spin_lock(&state_lock);
 	fp = find_file_locked(fh);
-	if (fp == NULL) {
+	if (likely(fp == NULL)) {
 		nfsd4_init_file(new, fh);
 		fp = new;
 	}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 8e85e07efce6..530470a35ecd 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -490,6 +490,7 @@ struct nfs4_file {
 	atomic_t		fi_access[2];
 	u32			fi_share_deny;
 	struct file		*fi_deleg_file;
+	struct rcu_head		fi_rcu;
 	atomic_t		fi_delegees;
 	struct knfsd_fh		fi_fhandle;
 	bool			fi_had_conflict;
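[For readers less familiar with the idiom, the patch is an instance of
the standard RCU-protected hash chain plus refcount pattern. A distilled,
self-contained toy version, not nfsd code; all names here are invented
for illustration:]

	#include <linux/atomic.h>
	#include <linux/rculist.h>
	#include <linux/rcupdate.h>
	#include <linux/slab.h>
	#include <linux/spinlock.h>

	/* toy object: RCU-protected hash chain plus a refcount whose final
	 * put unhashes the object and defers the free past a grace period */
	struct obj {
		struct hlist_node	hash;
		atomic_t		ref;
		struct rcu_head		rcu;
		int			key;
	};

	static DEFINE_SPINLOCK(obj_lock);
	static struct hlist_head obj_hash;	/* one chain for brevity; insertion
						 * uses hlist_add_head_rcu under obj_lock */

	static void obj_free_rcu(struct rcu_head *rcu)
	{
		kfree(container_of(rcu, struct obj, rcu));
	}

	static void obj_put(struct obj *o)
	{
		/* final put: unhash under the lock, then defer the free until
		 * all current RCU readers are done */
		if (atomic_dec_and_lock(&o->ref, &obj_lock)) {
			hlist_del_rcu(&o->hash);
			spin_unlock(&obj_lock);
			call_rcu(&o->rcu, obj_free_rcu);
		}
	}

	static struct obj *obj_find(int key)
	{
		struct obj *o, *ret = NULL;

		rcu_read_lock();
		hlist_for_each_entry_rcu(o, &obj_hash, hash) {
			if (o->key == key) {
				/* the object may be mid-teardown; only return
				 * it if we can still take a reference */
				if (atomic_inc_not_zero(&o->ref))
					ret = o;
				break;
			}
		}
		rcu_read_unlock();
		return ret;
	}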