diff mbox series

[v5,3/6] kernfs: use VFS negative dentry caching

Message ID 162306072498.69474.16160057168984328507.stgit@web.messagingengine.com (mailing list archive)
State New, archived
Headers show
Series kernfs: proposed locking and concurrency improvement | expand

Commit Message

Ian Kent June 7, 2021, 10:12 a.m. UTC
If there are many lookups for non-existent paths these negative lookups
can lead to a lot of overhead during path walks.

The VFS allows dentries to be created as negative and hashed, and caches
them so they can be used to reduce the fairly high overhead alloc/free
cycle that occurs during these lookups.

Use the kernfs node parent revision to identify if a change has been
made to the containing directory so that the negative dentry can be
discarded and the lookup redone.

Signed-off-by: Ian Kent <raven@themaw.net>
---
 fs/kernfs/dir.c |   53 +++++++++++++++++++++++++++++++----------------------
 1 file changed, 31 insertions(+), 22 deletions(-)

Comments

Eric W. Biederman June 7, 2021, 6:27 p.m. UTC | #1
Ian Kent <raven@themaw.net> writes:

> If there are many lookups for non-existent paths these negative lookups
> can lead to a lot of overhead during path walks.
>
> The VFS allows dentries to be created as negative and hashed, and caches
> them so they can be used to reduce the fairly high overhead alloc/free
> cycle that occurs during these lookups.
>
> Use the kernfs node parent revision to identify if a change has been
> made to the containing directory so that the negative dentry can be
> discarded and the lookup redone.
>
> Signed-off-by: Ian Kent <raven@themaw.net>
> ---
>  fs/kernfs/dir.c |   53 +++++++++++++++++++++++++++++++----------------------
>  1 file changed, 31 insertions(+), 22 deletions(-)
>
> diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
> index b88432c48851f..5ae95e8d1aea1 100644
> --- a/fs/kernfs/dir.c
> +++ b/fs/kernfs/dir.c
> @@ -1039,13 +1039,32 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
>  	if (flags & LOOKUP_RCU)
>  		return -ECHILD;
>  
> -	/* Always perform fresh lookup for negatives */
> -	if (d_really_is_negative(dentry))
> -		goto out_bad_unlocked;
> -
>  	kn = kernfs_dentry_node(dentry);
>  	mutex_lock(&kernfs_mutex);
>  
> +	/* Negative hashed dentry? */
> +	if (!kn) {
> +		struct dentry *d_parent = dget_parent(dentry);
> +		struct kernfs_node *parent;
> +
> +		/* If the kernfs parent node has changed discard and
> +		 * proceed to ->lookup.
> +		 */
> +		parent = kernfs_dentry_node(d_parent);
> +		if (parent) {
> +			if (kernfs_dir_changed(parent, dentry)) {
> +				dput(d_parent);
> +				goto out_bad;
> +			}
> +		}
> +		dput(d_parent);
> +
> +		/* The kernfs node doesn't exist, leave the dentry
> +		 * negative and return success.
> +		 */
> +		goto out;
> +	}

What part of this new negative hashed dentry check needs the
kernfs_mutex?

I guess it is the reading of kn->dir.rev.

Since all you are doing is comparing if two fields are equal it
really should not matter.  Maybe somewhere there needs to be a
sprinkling of primitives like READ_ONCE.

It just seems like such a waste to put all of that under kernfs_mutex
on the off chance kn->dir.rev will change while it is being read.

Eric
Ian Kent June 8, 2021, 1:56 a.m. UTC | #2
On Mon, 2021-06-07 at 13:27 -0500, Eric W. Biederman wrote:
> Ian Kent <raven@themaw.net> writes:
> 
> > If there are many lookups for non-existent paths these negative
> > lookups
> > can lead to a lot of overhead during path walks.
> > 
> > The VFS allows dentries to be created as negative and hashed, and
> > caches
> > them so they can be used to reduce the fairly high overhead
> > alloc/free
> > cycle that occurs during these lookups.
> > 
> > Use the kernfs node parent revision to identify if a change has
> > been
> > made to the containing directory so that the negative dentry can be
> > discarded and the lookup redone.
> > 
> > Signed-off-by: Ian Kent <raven@themaw.net>
> > ---
> >  fs/kernfs/dir.c |   53 +++++++++++++++++++++++++++++++------------
> > ----------
> >  1 file changed, 31 insertions(+), 22 deletions(-)
> > 
> > diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
> > index b88432c48851f..5ae95e8d1aea1 100644
> > --- a/fs/kernfs/dir.c
> > +++ b/fs/kernfs/dir.c
> > @@ -1039,13 +1039,32 @@ static int kernfs_dop_revalidate(struct
> > dentry *dentry, unsigned int flags)
> >         if (flags & LOOKUP_RCU)
> >                 return -ECHILD;
> >  
> > -       /* Always perform fresh lookup for negatives */
> > -       if (d_really_is_negative(dentry))
> > -               goto out_bad_unlocked;
> > -
> >         kn = kernfs_dentry_node(dentry);
> >         mutex_lock(&kernfs_mutex);
> >  
> > +       /* Negative hashed dentry? */
> > +       if (!kn) {
> > +               struct dentry *d_parent = dget_parent(dentry);
> > +               struct kernfs_node *parent;
> > +
> > +               /* If the kernfs parent node has changed discard
> > and
> > +                * proceed to ->lookup.
> > +                */
> > +               parent = kernfs_dentry_node(d_parent);
> > +               if (parent) {
> > +                       if (kernfs_dir_changed(parent, dentry)) {
> > +                               dput(d_parent);
> > +                               goto out_bad;
> > +                       }
> > +               }
> > +               dput(d_parent);
> > +
> > +               /* The kernfs node doesn't exist, leave the dentry
> > +                * negative and return success.
> > +                */
> > +               goto out;
> > +       }
> 
> What part of this new negative hashed dentry check needs the
> kernfs_mutex?
> 
> I guess it is the reading of kn->dir.rev.

I have an irresistible urge to keep the rb tree stable when
accessing it. It was probably not necessary most of the times
I did it; IIUC even a rebalance will leave the node address
unchanged, so it should be just removals and moves to worry
about.
 
> 
> Since all you are doing is comparing if two fields are equal it
> really should not matter.  Maybe somewhere there needs to be a
> sprinkling of primitives like READ_ONCE.

There is one case that looks tricky: rename will call ->rename()
and a bit later do the move. Thinking about it, a READ_ONCE might
be needed even now, but taking the rwsem is probably enough.

Not sure about that one?

Moving this out from under the rwsem would be good to do.

Ian
> 
> It just seems like such a waste to put all of that under kernfs_mutex
> on the off chance kn->dir.rev will change while it is being read.
> 
> Eric
diff mbox series

Patch

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index b88432c48851f..5ae95e8d1aea1 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -1039,13 +1039,32 @@  static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
 	if (flags & LOOKUP_RCU)
 		return -ECHILD;
 
-	/* Always perform fresh lookup for negatives */
-	if (d_really_is_negative(dentry))
-		goto out_bad_unlocked;
-
 	kn = kernfs_dentry_node(dentry);
 	mutex_lock(&kernfs_mutex);
 
+	/* Negative hashed dentry? */
+	if (!kn) {
+		struct dentry *d_parent = dget_parent(dentry);
+		struct kernfs_node *parent;
+
+		/* If the kernfs parent node has changed discard and
+		 * proceed to ->lookup.
+		 */
+		parent = kernfs_dentry_node(d_parent);
+		if (parent) {
+			if (kernfs_dir_changed(parent, dentry)) {
+				dput(d_parent);
+				goto out_bad;
+			}
+		}
+		dput(d_parent);
+
+		/* The kernfs node doesn't exist, leave the dentry
+		 * negative and return success.
+		 */
+		goto out;
+	}
+
 	/* The kernfs node has been deactivated */
 	if (!kernfs_active(kn))
 		goto out_bad;
@@ -1062,12 +1081,11 @@  static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
 	if (kn->parent && kernfs_ns_enabled(kn->parent) &&
 	    kernfs_info(dentry->d_sb)->ns != kn->ns)
 		goto out_bad;
-
+out:
 	mutex_unlock(&kernfs_mutex);
 	return 1;
 out_bad:
 	mutex_unlock(&kernfs_mutex);
-out_bad_unlocked:
 	return 0;
 }
 
@@ -1082,30 +1100,21 @@  static struct dentry *kernfs_iop_lookup(struct inode *dir,
 	struct dentry *ret;
 	struct kernfs_node *parent = dir->i_private;
 	struct kernfs_node *kn;
-	struct inode *inode;
+	struct inode *inode = NULL;
 	const void *ns = NULL;
 
 	mutex_lock(&kernfs_mutex);
-
 	if (kernfs_ns_enabled(parent))
 		ns = kernfs_info(dir->i_sb)->ns;
 
 	kn = kernfs_find_ns(parent, dentry->d_name.name, ns);
-
-	/* no such entry */
-	if (!kn || !kernfs_active(kn)) {
-		ret = NULL;
-		goto out_unlock;
-	}
-
 	/* attach dentry and inode */
-	inode = kernfs_get_inode(dir->i_sb, kn);
-	if (!inode) {
-		ret = ERR_PTR(-ENOMEM);
-		goto out_unlock;
+	if (kn && kernfs_active(kn)) {
+		inode = kernfs_get_inode(dir->i_sb, kn);
+		if (!inode)
+			inode = ERR_PTR(-ENOMEM);
 	}
-
-	/* instantiate and hash dentry */
+	/* instantiate and hash (possibly negative) dentry */
 	ret = d_splice_alias(inode, dentry);
 	if (!IS_ERR(ret)) {
 		if (unlikely(ret))
@@ -1113,8 +1122,8 @@  static struct dentry *kernfs_iop_lookup(struct inode *dir,
 		else
 			kernfs_set_rev(parent, dentry);
 	}
- out_unlock:
 	mutex_unlock(&kernfs_mutex);
+
 	return ret;
 }