diff mbox series

[v5,2/6] kernfs: add a revision to identify directory node changes

Message ID 162306071065.69474.8064509709844383785.stgit@web.messagingengine.com (mailing list archive)
State New, archived
Headers show
Series kernfs: proposed locking and concurrency improvement | expand

Commit Message

Ian Kent June 7, 2021, 10:11 a.m. UTC
Add a revision counter to kernfs directory nodes so it can be used
to detect if a directory node has changed.

There's an assumption that sizeof(unsigned long) <= sizeof(pointer)
on all architectures and as far as I know that assumption holds.

So adding a revision counter to the struct kernfs_elem_dir variant of
the kernfs_node type union won't increase the size of the kernfs_node
struct. This is because struct kernfs_elem_dir is at least
sizeof(pointer) smaller than the largest union variant. It's tempting
to make the revision counter a u64 but that would increase the size of
kernfs_node on archs where sizeof(pointer) is smaller than the revision
counter.

Signed-off-by: Ian Kent <raven@themaw.net>
---
 fs/kernfs/dir.c             |    8 ++++++++
 fs/kernfs/kernfs-internal.h |   24 ++++++++++++++++++++++++
 include/linux/kernfs.h      |    5 +++++
 3 files changed, 37 insertions(+)

Comments

Eric W. Biederman June 7, 2021, 5:53 p.m. UTC | #1
Ian Kent <raven@themaw.net> writes:

> Add a revision counter to kernfs directory nodes so it can be used
> to detect if a directory node has changed.
>
> There's an assumption that sizeof(unsigned long) <= sizeof(pointer)
> on all architectures and as far as I know that assumption holds.
>
> So adding a revision counter to the struct kernfs_elem_dir variant of
> the kernfs_node type union won't increase the size of the kernfs_node
> struct. This is because struct kernfs_elem_dir is at least
> sizeof(pointer) smaller than the largest union variant. It's tempting
> to make the revision counter a u64 but that would increase the size of
> kernfs_node on archs where sizeof(pointer) is smaller than the revision
> counter.
>
> Signed-off-by: Ian Kent <raven@themaw.net>
> ---
>  fs/kernfs/dir.c             |    8 ++++++++
>  fs/kernfs/kernfs-internal.h |   24 ++++++++++++++++++++++++
>  include/linux/kernfs.h      |    5 +++++
>  3 files changed, 37 insertions(+)
>
> diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
> index 33166ec90a112..b88432c48851f 100644
> --- a/fs/kernfs/dir.c
> +++ b/fs/kernfs/dir.c
> @@ -372,6 +372,7 @@ static int kernfs_link_sibling(struct kernfs_node *kn)
>  	/* successfully added, account subdir number */
>  	if (kernfs_type(kn) == KERNFS_DIR)
>  		kn->parent->dir.subdirs++;
> +	kernfs_inc_rev(kn->parent);
>  
>  	return 0;
>  }
> @@ -394,6 +395,7 @@ static bool kernfs_unlink_sibling(struct kernfs_node *kn)
>  
>  	if (kernfs_type(kn) == KERNFS_DIR)
>  		kn->parent->dir.subdirs--;
> +	kernfs_inc_rev(kn->parent);
>  
>  	rb_erase(&kn->rb, &kn->parent->dir.children);
>  	RB_CLEAR_NODE(&kn->rb);
> @@ -1105,6 +1107,12 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir,
>  
>  	/* instantiate and hash dentry */
>  	ret = d_splice_alias(inode, dentry);
> +	if (!IS_ERR(ret)) {
> +		if (unlikely(ret))
> +			kernfs_set_rev(parent, ret);
> +		else
> +			kernfs_set_rev(parent, dentry);

Do we care about d_time on non-NULL dentries?

For d_splice_alias to return a different dentry implies
that the dentry was non-NULL.

I am wondering if having a guarantee that d_time never changes could
help simplify the implementation.  For never changing it would seem to
make sense to call kernfs_set_rev before d_splice_alias on dentry, and
simply not worry about it after d_splice_alias.

> +	}
>   out_unlock:
>  	mutex_unlock(&kernfs_mutex);
>  	return ret;
> diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
> index ccc3b44f6306f..1536002584fc4 100644
> --- a/fs/kernfs/kernfs-internal.h
> +++ b/fs/kernfs/kernfs-internal.h
> @@ -81,6 +81,30 @@ static inline struct kernfs_node *kernfs_dentry_node(struct dentry *dentry)
>  	return d_inode(dentry)->i_private;
>  }
>  
> +static inline void kernfs_set_rev(struct kernfs_node *kn,
> +				  struct dentry *dentry)
> +{
> +	if (kernfs_type(kn) == KERNFS_DIR)
> +		dentry->d_time = kn->dir.rev;
> +}
> +
> +static inline void kernfs_inc_rev(struct kernfs_node *kn)
> +{
> +	if (kernfs_type(kn) == KERNFS_DIR)
> +		kn->dir.rev++;
> +}
> +
> +static inline bool kernfs_dir_changed(struct kernfs_node *kn,
> +				      struct dentry *dentry)
> +{
> +	if (kernfs_type(kn) == KERNFS_DIR) {
> +		/* Not really a time but it does what's needed */
> +		if (time_after(kn->dir.rev, dentry->d_time))
> +			return true;

Why not simply make this:
		if (kn->dir.rev != dentry->d_time)
	        	return true;

I don't see what is gained by not counting as changed something in the
wrong half of the values.

> +	}
> +	return false;
> +}
> +
>  extern const struct super_operations kernfs_sops;
>  extern struct kmem_cache *kernfs_node_cache, *kernfs_iattrs_cache;
>  
> diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
> index 9e8ca8743c268..7947acb1163d7 100644
> --- a/include/linux/kernfs.h
> +++ b/include/linux/kernfs.h
> @@ -98,6 +98,11 @@ struct kernfs_elem_dir {
>  	 * better directly in kernfs_node but is here to save space.
>  	 */
>  	struct kernfs_root	*root;
> +	/*
> +	 * Monotonic revision counter, used to identify if a directory
> +	 * node has changed during revalidation.
> +	 */
> +	unsigned long rev;
>  };
>  
>  struct kernfs_elem_symlink {

Eric
Ian Kent June 8, 2021, 1:26 a.m. UTC | #2
On Mon, 2021-06-07 at 12:53 -0500, Eric W. Biederman wrote:
> Ian Kent <raven@themaw.net> writes:
> 
> > Add a revision counter to kernfs directory nodes so it can be used
> > to detect if a directory node has changed.
> > 
> > There's an assumption that sizeof(unsigned long) <= sizeof(pointer)
> > on all architectures and as far as I know that assumption holds.
> > 
> > So adding a revision counter to the struct kernfs_elem_dir variant
> > of
> > the kernfs_node type union won't increase the size of the
> > kernfs_node
> > struct. This is because struct kernfs_elem_dir is at least
> > sizeof(pointer) smaller than the largest union variant. It's
> > tempting
> > to make the revision counter a u64 but that would increase the size
> > of
> > kernfs_node on archs where sizeof(pointer) is smaller than the
> > revision
> > counter.
> > 
> > Signed-off-by: Ian Kent <raven@themaw.net>
> > ---
> >  fs/kernfs/dir.c             |    8 ++++++++
> >  fs/kernfs/kernfs-internal.h |   24 ++++++++++++++++++++++++
> >  include/linux/kernfs.h      |    5 +++++
> >  3 files changed, 37 insertions(+)
> > 
> > diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
> > index 33166ec90a112..b88432c48851f 100644
> > --- a/fs/kernfs/dir.c
> > +++ b/fs/kernfs/dir.c
> > @@ -372,6 +372,7 @@ static int kernfs_link_sibling(struct
> > kernfs_node *kn)
> >         /* successfully added, account subdir number */
> >         if (kernfs_type(kn) == KERNFS_DIR)
> >                 kn->parent->dir.subdirs++;
> > +       kernfs_inc_rev(kn->parent);
> >  
> >         return 0;
> >  }
> > @@ -394,6 +395,7 @@ static bool kernfs_unlink_sibling(struct
> > kernfs_node *kn)
> >  
> >         if (kernfs_type(kn) == KERNFS_DIR)
> >                 kn->parent->dir.subdirs--;
> > +       kernfs_inc_rev(kn->parent);
> >  
> >         rb_erase(&kn->rb, &kn->parent->dir.children);
> >         RB_CLEAR_NODE(&kn->rb);
> > @@ -1105,6 +1107,12 @@ static struct dentry
> > *kernfs_iop_lookup(struct inode *dir,
> >  
> >         /* instantiate and hash dentry */
> >         ret = d_splice_alias(inode, dentry);
> > +       if (!IS_ERR(ret)) {
> > +               if (unlikely(ret))
> > +                       kernfs_set_rev(parent, ret);
> > +               else
> > +                       kernfs_set_rev(parent, dentry);
> 
> Do we care about d_time on non-NULL dentries?

Would we ever need to use it to avoid a search in any other cases?

Probably not ... those export ops mean that some dentries might
not have d_time set.

Maybe it's best to put a comment in about only using it for
negative dentries and set it unconditionally in ->lookup() as
you describe.

> 
> For d_splice_alias to return a different dentry implies
> that the dentry was non-NULL.
> 
> I am wondering if having a guarantee that d_time never changes could
> help simplify the implementation.  For never changing it would seem to
> make sense to call kernfs_set_rev before d_splice_alias on dentry,
> and
> simply not worry about it after d_splice_alias.

Yes, I was tempted to do that.

> 
> > +       }
> >   out_unlock:
> >         mutex_unlock(&kernfs_mutex);
> >         return ret;
> > diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-
> > internal.h
> > index ccc3b44f6306f..1536002584fc4 100644
> > --- a/fs/kernfs/kernfs-internal.h
> > +++ b/fs/kernfs/kernfs-internal.h
> > @@ -81,6 +81,30 @@ static inline struct kernfs_node
> > *kernfs_dentry_node(struct dentry *dentry)
> >         return d_inode(dentry)->i_private;
> >  }
> >  
> > +static inline void kernfs_set_rev(struct kernfs_node *kn,
> > +                                 struct dentry *dentry)
> > +{
> > +       if (kernfs_type(kn) == KERNFS_DIR)
> > +               dentry->d_time = kn->dir.rev;
> > +}
> > +
> > +static inline void kernfs_inc_rev(struct kernfs_node *kn)
> > +{
> > +       if (kernfs_type(kn) == KERNFS_DIR)
> > +               kn->dir.rev++;
> > +}
> > +
> > +static inline bool kernfs_dir_changed(struct kernfs_node *kn,
> > +                                     struct dentry *dentry)
> > +{
> > +       if (kernfs_type(kn) == KERNFS_DIR) {
> > +               /* Not really a time but it does what's needed */
> > +               if (time_after(kn->dir.rev, dentry->d_time))
> > +                       return true;
> 
> Why not simply make this:
>                 if (kn->dir.rev != dentry->d_time)
>                         return true;
> 
> I don't see what is gained by not counting as changed something in
> the
> wrong half of the values.

Yes, it was like that originally and really shouldn't make
any difference. I'll change it back.

Ian
> 
> > +       }
> > +       return false;
> > +}
> > +
> >  extern const struct super_operations kernfs_sops;
> >  extern struct kmem_cache *kernfs_node_cache, *kernfs_iattrs_cache;
> >  
> > diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
> > index 9e8ca8743c268..7947acb1163d7 100644
> > --- a/include/linux/kernfs.h
> > +++ b/include/linux/kernfs.h
> > @@ -98,6 +98,11 @@ struct kernfs_elem_dir {
> >          * better directly in kernfs_node but is here to save
> > space.
> >          */
> >         struct kernfs_root      *root;
> > +       /*
> > +        * Monotonic revision counter, used to identify if a
> > directory
> > +        * node has changed during revalidation.
> > +        */
> > +       unsigned long rev;
> >  };
> >  
> >  struct kernfs_elem_symlink {
> 
> Eric
diff mbox series

Patch

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 33166ec90a112..b88432c48851f 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -372,6 +372,7 @@  static int kernfs_link_sibling(struct kernfs_node *kn)
 	/* successfully added, account subdir number */
 	if (kernfs_type(kn) == KERNFS_DIR)
 		kn->parent->dir.subdirs++;
+	kernfs_inc_rev(kn->parent);
 
 	return 0;
 }
@@ -394,6 +395,7 @@  static bool kernfs_unlink_sibling(struct kernfs_node *kn)
 
 	if (kernfs_type(kn) == KERNFS_DIR)
 		kn->parent->dir.subdirs--;
+	kernfs_inc_rev(kn->parent);
 
 	rb_erase(&kn->rb, &kn->parent->dir.children);
 	RB_CLEAR_NODE(&kn->rb);
@@ -1105,6 +1107,12 @@  static struct dentry *kernfs_iop_lookup(struct inode *dir,
 
 	/* instantiate and hash dentry */
 	ret = d_splice_alias(inode, dentry);
+	if (!IS_ERR(ret)) {
+		if (unlikely(ret))
+			kernfs_set_rev(parent, ret);
+		else
+			kernfs_set_rev(parent, dentry);
+	}
  out_unlock:
 	mutex_unlock(&kernfs_mutex);
 	return ret;
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index ccc3b44f6306f..1536002584fc4 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -81,6 +81,30 @@  static inline struct kernfs_node *kernfs_dentry_node(struct dentry *dentry)
 	return d_inode(dentry)->i_private;
 }
 
+static inline void kernfs_set_rev(struct kernfs_node *kn,
+				  struct dentry *dentry)
+{
+	if (kernfs_type(kn) == KERNFS_DIR)
+		dentry->d_time = kn->dir.rev;
+}
+
+static inline void kernfs_inc_rev(struct kernfs_node *kn)
+{
+	if (kernfs_type(kn) == KERNFS_DIR)
+		kn->dir.rev++;
+}
+
+static inline bool kernfs_dir_changed(struct kernfs_node *kn,
+				      struct dentry *dentry)
+{
+	if (kernfs_type(kn) == KERNFS_DIR) {
+		/* Not really a time but it does what's needed */
+		if (time_after(kn->dir.rev, dentry->d_time))
+			return true;
+	}
+	return false;
+}
+
 extern const struct super_operations kernfs_sops;
 extern struct kmem_cache *kernfs_node_cache, *kernfs_iattrs_cache;
 
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 9e8ca8743c268..7947acb1163d7 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -98,6 +98,11 @@  struct kernfs_elem_dir {
 	 * better directly in kernfs_node but is here to save space.
 	 */
 	struct kernfs_root	*root;
+	/*
+	 * Monotonic revision counter, used to identify if a directory
+	 * node has changed during revalidation.
+	 */
+	unsigned long rev;
 };
 
 struct kernfs_elem_symlink {