diff mbox

[03/11] kernfs: add an API to get kernfs node from inode number

Message ID 41d336f7006d63c6dd5bddf407c16de8064debc3.1496432591.git.shli@fb.com (mailing list archive)
State New, archived
Headers show

Commit Message

Shaohua Li June 2, 2017, 9:53 p.m. UTC
From: Shaohua Li <shli@fb.com>

Add an API to get a kernfs node from its inode number. We will need this
to implement exportfs operations.

To make the API lock-free, kernfs nodes are freed in RCU context, and we
rely on the kernfs_node count and ino number to filter out stale nodes.

Signed-off-by: Shaohua Li <shli@fb.com>
---
 fs/kernfs/dir.c             | 35 +++++++++++++++++++++++++++++++++++
 fs/kernfs/kernfs-internal.h |  2 ++
 fs/kernfs/mount.c           |  4 +++-
 3 files changed, 40 insertions(+), 1 deletion(-)

Comments

Eduardo Valentin June 2, 2017, 10:03 p.m. UTC | #1
On Fri, Jun 02, 2017 at 02:53:56PM -0700, Shaohua Li wrote:
> From: Shaohua Li <shli@fb.com>
> 
> Add an API to get kernfs node from inode number. We will need this to
> implement exportfs operations.
> 
> To make the API lock free, kernfs node is freed in RCU context. And we
> depend on kernfs_node count/ino number to filter stale kernfs nodes.
> 
> Signed-off-by: Shaohua Li <shli@fb.com>
> ---
>  fs/kernfs/dir.c             | 35 +++++++++++++++++++++++++++++++++++
>  fs/kernfs/kernfs-internal.h |  2 ++
>  fs/kernfs/mount.c           |  4 +++-
>  3 files changed, 40 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
> index 8e8545a..4c86e4c 100644
> --- a/fs/kernfs/dir.c
> +++ b/fs/kernfs/dir.c
> @@ -643,6 +643,7 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
>  	kn->ino = ret;
>  	kn->generation = atomic_inc_return(&root->next_generation);
>  
> +	/* set ino first. Above atomic_inc_return has a barrier */
>  	atomic_set(&kn->count, 1);
>  	atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
>  	RB_CLEAR_NODE(&kn->rb);
> @@ -674,6 +675,40 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
>  	return kn;
>  }
>  
> +/*
> + * kernfs_get_node_by_ino - get kernfs_node from inode number
> + * @root: the kernfs root
> + * @ino: inode number
> + *
> + * RETURNS:
> + * NULL on failure. Return a kernfs node with reference counter incremented
> + */

Is the above supposed to be a valid kernel doc entry?

> +struct kernfs_node *kernfs_get_node_by_ino(struct kernfs_root *root,
> +					   unsigned int ino)
> +{
> +	struct kernfs_node *kn;
> +
> +	rcu_read_lock();
> +	kn = idr_find(&root->ino_idr, ino);
> +	if (!kn)
> +		goto out;
> +	/* kernfs_put removes the ino after count is 0 */
> +	if (!atomic_inc_not_zero(&kn->count)) {
> +		kn = NULL;

Why do you need to set kn to NULL?

> +		goto out;
> +	}
> +	/* If this node is reused, __kernfs_new_node sets ino before count */
> +	if (kn->ino != ino)
> +		goto out;
> +	rcu_read_unlock();
> +
> +	return kn;
> +out:
> +	rcu_read_unlock();
> +	kernfs_put(kn);
> +	return NULL;
> +}
> +
>  /**
>   *	kernfs_add_one - add kernfs_node to parent without warning
>   *	@kn: kernfs_node to be added
> diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
> index 2d5144a..3534cfe 100644
> --- a/fs/kernfs/kernfs-internal.h
> +++ b/fs/kernfs/kernfs-internal.h
> @@ -98,6 +98,8 @@ int kernfs_add_one(struct kernfs_node *kn);
>  struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
>  				    const char *name, umode_t mode,
>  				    unsigned flags);
> +struct kernfs_node *kernfs_get_node_by_ino(struct kernfs_root *root,
> +					   unsigned int ino);
>  
>  /*
>   * file.c
> diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
> index d5b149a..343dfeb 100644
> --- a/fs/kernfs/mount.c
> +++ b/fs/kernfs/mount.c
> @@ -332,5 +332,7 @@ void __init kernfs_init(void)
>  {
>  	kernfs_node_cache = kmem_cache_create("kernfs_node_cache",
>  					      sizeof(struct kernfs_node),
> -					      0, SLAB_PANIC, NULL);
> +					      0,
> +					      SLAB_PANIC | SLAB_TYPESAFE_BY_RCU,
> +					      NULL);
>  }
> -- 
> 2.9.3
> 
>
Shaohua Li June 2, 2017, 11:36 p.m. UTC | #2
On Fri, Jun 02, 2017 at 03:03:45PM -0700, Eduardo Valentin wrote:
> On Fri, Jun 02, 2017 at 02:53:56PM -0700, Shaohua Li wrote:
> > From: Shaohua Li <shli@fb.com>
> > 
> > Add an API to get kernfs node from inode number. We will need this to
> > implement exportfs operations.
> > 
> > To make the API lock free, kernfs node is freed in RCU context. And we
> > depend on kernfs_node count/ino number to filter stale kernfs nodes.
> > 
> > Signed-off-by: Shaohua Li <shli@fb.com>
> > ---
> >  fs/kernfs/dir.c             | 35 +++++++++++++++++++++++++++++++++++
> >  fs/kernfs/kernfs-internal.h |  2 ++
> >  fs/kernfs/mount.c           |  4 +++-
> >  3 files changed, 40 insertions(+), 1 deletion(-)
> > 
> > diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
> > index 8e8545a..4c86e4c 100644
> > --- a/fs/kernfs/dir.c
> > +++ b/fs/kernfs/dir.c
> > @@ -643,6 +643,7 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
> >  	kn->ino = ret;
> >  	kn->generation = atomic_inc_return(&root->next_generation);
> >  
> > +	/* set ino first. Above atomic_inc_return has a barrier */
> >  	atomic_set(&kn->count, 1);
> >  	atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
> >  	RB_CLEAR_NODE(&kn->rb);
> > @@ -674,6 +675,40 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
> >  	return kn;
> >  }
> >  
> > +/*
> > + * kernfs_get_node_by_ino - get kernfs_node from inode number
> > + * @root: the kernfs root
> > + * @ino: inode number
> > + *
> > + * RETURNS:
> > + * NULL on failure. Return a kernfs node with reference counter incremented
> > + */
> 
> Is the above supposed to be a valid kernel doc entry?

What do you expect? The function name explains it very well actually.
 
> > +struct kernfs_node *kernfs_get_node_by_ino(struct kernfs_root *root,
> > +					   unsigned int ino)
> > +{
> > +	struct kernfs_node *kn;
> > +
> > +	rcu_read_lock();
> > +	kn = idr_find(&root->ino_idr, ino);
> > +	if (!kn)
> > +		goto out;
> > +	/* kernfs_put removes the ino after count is 0 */
> > +	if (!atomic_inc_not_zero(&kn->count)) {
> > +		kn = NULL;
> 
> Why do you need to set kn to NULL?

I don't know what kind of explanation you expect. This is quite obvious
actually. If the count == 0, we don't increase the ref count, so we don't
decrease the ref count later (in kernfs_put).

> > +		goto out;
> > +	}
> > +	/* If this node is reused, __kernfs_new_node sets ino before count */
> > +	if (kn->ino != ino)
> > +		goto out;
> > +	rcu_read_unlock();
> > +
> > +	return kn;
> > +out:
> > +	rcu_read_unlock();
> > +	kernfs_put(kn);
> > +	return NULL;
> > +}
> > +
> >  /**
> >   *	kernfs_add_one - add kernfs_node to parent without warning
> >   *	@kn: kernfs_node to be added
> > diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
> > index 2d5144a..3534cfe 100644
> > --- a/fs/kernfs/kernfs-internal.h
> > +++ b/fs/kernfs/kernfs-internal.h
> > @@ -98,6 +98,8 @@ int kernfs_add_one(struct kernfs_node *kn);
> >  struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
> >  				    const char *name, umode_t mode,
> >  				    unsigned flags);
> > +struct kernfs_node *kernfs_get_node_by_ino(struct kernfs_root *root,
> > +					   unsigned int ino);
> >  
> >  /*
> >   * file.c
> > diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
> > index d5b149a..343dfeb 100644
> > --- a/fs/kernfs/mount.c
> > +++ b/fs/kernfs/mount.c
> > @@ -332,5 +332,7 @@ void __init kernfs_init(void)
> >  {
> >  	kernfs_node_cache = kmem_cache_create("kernfs_node_cache",
> >  					      sizeof(struct kernfs_node),
> > -					      0, SLAB_PANIC, NULL);
> > +					      0,
> > +					      SLAB_PANIC | SLAB_TYPESAFE_BY_RCU,
> > +					      NULL);
> >  }
> > -- 
> > 2.9.3
> > 
> > 
> 
> -- 
> All the best,
> Eduardo Valentin
Tejun Heo June 12, 2017, 6:20 p.m. UTC | #3
Hello,

On Fri, Jun 02, 2017 at 02:53:56PM -0700, Shaohua Li wrote:
> --- a/fs/kernfs/dir.c
> +++ b/fs/kernfs/dir.c
> @@ -643,6 +643,7 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
>  	kn->ino = ret;
>  	kn->generation = atomic_inc_return(&root->next_generation);
>  
> +	/* set ino first. Above atomic_inc_return has a barrier */
>  	atomic_set(&kn->count, 1);
>  	atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
>  	RB_CLEAR_NODE(&kn->rb);

Ah, you filter not-fully-alive ones here w/ kn->count.  Hmm... this
definitely can use more documentation including what this is paired
with (the inc_not_zero in kernfs_get_node_by_ino()) and why we need
this.

> +/*
> + * kernfs_get_node_by_ino - get kernfs_node from inode number
> + * @root: the kernfs root
> + * @ino: inode number
> + *
> + * RETURNS:
> + * NULL on failure. Return a kernfs node with reference counter incremented
> + */
> +struct kernfs_node *kernfs_get_node_by_ino(struct kernfs_root *root,
> +					   unsigned int ino)
> +{
> +	struct kernfs_node *kn;
> +
> +	rcu_read_lock();
> +	kn = idr_find(&root->ino_idr, ino);
> +	if (!kn)
> +		goto out;
> +	/* kernfs_put removes the ino after count is 0 */
> +	if (!atomic_inc_not_zero(&kn->count)) {
> +		kn = NULL;
> +		goto out;
> +	}
> +	/* If this node is reused, __kernfs_new_node sets ino before count */
> +	if (kn->ino != ino)
> +		goto out;
> +	rcu_read_unlock();
> +
> +	return kn;
> +out:
> +	rcu_read_unlock();
> +	kernfs_put(kn);
> +	return NULL;
> +}

Yeah, I think this should work.  I think we could have gone with
dumber "use the same lock for lookup" but this isn't too complicated
either and has obvious scalability benefits.  That said, let's please
be more verbose on how the two paths interlock with each other.

Thanks.
Tejun Heo June 12, 2017, 6:37 p.m. UTC | #4
Ooh, one more thing.

On Mon, Jun 12, 2017 at 02:20:28PM -0400, Tejun Heo wrote:
> > +struct kernfs_node *kernfs_get_node_by_ino(struct kernfs_root *root,
> > +					   unsigned int ino)

Can we name this kernfs_find_and_get_by_ino() for consistency?  And
the RCU optimization does seem prominent compared to other find/get
functions which all just use kernfs_mutex (still not objecting).

Thanks.
diff mbox

Patch

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 8e8545a..4c86e4c 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -643,6 +643,7 @@  static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
 	kn->ino = ret;
 	kn->generation = atomic_inc_return(&root->next_generation);
 
+	/* set ino first. Above atomic_inc_return has a barrier */
 	atomic_set(&kn->count, 1);
 	atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
 	RB_CLEAR_NODE(&kn->rb);
@@ -674,6 +675,40 @@  struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
 	return kn;
 }
 
+/**
+ * kernfs_get_node_by_ino - find a kernfs_node by inode number and get a ref
+ * @root: the kernfs root whose ino_idr is searched
+ * @ino: inode number to look up
+ *
+ * RETURNS:
+ * The node with its count incremented, or NULL if no live node matches @ino.
+ */
+struct kernfs_node *kernfs_get_node_by_ino(struct kernfs_root *root,
+					   unsigned int ino)
+{
+	struct kernfs_node *kn;
+
+	rcu_read_lock();
+	kn = idr_find(&root->ino_idr, ino);
+	if (!kn)
+		goto out;
+	/* count may already be 0: kernfs_put removes the ino only afterwards */
+	if (!atomic_inc_not_zero(&kn->count)) {
+		kn = NULL;	/* no ref taken, so out: must not put one */
+		goto out;
+	}
+	/* Node reused? __kernfs_new_node sets ino before count; drop ref */
+	if (kn->ino != ino)
+		goto out;
+	rcu_read_unlock();
+
+	return kn;
+out:
+	rcu_read_unlock();
+	kernfs_put(kn);	/* kn is non-NULL only on ino mismatch */
+	return NULL;
+}
+
 /**
  *	kernfs_add_one - add kernfs_node to parent without warning
  *	@kn: kernfs_node to be added
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index 2d5144a..3534cfe 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -98,6 +98,8 @@  int kernfs_add_one(struct kernfs_node *kn);
 struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
 				    const char *name, umode_t mode,
 				    unsigned flags);
+struct kernfs_node *kernfs_get_node_by_ino(struct kernfs_root *root,
+					   unsigned int ino);
 
 /*
  * file.c
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index d5b149a..343dfeb 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -332,5 +332,7 @@  void __init kernfs_init(void)
 {
 	kernfs_node_cache = kmem_cache_create("kernfs_node_cache",
 					      sizeof(struct kernfs_node),
-					      0, SLAB_PANIC, NULL);
+					      0,
+					      SLAB_PANIC | SLAB_TYPESAFE_BY_RCU,
+					      NULL);
 }