diff mbox

[v2,7/8] shift percpu_counter_destroy() into destroy_super_work()

Message ID 20150811170416.GA26931@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Oleg Nesterov Aug. 11, 2015, 5:04 p.m. UTC
Of course, this patch is ugly as hell. It will be (partially)
reverted later. We add it to ensure that other WIP changes in
percpu_rw_semaphore won't break fs/super.c.

We do not even need this change right now, percpu_free_rwsem()
is fine in atomic context. But we are going to change this, it
will be might_sleep() after we merge the rcu_sync() patches.

And even after that we do not really need destroy_super_work(),
we will kill it in any case. Instead, destroy_super_rcu() should
just check that rss->cb_state == CB_IDLE and do call_rcu() again
in the (very unlikely) case this is not true.

So this is just the temporary kludge which helps us to avoid the
conflicts with the changes which will be (hopefully) routed via
rcu tree.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 fs/super.c         |   23 +++++++++++++++++++----
 include/linux/fs.h |    3 ++-
 2 files changed, 21 insertions(+), 5 deletions(-)

Comments

Jan Kara Aug. 13, 2015, 10:35 a.m. UTC | #1
On Tue 11-08-15 19:04:16, Oleg Nesterov wrote:
> Of course, this patch is ugly as hell. It will be (partially)
> reverted later. We add it to ensure that other WIP changes in
> percpu_rw_semaphore won't break fs/super.c.
> 
> We do not even need this change right now, percpu_free_rwsem()
> is fine in atomic context. But we are going to change this, it
> will be might_sleep() after we merge the rcu_sync() patches.
> 
> And even after that we do not really need destroy_super_work(),
> we will kill it in any case. Instead, destroy_super_rcu() should
> just check that rss->cb_state == CB_IDLE and do call_rcu() again
> in the (very unlikely) case this is not true.
> 
> So this is just the temporary kludge which helps us to avoid the
> conflicts with the changes which will be (hopefully) routed via
> rcu tree.
> 
> Signed-off-by: Oleg Nesterov <oleg@redhat.com>

Looking into this again, it would seem somewhat cleaner to me to move the
destruction to deactivate_locked_super() instead. We already do this for
list_lru_destroy() because that can sleep as well and so I'd prefer to keep
these two things together. You have to be somewhat careful with the failure
path in alloc_super() but that's easily doable...

								Honza

> ---
>  fs/super.c         |   23 +++++++++++++++++++----
>  include/linux/fs.h |    3 ++-
>  2 files changed, 21 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/super.c b/fs/super.c
> index 89b58fb..75436e2 100644
> --- a/fs/super.c
> +++ b/fs/super.c
> @@ -135,6 +135,24 @@ static unsigned long super_cache_count(struct shrinker *shrink,
>  	return total_objects;
>  }
>  
> +static void destroy_super_work(struct work_struct *work)
> +{
> +	struct super_block *s = container_of(work, struct super_block,
> +							destroy_work);
> +	int i;
> +
> +	for (i = 0; i < SB_FREEZE_LEVELS; i++)
> +		percpu_counter_destroy(&s->s_writers.counter[i]);
> +	kfree(s);
> +}
> +
> +static void destroy_super_rcu(struct rcu_head *head)
> +{
> +	struct super_block *s = container_of(head, struct super_block, rcu);
> +	INIT_WORK(&s->destroy_work, destroy_super_work);
> +	schedule_work(&s->destroy_work);
> +}
> +
>  /**
>   *	destroy_super	-	frees a superblock
>   *	@s: superblock to free
> @@ -143,16 +161,13 @@ static unsigned long super_cache_count(struct shrinker *shrink,
>   */
>  static void destroy_super(struct super_block *s)
>  {
> -	int i;
>  	list_lru_destroy(&s->s_dentry_lru);
>  	list_lru_destroy(&s->s_inode_lru);
> -	for (i = 0; i < SB_FREEZE_LEVELS; i++)
> -		percpu_counter_destroy(&s->s_writers.counter[i]);
>  	security_sb_free(s);
>  	WARN_ON(!list_empty(&s->s_mounts));
>  	kfree(s->s_subtype);
>  	kfree(s->s_options);
> -	kfree_rcu(s, rcu);
> +	call_rcu(&s->rcu, destroy_super_rcu);
>  }
>  
>  /**
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 78ac768..6addccc 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -30,6 +30,7 @@
>  #include <linux/lockdep.h>
>  #include <linux/percpu-rwsem.h>
>  #include <linux/blk_types.h>
> +#include <linux/workqueue.h>
>  
>  #include <asm/byteorder.h>
>  #include <uapi/linux/fs.h>
> @@ -1346,7 +1347,7 @@ struct super_block {
>  	struct list_lru		s_dentry_lru ____cacheline_aligned_in_smp;
>  	struct list_lru		s_inode_lru ____cacheline_aligned_in_smp;
>  	struct rcu_head		rcu;
> -
> +	struct work_struct	destroy_work;
>  	/*
>  	 * Indicates how deep in a filesystem stack this SB is
>  	 */
> -- 
> 1.5.5.1
>
Oleg Nesterov Aug. 13, 2015, 1:36 p.m. UTC | #2
On 08/13, Jan Kara wrote:
>
> On Tue 11-08-15 19:04:16, Oleg Nesterov wrote:
> >
> > So this is just the temporary kludge which helps us to avoid the
> > conflicts with the changes which will be (hopefully) routed via
> > rcu tree.
> >
> > Signed-off-by: Oleg Nesterov <oleg@redhat.com>
>
> Looking into this again, it would seem somewhat cleaner to me to move the
> destruction to deactivate_locked_super() instead.

Heh ;) You know, I was looking at deactivate_locked_super(). However, I
simply do not understand this code enough, I failed to verify it would
be safe to destroy s_writers there.

And. Please note destroy_super() in alloc_super() error path, so this
needs a bit more changes in any case.

Can't we live with this hack for now? To remind, it will be reverted
(at least partially) in any case. Yes, yes, it is very ugly and the
changelog documents this fact. But it looks simple and safe. To me
it would be better to make the conversion first, then cleanup this
horror after another discussion.

What do you think?

Oleg.

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jan Kara Aug. 13, 2015, 2:09 p.m. UTC | #3
On Thu 13-08-15 15:36:16, Oleg Nesterov wrote:
> On 08/13, Jan Kara wrote:
> >
> > On Tue 11-08-15 19:04:16, Oleg Nesterov wrote:
> > >
> > > So this is just the temporary kludge which helps us to avoid the
> > > conflicts with the changes which will be (hopefully) routed via
> > > rcu tree.
> > >
> > > Signed-off-by: Oleg Nesterov <oleg@redhat.com>
> >
> > Looking into this again, it would seem somewhat cleaner to me to move the
> > destruction to deactivate_locked_super() instead.
> 
> Heh ;) You know, I was looking at deactivate_locked_super(). However, I
> simply do not understand this code enough, I failed to verify it would
> be safe to destroy s_writers there.

Yes, it will be safe. After ->kill_sb() callback the filesystem is dead.
There can be someone still holding reference to superblock but these are
just users inspecting the structure definitely not caring about freeze
protection.

> And. Please note destroy_super() in alloc_super() error path, so this
> needs a bit more changes in any case.

Yes. But you can sleep in alloc_super() so that would be easy enough.

> Can't we live with this hack for now? To remind, it will be reverted
> (at least partially) in any case. Yes, yes, it is very ugly and the
> changelog documents this fact. But it looks simple and safe. To me
> it would be better to make the conversion first, then cleanup this
> horror after another discussion.

All I care about is that long-term, all handling from destroy_super() that
needs to sleep ends up in one place. So if you promise you'll make this
happen I can live with the workqueue solution for now (but you have to
convince also Al as a maintainer ;).

								Honza
Oleg Nesterov Aug. 13, 2015, 3:20 p.m. UTC | #4
On 08/13, Jan Kara wrote:
>
> On Thu 13-08-15 15:36:16, Oleg Nesterov wrote:
> > On 08/13, Jan Kara wrote:
> > >
> > > Looking into this again, it would seem somewhat cleaner to me to move the
> > > destruction to deactivate_locked_super() instead.
> >
> > Heh ;) You know, I was looking at deactivate_locked_super(). However, I
> > simply do not understand this code enough, I failed to verify it would
> > be safe to destroy s_writers there.
>
> Yes, it will be safe. After ->kill_sb() callback the filesystem is dead.
> There can be someone still holding reference to superblock but these are
> just users inspecting the structure definitely not caring about freeze
> protection.

OK, thanks.

> > And. Please note destroy_super() in alloc_super() error path, so this
> > needs a bit more changes in any case.
>
> Yes. But you can sleep in alloc_super() so that would be easy enough.

Yes, yes, I didn't mean this is a problem.

> > Can't we live with this hack for now? To remind, it will be reverted
> > (at least partially) in any case. Yes, yes, it is very ugly and the
> > changelog documents this fact. But it looks simple and safe. To me
> > it would be better to make the conversion first, then cleanup this
> > horror after another discussion.
>
> All I care about is that long-term, all handling from destroy_super() that
> needs to sleep ends up in one place. So if you promise you'll make this
> happen I can live with the workqueue solution for now

I certainly promise I will try to do something in any case ;)

But let me repeat another reason why I think we should do this later.
The necessary changes depend on other work-in-progress rcu_sync changes
in percpu_rw_semaphore.

Now that you confirm that we should not worry about sb_writers after
deactivate_locked_super(), the cleanup looks even simpler than I
thought initially:

	1. We do not even need to destroy the counters in
	   deactivate_locked_super(). It should only stop the
	   (potentially) pending rcu-callback(s).

	2. Just revert this patch altogether.

> (but you have to
> convince also Al as a maintainer ;).

Perhaps he won't notice how ugly this change is? If you won't tell him.

Oleg.

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/super.c b/fs/super.c
index 89b58fb..75436e2 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -135,6 +135,24 @@  static unsigned long super_cache_count(struct shrinker *shrink,
 	return total_objects;
 }
 
+static void destroy_super_work(struct work_struct *work)
+{
+	struct super_block *s = container_of(work, struct super_block,
+							destroy_work);
+	int i;
+
+	for (i = 0; i < SB_FREEZE_LEVELS; i++)
+		percpu_counter_destroy(&s->s_writers.counter[i]);
+	kfree(s);
+}
+
+static void destroy_super_rcu(struct rcu_head *head)
+{
+	struct super_block *s = container_of(head, struct super_block, rcu);
+	INIT_WORK(&s->destroy_work, destroy_super_work);
+	schedule_work(&s->destroy_work);
+}
+
 /**
  *	destroy_super	-	frees a superblock
  *	@s: superblock to free
@@ -143,16 +161,13 @@  static unsigned long super_cache_count(struct shrinker *shrink,
  */
 static void destroy_super(struct super_block *s)
 {
-	int i;
 	list_lru_destroy(&s->s_dentry_lru);
 	list_lru_destroy(&s->s_inode_lru);
-	for (i = 0; i < SB_FREEZE_LEVELS; i++)
-		percpu_counter_destroy(&s->s_writers.counter[i]);
 	security_sb_free(s);
 	WARN_ON(!list_empty(&s->s_mounts));
 	kfree(s->s_subtype);
 	kfree(s->s_options);
-	kfree_rcu(s, rcu);
+	call_rcu(&s->rcu, destroy_super_rcu);
 }
 
 /**
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 78ac768..6addccc 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -30,6 +30,7 @@ 
 #include <linux/lockdep.h>
 #include <linux/percpu-rwsem.h>
 #include <linux/blk_types.h>
+#include <linux/workqueue.h>
 
 #include <asm/byteorder.h>
 #include <uapi/linux/fs.h>
@@ -1346,7 +1347,7 @@  struct super_block {
 	struct list_lru		s_dentry_lru ____cacheline_aligned_in_smp;
 	struct list_lru		s_inode_lru ____cacheline_aligned_in_smp;
 	struct rcu_head		rcu;
-
+	struct work_struct	destroy_work;
 	/*
 	 * Indicates how deep in a filesystem stack this SB is
 	 */