diff mbox series

[1/6] ext4: replace kthread freezing with auto fs freezing

Message ID 20250401-work-freeze-v1-1-d000611d4ab0@kernel.org (mailing list archive)
State New
Headers show
Series power: wire-up filesystem freeze/thaw with suspend/resume | expand

Commit Message

Christian Brauner April 1, 2025, 12:32 a.m. UTC
From: Luis Chamberlain <mcgrof@kernel.org>

The kernel power management now supports allowing the VFS
to handle filesystem freezing and thawing. Take advantage
of that and remove the kthread freezing. This is needed so that we
properly stop IO in flight without races after userspace
has been frozen. Without this we rely on kthread freezing, whose
semantics are loose and error prone.

The filesystem therefore is in charge of properly dealing with
quiescing of the filesystem through its callbacks if it thinks
it knows better than the VFS how to handle it.

The following Coccinelle rule was used to remove the now superfluous
freezer calls:

make coccicheck MODE=patch SPFLAGS="--in-place --no-show-diff" COCCI=./fs-freeze-cleanup.cocci M=fs/ext4

virtual patch

@ remove_set_freezable @
expression time;
statement S, S2;
expression task, current;
@@

(
-       set_freezable();
|
-       if (try_to_freeze())
-               continue;
|
-       try_to_freeze();
|
-       freezable_schedule();
+       schedule();
|
-       freezable_schedule_timeout(time);
+       schedule_timeout(time);
|
-       if (freezing(task)) { S }
|
-       if (freezing(task)) { S }
-       else
	    { S2 }
|
-       freezing(current)
)

@ remove_wq_freezable @
expression WQ_E, WQ_ARG1, WQ_ARG2, WQ_ARG3, WQ_ARG4;
identifier fs_wq_fn;
@@

(
    WQ_E = alloc_workqueue(WQ_ARG1,
-                              WQ_ARG2 | WQ_FREEZABLE,
+                              WQ_ARG2,
			   ...);
|
    WQ_E = alloc_workqueue(WQ_ARG1,
-                              WQ_ARG2 | WQ_FREEZABLE | WQ_ARG3,
+                              WQ_ARG2 | WQ_ARG3,
			   ...);
|
    WQ_E = alloc_workqueue(WQ_ARG1,
-                              WQ_ARG2 | WQ_ARG3 | WQ_FREEZABLE,
+                              WQ_ARG2 | WQ_ARG3,
			   ...);
|
    WQ_E = alloc_workqueue(WQ_ARG1,
-                              WQ_ARG2 | WQ_ARG3 | WQ_FREEZABLE | WQ_ARG4,
+                              WQ_ARG2 | WQ_ARG3 | WQ_ARG4,
			   ...);
|
	    WQ_E =
-               WQ_ARG1 | WQ_FREEZABLE
+               WQ_ARG1
|
	    WQ_E =
-               WQ_ARG1 | WQ_FREEZABLE | WQ_ARG3
+               WQ_ARG1 | WQ_ARG3
|
    fs_wq_fn(
-               WQ_FREEZABLE | WQ_ARG2 | WQ_ARG3
+               WQ_ARG2 | WQ_ARG3
    )
|
    fs_wq_fn(
-               WQ_FREEZABLE | WQ_ARG2
+               WQ_ARG2
    )
|
    fs_wq_fn(
-               WQ_FREEZABLE
+               0
    )
)

@ add_auto_flag @
expression E1;
identifier fs_type;
@@

struct file_system_type fs_type = {
	.fs_flags = E1
+                   | FS_AUTOFREEZE
	,
};

Generated-by: Coccinelle SmPL
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Link: https://lore.kernel.org/r/20250326112220.1988619-5-mcgrof@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/ext4/mballoc.c | 2 +-
 fs/ext4/super.c   | 3 ---
 2 files changed, 1 insertion(+), 4 deletions(-)

Comments

Jan Kara April 1, 2025, 9:16 a.m. UTC | #1
On Tue 01-04-25 02:32:46, Christian Brauner wrote:
> From: Luis Chamberlain <mcgrof@kernel.org>
> 
> The kernel power management now supports allowing the VFS
> to handle filesystem freezing freezes and thawing. Take advantage
> of that and remove the kthread freezing. This is needed so that we
> properly really stop IO in flight without races after userspace
> has been frozen. Without this we rely on kthread freezing and
> its semantics are loose and error prone.
> 
> The filesystem therefore is in charge of properly dealing with
> quiescing of the filesystem through its callbacks if it thinks
> it knows better than how the VFS handles it.
> 
> The following Coccinelle rule was used as to remove the now superfluous
> freezer calls:
> 
> make coccicheck MODE=patch SPFLAGS="--in-place --no-show-diff" COCCI=./fs-freeze-cleanup.cocci M=fs/ext4
> 
> virtual patch
> 
> @ remove_set_freezable @
> expression time;
> statement S, S2;
> expression task, current;
> @@
> 
> (
> -       set_freezable();
> |
> -       if (try_to_freeze())
> -               continue;
> |
> -       try_to_freeze();
> |
> -       freezable_schedule();
> +       schedule();
> |
> -       freezable_schedule_timeout(time);
> +       schedule_timeout(time);
> |
> -       if (freezing(task)) { S }
> |
> -       if (freezing(task)) { S }
> -       else
> 	    { S2 }
> |
> -       freezing(current)
> )
> 
> @ remove_wq_freezable @
> expression WQ_E, WQ_ARG1, WQ_ARG2, WQ_ARG3, WQ_ARG4;
> identifier fs_wq_fn;
> @@
> 
> (
>     WQ_E = alloc_workqueue(WQ_ARG1,
> -                              WQ_ARG2 | WQ_FREEZABLE,
> +                              WQ_ARG2,
> 			   ...);
> |
>     WQ_E = alloc_workqueue(WQ_ARG1,
> -                              WQ_ARG2 | WQ_FREEZABLE | WQ_ARG3,
> +                              WQ_ARG2 | WQ_ARG3,
> 			   ...);
> |
>     WQ_E = alloc_workqueue(WQ_ARG1,
> -                              WQ_ARG2 | WQ_ARG3 | WQ_FREEZABLE,
> +                              WQ_ARG2 | WQ_ARG3,
> 			   ...);
> |
>     WQ_E = alloc_workqueue(WQ_ARG1,
> -                              WQ_ARG2 | WQ_ARG3 | WQ_FREEZABLE | WQ_ARG4,
> +                              WQ_ARG2 | WQ_ARG3 | WQ_ARG4,
> 			   ...);
> |
> 	    WQ_E =
> -               WQ_ARG1 | WQ_FREEZABLE
> +               WQ_ARG1
> |
> 	    WQ_E =
> -               WQ_ARG1 | WQ_FREEZABLE | WQ_ARG3
> +               WQ_ARG1 | WQ_ARG3
> |
>     fs_wq_fn(
> -               WQ_FREEZABLE | WQ_ARG2 | WQ_ARG3
> +               WQ_ARG2 | WQ_ARG3
>     )
> |
>     fs_wq_fn(
> -               WQ_FREEZABLE | WQ_ARG2
> +               WQ_ARG2
>     )
> |
>     fs_wq_fn(
> -               WQ_FREEZABLE
> +               0
>     )
> )
> 
> @ add_auto_flag @
> expression E1;
> identifier fs_type;
> @@
> 
> struct file_system_type fs_type = {
> 	.fs_flags = E1
> +                   | FS_AUTOFREEZE
> 	,
> };
> 
> Generated-by: Coccinelle SmPL
> Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
> Link: https://lore.kernel.org/r/20250326112220.1988619-5-mcgrof@kernel.org
> Signed-off-by: Christian Brauner <brauner@kernel.org>
> ---
>  fs/ext4/mballoc.c | 2 +-
>  fs/ext4/super.c   | 3 ---
>  2 files changed, 1 insertion(+), 4 deletions(-)
> 
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index 0d523e9fb3d5..ae235ec5ff3a 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -6782,7 +6782,7 @@ static ext4_grpblk_t ext4_last_grp_cluster(struct super_block *sb,
>  
>  static bool ext4_trim_interrupted(void)
>  {
> -	return fatal_signal_pending(current) || freezing(current);
> +	return fatal_signal_pending(current);
>  }

This change should not happen. ext4_trim_interrupted() makes sure FITRIM
ioctl doesn't cause hibernation failures and has nothing to do with kthread
freezing...

Otherwise the patch looks good.

								Honza
Christian Brauner April 1, 2025, 9:35 a.m. UTC | #2
On Tue, Apr 01, 2025 at 11:16:18AM +0200, Jan Kara wrote:
> On Tue 01-04-25 02:32:46, Christian Brauner wrote:
> > From: Luis Chamberlain <mcgrof@kernel.org>
> > 
> > The kernel power management now supports allowing the VFS
> > to handle filesystem freezing freezes and thawing. Take advantage
> > of that and remove the kthread freezing. This is needed so that we
> > properly really stop IO in flight without races after userspace
> > has been frozen. Without this we rely on kthread freezing and
> > its semantics are loose and error prone.
> > 
> > The filesystem therefore is in charge of properly dealing with
> > quiescing of the filesystem through its callbacks if it thinks
> > it knows better than how the VFS handles it.
> > 
> > The following Coccinelle rule was used as to remove the now superfluous
> > freezer calls:
> > 
> > make coccicheck MODE=patch SPFLAGS="--in-place --no-show-diff" COCCI=./fs-freeze-cleanup.cocci M=fs/ext4
> > 
> > virtual patch
> > 
> > @ remove_set_freezable @
> > expression time;
> > statement S, S2;
> > expression task, current;
> > @@
> > 
> > (
> > -       set_freezable();
> > |
> > -       if (try_to_freeze())
> > -               continue;
> > |
> > -       try_to_freeze();
> > |
> > -       freezable_schedule();
> > +       schedule();
> > |
> > -       freezable_schedule_timeout(time);
> > +       schedule_timeout(time);
> > |
> > -       if (freezing(task)) { S }
> > |
> > -       if (freezing(task)) { S }
> > -       else
> > 	    { S2 }
> > |
> > -       freezing(current)
> > )
> > 
> > @ remove_wq_freezable @
> > expression WQ_E, WQ_ARG1, WQ_ARG2, WQ_ARG3, WQ_ARG4;
> > identifier fs_wq_fn;
> > @@
> > 
> > (
> >     WQ_E = alloc_workqueue(WQ_ARG1,
> > -                              WQ_ARG2 | WQ_FREEZABLE,
> > +                              WQ_ARG2,
> > 			   ...);
> > |
> >     WQ_E = alloc_workqueue(WQ_ARG1,
> > -                              WQ_ARG2 | WQ_FREEZABLE | WQ_ARG3,
> > +                              WQ_ARG2 | WQ_ARG3,
> > 			   ...);
> > |
> >     WQ_E = alloc_workqueue(WQ_ARG1,
> > -                              WQ_ARG2 | WQ_ARG3 | WQ_FREEZABLE,
> > +                              WQ_ARG2 | WQ_ARG3,
> > 			   ...);
> > |
> >     WQ_E = alloc_workqueue(WQ_ARG1,
> > -                              WQ_ARG2 | WQ_ARG3 | WQ_FREEZABLE | WQ_ARG4,
> > +                              WQ_ARG2 | WQ_ARG3 | WQ_ARG4,
> > 			   ...);
> > |
> > 	    WQ_E =
> > -               WQ_ARG1 | WQ_FREEZABLE
> > +               WQ_ARG1
> > |
> > 	    WQ_E =
> > -               WQ_ARG1 | WQ_FREEZABLE | WQ_ARG3
> > +               WQ_ARG1 | WQ_ARG3
> > |
> >     fs_wq_fn(
> > -               WQ_FREEZABLE | WQ_ARG2 | WQ_ARG3
> > +               WQ_ARG2 | WQ_ARG3
> >     )
> > |
> >     fs_wq_fn(
> > -               WQ_FREEZABLE | WQ_ARG2
> > +               WQ_ARG2
> >     )
> > |
> >     fs_wq_fn(
> > -               WQ_FREEZABLE
> > +               0
> >     )
> > )
> > 
> > @ add_auto_flag @
> > expression E1;
> > identifier fs_type;
> > @@
> > 
> > struct file_system_type fs_type = {
> > 	.fs_flags = E1
> > +                   | FS_AUTOFREEZE
> > 	,
> > };
> > 
> > Generated-by: Coccinelle SmPL
> > Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
> > Link: https://lore.kernel.org/r/20250326112220.1988619-5-mcgrof@kernel.org
> > Signed-off-by: Christian Brauner <brauner@kernel.org>
> > ---
> >  fs/ext4/mballoc.c | 2 +-
> >  fs/ext4/super.c   | 3 ---
> >  2 files changed, 1 insertion(+), 4 deletions(-)
> > 
> > diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> > index 0d523e9fb3d5..ae235ec5ff3a 100644
> > --- a/fs/ext4/mballoc.c
> > +++ b/fs/ext4/mballoc.c
> > @@ -6782,7 +6782,7 @@ static ext4_grpblk_t ext4_last_grp_cluster(struct super_block *sb,
> >  
> >  static bool ext4_trim_interrupted(void)
> >  {
> > -	return fatal_signal_pending(current) || freezing(current);
> > +	return fatal_signal_pending(current);
> >  }
> 
> This change should not happen. ext4_trim_interrupted() makes sure FITRIM
> ioctl doesn't cause hibernation failures and has nothing to do with kthread
> freezing...
> 
> Otherwise the patch looks good.

Afaict, we don't have to do these changes now. Yes, once fsfreeze
reliably works in the suspend/resume codepaths then we can switch all
that off and remove the old freezer. But we should only do that once we
have some experience with the new filesystem freezing during
suspend/hibernate. So we should place this under a
/sys/power/freeze_filesystems knob and wait a few kernel releases to see
whether we see significant problems. How does that sound to you?
Jan Kara April 1, 2025, 10:08 a.m. UTC | #3
On Tue 01-04-25 11:35:56, Christian Brauner wrote:
> On Tue, Apr 01, 2025 at 11:16:18AM +0200, Jan Kara wrote:
> > > ---
> > >  fs/ext4/mballoc.c | 2 +-
> > >  fs/ext4/super.c   | 3 ---
> > >  2 files changed, 1 insertion(+), 4 deletions(-)
> > > 
> > > diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> > > index 0d523e9fb3d5..ae235ec5ff3a 100644
> > > --- a/fs/ext4/mballoc.c
> > > +++ b/fs/ext4/mballoc.c
> > > @@ -6782,7 +6782,7 @@ static ext4_grpblk_t ext4_last_grp_cluster(struct super_block *sb,
> > >  
> > >  static bool ext4_trim_interrupted(void)
> > >  {
> > > -	return fatal_signal_pending(current) || freezing(current);
> > > +	return fatal_signal_pending(current);
> > >  }
> > 
> > This change should not happen. ext4_trim_interrupted() makes sure FITRIM
> > ioctl doesn't cause hibernation failures and has nothing to do with kthread
> > freezing...
> > 
> > Otherwise the patch looks good.
> 
> Afaict, we don't have to do these changes now. Yes, once fsfreeze
> reliably works in the suspend/resume codepaths then we can switch all
> that off and remove the old freezer. But we should only do that once we
> have some experience with the new filesystem freezing during
> suspend/hibernate. So we should place this under a
> /sys/power/freeze_filesystems knob and wait a few kernel releases to see
> whether we see significant problems. How does that sound to you?

I agree that enabling this with some knob to allow easy way out if things
don't work makes sense. And the removal of kthread freezing can be done
somewhat later when we are more confident filesystem freezing on
hibernation is solid.

								Honza
diff mbox series

Patch

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 0d523e9fb3d5..ae235ec5ff3a 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -6782,7 +6782,7 @@  static ext4_grpblk_t ext4_last_grp_cluster(struct super_block *sb,
 
 static bool ext4_trim_interrupted(void)
 {
-	return fatal_signal_pending(current) || freezing(current);
+	return fatal_signal_pending(current);
 }
 
 static int ext4_try_to_trim_range(struct super_block *sb,
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 8122d4ffb3b5..020c818078d7 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3778,7 +3778,6 @@  static int ext4_lazyinit_thread(void *arg)
 	unsigned long next_wakeup, cur;
 
 	BUG_ON(NULL == eli);
-	set_freezable();
 
 cont_thread:
 	while (true) {
@@ -3837,8 +3836,6 @@  static int ext4_lazyinit_thread(void *arg)
 		}
 		mutex_unlock(&eli->li_list_mtx);
 
-		try_to_freeze();
-
 		cur = jiffies;
 		if (!next_wakeup_initialized || time_after_eq(cur, next_wakeup)) {
 			cond_resched();