diff mbox series

[3/6] xfs: replace kthread freezing with auto fs freezing

Message ID 20250401-work-freeze-v1-3-d000611d4ab0@kernel.org (mailing list archive)
State New
Headers show
Series power: wire-up filesystem freeze/thaw with suspend/resume | expand

Commit Message

Christian Brauner April 1, 2025, 12:32 a.m. UTC
From: Luis Chamberlain <mcgrof@kernel.org>

The kernel power management now supports allowing the VFS
to handle filesystem freezing and thawing. Take advantage
of that and remove the kthread freezing. This is needed so that we
properly stop IO in flight without races after userspace
has been frozen. Without this we rely on kthread freezing, whose
semantics are loose and error prone.

The filesystem is therefore in charge of properly quiescing
itself through its callbacks if it thinks it knows better
than the VFS how to handle it.

The following Coccinelle rule was used to remove the now superfluous
freezer calls:

make coccicheck MODE=patch SPFLAGS="--in-place --no-show-diff" COCCI=./fs-freeze-cleanup.cocci M=fs/xfs

virtual patch

@ remove_set_freezable @
expression time;
statement S, S2;
expression task, current;
@@

(
-       set_freezable();
|
-       if (try_to_freeze())
-               continue;
|
-       try_to_freeze();
|
-       freezable_schedule();
+       schedule();
|
-       freezable_schedule_timeout(time);
+       schedule_timeout(time);
|
-       if (freezing(task)) { S }
|
-       if (freezing(task)) { S }
-       else
	    { S2 }
|
-       freezing(current)
)

@ remove_wq_freezable @
expression WQ_E, WQ_ARG1, WQ_ARG2, WQ_ARG3, WQ_ARG4;
identifier fs_wq_fn;
@@

(
    WQ_E = alloc_workqueue(WQ_ARG1,
-                              WQ_ARG2 | WQ_FREEZABLE,
+                              WQ_ARG2,
			   ...);
|
    WQ_E = alloc_workqueue(WQ_ARG1,
-                              WQ_ARG2 | WQ_FREEZABLE | WQ_ARG3,
+                              WQ_ARG2 | WQ_ARG3,
			   ...);
|
    WQ_E = alloc_workqueue(WQ_ARG1,
-                              WQ_ARG2 | WQ_ARG3 | WQ_FREEZABLE,
+                              WQ_ARG2 | WQ_ARG3,
			   ...);
|
    WQ_E = alloc_workqueue(WQ_ARG1,
-                              WQ_ARG2 | WQ_ARG3 | WQ_FREEZABLE | WQ_ARG4,
+                              WQ_ARG2 | WQ_ARG3 | WQ_ARG4,
			   ...);
|
	    WQ_E =
-               WQ_ARG1 | WQ_FREEZABLE
+               WQ_ARG1
|
	    WQ_E =
-               WQ_ARG1 | WQ_FREEZABLE | WQ_ARG3
+               WQ_ARG1 | WQ_ARG3
|
    fs_wq_fn(
-               WQ_FREEZABLE | WQ_ARG2 | WQ_ARG3
+               WQ_ARG2 | WQ_ARG3
    )
|
    fs_wq_fn(
-               WQ_FREEZABLE | WQ_ARG2
+               WQ_ARG2
    )
|
    fs_wq_fn(
-               WQ_FREEZABLE
+               0
    )
)

@ add_auto_flag @
expression E1;
identifier fs_type;
@@

struct file_system_type fs_type = {
	.fs_flags = E1
+                   | FS_AUTOFREEZE
	,
};

Generated-by: Coccinelle SmPL
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Link: https://lore.kernel.org/r/20250326112220.1988619-7-mcgrof@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/xfs/xfs_discard.c   |  2 +-
 fs/xfs/xfs_log.c       |  3 +--
 fs/xfs/xfs_log_cil.c   |  2 +-
 fs/xfs/xfs_mru_cache.c |  2 +-
 fs/xfs/xfs_pwork.c     |  2 +-
 fs/xfs/xfs_super.c     | 14 +++++++-------
 fs/xfs/xfs_trans_ail.c |  3 ---
 fs/xfs/xfs_zone_gc.c   |  2 --
 8 files changed, 12 insertions(+), 18 deletions(-)

Comments

Dave Chinner April 1, 2025, 1:11 a.m. UTC | #1
On Tue, Apr 01, 2025 at 02:32:48AM +0200, Christian Brauner wrote:
> From: Luis Chamberlain <mcgrof@kernel.org>
> 
> The kernel power management now supports allowing the VFS
> to handle filesystem freezing freezes and thawing. Take advantage
> of that and remove the kthread freezing. This is needed so that we
> properly really stop IO in flight without races after userspace
> has been frozen. Without this we rely on kthread freezing and
> its semantics are loose and error prone.
> 
> The filesystem therefore is in charge of properly dealing with
> quiescing of the filesystem through its callbacks if it thinks
> it knows better than how the VFS handles it.
> 
.....

> diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
> index 0fcb1828e598..ad8183db0780 100644
> --- a/fs/xfs/xfs_trans_ail.c
> +++ b/fs/xfs/xfs_trans_ail.c
> @@ -636,7 +636,6 @@ xfsaild(
>  	unsigned int	noreclaim_flag;
>  
>  	noreclaim_flag = memalloc_noreclaim_save();
> -	set_freezable();
>  
>  	while (1) {
>  		/*
> @@ -695,8 +694,6 @@ xfsaild(
>  
>  		__set_current_state(TASK_RUNNING);
>  
> -		try_to_freeze();
> -
>  		tout = xfsaild_push(ailp);
>  	}
>  

So what about the TASK_FREEZABLE flag that is set in this code
before sleeping?

i.e. this code before we schedule():

                if (tout && tout <= 20)
                        set_current_state(TASK_KILLABLE|TASK_FREEZABLE);
                else
                        set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);

Shouldn't TASK_FREEZABLE go away, too?

> diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c
> index c5136ea9bb1d..1875b6551ab0 100644
> --- a/fs/xfs/xfs_zone_gc.c
> +++ b/fs/xfs/xfs_zone_gc.c
> @@ -993,7 +993,6 @@ xfs_zone_gc_handle_work(
>  	}
>  
>  	__set_current_state(TASK_RUNNING);
> -	try_to_freeze();
>  
>  	if (reset_list)
>  		xfs_zone_gc_reset_zones(data, reset_list);
> @@ -1041,7 +1040,6 @@ xfs_zoned_gcd(
>  	unsigned int		nofs_flag;
>  
>  	nofs_flag = memalloc_nofs_save();
> -	set_freezable();
>  
>  	for (;;) {
>  		set_current_state(TASK_INTERRUPTIBLE | TASK_FREEZABLE);

Same question here for this newly merged code, too...

-Dave.
Christian Brauner April 1, 2025, 7:17 a.m. UTC | #2
On Tue, Apr 01, 2025 at 12:11:04PM +1100, Dave Chinner wrote:
> On Tue, Apr 01, 2025 at 02:32:48AM +0200, Christian Brauner wrote:
> > From: Luis Chamberlain <mcgrof@kernel.org>
> > 
> > The kernel power management now supports allowing the VFS
> > to handle filesystem freezing freezes and thawing. Take advantage
> > of that and remove the kthread freezing. This is needed so that we
> > properly really stop IO in flight without races after userspace
> > has been frozen. Without this we rely on kthread freezing and
> > its semantics are loose and error prone.
> > 
> > The filesystem therefore is in charge of properly dealing with
> > quiescing of the filesystem through its callbacks if it thinks
> > it knows better than how the VFS handles it.
> > 
> .....
> 
> > diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
> > index 0fcb1828e598..ad8183db0780 100644
> > --- a/fs/xfs/xfs_trans_ail.c
> > +++ b/fs/xfs/xfs_trans_ail.c
> > @@ -636,7 +636,6 @@ xfsaild(
> >  	unsigned int	noreclaim_flag;
> >  
> >  	noreclaim_flag = memalloc_noreclaim_save();
> > -	set_freezable();
> >  
> >  	while (1) {
> >  		/*
> > @@ -695,8 +694,6 @@ xfsaild(
> >  
> >  		__set_current_state(TASK_RUNNING);
> >  
> > -		try_to_freeze();
> > -
> >  		tout = xfsaild_push(ailp);
> >  	}
> >  
> 
> So what about the TASK_FREEZABLE flag that is set in this code
> before sleeping?
> 
> i.e. this code before we schedule():
> 
>                 if (tout && tout <= 20)
>                         set_current_state(TASK_KILLABLE|TASK_FREEZABLE);
>                 else
>                         set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
> 
> Shouldn't TASK_FREEZABLE go away, too?

Thanks for spotting! Yes, yesterday late at night I just took Luis's
patches as they are and had only gotten around to testing btrfs. The
coccinelle scripts seem to have missed those. I'll wait for comments
and will do another pass and send out v2.

> > diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c
> > index c5136ea9bb1d..1875b6551ab0 100644
> > --- a/fs/xfs/xfs_zone_gc.c
> > +++ b/fs/xfs/xfs_zone_gc.c
> > @@ -993,7 +993,6 @@ xfs_zone_gc_handle_work(
> >  	}
> >  
> >  	__set_current_state(TASK_RUNNING);
> > -	try_to_freeze();
> >  
> >  	if (reset_list)
> >  		xfs_zone_gc_reset_zones(data, reset_list);
> > @@ -1041,7 +1040,6 @@ xfs_zoned_gcd(
> >  	unsigned int		nofs_flag;
> >  
> >  	nofs_flag = memalloc_nofs_save();
> > -	set_freezable();
> >  
> >  	for (;;) {
> >  		set_current_state(TASK_INTERRUPTIBLE | TASK_FREEZABLE);
> 
> Same question here for this newly merged code, too...

I'm not sure if this is supposed to be a snipe or not but just in case
this is a hidden question: This isn't merged. Per the cover letter this
is in a work.* branch. Anything that is considered mergeable is in
vfs-6.16.* branches. But since we're pre -rc1 even those branches are
not yet showing up in -next.
Dave Chinner April 1, 2025, 11:35 a.m. UTC | #3
On Tue, Apr 01, 2025 at 09:17:12AM +0200, Christian Brauner wrote:
> On Tue, Apr 01, 2025 at 12:11:04PM +1100, Dave Chinner wrote:
> > On Tue, Apr 01, 2025 at 02:32:48AM +0200, Christian Brauner wrote:
> > > diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c
> > > index c5136ea9bb1d..1875b6551ab0 100644
> > > --- a/fs/xfs/xfs_zone_gc.c
> > > +++ b/fs/xfs/xfs_zone_gc.c
> > > @@ -993,7 +993,6 @@ xfs_zone_gc_handle_work(
> > >  	}
> > >  
> > >  	__set_current_state(TASK_RUNNING);
> > > -	try_to_freeze();
> > >  
> > >  	if (reset_list)
> > >  		xfs_zone_gc_reset_zones(data, reset_list);
> > > @@ -1041,7 +1040,6 @@ xfs_zoned_gcd(
> > >  	unsigned int		nofs_flag;
> > >  
> > >  	nofs_flag = memalloc_nofs_save();
> > > -	set_freezable();
> > >  
> > >  	for (;;) {
> > >  		set_current_state(TASK_INTERRUPTIBLE | TASK_FREEZABLE);
> > 
> > Same question here for this newly merged code, too...
>
> I'm not sure if this is supposed to be a snipe or not but just in case
> this is a hidden question:

No, I meant that this is changing shiny new just-merged XFS code
(part of zone device support). It only just arrived this merge
window and is largely just doing the same thing as the older aild
code. It is probably safe to assume that this new code has never
been tested against hibernate...

-Dave.
Christian Brauner April 1, 2025, 12:45 p.m. UTC | #4
On Tue, Apr 01, 2025 at 10:35:58PM +1100, Dave Chinner wrote:
> On Tue, Apr 01, 2025 at 09:17:12AM +0200, Christian Brauner wrote:
> > On Tue, Apr 01, 2025 at 12:11:04PM +1100, Dave Chinner wrote:
> > > On Tue, Apr 01, 2025 at 02:32:48AM +0200, Christian Brauner wrote:
> > > > diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c
> > > > index c5136ea9bb1d..1875b6551ab0 100644
> > > > --- a/fs/xfs/xfs_zone_gc.c
> > > > +++ b/fs/xfs/xfs_zone_gc.c
> > > > @@ -993,7 +993,6 @@ xfs_zone_gc_handle_work(
> > > >  	}
> > > >  
> > > >  	__set_current_state(TASK_RUNNING);
> > > > -	try_to_freeze();
> > > >  
> > > >  	if (reset_list)
> > > >  		xfs_zone_gc_reset_zones(data, reset_list);
> > > > @@ -1041,7 +1040,6 @@ xfs_zoned_gcd(
> > > >  	unsigned int		nofs_flag;
> > > >  
> > > >  	nofs_flag = memalloc_nofs_save();
> > > > -	set_freezable();
> > > >  
> > > >  	for (;;) {
> > > >  		set_current_state(TASK_INTERRUPTIBLE | TASK_FREEZABLE);
> > > 
> > > Same question here for this newly merged code, too...
> >
> > I'm not sure if this is supposed to be a snipe or not but just in case
> > this is a hidden question:
> 
> No, I meant that this is changing shiny new just-merged XFS code
> (part of zone device support). It only just arrived this merge
> window and is largely just doing the same thing as the older aild
> code. It is probably safe to assume that this new code has never
> been tested against hibernate...

Ah, my brain is completely fried. Apparently reading English is a skill
I've lost since coming back from Montreal. Thanks!
diff mbox series

Patch

diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index c1a306268ae4..1596cf0ecb9b 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -333,7 +333,7 @@  xfs_trim_gather_extents(
 static bool
 xfs_trim_should_stop(void)
 {
-	return fatal_signal_pending(current) || freezing(current);
+	return fatal_signal_pending(current);
 }
 
 /*
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 6493bdb57351..317f6db292fb 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1489,8 +1489,7 @@  xlog_alloc_log(
 	log->l_iclog->ic_prev = prev_iclog;	/* re-write 1st prev ptr */
 
 	log->l_ioend_workqueue = alloc_workqueue("xfs-log/%s",
-			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM |
-				    WQ_HIGHPRI),
+			XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_HIGHPRI),
 			0, mp->m_super->s_id);
 	if (!log->l_ioend_workqueue)
 		goto out_free_iclog;
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 1ca406ec1b40..8ff5d68394e6 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -1932,7 +1932,7 @@  xlog_cil_init(
 	 * concurrency the log spinlocks will be exposed to.
 	 */
 	cil->xc_push_wq = alloc_workqueue("xfs-cil/%s",
-			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_UNBOUND),
+			XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_UNBOUND),
 			4, log->l_mp->m_super->s_id);
 	if (!cil->xc_push_wq)
 		goto out_destroy_cil;
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index d0f5b403bdbe..c9a49c6f6129 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -293,7 +293,7 @@  int
 xfs_mru_cache_init(void)
 {
 	xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache",
-			XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE), 1);
+			XFS_WQFLAGS(WQ_MEM_RECLAIM), 1);
 	if (!xfs_mru_reap_wq)
 		return -ENOMEM;
 	return 0;
diff --git a/fs/xfs/xfs_pwork.c b/fs/xfs/xfs_pwork.c
index c283b801cc5d..3f5bf53f8778 100644
--- a/fs/xfs/xfs_pwork.c
+++ b/fs/xfs/xfs_pwork.c
@@ -72,7 +72,7 @@  xfs_pwork_init(
 	trace_xfs_pwork_init(mp, nr_threads, current->pid);
 
 	pctl->wq = alloc_workqueue("%s-%d",
-			WQ_UNBOUND | WQ_SYSFS | WQ_FREEZABLE, nr_threads, tag,
+			WQ_UNBOUND | WQ_SYSFS, nr_threads, tag,
 			current->pid);
 	if (!pctl->wq)
 		return -ENOMEM;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 53944cc7af24..06eb51a3d13b 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -565,37 +565,37 @@  xfs_init_mount_workqueues(
 	struct xfs_mount	*mp)
 {
 	mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s",
-			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
+			XFS_WQFLAGS(WQ_MEM_RECLAIM),
 			1, mp->m_super->s_id);
 	if (!mp->m_buf_workqueue)
 		goto out;
 
 	mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
-			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
+			XFS_WQFLAGS(WQ_MEM_RECLAIM),
 			0, mp->m_super->s_id);
 	if (!mp->m_unwritten_workqueue)
 		goto out_destroy_buf;
 
 	mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
-			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
+			XFS_WQFLAGS(WQ_MEM_RECLAIM),
 			0, mp->m_super->s_id);
 	if (!mp->m_reclaim_workqueue)
 		goto out_destroy_unwritten;
 
 	mp->m_blockgc_wq = alloc_workqueue("xfs-blockgc/%s",
-			XFS_WQFLAGS(WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM),
+			XFS_WQFLAGS(WQ_UNBOUND | WQ_MEM_RECLAIM),
 			0, mp->m_super->s_id);
 	if (!mp->m_blockgc_wq)
 		goto out_destroy_reclaim;
 
 	mp->m_inodegc_wq = alloc_workqueue("xfs-inodegc/%s",
-			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
+			XFS_WQFLAGS(WQ_MEM_RECLAIM),
 			1, mp->m_super->s_id);
 	if (!mp->m_inodegc_wq)
 		goto out_destroy_blockgc;
 
 	mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s",
-			XFS_WQFLAGS(WQ_FREEZABLE), 0, mp->m_super->s_id);
+			XFS_WQFLAGS(0), 0, mp->m_super->s_id);
 	if (!mp->m_sync_workqueue)
 		goto out_destroy_inodegc;
 
@@ -2488,7 +2488,7 @@  xfs_init_workqueues(void)
 	 * max_active value for this workqueue.
 	 */
 	xfs_alloc_wq = alloc_workqueue("xfsalloc",
-			XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE), 0);
+			XFS_WQFLAGS(WQ_MEM_RECLAIM), 0);
 	if (!xfs_alloc_wq)
 		return -ENOMEM;
 
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 0fcb1828e598..ad8183db0780 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -636,7 +636,6 @@  xfsaild(
 	unsigned int	noreclaim_flag;
 
 	noreclaim_flag = memalloc_noreclaim_save();
-	set_freezable();
 
 	while (1) {
 		/*
@@ -695,8 +694,6 @@  xfsaild(
 
 		__set_current_state(TASK_RUNNING);
 
-		try_to_freeze();
-
 		tout = xfsaild_push(ailp);
 	}
 
diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c
index c5136ea9bb1d..1875b6551ab0 100644
--- a/fs/xfs/xfs_zone_gc.c
+++ b/fs/xfs/xfs_zone_gc.c
@@ -993,7 +993,6 @@  xfs_zone_gc_handle_work(
 	}
 
 	__set_current_state(TASK_RUNNING);
-	try_to_freeze();
 
 	if (reset_list)
 		xfs_zone_gc_reset_zones(data, reset_list);
@@ -1041,7 +1040,6 @@  xfs_zoned_gcd(
 	unsigned int		nofs_flag;
 
 	nofs_flag = memalloc_nofs_save();
-	set_freezable();
 
 	for (;;) {
 		set_current_state(TASK_INTERRUPTIBLE | TASK_FREEZABLE);