diff mbox series

[1/4] vfs: allow filesystem freeze callers to denote who froze the fs

Message ID 168308293892.734377.10931394426623343285.stgit@frogsfrogsfrogs (mailing list archive)
State Mainlined, archived
Headers show
Series xfs: online repair for fs summary counters with exclusive fsfreeze | expand

Commit Message

Darrick J. Wong May 3, 2023, 3:02 a.m. UTC
From: Darrick J. Wong <djwong@kernel.org>

Add a tag field to sb_writers so that the freeze code tracks who froze a
filesystem.  For now the only tag is for userspace-initiated freezing,
but in the next few patches we'll introduce the ability for in-kernel
callers to freeze an fs exclusively.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/super.c         |   41 ++++++++++++++++++++++++++++++++++-------
 include/linux/fs.h |    1 +
 2 files changed, 35 insertions(+), 7 deletions(-)

Comments

Luis Chamberlain May 7, 2023, 5:23 a.m. UTC | #1
On Tue, May 02, 2023 at 08:02:18PM -0700, Darrick J. Wong wrote:
> diff --git a/fs/super.c b/fs/super.c
> index 04bc62ab7dfe..01891f9e6d5e 100644
> --- a/fs/super.c
> +++ b/fs/super.c
> @@ -1736,18 +1747,33 @@ int freeze_super(struct super_block *sb)
>  	up_write(&sb->s_umount);
>  	return 0;
>  }
> +
> +/*
> + * freeze_super - lock the filesystem and force it into a consistent state
> + * @sb: the super to lock
> + *
> + * Syncs the super to make sure the filesystem is consistent and calls the fs's
> + * freeze_fs.  Subsequent calls to this without first thawing the fs will return
> + * -EBUSY.  See the comment for __freeze_super for more information.
> + */
> +int freeze_super(struct super_block *sb)
> +{
> +	return __freeze_super(sb, USERSPACE_FREEZE_COOKIE);
> +}
>  EXPORT_SYMBOL(freeze_super);
>  
> -static int thaw_super_locked(struct super_block *sb)
> +static int thaw_super_locked(struct super_block *sb, unsigned long cookie)
>  {
>  	int error;
>  
> -	if (sb->s_writers.frozen != SB_FREEZE_COMPLETE) {
> +	if (sb->s_writers.frozen != SB_FREEZE_COMPLETE ||
> +	    sb->s_writers.freeze_cookie != cookie) {
>  		up_write(&sb->s_umount);
>  		return -EINVAL;

We get the same by just having drivers use freeze_super(sb, true) in the
patches I have, ie, we treat it a user-initiated.

On freeze() we have:

int freeze_super(struct super_block *sb, bool usercall)                                              
{                                                                                                    
	int ret;                                                                                     
	
	if(!usercall && sb_is_frozen(sb))                                                           
		return 0;                                                                            

	if (!sb_is_unfrozen(sb))
	return -EBUSY;
	...
}

On thaw we end up with:

int thaw_super(struct super_block *sb, bool usercall)
{
	int error;

	if (!usercall) {
		/*
		 * If userspace initiated the freeze don't let the kernel
		 *  thaw it on return from a kernel initiated freeze.
		 */
		 if (sb_is_unfrozen(sb) || sb_is_frozen_by_user(sb))
		 	return 0;
	}

	if (!sb_is_frozen(sb))
		return -EINVAL;
	...
}

As I had it, I had made the drivers and the bdev freeze use the usercall as
true and so there is no change.

In case there is a filesystem already frozen then which was initiated by
the filesystem, for whatever reason, the filesystem the kernel auto-freeze
will chug on happy with the system freeze, it bails out withour error
and moves on to the next filesystem to freeze.

Upon thaw, the kernel auto-thaw will detect that the filesystem was
frozen by user on sb_is_frozen_by_user() and so will just bail and not
thaw it.

If the mechanism you want to introduce is to allow a filesystem to even
prevent kernel auto-freeze with -EBUSY it begs the question if that
shouldn't also prevent suspend. Because it would anyway as you have it
right now with your patch but it would return -EINVAL. I also ask because of
the possible issues with the filesystem not going to suspend but the backing
or other possible related devices going to suspend.

Since I think the goal is to prevent the kernel auto-freeze due to
online fsck to complete, then I think you *do* want to prevent full
system suspend from moving forward. In that case, why not just have
the filesystem check for that and return -EBUSY on its respective
filesystem sb->s_op->freeze_fs(sb) callback?

  Luis
Shiyang Ruan May 17, 2023, 5:13 p.m. UTC | #2
在 2023/5/7 13:23, Luis Chamberlain 写道:
> On Tue, May 02, 2023 at 08:02:18PM -0700, Darrick J. Wong wrote:
>> diff --git a/fs/super.c b/fs/super.c
>> index 04bc62ab7dfe..01891f9e6d5e 100644
>> --- a/fs/super.c
>> +++ b/fs/super.c
>> @@ -1736,18 +1747,33 @@ int freeze_super(struct super_block *sb)
>>   	up_write(&sb->s_umount);
>>   	return 0;
>>   }
>> +
>> +/*
>> + * freeze_super - lock the filesystem and force it into a consistent state
>> + * @sb: the super to lock
>> + *
>> + * Syncs the super to make sure the filesystem is consistent and calls the fs's
>> + * freeze_fs.  Subsequent calls to this without first thawing the fs will return
>> + * -EBUSY.  See the comment for __freeze_super for more information.
>> + */
>> +int freeze_super(struct super_block *sb)
>> +{
>> +	return __freeze_super(sb, USERSPACE_FREEZE_COOKIE);
>> +}
>>   EXPORT_SYMBOL(freeze_super);
>>   
>> -static int thaw_super_locked(struct super_block *sb)
>> +static int thaw_super_locked(struct super_block *sb, unsigned long cookie)
>>   {
>>   	int error;
>>   
>> -	if (sb->s_writers.frozen != SB_FREEZE_COMPLETE) {
>> +	if (sb->s_writers.frozen != SB_FREEZE_COMPLETE ||
>> +	    sb->s_writers.freeze_cookie != cookie) {
>>   		up_write(&sb->s_umount);
>>   		return -EINVAL;
> 
> We get the same by just having drivers use freeze_super(sb, true) in the
> patches I have, ie, we treat it a user-initiated.
> 
> On freeze() we have:
> 
> int freeze_super(struct super_block *sb, bool usercall)
> {
> 	int ret;
> 	
> 	if(!usercall && sb_is_frozen(sb))
> 		return 0;
> 
> 	if (!sb_is_unfrozen(sb))
> 	return -EBUSY;
> 	...
> }
> 
> On thaw we end up with:
> 
> int thaw_super(struct super_block *sb, bool usercall)
> {
> 	int error;
> 
> 	if (!usercall) {
> 		/*
> 		 * If userspace initiated the freeze don't let the kernel
> 		 *  thaw it on return from a kernel initiated freeze.
> 		 */
> 		 if (sb_is_unfrozen(sb) || sb_is_frozen_by_user(sb))
> 		 	return 0;
> 	}
> 
> 	if (!sb_is_frozen(sb))
> 		return -EINVAL;
> 	...
> }
> 
> As I had it, I had made the drivers and the bdev freeze use the usercall as
> true and so there is no change.
> 
> In case there is a filesystem already frozen then which was initiated by
> the filesystem, for whatever reason, the filesystem the kernel auto-freeze
> will chug on happy with the system freeze, it bails out withour error
> and moves on to the next filesystem to freeze.
> 
> Upon thaw, the kernel auto-thaw will detect that the filesystem was
> frozen by user on sb_is_frozen_by_user() and so will just bail and not
> thaw it.

Hi, Luis

Thanks for the great idea.  I also need this upgraded API for a unbind 
mechanism on pmem device, which is finally called in 
xfs_notify_failure.c where we want to freeze the fs to prevent any other 
new file mappings from being created.  In my case, I think we should 
think it as a kernel-initiated freeze, and hope it won't be thaw by 
others, especially userspace-initiated thaw.

In my understanding of your implementation, if there is a 
userspace-initiated thaw, with @usercall is set true, thaw_super(sb, 
true) will ignore any others' freeze and thaw the fs anyway.  But, 
except in my case, I think the order of userspace-initiated freeze/thaw 
may be messed up due to bugs in the user app, then the kernel-initiated 
freeze state could be accidentally broken...  In my opinion, the kernel 
code is more reliable.  Therefore, kernel-initiated freeze should be 
exclusive at least.


--
Thanks,
Ruan.

> 
> If the mechanism you want to introduce is to allow a filesystem to even
> prevent kernel auto-freeze with -EBUSY it begs the question if that
> shouldn't also prevent suspend. Because it would anyway as you have it
> right now with your patch but it would return -EINVAL. I also ask because of
> the possible issues with the filesystem not going to suspend but the backing
> or other possible related devices going to suspend.
> 
> Since I think the goal is to prevent the kernel auto-freeze due to
> online fsck to complete, then I think you *do* want to prevent full
> system suspend from moving forward. In that case, why not just have
> the filesystem check for that and return -EBUSY on its respective
> filesystem sb->s_op->freeze_fs(sb) callback?
> 
>    Luis
Darrick J. Wong May 18, 2023, 6:07 a.m. UTC | #3
On Sat, May 06, 2023 at 10:23:13PM -0700, Luis Chamberlain wrote:
> On Tue, May 02, 2023 at 08:02:18PM -0700, Darrick J. Wong wrote:
> > diff --git a/fs/super.c b/fs/super.c
> > index 04bc62ab7dfe..01891f9e6d5e 100644
> > --- a/fs/super.c
> > +++ b/fs/super.c
> > @@ -1736,18 +1747,33 @@ int freeze_super(struct super_block *sb)
> >  	up_write(&sb->s_umount);
> >  	return 0;
> >  }
> > +
> > +/*
> > + * freeze_super - lock the filesystem and force it into a consistent state
> > + * @sb: the super to lock
> > + *
> > + * Syncs the super to make sure the filesystem is consistent and calls the fs's
> > + * freeze_fs.  Subsequent calls to this without first thawing the fs will return
> > + * -EBUSY.  See the comment for __freeze_super for more information.
> > + */
> > +int freeze_super(struct super_block *sb)
> > +{
> > +	return __freeze_super(sb, USERSPACE_FREEZE_COOKIE);
> > +}
> >  EXPORT_SYMBOL(freeze_super);
> >  
> > -static int thaw_super_locked(struct super_block *sb)
> > +static int thaw_super_locked(struct super_block *sb, unsigned long cookie)
> >  {
> >  	int error;
> >  
> > -	if (sb->s_writers.frozen != SB_FREEZE_COMPLETE) {
> > +	if (sb->s_writers.frozen != SB_FREEZE_COMPLETE ||
> > +	    sb->s_writers.freeze_cookie != cookie) {
> >  		up_write(&sb->s_umount);
> >  		return -EINVAL;
> 
> We get the same by just having drivers use freeze_super(sb, true) in the
> patches I have, ie, we treat it a user-initiated.
> 
> On freeze() we have:
> 
> int freeze_super(struct super_block *sb, bool usercall)                                              
> {                                                                                                    
> 	int ret;                                                                                     
> 	
> 	if(!usercall && sb_is_frozen(sb))                                                           
> 		return 0;                                                                            
> 
> 	if (!sb_is_unfrozen(sb))
> 	return -EBUSY;
> 	...
> }
> 
> On thaw we end up with:
> 
> int thaw_super(struct super_block *sb, bool usercall)
> {
> 	int error;
> 
> 	if (!usercall) {
> 		/*
> 		 * If userspace initiated the freeze don't let the kernel
> 		 *  thaw it on return from a kernel initiated freeze.
> 		 */
> 		 if (sb_is_unfrozen(sb) || sb_is_frozen_by_user(sb))
> 		 	return 0;
> 	}
> 
> 	if (!sb_is_frozen(sb))
> 		return -EINVAL;
> 	...
> }
> 
> As I had it, I had made the drivers and the bdev freeze use the usercall as
> true and so there is no change.
> 
> In case there is a filesystem already frozen then which was initiated by
> the filesystem, for whatever reason, the filesystem the kernel auto-freeze
> will chug on happy with the system freeze, it bails out withour error
> and moves on to the next filesystem to freeze.

Yes.  Your patchset has the nicer behavior that a kernel freeze isn't
preempted by userspace already having frozen the fs, but at a cost that
userspace can thaw the fs while the kernel still thinks it's frozen.

> Upon thaw, the kernel auto-thaw will detect that the filesystem was
> frozen by user on sb_is_frozen_by_user() and so will just bail and not
> thaw it.
> 
> If the mechanism you want to introduce is to allow a filesystem to even
> prevent kernel auto-freeze with -EBUSY it begs the question if that

For the scrub case, yes, we do want to block suspend because (a) we're
running metadata transactions and (b) we haven't quiesced the xfs log,
because scrub doesn't need (or want) to wait for that.  It only needs to
prevent writes, write faults, writeback, and background garbage
collection.  And it can't allow userspace to turn any of that back on.

> shouldn't also prevent suspend. Because it would anyway as you have it
> right now with your patch but it would return -EINVAL.

Huh?  With this patchset, freeze_super returns EBUSY if the fs is
frozen, no matter who froze it, or who wants to freeze it.

> I also ask because of
> the possible issues with the filesystem not going to suspend but the backing
> or other possible related devices going to suspend.

Hm.  Perhaps the freeze state needs to track one bit for userspace
freeze and another for kernel freeze.  Kernel freezes are mutually
exclusive, but they can combine with userspace freezes.  Userspace
freezes remain shared.

Kernel thaw cannot break a userspace thaw, nor can userspace thaws break
a kernel thaw.

Eh, it's late, I'll think about this more tomorrow.

> Since I think the goal is to prevent the kernel auto-freeze due to
> online fsck to complete, then I think you *do* want to prevent full
> system suspend from moving forward. In that case, why not just have
> the filesystem check for that and return -EBUSY on its respective
> filesystem sb->s_op->freeze_fs(sb) callback?

Well... either we forego the fscounters optimization, or I guess we
figure out how to take s_umount and then set a flag.

--D

>   Luis
diff mbox series

Patch

diff --git a/fs/super.c b/fs/super.c
index 04bc62ab7dfe..01891f9e6d5e 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -39,7 +39,10 @@ 
 #include <uapi/linux/mount.h>
 #include "internal.h"
 
-static int thaw_super_locked(struct super_block *sb);
+static int thaw_super_locked(struct super_block *sb, unsigned long cookie);
+
+/* Freeze cookie denoting that userspace froze the filesystem. */
+#define USERSPACE_FREEZE_COOKIE	((unsigned long)freeze_super)
 
 static LIST_HEAD(super_blocks);
 static DEFINE_SPINLOCK(sb_lock);
@@ -1027,7 +1030,7 @@  static void do_thaw_all_callback(struct super_block *sb)
 	down_write(&sb->s_umount);
 	if (sb->s_root && sb->s_flags & SB_BORN) {
 		emergency_thaw_bdev(sb);
-		thaw_super_locked(sb);
+		thaw_super_locked(sb, USERSPACE_FREEZE_COOKIE);
 	} else {
 		up_write(&sb->s_umount);
 	}
@@ -1636,13 +1639,18 @@  static void sb_freeze_unlock(struct super_block *sb, int level)
 }
 
 /**
- * freeze_super - lock the filesystem and force it into a consistent state
+ * __freeze_super - lock the filesystem and force it into a consistent state
  * @sb: the super to lock
+ * @cookie: magic value telling us who tried to freeze the fs
  *
  * Syncs the super to make sure the filesystem is consistent and calls the fs's
  * freeze_fs.  Subsequent calls to this without first thawing the fs will return
  * -EBUSY.
  *
+ * If a filesystem freeze is initiated, the sb->s_writers.freeze_cookie value
+ * is set to the @cookie.  The filesystem can only be thawed with the same
+ * cookie value.
+ *
  * During this function, sb->s_writers.frozen goes through these values:
  *
  * SB_UNFROZEN: File system is normal, all writes progress as usual.
@@ -1668,7 +1676,7 @@  static void sb_freeze_unlock(struct super_block *sb, int level)
  *
  * sb->s_writers.frozen is protected by sb->s_umount.
  */
-int freeze_super(struct super_block *sb)
+static int __freeze_super(struct super_block *sb, unsigned long cookie)
 {
 	int ret;
 
@@ -1684,6 +1692,7 @@  int freeze_super(struct super_block *sb)
 		return 0;	/* sic - it's "nothing to do" */
 	}
 
+	sb->s_writers.freeze_cookie = cookie;
 	if (sb_rdonly(sb)) {
 		/* Nothing to do really... */
 		sb->s_writers.frozen = SB_FREEZE_COMPLETE;
@@ -1704,6 +1713,7 @@  int freeze_super(struct super_block *sb)
 	/* All writers are done so after syncing there won't be dirty data */
 	ret = sync_filesystem(sb);
 	if (ret) {
+		sb->s_writers.freeze_cookie = 0;
 		sb->s_writers.frozen = SB_UNFROZEN;
 		sb_freeze_unlock(sb, SB_FREEZE_PAGEFAULT);
 		wake_up(&sb->s_writers.wait_unfrozen);
@@ -1720,6 +1730,7 @@  int freeze_super(struct super_block *sb)
 		if (ret) {
 			printk(KERN_ERR
 				"VFS:Filesystem freeze failed\n");
+			sb->s_writers.freeze_cookie = 0;
 			sb->s_writers.frozen = SB_UNFROZEN;
 			sb_freeze_unlock(sb, SB_FREEZE_FS);
 			wake_up(&sb->s_writers.wait_unfrozen);
@@ -1736,18 +1747,33 @@  int freeze_super(struct super_block *sb)
 	up_write(&sb->s_umount);
 	return 0;
 }
+
+/*
+ * freeze_super - lock the filesystem and force it into a consistent state
+ * @sb: the super to lock
+ *
+ * Syncs the super to make sure the filesystem is consistent and calls the fs's
+ * freeze_fs.  Subsequent calls to this without first thawing the fs will return
+ * -EBUSY.  See the comment for __freeze_super for more information.
+ */
+int freeze_super(struct super_block *sb)
+{
+	return __freeze_super(sb, USERSPACE_FREEZE_COOKIE);
+}
 EXPORT_SYMBOL(freeze_super);
 
-static int thaw_super_locked(struct super_block *sb)
+static int thaw_super_locked(struct super_block *sb, unsigned long cookie)
 {
 	int error;
 
-	if (sb->s_writers.frozen != SB_FREEZE_COMPLETE) {
+	if (sb->s_writers.frozen != SB_FREEZE_COMPLETE ||
+	    sb->s_writers.freeze_cookie != cookie) {
 		up_write(&sb->s_umount);
 		return -EINVAL;
 	}
 
 	if (sb_rdonly(sb)) {
+		sb->s_writers.freeze_cookie = 0;
 		sb->s_writers.frozen = SB_UNFROZEN;
 		goto out;
 	}
@@ -1765,6 +1791,7 @@  static int thaw_super_locked(struct super_block *sb)
 		}
 	}
 
+	sb->s_writers.freeze_cookie = 0;
 	sb->s_writers.frozen = SB_UNFROZEN;
 	sb_freeze_unlock(sb, SB_FREEZE_FS);
 out:
@@ -1782,7 +1809,7 @@  static int thaw_super_locked(struct super_block *sb)
 int thaw_super(struct super_block *sb)
 {
 	down_write(&sb->s_umount);
-	return thaw_super_locked(sb);
+	return thaw_super_locked(sb, USERSPACE_FREEZE_COOKIE);
 }
 EXPORT_SYMBOL(thaw_super);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c85916e9f7db..800772361b1e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1129,6 +1129,7 @@  enum {
 
 struct sb_writers {
 	int				frozen;		/* Is sb frozen? */
+	unsigned long			freeze_cookie;	/* who froze us? */
 	wait_queue_head_t		wait_unfrozen;	/* wait for thaw */
 	struct percpu_rw_semaphore	rw_sem[SB_FREEZE_LEVELS];
 };