
[2/3] fs: wait for partially frozen filesystems

Message ID 168653972832.755178.18389114450766371923.stgit@frogsfrogsfrogs (mailing list archive)
State Superseded
Series fs: kernel and userspace filesystem freeze

Commit Message

Darrick J. Wong June 12, 2023, 3:15 a.m. UTC
From: Darrick J. Wong <djwong@kernel.org>

Jan Kara suggested that when one thread is in the middle of freezing a
filesystem, another thread trying to freeze the same fs but with a
different freeze_holder should wait until the freezer reaches either end
state (UNFROZEN or COMPLETE) instead of returning EBUSY immediately.

Plumb in the extra code needed to wait for the fs freezer to reach an
end state and try the freeze again.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/super.c |   27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

Comments

Christoph Hellwig June 12, 2023, 4:01 a.m. UTC | #1
On Sun, Jun 11, 2023 at 08:15:28PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> Jan Kara suggested that when one thread is in the middle of freezing a
> filesystem, another thread trying to freeze the same fs but with a
> different freeze_holder should wait until the freezer reaches either end
> state (UNFROZEN or COMPLETE) instead of returning EBUSY immediately.
> 
> Plumb in the extra code needed to wait for the fs freezer to reach an
> end state and try the freeze again.
> 
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
>  fs/super.c |   27 +++++++++++++++++++++++++--
>  1 file changed, 25 insertions(+), 2 deletions(-)
> 
> 
> diff --git a/fs/super.c b/fs/super.c
> index 36adccecc828..151e0eeff2c2 100644
> --- a/fs/super.c
> +++ b/fs/super.c
> @@ -1647,6 +1647,15 @@ static int freeze_frozen_super(struct super_block *sb, enum freeze_holder who)
>  	return 0;
>  }
>  
> +static void wait_for_partially_frozen(struct super_block *sb)
> +{
> +	up_write(&sb->s_umount);
> +	wait_var_event(&sb->s_writers.frozen,
> +			sb->s_writers.frozen == SB_UNFROZEN ||
> +			sb->s_writers.frozen == SB_FREEZE_COMPLETE);
> +	down_write(&sb->s_umount);

Does sb->s_writers.frozen need WRITE_ONCE/READ_ONCE treatment if we want
to check it outside of s_umount?  Or should we maybe just open code
wait_var_event and only drop the lock after checking the variable?
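
A minimal sketch of the first option, assuming READ_ONCE on the waiter
side and matching WRITE_ONCE on the stores to ->frozen in the
freeze/thaw paths (illustration only, not part of the posted patch):

static void wait_for_partially_frozen(struct super_block *sb)
{
	up_write(&sb->s_umount);
	/* ->frozen is checked here without s_umount held. */
	wait_var_event(&sb->s_writers.frozen,
			READ_ONCE(sb->s_writers.frozen) == SB_UNFROZEN ||
			READ_ONCE(sb->s_writers.frozen) == SB_FREEZE_COMPLETE);
	down_write(&sb->s_umount);
}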

>  	if (sb->s_writers.frozen != SB_UNFROZEN) {
> -		deactivate_locked_super(sb);
> -		return -EBUSY;
> +		if (!try_again) {
> +			deactivate_locked_super(sb);
> +			return -EBUSY;
> +		}
> +
> +		wait_for_partially_frozen(sb);
> +		try_again = false;
> +		goto retry;

Can you throw in a comment on why we're only waiting for a partial
freeze once here?
Jan Kara June 12, 2023, 11:35 a.m. UTC | #2
On Sun 11-06-23 20:15:28, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> Jan Kara suggested that when one thread is in the middle of freezing a
> filesystem, another thread trying to freeze the same fs but with a
> different freeze_holder should wait until the freezer reaches either end
> state (UNFROZEN or COMPLETE) instead of returning EBUSY immediately.
> 
> Plumb in the extra code needed to wait for the fs freezer to reach an
> end state and try the freeze again.
> 
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>

You can maybe copy my reasoning from my reply to your patch 1/3 to the
changelog to explain why waiting is more desirable.

> @@ -1690,11 +1699,13 @@ static int freeze_frozen_super(struct super_block *sb, enum freeze_holder who)
>   */
>  int freeze_super(struct super_block *sb, enum freeze_holder who)
>  {
> +	bool try_again = true;
>  	int ret;
>  
>  	atomic_inc(&sb->s_active);
>  	down_write(&sb->s_umount);
>  
> +retry:
>  	if (sb->s_writers.frozen == SB_FREEZE_COMPLETE) {
>  		ret = freeze_frozen_super(sb, who);
>  		deactivate_locked_super(sb);
> @@ -1702,8 +1713,14 @@ int freeze_super(struct super_block *sb, enum freeze_holder who)
>  	}
>  
>  	if (sb->s_writers.frozen != SB_UNFROZEN) {
> -		deactivate_locked_super(sb);
> -		return -EBUSY;
> +		if (!try_again) {
> +			deactivate_locked_super(sb);
> +			return -EBUSY;
> +		}

Why only one retry? Do you envision someone will be freezing & thawing
the filesystem so intensively that livelocks are possible? In that case I'd
think the system has other problems than freeze taking long... Honestly,
I'd be looping as long as we either succeed or return error for other
reasons.

What we could be doing to limit unnecessary waiting is that we'd update
freeze_holders already when we enter freeze_super() and lock s_umount
(bailing if our holder type is already set). That way we'd have at most one
process for each holder type freezing the fs / waiting for freezing to
complete.
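
A rough sketch of that idea, using a hypothetical helper (the name
claim_freeze_holder and its placement are assumptions, not part of the
posted patch), called right after freeze_super() takes s_umount:

static int claim_freeze_holder(struct super_block *sb, enum freeze_holder who)
{
	lockdep_assert_held_write(&sb->s_umount);

	/* A freezer of our holder type already froze or is freezing. */
	if (sb->s_writers.freeze_holders & who)
		return -EBUSY;

	sb->s_writers.freeze_holders |= who;
	return 0;
}

With the holder bit claimed up front, the later "freeze_holders |= who"
assignments would go away, and the error paths would have to clear the
bit again before returning.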

> +
> +		wait_for_partially_frozen(sb);
> +		try_again = false;
> +		goto retry;
>  	}
>  
...
> @@ -1810,6 +1831,7 @@ static int thaw_super_locked(struct super_block *sb, enum freeze_holder who)
>  	if (sb_rdonly(sb)) {
>  		sb->s_writers.freeze_holders &= ~who;
>  		sb->s_writers.frozen = SB_UNFROZEN;
> +		wake_up_var(&sb->s_writers.frozen);
>  		goto out;
>  	}
>  
> @@ -1828,6 +1850,7 @@ static int thaw_super_locked(struct super_block *sb, enum freeze_holder who)
>  
>  	sb->s_writers.freeze_holders &= ~who;
>  	sb->s_writers.frozen = SB_UNFROZEN;
> +	wake_up_var(&sb->s_writers.frozen);
>  	sb_freeze_unlock(sb, SB_FREEZE_FS);
>  out:
>  	wake_up(&sb->s_writers.wait_unfrozen);

I don't think wakeups on thaw are really needed because the waiter should
be woken already once the freezing task exits from freeze_super(). I guess
you've sprinkled them here just for good measure?

								Honza
Darrick J. Wong June 12, 2023, 6:33 p.m. UTC | #3
On Sun, Jun 11, 2023 at 09:01:48PM -0700, Christoph Hellwig wrote:
> On Sun, Jun 11, 2023 at 08:15:28PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <djwong@kernel.org>
> > 
> > Jan Kara suggested that when one thread is in the middle of freezing a
> > filesystem, another thread trying to freeze the same fs but with a
> > different freeze_holder should wait until the freezer reaches either end
> > state (UNFROZEN or COMPLETE) instead of returning EBUSY immediately.
> > 
> > Plumb in the extra code needed to wait for the fs freezer to reach an
> > end state and try the freeze again.
> > 
> > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > ---
> >  fs/super.c |   27 +++++++++++++++++++++++++--
> >  1 file changed, 25 insertions(+), 2 deletions(-)
> > 
> > 
> > diff --git a/fs/super.c b/fs/super.c
> > index 36adccecc828..151e0eeff2c2 100644
> > --- a/fs/super.c
> > +++ b/fs/super.c
> > @@ -1647,6 +1647,15 @@ static int freeze_frozen_super(struct super_block *sb, enum freeze_holder who)
> >  	return 0;
> >  }
> >  
> > +static void wait_for_partially_frozen(struct super_block *sb)
> > +{
> > +	up_write(&sb->s_umount);
> > +	wait_var_event(&sb->s_writers.frozen,
> > +			sb->s_writers.frozen == SB_UNFROZEN ||
> > +			sb->s_writers.frozen == SB_FREEZE_COMPLETE);
> > +	down_write(&sb->s_umount);
> 
> Does sb->s_writers.frozen need WRITE_ONCE/READ_ONCE treatment if we want
> to check it outside of s_umount?  Or should we maybe just open code
> wait_var_event and only drop the lock after checking the variable?

How about something like:

	do {
		up_write(&sb->s_umount);
		down_write(&sb->s_umount);
	} while (sb->s_writers.frozen != SB_UNFROZEN &&
		 sb->s_writers.frozen != SB_FREEZE_COMPLETE);

so that we always return in either end state of a freezer transition?

> >  	if (sb->s_writers.frozen != SB_UNFROZEN) {
> > -		deactivate_locked_super(sb);
> > -		return -EBUSY;
> > +		if (!try_again) {
> > +			deactivate_locked_super(sb);
> > +			return -EBUSY;
> > +		}
> > +
> > +		wait_for_partially_frozen(sb);
> > +		try_again = false;
> > +		goto retry;
> 
> Can you throw in a comment on why we're only waiting for a partial
> freeze once here?

I didn't want a thread to get stuck in the retry forever if it always
loses the race.  However, I think any other threads running freeze_super
will always end at UNFROZEN or COMPLETE; and thaw_super always goes
straight from COMPLETE to UNFROZEN, so I think I'll get rid of the retry
flag logic entirely.
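
Dropping the flag would leave the retry path looking something like
this (a sketch, assuming wait_for_partially_frozen() only returns once
->frozen is back in an end state):

retry:
	if (sb->s_writers.frozen == SB_FREEZE_COMPLETE) {
		ret = freeze_frozen_super(sb, who);
		deactivate_locked_super(sb);
		return ret;
	}

	if (sb->s_writers.frozen != SB_UNFROZEN) {
		/* Another freeze is in flight; wait for an end state and recheck. */
		wait_for_partially_frozen(sb);
		goto retry;
	}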

--D
Darrick J. Wong June 12, 2023, 6:36 p.m. UTC | #4
On Mon, Jun 12, 2023 at 01:35:26PM +0200, Jan Kara wrote:
> On Sun 11-06-23 20:15:28, Darrick J. Wong wrote:
> > From: Darrick J. Wong <djwong@kernel.org>
> > 
> > Jan Kara suggested that when one thread is in the middle of freezing a
> > filesystem, another thread trying to freeze the same fs but with a
> > different freeze_holder should wait until the freezer reaches either end
> > state (UNFROZEN or COMPLETE) instead of returning EBUSY immediately.
> > 
> > Plumb in the extra code needed to wait for the fs freezer to reach an
> > end state and try the freeze again.
> > 
> > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> 
> You can maybe copy my reasoning from my reply to your patch 1/3 to the
> changelog to explain why waiting is more desirable.

Done.

"Neither caller can do anything sensible with this race other than retry
but they cannot really distinguish EBUSY as in "someone other holder of
the same type has the sb already frozen" from "freezing raced with
holder of a different type"

is now added to the commit message.

> > @@ -1690,11 +1699,13 @@ static int freeze_frozen_super(struct super_block *sb, enum freeze_holder who)
> >   */
> >  int freeze_super(struct super_block *sb, enum freeze_holder who)
> >  {
> > +	bool try_again = true;
> >  	int ret;
> >  
> >  	atomic_inc(&sb->s_active);
> >  	down_write(&sb->s_umount);
> >  
> > +retry:
> >  	if (sb->s_writers.frozen == SB_FREEZE_COMPLETE) {
> >  		ret = freeze_frozen_super(sb, who);
> >  		deactivate_locked_super(sb);
> > @@ -1702,8 +1713,14 @@ int freeze_super(struct super_block *sb, enum freeze_holder who)
> >  	}
> >  
> >  	if (sb->s_writers.frozen != SB_UNFROZEN) {
> > -		deactivate_locked_super(sb);
> > -		return -EBUSY;
> > +		if (!try_again) {
> > +			deactivate_locked_super(sb);
> > +			return -EBUSY;
> > +		}
> 
> Why only one retry? Do you envision someone will be freezing & thawing
> the filesystem so intensively that livelocks are possible?

I was worried about that, but the more I think about it, the more I can
convince myself that we can take the simpler approach of cycling
s_umount until we get the state we want.

> In that case I'd
> think the system has other problems than freeze taking long... Honestly,
> I'd be looping as long as we either succeed or return error for other
> reasons.

Ok.

> What we could be doing to limit unnecessary waiting is that we'd update
> freeze_holders already when we enter freeze_super() and lock s_umount
> (bailing if our holder type is already set). That way we'd have at most one
> process for each holder type freezing the fs / waiting for freezing to
> complete.

<shrug> I don't know how often we even really have threads contending
for s_umount and elevated freeze state.  How about we go with the
simpler wait_for_partially_frozen and see if complaints crop up?

> > +
> > +		wait_for_partially_frozen(sb);
> > +		try_again = false;
> > +		goto retry;
> >  	}
> >  
> ...
> > @@ -1810,6 +1831,7 @@ static int thaw_super_locked(struct super_block *sb, enum freeze_holder who)
> >  	if (sb_rdonly(sb)) {
> >  		sb->s_writers.freeze_holders &= ~who;
> >  		sb->s_writers.frozen = SB_UNFROZEN;
> > +		wake_up_var(&sb->s_writers.frozen);
> >  		goto out;
> >  	}
> >  
> > @@ -1828,6 +1850,7 @@ static int thaw_super_locked(struct super_block *sb, enum freeze_holder who)
> >  
> >  	sb->s_writers.freeze_holders &= ~who;
> >  	sb->s_writers.frozen = SB_UNFROZEN;
> > +	wake_up_var(&sb->s_writers.frozen);
> >  	sb_freeze_unlock(sb, SB_FREEZE_FS);
> >  out:
> >  	wake_up(&sb->s_writers.wait_unfrozen);
> 
> I don't think wakeups on thaw are really needed because the waiter should
> be woken already once the freezing task exits from freeze_super(). I guess
> you've sprinkled them here just for good measure?

I've changed wait_for_partially_frozen to cycle s_umount until we find
the locked state we want, so the wake_up_var calls aren't needed
anymore.

--D

> 								Honza
> -- 
> Jan Kara <jack@suse.com>
> SUSE Labs, CR
Darrick J. Wong June 12, 2023, 6:47 p.m. UTC | #5
On Mon, Jun 12, 2023 at 11:33:02AM -0700, Darrick J. Wong wrote:
> On Sun, Jun 11, 2023 at 09:01:48PM -0700, Christoph Hellwig wrote:
> > On Sun, Jun 11, 2023 at 08:15:28PM -0700, Darrick J. Wong wrote:
> > > From: Darrick J. Wong <djwong@kernel.org>
> > > 
> > > Jan Kara suggested that when one thread is in the middle of freezing a
> > > filesystem, another thread trying to freeze the same fs but with a
> > > different freeze_holder should wait until the freezer reaches either end
> > > state (UNFROZEN or COMPLETE) instead of returning EBUSY immediately.
> > > 
> > > Plumb in the extra code needed to wait for the fs freezer to reach an
> > > end state and try the freeze again.
> > > 
> > > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > > ---
> > >  fs/super.c |   27 +++++++++++++++++++++++++--
> > >  1 file changed, 25 insertions(+), 2 deletions(-)
> > > 
> > > 
> > > diff --git a/fs/super.c b/fs/super.c
> > > index 36adccecc828..151e0eeff2c2 100644
> > > --- a/fs/super.c
> > > +++ b/fs/super.c
> > > @@ -1647,6 +1647,15 @@ static int freeze_frozen_super(struct super_block *sb, enum freeze_holder who)
> > >  	return 0;
> > >  }
> > >  
> > > +static void wait_for_partially_frozen(struct super_block *sb)
> > > +{
> > > +	up_write(&sb->s_umount);
> > > +	wait_var_event(&sb->s_writers.frozen,
> > > +			sb->s_writers.frozen == SB_UNFROZEN ||
> > > +			sb->s_writers.frozen == SB_FREEZE_COMPLETE);
> > > +	down_write(&sb->s_umount);
> > 
> > Does sb->s_writers.frozen need WRITE_ONCE/READ_ONCE treatment if we want
> > to check it outside of s_umount?  Or should we maybe just open code
> > wait_var_event and only drop the lock after checking the variable?
> 
> How about something like:
> 
> 	do {
> 		up_write(&sb->s_umount);
> 		down_write(&sb->s_umount);
> 	} while (sb->s_writers.frozen != SB_UNFROZEN &&
> 		 sb->s_writers.frozen != SB_FREEZE_COMPLETE);
> 
> so that we always return in either end state of a freezer transition?

Of course as soon as I hit send I realize that no, we don't want to be
cycling s_umount repeatedly even if sb->s_writers.frozen hasn't changed.
And maybe we want the wait to be killable too?

static int wait_for_partially_frozen(struct super_block *sb)
{
	int ret = 0;

	do {
		unsigned short old = sb->s_writers.frozen;

		up_write(&sb->s_umount);
		ret = wait_var_event_killable(&sb->s_writers.frozen,
					       sb->s_writers.frozen != old);
		down_write(&sb->s_umount);
	} while (ret == 0 &&
		 sb->s_writers.frozen != SB_UNFROZEN &&
		 sb->s_writers.frozen != SB_FREEZE_COMPLETE);

	return ret;
}

I'll try this out and report back.

--D

> > >  	if (sb->s_writers.frozen != SB_UNFROZEN) {
> > > -		deactivate_locked_super(sb);
> > > -		return -EBUSY;
> > > +		if (!try_again) {
> > > +			deactivate_locked_super(sb);
> > > +			return -EBUSY;
> > > +		}
> > > +
> > > +		wait_for_partially_frozen(sb);
> > > +		try_again = false;
> > > +		goto retry;
> > 
> > Can you throw in a comment on why we're only waiting for a partial
> > freeze once here?
> 
> I didn't want a thread to get stuck in the retry forever if it always
> loses the race.  However, I think any other threads running freeze_super
> will always end at UNFROZEN or COMPLETE; and thaw_super always goes
> straight from COMPLETE to UNFROZEN, so I think I'll get rid of the retry
> flag logic entirely.
> 
> --D
Jan Kara June 13, 2023, 7:52 a.m. UTC | #6
On Mon 12-06-23 11:36:57, Darrick J. Wong wrote:
> On Mon, Jun 12, 2023 at 01:35:26PM +0200, Jan Kara wrote:
> > What we could be doing to limit unnecessary waiting is that we'd update
> > freeze_holders already when we enter freeze_super() and lock s_umount
> > (bailing if our holder type is already set). That way we'd have at most one
> > process for each holder type freezing the fs / waiting for freezing to
> > complete.
> 
> <shrug> I don't know how often we even really have threads contending
> for s_umount and elevated freeze state.  How about we go with the
> simpler wait_for_partially_frozen and see if complaints crop up?

Yeah, I'm for the simpler approach as well. This was more of a suggestion
in case that turns out not to be viable.

								Honza

Patch

diff --git a/fs/super.c b/fs/super.c
index 36adccecc828..151e0eeff2c2 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1647,6 +1647,15 @@ static int freeze_frozen_super(struct super_block *sb, enum freeze_holder who)
 	return 0;
 }
 
+static void wait_for_partially_frozen(struct super_block *sb)
+{
+	up_write(&sb->s_umount);
+	wait_var_event(&sb->s_writers.frozen,
+			sb->s_writers.frozen == SB_UNFROZEN ||
+			sb->s_writers.frozen == SB_FREEZE_COMPLETE);
+	down_write(&sb->s_umount);
+}
+
 /**
  * freeze_super - lock the filesystem and force it into a consistent state
  * @sb: the super to lock
@@ -1690,11 +1699,13 @@ static int freeze_frozen_super(struct super_block *sb, enum freeze_holder who)
  */
 int freeze_super(struct super_block *sb, enum freeze_holder who)
 {
+	bool try_again = true;
 	int ret;
 
 	atomic_inc(&sb->s_active);
 	down_write(&sb->s_umount);
 
+retry:
 	if (sb->s_writers.frozen == SB_FREEZE_COMPLETE) {
 		ret = freeze_frozen_super(sb, who);
 		deactivate_locked_super(sb);
@@ -1702,8 +1713,14 @@ int freeze_super(struct super_block *sb, enum freeze_holder who)
 	}
 
 	if (sb->s_writers.frozen != SB_UNFROZEN) {
-		deactivate_locked_super(sb);
-		return -EBUSY;
+		if (!try_again) {
+			deactivate_locked_super(sb);
+			return -EBUSY;
+		}
+
+		wait_for_partially_frozen(sb);
+		try_again = false;
+		goto retry;
 	}
 
 	if (!(sb->s_flags & SB_BORN)) {
@@ -1716,6 +1733,7 @@ int freeze_super(struct super_block *sb, enum freeze_holder who)
 		/* Nothing to do really... */
 		sb->s_writers.freeze_holders |= who;
 		sb->s_writers.frozen = SB_FREEZE_COMPLETE;
+		wake_up_var(&sb->s_writers.frozen);
 		up_write(&sb->s_umount);
 		return 0;
 	}
@@ -1736,6 +1754,7 @@ int freeze_super(struct super_block *sb, enum freeze_holder who)
 		sb->s_writers.frozen = SB_UNFROZEN;
 		sb_freeze_unlock(sb, SB_FREEZE_PAGEFAULT);
 		wake_up(&sb->s_writers.wait_unfrozen);
+		wake_up_var(&sb->s_writers.frozen);
 		deactivate_locked_super(sb);
 		return ret;
 	}
@@ -1752,6 +1771,7 @@ int freeze_super(struct super_block *sb, enum freeze_holder who)
 			sb->s_writers.frozen = SB_UNFROZEN;
 			sb_freeze_unlock(sb, SB_FREEZE_FS);
 			wake_up(&sb->s_writers.wait_unfrozen);
+			wake_up_var(&sb->s_writers.frozen);
 			deactivate_locked_super(sb);
 			return ret;
 		}
@@ -1762,6 +1782,7 @@ int freeze_super(struct super_block *sb, enum freeze_holder who)
 	 */
 	sb->s_writers.freeze_holders |= who;
 	sb->s_writers.frozen = SB_FREEZE_COMPLETE;
+	wake_up_var(&sb->s_writers.frozen);
 	lockdep_sb_freeze_release(sb);
 	up_write(&sb->s_umount);
 	return 0;
@@ -1810,6 +1831,7 @@ static int thaw_super_locked(struct super_block *sb, enum freeze_holder who)
 	if (sb_rdonly(sb)) {
 		sb->s_writers.freeze_holders &= ~who;
 		sb->s_writers.frozen = SB_UNFROZEN;
+		wake_up_var(&sb->s_writers.frozen);
 		goto out;
 	}
 
@@ -1828,6 +1850,7 @@ static int thaw_super_locked(struct super_block *sb, enum freeze_holder who)
 
 	sb->s_writers.freeze_holders &= ~who;
 	sb->s_writers.frozen = SB_UNFROZEN;
+	wake_up_var(&sb->s_writers.frozen);
 	sb_freeze_unlock(sb, SB_FREEZE_FS);
 out:
 	wake_up(&sb->s_writers.wait_unfrozen);