diff mbox series

[1/9] xfs: sync lazy sb accounting on quiesce of read-only mounts

Message ID 20210106174127.805660-2-bfoster@redhat.com (mailing list archive)
State Superseded
Headers show
Series xfs: rework log quiesce to cover the log | expand

Commit Message

Brian Foster Jan. 6, 2021, 5:41 p.m. UTC
xfs_log_sbcount() syncs the superblock specifically to accumulate
the in-core percpu superblock counters and commit them to disk. This
is required to maintain filesystem consistency across quiesce
(freeze, read-only mount/remount) or unmount when lazy superblock
accounting is enabled because individual transactions do not update
the superblock directly.

This mechanism works as expected for writable mounts, but
xfs_log_sbcount() skips the update for read-only mounts. Read-only
mounts otherwise still allow log recovery and write out an unmount
record during log quiesce. If a read-only mount performs log
recovery, it can modify the in-core superblock counters and write an
unmount record when the filesystem unmounts without ever syncing the
in-core counters. This leaves the filesystem with a clean log but in
an inconsistent state with regard to lazy sb counters.

Update xfs_log_sbcount() to use the same logic
xfs_log_unmount_write() uses to determine when to write an unmount
record. We can drop the freeze state check because the update is
already allowed during the freezing process and no context calls
this function on an already frozen fs. This ensures that lazy
accounting is always synced before the log is cleaned. Refactor this
logic into a new helper to distinguish between a writable filesystem
and a writable log. Specifically, the log is writable unless the
filesystem is mounted with the norecovery mount option, the
underlying log device is read-only, or the filesystem is shutdown.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Gao Xiang <hsiangkao@redhat.com>
---
 fs/xfs/xfs_log.c   | 28 ++++++++++++++++++++--------
 fs/xfs/xfs_log.h   |  1 +
 fs/xfs/xfs_mount.c |  3 +--
 3 files changed, 22 insertions(+), 10 deletions(-)

Comments

Allison Henderson Jan. 6, 2021, 10:50 p.m. UTC | #1
On 1/6/21 10:41 AM, Brian Foster wrote:
> xfs_log_sbcount() syncs the superblock specifically to accumulate
> the in-core percpu superblock counters and commit them to disk. This
> is required to maintain filesystem consistency across quiesce
> (freeze, read-only mount/remount) or unmount when lazy superblock
> accounting is enabled because individual transactions do not update
> the superblock directly.
> 
> This mechanism works as expected for writable mounts, but
> xfs_log_sbcount() skips the update for read-only mounts. Read-only
> mounts otherwise still allow log recovery and write out an unmount
> record during log quiesce. If a read-only mount performs log
> recovery, it can modify the in-core superblock counters and write an
> unmount record when the filesystem unmounts without ever syncing the
> in-core counters. This leaves the filesystem with a clean log but in
> an inconsistent state with regard to lazy sb counters.
> 
> Update xfs_log_sbcount() to use the same logic
> xfs_log_unmount_write() uses to determine when to write an unmount
> record. We can drop the freeze state check because the update is
> already allowed during the freezing process and no context calls
> this function on an already frozen fs. This ensures that lazy
> accounting is always synced before the log is cleaned. Refactor this
> logic into a new helper to distinguish between a writable filesystem
> and a writable log. Specifically, the log is writable unless the
> filesystem is mounted with the norecovery mount option, the
> underlying log device is read-only, or the filesystem is shutdown.
> 
> Signed-off-by: Brian Foster <bfoster@redhat.com>
> Reviewed-by: Gao Xiang <hsiangkao@redhat.com>

Ok makes sense
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
> ---
>   fs/xfs/xfs_log.c   | 28 ++++++++++++++++++++--------
>   fs/xfs/xfs_log.h   |  1 +
>   fs/xfs/xfs_mount.c |  3 +--
>   3 files changed, 22 insertions(+), 10 deletions(-)
> 
> diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
> index fa2d05e65ff1..b445e63cbc3c 100644
> --- a/fs/xfs/xfs_log.c
> +++ b/fs/xfs/xfs_log.c
> @@ -347,6 +347,25 @@ xlog_tic_add_region(xlog_ticket_t *tic, uint len, uint type)
>   	tic->t_res_num++;
>   }
>   
> +bool
> +xfs_log_writable(
> +	struct xfs_mount	*mp)
> +{
> +	/*
> +	 * Never write to the log on norecovery mounts, if the block device is
> +	 * read-only, or if the filesystem is shutdown. Read-only mounts still
> +	 * allow internal writes for log recovery and unmount purposes, so don't
> +	 * restrict that case here.
> +	 */
> +	if (mp->m_flags & XFS_MOUNT_NORECOVERY)
> +		return false;
> +	if (xfs_readonly_buftarg(mp->m_log->l_targ))
> +		return false;
> +	if (XFS_FORCED_SHUTDOWN(mp))
> +		return false;
> +	return true;
> +}
> +
>   /*
>    * Replenish the byte reservation required by moving the grant write head.
>    */
> @@ -886,15 +905,8 @@ xfs_log_unmount_write(
>   {
>   	struct xlog		*log = mp->m_log;
>   
> -	/*
> -	 * Don't write out unmount record on norecovery mounts or ro devices.
> -	 * Or, if we are doing a forced umount (typically because of IO errors).
> -	 */
> -	if (mp->m_flags & XFS_MOUNT_NORECOVERY ||
> -	    xfs_readonly_buftarg(log->l_targ)) {
> -		ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
> +	if (!xfs_log_writable(mp))
>   		return;
> -	}
>   
>   	xfs_log_force(mp, XFS_LOG_SYNC);
>   
> diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
> index 58c3fcbec94a..98c913da7587 100644
> --- a/fs/xfs/xfs_log.h
> +++ b/fs/xfs/xfs_log.h
> @@ -127,6 +127,7 @@ int	  xfs_log_reserve(struct xfs_mount *mp,
>   int	  xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic);
>   void      xfs_log_unmount(struct xfs_mount *mp);
>   int	  xfs_log_force_umount(struct xfs_mount *mp, int logerror);
> +bool	xfs_log_writable(struct xfs_mount *mp);
>   
>   struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
>   void	  xfs_log_ticket_put(struct xlog_ticket *ticket);
> diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
> index 7110507a2b6b..a62b8a574409 100644
> --- a/fs/xfs/xfs_mount.c
> +++ b/fs/xfs/xfs_mount.c
> @@ -1176,8 +1176,7 @@ xfs_fs_writable(
>   int
>   xfs_log_sbcount(xfs_mount_t *mp)
>   {
> -	/* allow this to proceed during the freeze sequence... */
> -	if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE))
> +	if (!xfs_log_writable(mp))
>   		return 0;
>   
>   	/*
>
Darrick J. Wong Jan. 7, 2021, 7:06 p.m. UTC | #2
On Wed, Jan 06, 2021 at 12:41:19PM -0500, Brian Foster wrote:
> xfs_log_sbcount() syncs the superblock specifically to accumulate
> the in-core percpu superblock counters and commit them to disk. This
> is required to maintain filesystem consistency across quiesce
> (freeze, read-only mount/remount) or unmount when lazy superblock
> accounting is enabled because individual transactions do not update
> the superblock directly.
> 
> This mechanism works as expected for writable mounts, but
> xfs_log_sbcount() skips the update for read-only mounts. Read-only
> mounts otherwise still allow log recovery and write out an unmount
> record during log quiesce. If a read-only mount performs log
> recovery, it can modify the in-core superblock counters and write an
> unmount record when the filesystem unmounts without ever syncing the
> in-core counters. This leaves the filesystem with a clean log but in
> an inconsistent state with regard to lazy sb counters.
> 
> Update xfs_log_sbcount() to use the same logic
> xfs_log_unmount_write() uses to determine when to write an unmount
> record. We can drop the freeze state check because the update is
> already allowed during the freezing process and no context calls
> this function on an already frozen fs. This ensures that lazy
> accounting is always synced before the log is cleaned. Refactor this
> logic into a new helper to distinguish between a writable filesystem
> and a writable log. Specifically, the log is writable unless the
> filesystem is mounted with the norecovery mount option, the
> underlying log device is read-only, or the filesystem is shutdown.
> 
> Signed-off-by: Brian Foster <bfoster@redhat.com>
> Reviewed-by: Gao Xiang <hsiangkao@redhat.com>

Looks good to me,
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

> ---
>  fs/xfs/xfs_log.c   | 28 ++++++++++++++++++++--------
>  fs/xfs/xfs_log.h   |  1 +
>  fs/xfs/xfs_mount.c |  3 +--
>  3 files changed, 22 insertions(+), 10 deletions(-)
> 
> diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
> index fa2d05e65ff1..b445e63cbc3c 100644
> --- a/fs/xfs/xfs_log.c
> +++ b/fs/xfs/xfs_log.c
> @@ -347,6 +347,25 @@ xlog_tic_add_region(xlog_ticket_t *tic, uint len, uint type)
>  	tic->t_res_num++;
>  }
>  
> +bool
> +xfs_log_writable(
> +	struct xfs_mount	*mp)
> +{
> +	/*
> +	 * Never write to the log on norecovery mounts, if the block device is
> +	 * read-only, or if the filesystem is shutdown. Read-only mounts still
> +	 * allow internal writes for log recovery and unmount purposes, so don't
> +	 * restrict that case here.
> +	 */
> +	if (mp->m_flags & XFS_MOUNT_NORECOVERY)
> +		return false;
> +	if (xfs_readonly_buftarg(mp->m_log->l_targ))
> +		return false;
> +	if (XFS_FORCED_SHUTDOWN(mp))
> +		return false;
> +	return true;
> +}
> +
>  /*
>   * Replenish the byte reservation required by moving the grant write head.
>   */
> @@ -886,15 +905,8 @@ xfs_log_unmount_write(
>  {
>  	struct xlog		*log = mp->m_log;
>  
> -	/*
> -	 * Don't write out unmount record on norecovery mounts or ro devices.
> -	 * Or, if we are doing a forced umount (typically because of IO errors).
> -	 */
> -	if (mp->m_flags & XFS_MOUNT_NORECOVERY ||
> -	    xfs_readonly_buftarg(log->l_targ)) {
> -		ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
> +	if (!xfs_log_writable(mp))
>  		return;
> -	}
>  
>  	xfs_log_force(mp, XFS_LOG_SYNC);
>  
> diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
> index 58c3fcbec94a..98c913da7587 100644
> --- a/fs/xfs/xfs_log.h
> +++ b/fs/xfs/xfs_log.h
> @@ -127,6 +127,7 @@ int	  xfs_log_reserve(struct xfs_mount *mp,
>  int	  xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic);
>  void      xfs_log_unmount(struct xfs_mount *mp);
>  int	  xfs_log_force_umount(struct xfs_mount *mp, int logerror);
> +bool	xfs_log_writable(struct xfs_mount *mp);
>  
>  struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
>  void	  xfs_log_ticket_put(struct xlog_ticket *ticket);
> diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
> index 7110507a2b6b..a62b8a574409 100644
> --- a/fs/xfs/xfs_mount.c
> +++ b/fs/xfs/xfs_mount.c
> @@ -1176,8 +1176,7 @@ xfs_fs_writable(
>  int
>  xfs_log_sbcount(xfs_mount_t *mp)
>  {
> -	/* allow this to proceed during the freeze sequence... */
> -	if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE))
> +	if (!xfs_log_writable(mp))
>  		return 0;
>  
>  	/*
> -- 
> 2.26.2
>
Christoph Hellwig Jan. 11, 2021, 5:38 p.m. UTC | #3
On Wed, Jan 06, 2021 at 12:41:19PM -0500, Brian Foster wrote:
> Update xfs_log_sbcount() to use the same logic
> xfs_log_unmount_write() uses to determine when to write an unmount
> record.

But it isn't the same old logic - a shutdown check is added as well.
Brian Foster Jan. 12, 2021, 2:55 p.m. UTC | #4
On Mon, Jan 11, 2021 at 05:38:51PM +0000, Christoph Hellwig wrote:
> On Wed, Jan 06, 2021 at 12:41:19PM -0500, Brian Foster wrote:
> > Update xfs_log_sbcount() to use the same logic
> > xfs_log_unmount_write() uses to determine when to write an unmount
> > record.
> 
> But it isn't the same old logic - a shutdown check is added as well.
> 

xfs_log_unmount_write() does have a shutdown check, it just doesn't show
up in the diff because I wanted to retain the post-log force check in
that function (in the event that the force triggers a shutdown).

Brian
Christoph Hellwig Jan. 12, 2021, 6:20 p.m. UTC | #5
On Tue, Jan 12, 2021 at 09:55:19AM -0500, Brian Foster wrote:
> xfs_log_unmount_write() does have a shutdown check, it just doesn't show
> up in the diff because I wanted to retain the post-log force check in
> that function (in the event that the force triggers a shutdown).

Ok.  Maybe mention you are throwing in another shutdown check just
because we can't have 'nough of them.
Bill O'Donnell Jan. 21, 2021, 3:08 p.m. UTC | #6
On Wed, Jan 06, 2021 at 12:41:19PM -0500, Brian Foster wrote:
> xfs_log_sbcount() syncs the superblock specifically to accumulate
> the in-core percpu superblock counters and commit them to disk. This
> is required to maintain filesystem consistency across quiesce
> (freeze, read-only mount/remount) or unmount when lazy superblock
> accounting is enabled because individual transactions do not update
> the superblock directly.
> 
> This mechanism works as expected for writable mounts, but
> xfs_log_sbcount() skips the update for read-only mounts. Read-only
> mounts otherwise still allow log recovery and write out an unmount
> record during log quiesce. If a read-only mount performs log
> recovery, it can modify the in-core superblock counters and write an
> unmount record when the filesystem unmounts without ever syncing the
> in-core counters. This leaves the filesystem with a clean log but in
> an inconsistent state with regard to lazy sb counters.
> 
> Update xfs_log_sbcount() to use the same logic
> xfs_log_unmount_write() uses to determine when to write an unmount
> record. We can drop the freeze state check because the update is
> already allowed during the freezing process and no context calls
> this function on an already frozen fs. This ensures that lazy
> accounting is always synced before the log is cleaned. Refactor this
> logic into a new helper to distinguish between a writable filesystem
> and a writable log. Specifically, the log is writable unless the
> filesystem is mounted with the norecovery mount option, the
> underlying log device is read-only, or the filesystem is shutdown.
> 
> Signed-off-by: Brian Foster <bfoster@redhat.com>
> Reviewed-by: Gao Xiang <hsiangkao@redhat.com>

works for me.
Reviewed-by: Bill O'Donnell <billodo@redhat.com>

> ---
>  fs/xfs/xfs_log.c   | 28 ++++++++++++++++++++--------
>  fs/xfs/xfs_log.h   |  1 +
>  fs/xfs/xfs_mount.c |  3 +--
>  3 files changed, 22 insertions(+), 10 deletions(-)
> 
> diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
> index fa2d05e65ff1..b445e63cbc3c 100644
> --- a/fs/xfs/xfs_log.c
> +++ b/fs/xfs/xfs_log.c
> @@ -347,6 +347,25 @@ xlog_tic_add_region(xlog_ticket_t *tic, uint len, uint type)
>  	tic->t_res_num++;
>  }
>  
> +bool
> +xfs_log_writable(
> +	struct xfs_mount	*mp)
> +{
> +	/*
> +	 * Never write to the log on norecovery mounts, if the block device is
> +	 * read-only, or if the filesystem is shutdown. Read-only mounts still
> +	 * allow internal writes for log recovery and unmount purposes, so don't
> +	 * restrict that case here.
> +	 */
> +	if (mp->m_flags & XFS_MOUNT_NORECOVERY)
> +		return false;
> +	if (xfs_readonly_buftarg(mp->m_log->l_targ))
> +		return false;
> +	if (XFS_FORCED_SHUTDOWN(mp))
> +		return false;
> +	return true;
> +}
> +
>  /*
>   * Replenish the byte reservation required by moving the grant write head.
>   */
> @@ -886,15 +905,8 @@ xfs_log_unmount_write(
>  {
>  	struct xlog		*log = mp->m_log;
>  
> -	/*
> -	 * Don't write out unmount record on norecovery mounts or ro devices.
> -	 * Or, if we are doing a forced umount (typically because of IO errors).
> -	 */
> -	if (mp->m_flags & XFS_MOUNT_NORECOVERY ||
> -	    xfs_readonly_buftarg(log->l_targ)) {
> -		ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
> +	if (!xfs_log_writable(mp))
>  		return;
> -	}
>  
>  	xfs_log_force(mp, XFS_LOG_SYNC);
>  
> diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
> index 58c3fcbec94a..98c913da7587 100644
> --- a/fs/xfs/xfs_log.h
> +++ b/fs/xfs/xfs_log.h
> @@ -127,6 +127,7 @@ int	  xfs_log_reserve(struct xfs_mount *mp,
>  int	  xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic);
>  void      xfs_log_unmount(struct xfs_mount *mp);
>  int	  xfs_log_force_umount(struct xfs_mount *mp, int logerror);
> +bool	xfs_log_writable(struct xfs_mount *mp);
>  
>  struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
>  void	  xfs_log_ticket_put(struct xlog_ticket *ticket);
> diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
> index 7110507a2b6b..a62b8a574409 100644
> --- a/fs/xfs/xfs_mount.c
> +++ b/fs/xfs/xfs_mount.c
> @@ -1176,8 +1176,7 @@ xfs_fs_writable(
>  int
>  xfs_log_sbcount(xfs_mount_t *mp)
>  {
> -	/* allow this to proceed during the freeze sequence... */
> -	if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE))
> +	if (!xfs_log_writable(mp))
>  		return 0;
>  
>  	/*
> -- 
> 2.26.2
>
Darrick J. Wong Jan. 21, 2021, 4:49 p.m. UTC | #7
On Thu, Jan 21, 2021 at 09:08:27AM -0600, Bill O'Donnell wrote:
> On Wed, Jan 06, 2021 at 12:41:19PM -0500, Brian Foster wrote:
> > xfs_log_sbcount() syncs the superblock specifically to accumulate
> > the in-core percpu superblock counters and commit them to disk. This
> > is required to maintain filesystem consistency across quiesce
> > (freeze, read-only mount/remount) or unmount when lazy superblock
> > accounting is enabled because individual transactions do not update
> > the superblock directly.
> > 
> > This mechanism works as expected for writable mounts, but
> > xfs_log_sbcount() skips the update for read-only mounts. Read-only
> > mounts otherwise still allow log recovery and write out an unmount
> > record during log quiesce. If a read-only mount performs log
> > recovery, it can modify the in-core superblock counters and write an
> > unmount record when the filesystem unmounts without ever syncing the
> > in-core counters. This leaves the filesystem with a clean log but in
> > an inconsistent state with regard to lazy sb counters.
> > 
> > Update xfs_log_sbcount() to use the same logic
> > xfs_log_unmount_write() uses to determine when to write an unmount
> > record. We can drop the freeze state check because the update is
> > already allowed during the freezing process and no context calls
> > this function on an already frozen fs. This ensures that lazy
> > accounting is always synced before the log is cleaned. Refactor this
> > logic into a new helper to distinguish between a writable filesystem
> > and a writable log. Specifically, the log is writable unless the
> > filesystem is mounted with the norecovery mount option, the
> > underlying log device is read-only, or the filesystem is shutdown.
> > 
> > Signed-off-by: Brian Foster <bfoster@redhat.com>
> > Reviewed-by: Gao Xiang <hsiangkao@redhat.com>
> 
> works for me.
> Reviewed-by: Bill O'Donnell <billodo@redhat.com>

Does this also apply to the v3 series that Brian just sent?

--D

> 
> > ---
> >  fs/xfs/xfs_log.c   | 28 ++++++++++++++++++++--------
> >  fs/xfs/xfs_log.h   |  1 +
> >  fs/xfs/xfs_mount.c |  3 +--
> >  3 files changed, 22 insertions(+), 10 deletions(-)
> > 
> > diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
> > index fa2d05e65ff1..b445e63cbc3c 100644
> > --- a/fs/xfs/xfs_log.c
> > +++ b/fs/xfs/xfs_log.c
> > @@ -347,6 +347,25 @@ xlog_tic_add_region(xlog_ticket_t *tic, uint len, uint type)
> >  	tic->t_res_num++;
> >  }
> >  
> > +bool
> > +xfs_log_writable(
> > +	struct xfs_mount	*mp)
> > +{
> > +	/*
> > +	 * Never write to the log on norecovery mounts, if the block device is
> > +	 * read-only, or if the filesystem is shutdown. Read-only mounts still
> > +	 * allow internal writes for log recovery and unmount purposes, so don't
> > +	 * restrict that case here.
> > +	 */
> > +	if (mp->m_flags & XFS_MOUNT_NORECOVERY)
> > +		return false;
> > +	if (xfs_readonly_buftarg(mp->m_log->l_targ))
> > +		return false;
> > +	if (XFS_FORCED_SHUTDOWN(mp))
> > +		return false;
> > +	return true;
> > +}
> > +
> >  /*
> >   * Replenish the byte reservation required by moving the grant write head.
> >   */
> > @@ -886,15 +905,8 @@ xfs_log_unmount_write(
> >  {
> >  	struct xlog		*log = mp->m_log;
> >  
> > -	/*
> > -	 * Don't write out unmount record on norecovery mounts or ro devices.
> > -	 * Or, if we are doing a forced umount (typically because of IO errors).
> > -	 */
> > -	if (mp->m_flags & XFS_MOUNT_NORECOVERY ||
> > -	    xfs_readonly_buftarg(log->l_targ)) {
> > -		ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
> > +	if (!xfs_log_writable(mp))
> >  		return;
> > -	}
> >  
> >  	xfs_log_force(mp, XFS_LOG_SYNC);
> >  
> > diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
> > index 58c3fcbec94a..98c913da7587 100644
> > --- a/fs/xfs/xfs_log.h
> > +++ b/fs/xfs/xfs_log.h
> > @@ -127,6 +127,7 @@ int	  xfs_log_reserve(struct xfs_mount *mp,
> >  int	  xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic);
> >  void      xfs_log_unmount(struct xfs_mount *mp);
> >  int	  xfs_log_force_umount(struct xfs_mount *mp, int logerror);
> > +bool	xfs_log_writable(struct xfs_mount *mp);
> >  
> >  struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
> >  void	  xfs_log_ticket_put(struct xlog_ticket *ticket);
> > diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
> > index 7110507a2b6b..a62b8a574409 100644
> > --- a/fs/xfs/xfs_mount.c
> > +++ b/fs/xfs/xfs_mount.c
> > @@ -1176,8 +1176,7 @@ xfs_fs_writable(
> >  int
> >  xfs_log_sbcount(xfs_mount_t *mp)
> >  {
> > -	/* allow this to proceed during the freeze sequence... */
> > -	if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE))
> > +	if (!xfs_log_writable(mp))
> >  		return 0;
> >  
> >  	/*
> > -- 
> > 2.26.2
> > 
>
Bill O'Donnell Jan. 21, 2021, 5:17 p.m. UTC | #8
On Thu, Jan 21, 2021 at 08:49:33AM -0800, Darrick J. Wong wrote:
> On Thu, Jan 21, 2021 at 09:08:27AM -0600, Bill O'Donnell wrote:
> > On Wed, Jan 06, 2021 at 12:41:19PM -0500, Brian Foster wrote:
> > > xfs_log_sbcount() syncs the superblock specifically to accumulate
> > > the in-core percpu superblock counters and commit them to disk. This
> > > is required to maintain filesystem consistency across quiesce
> > > (freeze, read-only mount/remount) or unmount when lazy superblock
> > > accounting is enabled because individual transactions do not update
> > > the superblock directly.
> > > 
> > > This mechanism works as expected for writable mounts, but
> > > xfs_log_sbcount() skips the update for read-only mounts. Read-only
> > > mounts otherwise still allow log recovery and write out an unmount
> > > record during log quiesce. If a read-only mount performs log
> > > recovery, it can modify the in-core superblock counters and write an
> > > unmount record when the filesystem unmounts without ever syncing the
> > > in-core counters. This leaves the filesystem with a clean log but in
> > > an inconsistent state with regard to lazy sb counters.
> > > 
> > > Update xfs_log_sbcount() to use the same logic
> > > xfs_log_unmount_write() uses to determine when to write an unmount
> > > record. We can drop the freeze state check because the update is
> > > already allowed during the freezing process and no context calls
> > > this function on an already frozen fs. This ensures that lazy
> > > accounting is always synced before the log is cleaned. Refactor this
> > > logic into a new helper to distinguish between a writable filesystem
> > > and a writable log. Specifically, the log is writable unless the
> > > filesystem is mounted with the norecovery mount option, the
> > > underlying log device is read-only, or the filesystem is shutdown.
> > > 
> > > Signed-off-by: Brian Foster <bfoster@redhat.com>
> > > Reviewed-by: Gao Xiang <hsiangkao@redhat.com>
> > 
> > works for me.
> > Reviewed-by: Bill O'Donnell <billodo@redhat.com>
> 
> Does this also apply to the v3 series that Brian just sent?

I only used this patch 1/9, as that was all I was focusing on.

> 
> --D
> 
> > 
> > > ---
> > >  fs/xfs/xfs_log.c   | 28 ++++++++++++++++++++--------
> > >  fs/xfs/xfs_log.h   |  1 +
> > >  fs/xfs/xfs_mount.c |  3 +--
> > >  3 files changed, 22 insertions(+), 10 deletions(-)
> > > 
> > > diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
> > > index fa2d05e65ff1..b445e63cbc3c 100644
> > > --- a/fs/xfs/xfs_log.c
> > > +++ b/fs/xfs/xfs_log.c
> > > @@ -347,6 +347,25 @@ xlog_tic_add_region(xlog_ticket_t *tic, uint len, uint type)
> > >  	tic->t_res_num++;
> > >  }
> > >  
> > > +bool
> > > +xfs_log_writable(
> > > +	struct xfs_mount	*mp)
> > > +{
> > > +	/*
> > > +	 * Never write to the log on norecovery mounts, if the block device is
> > > +	 * read-only, or if the filesystem is shutdown. Read-only mounts still
> > > +	 * allow internal writes for log recovery and unmount purposes, so don't
> > > +	 * restrict that case here.
> > > +	 */
> > > +	if (mp->m_flags & XFS_MOUNT_NORECOVERY)
> > > +		return false;
> > > +	if (xfs_readonly_buftarg(mp->m_log->l_targ))
> > > +		return false;
> > > +	if (XFS_FORCED_SHUTDOWN(mp))
> > > +		return false;
> > > +	return true;
> > > +}
> > > +
> > >  /*
> > >   * Replenish the byte reservation required by moving the grant write head.
> > >   */
> > > @@ -886,15 +905,8 @@ xfs_log_unmount_write(
> > >  {
> > >  	struct xlog		*log = mp->m_log;
> > >  
> > > -	/*
> > > -	 * Don't write out unmount record on norecovery mounts or ro devices.
> > > -	 * Or, if we are doing a forced umount (typically because of IO errors).
> > > -	 */
> > > -	if (mp->m_flags & XFS_MOUNT_NORECOVERY ||
> > > -	    xfs_readonly_buftarg(log->l_targ)) {
> > > -		ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
> > > +	if (!xfs_log_writable(mp))
> > >  		return;
> > > -	}
> > >  
> > >  	xfs_log_force(mp, XFS_LOG_SYNC);
> > >  
> > > diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
> > > index 58c3fcbec94a..98c913da7587 100644
> > > --- a/fs/xfs/xfs_log.h
> > > +++ b/fs/xfs/xfs_log.h
> > > @@ -127,6 +127,7 @@ int	  xfs_log_reserve(struct xfs_mount *mp,
> > >  int	  xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic);
> > >  void      xfs_log_unmount(struct xfs_mount *mp);
> > >  int	  xfs_log_force_umount(struct xfs_mount *mp, int logerror);
> > > +bool	xfs_log_writable(struct xfs_mount *mp);
> > >  
> > >  struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
> > >  void	  xfs_log_ticket_put(struct xlog_ticket *ticket);
> > > diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
> > > index 7110507a2b6b..a62b8a574409 100644
> > > --- a/fs/xfs/xfs_mount.c
> > > +++ b/fs/xfs/xfs_mount.c
> > > @@ -1176,8 +1176,7 @@ xfs_fs_writable(
> > >  int
> > >  xfs_log_sbcount(xfs_mount_t *mp)
> > >  {
> > > -	/* allow this to proceed during the freeze sequence... */
> > > -	if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE))
> > > +	if (!xfs_log_writable(mp))
> > >  		return 0;
> > >  
> > >  	/*
> > > -- 
> > > 2.26.2
> > > 
> > 
>
diff mbox series

Patch

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index fa2d05e65ff1..b445e63cbc3c 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -347,6 +347,25 @@  xlog_tic_add_region(xlog_ticket_t *tic, uint len, uint type)
 	tic->t_res_num++;
 }
 
+bool
+xfs_log_writable(
+	struct xfs_mount	*mp)
+{
+	/*
+	 * Never write to the log on norecovery mounts, if the block device is
+	 * read-only, or if the filesystem is shutdown. Read-only mounts still
+	 * allow internal writes for log recovery and unmount purposes, so don't
+	 * restrict that case here.
+	 */
+	if (mp->m_flags & XFS_MOUNT_NORECOVERY)
+		return false;
+	if (xfs_readonly_buftarg(mp->m_log->l_targ))
+		return false;
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return false;
+	return true;
+}
+
 /*
  * Replenish the byte reservation required by moving the grant write head.
  */
@@ -886,15 +905,8 @@  xfs_log_unmount_write(
 {
 	struct xlog		*log = mp->m_log;
 
-	/*
-	 * Don't write out unmount record on norecovery mounts or ro devices.
-	 * Or, if we are doing a forced umount (typically because of IO errors).
-	 */
-	if (mp->m_flags & XFS_MOUNT_NORECOVERY ||
-	    xfs_readonly_buftarg(log->l_targ)) {
-		ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
+	if (!xfs_log_writable(mp))
 		return;
-	}
 
 	xfs_log_force(mp, XFS_LOG_SYNC);
 
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 58c3fcbec94a..98c913da7587 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -127,6 +127,7 @@  int	  xfs_log_reserve(struct xfs_mount *mp,
 int	  xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic);
 void      xfs_log_unmount(struct xfs_mount *mp);
 int	  xfs_log_force_umount(struct xfs_mount *mp, int logerror);
+bool	xfs_log_writable(struct xfs_mount *mp);
 
 struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
 void	  xfs_log_ticket_put(struct xlog_ticket *ticket);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 7110507a2b6b..a62b8a574409 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1176,8 +1176,7 @@  xfs_fs_writable(
 int
 xfs_log_sbcount(xfs_mount_t *mp)
 {
-	/* allow this to proceed during the freeze sequence... */
-	if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE))
+	if (!xfs_log_writable(mp))
 		return 0;
 
 	/*