diff mbox series

[RFC,2/4] xfs: check ag is empty

Message ID 20210414195240.1802221-3-hsiangkao@redhat.com (mailing list archive)
State New
Headers show
Series xfs: support shrinking empty AGs | expand

Commit Message

Gao Xiang April 14, 2021, 7:52 p.m. UTC
After a perag is stableized as inactive, we could check if such ag
is empty. In order to achieve that, AGFL is also needed to be
emptified in advance to make sure that only one freespace extent
will exist here.

Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
---
 fs/xfs/libxfs/xfs_alloc.c | 97 +++++++++++++++++++++++++++++++++++++++
 fs/xfs/libxfs/xfs_alloc.h |  4 ++
 2 files changed, 101 insertions(+)

Comments

Dave Chinner April 15, 2021, 3:52 a.m. UTC | #1
On Thu, Apr 15, 2021 at 03:52:38AM +0800, Gao Xiang wrote:
> After a perag is stableized as inactive, we could check if such ag
> is empty. In order to achieve that, AGFL is also needed to be
> emptified in advance to make sure that only one freespace extent
> will exist here.
> 
> Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
> ---
>  fs/xfs/libxfs/xfs_alloc.c | 97 +++++++++++++++++++++++++++++++++++++++
>  fs/xfs/libxfs/xfs_alloc.h |  4 ++
>  2 files changed, 101 insertions(+)
> 
> diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
> index 01d4e4d4c1d6..60a8c134c00e 100644
> --- a/fs/xfs/libxfs/xfs_alloc.c
> +++ b/fs/xfs/libxfs/xfs_alloc.c
> @@ -2474,6 +2474,103 @@ xfs_defer_agfl_block(
>  	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list);
>  }
>  
> +int
> +xfs_ag_emptify_agfl(
> +	struct xfs_buf		*agfbp)
> +{
> +	struct xfs_mount	*mp = agfbp->b_mount;
> +	struct xfs_perag	*pag = agfbp->b_pag;
> +	struct xfs_trans	*tp;
> +	int			error;
> +	struct xfs_owner_info	oinfo = XFS_RMAP_OINFO_AG;
> +
> +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata, 0, 0,
> +				XFS_TRANS_RESERVE, &tp);
> +	if (error)
> +		return error;
> +
> +	/* attach to the transaction and keep it from unlocked */
> +	xfs_trans_bjoin(tp, agfbp);
> +	xfs_trans_bhold(tp, agfbp);
> +
> +	while (pag->pagf_flcount) {
> +		xfs_agblock_t	bno;
> +		int		error;
> +
> +		error = xfs_alloc_get_freelist(tp, agfbp, &bno, 0);
> +		if (error)
> +			break;
> +
> +		ASSERT(bno != NULLAGBLOCK);
> +		xfs_defer_agfl_block(tp, pag->pag_agno, bno, &oinfo);
> +	}
> +	xfs_trans_set_sync(tp);
> +	xfs_trans_commit(tp);
> +	return error;
> +}

Why do we need to empty the agfl to determine the AG is empty?

> +int
> +xfs_ag_is_empty(
> +	struct xfs_buf		*agfbp)
> +{
> +	struct xfs_mount	*mp = agfbp->b_mount;
> +	struct xfs_perag	*pag = agfbp->b_pag;
> +	struct xfs_agf		*agf = agfbp->b_addr;
> +	struct xfs_btree_cur	*cnt_cur;
> +	xfs_agblock_t		nfbno;
> +	xfs_extlen_t		nflen;
> +	int			error, i;
> +
> +	if (!pag->pag_inactive)
> +		return -EINVAL;
> +
> +	if (pag->pagf_freeblks + pag->pagf_flcount !=
> +	    be32_to_cpu(agf->agf_length) - mp->m_ag_prealloc_blocks)
> +		return -ENOTEMPTY;

This is the empty check right here, yes?

Hmmm - this has to fail if the log is in this AG, right? Because we
can't move the log (yet), so we should explicitly be checking that
the log is in this AG before check the amount of free space...

> +
> +	if (pag->pagf_flcount) {
> +		error = xfs_ag_emptify_agfl(agfbp);
> +		if (error)
> +			return error;
> +
> +		if (pag->pagf_freeblks !=
> +		    be32_to_cpu(agf->agf_length) - mp->m_ag_prealloc_blocks)
> +			return -ENOTEMPTY;
> +	}
> +
> +	if (pag->pagi_count > 0 || pag->pagi_freecount > 0)
> +		return -ENOTEMPTY;
> +
> +	if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > 1 ||
> +	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > 1)
> +		return -ENOTEMPTY;
> +
> +	cnt_cur = xfs_allocbt_init_cursor(mp, NULL, agfbp,
> +					  pag->pag_agno, XFS_BTNUM_CNT);
> +	ASSERT(cnt_cur->bc_nlevels == 1);
> +	error = xfs_alloc_lookup_ge(cnt_cur, 0,
> +				    be32_to_cpu(agf->agf_longest), &i);
> +	if (error || !i)
> +		goto out;
> +
> +	error = xfs_alloc_get_rec(cnt_cur, &nfbno, &nflen, &i);
> +	if (error)
> +		goto out;
> +
> +	if (XFS_IS_CORRUPT(mp, i != 1)) {
> +		error = -EFSCORRUPTED;
> +		goto out;
> +	}
> +
> +	error = -ENOTEMPTY;
> +	if (nfbno == mp->m_ag_prealloc_blocks &&
> +	    nflen == pag->pagf_freeblks)
> +		error = 0;

Ah, that's why you are trying to empty the AGFL.

This won't work because the AG btree roots can be anywhere in the AG
once the tree has grown beyond a single block. Hence when the AG is
fully empty, the btree root blocks can still break up free space
into multiple extents that are each less than a maximally sized
single extent. e.g. from my workstation:

$ xfs_db -r -c "agf 0" -c "p" /dev/mapper/base-main 
magicnum = 0x58414746
versionnum = 1
seqno = 0
length = 13732864
bnoroot = 29039
cntroot = 922363
rmaproot = 8461704
refcntroot = 6
bnolevel = 2
cntlevel = 2
rmaplevel = 3
....

none of the root blocks are inside the m_ag_prealloc_blocks region
of the AG. m_ag_prealloc_blocks is just for space accounting and
does not imply physical location of the btree root blocks...

Hence I think the only checks that need to be done here are whether
the number of free blocks equals the maximal number of blocks
available in the given AG.

Cheers,

Dave.
Gao Xiang April 15, 2021, 4:34 a.m. UTC | #2
Hi Dave,

On Thu, Apr 15, 2021 at 01:52:52PM +1000, Dave Chinner wrote:
> On Thu, Apr 15, 2021 at 03:52:38AM +0800, Gao Xiang wrote:
> > After a perag is stableized as inactive, we could check if such ag
> > is empty. In order to achieve that, AGFL is also needed to be
> > emptified in advance to make sure that only one freespace extent
> > will exist here.
> > 
> > Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
> > ---
> >  fs/xfs/libxfs/xfs_alloc.c | 97 +++++++++++++++++++++++++++++++++++++++
> >  fs/xfs/libxfs/xfs_alloc.h |  4 ++
> >  2 files changed, 101 insertions(+)
> > 
> > diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
> > index 01d4e4d4c1d6..60a8c134c00e 100644
> > --- a/fs/xfs/libxfs/xfs_alloc.c
> > +++ b/fs/xfs/libxfs/xfs_alloc.c
> > @@ -2474,6 +2474,103 @@ xfs_defer_agfl_block(
> >  	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list);
> >  }
> >  
> > +int
> > +xfs_ag_emptify_agfl(
> > +	struct xfs_buf		*agfbp)
> > +{
> > +	struct xfs_mount	*mp = agfbp->b_mount;
> > +	struct xfs_perag	*pag = agfbp->b_pag;
> > +	struct xfs_trans	*tp;
> > +	int			error;
> > +	struct xfs_owner_info	oinfo = XFS_RMAP_OINFO_AG;
> > +
> > +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata, 0, 0,
> > +				XFS_TRANS_RESERVE, &tp);
> > +	if (error)
> > +		return error;
> > +
> > +	/* attach to the transaction and keep it from unlocked */
> > +	xfs_trans_bjoin(tp, agfbp);
> > +	xfs_trans_bhold(tp, agfbp);
> > +
> > +	while (pag->pagf_flcount) {
> > +		xfs_agblock_t	bno;
> > +		int		error;
> > +
> > +		error = xfs_alloc_get_freelist(tp, agfbp, &bno, 0);
> > +		if (error)
> > +			break;
> > +
> > +		ASSERT(bno != NULLAGBLOCK);
> > +		xfs_defer_agfl_block(tp, pag->pag_agno, bno, &oinfo);
> > +	}
> > +	xfs_trans_set_sync(tp);
> > +	xfs_trans_commit(tp);
> > +	return error;
> > +}
> 
> Why do we need to empty the agfl to determine the AG is empty?
> 
> > +int
> > +xfs_ag_is_empty(
> > +	struct xfs_buf		*agfbp)
> > +{
> > +	struct xfs_mount	*mp = agfbp->b_mount;
> > +	struct xfs_perag	*pag = agfbp->b_pag;
> > +	struct xfs_agf		*agf = agfbp->b_addr;
> > +	struct xfs_btree_cur	*cnt_cur;
> > +	xfs_agblock_t		nfbno;
> > +	xfs_extlen_t		nflen;
> > +	int			error, i;
> > +
> > +	if (!pag->pag_inactive)
> > +		return -EINVAL;
> > +
> > +	if (pag->pagf_freeblks + pag->pagf_flcount !=
> > +	    be32_to_cpu(agf->agf_length) - mp->m_ag_prealloc_blocks)
> > +		return -ENOTEMPTY;
> 
> This is the empty check right here, yes?
> 
> Hmmm - this has to fail if the log is in this AG, right? Because we
> can't move the log (yet), so we should explicitly be checking that
> the log is in this AG before check the amount of free space...

Ok.

> 
> > +
> > +	if (pag->pagf_flcount) {
> > +		error = xfs_ag_emptify_agfl(agfbp);
> > +		if (error)
> > +			return error;
> > +
> > +		if (pag->pagf_freeblks !=
> > +		    be32_to_cpu(agf->agf_length) - mp->m_ag_prealloc_blocks)
> > +			return -ENOTEMPTY;
> > +	}
> > +
> > +	if (pag->pagi_count > 0 || pag->pagi_freecount > 0)
> > +		return -ENOTEMPTY;
> > +
> > +	if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > 1 ||
> > +	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > 1)
> > +		return -ENOTEMPTY;
> > +
> > +	cnt_cur = xfs_allocbt_init_cursor(mp, NULL, agfbp,
> > +					  pag->pag_agno, XFS_BTNUM_CNT);
> > +	ASSERT(cnt_cur->bc_nlevels == 1);
> > +	error = xfs_alloc_lookup_ge(cnt_cur, 0,
> > +				    be32_to_cpu(agf->agf_longest), &i);
> > +	if (error || !i)
> > +		goto out;
> > +
> > +	error = xfs_alloc_get_rec(cnt_cur, &nfbno, &nflen, &i);
> > +	if (error)
> > +		goto out;
> > +
> > +	if (XFS_IS_CORRUPT(mp, i != 1)) {
> > +		error = -EFSCORRUPTED;
> > +		goto out;
> > +	}
> > +
> > +	error = -ENOTEMPTY;
> > +	if (nfbno == mp->m_ag_prealloc_blocks &&
> > +	    nflen == pag->pagf_freeblks)
> > +		error = 0;
> 
> Ah, that's why you are trying to empty the AGFL.
> 
> This won't work because the AG btree roots can be anywhere in the AG
> once the tree has grown beyond a single block. Hence when the AG is
> fully empty, the btree root blocks can still break up free space
> into multiple extents that are each less than a maximally sized
> single extent. e.g. from my workstation:
> 
> $ xfs_db -r -c "agf 0" -c "p" /dev/mapper/base-main 
> magicnum = 0x58414746
> versionnum = 1
> seqno = 0
> length = 13732864
> bnoroot = 29039
> cntroot = 922363
> rmaproot = 8461704
> refcntroot = 6
> bnolevel = 2
> cntlevel = 2
> rmaplevel = 3
> ....
> 
> none of the root blocks are inside the m_ag_prealloc_blocks region
> of the AG. m_ag_prealloc_blocks is just for space accounting and
> does not imply physical location of the btree root blocks...
> 
> Hence I think the only checks that need to be done here are whether
> the number of free blocks equals the maximal number of blocks
> available in the given AG.

Yeah, I forgot about it, thanks for reminder. Yet I vaguely remembered
you mentioned to check the freespace btree integrity before shrinking
months ago. If that is completely needed, I tend to kill such check
and leave the following check only,

	if (pag->pagf_freeblks + pag->pagf_flcount !=
	    be32_to_cpu(agf->agf_length) - mp->m_ag_prealloc_blocks)
		return -ENOTEMPTY;

Thanks,
Gao Xiang

> 
> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
>
diff mbox series

Patch

diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 01d4e4d4c1d6..60a8c134c00e 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2474,6 +2474,103 @@  xfs_defer_agfl_block(
 	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list);
 }
 
+int
+xfs_ag_emptify_agfl(
+	struct xfs_buf		*agfbp)
+{
+	struct xfs_mount	*mp = agfbp->b_mount;
+	struct xfs_perag	*pag = agfbp->b_pag;
+	struct xfs_trans	*tp;
+	int			error;
+	struct xfs_owner_info	oinfo = XFS_RMAP_OINFO_AG;
+
+	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata, 0, 0,
+				XFS_TRANS_RESERVE, &tp);
+	if (error)
+		return error;
+
+	/* attach to the transaction and keep it from unlocked */
+	xfs_trans_bjoin(tp, agfbp);
+	xfs_trans_bhold(tp, agfbp);
+
+	while (pag->pagf_flcount) {
+		xfs_agblock_t	bno;
+		int		error;
+
+		error = xfs_alloc_get_freelist(tp, agfbp, &bno, 0);
+		if (error)
+			break;
+
+		ASSERT(bno != NULLAGBLOCK);
+		xfs_defer_agfl_block(tp, pag->pag_agno, bno, &oinfo);
+	}
+	xfs_trans_set_sync(tp);
+	xfs_trans_commit(tp);
+	return error;
+}
+
+int
+xfs_ag_is_empty(
+	struct xfs_buf		*agfbp)
+{
+	struct xfs_mount	*mp = agfbp->b_mount;
+	struct xfs_perag	*pag = agfbp->b_pag;
+	struct xfs_agf		*agf = agfbp->b_addr;
+	struct xfs_btree_cur	*cnt_cur;
+	xfs_agblock_t		nfbno;
+	xfs_extlen_t		nflen;
+	int			error, i;
+
+	if (!pag->pag_inactive)
+		return -EINVAL;
+
+	if (pag->pagf_freeblks + pag->pagf_flcount !=
+	    be32_to_cpu(agf->agf_length) - mp->m_ag_prealloc_blocks)
+		return -ENOTEMPTY;
+
+	if (pag->pagf_flcount) {
+		error = xfs_ag_emptify_agfl(agfbp);
+		if (error)
+			return error;
+
+		if (pag->pagf_freeblks !=
+		    be32_to_cpu(agf->agf_length) - mp->m_ag_prealloc_blocks)
+			return -ENOTEMPTY;
+	}
+
+	if (pag->pagi_count > 0 || pag->pagi_freecount > 0)
+		return -ENOTEMPTY;
+
+	if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > 1 ||
+	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > 1)
+		return -ENOTEMPTY;
+
+	cnt_cur = xfs_allocbt_init_cursor(mp, NULL, agfbp,
+					  pag->pag_agno, XFS_BTNUM_CNT);
+	ASSERT(cnt_cur->bc_nlevels == 1);
+	error = xfs_alloc_lookup_ge(cnt_cur, 0,
+				    be32_to_cpu(agf->agf_longest), &i);
+	if (error || !i)
+		goto out;
+
+	error = xfs_alloc_get_rec(cnt_cur, &nfbno, &nflen, &i);
+	if (error)
+		goto out;
+
+	if (XFS_IS_CORRUPT(mp, i != 1)) {
+		error = -EFSCORRUPTED;
+		goto out;
+	}
+
+	error = -ENOTEMPTY;
+	if (nfbno == mp->m_ag_prealloc_blocks &&
+	    nflen == pag->pagf_freeblks)
+		error = 0;
+out:
+	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+	return error;
+}
+
 #ifdef DEBUG
 /*
  * Check if an AGF has a free extent record whose length is equal to
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index a4427c5775c2..a7015b971075 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -156,6 +156,10 @@  xfs_alloc_read_agf(
 	int		flags,		/* XFS_ALLOC_FLAG_... */
 	struct xfs_buf	**bpp);		/* buffer for the ag freelist header */
 
+int
+xfs_ag_is_empty(
+	struct xfs_buf		*agfbp);
+
 /*
  * Allocate an extent (variable-size).
  */