Message ID | 20210108190919.623672-5-hsiangkao@redhat.com (mailing list archive) |
---|---|
State | Superseded, archived |
Headers | show |
Series | xfs: support shrinking free space in the last AG | expand |
On Sat, Jan 09, 2021 at 03:09:19AM +0800, Gao Xiang wrote: > As the first step of shrinking, this attempts to enable shrinking > unused space in the last allocation group by fixing up freespace > btree, agi, agf and adjusting super block and introduce a helper > xfs_ag_shrink_space() to fixup the last AG. > > This can be all done in one transaction for now, so I think no > additional protection is needed. > > Signed-off-by: Gao Xiang <hsiangkao@redhat.com> > --- > fs/xfs/libxfs/xfs_ag.c | 72 ++++++++++++++++++++++++++++++++++++++++++ > fs/xfs/libxfs/xfs_ag.h | 2 ++ > fs/xfs/xfs_fsops.c | 69 ++++++++++++++++++++++++++++++---------- > fs/xfs/xfs_trans.c | 1 - > 4 files changed, 126 insertions(+), 18 deletions(-) > > diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c > index 9331f3516afa..bec10c85e2a9 100644 > --- a/fs/xfs/libxfs/xfs_ag.c > +++ b/fs/xfs/libxfs/xfs_ag.c > @@ -485,6 +485,78 @@ xfs_ag_init_headers( > return error; > } > > +int > +xfs_ag_shrink_space( > + struct xfs_mount *mp, > + struct xfs_trans *tp, > + struct aghdr_init_data *id, > + xfs_extlen_t len) > +{ > + struct xfs_buf *agibp, *agfbp; > + struct xfs_agi *agi; > + struct xfs_agf *agf; > + int error, err2; > + struct xfs_alloc_arg args = { > + .tp = tp, > + .mp = mp, > + .type = XFS_ALLOCTYPE_THIS_BNO, > + .minlen = len, > + .maxlen = len, > + .oinfo = XFS_RMAP_OINFO_SKIP_UPDATE, > + .resv = XFS_AG_RESV_NONE, > + .prod = 1 > + }; > + > + ASSERT(id->agno == mp->m_sb.sb_agcount - 1); > + error = xfs_ialloc_read_agi(mp, tp, id->agno, &agibp); > + if (error) > + return error; > + > + agi = agibp->b_addr; > + > + error = xfs_alloc_read_agf(mp, tp, id->agno, 0, &agfbp); > + if (error) > + return error; > + > + args.fsbno = XFS_AGB_TO_FSB(mp, id->agno, > + be32_to_cpu(agi->agi_length) - len); > + > + /* remove the preallocations before allocation and re-establish then */ > + error = xfs_ag_resv_free(agibp->b_pag); > + if (error) > + return error; > + > + /* internal log shouldn't also show up in the free space btrees */ > + error = xfs_alloc_vextent(&args); > + if (error) > + goto out; > + > + if (args.agbno == NULLAGBLOCK) { > + error = -ENOSPC; > + goto out; > + } > + > + /* Change the agi length */ > + be32_add_cpu(&agi->agi_length, -len); > + xfs_ialloc_log_agi(tp, agibp, XFS_AGI_LENGTH); > + > + /* Change agf length */ > + agf = agfbp->b_addr; > + be32_add_cpu(&agf->agf_length, -len); > + ASSERT(agf->agf_length == agi->agi_length); > + xfs_alloc_log_agf(tp, agfbp, XFS_AGF_LENGTH); > + > +out: > + err2 = xfs_ag_resv_init(agibp->b_pag, tp); > + if (err2 && err2 != -ENOSPC) { > + xfs_warn(mp, > +"Error %d reserving per-AG metadata reserve pool.", err2); > + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); > + return err2; > + } > + return error; > +} > + > /* > * Extent the AG indicated by the @id by the length passed in > */ > diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h > index 5166322807e7..f3b5bbfeadce 100644 > --- a/fs/xfs/libxfs/xfs_ag.h > +++ b/fs/xfs/libxfs/xfs_ag.h > @@ -24,6 +24,8 @@ struct aghdr_init_data { > }; > > int xfs_ag_init_headers(struct xfs_mount *mp, struct aghdr_init_data *id); > +int xfs_ag_shrink_space(struct xfs_mount *mp, struct xfs_trans *tp, > + struct aghdr_init_data *id, xfs_extlen_t len); > int xfs_ag_extend_space(struct xfs_mount *mp, struct xfs_trans *tp, > struct aghdr_init_data *id, xfs_extlen_t len); > int xfs_ag_get_geometry(struct xfs_mount *mp, xfs_agnumber_t agno, > diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c > index a792d1f0ac55..eea38395e804 100644 > --- a/fs/xfs/xfs_fsops.c > +++ b/fs/xfs/xfs_fsops.c > @@ -79,19 +79,22 @@ xfs_growfs_data_private( > xfs_rfsblock_t delta; > xfs_agnumber_t oagcount; > struct xfs_trans *tp; > + bool extend; > struct aghdr_init_data id = {}; > > nb = in->newblocks; > - if (nb < mp->m_sb.sb_dblocks) > - return -EINVAL; > - if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb))) > + error = xfs_sb_validate_fsb_count(&mp->m_sb, nb); > + if (error) > return error; > - error = xfs_buf_read_uncached(mp->m_ddev_targp, > + > + if (nb > mp->m_sb.sb_dblocks) { > + error = xfs_buf_read_uncached(mp->m_ddev_targp, > XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), > XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL); > - if (error) > - return error; > - xfs_buf_relse(bp); > + if (error) > + return error; > + xfs_buf_relse(bp); > + } > > delta = nb; /* use delta as a temporary here */ Yikes, can this become a separate variable please? > nb_mod = do_div(delta, mp->m_sb.sb_agblocks); > @@ -99,10 +102,18 @@ xfs_growfs_data_private( > if (nb_mod && nb_mod < XFS_MIN_AG_BLOCKS) { > nagcount--; > nb = (xfs_rfsblock_t)nagcount * mp->m_sb.sb_agblocks; > - if (nb < mp->m_sb.sb_dblocks) > + if (nagcount < 2) > return -EINVAL; > } > - delta = nb - mp->m_sb.sb_dblocks; > + > + if (nb > mp->m_sb.sb_dblocks) { > + delta = nb - mp->m_sb.sb_dblocks; > + extend = true; > + } else { > + delta = mp->m_sb.sb_dblocks - nb; > + extend = false; /me wonders why delta isn't simply int64_t, and then you can do things like: if (delta > 0) growfs else if (delta < 0) shrinkfs ? > + } > + > oagcount = mp->m_sb.sb_agcount; > > /* allocate the new per-ag structures */ > @@ -110,22 +121,34 @@ xfs_growfs_data_private( > error = xfs_initialize_perag(mp, nagcount, &nagimax); > if (error) > return error; > + } else if (nagcount != oagcount) { > + /* TODO: shrinking the entire AGs hasn't yet completed */ > + return -EINVAL; > } > > error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata, > - XFS_GROWFS_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp); > + (extend ? XFS_GROWFS_SPACE_RES(mp) : delta), 0, > + XFS_TRANS_RESERVE, &tp); > if (error) > return error; > > - error = xfs_resizefs_init_new_ags(mp, &id, oagcount, nagcount, &delta); > - if (error) > - goto out_trans_cancel; > - > + if (extend) { > + error = xfs_resizefs_init_new_ags(mp, &id, oagcount, > + nagcount, &delta); > + if (error) > + goto out_trans_cancel; > + } > xfs_trans_agblocks_delta(tp, id.nfree); > > - /* If there are new blocks in the old last AG, extend it. */ > + /* If there are some blocks in the last AG, resize it. */ > if (delta) { > - error = xfs_ag_extend_space(mp, tp, &id, delta); > + if (extend) { > + error = xfs_ag_extend_space(mp, tp, &id, delta); > + } else { > + id.agno = nagcount - 1; > + error = xfs_ag_shrink_space(mp, tp, &id, delta); > + } > + > if (error) > goto out_trans_cancel; > } > @@ -137,11 +160,19 @@ xfs_growfs_data_private( > */ > if (nagcount > oagcount) > xfs_trans_mod_sb(tp, XFS_TRANS_SB_AGCOUNT, nagcount - oagcount); > - if (nb > mp->m_sb.sb_dblocks) > + if (nb != mp->m_sb.sb_dblocks) > xfs_trans_mod_sb(tp, XFS_TRANS_SB_DBLOCKS, > nb - mp->m_sb.sb_dblocks); > if (id.nfree) > xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, id.nfree); > + > + /* > + * update in-core counters (especially sb_fdblocks) now > + * so xfs_validate_sb_write() can pass. > + */ > + if (xfs_sb_version_haslazysbcount(&mp->m_sb)) > + xfs_log_sb(tp); > + > xfs_trans_set_sync(tp); > error = xfs_trans_commit(tp); > if (error) > @@ -178,6 +209,10 @@ xfs_growfs_data_private( > return error; > > out_trans_cancel: > + if (!extend && (tp->t_flags & XFS_TRANS_DIRTY)) { > + xfs_trans_commit(tp); > + return error; When do we encounter the (!extend && DIRTY && cancelled) state? --D > + } > xfs_trans_cancel(tp); > return error; > } > diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c > index e72730f85af1..fd2cbf414b80 100644 > --- a/fs/xfs/xfs_trans.c > +++ b/fs/xfs/xfs_trans.c > @@ -419,7 +419,6 @@ xfs_trans_mod_sb( > tp->t_res_frextents_delta += delta; > break; > case XFS_TRANS_SB_DBLOCKS: > - ASSERT(delta > 0); > tp->t_dblocks_delta += delta; > break; > case XFS_TRANS_SB_AGCOUNT: > -- > 2.27.0 >
Hi Darrick, On Fri, Jan 08, 2021 at 01:19:42PM -0800, Darrick J. Wong wrote: > On Sat, Jan 09, 2021 at 03:09:19AM +0800, Gao Xiang wrote: ... > > delta = nb; /* use delta as a temporary here */ > > Yikes, can this become a separate variable please? Ok. > > > nb_mod = do_div(delta, mp->m_sb.sb_agblocks); > > @@ -99,10 +102,18 @@ xfs_growfs_data_private( > > if (nb_mod && nb_mod < XFS_MIN_AG_BLOCKS) { > > nagcount--; > > nb = (xfs_rfsblock_t)nagcount * mp->m_sb.sb_agblocks; > > - if (nb < mp->m_sb.sb_dblocks) > > + if (nagcount < 2) > > return -EINVAL; > > } > > - delta = nb - mp->m_sb.sb_dblocks; > > + > > + if (nb > mp->m_sb.sb_dblocks) { > > + delta = nb - mp->m_sb.sb_dblocks; > > + extend = true; > > + } else { > > + delta = mp->m_sb.sb_dblocks - nb; > > + extend = false; > > /me wonders why delta isn't simply int64_t, and then you can do things > like: > > if (delta > 0) > growfs > else if (delta < 0) > shrinkfs > > ? Yeah, delta changed into a signed variable in mycurrent experimental shrinking entire AG hack as well. Will update this here in advance. Thanks, Gao Xiang
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index 9331f3516afa..bec10c85e2a9 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -485,6 +485,78 @@ xfs_ag_init_headers( return error; } +int +xfs_ag_shrink_space( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct aghdr_init_data *id, + xfs_extlen_t len) +{ + struct xfs_buf *agibp, *agfbp; + struct xfs_agi *agi; + struct xfs_agf *agf; + int error, err2; + struct xfs_alloc_arg args = { + .tp = tp, + .mp = mp, + .type = XFS_ALLOCTYPE_THIS_BNO, + .minlen = len, + .maxlen = len, + .oinfo = XFS_RMAP_OINFO_SKIP_UPDATE, + .resv = XFS_AG_RESV_NONE, + .prod = 1 + }; + + ASSERT(id->agno == mp->m_sb.sb_agcount - 1); + error = xfs_ialloc_read_agi(mp, tp, id->agno, &agibp); + if (error) + return error; + + agi = agibp->b_addr; + + error = xfs_alloc_read_agf(mp, tp, id->agno, 0, &agfbp); + if (error) + return error; + + args.fsbno = XFS_AGB_TO_FSB(mp, id->agno, + be32_to_cpu(agi->agi_length) - len); + + /* remove the preallocations before allocation and re-establish then */ + error = xfs_ag_resv_free(agibp->b_pag); + if (error) + return error; + + /* internal log shouldn't also show up in the free space btrees */ + error = xfs_alloc_vextent(&args); + if (error) + goto out; + + if (args.agbno == NULLAGBLOCK) { + error = -ENOSPC; + goto out; + } + + /* Change the agi length */ + be32_add_cpu(&agi->agi_length, -len); + xfs_ialloc_log_agi(tp, agibp, XFS_AGI_LENGTH); + + /* Change agf length */ + agf = agfbp->b_addr; + be32_add_cpu(&agf->agf_length, -len); + ASSERT(agf->agf_length == agi->agi_length); + xfs_alloc_log_agf(tp, agfbp, XFS_AGF_LENGTH); + +out: + err2 = xfs_ag_resv_init(agibp->b_pag, tp); + if (err2 && err2 != -ENOSPC) { + xfs_warn(mp, +"Error %d reserving per-AG metadata reserve pool.", err2); + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + return err2; + } + return error; +} + /* * Extent the AG indicated by the @id by the length passed in */ diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index 5166322807e7..f3b5bbfeadce 100644 --- a/fs/xfs/libxfs/xfs_ag.h +++ b/fs/xfs/libxfs/xfs_ag.h @@ -24,6 +24,8 @@ struct aghdr_init_data { }; int xfs_ag_init_headers(struct xfs_mount *mp, struct aghdr_init_data *id); +int xfs_ag_shrink_space(struct xfs_mount *mp, struct xfs_trans *tp, + struct aghdr_init_data *id, xfs_extlen_t len); int xfs_ag_extend_space(struct xfs_mount *mp, struct xfs_trans *tp, struct aghdr_init_data *id, xfs_extlen_t len); int xfs_ag_get_geometry(struct xfs_mount *mp, xfs_agnumber_t agno, diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index a792d1f0ac55..eea38395e804 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -79,19 +79,22 @@ xfs_growfs_data_private( xfs_rfsblock_t delta; xfs_agnumber_t oagcount; struct xfs_trans *tp; + bool extend; struct aghdr_init_data id = {}; nb = in->newblocks; - if (nb < mp->m_sb.sb_dblocks) - return -EINVAL; - if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb))) + error = xfs_sb_validate_fsb_count(&mp->m_sb, nb); + if (error) return error; - error = xfs_buf_read_uncached(mp->m_ddev_targp, + + if (nb > mp->m_sb.sb_dblocks) { + error = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL); - if (error) - return error; - xfs_buf_relse(bp); + if (error) + return error; + xfs_buf_relse(bp); + } delta = nb; /* use delta as a temporary here */ nb_mod = do_div(delta, mp->m_sb.sb_agblocks); @@ -99,10 +102,18 @@ xfs_growfs_data_private( if (nb_mod && nb_mod < XFS_MIN_AG_BLOCKS) { nagcount--; nb = (xfs_rfsblock_t)nagcount * mp->m_sb.sb_agblocks; - if (nb < mp->m_sb.sb_dblocks) + if (nagcount < 2) return -EINVAL; } - delta = nb - mp->m_sb.sb_dblocks; + + if (nb > mp->m_sb.sb_dblocks) { + delta = nb - mp->m_sb.sb_dblocks; + extend = true; + } else { + delta = mp->m_sb.sb_dblocks - nb; + extend = false; + } + oagcount = mp->m_sb.sb_agcount; /* allocate the new per-ag structures */ @@ -110,22 +121,34 @@ xfs_growfs_data_private( error = xfs_initialize_perag(mp, nagcount, &nagimax); if (error) return error; + } else if (nagcount != oagcount) { + /* TODO: shrinking the entire AGs hasn't yet completed */ + return -EINVAL; } error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata, - XFS_GROWFS_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp); + (extend ? XFS_GROWFS_SPACE_RES(mp) : delta), 0, + XFS_TRANS_RESERVE, &tp); if (error) return error; - error = xfs_resizefs_init_new_ags(mp, &id, oagcount, nagcount, &delta); - if (error) - goto out_trans_cancel; - + if (extend) { + error = xfs_resizefs_init_new_ags(mp, &id, oagcount, + nagcount, &delta); + if (error) + goto out_trans_cancel; + } xfs_trans_agblocks_delta(tp, id.nfree); - /* If there are new blocks in the old last AG, extend it. */ + /* If there are some blocks in the last AG, resize it. */ if (delta) { - error = xfs_ag_extend_space(mp, tp, &id, delta); + if (extend) { + error = xfs_ag_extend_space(mp, tp, &id, delta); + } else { + id.agno = nagcount - 1; + error = xfs_ag_shrink_space(mp, tp, &id, delta); + } + if (error) goto out_trans_cancel; } @@ -137,11 +160,19 @@ xfs_growfs_data_private( */ if (nagcount > oagcount) xfs_trans_mod_sb(tp, XFS_TRANS_SB_AGCOUNT, nagcount - oagcount); - if (nb > mp->m_sb.sb_dblocks) + if (nb != mp->m_sb.sb_dblocks) xfs_trans_mod_sb(tp, XFS_TRANS_SB_DBLOCKS, nb - mp->m_sb.sb_dblocks); if (id.nfree) xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, id.nfree); + + /* + * update in-core counters (especially sb_fdblocks) now + * so xfs_validate_sb_write() can pass. + */ + if (xfs_sb_version_haslazysbcount(&mp->m_sb)) + xfs_log_sb(tp); + xfs_trans_set_sync(tp); error = xfs_trans_commit(tp); if (error) @@ -178,6 +209,10 @@ xfs_growfs_data_private( return error; out_trans_cancel: + if (!extend && (tp->t_flags & XFS_TRANS_DIRTY)) { + xfs_trans_commit(tp); + return error; + } xfs_trans_cancel(tp); return error; } diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index e72730f85af1..fd2cbf414b80 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -419,7 +419,6 @@ xfs_trans_mod_sb( tp->t_res_frextents_delta += delta; break; case XFS_TRANS_SB_DBLOCKS: - ASSERT(delta > 0); tp->t_dblocks_delta += delta; break; case XFS_TRANS_SB_AGCOUNT:
As the first step of shrinking, this attempts to enable shrinking unused space in the last allocation group by fixing up freespace btree, agi, agf and adjusting super block and introduce a helper xfs_ag_shrink_space() to fixup the last AG. This can be all done in one transaction for now, so I think no additional protection is needed. Signed-off-by: Gao Xiang <hsiangkao@redhat.com> --- fs/xfs/libxfs/xfs_ag.c | 72 ++++++++++++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_ag.h | 2 ++ fs/xfs/xfs_fsops.c | 69 ++++++++++++++++++++++++++++++---------- fs/xfs/xfs_trans.c | 1 - 4 files changed, 126 insertions(+), 18 deletions(-)