Message ID | 20210324010621.2244671-5-hsiangkao@redhat.com (mailing list archive) |
---|---|
State | Accepted |
Headers | show |
Series | xfs: support shrinking free space in the last AG | expand |
On Wed, Mar 24, 2021 at 09:06:20AM +0800, Gao Xiang wrote: > As the first step of shrinking, this attempts to enable shrinking > unused space in the last allocation group by fixing up freespace > btree, agi, agf and adjusting super block and use a helper > xfs_ag_shrink_space() to fixup the last AG. > > This can be all done in one transaction for now, so I think no > additional protection is needed. > > Reviewed-by: Darrick J. Wong <djwong@kernel.org> > Signed-off-by: Gao Xiang <hsiangkao@redhat.com> > --- > fs/xfs/xfs_fsops.c | 84 +++++++++++++++++++++++++++------------------- > fs/xfs/xfs_trans.c | 1 - > 2 files changed, 50 insertions(+), 35 deletions(-) > > diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c > index d1ba04124c28..9457b0691ece 100644 > --- a/fs/xfs/xfs_fsops.c > +++ b/fs/xfs/xfs_fsops.c > @@ -91,23 +91,25 @@ xfs_growfs_data_private( > xfs_agnumber_t nagcount; > xfs_agnumber_t nagimax = 0; > xfs_rfsblock_t nb, nb_div, nb_mod; > - xfs_rfsblock_t delta; > + int64_t delta; > bool lastag_extended; > xfs_agnumber_t oagcount; > struct xfs_trans *tp; > struct aghdr_init_data id = {}; > > nb = in->newblocks; > - if (nb < mp->m_sb.sb_dblocks) > - return -EINVAL; > - if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb))) > + error = xfs_sb_validate_fsb_count(&mp->m_sb, nb); > + if (error) > return error; > - error = xfs_buf_read_uncached(mp->m_ddev_targp, > + > + if (nb > mp->m_sb.sb_dblocks) { > + error = xfs_buf_read_uncached(mp->m_ddev_targp, > XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), > XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL); > - if (error) > - return error; > - xfs_buf_relse(bp); > + if (error) > + return error; > + xfs_buf_relse(bp); > + } > > nb_div = nb; > nb_mod = do_div(nb_div, mp->m_sb.sb_agblocks); > @@ -115,10 +117,16 @@ xfs_growfs_data_private( > if (nb_mod && nb_mod < XFS_MIN_AG_BLOCKS) { > nagcount--; > nb = (xfs_rfsblock_t)nagcount * mp->m_sb.sb_agblocks; > - if (nb < mp->m_sb.sb_dblocks) > - return -EINVAL; > } > delta = nb - mp->m_sb.sb_dblocks; > + /* > + * Reject filesystems with a single AG because they are not > + * supported, and reject a shrink operation that would cause a > + * filesystem to become unsupported. > + */ > + if (delta < 0 && nagcount < 2) > + return -EINVAL; > + > oagcount = mp->m_sb.sb_agcount; > > /* allocate the new per-ag structures */ > @@ -126,15 +134,22 @@ xfs_growfs_data_private( > error = xfs_initialize_perag(mp, nagcount, &nagimax); > if (error) > return error; > + } else if (nagcount < oagcount) { > + /* TODO: shrinking the entire AGs hasn't yet completed */ > + return -EINVAL; > } > > error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata, > - XFS_GROWFS_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp); > + (delta > 0 ? XFS_GROWFS_SPACE_RES(mp) : -delta), 0, > + XFS_TRANS_RESERVE, &tp); > if (error) > return error; > > - error = xfs_resizefs_init_new_ags(tp, &id, oagcount, nagcount, > - delta, &lastag_extended); > + if (delta > 0) > + error = xfs_resizefs_init_new_ags(tp, &id, oagcount, nagcount, > + delta, &lastag_extended); > + else > + error = xfs_ag_shrink_space(mp, &tp, nagcount - 1, -delta); Assuming I don't hear anyone yelling NAK in the next day or so, I think I'll stage this for 5.13 with the following change to warn that the shrink feature is still EXPERIMENTAL: By the way, are you going to send a patch to shrink the realtime device too? diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 9457b0691ece..b33c894b6cf3 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -145,11 +145,20 @@ xfs_growfs_data_private( if (error) return error; - if (delta > 0) + if (delta > 0) { error = xfs_resizefs_init_new_ags(tp, &id, oagcount, nagcount, delta, &lastag_extended); - else + } else { + static struct ratelimit_state shrink_warning = \ + RATELIMIT_STATE_INIT("shrink_warning", 86400 * HZ, 1); + ratelimit_set_flags(&shrink_warning, RATELIMIT_MSG_ON_RELEASE); + + if (__ratelimit(&shrink_warning)) + xfs_alert(mp, + "EXPERIMENTAL online shrink feature in use. Use at your own risk!"); + error = xfs_ag_shrink_space(mp, &tp, nagcount - 1, -delta); + } if (error) goto out_trans_cancel; --D > if (error) > goto out_trans_cancel; > > @@ -169,28 +184,29 @@ xfs_growfs_data_private( > xfs_set_low_space_thresholds(mp); > mp->m_alloc_set_aside = xfs_alloc_set_aside(mp); > > - /* > - * If we expanded the last AG, free the per-AG reservation > - * so we can reinitialize it with the new size. > - */ > - if (lastag_extended) { > - struct xfs_perag *pag; > - > - pag = xfs_perag_get(mp, id.agno); > - error = xfs_ag_resv_free(pag); > - xfs_perag_put(pag); > - if (error) > - return error; > + if (delta > 0) { > + /* > + * If we expanded the last AG, free the per-AG reservation > + * so we can reinitialize it with the new size. > + */ > + if (lastag_extended) { > + struct xfs_perag *pag; > + > + pag = xfs_perag_get(mp, id.agno); > + error = xfs_ag_resv_free(pag); > + xfs_perag_put(pag); > + if (error) > + return error; > + } > + /* > + * Reserve AG metadata blocks. ENOSPC here does not mean there > + * was a growfs failure, just that there still isn't space for > + * new user data after the grow has been run. > + */ > + error = xfs_fs_reserve_ag_blocks(mp); > + if (error == -ENOSPC) > + error = 0; > } > - > - /* > - * Reserve AG metadata blocks. ENOSPC here does not mean there was a > - * growfs failure, just that there still isn't space for new user data > - * after the grow has been run. > - */ > - error = xfs_fs_reserve_ag_blocks(mp); > - if (error == -ENOSPC) > - error = 0; > return error; > > out_trans_cancel: > diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c > index b22a09e9daee..052274321993 100644 > --- a/fs/xfs/xfs_trans.c > +++ b/fs/xfs/xfs_trans.c > @@ -436,7 +436,6 @@ xfs_trans_mod_sb( > tp->t_res_frextents_delta += delta; > break; > case XFS_TRANS_SB_DBLOCKS: > - ASSERT(delta > 0); > tp->t_dblocks_delta += delta; > break; > case XFS_TRANS_SB_AGCOUNT: > -- > 2.27.0 >
Hi Darrick, On Wed, Mar 24, 2021 at 10:34:38AM -0700, Darrick J. Wong wrote: > On Wed, Mar 24, 2021 at 09:06:20AM +0800, Gao Xiang wrote: > > As the first step of shrinking, this attempts to enable shrinking > > unused space in the last allocation group by fixing up freespace > > btree, agi, agf and adjusting super block and use a helper > > xfs_ag_shrink_space() to fixup the last AG. > > > > This can be all done in one transaction for now, so I think no > > additional protection is needed. > > > > Reviewed-by: Darrick J. Wong <djwong@kernel.org> > > Signed-off-by: Gao Xiang <hsiangkao@redhat.com> > > --- > > fs/xfs/xfs_fsops.c | 84 +++++++++++++++++++++++++++------------------- > > fs/xfs/xfs_trans.c | 1 - > > 2 files changed, 50 insertions(+), 35 deletions(-) > > > > diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c > > index d1ba04124c28..9457b0691ece 100644 > > --- a/fs/xfs/xfs_fsops.c > > +++ b/fs/xfs/xfs_fsops.c > > @@ -91,23 +91,25 @@ xfs_growfs_data_private( > > xfs_agnumber_t nagcount; > > xfs_agnumber_t nagimax = 0; > > xfs_rfsblock_t nb, nb_div, nb_mod; > > - xfs_rfsblock_t delta; > > + int64_t delta; > > bool lastag_extended; > > xfs_agnumber_t oagcount; > > struct xfs_trans *tp; > > struct aghdr_init_data id = {}; > > > > nb = in->newblocks; > > - if (nb < mp->m_sb.sb_dblocks) > > - return -EINVAL; > > - if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb))) > > + error = xfs_sb_validate_fsb_count(&mp->m_sb, nb); > > + if (error) > > return error; > > - error = xfs_buf_read_uncached(mp->m_ddev_targp, > > + > > + if (nb > mp->m_sb.sb_dblocks) { > > + error = xfs_buf_read_uncached(mp->m_ddev_targp, > > XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), > > XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL); > > - if (error) > > - return error; > > - xfs_buf_relse(bp); > > + if (error) > > + return error; > > + xfs_buf_relse(bp); > > + } > > > > nb_div = nb; > > nb_mod = do_div(nb_div, mp->m_sb.sb_agblocks); > > @@ -115,10 +117,16 @@ xfs_growfs_data_private( > > if (nb_mod && nb_mod < XFS_MIN_AG_BLOCKS) { > > nagcount--; > > nb = (xfs_rfsblock_t)nagcount * mp->m_sb.sb_agblocks; > > - if (nb < mp->m_sb.sb_dblocks) > > - return -EINVAL; > > } > > delta = nb - mp->m_sb.sb_dblocks; > > + /* > > + * Reject filesystems with a single AG because they are not > > + * supported, and reject a shrink operation that would cause a > > + * filesystem to become unsupported. > > + */ > > + if (delta < 0 && nagcount < 2) > > + return -EINVAL; > > + > > oagcount = mp->m_sb.sb_agcount; > > > > /* allocate the new per-ag structures */ > > @@ -126,15 +134,22 @@ xfs_growfs_data_private( > > error = xfs_initialize_perag(mp, nagcount, &nagimax); > > if (error) > > return error; > > + } else if (nagcount < oagcount) { > > + /* TODO: shrinking the entire AGs hasn't yet completed */ > > + return -EINVAL; > > } > > > > error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata, > > - XFS_GROWFS_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp); > > + (delta > 0 ? XFS_GROWFS_SPACE_RES(mp) : -delta), 0, > > + XFS_TRANS_RESERVE, &tp); > > if (error) > > return error; > > > > - error = xfs_resizefs_init_new_ags(tp, &id, oagcount, nagcount, > > - delta, &lastag_extended); > > + if (delta > 0) > > + error = xfs_resizefs_init_new_ags(tp, &id, oagcount, nagcount, > > + delta, &lastag_extended); > > + else > > + error = xfs_ag_shrink_space(mp, &tp, nagcount - 1, -delta); > > Assuming I don't hear anyone yelling NAK in the next day or so, I think > I'll stage this for 5.13 with the following change to warn that the > shrink feature is still EXPERIMENTAL: > > By the way, are you going to send a patch to shrink the realtime device > too? My first priority will form the formal shrink-whole-AG patchset after this is finalized. I have little experience about rt device, maybe leave it later. > > diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c > index 9457b0691ece..b33c894b6cf3 100644 > --- a/fs/xfs/xfs_fsops.c > +++ b/fs/xfs/xfs_fsops.c > @@ -145,11 +145,20 @@ xfs_growfs_data_private( > if (error) > return error; > > - if (delta > 0) > + if (delta > 0) { > error = xfs_resizefs_init_new_ags(tp, &id, oagcount, nagcount, > delta, &lastag_extended); > - else > + } else { > + static struct ratelimit_state shrink_warning = \ > + RATELIMIT_STATE_INIT("shrink_warning", 86400 * HZ, 1); > + ratelimit_set_flags(&shrink_warning, RATELIMIT_MSG_ON_RELEASE); > + > + if (__ratelimit(&shrink_warning)) > + xfs_alert(mp, > + "EXPERIMENTAL online shrink feature in use. Use at your own risk!"); > + > error = xfs_ag_shrink_space(mp, &tp, nagcount - 1, -delta); > + } > if (error) > goto out_trans_cancel; I'm fine with that, thanks for updating. :) Thanks, Gao Xiang > > --D > > > if (error) > > goto out_trans_cancel; > > > > @@ -169,28 +184,29 @@ xfs_growfs_data_private( > > xfs_set_low_space_thresholds(mp); > > mp->m_alloc_set_aside = xfs_alloc_set_aside(mp); > > > > - /* > > - * If we expanded the last AG, free the per-AG reservation > > - * so we can reinitialize it with the new size. > > - */ > > - if (lastag_extended) { > > - struct xfs_perag *pag; > > - > > - pag = xfs_perag_get(mp, id.agno); > > - error = xfs_ag_resv_free(pag); > > - xfs_perag_put(pag); > > - if (error) > > - return error; > > + if (delta > 0) { > > + /* > > + * If we expanded the last AG, free the per-AG reservation > > + * so we can reinitialize it with the new size. > > + */ > > + if (lastag_extended) { > > + struct xfs_perag *pag; > > + > > + pag = xfs_perag_get(mp, id.agno); > > + error = xfs_ag_resv_free(pag); > > + xfs_perag_put(pag); > > + if (error) > > + return error; > > + } > > + /* > > + * Reserve AG metadata blocks. ENOSPC here does not mean there > > + * was a growfs failure, just that there still isn't space for > > + * new user data after the grow has been run. > > + */ > > + error = xfs_fs_reserve_ag_blocks(mp); > > + if (error == -ENOSPC) > > + error = 0; > > } > > - > > - /* > > - * Reserve AG metadata blocks. ENOSPC here does not mean there was a > > - * growfs failure, just that there still isn't space for new user data > > - * after the grow has been run. > > - */ > > - error = xfs_fs_reserve_ag_blocks(mp); > > - if (error == -ENOSPC) > > - error = 0; > > return error; > > > > out_trans_cancel: > > diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c > > index b22a09e9daee..052274321993 100644 > > --- a/fs/xfs/xfs_trans.c > > +++ b/fs/xfs/xfs_trans.c > > @@ -436,7 +436,6 @@ xfs_trans_mod_sb( > > tp->t_res_frextents_delta += delta; > > break; > > case XFS_TRANS_SB_DBLOCKS: > > - ASSERT(delta > 0); > > tp->t_dblocks_delta += delta; > > break; > > case XFS_TRANS_SB_AGCOUNT: > > -- > > 2.27.0 > > >
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index d1ba04124c28..9457b0691ece 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -91,23 +91,25 @@ xfs_growfs_data_private( xfs_agnumber_t nagcount; xfs_agnumber_t nagimax = 0; xfs_rfsblock_t nb, nb_div, nb_mod; - xfs_rfsblock_t delta; + int64_t delta; bool lastag_extended; xfs_agnumber_t oagcount; struct xfs_trans *tp; struct aghdr_init_data id = {}; nb = in->newblocks; - if (nb < mp->m_sb.sb_dblocks) - return -EINVAL; - if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb))) + error = xfs_sb_validate_fsb_count(&mp->m_sb, nb); + if (error) return error; - error = xfs_buf_read_uncached(mp->m_ddev_targp, + + if (nb > mp->m_sb.sb_dblocks) { + error = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL); - if (error) - return error; - xfs_buf_relse(bp); + if (error) + return error; + xfs_buf_relse(bp); + } nb_div = nb; nb_mod = do_div(nb_div, mp->m_sb.sb_agblocks); @@ -115,10 +117,16 @@ xfs_growfs_data_private( if (nb_mod && nb_mod < XFS_MIN_AG_BLOCKS) { nagcount--; nb = (xfs_rfsblock_t)nagcount * mp->m_sb.sb_agblocks; - if (nb < mp->m_sb.sb_dblocks) - return -EINVAL; } delta = nb - mp->m_sb.sb_dblocks; + /* + * Reject filesystems with a single AG because they are not + * supported, and reject a shrink operation that would cause a + * filesystem to become unsupported. + */ + if (delta < 0 && nagcount < 2) + return -EINVAL; + oagcount = mp->m_sb.sb_agcount; /* allocate the new per-ag structures */ @@ -126,15 +134,22 @@ xfs_growfs_data_private( error = xfs_initialize_perag(mp, nagcount, &nagimax); if (error) return error; + } else if (nagcount < oagcount) { + /* TODO: shrinking the entire AGs hasn't yet completed */ + return -EINVAL; } error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata, - XFS_GROWFS_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp); + (delta > 0 ? XFS_GROWFS_SPACE_RES(mp) : -delta), 0, + XFS_TRANS_RESERVE, &tp); if (error) return error; - error = xfs_resizefs_init_new_ags(tp, &id, oagcount, nagcount, - delta, &lastag_extended); + if (delta > 0) + error = xfs_resizefs_init_new_ags(tp, &id, oagcount, nagcount, + delta, &lastag_extended); + else + error = xfs_ag_shrink_space(mp, &tp, nagcount - 1, -delta); if (error) goto out_trans_cancel; @@ -169,28 +184,29 @@ xfs_growfs_data_private( xfs_set_low_space_thresholds(mp); mp->m_alloc_set_aside = xfs_alloc_set_aside(mp); - /* - * If we expanded the last AG, free the per-AG reservation - * so we can reinitialize it with the new size. - */ - if (lastag_extended) { - struct xfs_perag *pag; - - pag = xfs_perag_get(mp, id.agno); - error = xfs_ag_resv_free(pag); - xfs_perag_put(pag); - if (error) - return error; + if (delta > 0) { + /* + * If we expanded the last AG, free the per-AG reservation + * so we can reinitialize it with the new size. + */ + if (lastag_extended) { + struct xfs_perag *pag; + + pag = xfs_perag_get(mp, id.agno); + error = xfs_ag_resv_free(pag); + xfs_perag_put(pag); + if (error) + return error; + } + /* + * Reserve AG metadata blocks. ENOSPC here does not mean there + * was a growfs failure, just that there still isn't space for + * new user data after the grow has been run. + */ + error = xfs_fs_reserve_ag_blocks(mp); + if (error == -ENOSPC) + error = 0; } - - /* - * Reserve AG metadata blocks. ENOSPC here does not mean there was a - * growfs failure, just that there still isn't space for new user data - * after the grow has been run. - */ - error = xfs_fs_reserve_ag_blocks(mp); - if (error == -ENOSPC) - error = 0; return error; out_trans_cancel: diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index b22a09e9daee..052274321993 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -436,7 +436,6 @@ xfs_trans_mod_sb( tp->t_res_frextents_delta += delta; break; case XFS_TRANS_SB_DBLOCKS: - ASSERT(delta > 0); tp->t_dblocks_delta += delta; break; case XFS_TRANS_SB_AGCOUNT: