Message ID | 20230118224505.1964941-4-david@fromorbit.com (mailing list archive) |
---|---|
State | Superseded, archived |
Headers | show |
Series | xfs: per-ag centric allocation alogrithms | expand |
On Thu, 2023-01-19 at 09:44 +1100, Dave Chinner wrote: > From: Dave Chinner <dchinner@redhat.com> > > When we enter xfs_bmbt_alloc_block() without having first allocated > a data extent (i.e. tp->t_firstblock == NULLFSBLOCK) because we > are doing something like unwritten extent conversion, the transaction > block reservation is used as the minleft value. > > This works for operations like unwritten extent conversion, but it > assumes that the block reservation is only for a BMBT split. THis is > not always true, and sometimes results in larger than necessary > minleft values being set. We only actually need enough space for a > btree split, something we already handle correctly in > xfs_bmapi_write() via the xfs_bmapi_minleft() calculation. > > We should use xfs_bmapi_minleft() in xfs_bmbt_alloc_block() to > calculate the number of blocks a BMBT split on this inode is going to > require, not use the transaction block reservation that contains the > maximum number of blocks this transaction may consume in it... > > Signed-off-by: Dave Chinner <dchinner@redhat.com> Ok, makes sense Reviewed-by: Allison Henderson <allison.henderson@oracle.com> > --- > fs/xfs/libxfs/xfs_bmap.c | 2 +- > fs/xfs/libxfs/xfs_bmap.h | 2 ++ > fs/xfs/libxfs/xfs_bmap_btree.c | 19 +++++++++---------- > 3 files changed, 12 insertions(+), 11 deletions(-) > > diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c > index 018837bd72c8..9dc33cdc2ab9 100644 > --- a/fs/xfs/libxfs/xfs_bmap.c > +++ b/fs/xfs/libxfs/xfs_bmap.c > @@ -4242,7 +4242,7 @@ xfs_bmapi_convert_unwritten( > return 0; > } > > -static inline xfs_extlen_t > +xfs_extlen_t > xfs_bmapi_minleft( > struct xfs_trans *tp, > struct xfs_inode *ip, > diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h > index 16db95b11589..08c16e4edc0f 100644 > --- a/fs/xfs/libxfs/xfs_bmap.h > +++ b/fs/xfs/libxfs/xfs_bmap.h > @@ -220,6 +220,8 @@ int xfs_bmap_add_extent_unwritten_real(struct > xfs_trans *tp, > struct xfs_inode *ip, int whichfork, > struct xfs_iext_cursor *icur, struct xfs_btree_cur > **curp, > struct xfs_bmbt_irec *new, int *logflagsp); > +xfs_extlen_t xfs_bmapi_minleft(struct xfs_trans *tp, struct > xfs_inode *ip, > + int fork); > > enum xfs_bmap_intent_type { > XFS_BMAP_MAP = 1, > diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c > b/fs/xfs/libxfs/xfs_bmap_btree.c > index cfa052d40105..18de4fbfef4e 100644 > --- a/fs/xfs/libxfs/xfs_bmap_btree.c > +++ b/fs/xfs/libxfs/xfs_bmap_btree.c > @@ -213,18 +213,16 @@ xfs_bmbt_alloc_block( > if (args.fsbno == NULLFSBLOCK) { > args.fsbno = be64_to_cpu(start->l); > args.type = XFS_ALLOCTYPE_START_BNO; > + > /* > - * Make sure there is sufficient room left in the AG > to > - * complete a full tree split for an extent insert. > If > - * we are converting the middle part of an extent > then > - * we may need space for two tree splits. > - * > - * We are relying on the caller to make the correct > block > - * reservation for this operation to succeed. If the > - * reservation amount is insufficient then we may > fail a > - * block allocation here and corrupt the filesystem. > + * If we are coming here from something like > unwritten extent > + * conversion, there has been no data extent > allocation already > + * done, so we have to ensure that we attempt to > locate the > + * entire set of bmbt allocations in the same AG, as > + * xfs_bmapi_write() would have reserved. > */ > - args.minleft = args.tp->t_blk_res; > + args.minleft = xfs_bmapi_minleft(cur->bc_tp, cur- > >bc_ino.ip, > + cur- > >bc_ino.whichfork); > } else if (cur->bc_tp->t_flags & XFS_TRANS_LOWMODE) { > args.type = XFS_ALLOCTYPE_START_BNO; > } else { > @@ -248,6 +246,7 @@ xfs_bmbt_alloc_block( > * successful activate the lowspace algorithm. > */ > args.fsbno = 0; > + args.minleft = 0; > args.type = XFS_ALLOCTYPE_FIRST_AG; > error = xfs_alloc_vextent(&args); > if (error)
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 018837bd72c8..9dc33cdc2ab9 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4242,7 +4242,7 @@ xfs_bmapi_convert_unwritten( return 0; } -static inline xfs_extlen_t +xfs_extlen_t xfs_bmapi_minleft( struct xfs_trans *tp, struct xfs_inode *ip, diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 16db95b11589..08c16e4edc0f 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -220,6 +220,8 @@ int xfs_bmap_add_extent_unwritten_real(struct xfs_trans *tp, struct xfs_inode *ip, int whichfork, struct xfs_iext_cursor *icur, struct xfs_btree_cur **curp, struct xfs_bmbt_irec *new, int *logflagsp); +xfs_extlen_t xfs_bmapi_minleft(struct xfs_trans *tp, struct xfs_inode *ip, + int fork); enum xfs_bmap_intent_type { XFS_BMAP_MAP = 1, diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index cfa052d40105..18de4fbfef4e 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -213,18 +213,16 @@ xfs_bmbt_alloc_block( if (args.fsbno == NULLFSBLOCK) { args.fsbno = be64_to_cpu(start->l); args.type = XFS_ALLOCTYPE_START_BNO; + /* - * Make sure there is sufficient room left in the AG to - * complete a full tree split for an extent insert. If - * we are converting the middle part of an extent then - * we may need space for two tree splits. - * - * We are relying on the caller to make the correct block - * reservation for this operation to succeed. If the - * reservation amount is insufficient then we may fail a - * block allocation here and corrupt the filesystem. + * If we are coming here from something like unwritten extent + * conversion, there has been no data extent allocation already + * done, so we have to ensure that we attempt to locate the + * entire set of bmbt allocations in the same AG, as + * xfs_bmapi_write() would have reserved. */ - args.minleft = args.tp->t_blk_res; + args.minleft = xfs_bmapi_minleft(cur->bc_tp, cur->bc_ino.ip, + cur->bc_ino.whichfork); } else if (cur->bc_tp->t_flags & XFS_TRANS_LOWMODE) { args.type = XFS_ALLOCTYPE_START_BNO; } else { @@ -248,6 +246,7 @@ xfs_bmbt_alloc_block( * successful activate the lowspace algorithm. */ args.fsbno = 0; + args.minleft = 0; args.type = XFS_ALLOCTYPE_FIRST_AG; error = xfs_alloc_vextent(&args); if (error)