Message ID | 20170413080517.12564-9-hch@lst.de (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Thu, Apr 13, 2017 at 10:05:15AM +0200, Christoph Hellwig wrote: > We currently have two classes of xfs_bmapi_write callers that have > conflicting space reservation needs. Many callers expect to be able > to reserve the number of total blocks passed in a single AG for > the use with the current allocation as well as future allocations > in the same transactions, or transactions chained off from it. > > Other callers want to allocate up to the number of blocks passed in, > although they are fine with a smaller number of blocks to make > forward progress. For those we only need to leave a few blocks aside > for the bmap btree manipulations when doing the main space allocation. > > This patch introduces a new XFS_BMAPI_BESTEFFORT flag for the second > kind of callers that ignores the total flag and just uses the minleft > parameter to leave space for bmap btree allocations and splits. > > With this we can remove the potentially dangerous fallback that > ignores the total reservation in xfs_bmap_btalloc. > > Signed-off-by: Christoph Hellwig <hch@lst.de> > --- > fs/xfs/libxfs/xfs_attr_remote.c | 6 ++++-- > fs/xfs/libxfs/xfs_bmap.c | 27 ++++++++++++++++++++------- > fs/xfs/libxfs/xfs_bmap.h | 3 +++ > fs/xfs/xfs_bmap_util.c | 2 ++ > fs/xfs/xfs_iomap.c | 4 ++-- > fs/xfs/xfs_reflink.c | 3 ++- > 6 files changed, 33 insertions(+), 12 deletions(-) > > diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c > index d52f525f5b2d..35a99d15521c 100644 > --- a/fs/xfs/libxfs/xfs_attr_remote.c > +++ b/fs/xfs/libxfs/xfs_attr_remote.c > @@ -464,8 +464,10 @@ xfs_attr_rmtval_set( > xfs_defer_init(args->dfops, args->firstblock); > nmap = 1; > error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno, > - blkcnt, XFS_BMAPI_ATTRFORK, args->firstblock, > - args->total, &map, &nmap, args->dfops); > + blkcnt, > + XFS_BMAPI_ATTRFORK | XFS_BMAPI_BESTEFFORT, > + args->firstblock, args->total, &map, &nmap, > + args->dfops); > if (!error) > error = xfs_defer_finish(&args->trans, args->dfops, dp); > if (error) { > diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c > index 6faefa342748..c06e7d500ed1 100644 > --- a/fs/xfs/libxfs/xfs_bmap.c > +++ b/fs/xfs/libxfs/xfs_bmap.c > @@ -915,7 +915,7 @@ xfs_bmap_local_to_extents( > args.fsbno = *firstblock; > args.type = XFS_ALLOCTYPE_NEAR_BNO; > } > - args.total = args.minlen = args.maxlen = args.prod = 1; > + args.minlen = args.maxlen = args.prod = 1; > error = xfs_alloc_vextent(&args); > if (error) > goto done; > @@ -3504,7 +3504,6 @@ xfs_bmap_btalloc_nullfb( > int error; > > args->type = XFS_ALLOCTYPE_START_BNO; > - args->total = ap->total; > > startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno); > if (startag == NULLAGNUMBER) > @@ -3538,7 +3537,6 @@ xfs_bmap_btalloc_filestreams( > int error; > > args->type = XFS_ALLOCTYPE_NEAR_BNO; > - args->total = ap->total; > > ag = XFS_FSB_TO_AGNO(mp, args->fsbno); > if (ag == NULLAGNUMBER) > @@ -3684,10 +3682,9 @@ xfs_bmap_btalloc( > args.type = XFS_ALLOCTYPE_FIRST_AG; > else > args.type = XFS_ALLOCTYPE_START_BNO; > - args.total = args.minlen = 1; > + args.minlen = 1; > } else { > args.type = XFS_ALLOCTYPE_NEAR_BNO; > - args.total = ap->total; > args.minlen = 1; > } > /* apply extent size hints if obtained earlier */ > @@ -3754,7 +3751,24 @@ xfs_bmap_btalloc( > args.alignment = 1; > args.minalignslop = 0; > } > - args.minleft = xfs_bmap_minleft(ap); > + > + /* > + * If the XFS_BMAPI_BESTEFFORT flag is set we try to allocate any > + * space that's available, even if it is less than requested. We > + * still need to set a minleft value to guarantee that we can still > + * manipulate the bmap btree, though. The total value is ignored in > + * this case. > + * > + * If the flag is not set the total value specifies the total space > + * that the transaction may use, and we must find an AG that has > + * enough space available for all of total, or this allocation request > + * will fail. > + */ > + if (ap->flags & XFS_BMAPI_BESTEFFORT) > + args.minleft = xfs_bmap_minleft(ap); > + else > + args.total = ap->total; > + This seems Ok, but I don't think it's very elegant to have callers pass a total parameter along with a flag to ignore it. Couldn't we just set minleft when total is not used and pass zero from those particular callers, or do we actually need to support the !BESTEFFORT && total == 0 case? (At the very least, we could assert that total == 0 when BESTEFFORT is set.) Brian > args.wasdel = ap->wasdel; > args.resv = XFS_AG_RESV_NONE; > args.datatype = ap->datatype; > @@ -3800,7 +3814,6 @@ xfs_bmap_btalloc( > if (args.fsbno == NULLFSBLOCK && nullfb) { > args.fsbno = 0; > args.type = XFS_ALLOCTYPE_FIRST_AG; > - args.total = 1; > if ((error = xfs_alloc_vextent(&args))) > return error; > ap->dfops->dop_low = true; > diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h > index 36a7f36f5f38..f35a2b2c4f06 100644 > --- a/fs/xfs/libxfs/xfs_bmap.h > +++ b/fs/xfs/libxfs/xfs_bmap.h > @@ -79,6 +79,9 @@ struct xfs_extent_free_item > #define XFS_BMAPI_PREALLOC 0x008 /* preallocation op: unwritten space */ > #define XFS_BMAPI_IGSTATE 0x010 /* Ignore state - */ > /* combine contig. space */ > + > +#define XFS_BMAPI_BESTEFFORT 0x020 /* may allocate less than requested */ > + > /* > * unwritten extent conversion - this needs write cache flushing and no additional > * allocation alignments. When specified with XFS_BMAPI_PREALLOC it converts > diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c > index 4d1920e594b0..f809ebc7a495 100644 > --- a/fs/xfs/xfs_bmap_util.c > +++ b/fs/xfs/xfs_bmap_util.c > @@ -1033,6 +1033,8 @@ xfs_alloc_file_space( > startoffset_fsb = XFS_B_TO_FSBT(mp, offset); > allocatesize_fsb = XFS_B_TO_FSB(mp, count); > > + alloc_type |= XFS_BMAPI_BESTEFFORT; > + > /* > * Allocate file space until done or until there is an error > */ > diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c > index 009f8243dddc..1abed9b6d5c5 100644 > --- a/fs/xfs/xfs_iomap.c > +++ b/fs/xfs/xfs_iomap.c > @@ -171,7 +171,7 @@ xfs_iomap_write_direct( > uint qblocks, resblks, resrtextents; > int error; > int lockmode; > - int bmapi_flags = XFS_BMAPI_PREALLOC; > + int bmapi_flags = XFS_BMAPI_PREALLOC | XFS_BMAPI_BESTEFFORT; > uint tflags = 0; > > rt = XFS_IS_REALTIME_INODE(ip); > @@ -679,7 +679,7 @@ xfs_iomap_write_allocate( > xfs_trans_t *tp; > int nimaps; > int error = 0; > - int flags = XFS_BMAPI_DELALLOC; > + int flags = XFS_BMAPI_DELALLOC | XFS_BMAPI_BESTEFFORT; > int nres; > > if (whichfork == XFS_COW_FORK) > diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c > index c0f3754caca2..aad321c16d2f 100644 > --- a/fs/xfs/xfs_reflink.c > +++ b/fs/xfs/xfs_reflink.c > @@ -455,7 +455,8 @@ xfs_reflink_allocate_cow( > > /* Allocate the entire reservation as unwritten blocks. */ > error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount, > - XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, &first_block, > + XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC | > + XFS_BMAPI_BESTEFFORT, &first_block, > resblks, imap, &nimaps, &dfops); > if (error) > goto out_bmap_cancel; > -- > 2.11.0 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Apr 17, 2017 at 02:08:08PM -0400, Brian Foster wrote: > This seems Ok, but I don't think it's very elegant to have callers pass > a total parameter along with a flag to ignore it. Couldn't we just set > minleft when total is not used and pass zero from those particular > callers, or do we actually need to support the !BESTEFFORT && total == 0 > case? We could do that, in in context of just this patch it would even seem cleaner. But for the next merge window I have changes queued up that remove the total parameter and it's passing along the dir/da_btree code entirely by looking at the transaction reservation for the !BESTEFFORT case, which I thikn is even better as a grand scheme. > (At the very least, we could assert that total == 0 when > BESTEFFORT is set.) Sure. -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Apr 18, 2017 at 09:58:01AM +0200, Christoph Hellwig wrote: > On Mon, Apr 17, 2017 at 02:08:08PM -0400, Brian Foster wrote: > > This seems Ok, but I don't think it's very elegant to have callers pass > > a total parameter along with a flag to ignore it. Couldn't we just set > > minleft when total is not used and pass zero from those particular > > callers, or do we actually need to support the !BESTEFFORT && total == 0 > > case? > > We could do that, in in context of just this patch it would even seem > cleaner. But for the next merge window I have changes queued up that > remove the total parameter and it's passing along the dir/da_btree > code entirely by looking at the transaction reservation for the > !BESTEFFORT case, which I thikn is even better as a grand scheme. > Ok, then there is probably justification for the flag. In which case, I think the tweak and assertion below at least helps clarify the semantics of the call. > > (At the very least, we could assert that total == 0 when > > BESTEFFORT is set.) > > Sure. Thanks. Brian > -- > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index d52f525f5b2d..35a99d15521c 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -464,8 +464,10 @@ xfs_attr_rmtval_set( xfs_defer_init(args->dfops, args->firstblock); nmap = 1; error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno, - blkcnt, XFS_BMAPI_ATTRFORK, args->firstblock, - args->total, &map, &nmap, args->dfops); + blkcnt, + XFS_BMAPI_ATTRFORK | XFS_BMAPI_BESTEFFORT, + args->firstblock, args->total, &map, &nmap, + args->dfops); if (!error) error = xfs_defer_finish(&args->trans, args->dfops, dp); if (error) { diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 6faefa342748..c06e7d500ed1 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -915,7 +915,7 @@ xfs_bmap_local_to_extents( args.fsbno = *firstblock; args.type = XFS_ALLOCTYPE_NEAR_BNO; } - args.total = args.minlen = args.maxlen = args.prod = 1; + args.minlen = args.maxlen = args.prod = 1; error = xfs_alloc_vextent(&args); if (error) goto done; @@ -3504,7 +3504,6 @@ xfs_bmap_btalloc_nullfb( int error; args->type = XFS_ALLOCTYPE_START_BNO; - args->total = ap->total; startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno); if (startag == NULLAGNUMBER) @@ -3538,7 +3537,6 @@ xfs_bmap_btalloc_filestreams( int error; args->type = XFS_ALLOCTYPE_NEAR_BNO; - args->total = ap->total; ag = XFS_FSB_TO_AGNO(mp, args->fsbno); if (ag == NULLAGNUMBER) @@ -3684,10 +3682,9 @@ xfs_bmap_btalloc( args.type = XFS_ALLOCTYPE_FIRST_AG; else args.type = XFS_ALLOCTYPE_START_BNO; - args.total = args.minlen = 1; + args.minlen = 1; } else { args.type = XFS_ALLOCTYPE_NEAR_BNO; - args.total = ap->total; args.minlen = 1; } /* apply extent size hints if obtained earlier */ @@ -3754,7 +3751,24 @@ xfs_bmap_btalloc( args.alignment = 1; args.minalignslop = 0; } - args.minleft = xfs_bmap_minleft(ap); + + /* + * If the XFS_BMAPI_BESTEFFORT flag is set we try to allocate any + * space that's available, even if it is less than requested. We + * still need to set a minleft value to guarantee that we can still + * manipulate the bmap btree, though. The total value is ignored in + * this case. + * + * If the flag is not set the total value specifies the total space + * that the transaction may use, and we must find an AG that has + * enough space available for all of total, or this allocation request + * will fail. + */ + if (ap->flags & XFS_BMAPI_BESTEFFORT) + args.minleft = xfs_bmap_minleft(ap); + else + args.total = ap->total; + args.wasdel = ap->wasdel; args.resv = XFS_AG_RESV_NONE; args.datatype = ap->datatype; @@ -3800,7 +3814,6 @@ xfs_bmap_btalloc( if (args.fsbno == NULLFSBLOCK && nullfb) { args.fsbno = 0; args.type = XFS_ALLOCTYPE_FIRST_AG; - args.total = 1; if ((error = xfs_alloc_vextent(&args))) return error; ap->dfops->dop_low = true; diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 36a7f36f5f38..f35a2b2c4f06 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -79,6 +79,9 @@ struct xfs_extent_free_item #define XFS_BMAPI_PREALLOC 0x008 /* preallocation op: unwritten space */ #define XFS_BMAPI_IGSTATE 0x010 /* Ignore state - */ /* combine contig. space */ + +#define XFS_BMAPI_BESTEFFORT 0x020 /* may allocate less than requested */ + /* * unwritten extent conversion - this needs write cache flushing and no additional * allocation alignments. When specified with XFS_BMAPI_PREALLOC it converts diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 4d1920e594b0..f809ebc7a495 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1033,6 +1033,8 @@ xfs_alloc_file_space( startoffset_fsb = XFS_B_TO_FSBT(mp, offset); allocatesize_fsb = XFS_B_TO_FSB(mp, count); + alloc_type |= XFS_BMAPI_BESTEFFORT; + /* * Allocate file space until done or until there is an error */ diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 009f8243dddc..1abed9b6d5c5 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -171,7 +171,7 @@ xfs_iomap_write_direct( uint qblocks, resblks, resrtextents; int error; int lockmode; - int bmapi_flags = XFS_BMAPI_PREALLOC; + int bmapi_flags = XFS_BMAPI_PREALLOC | XFS_BMAPI_BESTEFFORT; uint tflags = 0; rt = XFS_IS_REALTIME_INODE(ip); @@ -679,7 +679,7 @@ xfs_iomap_write_allocate( xfs_trans_t *tp; int nimaps; int error = 0; - int flags = XFS_BMAPI_DELALLOC; + int flags = XFS_BMAPI_DELALLOC | XFS_BMAPI_BESTEFFORT; int nres; if (whichfork == XFS_COW_FORK) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index c0f3754caca2..aad321c16d2f 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -455,7 +455,8 @@ xfs_reflink_allocate_cow( /* Allocate the entire reservation as unwritten blocks. */ error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount, - XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, &first_block, + XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC | + XFS_BMAPI_BESTEFFORT, &first_block, resblks, imap, &nimaps, &dfops); if (error) goto out_bmap_cancel;
We currently have two classes of xfs_bmapi_write callers that have conflicting space reservation needs. Many callers expect to be able to reserve the number of total blocks passed in a single AG for the use with the current allocation as well as future allocations in the same transactions, or transactions chained off from it. Other callers want to allocate up to the number of blocks passed in, although they are fine with a smaller number of blocks to make forward progress. For those we only need to leave a few blocks aside for the bmap btree manipulations when doing the main space allocation. This patch introduces a new XFS_BMAPI_BESTEFFORT flag for the second kind of callers that ignores the total flag and just uses the minleft parameter to leave space for bmap btree allocations and splits. With this we can remove the potentially dangerous fallback that ignores the total reservation in xfs_bmap_btalloc. Signed-off-by: Christoph Hellwig <hch@lst.de> --- fs/xfs/libxfs/xfs_attr_remote.c | 6 ++++-- fs/xfs/libxfs/xfs_bmap.c | 27 ++++++++++++++++++++------- fs/xfs/libxfs/xfs_bmap.h | 3 +++ fs/xfs/xfs_bmap_util.c | 2 ++ fs/xfs/xfs_iomap.c | 4 ++-- fs/xfs/xfs_reflink.c | 3 ++- 6 files changed, 33 insertions(+), 12 deletions(-)