diff mbox

xfs: tune down agno asserts in the bmap code

Message ID 20170216211035.4206-1-hch@lst.de (mailing list archive)
State Accepted
Headers show

Commit Message

Christoph Hellwig Feb. 16, 2017, 9:10 p.m. UTC
In various places we currently assert that xfs_bmap_btalloc allocates
from the same as the firstblock value passed in, unless it's either
NULLAGNO or the dop_low flag is set.  But the reflink code does not
fully follow this convention as it passes in firstblock purely as
a hint for the allocator without actually having previous allocations
in the transaction, and without having a minleft check on the current
AG, leading to the assert firing on a very full and heavily used
file system.  As even the reflink code only allocates from equal or
higher AGs for now we can simply the check to always allow for equal
or higher AGs.

Note that we need to eventually split the two meanings of the firstblock
value.  At that point we can also allow the reflink code to allocate
from any AG instead of limiting it in any way.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/libxfs/xfs_bmap.c | 22 ++++++----------------
 1 file changed, 6 insertions(+), 16 deletions(-)

Comments

Darrick J. Wong Feb. 17, 2017, 1:11 a.m. UTC | #1
On Thu, Feb 16, 2017 at 10:10:35PM +0100, Christoph Hellwig wrote:
> In various places we currently assert that xfs_bmap_btalloc allocates
> from the same as the firstblock value passed in, unless it's either
> NULLAGNO or the dop_low flag is set.  But the reflink code does not
> fully follow this convention as it passes in firstblock purely as
> a hint for the allocator without actually having previous allocations

Are you referring to BMAPI_REMAP?  In that case *firstblock isn't a
hint, it's the new physical location of the extent.

> in the transaction, and without having a minleft check on the current
> AG, leading to the assert firing on a very full and heavily used
> file system.  As even the reflink code only allocates from equal or
> higher AGs for now we can simply the check to always allow for equal
> or higher AGs.
> 
> Note that we need to eventually split the two meanings of the firstblock
> value.  At that point we can also allow the reflink code to allocate
> from any AG instead of limiting it in any way.

...but you're correct that we probably ought to disambiguate the two.

For now,
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

By the way, was it your intent to push this series into 4.11?

--D

> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/libxfs/xfs_bmap.c | 22 ++++++----------------
>  1 file changed, 6 insertions(+), 16 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
> index bfc00de5c6f1..2e79f4cc9a8e 100644
> --- a/fs/xfs/libxfs/xfs_bmap.c
> +++ b/fs/xfs/libxfs/xfs_bmap.c
> @@ -804,9 +804,7 @@ xfs_bmap_extents_to_btree(
>  	 */
>  	ASSERT(args.fsbno != NULLFSBLOCK);
>  	ASSERT(*firstblock == NULLFSBLOCK ||
> -	       args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) ||
> -	       (dfops->dop_low &&
> -		args.agno > XFS_FSB_TO_AGNO(mp, *firstblock)));
> +	       args.agno >= XFS_FSB_TO_AGNO(mp, *firstblock));
>  	*firstblock = cur->bc_private.b.firstblock = args.fsbno;
>  	cur->bc_private.b.allocated++;
>  	ip->i_d.di_nblocks++;
> @@ -3822,17 +3820,13 @@ xfs_bmap_btalloc(
>  		 * the first block that was allocated.
>  		 */
>  		ASSERT(*ap->firstblock == NULLFSBLOCK ||
> -		       XFS_FSB_TO_AGNO(mp, *ap->firstblock) ==
> -		       XFS_FSB_TO_AGNO(mp, args.fsbno) ||
> -		       (ap->dfops->dop_low &&
> -			XFS_FSB_TO_AGNO(mp, *ap->firstblock) <
> -			XFS_FSB_TO_AGNO(mp, args.fsbno)));
> +		       XFS_FSB_TO_AGNO(mp, *ap->firstblock) <=
> +		       XFS_FSB_TO_AGNO(mp, args.fsbno));
>  
>  		ap->blkno = args.fsbno;
>  		if (*ap->firstblock == NULLFSBLOCK)
>  			*ap->firstblock = args.fsbno;
> -		ASSERT(nullfb || fb_agno == args.agno ||
> -		       (ap->dfops->dop_low && fb_agno < args.agno));
> +		ASSERT(nullfb || fb_agno <= args.agno);
>  		ap->length = args.len;
>  		if (!(ap->flags & XFS_BMAPI_COWFORK))
>  			ap->ip->i_d.di_nblocks += args.len;
> @@ -4746,13 +4740,9 @@ xfs_bmapi_write(
>  	if (bma.cur) {
>  		if (!error) {
>  			ASSERT(*firstblock == NULLFSBLOCK ||
> -			       XFS_FSB_TO_AGNO(mp, *firstblock) ==
> +			       XFS_FSB_TO_AGNO(mp, *firstblock) <=
>  			       XFS_FSB_TO_AGNO(mp,
> -				       bma.cur->bc_private.b.firstblock) ||
> -			       (dfops->dop_low &&
> -				XFS_FSB_TO_AGNO(mp, *firstblock) <
> -				XFS_FSB_TO_AGNO(mp,
> -					bma.cur->bc_private.b.firstblock)));
> +				       bma.cur->bc_private.b.firstblock));
>  			*firstblock = bma.cur->bc_private.b.firstblock;
>  		}
>  		xfs_btree_del_cursor(bma.cur,
> -- 
> 2.11.0
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Hellwig Feb. 17, 2017, 12:42 p.m. UTC | #2
On Thu, Feb 16, 2017 at 05:11:17PM -0800, Darrick J. Wong wrote:
> On Thu, Feb 16, 2017 at 10:10:35PM +0100, Christoph Hellwig wrote:
> > In various places we currently assert that xfs_bmap_btalloc allocates
> > from the same as the firstblock value passed in, unless it's either
> > NULLAGNO or the dop_low flag is set.  But the reflink code does not
> > fully follow this convention as it passes in firstblock purely as
> > a hint for the allocator without actually having previous allocations
> 
> Are you referring to BMAPI_REMAP?

Yes.

> In that case *firstblock isn't a
> hint, it's the new physical location of the extent.

Yes and no.  For the high-level bmap code it is indeed the location of the
extent, but it then also gets passed down to the btree code, where is
is treated as both a hint for allocating the bmap btree blocks close
to it, which limits the allocation to the same AG.  We work around that
a bit using the try_another_ag loop, but it's all a bit of a mess.

> By the way, was it your intent to push this series into 4.11?

It's just a single patch, and given that it's fairly easy to trigger
we should defintively have it in 4.11 and later -stable.
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index bfc00de5c6f1..2e79f4cc9a8e 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -804,9 +804,7 @@  xfs_bmap_extents_to_btree(
 	 */
 	ASSERT(args.fsbno != NULLFSBLOCK);
 	ASSERT(*firstblock == NULLFSBLOCK ||
-	       args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) ||
-	       (dfops->dop_low &&
-		args.agno > XFS_FSB_TO_AGNO(mp, *firstblock)));
+	       args.agno >= XFS_FSB_TO_AGNO(mp, *firstblock));
 	*firstblock = cur->bc_private.b.firstblock = args.fsbno;
 	cur->bc_private.b.allocated++;
 	ip->i_d.di_nblocks++;
@@ -3822,17 +3820,13 @@  xfs_bmap_btalloc(
 		 * the first block that was allocated.
 		 */
 		ASSERT(*ap->firstblock == NULLFSBLOCK ||
-		       XFS_FSB_TO_AGNO(mp, *ap->firstblock) ==
-		       XFS_FSB_TO_AGNO(mp, args.fsbno) ||
-		       (ap->dfops->dop_low &&
-			XFS_FSB_TO_AGNO(mp, *ap->firstblock) <
-			XFS_FSB_TO_AGNO(mp, args.fsbno)));
+		       XFS_FSB_TO_AGNO(mp, *ap->firstblock) <=
+		       XFS_FSB_TO_AGNO(mp, args.fsbno));
 
 		ap->blkno = args.fsbno;
 		if (*ap->firstblock == NULLFSBLOCK)
 			*ap->firstblock = args.fsbno;
-		ASSERT(nullfb || fb_agno == args.agno ||
-		       (ap->dfops->dop_low && fb_agno < args.agno));
+		ASSERT(nullfb || fb_agno <= args.agno);
 		ap->length = args.len;
 		if (!(ap->flags & XFS_BMAPI_COWFORK))
 			ap->ip->i_d.di_nblocks += args.len;
@@ -4746,13 +4740,9 @@  xfs_bmapi_write(
 	if (bma.cur) {
 		if (!error) {
 			ASSERT(*firstblock == NULLFSBLOCK ||
-			       XFS_FSB_TO_AGNO(mp, *firstblock) ==
+			       XFS_FSB_TO_AGNO(mp, *firstblock) <=
 			       XFS_FSB_TO_AGNO(mp,
-				       bma.cur->bc_private.b.firstblock) ||
-			       (dfops->dop_low &&
-				XFS_FSB_TO_AGNO(mp, *firstblock) <
-				XFS_FSB_TO_AGNO(mp,
-					bma.cur->bc_private.b.firstblock)));
+				       bma.cur->bc_private.b.firstblock));
 			*firstblock = bma.cur->bc_private.b.firstblock;
 		}
 		xfs_btree_del_cursor(bma.cur,