
[v2] xfs: ensure inobt record walks always make forward progress

Message ID 20201117181456.GZ9695@magnolia (mailing list archive)
State Accepted, archived
Series [v2] xfs: ensure inobt record walks always make forward progress

Commit Message

Darrick J. Wong Nov. 17, 2020, 6:14 p.m. UTC
From: Darrick J. Wong <darrick.wong@oracle.com>

The aim of the inode btree record iterator function is to call a
callback on every record in the btree.  To avoid having to tear down and
recreate the inode btree cursor around every callback, it caches a
certain number of records in a memory buffer.  After each batch of
callback invocations, we have to perform a btree lookup to find the
next record after where we left off.

However, if the keys of the inode btree are corrupt, the lookup might
put us in the wrong part of the inode btree, causing the walk function
to loop forever.  Therefore, we add extra cursor tracking to make sure
that we never go backwards, either when performing the lookup or when
jumping to the next inobt record.  This also fixes an off-by-one error
where, upon resume, the lookup should have been for the inode /after/
the point at which we stopped.

Found by fuzzing xfs/460 with keys[2].startino = ones causing bulkstat
and quotacheck to hang.

Fixes: a211432c27ff ("xfs: create simplified inode walk function")
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
v2: fix idiotic mismerge, sorry about that...
---
 fs/xfs/xfs_iwalk.c |   27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)
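
For readers skimming the archive, the core idea of the fix is easy to
demonstrate outside the kernel: remember the highest key already visited,
resume each lookup strictly after it, and treat any record that would move
the walk backwards as corruption instead of looping.  The sketch below is
only an illustration, not the kernel code; all names in it (walk_records(),
lookup_ge(), BATCH, NULLKEY) are made up for this example.

#include <stdint.h>
#include <stdio.h>

#define BATCH		4		/* records processed per "cursor" pass */
#define NULLKEY		UINT64_MAX	/* "no key seen yet" sentinel */

/* Simulated GE lookup: index of the first record >= key, or -1 if none. */
static int lookup_ge(const uint64_t *recs, int nr, uint64_t key)
{
	int i;

	for (i = 0; i < nr; i++)
		if (recs[i] >= key)
			return i;
	return -1;
}

static int walk_records(const uint64_t *recs, int nr)
{
	uint64_t lastkey = NULLKEY;	/* highest key already visited */
	uint64_t next = 0;		/* resume point for the next lookup */
	int idx;

	while ((idx = lookup_ge(recs, nr, next)) >= 0) {
		int i;

		for (i = idx; i < nr && i < idx + BATCH; i++) {
			/* A corrupt key must never send the walk backwards. */
			if (lastkey != NULLKEY && recs[i] <= lastkey) {
				fprintf(stderr, "corruption detected\n");
				return -1;
			}
			lastkey = recs[i];
			printf("visit %llu\n", (unsigned long long)recs[i]);
		}
		/* Resume strictly after the last record we processed. */
		next = lastkey + 1;
	}
	return 0;
}

int main(void)
{
	const uint64_t recs[] = { 1, 5, 9, 12, 20, 33, 40 };

	return walk_records(recs, sizeof(recs) / sizeof(recs[0]));
}

The same invariant is what the new iwag->lastino field enforces in the
patch below.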

Comments

Chandan Babu R Nov. 18, 2020, 5:58 a.m. UTC | #1
On Tuesday 17 November 2020 11:44:56 PM IST Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> The aim of the inode btree record iterator function is to call a
> callback on every record in the btree.  To avoid having to tear down and
> recreate the inode btree cursor around every callback, it caches a
> certain number of records in a memory buffer.  After each batch of
> callback invocations, we have to perform a btree lookup to find the
> next record after where we left off.
> 
> However, if the keys of the inode btree are corrupt, the lookup might
> put us in the wrong part of the inode btree, causing the walk function
> to loop forever.  Therefore, we add extra cursor tracking to make sure
> that we never go backwards, either when performing the lookup or when
> jumping to the next inobt record.  This also fixes an off-by-one error
> where, upon resume, the lookup should have been for the inode /after/
> the point at which we stopped.
> 
> Found by fuzzing xfs/460 with keys[2].startino = ones causing bulkstat
> and quotacheck to hang.

The changes look good to me.

Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>

> 
> Fixes: a211432c27ff ("xfs: create simplified inode walk function")
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
> v2: fix idiotic mismerge, sorry about that...
> ---
>  fs/xfs/xfs_iwalk.c |   27 ++++++++++++++++++++++++---
>  1 file changed, 24 insertions(+), 3 deletions(-)
> 

Patch

diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c
index 233dcc8784db..2a45138831e3 100644
--- a/fs/xfs/xfs_iwalk.c
+++ b/fs/xfs/xfs_iwalk.c
@@ -55,6 +55,9 @@  struct xfs_iwalk_ag {
 	/* Where do we start the traversal? */
 	xfs_ino_t			startino;
 
+	/* What was the last inode number we saw when iterating the inobt? */
+	xfs_ino_t			lastino;
+
 	/* Array of inobt records we cache. */
 	struct xfs_inobt_rec_incore	*recs;
 
@@ -301,6 +304,9 @@  xfs_iwalk_ag_start(
 	if (XFS_IS_CORRUPT(mp, *has_more != 1))
 		return -EFSCORRUPTED;
 
+	iwag->lastino = XFS_AGINO_TO_INO(mp, agno,
+				irec->ir_startino + XFS_INODES_PER_CHUNK - 1);
+
 	/*
 	 * If the LE lookup yielded an inobt record before the cursor position,
 	 * skip it and see if there's another one after it.
@@ -347,15 +353,17 @@  xfs_iwalk_run_callbacks(
 	struct xfs_mount		*mp = iwag->mp;
 	struct xfs_trans		*tp = iwag->tp;
 	struct xfs_inobt_rec_incore	*irec;
-	xfs_agino_t			restart;
+	xfs_agino_t			next_agino;
 	int				error;
 
+	next_agino = XFS_INO_TO_AGINO(mp, iwag->lastino) + 1;
+
 	ASSERT(iwag->nr_recs > 0);
 
 	/* Delete cursor but remember the last record we cached... */
 	xfs_iwalk_del_inobt(tp, curpp, agi_bpp, 0);
 	irec = &iwag->recs[iwag->nr_recs - 1];
-	restart = irec->ir_startino + XFS_INODES_PER_CHUNK - 1;
+	ASSERT(next_agino == irec->ir_startino + XFS_INODES_PER_CHUNK);
 
 	error = xfs_iwalk_ag_recs(iwag);
 	if (error)
@@ -372,7 +380,7 @@  xfs_iwalk_run_callbacks(
 	if (error)
 		return error;
 
-	return xfs_inobt_lookup(*curpp, restart, XFS_LOOKUP_GE, has_more);
+	return xfs_inobt_lookup(*curpp, next_agino, XFS_LOOKUP_GE, has_more);
 }
 
 /* Walk all inodes in a single AG, from @iwag->startino to the end of the AG. */
@@ -396,6 +404,7 @@  xfs_iwalk_ag(
 
 	while (!error && has_more) {
 		struct xfs_inobt_rec_incore	*irec;
+		xfs_ino_t			rec_fsino;
 
 		cond_resched();
 		if (xfs_pwork_want_abort(&iwag->pwork))
@@ -407,6 +416,15 @@  xfs_iwalk_ag(
 		if (error || !has_more)
 			break;
 
+		/* Make sure that we always move forward. */
+		rec_fsino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino);
+		if (iwag->lastino != NULLFSINO &&
+		    XFS_IS_CORRUPT(mp, iwag->lastino >= rec_fsino)) {
+			error = -EFSCORRUPTED;
+			goto out;
+		}
+		iwag->lastino = rec_fsino + XFS_INODES_PER_CHUNK - 1;
+
 		/* No allocated inodes in this chunk; skip it. */
 		if (iwag->skip_empty && irec->ir_freecount == irec->ir_count) {
 			error = xfs_btree_increment(cur, 0, &has_more);
@@ -535,6 +553,7 @@  xfs_iwalk(
 		.trim_start	= 1,
 		.skip_empty	= 1,
 		.pwork		= XFS_PWORK_SINGLE_THREADED,
+		.lastino	= NULLFSINO,
 	};
 	xfs_agnumber_t		agno = XFS_INO_TO_AGNO(mp, startino);
 	int			error;
@@ -623,6 +642,7 @@  xfs_iwalk_threaded(
 		iwag->data = data;
 		iwag->startino = startino;
 		iwag->sz_recs = xfs_iwalk_prefetch(inode_records);
+		iwag->lastino = NULLFSINO;
 		xfs_pwork_queue(&pctl, &iwag->pwork);
 		startino = XFS_AGINO_TO_INO(mp, agno + 1, 0);
 		if (flags & XFS_INOBT_WALK_SAME_AG)
@@ -696,6 +716,7 @@  xfs_inobt_walk(
 		.startino	= startino,
 		.sz_recs	= xfs_inobt_walk_prefetch(inobt_records),
 		.pwork		= XFS_PWORK_SINGLE_THREADED,
+		.lastino	= NULLFSINO,
 	};
 	xfs_agnumber_t		agno = XFS_INO_TO_AGNO(mp, startino);
 	int			error;
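
A note on the off-by-one mentioned in the commit message, using the usual
64-inode chunk as an assumed example: a cached record starting at agino 64
covers inodes 64-127, so the old resume key of
ir_startino + XFS_INODES_PER_CHUNK - 1 = 127 named an inode the walk had
already covered, while the new key of lastino + 1 = 128 names the first
inode it has not.  The toy arithmetic below just prints those two values;
it is not kernel code.

#include <stdio.h>

#define INODES_PER_CHUNK	64	/* typical inobt chunk size, assumed here */

int main(void)
{
	unsigned int ir_startino = 64;	/* last cached record covers [64, 127] */
	unsigned int last_covered = ir_startino + INODES_PER_CHUNK - 1;

	/* Old resume key: the last inode the walk already covered. */
	printf("old GE lookup key: %u\n", last_covered);

	/* New resume key: the first inode the walk has not covered yet. */
	printf("new GE lookup key: %u\n", last_covered + 1);

	return 0;
}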