Message ID | 20201117181456.GZ9695@magnolia (mailing list archive) |
---|---|
State | Accepted, archived |
Headers | show |
Series | [v2] xfs: ensure inobt record walks always make forward progress | expand |
On Tuesday 17 November 2020 11:44:56 PM IST Darrick J. Wong wrote: > From: Darrick J. Wong <darrick.wong@oracle.com> > > The aim of the inode btree record iterator function is to call a > callback on every record in the btree. To avoid having to tear down and > recreate the inode btree cursor around every callback, it caches a > certain number of records in a memory buffer. After each batch of > callback invocations, we have to perform a btree lookup to find the > next record after where we left off. > > However, if the keys of the inode btree are corrupt, the lookup might > put us in the wrong part of the inode btree, causing the walk function > to loop forever. Therefore, we add extra cursor tracking to make sure > that we never go backwards neither when performing the lookup nor when > jumping to the next inobt record. This also fixes an off by one error > where upon resume the lookup should have been for the inode /after/ the > point at which we stopped. > > Found by fuzzing xfs/460 with keys[2].startino = ones causing bulkstat > and quotacheck to hang. The changes look good to me. Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com> > > Fixes: a211432c27ff ("xfs: create simplified inode walk function") > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> > --- > v2: fix idiotic mismerge, sorry about that... > --- > fs/xfs/xfs_iwalk.c | 27 ++++++++++++++++++++++++--- > 1 file changed, 24 insertions(+), 3 deletions(-) > > diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c > index 233dcc8784db..2a45138831e3 100644 > --- a/fs/xfs/xfs_iwalk.c > +++ b/fs/xfs/xfs_iwalk.c > @@ -55,6 +55,9 @@ struct xfs_iwalk_ag { > /* Where do we start the traversal? */ > xfs_ino_t startino; > > + /* What was the last inode number we saw when iterating the inobt? */ > + xfs_ino_t lastino; > + > /* Array of inobt records we cache. */ > struct xfs_inobt_rec_incore *recs; > > @@ -301,6 +304,9 @@ xfs_iwalk_ag_start( > if (XFS_IS_CORRUPT(mp, *has_more != 1)) > return -EFSCORRUPTED; > > + iwag->lastino = XFS_AGINO_TO_INO(mp, agno, > + irec->ir_startino + XFS_INODES_PER_CHUNK - 1); > + > /* > * If the LE lookup yielded an inobt record before the cursor position, > * skip it and see if there's another one after it. > @@ -347,15 +353,17 @@ xfs_iwalk_run_callbacks( > struct xfs_mount *mp = iwag->mp; > struct xfs_trans *tp = iwag->tp; > struct xfs_inobt_rec_incore *irec; > - xfs_agino_t restart; > + xfs_agino_t next_agino; > int error; > > + next_agino = XFS_INO_TO_AGINO(mp, iwag->lastino) + 1; > + > ASSERT(iwag->nr_recs > 0); > > /* Delete cursor but remember the last record we cached... */ > xfs_iwalk_del_inobt(tp, curpp, agi_bpp, 0); > irec = &iwag->recs[iwag->nr_recs - 1]; > - restart = irec->ir_startino + XFS_INODES_PER_CHUNK - 1; > + ASSERT(next_agino == irec->ir_startino + XFS_INODES_PER_CHUNK); > > error = xfs_iwalk_ag_recs(iwag); > if (error) > @@ -372,7 +380,7 @@ xfs_iwalk_run_callbacks( > if (error) > return error; > > - return xfs_inobt_lookup(*curpp, restart, XFS_LOOKUP_GE, has_more); > + return xfs_inobt_lookup(*curpp, next_agino, XFS_LOOKUP_GE, has_more); > } > > /* Walk all inodes in a single AG, from @iwag->startino to the end of the AG. */ > @@ -396,6 +404,7 @@ xfs_iwalk_ag( > > while (!error && has_more) { > struct xfs_inobt_rec_incore *irec; > + xfs_ino_t rec_fsino; > > cond_resched(); > if (xfs_pwork_want_abort(&iwag->pwork)) > @@ -407,6 +416,15 @@ xfs_iwalk_ag( > if (error || !has_more) > break; > > + /* Make sure that we always move forward. */ > + rec_fsino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino); > + if (iwag->lastino != NULLFSINO && > + XFS_IS_CORRUPT(mp, iwag->lastino >= rec_fsino)) { > + error = -EFSCORRUPTED; > + goto out; > + } > + iwag->lastino = rec_fsino + XFS_INODES_PER_CHUNK - 1; > + > /* No allocated inodes in this chunk; skip it. */ > if (iwag->skip_empty && irec->ir_freecount == irec->ir_count) { > error = xfs_btree_increment(cur, 0, &has_more); > @@ -535,6 +553,7 @@ xfs_iwalk( > .trim_start = 1, > .skip_empty = 1, > .pwork = XFS_PWORK_SINGLE_THREADED, > + .lastino = NULLFSINO, > }; > xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino); > int error; > @@ -623,6 +642,7 @@ xfs_iwalk_threaded( > iwag->data = data; > iwag->startino = startino; > iwag->sz_recs = xfs_iwalk_prefetch(inode_records); > + iwag->lastino = NULLFSINO; > xfs_pwork_queue(&pctl, &iwag->pwork); > startino = XFS_AGINO_TO_INO(mp, agno + 1, 0); > if (flags & XFS_INOBT_WALK_SAME_AG) > @@ -696,6 +716,7 @@ xfs_inobt_walk( > .startino = startino, > .sz_recs = xfs_inobt_walk_prefetch(inobt_records), > .pwork = XFS_PWORK_SINGLE_THREADED, > + .lastino = NULLFSINO, > }; > xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino); > int error; >
diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c index 233dcc8784db..2a45138831e3 100644 --- a/fs/xfs/xfs_iwalk.c +++ b/fs/xfs/xfs_iwalk.c @@ -55,6 +55,9 @@ struct xfs_iwalk_ag { /* Where do we start the traversal? */ xfs_ino_t startino; + /* What was the last inode number we saw when iterating the inobt? */ + xfs_ino_t lastino; + /* Array of inobt records we cache. */ struct xfs_inobt_rec_incore *recs; @@ -301,6 +304,9 @@ xfs_iwalk_ag_start( if (XFS_IS_CORRUPT(mp, *has_more != 1)) return -EFSCORRUPTED; + iwag->lastino = XFS_AGINO_TO_INO(mp, agno, + irec->ir_startino + XFS_INODES_PER_CHUNK - 1); + /* * If the LE lookup yielded an inobt record before the cursor position, * skip it and see if there's another one after it. @@ -347,15 +353,17 @@ xfs_iwalk_run_callbacks( struct xfs_mount *mp = iwag->mp; struct xfs_trans *tp = iwag->tp; struct xfs_inobt_rec_incore *irec; - xfs_agino_t restart; + xfs_agino_t next_agino; int error; + next_agino = XFS_INO_TO_AGINO(mp, iwag->lastino) + 1; + ASSERT(iwag->nr_recs > 0); /* Delete cursor but remember the last record we cached... */ xfs_iwalk_del_inobt(tp, curpp, agi_bpp, 0); irec = &iwag->recs[iwag->nr_recs - 1]; - restart = irec->ir_startino + XFS_INODES_PER_CHUNK - 1; + ASSERT(next_agino == irec->ir_startino + XFS_INODES_PER_CHUNK); error = xfs_iwalk_ag_recs(iwag); if (error) @@ -372,7 +380,7 @@ xfs_iwalk_run_callbacks( if (error) return error; - return xfs_inobt_lookup(*curpp, restart, XFS_LOOKUP_GE, has_more); + return xfs_inobt_lookup(*curpp, next_agino, XFS_LOOKUP_GE, has_more); } /* Walk all inodes in a single AG, from @iwag->startino to the end of the AG. */ @@ -396,6 +404,7 @@ xfs_iwalk_ag( while (!error && has_more) { struct xfs_inobt_rec_incore *irec; + xfs_ino_t rec_fsino; cond_resched(); if (xfs_pwork_want_abort(&iwag->pwork)) @@ -407,6 +416,15 @@ xfs_iwalk_ag( if (error || !has_more) break; + /* Make sure that we always move forward. */ + rec_fsino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino); + if (iwag->lastino != NULLFSINO && + XFS_IS_CORRUPT(mp, iwag->lastino >= rec_fsino)) { + error = -EFSCORRUPTED; + goto out; + } + iwag->lastino = rec_fsino + XFS_INODES_PER_CHUNK - 1; + /* No allocated inodes in this chunk; skip it. */ if (iwag->skip_empty && irec->ir_freecount == irec->ir_count) { error = xfs_btree_increment(cur, 0, &has_more); @@ -535,6 +553,7 @@ xfs_iwalk( .trim_start = 1, .skip_empty = 1, .pwork = XFS_PWORK_SINGLE_THREADED, + .lastino = NULLFSINO, }; xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino); int error; @@ -623,6 +642,7 @@ xfs_iwalk_threaded( iwag->data = data; iwag->startino = startino; iwag->sz_recs = xfs_iwalk_prefetch(inode_records); + iwag->lastino = NULLFSINO; xfs_pwork_queue(&pctl, &iwag->pwork); startino = XFS_AGINO_TO_INO(mp, agno + 1, 0); if (flags & XFS_INOBT_WALK_SAME_AG) @@ -696,6 +716,7 @@ xfs_inobt_walk( .startino = startino, .sz_recs = xfs_inobt_walk_prefetch(inobt_records), .pwork = XFS_PWORK_SINGLE_THREADED, + .lastino = NULLFSINO, }; xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino); int error;