Message ID | 166689089384.3788582.15595498616742667720.stgit@magnolia (mailing list archive) |
---|---|
State | Superseded, archived |
Headers | show |
Series | xfs: improve runtime refcountbt corruption detection | expand |
On Thu, Oct 27, 2022 at 10:14:53AM -0700, Darrick J. Wong wrote: > From: Darrick J. Wong <djwong@kernel.org> > > Now that we've separated the startblock and CoW/shared extent domain in > the incore refcount record structure, check the domain whenever we > retrieve a record to ensure that it's still in the domain that we want. > Depending on the circumstances, a change in domain either means we're > done processing or that we've found a corruption and need to fail out. > > The refcount check in xchk_xref_is_cow_staging is redundant since > _get_rec has done that for a long time now, so we can get rid of it. > > Signed-off-by: Darrick J. Wong <djwong@kernel.org> > --- > fs/xfs/libxfs/xfs_refcount.c | 53 ++++++++++++++++++++++++++++++++---------- > fs/xfs/scrub/refcount.c | 4 ++- > 2 files changed, 43 insertions(+), 14 deletions(-) > > > diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c > index 3b1cb0578770..608a122eef16 100644 > --- a/fs/xfs/libxfs/xfs_refcount.c > +++ b/fs/xfs/libxfs/xfs_refcount.c > @@ -386,6 +386,8 @@ xfs_refcount_split_extent( > goto out_error; > } > > + if (rcext.rc_domain != domain) > + return 0; > if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno) > return 0; > > @@ -434,6 +436,9 @@ xfs_refcount_merge_center_extents( > int error; > int found_rec; > > + ASSERT(left->rc_domain == center->rc_domain); > + ASSERT(right->rc_domain == center->rc_domain); > + > trace_xfs_refcount_merge_center_extents(cur->bc_mp, > cur->bc_ag.pag->pag_agno, left, center, right); Can you move the asserts to after the trace points? That way we if need to debug the assert, we'll have a tracepoint with the record information that triggered the assert emitted immediately before it fires... > > @@ -510,6 +515,8 @@ xfs_refcount_merge_left_extent( > int error; > int found_rec; > > + ASSERT(left->rc_domain == cleft->rc_domain); > + > trace_xfs_refcount_merge_left_extent(cur->bc_mp, > cur->bc_ag.pag->pag_agno, left, cleft); > > @@ -571,6 +578,8 @@ xfs_refcount_merge_right_extent( > int error; > int found_rec; > > + ASSERT(right->rc_domain == cright->rc_domain); > + > trace_xfs_refcount_merge_right_extent(cur->bc_mp, > cur->bc_ag.pag->pag_agno, cright, right); > > @@ -654,12 +663,10 @@ xfs_refcount_find_left_extents( > goto out_error; > } > > + if (tmp.rc_domain != domain) > + return 0; > if (xfs_refc_next(&tmp) != agbno) > return 0; > - if (domain == XFS_REFC_DOMAIN_SHARED && tmp.rc_refcount < 2) > - return 0; > - if (domain == XFS_REFC_DOMAIN_COW && tmp.rc_refcount > 1) > - return 0; Ah, as these go away, you can ignore my comment about them in the previous patches... :) Otherwise, looks ok. Reviewed-by: Dave Chinner <dchinner@redhat.com>
On Fri, Oct 28, 2022 at 08:15:31AM +1100, Dave Chinner wrote: > On Thu, Oct 27, 2022 at 10:14:53AM -0700, Darrick J. Wong wrote: > > From: Darrick J. Wong <djwong@kernel.org> > > > > Now that we've separated the startblock and CoW/shared extent domain in > > the incore refcount record structure, check the domain whenever we > > retrieve a record to ensure that it's still in the domain that we want. > > Depending on the circumstances, a change in domain either means we're > > done processing or that we've found a corruption and need to fail out. > > > > The refcount check in xchk_xref_is_cow_staging is redundant since > > _get_rec has done that for a long time now, so we can get rid of it. > > > > Signed-off-by: Darrick J. Wong <djwong@kernel.org> > > --- > > fs/xfs/libxfs/xfs_refcount.c | 53 ++++++++++++++++++++++++++++++++---------- > > fs/xfs/scrub/refcount.c | 4 ++- > > 2 files changed, 43 insertions(+), 14 deletions(-) > > > > > > diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c > > index 3b1cb0578770..608a122eef16 100644 > > --- a/fs/xfs/libxfs/xfs_refcount.c > > +++ b/fs/xfs/libxfs/xfs_refcount.c > > @@ -386,6 +386,8 @@ xfs_refcount_split_extent( > > goto out_error; > > } > > > > + if (rcext.rc_domain != domain) > > + return 0; > > if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno) > > return 0; > > > > @@ -434,6 +436,9 @@ xfs_refcount_merge_center_extents( > > int error; > > int found_rec; > > > > + ASSERT(left->rc_domain == center->rc_domain); > > + ASSERT(right->rc_domain == center->rc_domain); > > + > > trace_xfs_refcount_merge_center_extents(cur->bc_mp, > > cur->bc_ag.pag->pag_agno, left, center, right); > > Can you move the asserts to after the trace points? That way we if > need to debug the assert, we'll have a tracepoint with the record > information that triggered the assert emitted immediately before it > fires... Done. > > > > @@ -510,6 +515,8 @@ xfs_refcount_merge_left_extent( > > int error; > > int found_rec; > > > > + ASSERT(left->rc_domain == cleft->rc_domain); > > + > > trace_xfs_refcount_merge_left_extent(cur->bc_mp, > > cur->bc_ag.pag->pag_agno, left, cleft); > > > > @@ -571,6 +578,8 @@ xfs_refcount_merge_right_extent( > > int error; > > int found_rec; > > > > + ASSERT(right->rc_domain == cright->rc_domain); > > + > > trace_xfs_refcount_merge_right_extent(cur->bc_mp, > > cur->bc_ag.pag->pag_agno, cright, right); > > > > @@ -654,12 +663,10 @@ xfs_refcount_find_left_extents( > > goto out_error; > > } > > > > + if (tmp.rc_domain != domain) > > + return 0; > > if (xfs_refc_next(&tmp) != agbno) > > return 0; > > - if (domain == XFS_REFC_DOMAIN_SHARED && tmp.rc_refcount < 2) > > - return 0; > > - if (domain == XFS_REFC_DOMAIN_COW && tmp.rc_refcount > 1) > > - return 0; > > Ah, as these go away, you can ignore my comment about them in the > previous patches... :) > > Otherwise, looks ok. > > Reviewed-by: Dave Chinner <dchinner@redhat.com> Cool, thanks! --D > -- > Dave Chinner > david@fromorbit.com
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 3b1cb0578770..608a122eef16 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -386,6 +386,8 @@ xfs_refcount_split_extent( goto out_error; } + if (rcext.rc_domain != domain) + return 0; if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno) return 0; @@ -434,6 +436,9 @@ xfs_refcount_merge_center_extents( int error; int found_rec; + ASSERT(left->rc_domain == center->rc_domain); + ASSERT(right->rc_domain == center->rc_domain); + trace_xfs_refcount_merge_center_extents(cur->bc_mp, cur->bc_ag.pag->pag_agno, left, center, right); @@ -510,6 +515,8 @@ xfs_refcount_merge_left_extent( int error; int found_rec; + ASSERT(left->rc_domain == cleft->rc_domain); + trace_xfs_refcount_merge_left_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, left, cleft); @@ -571,6 +578,8 @@ xfs_refcount_merge_right_extent( int error; int found_rec; + ASSERT(right->rc_domain == cright->rc_domain); + trace_xfs_refcount_merge_right_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, cright, right); @@ -654,12 +663,10 @@ xfs_refcount_find_left_extents( goto out_error; } + if (tmp.rc_domain != domain) + return 0; if (xfs_refc_next(&tmp) != agbno) return 0; - if (domain == XFS_REFC_DOMAIN_SHARED && tmp.rc_refcount < 2) - return 0; - if (domain == XFS_REFC_DOMAIN_COW && tmp.rc_refcount > 1) - return 0; /* We have a left extent; retrieve (or invent) the next right one */ *left = tmp; @@ -675,6 +682,9 @@ xfs_refcount_find_left_extents( goto out_error; } + if (tmp.rc_domain != domain) + goto not_found; + /* if tmp starts at the end of our range, just use that */ if (tmp.rc_startblock == agbno) *cleft = tmp; @@ -694,6 +704,7 @@ xfs_refcount_find_left_extents( cleft->rc_domain = domain; } } else { +not_found: /* * No extents, so pretend that there's one covering the whole * range. @@ -745,12 +756,10 @@ xfs_refcount_find_right_extents( goto out_error; } + if (tmp.rc_domain != domain) + return 0; if (tmp.rc_startblock != agbno + aglen) return 0; - if (domain == XFS_REFC_DOMAIN_SHARED && tmp.rc_refcount < 2) - return 0; - if (domain == XFS_REFC_DOMAIN_COW && tmp.rc_refcount > 1) - return 0; /* We have a right extent; retrieve (or invent) the next left one */ *right = tmp; @@ -766,6 +775,9 @@ xfs_refcount_find_right_extents( goto out_error; } + if (tmp.rc_domain != domain) + goto not_found; + /* if tmp ends at the end of our range, just use that */ if (xfs_refc_next(&tmp) == agbno + aglen) *cright = tmp; @@ -785,6 +797,7 @@ xfs_refcount_find_right_extents( cright->rc_domain = domain; } } else { +not_found: /* * No extents, so pretend that there's one covering the whole * range. @@ -894,7 +907,7 @@ xfs_refcount_merge_extents( aglen); } - return error; + return 0; } /* @@ -966,7 +979,7 @@ xfs_refcount_adjust_extents( error = xfs_refcount_get_rec(cur, &ext, &found_rec); if (error) goto out_error; - if (!found_rec) { + if (!found_rec || ext.rc_domain != XFS_REFC_DOMAIN_SHARED) { ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks; ext.rc_blockcount = 0; ext.rc_refcount = 0; @@ -1415,6 +1428,8 @@ xfs_refcount_find_shared( error = -EFSCORRUPTED; goto out_error; } + if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED) + goto done; /* If the extent ends before the start, look at the next one */ if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) { @@ -1430,6 +1445,8 @@ xfs_refcount_find_shared( error = -EFSCORRUPTED; goto out_error; } + if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED) + goto done; } /* If the extent starts after the range we want, bail out */ @@ -1461,7 +1478,8 @@ xfs_refcount_find_shared( error = -EFSCORRUPTED; goto out_error; } - if (tmp.rc_startblock >= agbno + aglen || + if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED || + tmp.rc_startblock >= agbno + aglen || tmp.rc_startblock != *fbno + *flen) break; *flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno); @@ -1552,6 +1570,11 @@ xfs_refcount_adjust_cow_extents( error = xfs_refcount_get_rec(cur, &ext, &found_rec); if (error) goto out_error; + if (XFS_IS_CORRUPT(cur->bc_mp, found_rec && + ext.rc_domain != XFS_REFC_DOMAIN_COW)) { + error = -EFSCORRUPTED; + goto out_error; + } if (!found_rec) { ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks; ext.rc_blockcount = 0; @@ -1761,8 +1784,14 @@ xfs_refcount_recover_extent( rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), 0); xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec); + + if (XFS_IS_CORRUPT(cur->bc_mp, + rr->rr_rrec.rc_domain != XFS_REFC_DOMAIN_COW)) { + kmem_free(rr); + return -EFSCORRUPTED; + } + list_add_tail(&rr->rr_list, debris); - return 0; } diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index 98c033072120..8b06dd0bc955 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -441,8 +441,8 @@ xchk_xref_is_cow_staging( return; } - /* CoW flag must be set, refcount must be 1. */ - if (rc.rc_domain != XFS_REFC_DOMAIN_COW || rc.rc_refcount != 1) + /* CoW lookup returned a shared extent record? */ + if (rc.rc_domain != XFS_REFC_DOMAIN_COW) xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); /* Must be at least as long as what was passed in */