[v2,18/21] xfs: cross-reference the rmapbt data with the refcountbt

Message ID 20180109212517.GM5602@magnolia (mailing list archive)
State Superseded

Commit Message

Darrick J. Wong Jan. 9, 2018, 9:25 p.m. UTC
From: Darrick J. Wong <darrick.wong@oracle.com>

Cross-reference the refcount data with the rmap data to check that the
number of rmaps for a given block matches the refcount of that block, and
that CoW blocks (which are owned entirely by the refcountbt) are tracked
as well.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
v2: streamline scrubber arguments, remove stack allocated objects
---
 fs/xfs/scrub/refcount.c |  318 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 316 insertions(+), 2 deletions(-)


Comments

Dave Chinner Jan. 15, 2018, 11:49 p.m. UTC | #1
On Tue, Jan 09, 2018 at 01:25:17PM -0800, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Cross-reference the refcount data with the rmap data to check that the
> number of rmaps for a given block matches the refcount of that block, and
> that CoW blocks (which are owned entirely by the refcountbt) are tracked
> as well.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
> v2: streamline scrubber arguments, remove stack allocated objects
> ---
>  fs/xfs/scrub/refcount.c |  318 +++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 316 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c
> index 700f8f1..df18e47 100644
> --- a/fs/xfs/scrub/refcount.c
> +++ b/fs/xfs/scrub/refcount.c
> @@ -50,6 +50,274 @@ xfs_scrub_setup_ag_refcountbt(
>  
>  /* Reference count btree scrubber. */
>  
> +/*
> + * Confirming Reference Counts via Reverse Mappings
> + *
> + * We want to count the reverse mappings overlapping a refcount record
> + * (bno, len, refcount), allowing for the possibility that some of the
> + * overlap may come from smaller adjoining reverse mappings, while some
> + * comes from single extents which overlap the range entirely.  The
> + * outer loop is as follows:
> + *
> + * 1. For all reverse mappings overlapping the refcount extent,
> + *    a. If a given rmap completely overlaps, mark it as seen.
> + *    b. Otherwise, record the fragment for later processing.
> + *
> + * Once we've seen all the rmaps, we know that for all blocks in the
> + * refcount record we want to find $refcount owners and we've already
> + * visited $seen extents that overlap all the blocks.  Therefore, we
> + * need to find ($refcount - $seen) owners for every block in the
> + * extent; call that quantity $target_nr.  Proceed as follows:
> + *
> + * 2. Pull the first $target_nr fragments from the list; all of them
> + *    should start at or before the start of the extent.
> + *    Call this subset of fragments the working set.
> + * 3. Until there are no more unprocessed fragments,
> + *    a. Find the shortest fragments in the set and remove them.
> + *    b. Note the block number of the end of these fragments.
> + *    c. Pull the same number of fragments from the list.  All of these
> + *       fragments should start at the block number recorded in the
> + *       previous step.
> + *    d. Put those fragments in the set.
> + * 4. Check that there are $target_nr fragments remaining in the list,
> + *    and that they all end at or beyond the end of the refcount extent.
> + *
> + * If the refcount is correct, all the check conditions in the algorithm
> + * should always hold true.  If not, the refcount is incorrect.

This needs a comment somewhere in here describing the order of
the records on the fragment list. AFAICT, it's ordered by start
bno, but I'm not 100% sure and it seems the code is dependent on
strict ordering of records on the frag list....

.....

> +	} else {
> +		/*
> +		 * This rmap covers only part of the refcount record, so
> +		 * save the fragment for later processing.
> +		 */
> +		frag = kmem_alloc(sizeof(struct xfs_scrub_refcnt_frag),
> +				KM_MAYFAIL | KM_NOFS);
> +		if (!frag)
> +			return -ENOMEM;
> +		memcpy(&frag->rm, rec, sizeof(frag->rm));
> +		list_add_tail(&frag->list, &refchk->fragments);

I'm making the assumption here that we're seeing records in the
order they are in the rmap tree and that it's in increasing startblock
order, hence the list is ordered that way....

> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * Given a bunch of rmap fragments, iterate through them, keeping
> + * a running tally of the refcount.  If this ever deviates from
> + * what we expect (which is the refcountbt's refcount minus the
> + * number of extents that totally covered the refcountbt extent),
> + * we have a refcountbt error.
> + */
> +STATIC void
> +xfs_scrub_refcountbt_process_rmap_fragments(
> +	struct xfs_scrub_refcnt_check	*refchk)
> +{
> +	struct list_head		worklist;
> +	struct xfs_scrub_refcnt_frag	*frag;
> +	struct xfs_scrub_refcnt_frag	*n;
> +	xfs_agblock_t			bno;
> +	xfs_agblock_t			rbno;
> +	xfs_agblock_t			next_rbno;
> +	xfs_nlink_t			nr;
> +	xfs_nlink_t			target_nr;
> +
> +	target_nr = refchk->refcount - refchk->seen;
> +	if (target_nr == 0)
> +		return;
> +
> +	/*
> +	 * There are (refchk->refcount - refchk->seen)
> +	 * references we haven't found yet.  Pull that many off the
> +	 * fragment list and figure out where the smallest rmap ends
> +	 * (and therefore the next rmap should start).  All the rmaps
> +	 * we pull off should start at or before the beginning of the
> +	 * refcount record's range.
> +	 */
> +	INIT_LIST_HEAD(&worklist);
> +	rbno = NULLAGBLOCK;
> +	nr = 1;
> +
> +	/* Find all the rmaps that start at or before the refc extent. */
> +	list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
> +		if (frag->rm.rm_startblock > refchk->bno)
> +			goto done;

.... and this is where the code implies lowest to highest startblock
ordering on the frag list.

> +		bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
> +		if (rbno > bno)
> +			rbno = bno;

Can we put that check the other way around? We're looking for the
shortest/smallest end block, so

		if (bno < rbno)
			rbno = bno;

Makes a lot more sense to me.

> +		list_del(&frag->list);
> +		list_add_tail(&frag->list, &worklist);

list_move_tail()?

Ok, so we are moving fragments that start before the refcount bno to
the work list.

> +		if (nr == target_nr)
> +			break;
> +		nr++;
> +	}
> +
> +	/*
> +	 * We should have found exactly $target_nr rmap fragments starting
> +	 * at or before the refcount extent.
> +	 */
> +	if (nr != target_nr)
> +		goto done;

ok. so on error we clean up and free the frag list and work list....

> +
> +	while (!list_empty(&refchk->fragments)) {
> +		/* Discard any fragments ending at rbno. */
> +		nr = 0;
> +		next_rbno = NULLAGBLOCK;
> +		list_for_each_entry_safe(frag, n, &worklist, list) {

Ok, this needs to be clearer that it's walking the working set
of fragments. I had to read this code several times before I worked
out this is where the "working set" was being processed....

> +			bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
> +			if (bno != rbno) {
> +				if (next_rbno > bno)
> +					next_rbno = bno;

Same comment here about next_rbno being a "smallest bno" variable.

> +				continue;
> +			}
> +			list_del(&frag->list);
> +			kmem_free(frag);
> +			nr++;
> +		}
> +
> +		/* Empty list?  We're done. */
> +		if (list_empty(&refchk->fragments))
> +			break;
> +
> +		/* Try to add nr rmaps starting at rbno to the worklist. */
> +		list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
> +			bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
> +			if (frag->rm.rm_startblock != rbno)
> +				goto done;

definitely assuming the frag list is ordered here :P

> +			list_del(&frag->list);
> +			list_add_tail(&frag->list, &worklist);
> +			if (next_rbno > bno)
> +				next_rbno = bno;
> +			nr--;
> +			if (nr == 0)
> +				break;
> +		}

Ok, so if we get here with nr > 0, then we must have emptied the
fragment list onto the work list, right? At this point, the outer
loop will terminate. Don't we need to run the worklist processing
loop one last time?

> +		rbno = next_rbno;
> +	}

.....
> +/* Use the rmap entries covering this extent to verify the refcount. */
> +STATIC void
> +xfs_scrub_refcountbt_xref_rmap(
> +	struct xfs_scrub_context	*sc,
> +	xfs_agblock_t			bno,
> +	xfs_extlen_t			len,
> +	xfs_nlink_t			refcount)
> +{
> +	struct xfs_scrub_refcnt_check	refchk = {
> +		.sc = sc,
> +		.bno = bno,
> +		.len = len,
> +		.refcount = refcount,
> +		.seen = 0,
> +	};
> +	struct xfs_rmap_irec		low;
> +	struct xfs_rmap_irec		high;
> +	struct xfs_scrub_refcnt_frag	*frag;
> +	struct xfs_scrub_refcnt_frag	*n;
> +	int				error;
> +
> +	if (!sc->sa.rmap_cur)
> +		return;
> +
> +	/* Cross-reference with the rmapbt to confirm the refcount. */
> +	memset(&low, 0, sizeof(low));
> +	low.rm_startblock = bno;
> +	memset(&high, 0xFF, sizeof(high));
> +	high.rm_startblock = bno + len - 1;

This range query init feels like a familiar pattern now. Helper
function (separate patch)?

....

> +/* Make sure we have as many refc blocks as the rmap says. */
> +STATIC void
> +xfs_scrub_refcount_xref_rmap(
> +	struct xfs_scrub_context	*sc,
> +	struct xfs_owner_info		*oinfo,
> +	xfs_filblks_t			cow_blocks)
> +{
> +	xfs_extlen_t			refcbt_blocks = 0;
> +	xfs_filblks_t			blocks;
> +	int				error;
> +
> +	/* Check that we saw as many refcbt blocks as the rmap knows about. */
> +	error = xfs_btree_count_blocks(sc->sa.refc_cur, &refcbt_blocks);
> +	if (!xfs_scrub_btree_process_error(sc, sc->sa.refc_cur, 0, &error))
> +		return;
> +	error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo,
> +			&blocks);
> +	if (xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur) &&
> +	    blocks != refcbt_blocks)
> +		xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
> +
> +	if (!sc->sa.rmap_cur)
> +		return;
> +
> +	/* Check that we saw as many cow blocks as the rmap knows about. */
> +	xfs_rmap_ag_owner(oinfo, XFS_RMAP_OWN_COW);
> +	error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo,
> +			&blocks);
> +	if (xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur) &&
> +	    blocks != cow_blocks)
> +		xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);

Bit of a landmine that this code changes the owner info structure
that was passed in....

> +}
> +
>  /* Scrub the refcount btree for some AG. */
>  int
>  xfs_scrub_refcountbt(
>  	struct xfs_scrub_context	*sc)
>  {
>  	struct xfs_owner_info		oinfo;
> +	xfs_agblock_t			cow_blocks = 0;
> +	int				error;
>  
>  	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_REFC);
> -	return xfs_scrub_btree(sc, sc->sa.refc_cur, xfs_scrub_refcountbt_rec,
> -			&oinfo, NULL);
> +	error = xfs_scrub_btree(sc, sc->sa.refc_cur, xfs_scrub_refcountbt_rec,
> +			&oinfo, &cow_blocks);
> +	if (error)
> +		return error;
> +
> +	if (sc->sa.rmap_cur)
> +		xfs_scrub_refcount_xref_rmap(sc, &oinfo, cow_blocks);

.... because that's not obvious in this code here if we add any more
code after this call.

> +
> +	return error;

error is zero here, so "return 0" instead?

Cheers,

Dave.
Darrick J. Wong Jan. 16, 2018, 6:49 a.m. UTC | #2
On Tue, Jan 16, 2018 at 10:49:28AM +1100, Dave Chinner wrote:
> On Tue, Jan 09, 2018 at 01:25:17PM -0800, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > Cross-reference the refcount data with the rmap data to check that the
> > number of rmaps for a given block matches the refcount of that block, and
> > that CoW blocks (which are owned entirely by the refcountbt) are tracked
> > as well.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> > v2: streamline scrubber arguments, remove stack allocated objects
> > ---
> >  fs/xfs/scrub/refcount.c |  318 +++++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 316 insertions(+), 2 deletions(-)
> > 
> > diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c
> > index 700f8f1..df18e47 100644
> > --- a/fs/xfs/scrub/refcount.c
> > +++ b/fs/xfs/scrub/refcount.c
> > @@ -50,6 +50,274 @@ xfs_scrub_setup_ag_refcountbt(
> >  
> >  /* Reference count btree scrubber. */
> >  
> > +/*
> > + * Confirming Reference Counts via Reverse Mappings
> > + *
> > + * We want to count the reverse mappings overlapping a refcount record
> > + * (bno, len, refcount), allowing for the possibility that some of the
> > + * overlap may come from smaller adjoining reverse mappings, while some
> > + * comes from single extents which overlap the range entirely.  The
> > + * outer loop is as follows:
> > + *
> > + * 1. For all reverse mappings overlapping the refcount extent,
> > + *    a. If a given rmap completely overlaps, mark it as seen.
> > + *    b. Otherwise, record the fragment for later processing.
> > + *
> > + * Once we've seen all the rmaps, we know that for all blocks in the
> > + * refcount record we want to find $refcount owners and we've already
> > + * visited $seen extents that overlap all the blocks.  Therefore, we
> > + * need to find ($refcount - $seen) owners for every block in the
> > + * extent; call that quantity $target_nr.  Proceed as follows:
> > + *
> > + * 2. Pull the first $target_nr fragments from the list; all of them
> > + *    should start at or before the start of the extent.
> > + *    Call this subset of fragments the working set.
> > + * 3. Until there are no more unprocessed fragments,
> > + *    a. Find the shortest fragments in the set and remove them.
> > + *    b. Note the block number of the end of these fragments.
> > + *    c. Pull the same number of fragments from the list.  All of these
> > + *       fragments should start at the block number recorded in the
> > + *       previous step.
> > + *    d. Put those fragments in the set.
> > + * 4. Check that there are $target_nr fragments remaining in the list,
> > + *    and that they all end at or beyond the end of the refcount extent.
> > + *
> > + * If the refcount is correct, all the check conditions in the algorithm
> > + * should always hold true.  If not, the refcount is incorrect.
> 
> This needs a comment somewhere in here describing the order of
> the records on the fragment list. AFAICT, it's ordered by start
> bno, but I'm not 100% sure and it seems the code is dependent on
> strict ordering of records on the frag list....

Yes, the list must be ordered by agbno, which should be the case if we
iterated the rmap records in order.  We /could/ potentially list_sort
to ensure that this code doesn't blow up even if the rmapbt decides to
feed us out of order garbage.

Could?  Should.

> .....
> 
> > +	} else {
> > +		/*
> > +		 * This rmap covers only part of the refcount record, so
> > +		 * save the fragment for later processing.
> > +		 */
> > +		frag = kmem_alloc(sizeof(struct xfs_scrub_refcnt_frag),
> > +				KM_MAYFAIL | KM_NOFS);
> > +		if (!frag)
> > +			return -ENOMEM;
> > +		memcpy(&frag->rm, rec, sizeof(frag->rm));
> > +		list_add_tail(&frag->list, &refchk->fragments);
> 
> I'm making the assumption here that we're seeing records in the
> order they are in the rmap tree and that it's in increasing startblock
> order, hence the list is ordered that way....
> 
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +/*
> > + * Given a bunch of rmap fragments, iterate through them, keeping
> > + * a running tally of the refcount.  If this ever deviates from
> > + * what we expect (which is the refcountbt's refcount minus the
> > + * number of extents that totally covered the refcountbt extent),
> > + * we have a refcountbt error.
> > + */
> > +STATIC void
> > +xfs_scrub_refcountbt_process_rmap_fragments(
> > +	struct xfs_scrub_refcnt_check	*refchk)
> > +{
> > +	struct list_head		worklist;
> > +	struct xfs_scrub_refcnt_frag	*frag;
> > +	struct xfs_scrub_refcnt_frag	*n;
> > +	xfs_agblock_t			bno;
> > +	xfs_agblock_t			rbno;
> > +	xfs_agblock_t			next_rbno;
> > +	xfs_nlink_t			nr;
> > +	xfs_nlink_t			target_nr;
> > +
> > +	target_nr = refchk->refcount - refchk->seen;
> > +	if (target_nr == 0)
> > +		return;
> > +
> > +	/*
> > +	 * There are (refchk->refcount - refchk->seen)
> > +	 * references we haven't found yet.  Pull that many off the
> > +	 * fragment list and figure out where the smallest rmap ends
> > +	 * (and therefore the next rmap should start).  All the rmaps
> > +	 * we pull off should start at or before the beginning of the
> > +	 * refcount record's range.
> > +	 */
> > +	INIT_LIST_HEAD(&worklist);
> > +	rbno = NULLAGBLOCK;
> > +	nr = 1;
> > +
> > +	/* Find all the rmaps that start at or before the refc extent. */
> > +	list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
> > +		if (frag->rm.rm_startblock > refchk->bno)
> > +			goto done;
> 
> .... and this is where the code implies lowest to highest startblock
> ordering on the frag list.
> 
> > +		bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
> > +		if (rbno > bno)
> > +			rbno = bno;
> 
> Can we put that check the other way around? We're looking for the
> shortest/smallest end block, so
> 
> 		if (bno < rbno)
> 			rbno = bno;
> 
> Makes a lot more sense to me.

Ok.

> 
> > +		list_del(&frag->list);
> > +		list_add_tail(&frag->list, &worklist);
> 
> list_move_tail()?
> 
> Ok, so we are moving fragments that start before the refcount bno to
> the work list.

<nod>
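
(For reference, that suggestion is just the single call

	list_move_tail(&frag->list, &worklist);

which does the list_del()/list_add_tail() pair in one step.)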

> > +		if (nr == target_nr)
> > +			break;
> > +		nr++;
> > +	}
> > +
> > +	/*
> > +	 * We should have found exactly $target_nr rmap fragments starting
> > +	 * at or before the refcount extent.
> > +	 */
> > +	if (nr != target_nr)
> > +		goto done;
> 
> ok. so on error we clean up and free the frag list and work list....
> 
> > +
> > +	while (!list_empty(&refchk->fragments)) {
> > +		/* Discard any fragments ending at rbno. */
> > +		nr = 0;
> > +		next_rbno = NULLAGBLOCK;
> > +		list_for_each_entry_safe(frag, n, &worklist, list) {
> 
> Ok, this needs to be clearer that it's walking the working set
> of fragments. I had to read this code several times before I worked
> out this is where the "working set" was being processed....

/* Walk the working set of rmap fragments... */

> 
> > +			bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
> > +			if (bno != rbno) {
> > +				if (next_rbno > bno)
> > +					next_rbno = bno;
> 
> Same comment here about next_rbno being a "smallest bno" variable.

<nod>

> > +				continue;
> > +			}
> > +			list_del(&frag->list);
> > +			kmem_free(frag);
> > +			nr++;
> > +		}
> > +
> > +		/* Empty list?  We're done. */
> > +		if (list_empty(&refchk->fragments))
> > +			break;
> > +
> > +		/* Try to add nr rmaps starting at rbno to the worklist. */
> > +		list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
> > +			bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
> > +			if (frag->rm.rm_startblock != rbno)
> > +				goto done;
> 
> definitely assuming the frag list is ordered here :P
> 
> > +			list_del(&frag->list);
> > +			list_add_tail(&frag->list, &worklist);
> > +			if (next_rbno > bno)
> > +				next_rbno = bno;
> > +			nr--;
> > +			if (nr == 0)
> > +				break;
> > +		}
> 
> Ok, so if we get here with nr > 0, then we must have emptied the
> fragment list onto the work list, right? At this point, the outer
> loop will terminate. Don't we need to run the worklist processing
> loop one last time?

Nope.  Throughout xfs_scrub_refcountbt_process_rmap_fragments, we're
checking that the number of rmaps for a given refcount extent (i.e.
target_nr) remains the same.  nr is the number of rmaps we discarded
from the worklist at the top of the loop, so if we can't add the exact
same number of rmaps back to the worklist then we know that the refcount
is wrong.

So there /is/ a bug here, and the bug is that we need "if (nr) break;"
because we don't need to process more of the loop, we already know the
refcountbt is broken.
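
Concretely (sketching the fix described above, not the final patch),
the tail of the outer loop would become:

		/* Couldn't refill the worklist fully: refcount is wrong. */
		if (nr)
			break;
		rbno = next_rbno;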

> > +		rbno = next_rbno;
> > +	}
> 
> .....
> > +/* Use the rmap entries covering this extent to verify the refcount. */
> > +STATIC void
> > +xfs_scrub_refcountbt_xref_rmap(
> > +	struct xfs_scrub_context	*sc,
> > +	xfs_agblock_t			bno,
> > +	xfs_extlen_t			len,
> > +	xfs_nlink_t			refcount)
> > +{
> > +	struct xfs_scrub_refcnt_check	refchk = {
> > +		.sc = sc,
> > +		.bno = bno,
> > +		.len = len,
> > +		.refcount = refcount,
> > +		.seen = 0,
> > +	};
> > +	struct xfs_rmap_irec		low;
> > +	struct xfs_rmap_irec		high;
> > +	struct xfs_scrub_refcnt_frag	*frag;
> > +	struct xfs_scrub_refcnt_frag	*n;
> > +	int				error;
> > +
> > +	if (!sc->sa.rmap_cur)
> > +		return;
> > +
> > +	/* Cross-reference with the rmapbt to confirm the refcount. */
> > +	memset(&low, 0, sizeof(low));
> > +	low.rm_startblock = bno;
> > +	memset(&high, 0xFF, sizeof(high));
> > +	high.rm_startblock = bno + len - 1;
> 
> This range query init feels like a familiar pattern now. Helper
> function (separate patch)?

Yeah, these can all get cleaned up at the end of the series.
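Something like this hypothetical helper (name invented here, just to
show the shape of it) would capture the pattern:

	static void
	xfs_scrub_rmap_query_range_init(
		struct xfs_rmap_irec	*low,
		struct xfs_rmap_irec	*high,
		xfs_agblock_t		agbno,
		xfs_extlen_t		len)
	{
		memset(low, 0, sizeof(*low));
		low->rm_startblock = agbno;
		memset(high, 0xFF, sizeof(*high));
		high->rm_startblock = agbno + len - 1;
	}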

> ....
> 
> > +/* Make sure we have as many refc blocks as the rmap says. */
> > +STATIC void
> > +xfs_scrub_refcount_xref_rmap(
> > +	struct xfs_scrub_context	*sc,
> > +	struct xfs_owner_info		*oinfo,
> > +	xfs_filblks_t			cow_blocks)
> > +{
> > +	xfs_extlen_t			refcbt_blocks = 0;
> > +	xfs_filblks_t			blocks;
> > +	int				error;
> > +
> > +	/* Check that we saw as many refcbt blocks as the rmap knows about. */
> > +	error = xfs_btree_count_blocks(sc->sa.refc_cur, &refcbt_blocks);
> > +	if (!xfs_scrub_btree_process_error(sc, sc->sa.refc_cur, 0, &error))
> > +		return;
> > +	error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo,
> > +			&blocks);
> > +	if (xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur) &&
> > +	    blocks != refcbt_blocks)
> > +		xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
> > +
> > +	if (!sc->sa.rmap_cur)
> > +		return;
> > +
> > +	/* Check that we saw as many cow blocks as the rmap knows about. */
> > +	xfs_rmap_ag_owner(oinfo, XFS_RMAP_OWN_COW);
> > +	error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo,
> > +			&blocks);
> > +	if (xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur) &&
> > +	    blocks != cow_blocks)
> > +		xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
> 
> Bit of a landmine that this code changes the owner info structure
> that was passed in....

Will fix.
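
Probably by giving the CoW check a local owner info instead of reusing
the caller's -- a sketch, not the final form:

	struct xfs_owner_info		cow_oinfo;

	/* Local copy, so the caller's oinfo stays untouched. */
	xfs_rmap_ag_owner(&cow_oinfo, XFS_RMAP_OWN_COW);
	error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur,
			&cow_oinfo, &blocks);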

> > +}
> > +
> >  /* Scrub the refcount btree for some AG. */
> >  int
> >  xfs_scrub_refcountbt(
> >  	struct xfs_scrub_context	*sc)
> >  {
> >  	struct xfs_owner_info		oinfo;
> > +	xfs_agblock_t			cow_blocks = 0;
> > +	int				error;
> >  
> >  	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_REFC);
> > -	return xfs_scrub_btree(sc, sc->sa.refc_cur, xfs_scrub_refcountbt_rec,
> > -			&oinfo, NULL);
> > +	error = xfs_scrub_btree(sc, sc->sa.refc_cur, xfs_scrub_refcountbt_rec,
> > +			&oinfo, &cow_blocks);
> > +	if (error)
> > +		return error;
> > +
> > +	if (sc->sa.rmap_cur)
> > +		xfs_scrub_refcount_xref_rmap(sc, &oinfo, cow_blocks);
> 
> .... because that's not obvious in this code here if we add any more
> code after this call.

<nod>

> > +
> > +	return error;
> 
> error is zero here, so "return 0" instead?

Ok.

--D

> 
> Cheers,
> 
> Dave.
> 
> -- 
> Dave Chinner
> david@fromorbit.com
Darrick J. Wong Jan. 16, 2018, 7:47 p.m. UTC | #3
On Mon, Jan 15, 2018 at 10:49:55PM -0800, Darrick J. Wong wrote:
> On Tue, Jan 16, 2018 at 10:49:28AM +1100, Dave Chinner wrote:
> > On Tue, Jan 09, 2018 at 01:25:17PM -0800, Darrick J. Wong wrote:
> > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > 
> > > Cross-reference the refcount data with the rmap data to check that the
> > > number of rmaps for a given block matches the refcount of that block, and
> > > that CoW blocks (which are owned entirely by the refcountbt) are tracked
> > > as well.
> > > 
> > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > ---
> > > v2: streamline scrubber arguments, remove stack allocated objects
> > > ---
> > >  fs/xfs/scrub/refcount.c |  318 +++++++++++++++++++++++++++++++++++++++++++++++
> > >  1 file changed, 316 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c
> > > index 700f8f1..df18e47 100644
> > > --- a/fs/xfs/scrub/refcount.c
> > > +++ b/fs/xfs/scrub/refcount.c
> > > @@ -50,6 +50,274 @@ xfs_scrub_setup_ag_refcountbt(
> > >  
> > >  /* Reference count btree scrubber. */
> > >  
> > > +/*
> > > + * Confirming Reference Counts via Reverse Mappings
> > > + *
> > > + * We want to count the reverse mappings overlapping a refcount record
> > > + * (bno, len, refcount), allowing for the possibility that some of the
> > > + * overlap may come from smaller adjoining reverse mappings, while some
> > > + * comes from single extents which overlap the range entirely.  The
> > > + * outer loop is as follows:
> > > + *
> > > + * 1. For all reverse mappings overlapping the refcount extent,
> > > + *    a. If a given rmap completely overlaps, mark it as seen.
> > > + *    b. Otherwise, record the fragment for later processing.
> > > + *
> > > + * Once we've seen all the rmaps, we know that for all blocks in the
> > > + * refcount record we want to find $refcount owners and we've already
> > > + * visited $seen extents that overlap all the blocks.  Therefore, we
> > > + * need to find ($refcount - $seen) owners for every block in the
> > > + * extent; call that quantity $target_nr.  Proceed as follows:
> > > + *
> > > + * 2. Pull the first $target_nr fragments from the list; all of them
> > > + *    should start at or before the start of the extent.
> > > + *    Call this subset of fragments the working set.
> > > + * 3. Until there are no more unprocessed fragments,
> > > + *    a. Find the shortest fragments in the set and remove them.
> > > + *    b. Note the block number of the end of these fragments.
> > > + *    c. Pull the same number of fragments from the list.  All of these
> > > + *       fragments should start at the block number recorded in the
> > > + *       previous step.
> > > + *    d. Put those fragments in the set.
> > > + * 4. Check that there are $target_nr fragments remaining in the list,
> > > + *    and that they all end at or beyond the end of the refcount extent.
> > > + *
> > > + * If the refcount is correct, all the check conditions in the algorithm
> > > + * should always hold true.  If not, the refcount is incorrect.
> > 
> > This needs a comment somewhere in here describing the order of
> > the records on the fragment list. AFAICT, it's ordered by start
> > bno, but I'm not 100% sure and it seems the code is dependent on
> > strict ordering of records on the frag list....
> 
> Yes, the list must be ordered by agbno, which should be the case if we
> iterated the rmap records in order.  We /could/ potentially list_sort
> to ensure that this code doesn't blow up even if the rmapbt decides to
> feed us out of order garbage.
> 
> Could?  Should.

Hm.  In theory the rmap should never feed us out of order rmaps, but if
it does that's an xref corruption, so we can employ a computationally
less expensive order check (over list_sort) and bail out if the list
is out of order.
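
A minimal sketch of that check, run before pulling anything off the
fragment list (reusing the function's existing frag/bno locals):

	/* Make sure the fragments actually /are/ in agbno order. */
	bno = 0;
	list_for_each_entry(frag, &refchk->fragments, list) {
		if (frag->rm.rm_startblock < bno)
			goto done;
		bno = frag->rm.rm_startblock;
	}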

> > .....
> > 
> > > +	} else {
> > > +		/*
> > > +		 * This rmap covers only part of the refcount record, so
> > > +		 * save the fragment for later processing.
> > > +		 */
> > > +		frag = kmem_alloc(sizeof(struct xfs_scrub_refcnt_frag),
> > > +				KM_MAYFAIL | KM_NOFS);
> > > +		if (!frag)
> > > +			return -ENOMEM;
> > > +		memcpy(&frag->rm, rec, sizeof(frag->rm));
> > > +		list_add_tail(&frag->list, &refchk->fragments);
> > 
> > I'm making the assumption here that we're seeing records in the
> > order they are in the rmap tree and that it's in increasing startblock
> > order, hence the list is ordered that way....
> > 
> > > +	}
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +/*
> > > + * Given a bunch of rmap fragments, iterate through them, keeping
> > > + * a running tally of the refcount.  If this ever deviates from
> > > + * what we expect (which is the refcountbt's refcount minus the
> > > + * number of extents that totally covered the refcountbt extent),
> > > + * we have a refcountbt error.
> > > + */
> > > +STATIC void
> > > +xfs_scrub_refcountbt_process_rmap_fragments(
> > > +	struct xfs_scrub_refcnt_check	*refchk)
> > > +{
> > > +	struct list_head		worklist;
> > > +	struct xfs_scrub_refcnt_frag	*frag;
> > > +	struct xfs_scrub_refcnt_frag	*n;
> > > +	xfs_agblock_t			bno;
> > > +	xfs_agblock_t			rbno;
> > > +	xfs_agblock_t			next_rbno;
> > > +	xfs_nlink_t			nr;
> > > +	xfs_nlink_t			target_nr;
> > > +
> > > +	target_nr = refchk->refcount - refchk->seen;
> > > +	if (target_nr == 0)
> > > +		return;
> > > +
> > > +	/*
> > > +	 * There are (refchk->refcount - refchk->seen)
> > > +	 * references we haven't found yet.  Pull that many off the
> > > +	 * fragment list and figure out where the smallest rmap ends
> > > +	 * (and therefore the next rmap should start).  All the rmaps
> > > +	 * we pull off should start at or before the beginning of the
> > > +	 * refcount record's range.
> > > +	 */
> > > +	INIT_LIST_HEAD(&worklist);
> > > +	rbno = NULLAGBLOCK;
> > > +	nr = 1;
> > > +
> > > +	/* Find all the rmaps that start at or before the refc extent. */
> > > +	list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
> > > +		if (frag->rm.rm_startblock > refchk->bno)
> > > +			goto done;
> > 
> > .... and this is where the code implies lowest to highest startblock
> > ordering on the frag list.
> > 
> > > +		bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
> > > +		if (rbno > bno)
> > > +			rbno = bno;
> > 
> > Can we put that check the other way around? We're looking for the
> > shortest/smallest end block, so
> > 
> > 		if (bno < rbno)
> > 			rbno = bno;
> > 
> > Makes a lot more sense to me.
> 
> Ok.
> 
> > 
> > > +		list_del(&frag->list);
> > > +		list_add_tail(&frag->list, &worklist);
> > 
> > list_move_tail()?
> > 
> > Ok, so we are moving fragments that start before the refcount bno to
> > the work list.
> 
> <nod>
> 
> > > +		if (nr == target_nr)
> > > +			break;
> > > +		nr++;
> > > +	}
> > > +
> > > +	/*
> > > +	 * We should have found exactly $target_nr rmap fragments starting
> > > +	 * at or before the refcount extent.
> > > +	 */
> > > +	if (nr != target_nr)
> > > +		goto done;
> > 
> > ok. so on error we clean up and free the frag list and work list....
> > 
> > > +
> > > +	while (!list_empty(&refchk->fragments)) {
> > > +		/* Discard any fragments ending at rbno. */
> > > +		nr = 0;
> > > +		next_rbno = NULLAGBLOCK;
> > > +		list_for_each_entry_safe(frag, n, &worklist, list) {
> > 
> > Ok, this needs to be clearer that it's walking the working set
> > of fragments. I had to read this code several times before I worked
> > out this is where the "working set" was being processed....
> 
> /* Walk the working set of rmap fragments... */

Correction: just update the comment to:
/* Discard any fragments ending at rbno from the worklist. */

--D

> 
> > 
> > > +			bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
> > > +			if (bno != rbno) {
> > > +				if (next_rbno > bno)
> > > +					next_rbno = bno;
> > 
> > Same comment here about next_rbno being a "smallest bno" variable.
> 
> <nod>
> 
> > > +				continue;
> > > +			}
> > > +			list_del(&frag->list);
> > > +			kmem_free(frag);
> > > +			nr++;
> > > +		}
> > > +
> > > +		/* Empty list?  We're done. */
> > > +		if (list_empty(&refchk->fragments))
> > > +			break;
> > > +
> > > +		/* Try to add nr rmaps starting at rbno to the worklist. */
> > > +		list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
> > > +			bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
> > > +			if (frag->rm.rm_startblock != rbno)
> > > +				goto done;
> > 
> > definitely assuming the frag list is ordered here :P
> > 
> > > +			list_del(&frag->list);
> > > +			list_add_tail(&frag->list, &worklist);
> > > +			if (next_rbno > bno)
> > > +				next_rbno = bno;
> > > +			nr--;
> > > +			if (nr == 0)
> > > +				break;
> > > +		}
> > 
> > Ok, so if we get here with nr > 0, then we must have emptied the
> > fragment list onto the work list, right? At this point, the outer
> > loop will terminate. Don't we need to run the worklist processing
> > loop one last time?
> 
> Nope.  Throughout xfs_scrub_refcountbt_process_rmap_fragments, we're
> checking that the number of rmaps for a given refcount extent (i.e.
> target_nr) remains the same.  nr is the number of rmaps we discarded
> from the worklist at the top of the loop, so if we can't add the exact
> same number of rmaps back to the worklist then we know that the refcount
> is wrong.
> 
> So there /is/ a bug here, and the bug is that we need "if (nr) break;"
> because we don't need to process more of the loop, we already know the
> refcountbt is broken.
> 
> > > +		rbno = next_rbno;
> > > +	}
> > 
> > .....
> > > +/* Use the rmap entries covering this extent to verify the refcount. */
> > > +STATIC void
> > > +xfs_scrub_refcountbt_xref_rmap(
> > > +	struct xfs_scrub_context	*sc,
> > > +	xfs_agblock_t			bno,
> > > +	xfs_extlen_t			len,
> > > +	xfs_nlink_t			refcount)
> > > +{
> > > +	struct xfs_scrub_refcnt_check	refchk = {
> > > +		.sc = sc,
> > > +		.bno = bno,
> > > +		.len = len,
> > > +		.refcount = refcount,
> > > +		.seen = 0,
> > > +	};
> > > +	struct xfs_rmap_irec		low;
> > > +	struct xfs_rmap_irec		high;
> > > +	struct xfs_scrub_refcnt_frag	*frag;
> > > +	struct xfs_scrub_refcnt_frag	*n;
> > > +	int				error;
> > > +
> > > +	if (!sc->sa.rmap_cur)
> > > +		return;
> > > +
> > > +	/* Cross-reference with the rmapbt to confirm the refcount. */
> > > +	memset(&low, 0, sizeof(low));
> > > +	low.rm_startblock = bno;
> > > +	memset(&high, 0xFF, sizeof(high));
> > > +	high.rm_startblock = bno + len - 1;
> > 
> > This range query init feels like a familiar pattern now. Helper
> > function (separate patch)?
> 
> Yeah, these can all get cleaned up at the end of the series.
> 
> > ....
> > 
> > > +/* Make sure we have as many refc blocks as the rmap says. */
> > > +STATIC void
> > > +xfs_scrub_refcount_xref_rmap(
> > > +	struct xfs_scrub_context	*sc,
> > > +	struct xfs_owner_info		*oinfo,
> > > +	xfs_filblks_t			cow_blocks)
> > > +{
> > > +	xfs_extlen_t			refcbt_blocks = 0;
> > > +	xfs_filblks_t			blocks;
> > > +	int				error;
> > > +
> > > +	/* Check that we saw as many refcbt blocks as the rmap knows about. */
> > > +	error = xfs_btree_count_blocks(sc->sa.refc_cur, &refcbt_blocks);
> > > +	if (!xfs_scrub_btree_process_error(sc, sc->sa.refc_cur, 0, &error))
> > > +		return;
> > > +	error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo,
> > > +			&blocks);
> > > +	if (xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur) &&
> > > +	    blocks != refcbt_blocks)
> > > +		xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
> > > +
> > > +	if (!sc->sa.rmap_cur)
> > > +		return;
> > > +
> > > +	/* Check that we saw as many cow blocks as the rmap knows about. */
> > > +	xfs_rmap_ag_owner(oinfo, XFS_RMAP_OWN_COW);
> > > +	error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo,
> > > +			&blocks);
> > > +	if (xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur) &&
> > > +	    blocks != cow_blocks)
> > > +		xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
> > 
> > Bit of a landmine that this code changes the owner info structure
> > that was passed in....
> 
> Will fix.
> 
> > > +}
> > > +
> > >  /* Scrub the refcount btree for some AG. */
> > >  int
> > >  xfs_scrub_refcountbt(
> > >  	struct xfs_scrub_context	*sc)
> > >  {
> > >  	struct xfs_owner_info		oinfo;
> > > +	xfs_agblock_t			cow_blocks = 0;
> > > +	int				error;
> > >  
> > >  	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_REFC);
> > > -	return xfs_scrub_btree(sc, sc->sa.refc_cur, xfs_scrub_refcountbt_rec,
> > > -			&oinfo, NULL);
> > > +	error = xfs_scrub_btree(sc, sc->sa.refc_cur, xfs_scrub_refcountbt_rec,
> > > +			&oinfo, &cow_blocks);
> > > +	if (error)
> > > +		return error;
> > > +
> > > +	if (sc->sa.rmap_cur)
> > > +		xfs_scrub_refcount_xref_rmap(sc, &oinfo, cow_blocks);
> > 
> > .... because that's not obvious in this code here if we add any more
> > code after this call.
> 
> <nod>
> 
> > > +
> > > +	return error;
> > 
> > error is zero here, so "return 0" instead?
> 
> Ok.
> 
> --D
> 
> > 
> > Cheers,
> > 
> > Dave.
> > 
> > -- 
> > Dave Chinner
> > david@fromorbit.com

Patch

diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c
index 700f8f1..df18e47 100644
--- a/fs/xfs/scrub/refcount.c
+++ b/fs/xfs/scrub/refcount.c
@@ -50,6 +50,274 @@  xfs_scrub_setup_ag_refcountbt(
 
 /* Reference count btree scrubber. */
 
+/*
+ * Confirming Reference Counts via Reverse Mappings
+ *
+ * We want to count the reverse mappings overlapping a refcount record
+ * (bno, len, refcount), allowing for the possibility that some of the
+ * overlap may come from smaller adjoining reverse mappings, while some
+ * comes from single extents which overlap the range entirely.  The
+ * outer loop is as follows:
+ *
+ * 1. For all reverse mappings overlapping the refcount extent,
+ *    a. If a given rmap completely overlaps, mark it as seen.
+ *    b. Otherwise, record the fragment for later processing.
+ *
+ * Once we've seen all the rmaps, we know that for all blocks in the
+ * refcount record we want to find $refcount owners and we've already
+ * visited $seen extents that overlap all the blocks.  Therefore, we
+ * need to find ($refcount - $seen) owners for every block in the
+ * extent; call that quantity $target_nr.  Proceed as follows:
+ *
+ * 2. Pull the first $target_nr fragments from the list; all of them
+ *    should start at or before the start of the extent.
+ *    Call this subset of fragments the working set.
+ * 3. Until there are no more unprocessed fragments,
+ *    a. Find the shortest fragments in the set and remove them.
+ *    b. Note the block number of the end of these fragments.
+ *    c. Pull the same number of fragments from the list.  All of these
+ *       fragments should start at the block number recorded in the
+ *       previous step.
+ *    d. Put those fragments in the set.
+ * 4. Check that there are $target_nr fragments remaining in the list,
+ *    and that they all end at or beyond the end of the refcount extent.
+ *
+ * If the refcount is correct, all the check conditions in the algorithm
+ * should always hold true.  If not, the refcount is incorrect.
+ */
+struct xfs_scrub_refcnt_frag {
+	struct list_head		list;
+	struct xfs_rmap_irec		rm;
+};
+
+struct xfs_scrub_refcnt_check {
+	struct xfs_scrub_context	*sc;
+	struct list_head		fragments;
+
+	/* refcount extent we're examining */
+	xfs_agblock_t			bno;
+	xfs_extlen_t			len;
+	xfs_nlink_t			refcount;
+
+	/* number of owners seen */
+	xfs_nlink_t			seen;
+};
+
+/*
+ * Decide if the given rmap is large enough that we can redeem it
+ * towards refcount verification now, or if it's a fragment, in
+ * which case we'll hang onto it in the hopes that we'll later
+ * discover that we've collected exactly the correct number of
+ * fragments as the refcountbt says we should have.
+ */
+STATIC int
+xfs_scrub_refcountbt_rmap_check(
+	struct xfs_btree_cur		*cur,
+	struct xfs_rmap_irec		*rec,
+	void				*priv)
+{
+	struct xfs_scrub_refcnt_check	*refchk = priv;
+	struct xfs_scrub_refcnt_frag	*frag;
+	xfs_agblock_t			rm_last;
+	xfs_agblock_t			rc_last;
+	int				error = 0;
+
+	if (xfs_scrub_should_terminate(refchk->sc, &error))
+		return error;
+
+	rm_last = rec->rm_startblock + rec->rm_blockcount - 1;
+	rc_last = refchk->bno + refchk->len - 1;
+
+	/* Confirm that a single-owner refc extent is a CoW stage. */
+	if (refchk->refcount == 1 && rec->rm_owner != XFS_RMAP_OWN_COW) {
+		xfs_scrub_btree_xref_set_corrupt(refchk->sc, cur, 0);
+		return 0;
+	}
+
+	if (rec->rm_startblock <= refchk->bno && rm_last >= rc_last) {
+		/*
+		 * The rmap overlaps the refcount record, so we can confirm
+		 * one refcount owner seen.
+		 */
+		refchk->seen++;
+	} else {
+		/*
+		 * This rmap covers only part of the refcount record, so
+		 * save the fragment for later processing.
+		 */
+		frag = kmem_alloc(sizeof(struct xfs_scrub_refcnt_frag),
+				KM_MAYFAIL | KM_NOFS);
+		if (!frag)
+			return -ENOMEM;
+		memcpy(&frag->rm, rec, sizeof(frag->rm));
+		list_add_tail(&frag->list, &refchk->fragments);
+	}
+
+	return 0;
+}
+
+/*
+ * Given a bunch of rmap fragments, iterate through them, keeping
+ * a running tally of the refcount.  If this ever deviates from
+ * what we expect (which is the refcountbt's refcount minus the
+ * number of extents that totally covered the refcountbt extent),
+ * we have a refcountbt error.
+ */
+STATIC void
+xfs_scrub_refcountbt_process_rmap_fragments(
+	struct xfs_scrub_refcnt_check	*refchk)
+{
+	struct list_head		worklist;
+	struct xfs_scrub_refcnt_frag	*frag;
+	struct xfs_scrub_refcnt_frag	*n;
+	xfs_agblock_t			bno;
+	xfs_agblock_t			rbno;
+	xfs_agblock_t			next_rbno;
+	xfs_nlink_t			nr;
+	xfs_nlink_t			target_nr;
+
+	target_nr = refchk->refcount - refchk->seen;
+	if (target_nr == 0)
+		return;
+
+	/*
+	 * There are (refchk->refcount - refchk->seen)
+	 * references we haven't found yet.  Pull that many off the
+	 * fragment list and figure out where the smallest rmap ends
+	 * (and therefore the next rmap should start).  All the rmaps
+	 * we pull off should start at or before the beginning of the
+	 * refcount record's range.
+	 */
+	INIT_LIST_HEAD(&worklist);
+	rbno = NULLAGBLOCK;
+	nr = 1;
+
+	/* Find all the rmaps that start at or before the refc extent. */
+	list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
+		if (frag->rm.rm_startblock > refchk->bno)
+			goto done;
+		bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
+		if (rbno > bno)
+			rbno = bno;
+		list_del(&frag->list);
+		list_add_tail(&frag->list, &worklist);
+		if (nr == target_nr)
+			break;
+		nr++;
+	}
+
+	/*
+	 * We should have found exactly $target_nr rmap fragments starting
+	 * at or before the refcount extent.
+	 */
+	if (nr != target_nr)
+		goto done;
+
+	while (!list_empty(&refchk->fragments)) {
+		/* Discard any fragments ending at rbno. */
+		nr = 0;
+		next_rbno = NULLAGBLOCK;
+		list_for_each_entry_safe(frag, n, &worklist, list) {
+			bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
+			if (bno != rbno) {
+				if (next_rbno > bno)
+					next_rbno = bno;
+				continue;
+			}
+			list_del(&frag->list);
+			kmem_free(frag);
+			nr++;
+		}
+
+		/* Empty list?  We're done. */
+		if (list_empty(&refchk->fragments))
+			break;
+
+		/* Try to add nr rmaps starting at rbno to the worklist. */
+		list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
+			bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
+			if (frag->rm.rm_startblock != rbno)
+				goto done;
+			list_del(&frag->list);
+			list_add_tail(&frag->list, &worklist);
+			if (next_rbno > bno)
+				next_rbno = bno;
+			nr--;
+			if (nr == 0)
+				break;
+		}
+
+		rbno = next_rbno;
+	}
+
+	/*
+	 * Make sure the last extent we processed ends at or beyond
+	 * the end of the refcount extent.
+	 */
+	if (rbno < refchk->bno + refchk->len)
+		goto done;
+
+	/* Actually record us having seen the remaining refcount. */
+	refchk->seen = refchk->refcount;
+done:
+	/* Delete fragments and work list. */
+	list_for_each_entry_safe(frag, n, &worklist, list) {
+		list_del(&frag->list);
+		kmem_free(frag);
+	}
+	list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
+		list_del(&frag->list);
+		kmem_free(frag);
+	}
+}
+
+/* Use the rmap entries covering this extent to verify the refcount. */
+STATIC void
+xfs_scrub_refcountbt_xref_rmap(
+	struct xfs_scrub_context	*sc,
+	xfs_agblock_t			bno,
+	xfs_extlen_t			len,
+	xfs_nlink_t			refcount)
+{
+	struct xfs_scrub_refcnt_check	refchk = {
+		.sc = sc,
+		.bno = bno,
+		.len = len,
+		.refcount = refcount,
+		.seen = 0,
+	};
+	struct xfs_rmap_irec		low;
+	struct xfs_rmap_irec		high;
+	struct xfs_scrub_refcnt_frag	*frag;
+	struct xfs_scrub_refcnt_frag	*n;
+	int				error;
+
+	if (!sc->sa.rmap_cur)
+		return;
+
+	/* Cross-reference with the rmapbt to confirm the refcount. */
+	memset(&low, 0, sizeof(low));
+	low.rm_startblock = bno;
+	memset(&high, 0xFF, sizeof(high));
+	high.rm_startblock = bno + len - 1;
+
+	INIT_LIST_HEAD(&refchk.fragments);
+	error = xfs_rmap_query_range(sc->sa.rmap_cur, &low, &high,
+			&xfs_scrub_refcountbt_rmap_check, &refchk);
+	if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
+		goto out_free;
+
+	xfs_scrub_refcountbt_process_rmap_fragments(&refchk);
+	if (refcount != refchk.seen)
+		xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
+
+out_free:
+	list_for_each_entry_safe(frag, n, &refchk.fragments, list) {
+		list_del(&frag->list);
+		kmem_free(frag);
+	}
+}
+
 /* Cross-reference with the other btrees. */
 STATIC void
 xfs_scrub_refcountbt_xref(
@@ -63,6 +331,7 @@  xfs_scrub_refcountbt_xref(
 
 	xfs_scrub_xref_is_used_space(sc, agbno, len);
 	xfs_scrub_xref_not_inodes(sc, agbno, len);
+	xfs_scrub_refcountbt_xref_rmap(sc, agbno, len, refcount);
 }
 
 /* Scrub a refcountbt record. */
@@ -72,6 +341,7 @@  xfs_scrub_refcountbt_rec(
 	union xfs_btree_rec		*rec)
 {
 	struct xfs_mount		*mp = bs->cur->bc_mp;
+	xfs_agblock_t			*cow_blocks = bs->private;
 	xfs_agnumber_t			agno = bs->cur->bc_private.a.agno;
 	xfs_agblock_t			bno;
 	xfs_extlen_t			len;
@@ -87,6 +357,8 @@  xfs_scrub_refcountbt_rec(
 	has_cowflag = (bno & XFS_REFC_COW_START);
 	if ((refcount == 1 && !has_cowflag) || (refcount != 1 && has_cowflag))
 		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+	if (has_cowflag)
+		(*cow_blocks) += len;
 
 	/* Check the extent. */
 	bno &= ~XFS_REFC_COW_START;
@@ -103,14 +375,56 @@  xfs_scrub_refcountbt_rec(
 	return error;
 }
 
+/* Make sure we have as many refc blocks as the rmap says. */
+STATIC void
+xfs_scrub_refcount_xref_rmap(
+	struct xfs_scrub_context	*sc,
+	struct xfs_owner_info		*oinfo,
+	xfs_filblks_t			cow_blocks)
+{
+	xfs_extlen_t			refcbt_blocks = 0;
+	xfs_filblks_t			blocks;
+	int				error;
+
+	/* Check that we saw as many refcbt blocks as the rmap knows about. */
+	error = xfs_btree_count_blocks(sc->sa.refc_cur, &refcbt_blocks);
+	if (!xfs_scrub_btree_process_error(sc, sc->sa.refc_cur, 0, &error))
+		return;
+	error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo,
+			&blocks);
+	if (xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur) &&
+	    blocks != refcbt_blocks)
+		xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
+
+	if (!sc->sa.rmap_cur)
+		return;
+
+	/* Check that we saw as many cow blocks as the rmap knows about. */
+	xfs_rmap_ag_owner(oinfo, XFS_RMAP_OWN_COW);
+	error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo,
+			&blocks);
+	if (xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur) &&
+	    blocks != cow_blocks)
+		xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
+}
+
 /* Scrub the refcount btree for some AG. */
 int
 xfs_scrub_refcountbt(
 	struct xfs_scrub_context	*sc)
 {
 	struct xfs_owner_info		oinfo;
+	xfs_agblock_t			cow_blocks = 0;
+	int				error;
 
 	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_REFC);
-	return xfs_scrub_btree(sc, sc->sa.refc_cur, xfs_scrub_refcountbt_rec,
-			&oinfo, NULL);
+	error = xfs_scrub_btree(sc, sc->sa.refc_cur, xfs_scrub_refcountbt_rec,
+			&oinfo, &cow_blocks);
+	if (error)
+		return error;
+
+	if (sc->sa.rmap_cur)
+		xfs_scrub_refcount_xref_rmap(sc, &oinfo, cow_blocks);
+
+	return error;
 }