
[1/5] xfs: replace xfs_btree_has_record with a general keyspace scanner

Message ID 166473481597.1084209.14598185861526380195.stgit@magnolia (mailing list archive)
State Superseded
Series xfs: detect incorrect gaps in refcount btree

Commit Message

Darrick J. Wong Oct. 2, 2022, 6:20 p.m. UTC
From: Darrick J. Wong <djwong@kernel.org>

The current implementation of xfs_btree_has_record returns true if it
finds /any/ record within the given range.  Unfortunately, that's not
sufficient for scrub.  We want to be able to tell if a range of keyspace
for a btree is devoid of records, is totally mapped to records, or is
somewhere in between.  By forcing this to be a boolean, we were
generally missing the "in between" case and returning incorrect results.
Fix the API so that we can tell the caller which of those three is the
current state.
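
A caller-side sketch of the intended tri-state usage (function and enum names
are taken from the hunks below; the surrounding logic is illustrative only):

	enum xfs_btree_keyfill	keyfill;
	int			error;

	error = xfs_alloc_scan_keyfill(cur, bno, len, &keyfill);
	if (error)
		return error;

	switch (keyfill) {
	case XFS_BTREE_KEYFILL_EMPTY:	/* no records in the range */
	case XFS_BTREE_KEYFILL_SPARSE:	/* some, but not all, mapped */
	case XFS_BTREE_KEYFILL_FULL:	/* the entire range is mapped */
		break;
	}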

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/libxfs/xfs_alloc.c          |   11 +++-
 fs/xfs/libxfs/xfs_alloc.h          |    4 +-
 fs/xfs/libxfs/xfs_alloc_btree.c    |   13 +++++
 fs/xfs/libxfs/xfs_bmap_btree.c     |   13 +++++
 fs/xfs/libxfs/xfs_btree.c          |   92 +++++++++++++++++++++++++++++++-----
 fs/xfs/libxfs/xfs_btree.h          |   15 +++++-
 fs/xfs/libxfs/xfs_ialloc_btree.c   |   14 +++++
 fs/xfs/libxfs/xfs_refcount.c       |   11 +++-
 fs/xfs/libxfs/xfs_refcount.h       |    5 +-
 fs/xfs/libxfs/xfs_refcount_btree.c |   13 +++++
 fs/xfs/libxfs/xfs_rmap.c           |   12 +++--
 fs/xfs/libxfs/xfs_rmap.h           |    4 +-
 fs/xfs/libxfs/xfs_rmap_btree.c     |   13 +++++
 fs/xfs/libxfs/xfs_types.h          |   12 +++++
 fs/xfs/scrub/alloc.c               |    6 +-
 fs/xfs/scrub/refcount.c            |    7 ++-
 fs/xfs/scrub/rmap.c                |    6 +-
 17 files changed, 209 insertions(+), 42 deletions(-)

Comments

Dave Chinner Nov. 2, 2022, 1:47 a.m. UTC | #1
On Sun, Oct 02, 2022 at 11:20:16AM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> The current implementation of xfs_btree_has_record returns true if it
> finds /any/ record within the given range.  Unfortunately, that's not
> sufficient for scrub.  We want to be able to tell if a range of keyspace
> for a btree is devoid of records, is totally mapped to records, or is
> somewhere in between.  By forcing this to be a boolean, we were
> generally missing the "in between" case and returning incorrect results.
> Fix the API so that we can tell the caller which of those three is the
> current state.

My first reaction is that this "keyfill" API name is .... awful.

From an API perspective, all we are doing is changing the
"has_record()" API that returns a boolean to return a tri-state - we
add a "partial" return to the "all" and "none" states we currently
return. The whole API doesn't need renaming - it's impossible to
work out what "scan_keyfill" is actually intended to do, whereas
"has_record" is very much self-documenting....

Hence I think that the implementation of xfs_btree_has_record()
needs to change, but I don't think the entire API needs to be renamed.
All that needs to happen to the higher level API is this conversion:

> -	bool			*exists)
> +	enum xfs_btree_keyfill	*exists)

Even then, the enum could be named for what it means -
xfs_btree_rec_overlap - with values for none, full and partial. At
this point, nothing above xfs_btree_has_record needs to know
anything about whatever a "key fill" operation might actually be.
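
A minimal sketch of that suggested conversion (hypothetical enum values; the
entry point keeps its existing name and arguments, only the out parameter
widens to a tri-state):

enum xfs_btree_rec_overlap {
	XFS_BTREE_REC_OVERLAP_NONE = 0,
	XFS_BTREE_REC_OVERLAP_PARTIAL,
	XFS_BTREE_REC_OVERLAP_FULL,
};

int xfs_btree_has_record(struct xfs_btree_cur *cur,
		const union xfs_btree_irec *low,
		const union xfs_btree_irec *high,
		enum xfs_btree_rec_overlap *overlap);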


>  static const struct xfs_btree_ops xfs_cntbt_ops = {
> diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
> index cfa052d40105..d1225b957649 100644
> --- a/fs/xfs/libxfs/xfs_bmap_btree.c
> +++ b/fs/xfs/libxfs/xfs_bmap_btree.c
> @@ -518,6 +518,18 @@ xfs_bmbt_recs_inorder(
>  		xfs_bmbt_disk_get_startoff(&r2->bmbt);
>  }
>  
> +STATIC bool
> +xfs_bmbt_has_key_gap(
> +	struct xfs_btree_cur		*cur,
> +	const union xfs_btree_key	*key1,
> +	const union xfs_btree_key	*key2)
> +{
> +	xfs_fileoff_t			next;
> +
> +	next = be64_to_cpu(key1->bmbt.br_startoff) + 1;
> +	return next != be64_to_cpu(key2->bmbt.br_startoff);
> +}

IDGI - this is an extent tree - there is no gap if the extent at
key2 starts at the end of key1. IOWs, this only returns "no gap"
if the extent at key 1 is a single block in length. I'll come back
to this...

Oh, does this assume that the caller has already created a key to a
nonexistent record in the BMBT that points to the end of the first
extent? i.e. that this method is being called with key1 being a high
key for the bmbt record (i.e. an end pointer) and key2 being a low
key for the bmbt record (i.e. a start pointer)?

If so, this API needs some documentation on how it is expected to be
used - at least name the two keys something more descriptive like
"high key" and "next low key"....

It occurs to me that what I'm actually doing here is reverse
engineering the design of this mechanism from the code because
there's no documentation in the patch or the commit description of
the algorithm being used to find overlapping records....

>  static const struct xfs_btree_ops xfs_bmbt_ops = {
>  	.rec_len		= sizeof(xfs_bmbt_rec_t),
>  	.key_len		= sizeof(xfs_bmbt_key_t),
> @@ -538,6 +550,7 @@ static const struct xfs_btree_ops xfs_bmbt_ops = {
>  	.buf_ops		= &xfs_bmbt_buf_ops,
>  	.keys_inorder		= xfs_bmbt_keys_inorder,
>  	.recs_inorder		= xfs_bmbt_recs_inorder,
> +	.has_key_gap		= xfs_bmbt_has_key_gap,
>  };
>  
>  /*
> diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
> index 4c16c8c31fcb..5710d3ee582a 100644
> --- a/fs/xfs/libxfs/xfs_btree.c
> +++ b/fs/xfs/libxfs/xfs_btree.c
> @@ -5008,34 +5008,100 @@ xfs_btree_diff_two_ptrs(
>  	return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s);
>  }
>  
> -/* If there's an extent, we're done. */
> +struct xfs_btree_scan_keyfill {
> +	/* Keys for the start and end of the range we want to know about. */
> +	union xfs_btree_key	start_key;
> +	union xfs_btree_key	end_key;
> +
> +	/* Highest record key we've seen so far. */
> +	union xfs_btree_key	high_key;
> +
> +	enum xfs_btree_keyfill	outcome;
> +};

This "keyfill" scan operation is completely private to
xfs_btree_has_record(), which further indicates the higher level API
should not be renamed "keyfill"....

> +
>  STATIC int
> -xfs_btree_has_record_helper(
> +xfs_btree_scan_keyfill_helper(
>  	struct xfs_btree_cur		*cur,
>  	const union xfs_btree_rec	*rec,
>  	void				*priv)
>  {
> -	return -ECANCELED;
> +	union xfs_btree_key		rec_key;
> +	union xfs_btree_key		rec_high_key;
> +	struct xfs_btree_scan_keyfill	*info = priv;
> +	int64_t				res;
> +
> +	cur->bc_ops->init_key_from_rec(&rec_key, rec);
> +
> +	if (info->outcome == XFS_BTREE_KEYFILL_EMPTY) {
> +		info->outcome = XFS_BTREE_KEYFILL_SPARSE;
> +
> +		/* Bail if the first record starts after the start key. */
> +		res = cur->bc_ops->diff_two_keys(cur, &info->start_key,
> +				&rec_key);
> +		if (res < 0)
> +			return -ECANCELED;

Better comment needed.

		/*
		 * If the first record we find does not overlap the
		 * start key, then there is a hole at the start of
		 * the search range before the overlap was found.
		 * Hence we can classify this as a sparse overlap
		 * and we don't need to search any further.
		 */

> +	} else {
> +		/* Bail if there's a gap with the previous record. */
> +		if (cur->bc_ops->has_key_gap(cur, &info->high_key, &rec_key))
> +			return -ECANCELED;

Ah, yeah, there's the high key -> low key implementation assumption.

> +	}
> +
> +	/* If the current record is higher than what we've seen, remember it. */
> +	cur->bc_ops->init_high_key_from_rec(&rec_high_key, rec);
> +	res = cur->bc_ops->diff_two_keys(cur, &rec_high_key, &info->high_key);
> +	if (res > 0)
> +		info->high_key = rec_high_key; /* struct copy */
> +
> +	return 0;
>  }
>  
> -/* Is there a record covering a given range of keys? */
> +/*
> + * Scan part of the keyspace of a btree and tell us if the area has no records,
> + * is fully mapped by records, or is partially filled.
> + */
>  int
> -xfs_btree_has_record(
> +xfs_btree_scan_keyfill(
>  	struct xfs_btree_cur		*cur,
>  	const union xfs_btree_irec	*low,
>  	const union xfs_btree_irec	*high,
> -	bool				*exists)
> +	enum xfs_btree_keyfill		*outcome)
>  {
> +	struct xfs_btree_scan_keyfill	info = {
> +		.outcome		= XFS_BTREE_KEYFILL_EMPTY,
> +	};
> +	union xfs_btree_rec		rec;
> +	int64_t				res;
>  	int				error;
>  
> +	if (!cur->bc_ops->has_key_gap)
> +		return -EOPNOTSUPP;
> +
> +	cur->bc_rec = *low;
> +	cur->bc_ops->init_rec_from_cur(cur, &rec);
> +	cur->bc_ops->init_key_from_rec(&info.start_key, &rec);
> +
> +	cur->bc_rec = *high;
> +	cur->bc_ops->init_rec_from_cur(cur, &rec);
> +	cur->bc_ops->init_key_from_rec(&info.end_key, &rec);

Didn't a previous patch I just reviewed create helpers for these?

Oh.... patches in the series were threaded in the wrong order...


> +
>  	error = xfs_btree_query_range(cur, low, high,
> -			&xfs_btree_has_record_helper, NULL);
> -	if (error == -ECANCELED) {
> -		*exists = true;
> -		return 0;
> -	}
> -	*exists = false;
> -	return error;
> +			xfs_btree_scan_keyfill_helper, &info);
> +	if (error == -ECANCELED)
> +		goto out;
> +	if (error)
> +		return error;
> +
> +	if (info.outcome == XFS_BTREE_KEYFILL_EMPTY)
> +		goto out;
> +
> +	/* Did the record set go at least as far as the end? */
> +	res = cur->bc_ops->diff_two_keys(cur, &info.high_key, &info.end_key);
> +	if (res >= 0)
> +		info.outcome = XFS_BTREE_KEYFILL_FULL;

Hmmm. I'm wondering if we should have helpers for these sorts of
key comparisons.

static bool
xfs_btree_keycmp_lt(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_key	*key1,
	struct xfs_btree_key	*key2)
{
	return cur->bc_ops->diff_two_keys(cur, key1, key2) < 0;
}

static bool
xfs_btree_keycmp_gt(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_key	*key1,
	struct xfs_btree_key	*key2)
{
	return cur->bc_ops->diff_two_keys(cur, key1, key2) > 0;
}

static bool
xfs_btree_keycmp_ge(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_key	*key1,
	struct xfs_btree_key	*key2)
{
	return cur->bc_ops->diff_two_keys(cur, key1, key2) >= 0;
}

Which then makes the code read a whole lot nicer:

	/* Did the record set go at least as far as the end? */
	if (xfs_btree_keycmp_ge(cur, &info.high_key, &info.end_key))
		info.outcome = XFS_BTREE_KEYFILL_FULL;
...

Not necessary for this patch, but I note there are a few places
where these sorts of key range/ordering checks are open coded...
> +
> +out:
> +	*outcome = info.outcome;
> +	return 0;
>  }
>  
>  /* Are there more records in this btree? */
> diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
> index eef27858a013..58a05f0d1f1b 100644
> --- a/fs/xfs/libxfs/xfs_btree.h
> +++ b/fs/xfs/libxfs/xfs_btree.h
> @@ -157,6 +157,11 @@ struct xfs_btree_ops {
>  	int	(*recs_inorder)(struct xfs_btree_cur *cur,
>  				const union xfs_btree_rec *r1,
>  				const union xfs_btree_rec *r2);
> +
> +	/* decide if there's a gap in the keyspace between two keys */
> +	bool	(*has_key_gap)(struct xfs_btree_cur *cur,
> +			       const union xfs_btree_key *key1,
> +			       const union xfs_btree_key *key2);

Having read through it this far, this looks like it is checking whether
two discrete keys form a contiguous range? Perhaps that's a better
name than "gap", because "contiguous" means different things for
different keys. e.g. extents vs inode records.


> diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
> index 8c83e265770c..fd48b95b4f4e 100644
> --- a/fs/xfs/libxfs/xfs_ialloc_btree.c
> +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
> @@ -380,6 +380,18 @@ xfs_inobt_recs_inorder(
>  		be32_to_cpu(r2->inobt.ir_startino);
>  }
>  
> +STATIC bool
> +xfs_inobt_has_key_gap(
> +	struct xfs_btree_cur		*cur,
> +	const union xfs_btree_key	*key1,
> +	const union xfs_btree_key	*key2)
> +{
> +	xfs_agino_t			next;
> +
> +	next = be32_to_cpu(key1->inobt.ir_startino) + XFS_INODES_PER_CHUNK;
> +	return next != be32_to_cpu(key2->inobt.ir_startino);
> +}

Huh. Is that correct? The high key for an inode chunk is:

STATIC void                                                                      
xfs_inobt_init_high_key_from_rec(                                                
        union xfs_btree_key             *key,                                    
        const union xfs_btree_rec       *rec)                                    
{                                                                                
        __u32                           x;                                       
                                                                                 
        x = be32_to_cpu(rec->inobt.ir_startino);                                 
        x += XFS_INODES_PER_CHUNK - 1;                                           
        key->inobt.ir_startino = cpu_to_be32(x);                                 
}                                                                                

Which means highkey->ir_startino (end range pointer) points to
low_key->ir_startino + 63 (start range pointer + inodes in chunk)

Hence if this "gap" API is supposed to be passed {high_key,
low_key}, then xfs_inobt_has_key_gap() is checking
(low_key->ir_startino + 127) against next_low_key->ir_startino...

> +STATIC bool
> +xfs_refcountbt_has_key_gap(
> +	struct xfs_btree_cur		*cur,
> +	const union xfs_btree_key	*key1,
> +	const union xfs_btree_key	*key2)
> +{
> +	xfs_agblock_t			next;
> +
> +	next = be32_to_cpu(key1->refc.rc_startblock) + 1;
> +	return next != be32_to_cpu(key2->refc.rc_startblock);
> +}

... and this matches the BMBT code (as does the rmapbt code), which seems to
assume a high key (end extent pointer) is being passed as key1, and key2 is
a low key (start extent pointer).

Am I completely misunderstanding what the key gap API uses for
low_key and high_key? I am completely confused now...

-Dave.
Darrick J. Wong Nov. 3, 2022, 1:56 a.m. UTC | #2
On Wed, Nov 02, 2022 at 12:47:45PM +1100, Dave Chinner wrote:
> On Sun, Oct 02, 2022 at 11:20:16AM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <djwong@kernel.org>
> > 
> > The current implementation of xfs_btree_has_record returns true if it
> > finds /any/ record within the given range.  Unfortunately, that's not
> > sufficient for scrub.  We want to be able to tell if a range of keyspace
> > for a btree is devoid of records, is totally mapped to records, or is
> > somewhere in between.  By forcing this to be a boolean, we were
> > generally missing the "in between" case and returning incorrect results.
> > Fix the API so that we can tell the caller which of those three is the
> > current state.
> 
> My first reaction is that this "keyfill" API name is .... awful.

/me smacks head, realizes that "fill" could be interpreted as an action
verb, instead of a noun.  "fullness" might have been better.

This function scans part of a btree's keyspace to determine the fullness
factor (empty, totally full, sparse).

xfs_rmapbt_keyspace_fullness ?
                   _sparseness?
		   _contiguity?

That's the best thing I can think of, though my brain is a little tired
right now.  I could even leave it as _has_record just to avoid the
rename costs, though "has records" is a little vague.

OTOH "keyspace" is one of those jargon terms that comes from database
theory land.

> From an API perspective, all we are doing is changing the
> "has_record()" API that returns a boolean to return a tri-state - we
> add a "partial" return to the "all" and "none" states we currently
> return. The whole API doesn't need renaming - it's impossible to
> work out what "scan_keyfill" is actually intended to do, whereas
> "has_record" is very much self-documenting....

Ok.

> Hence I think that the implementation of xfs_btree_has_record()
> needs to change, but I don't think the entire API needs to be renamed.
> All that needs to happen to the higher level API is this conversion:
> 
> > -	bool			*exists)
> > +	enum xfs_btree_keyfill	*exists)
> 
> Even then, the enum could be named for what it means -
> xfs_btree_rec_overlap - with values for none, full and partial. At
> this point, nothing above xfs_btree_has_record needs to know
> anything about whatever a "key fill" operation might actually be.

/me wishes he'd thought of "keyspace contiguity" earlier.

Though that's a lot of long words.  I'll take your suggestion to leave
the name as _has_records.  However, we're not actually measuring the
amount of *overlap* between records -- what we're really looking for is
the btree record equivalent of file holes.

enum xfs_btree_rec_contiguity?

> >  static const struct xfs_btree_ops xfs_cntbt_ops = {
> > diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
> > index cfa052d40105..d1225b957649 100644
> > --- a/fs/xfs/libxfs/xfs_bmap_btree.c
> > +++ b/fs/xfs/libxfs/xfs_bmap_btree.c
> > @@ -518,6 +518,18 @@ xfs_bmbt_recs_inorder(
> >  		xfs_bmbt_disk_get_startoff(&r2->bmbt);
> >  }
> >  
> > +STATIC bool
> > +xfs_bmbt_has_key_gap(
> > +	struct xfs_btree_cur		*cur,
> > +	const union xfs_btree_key	*key1,
> > +	const union xfs_btree_key	*key2)
> > +{
> > +	xfs_fileoff_t			next;
> > +
> > +	next = be64_to_cpu(key1->bmbt.br_startoff) + 1;
> > +	return next != be64_to_cpu(key2->bmbt.br_startoff);
> > +}
> 
> IDGI - this is an extent tree - there is no gap if the extent at
> key2 starts at the end of key1. IOWs, this only returns "no gap"
> if the extent at key 1 is a single block in length. I'll come back
> to this...
> 
> Oh, does this assume that the caller has already created a key to a
> nonexistent record in the BMBT that points to the end of the first
> extent?

Yes.

> i.e. that this method is being called with key1 being a high
> key for the bmbt record (i.e. an end pointer) and key2 being a low
> key for the bmbt record (i.e. a start pointer)?

Generically, the _has_key_gap functions take two record keys A and C and
decide if it is possible for there to be a third record key B satisfying
this relationship: A < B < C.  For the operation to make any sense, it's
very strongly implied that A is the high key of a record R and C is the
low key of a record S.  Technically, however, there's no reason why you
can't pass any two keys.
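
A standalone illustration of that A < B < C test (not kernel code): record R
covering agblocks 10-12 has high key A = 12; if the next record S starts at
agblock 14, its low key C = 14 and the key B = 13 fits between them, so there
is a gap.  If S instead starts at agblock 13, no such B exists.  For keys that
advance in single-block units the whole test collapses to:

#include <stdbool.h>
#include <stdint.h>

/* A key B can fit between A and C iff A + 1 != C. */
static bool has_key_gap(uint32_t high_key, uint32_t next_low_key)
{
	return high_key + 1 != next_low_key;
}

/* has_key_gap(12, 14) == true (B = 13 fits); has_key_gap(12, 13) == false */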

> If so, this API needs some documentation on how it is expected to be
> used - at least name the two keys something more descriptive like
> "high key" and "next low key"....

Now that I know that scrub is the only user of the key gap functions,
I'm confident that the function signatures can s/key1/high_key/ and
s/key2/low_key/.

Clearly I also need to improve the documentation for this function.

"Given two btree keys @high_key and @low_key, decide if it is possible
for there to be a third btree key K satisfying the relationship
@high_key < K < @low_key.  To determine the sparseness of the keyspace
for a pair of btree records, @high_key should be the high key of a
record and @low_key should be the low key of the next record in the
record set."

Not sure if that's any better though.

> It occurs to me that what I'm actually doing here is reverse
> engineering the design of this mechanism from the code because
> there's no documentation in the patch or the commit description of
> the algorithm being used to find overlapping records....

That's the (severe!) downside of talking to a database guy -- these
kinds of things are obvious to me, but that's not everyone's background.

> >  static const struct xfs_btree_ops xfs_bmbt_ops = {
> >  	.rec_len		= sizeof(xfs_bmbt_rec_t),
> >  	.key_len		= sizeof(xfs_bmbt_key_t),
> > @@ -538,6 +550,7 @@ static const struct xfs_btree_ops xfs_bmbt_ops = {
> >  	.buf_ops		= &xfs_bmbt_buf_ops,
> >  	.keys_inorder		= xfs_bmbt_keys_inorder,
> >  	.recs_inorder		= xfs_bmbt_recs_inorder,
> > +	.has_key_gap		= xfs_bmbt_has_key_gap,
> >  };
> >  
> >  /*
> > diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
> > index 4c16c8c31fcb..5710d3ee582a 100644
> > --- a/fs/xfs/libxfs/xfs_btree.c
> > +++ b/fs/xfs/libxfs/xfs_btree.c
> > @@ -5008,34 +5008,100 @@ xfs_btree_diff_two_ptrs(
> >  	return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s);
> >  }
> >  
> > -/* If there's an extent, we're done. */
> > +struct xfs_btree_scan_keyfill {
> > +	/* Keys for the start and end of the range we want to know about. */
> > +	union xfs_btree_key	start_key;
> > +	union xfs_btree_key	end_key;
> > +
> > +	/* Highest record key we've seen so far. */
> > +	union xfs_btree_key	high_key;
> > +
> > +	enum xfs_btree_keyfill	outcome;
> > +};
> 
> This "keyfill" scan operation is completely private to
> xfs_btree_has_record(), which further indicates the higher level API
> should not be renamed "keyfill"....

struct xfbt_has_records?

> > +
> >  STATIC int
> > -xfs_btree_has_record_helper(
> > +xfs_btree_scan_keyfill_helper(
> >  	struct xfs_btree_cur		*cur,
> >  	const union xfs_btree_rec	*rec,
> >  	void				*priv)
> >  {
> > -	return -ECANCELED;
> > +	union xfs_btree_key		rec_key;
> > +	union xfs_btree_key		rec_high_key;
> > +	struct xfs_btree_scan_keyfill	*info = priv;
> > +	int64_t				res;
> > +
> > +	cur->bc_ops->init_key_from_rec(&rec_key, rec);
> > +
> > +	if (info->outcome == XFS_BTREE_KEYFILL_EMPTY) {
> > +		info->outcome = XFS_BTREE_KEYFILL_SPARSE;
> > +
> > +		/* Bail if the first record starts after the start key. */
> > +		res = cur->bc_ops->diff_two_keys(cur, &info->start_key,
> > +				&rec_key);
> > +		if (res < 0)
> > +			return -ECANCELED;
> 
> Better comment needed.
> 
> 		/*
> 		 * If the first record we find does not overlap the
> 		 * start key, then there is a hole at the start of
> 		 * the search range before the overlap was found.
> 		 * Hence we can classify this as a sparse overlap
> 		 * and we don't need to search any further.
> 		 */

Added.

> > +	} else {
> > +		/* Bail if there's a gap with the previous record. */
> > +		if (cur->bc_ops->has_key_gap(cur, &info->high_key, &rec_key))
> > +			return -ECANCELED;
> 
> Ah, yeah, there's the high key -> low key implementation assumption.

Yes.

> > +	}
> > +
> > +	/* If the current record is higher than what we've seen, remember it. */
> > +	cur->bc_ops->init_high_key_from_rec(&rec_high_key, rec);
> > +	res = cur->bc_ops->diff_two_keys(cur, &rec_high_key, &info->high_key);
> > +	if (res > 0)
> > +		info->high_key = rec_high_key; /* struct copy */
> > +
> > +	return 0;
> >  }
> >  
> > -/* Is there a record covering a given range of keys? */
> > +/*
> > + * Scan part of the keyspace of a btree and tell us if the area has no records,
> > + * is fully mapped by records, or is partially filled.
> > + */
> >  int
> > -xfs_btree_has_record(
> > +xfs_btree_scan_keyfill(
> >  	struct xfs_btree_cur		*cur,
> >  	const union xfs_btree_irec	*low,
> >  	const union xfs_btree_irec	*high,
> > -	bool				*exists)
> > +	enum xfs_btree_keyfill		*outcome)
> >  {
> > +	struct xfs_btree_scan_keyfill	info = {
> > +		.outcome		= XFS_BTREE_KEYFILL_EMPTY,
> > +	};
> > +	union xfs_btree_rec		rec;
> > +	int64_t				res;
> >  	int				error;
> >  
> > +	if (!cur->bc_ops->has_key_gap)
> > +		return -EOPNOTSUPP;
> > +
> > +	cur->bc_rec = *low;
> > +	cur->bc_ops->init_rec_from_cur(cur, &rec);
> > +	cur->bc_ops->init_key_from_rec(&info.start_key, &rec);
> > +
> > +	cur->bc_rec = *high;
> > +	cur->bc_ops->init_rec_from_cur(cur, &rec);
> > +	cur->bc_ops->init_key_from_rec(&info.end_key, &rec);
> 
> Didn't a previous patch I just reviewed create helpers for these?
> 
> Oh.... patches in the series were threaded in the wrong order...

Yeah.  I'll rearrange these.

> 
> > +
> >  	error = xfs_btree_query_range(cur, low, high,
> > -			&xfs_btree_has_record_helper, NULL);
> > -	if (error == -ECANCELED) {
> > -		*exists = true;
> > -		return 0;
> > -	}
> > -	*exists = false;
> > -	return error;
> > +			xfs_btree_scan_keyfill_helper, &info);
> > +	if (error == -ECANCELED)
> > +		goto out;
> > +	if (error)
> > +		return error;
> > +
> > +	if (info.outcome == XFS_BTREE_KEYFILL_EMPTY)
> > +		goto out;
> > +
> > +	/* Did the record set go at least as far as the end? */
> > +	res = cur->bc_ops->diff_two_keys(cur, &info.high_key, &info.end_key);
> > +	if (res >= 0)
> > +		info.outcome = XFS_BTREE_KEYFILL_FULL;
> 
> Hmmm. I'm wondering if we should have helpers for these sorts of
> key comparisons.
> 
> static bool
> xfs_btree_keycmp_lt(
> 	struct xfs_btree_cur	*cur,
> 	struct xfs_btree_key	*key1,
> 	struct xfs_btree_key	*key2)
> {
> 	return cur->bc_ops->diff_two_keys(cur, key1, key2) < 0;
> }
> 
> static bool
> xfs_btree_keycmp_gt(
> 	struct xfs_btree_cur	*cur,
> 	struct xfs_btree_key	*key1,
> 	struct xfs_btree_key	*key2)
> {
> 	return cur->bc_ops->diff_two_keys(cur, key1, key2) > 0;
> }
> 
> static bool
> xfs_btree_keycmp_ge(
> 	struct xfs_btree_cur	*cur,
> 	struct xfs_btree_key	*key1,
> 	struct xfs_btree_key	*key2)
> {
> 	return cur->bc_ops->diff_two_keys(cur, key1, key2) >= 0;
> }
> 
> Which then makes the code read a whole lot nicer:
> 
> 	/* Did the record set go at least as far as the end? */
> 	if (xfs_btree_keycmp_ge(cur, &info.high_key, &info.end_key))
> 		info.outcome = XFS_BTREE_KEYFILL_FULL;
> ...
> 
> Not necessary for this patch, but I note there are a few places
> where these sorts of key range/ordering checks are open coded...

Yeah.  Every time I squint at "< 0" and "> 0" I have to remember what all
that means.  I'll clean that one up too.

> > +
> > +out:
> > +	*outcome = info.outcome;
> > +	return 0;
> >  }
> >  
> >  /* Are there more records in this btree? */
> > diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
> > index eef27858a013..58a05f0d1f1b 100644
> > --- a/fs/xfs/libxfs/xfs_btree.h
> > +++ b/fs/xfs/libxfs/xfs_btree.h
> > @@ -157,6 +157,11 @@ struct xfs_btree_ops {
> >  	int	(*recs_inorder)(struct xfs_btree_cur *cur,
> >  				const union xfs_btree_rec *r1,
> >  				const union xfs_btree_rec *r2);
> > +
> > +	/* decide if there's a gap in the keyspace between two keys */
> > +	bool	(*has_key_gap)(struct xfs_btree_cur *cur,
> > +			       const union xfs_btree_key *key1,
> > +			       const union xfs_btree_key *key2);
> 
> Having read through it this far, this looks like it is checking whether
> two discrete keys form a contiguous range? Perhaps that's a better
> name than "gap", because "contiguous" means different things for
> different keys. e.g. extents vs inode records.

bc_ops->keys_contiguous()?  Sounds good to me.
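
A sketch of what that renamed method might look like in struct xfs_btree_ops
(hypothetical; same signature as has_key_gap above, with the sense inverted):

	/* decide if two keys describe a contiguous span of the keyspace */
	bool	(*keys_contiguous)(struct xfs_btree_cur *cur,
				   const union xfs_btree_key *key1,
				   const union xfs_btree_key *key2);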

> 
> 
> > diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
> > index 8c83e265770c..fd48b95b4f4e 100644
> > --- a/fs/xfs/libxfs/xfs_ialloc_btree.c
> > +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
> > @@ -380,6 +380,18 @@ xfs_inobt_recs_inorder(
> >  		be32_to_cpu(r2->inobt.ir_startino);
> >  }
> >  
> > +STATIC bool
> > +xfs_inobt_has_key_gap(
> > +	struct xfs_btree_cur		*cur,
> > +	const union xfs_btree_key	*key1,
> > +	const union xfs_btree_key	*key2)
> > +{
> > +	xfs_agino_t			next;
> > +
> > +	next = be32_to_cpu(key1->inobt.ir_startino) + XFS_INODES_PER_CHUNK;
> > +	return next != be32_to_cpu(key2->inobt.ir_startino);
> > +}
> 
> Huh. Is that correct? The high key for an inode chunk is:
> 
> STATIC void                                                                      
> xfs_inobt_init_high_key_from_rec(                                                
>         union xfs_btree_key             *key,                                    
>         const union xfs_btree_rec       *rec)                                    
> {                                                                                
>         __u32                           x;                                       
>                                                                                  
>         x = be32_to_cpu(rec->inobt.ir_startino);                                 
>         x += XFS_INODES_PER_CHUNK - 1;                                           
>         key->inobt.ir_startino = cpu_to_be32(x);                                 
> }                                                                                
> 
> Which means highkey->ir_startino (end range pointer) points to
> low_key->ir_startino + 63 (start range pointer + inodes in chunk)
> 
> Hence if this "gap" API is supposed to be passed {high_key,
> low_key}, then xfs_inobt_has_key_gap() is checking
> (low_key->ir_startino + 127) against next_low_key->ir_startino...

Oops, I committed the correct code into the wrong patch.  Sometimes I
really hate stgit.  This has gotten better recently now that I figured
out how to dump the branch and patch name into PS1.
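
Presumably the intended inobt check follows the same {high key, next low key}
convention as the other btrees, i.e. it advances by one inode rather than a
whole chunk (a sketch only, not the committed fix):

STATIC bool
xfs_inobt_has_key_gap(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*key1,
	const union xfs_btree_key	*key2)
{
	xfs_agino_t			next;

	next = be32_to_cpu(key1->inobt.ir_startino) + 1;
	return next != be32_to_cpu(key2->inobt.ir_startino);
}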

> > +STATIC bool
> > +xfs_refcountbt_has_key_gap(
> > +	struct xfs_btree_cur		*cur,
> > +	const union xfs_btree_key	*key1,
> > +	const union xfs_btree_key	*key2)
> > +{
> > +	xfs_agblock_t			next;
> > +
> > +	next = be32_to_cpu(key1->refc.rc_startblock) + 1;
> > +	return next != be32_to_cpu(key2->refc.rc_startblock);
> > +}
> 
> ... and this matches the BMBT code (as does the rmapbt code), which seems to
> assume a high key (end extent pointer) is being passed as key1, and key2 is
> a low key (start extent pointer).
> 
> Am I completely misunderstanding what the key gap API uses for
> low_key and high_key? I am completely confused now...

You've understood btree keyspace sparseness scanning correctly.
My apologies for making it harder than it had to be, and thanks for
wading through all this.

--D

> 
> -Dave.
> -- 
> Dave Chinner
> david@fromorbit.com

Patch

diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index e2bdf089c0a3..f0c92093db0a 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -3498,13 +3498,16 @@  xfs_alloc_query_all(
 	return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
 }
 
-/* Is there a record covering a given extent? */
+/*
+ * Scan part of the keyspace of the free space and tell us if the area has no
+ * records, is fully mapped by records, or is partially filled.
+ */
 int
-xfs_alloc_has_record(
+xfs_alloc_scan_keyfill(
 	struct xfs_btree_cur	*cur,
 	xfs_agblock_t		bno,
 	xfs_extlen_t		len,
-	bool			*exists)
+	enum xfs_btree_keyfill	*outcome)
 {
 	union xfs_btree_irec	low;
 	union xfs_btree_irec	high;
@@ -3514,7 +3517,7 @@  xfs_alloc_has_record(
 	memset(&high, 0xFF, sizeof(high));
 	high.a.ar_startblock = bno + len - 1;
 
-	return xfs_btree_has_record(cur, &low, &high, exists);
+	return xfs_btree_scan_keyfill(cur, &low, &high, outcome);
 }
 
 /*
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 2c3f762dfb58..ebda867aa6f4 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -194,8 +194,8 @@  int xfs_alloc_query_range(struct xfs_btree_cur *cur,
 int xfs_alloc_query_all(struct xfs_btree_cur *cur, xfs_alloc_query_range_fn fn,
 		void *priv);
 
-int xfs_alloc_has_record(struct xfs_btree_cur *cur, xfs_agblock_t bno,
-		xfs_extlen_t len, bool *exist);
+int xfs_alloc_scan_keyfill(struct xfs_btree_cur *cur, xfs_agblock_t bno,
+		xfs_extlen_t len, enum xfs_btree_keyfill *outcome);
 
 typedef int (*xfs_agfl_walk_fn)(struct xfs_mount *mp, xfs_agblock_t bno,
 		void *priv);
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index 549a3cba0234..916d278204f5 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -423,6 +423,18 @@  xfs_cntbt_recs_inorder(
 		 be32_to_cpu(r2->alloc.ar_startblock));
 }
 
+STATIC bool
+xfs_allocbt_has_key_gap(
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_key	*key1,
+	const union xfs_btree_key	*key2)
+{
+	xfs_agblock_t			next;
+
+	next = be32_to_cpu(key1->alloc.ar_startblock) + 1;
+	return next != be32_to_cpu(key2->alloc.ar_startblock);
+}
+
 static const struct xfs_btree_ops xfs_bnobt_ops = {
 	.rec_len		= sizeof(xfs_alloc_rec_t),
 	.key_len		= sizeof(xfs_alloc_key_t),
@@ -443,6 +455,7 @@  static const struct xfs_btree_ops xfs_bnobt_ops = {
 	.diff_two_keys		= xfs_bnobt_diff_two_keys,
 	.keys_inorder		= xfs_bnobt_keys_inorder,
 	.recs_inorder		= xfs_bnobt_recs_inorder,
+	.has_key_gap		= xfs_allocbt_has_key_gap,
 };
 
 static const struct xfs_btree_ops xfs_cntbt_ops = {
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index cfa052d40105..d1225b957649 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -518,6 +518,18 @@  xfs_bmbt_recs_inorder(
 		xfs_bmbt_disk_get_startoff(&r2->bmbt);
 }
 
+STATIC bool
+xfs_bmbt_has_key_gap(
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_key	*key1,
+	const union xfs_btree_key	*key2)
+{
+	xfs_fileoff_t			next;
+
+	next = be64_to_cpu(key1->bmbt.br_startoff) + 1;
+	return next != be64_to_cpu(key2->bmbt.br_startoff);
+}
+
 static const struct xfs_btree_ops xfs_bmbt_ops = {
 	.rec_len		= sizeof(xfs_bmbt_rec_t),
 	.key_len		= sizeof(xfs_bmbt_key_t),
@@ -538,6 +550,7 @@  static const struct xfs_btree_ops xfs_bmbt_ops = {
 	.buf_ops		= &xfs_bmbt_buf_ops,
 	.keys_inorder		= xfs_bmbt_keys_inorder,
 	.recs_inorder		= xfs_bmbt_recs_inorder,
+	.has_key_gap		= xfs_bmbt_has_key_gap,
 };
 
 /*
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 4c16c8c31fcb..5710d3ee582a 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -5008,34 +5008,100 @@  xfs_btree_diff_two_ptrs(
 	return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s);
 }
 
-/* If there's an extent, we're done. */
+struct xfs_btree_scan_keyfill {
+	/* Keys for the start and end of the range we want to know about. */
+	union xfs_btree_key	start_key;
+	union xfs_btree_key	end_key;
+
+	/* Highest record key we've seen so far. */
+	union xfs_btree_key	high_key;
+
+	enum xfs_btree_keyfill	outcome;
+};
+
 STATIC int
-xfs_btree_has_record_helper(
+xfs_btree_scan_keyfill_helper(
 	struct xfs_btree_cur		*cur,
 	const union xfs_btree_rec	*rec,
 	void				*priv)
 {
-	return -ECANCELED;
+	union xfs_btree_key		rec_key;
+	union xfs_btree_key		rec_high_key;
+	struct xfs_btree_scan_keyfill	*info = priv;
+	int64_t				res;
+
+	cur->bc_ops->init_key_from_rec(&rec_key, rec);
+
+	if (info->outcome == XFS_BTREE_KEYFILL_EMPTY) {
+		info->outcome = XFS_BTREE_KEYFILL_SPARSE;
+
+		/* Bail if the first record starts after the start key. */
+		res = cur->bc_ops->diff_two_keys(cur, &info->start_key,
+				&rec_key);
+		if (res < 0)
+			return -ECANCELED;
+	} else {
+		/* Bail if there's a gap with the previous record. */
+		if (cur->bc_ops->has_key_gap(cur, &info->high_key, &rec_key))
+			return -ECANCELED;
+	}
+
+	/* If the current record is higher than what we've seen, remember it. */
+	cur->bc_ops->init_high_key_from_rec(&rec_high_key, rec);
+	res = cur->bc_ops->diff_two_keys(cur, &rec_high_key, &info->high_key);
+	if (res > 0)
+		info->high_key = rec_high_key; /* struct copy */
+
+	return 0;
 }
 
-/* Is there a record covering a given range of keys? */
+/*
+ * Scan part of the keyspace of a btree and tell us if the area has no records,
+ * is fully mapped by records, or is partially filled.
+ */
 int
-xfs_btree_has_record(
+xfs_btree_scan_keyfill(
 	struct xfs_btree_cur		*cur,
 	const union xfs_btree_irec	*low,
 	const union xfs_btree_irec	*high,
-	bool				*exists)
+	enum xfs_btree_keyfill		*outcome)
 {
+	struct xfs_btree_scan_keyfill	info = {
+		.outcome		= XFS_BTREE_KEYFILL_EMPTY,
+	};
+	union xfs_btree_rec		rec;
+	int64_t				res;
 	int				error;
 
+	if (!cur->bc_ops->has_key_gap)
+		return -EOPNOTSUPP;
+
+	cur->bc_rec = *low;
+	cur->bc_ops->init_rec_from_cur(cur, &rec);
+	cur->bc_ops->init_key_from_rec(&info.start_key, &rec);
+
+	cur->bc_rec = *high;
+	cur->bc_ops->init_rec_from_cur(cur, &rec);
+	cur->bc_ops->init_key_from_rec(&info.end_key, &rec);
+
 	error = xfs_btree_query_range(cur, low, high,
-			&xfs_btree_has_record_helper, NULL);
-	if (error == -ECANCELED) {
-		*exists = true;
-		return 0;
-	}
-	*exists = false;
-	return error;
+			xfs_btree_scan_keyfill_helper, &info);
+	if (error == -ECANCELED)
+		goto out;
+	if (error)
+		return error;
+
+	if (info.outcome == XFS_BTREE_KEYFILL_EMPTY)
+		goto out;
+
+	/* Did the record set go at least as far as the end? */
+	res = cur->bc_ops->diff_two_keys(cur, &info.high_key, &info.end_key);
+	if (res >= 0)
+		info.outcome = XFS_BTREE_KEYFILL_FULL;
+
+out:
+	*outcome = info.outcome;
+	return 0;
 }
 
 /* Are there more records in this btree? */
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index eef27858a013..58a05f0d1f1b 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -157,6 +157,11 @@  struct xfs_btree_ops {
 	int	(*recs_inorder)(struct xfs_btree_cur *cur,
 				const union xfs_btree_rec *r1,
 				const union xfs_btree_rec *r2);
+
+	/* decide if there's a gap in the keyspace between two keys */
+	bool	(*has_key_gap)(struct xfs_btree_cur *cur,
+			       const union xfs_btree_key *key1,
+			       const union xfs_btree_key *key2);
 };
 
 /*
@@ -540,9 +545,15 @@  void xfs_btree_get_keys(struct xfs_btree_cur *cur,
 		struct xfs_btree_block *block, union xfs_btree_key *key);
 union xfs_btree_key *xfs_btree_high_key_from_key(struct xfs_btree_cur *cur,
 		union xfs_btree_key *key);
-int xfs_btree_has_record(struct xfs_btree_cur *cur,
+typedef bool (*xfs_btree_key_gap_fn)(struct xfs_btree_cur *cur,
+		const union xfs_btree_key *key1,
+		const union xfs_btree_key *key2);
+
+int xfs_btree_scan_keyfill(struct xfs_btree_cur *cur,
 		const union xfs_btree_irec *low,
-		const union xfs_btree_irec *high, bool *exists);
+		const union xfs_btree_irec *high,
+		enum xfs_btree_keyfill *outcome);
+
 bool xfs_btree_has_more_records(struct xfs_btree_cur *cur);
 struct xfs_ifork *xfs_btree_ifork_ptr(struct xfs_btree_cur *cur);
 
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 8c83e265770c..fd48b95b4f4e 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -380,6 +380,18 @@  xfs_inobt_recs_inorder(
 		be32_to_cpu(r2->inobt.ir_startino);
 }
 
+STATIC bool
+xfs_inobt_has_key_gap(
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_key	*key1,
+	const union xfs_btree_key	*key2)
+{
+	xfs_agino_t			next;
+
+	next = be32_to_cpu(key1->inobt.ir_startino) + XFS_INODES_PER_CHUNK;
+	return next != be32_to_cpu(key2->inobt.ir_startino);
+}
+
 static const struct xfs_btree_ops xfs_inobt_ops = {
 	.rec_len		= sizeof(xfs_inobt_rec_t),
 	.key_len		= sizeof(xfs_inobt_key_t),
@@ -399,6 +411,7 @@  static const struct xfs_btree_ops xfs_inobt_ops = {
 	.diff_two_keys		= xfs_inobt_diff_two_keys,
 	.keys_inorder		= xfs_inobt_keys_inorder,
 	.recs_inorder		= xfs_inobt_recs_inorder,
+	.has_key_gap		= xfs_inobt_has_key_gap,
 };
 
 static const struct xfs_btree_ops xfs_finobt_ops = {
@@ -420,6 +433,7 @@  static const struct xfs_btree_ops xfs_finobt_ops = {
 	.diff_two_keys		= xfs_inobt_diff_two_keys,
 	.keys_inorder		= xfs_inobt_keys_inorder,
 	.recs_inorder		= xfs_inobt_recs_inorder,
+	.has_key_gap		= xfs_inobt_has_key_gap,
 };
 
 /*
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 64b910caafaa..607fd25fda56 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1766,13 +1766,16 @@  xfs_refcount_recover_cow_leftovers(
 	return error;
 }
 
-/* Is there a record covering a given extent? */
+/*
+ * Scan part of the keyspace of the refcount records and tell us if the area
+ * has no records, is fully mapped by records, or is partially filled.
+ */
 int
-xfs_refcount_has_record(
+xfs_refcount_scan_keyfill(
 	struct xfs_btree_cur	*cur,
 	xfs_agblock_t		bno,
 	xfs_extlen_t		len,
-	bool			*exists)
+	enum xfs_btree_keyfill	*outcome)
 {
 	union xfs_btree_irec	low;
 	union xfs_btree_irec	high;
@@ -1782,7 +1785,7 @@  xfs_refcount_has_record(
 	memset(&high, 0xFF, sizeof(high));
 	high.rc.rc_startblock = bno + len - 1;
 
-	return xfs_btree_has_record(cur, &low, &high, exists);
+	return xfs_btree_scan_keyfill(cur, &low, &high, outcome);
 }
 
 int __init
diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h
index e8b322de7f3d..14b8edc289fa 100644
--- a/fs/xfs/libxfs/xfs_refcount.h
+++ b/fs/xfs/libxfs/xfs_refcount.h
@@ -78,8 +78,9 @@  extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp,
  */
 #define XFS_REFCOUNT_ITEM_OVERHEAD	32
 
-extern int xfs_refcount_has_record(struct xfs_btree_cur *cur,
-		xfs_agblock_t bno, xfs_extlen_t len, bool *exists);
+extern int xfs_refcount_scan_keyfill(struct xfs_btree_cur *cur,
+		xfs_agblock_t bno, xfs_extlen_t len,
+		enum xfs_btree_keyfill *outcome);
 union xfs_btree_rec;
 extern void xfs_refcount_btrec_to_irec(const union xfs_btree_rec *rec,
 		struct xfs_refcount_irec *irec);
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index 316c1ec0c3c2..f7982b2ecc49 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -290,6 +290,18 @@  xfs_refcountbt_recs_inorder(
 		be32_to_cpu(r2->refc.rc_startblock);
 }
 
+STATIC bool
+xfs_refcountbt_has_key_gap(
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_key	*key1,
+	const union xfs_btree_key	*key2)
+{
+	xfs_agblock_t			next;
+
+	next = be32_to_cpu(key1->refc.rc_startblock) + 1;
+	return next != be32_to_cpu(key2->refc.rc_startblock);
+}
+
 static const struct xfs_btree_ops xfs_refcountbt_ops = {
 	.rec_len		= sizeof(struct xfs_refcount_rec),
 	.key_len		= sizeof(struct xfs_refcount_key),
@@ -309,6 +321,7 @@  static const struct xfs_btree_ops xfs_refcountbt_ops = {
 	.diff_two_keys		= xfs_refcountbt_diff_two_keys,
 	.keys_inorder		= xfs_refcountbt_keys_inorder,
 	.recs_inorder		= xfs_refcountbt_recs_inorder,
+	.has_key_gap		= xfs_refcountbt_has_key_gap,
 };
 
 /*
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 094dfc897ebc..08d47cbf4697 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -2671,13 +2671,17 @@  xfs_rmap_compare(
 		return 0;
 }
 
-/* Is there a record covering a given extent? */
+/*
+ * Scan the physical storage part of the keyspace of the reverse mapping index
+ * and tell us if the area has no records, is fully mapped by records, or is
+ * partially filled.
+ */
 int
-xfs_rmap_has_record(
+xfs_rmap_scan_keyfill(
 	struct xfs_btree_cur	*cur,
 	xfs_agblock_t		bno,
 	xfs_extlen_t		len,
-	bool			*exists)
+	enum xfs_btree_keyfill	*outcome)
 {
 	union xfs_btree_irec	low;
 	union xfs_btree_irec	high;
@@ -2687,7 +2691,7 @@  xfs_rmap_has_record(
 	memset(&high, 0xFF, sizeof(high));
 	high.r.rm_startblock = bno + len - 1;
 
-	return xfs_btree_has_record(cur, &low, &high, exists);
+	return xfs_btree_scan_keyfill(cur, &low, &high, outcome);
 }
 
 /*
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index 54741a591a17..263a2dd09216 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -192,8 +192,8 @@  int xfs_rmap_compare(const struct xfs_rmap_irec *a,
 union xfs_btree_rec;
 int xfs_rmap_btrec_to_irec(const union xfs_btree_rec *rec,
 		struct xfs_rmap_irec *irec);
-int xfs_rmap_has_record(struct xfs_btree_cur *cur, xfs_agblock_t bno,
-		xfs_extlen_t len, bool *exists);
+int xfs_rmap_scan_keyfill(struct xfs_btree_cur *cur, xfs_agblock_t bno,
+		xfs_extlen_t len, enum xfs_btree_keyfill *outcome);
 int xfs_rmap_record_exists(struct xfs_btree_cur *cur, xfs_agblock_t bno,
 		xfs_extlen_t len, const struct xfs_owner_info *oinfo,
 		bool *has_rmap);
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index e2e1f68cedf5..d64143a842ce 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -433,6 +433,18 @@  xfs_rmapbt_recs_inorder(
 	return 0;
 }
 
+STATIC bool
+xfs_rmapbt_has_key_gap(
+	struct xfs_btree_cur		*cur,
+	const union xfs_btree_key	*key1,
+	const union xfs_btree_key	*key2)
+{
+	xfs_agblock_t			next;
+
+	next = be32_to_cpu(key1->rmap.rm_startblock) + 1;
+	return next != be32_to_cpu(key2->rmap.rm_startblock);
+}
+
 static const struct xfs_btree_ops xfs_rmapbt_ops = {
 	.rec_len		= sizeof(struct xfs_rmap_rec),
 	.key_len		= 2 * sizeof(struct xfs_rmap_key),
@@ -452,6 +464,7 @@  static const struct xfs_btree_ops xfs_rmapbt_ops = {
 	.diff_two_keys		= xfs_rmapbt_diff_two_keys,
 	.keys_inorder		= xfs_rmapbt_keys_inorder,
 	.recs_inorder		= xfs_rmapbt_recs_inorder,
+	.has_key_gap		= xfs_rmapbt_has_key_gap,
 };
 
 static struct xfs_btree_cur *
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index a6b7d98cf68f..d63637a3b873 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -174,6 +174,18 @@  enum xfs_ag_resv_type {
 	XFS_AG_RESV_RMAPBT,
 };
 
+/* Results of scanning a btree keyspace to check occupancy. */
+enum xfs_btree_keyfill {
+	/* None of the keyspace maps to records. */
+	XFS_BTREE_KEYFILL_EMPTY = 0,
+
+	/* Some, but not all, of the keyspace maps to records. */
+	XFS_BTREE_KEYFILL_SPARSE,
+
+	/* The entire keyspace maps to records. */
+	XFS_BTREE_KEYFILL_FULL,
+};
+
 /*
  * Type verifier functions
  */
diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c
index 4d9ccc15b048..d8f2ba7efa22 100644
--- a/fs/xfs/scrub/alloc.c
+++ b/fs/xfs/scrub/alloc.c
@@ -146,15 +146,15 @@  xchk_xref_is_used_space(
 	xfs_agblock_t		agbno,
 	xfs_extlen_t		len)
 {
-	bool			is_freesp;
+	enum xfs_btree_keyfill	keyfill;
 	int			error;
 
 	if (!sc->sa.bno_cur || xchk_skip_xref(sc->sm))
 		return;
 
-	error = xfs_alloc_has_record(sc->sa.bno_cur, agbno, len, &is_freesp);
+	error = xfs_alloc_scan_keyfill(sc->sa.bno_cur, agbno, len, &keyfill);
 	if (!xchk_should_check_xref(sc, &error, &sc->sa.bno_cur))
 		return;
-	if (is_freesp)
+	if (keyfill != XFS_BTREE_KEYFILL_EMPTY)
 		xchk_btree_xref_set_corrupt(sc, sc->sa.bno_cur, 0);
 }
diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c
index 4e70cd821b62..bfd48aaceb82 100644
--- a/fs/xfs/scrub/refcount.c
+++ b/fs/xfs/scrub/refcount.c
@@ -475,15 +475,16 @@  xchk_xref_is_not_shared(
 	xfs_agblock_t		agbno,
 	xfs_extlen_t		len)
 {
-	bool			shared;
+	enum xfs_btree_keyfill	keyfill;
 	int			error;
 
 	if (!sc->sa.refc_cur || xchk_skip_xref(sc->sm))
 		return;
 
-	error = xfs_refcount_has_record(sc->sa.refc_cur, agbno, len, &shared);
+	error = xfs_refcount_scan_keyfill(sc->sa.refc_cur, agbno, len,
+			&keyfill);
 	if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur))
 		return;
-	if (shared)
+	if (keyfill != XFS_BTREE_KEYFILL_EMPTY)
 		xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
 }
diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c
index afc4f840b6bc..6525202c6a8a 100644
--- a/fs/xfs/scrub/rmap.c
+++ b/fs/xfs/scrub/rmap.c
@@ -224,15 +224,15 @@  xchk_xref_has_no_owner(
 	xfs_agblock_t		bno,
 	xfs_extlen_t		len)
 {
-	bool			has_rmap;
+	enum xfs_btree_keyfill	keyfill;
 	int			error;
 
 	if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm))
 		return;
 
-	error = xfs_rmap_has_record(sc->sa.rmap_cur, bno, len, &has_rmap);
+	error = xfs_rmap_scan_keyfill(sc->sa.rmap_cur, bno, len, &keyfill);
 	if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
 		return;
-	if (has_rmap)
+	if (keyfill != XFS_BTREE_KEYFILL_EMPTY)
 		xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
 }