diff mbox

[10/22] xfs: scrub inode btrees

Message ID 150061197454.14732.6179560461009984483.stgit@magnolia (mailing list archive)
State Superseded
Headers show

Commit Message

Darrick J. Wong July 21, 2017, 4:39 a.m. UTC
From: Darrick J. Wong <darrick.wong@oracle.com>

Check the records of the inode btrees to make sure that the values
make sense given the inode records themselves.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/Makefile            |    1 
 fs/xfs/libxfs/xfs_format.h |    2 
 fs/xfs/libxfs/xfs_fs.h     |    4 -
 fs/xfs/scrub/common.c      |    9 +
 fs/xfs/scrub/common.h      |    3 
 fs/xfs/scrub/ialloc.c      |  347 ++++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_trace.h         |    4 -
 7 files changed, 367 insertions(+), 3 deletions(-)
 create mode 100644 fs/xfs/scrub/ialloc.c



--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Allison Henderson July 23, 2017, 5:15 p.m. UTC | #1
Looks good. Thanks for all the comments, they help!
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>

On 7/20/2017 9:39 PM, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
>
> Check the records of the inode btrees to make sure that the values
> make sense given the inode records themselves.
>
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/Makefile            |    1
>  fs/xfs/libxfs/xfs_format.h |    2
>  fs/xfs/libxfs/xfs_fs.h     |    4 -
>  fs/xfs/scrub/common.c      |    9 +
>  fs/xfs/scrub/common.h      |    3
>  fs/xfs/scrub/ialloc.c      |  347 ++++++++++++++++++++++++++++++++++++++++++++
>  fs/xfs/xfs_trace.h         |    4 -
>  7 files changed, 367 insertions(+), 3 deletions(-)
>  create mode 100644 fs/xfs/scrub/ialloc.c
>
>
> diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
> index ce492ee..5197bea 100644
> --- a/fs/xfs/Makefile
> +++ b/fs/xfs/Makefile
> @@ -144,6 +144,7 @@ xfs-y				+= $(addprefix scrub/, \
>  				   alloc.o \
>  				   btree.o \
>  				   common.o \
> +				   ialloc.o \
>  				   metabufs.o \
>  				   )
>  endif
> diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
> index 23229f0..154c3dd 100644
> --- a/fs/xfs/libxfs/xfs_format.h
> +++ b/fs/xfs/libxfs/xfs_format.h
> @@ -518,7 +518,7 @@ static inline int xfs_sb_version_hasftype(struct xfs_sb *sbp)
>  		 (sbp->sb_features2 & XFS_SB_VERSION2_FTYPE));
>  }
>
> -static inline int xfs_sb_version_hasfinobt(xfs_sb_t *sbp)
> +static inline bool xfs_sb_version_hasfinobt(xfs_sb_t *sbp)
>  {
>  	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) &&
>  		(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT);
> diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
> index bb36acf..5120cfd 100644
> --- a/fs/xfs/libxfs/xfs_fs.h
> +++ b/fs/xfs/libxfs/xfs_fs.h
> @@ -489,7 +489,9 @@ struct xfs_scrub_metadata {
>  #define XFS_SCRUB_TYPE_AGI	5	/* AG inode header */
>  #define XFS_SCRUB_TYPE_BNOBT	6	/* freesp by block btree */
>  #define XFS_SCRUB_TYPE_CNTBT	7	/* freesp by length btree */
> -#define XFS_SCRUB_TYPE_MAX	7
> +#define XFS_SCRUB_TYPE_INOBT	8	/* inode btree */
> +#define XFS_SCRUB_TYPE_FINOBT	9	/* free inode btree */
> +#define XFS_SCRUB_TYPE_MAX	9
>
>  /* i: repair this metadata */
>  #define XFS_SCRUB_FLAG_REPAIR		(1 << 0)
> diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
> index 86161b5..9a31846 100644
> --- a/fs/xfs/scrub/common.c
> +++ b/fs/xfs/scrub/common.c
> @@ -716,6 +716,15 @@ static const struct xfs_scrub_meta_fns meta_scrub_fns[] = {
>  		.setup	= xfs_scrub_setup_ag_allocbt,
>  		.scrub	= xfs_scrub_cntbt,
>  	},
> +	{ /* inobt */
> +		.setup	= xfs_scrub_setup_ag_iallocbt,
> +		.scrub	= xfs_scrub_inobt,
> +	},
> +	{ /* finobt */
> +		.setup	= xfs_scrub_setup_ag_iallocbt,
> +		.scrub	= xfs_scrub_finobt,
> +		.has	= xfs_sb_version_hasfinobt,
> +	},
>  };
>
>  /* Dispatch metadata scrubbing. */
> diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
> index f14abfb..cd89bec 100644
> --- a/fs/xfs/scrub/common.h
> +++ b/fs/xfs/scrub/common.h
> @@ -210,6 +210,7 @@ SETUP_FN(xfs_scrub_setup_fs);
>  SETUP_FN(xfs_scrub_setup_metabufs);
>  SETUP_FN(xfs_scrub_setup_ag_header);
>  SETUP_FN(xfs_scrub_setup_ag_allocbt);
> +SETUP_FN(xfs_scrub_setup_ag_iallocbt);
>  #undef SETUP_FN
>
>  /* Metadata scrubbers */
> @@ -223,6 +224,8 @@ SCRUB_FN(xfs_scrub_agfl);
>  SCRUB_FN(xfs_scrub_agi);
>  SCRUB_FN(xfs_scrub_bnobt);
>  SCRUB_FN(xfs_scrub_cntbt);
> +SCRUB_FN(xfs_scrub_inobt);
> +SCRUB_FN(xfs_scrub_finobt);
>  #undef SCRUB_FN
>
>  #endif	/* __XFS_REPAIR_COMMON_H__ */
> diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c
> new file mode 100644
> index 0000000..ecf1852
> --- /dev/null
> +++ b/fs/xfs/scrub/ialloc.c
> @@ -0,0 +1,347 @@
> +/*
> + * Copyright (C) 2017 Oracle.  All Rights Reserved.
> + *
> + * Author: Darrick J. Wong <darrick.wong@oracle.com>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version 2
> + * of the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it would be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write the Free Software Foundation,
> + * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
> + */
> +#include "xfs.h"
> +#include "xfs_fs.h"
> +#include "xfs_shared.h"
> +#include "xfs_format.h"
> +#include "xfs_trans_resv.h"
> +#include "xfs_mount.h"
> +#include "xfs_defer.h"
> +#include "xfs_btree.h"
> +#include "xfs_bit.h"
> +#include "xfs_log_format.h"
> +#include "xfs_trans.h"
> +#include "xfs_trace.h"
> +#include "xfs_sb.h"
> +#include "xfs_inode.h"
> +#include "xfs_ialloc.h"
> +#include "xfs_ialloc_btree.h"
> +#include "xfs_icache.h"
> +#include "xfs_rmap.h"
> +#include "xfs_log.h"
> +#include "xfs_trans_priv.h"
> +#include "scrub/common.h"
> +#include "scrub/btree.h"
> +
> +/*
> + * Set us up to scrub inode btrees.
> + * If we detect a discrepancy between the inobt and the inode,
> + * try again after forcing logged inode cores out to disk.
> + */
> +int
> +xfs_scrub_setup_ag_iallocbt(
> +	struct xfs_scrub_context	*sc,
> +	struct xfs_inode		*ip)
> +{
> +	return xfs_scrub_setup_ag_btree(sc, ip, sc->try_harder);
> +}
> +
> +/* Inode btree scrubber. */
> +
> +/* Scrub a chunk of an inobt record. */
> +STATIC int
> +xfs_scrub_iallocbt_chunk(
> +	struct xfs_scrub_btree		*bs,
> +	struct xfs_inobt_rec_incore	*irec,
> +	xfs_agino_t			agino,
> +	xfs_extlen_t			len,
> +	bool				*keep_scanning)
> +{
> +	struct xfs_mount		*mp = bs->cur->bc_mp;
> +	struct xfs_agf			*agf;
> +	xfs_agblock_t			eoag;
> +	xfs_agblock_t			bno;
> +	int				error = 0;
> +
> +	agf = XFS_BUF_TO_AGF(bs->sc->sa.agf_bp);
> +	eoag = be32_to_cpu(agf->agf_length);
> +	bno = XFS_AGINO_TO_AGBNO(mp, agino);
> +
> +	*keep_scanning = true;
> +	XFS_SCRUB_BTREC_CHECK(bs, bno < mp->m_sb.sb_agblocks);
> +	XFS_SCRUB_BTREC_CHECK(bs, bno < eoag);
> +	XFS_SCRUB_BTREC_CHECK(bs, bno < bno + len);
> +	XFS_SCRUB_BTREC_CHECK(bs, (unsigned long long)bno + len <=
> +			mp->m_sb.sb_agblocks);
> +	XFS_SCRUB_BTREC_CHECK(bs, (unsigned long long)bno + len <=
> +			eoag);
> +	if (error) {
> +		*keep_scanning = false;
> +		goto out;
> +	}
> +
> +out:
> +	return error;
> +}
> +
> +/* Count the number of free inodes. */
> +static unsigned int
> +xfs_scrub_iallocbt_freecount(
> +	xfs_inofree_t			freemask)
> +{
> +	int				bits = XFS_INODES_PER_CHUNK;
> +	unsigned int			ret = 0;
> +
> +	while (bits--) {
> +		if (freemask & 1)
> +			ret++;
> +		freemask >>= 1;
> +	}
> +
> +	return ret;
> +}
> +
> +/* Check a particular inode with ir_free. */
> +STATIC int
> +xfs_scrub_iallocbt_check_cluster_freemask(
> +	struct xfs_scrub_btree		*bs,
> +	xfs_ino_t			fsino,
> +	xfs_agino_t			chunkino,
> +	xfs_agino_t			clusterino,
> +	struct xfs_inobt_rec_incore	*irec,
> +	struct xfs_buf			*bp)
> +{
> +	struct xfs_dinode		*dip;
> +	struct xfs_mount		*mp = bs->cur->bc_mp;
> +	bool				freemask_ok;
> +	bool				inuse;
> +	int				error;
> +
> +	dip = xfs_buf_offset(bp, clusterino * mp->m_sb.sb_inodesize);
> +	XFS_SCRUB_BTREC_GOTO(bs,
> +			be16_to_cpu(dip->di_magic) == XFS_DINODE_MAGIC,
> +			out);
> +	XFS_SCRUB_BTREC_GOTO(bs,
> +			dip->di_version < 3 || be64_to_cpu(dip->di_ino) ==
> +				fsino + clusterino,
> +			out);
> +	freemask_ok = !!(irec->ir_free & XFS_INOBT_MASK(chunkino + clusterino));
> +	error = xfs_icache_inode_is_allocated(mp, bs->cur->bc_tp,
> +			fsino + clusterino, &inuse);
> +	if (error == -ENOENT) {
> +		/* Not cached, just read the disk buffer */
> +		freemask_ok ^= !!(dip->di_mode);
> +		if (!bs->sc->try_harder && !freemask_ok)
> +			return -EDEADLOCK;
> +	} else if (error < 0) {
> +		/* Inode is only half assembled, don't bother. */
> +		freemask_ok = true;
> +	} else {
> +		/* Inode is all there. */
> +		freemask_ok ^= inuse;
> +	}
> +	XFS_SCRUB_BTREC_CHECK(bs, freemask_ok);
> +out:
> +	return 0;
> +}
> +
> +/* Make sure the free mask is consistent with what the inodes think. */
> +STATIC int
> +xfs_scrub_iallocbt_check_freemask(
> +	struct xfs_scrub_btree		*bs,
> +	struct xfs_inobt_rec_incore	*irec)
> +{
> +	struct xfs_owner_info		oinfo;
> +	struct xfs_imap			imap;
> +	struct xfs_mount		*mp = bs->cur->bc_mp;
> +	struct xfs_dinode		*dip;
> +	struct xfs_buf			*bp;
> +	xfs_ino_t			fsino;
> +	xfs_agino_t			nr_inodes;
> +	xfs_agino_t			agino;
> +	xfs_agino_t			chunkino;
> +	xfs_agino_t			clusterino;
> +	xfs_agblock_t			agbno;
> +	int				blks_per_cluster;
> +	uint16_t			holemask;
> +	uint16_t			ir_holemask;
> +	int				error = 0;
> +
> +	/* Make sure the freemask matches the inode records. */
> +	blks_per_cluster = xfs_icluster_size_fsb(mp);
> +	nr_inodes = XFS_OFFBNO_TO_AGINO(mp, blks_per_cluster, 0);
> +	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
> +
> +	for (agino = irec->ir_startino;
> +	     agino < irec->ir_startino + XFS_INODES_PER_CHUNK;
> +	     agino += blks_per_cluster * mp->m_sb.sb_inopblock) {
> +		fsino = XFS_AGINO_TO_INO(mp, bs->cur->bc_private.a.agno, agino);
> +		chunkino = agino - irec->ir_startino;
> +		agbno = XFS_AGINO_TO_AGBNO(mp, agino);
> +
> +		/* Compute the holemask mask for this cluster. */
> +		for (clusterino = 0, holemask = 0; clusterino < nr_inodes;
> +		     clusterino += XFS_INODES_PER_HOLEMASK_BIT)
> +			holemask |= XFS_INOBT_MASK((chunkino + clusterino) /
> +					XFS_INODES_PER_HOLEMASK_BIT);
> +
> +		/* The whole cluster must be a hole or not a hole. */
> +		ir_holemask = (irec->ir_holemask & holemask);
> +		XFS_SCRUB_BTREC_CHECK(bs, ir_holemask == holemask ||
> +				ir_holemask == 0);
> +
> +		/* If any part of this is a hole, skip it. */
> +		if (ir_holemask)
> +			continue;
> +
> +		/* Grab the inode cluster buffer. */
> +		imap.im_blkno = XFS_AGB_TO_DADDR(mp, bs->cur->bc_private.a.agno,
> +				agbno);
> +		imap.im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
> +		imap.im_boffset = 0;
> +
> +		error = xfs_imap_to_bp(mp, bs->cur->bc_tp, &imap,
> +				&dip, &bp, 0, 0);
> +		XFS_SCRUB_BTREC_OP_ERROR_GOTO(bs, &error, next_cluster);
> +
> +		/* Which inodes are free? */
> +		for (clusterino = 0; clusterino < nr_inodes; clusterino++) {
> +			error = xfs_scrub_iallocbt_check_cluster_freemask(bs,
> +					fsino, chunkino, clusterino, irec, bp);
> +			if (error) {
> +				xfs_trans_brelse(bs->cur->bc_tp, bp);
> +				return error;
> +			}
> +		}
> +
> +		xfs_trans_brelse(bs->cur->bc_tp, bp);
> +next_cluster:
> +		;
> +	}
> +
> +	return error;
> +}
> +
> +/* Scrub an inobt/finobt record. */
> +STATIC int
> +xfs_scrub_iallocbt_helper(
> +	struct xfs_scrub_btree		*bs,
> +	union xfs_btree_rec		*rec)
> +{
> +	struct xfs_mount		*mp = bs->cur->bc_mp;
> +	struct xfs_agi			*agi;
> +	struct xfs_inobt_rec_incore	irec;
> +	uint64_t			holes;
> +	xfs_agino_t			agino;
> +	xfs_agblock_t			agbno;
> +	xfs_extlen_t			len;
> +	bool				keep_scanning;
> +	int				holecount;
> +	int				i;
> +	int				error = 0;
> +	int				err2 = 0;
> +	unsigned int			real_freecount;
> +	uint16_t			holemask;
> +
> +	xfs_inobt_btrec_to_irec(mp, rec, &irec);
> +
> +	XFS_SCRUB_BTREC_CHECK(bs, irec.ir_count <= XFS_INODES_PER_CHUNK);
> +	XFS_SCRUB_BTREC_CHECK(bs, irec.ir_freecount <= XFS_INODES_PER_CHUNK);
> +	real_freecount = irec.ir_freecount +
> +			(XFS_INODES_PER_CHUNK - irec.ir_count);
> +	XFS_SCRUB_BTREC_CHECK(bs, real_freecount ==
> +			xfs_scrub_iallocbt_freecount(irec.ir_free));
> +	agi = XFS_BUF_TO_AGI(bs->sc->sa.agi_bp);
> +	agino = irec.ir_startino;
> +	agbno = XFS_AGINO_TO_AGBNO(mp, irec.ir_startino);
> +	XFS_SCRUB_BTREC_GOTO(bs, agbno < be32_to_cpu(agi->agi_length), out);
> +	XFS_SCRUB_BTREC_CHECK(bs,
> +			!(agbno & (xfs_ialloc_cluster_alignment(mp) - 1)));
> +	XFS_SCRUB_BTREC_CHECK(bs, !(agbno & (xfs_icluster_size_fsb(mp) - 1)));
> +
> +	/* Handle non-sparse inodes */
> +	if (!xfs_inobt_issparse(irec.ir_holemask)) {
> +		len = XFS_B_TO_FSB(mp,
> +				XFS_INODES_PER_CHUNK * mp->m_sb.sb_inodesize);
> +		XFS_SCRUB_BTREC_CHECK(bs,
> +				irec.ir_count == XFS_INODES_PER_CHUNK);
> +
> +		error = xfs_scrub_iallocbt_chunk(bs, &irec, agino, len,
> +				&keep_scanning);
> +		if (error)
> +			goto out;
> +		goto check_freemask;
> +	}
> +
> +	/* Check each chunk of a sparse inode cluster. */
> +	holemask = irec.ir_holemask;
> +	holecount = 0;
> +	len = XFS_B_TO_FSB(mp,
> +			XFS_INODES_PER_HOLEMASK_BIT * mp->m_sb.sb_inodesize);
> +	holes = ~xfs_inobt_irec_to_allocmask(&irec);
> +	XFS_SCRUB_BTREC_CHECK(bs, (holes & irec.ir_free) == holes);
> +	XFS_SCRUB_BTREC_CHECK(bs, irec.ir_freecount <= irec.ir_count);
> +
> +	for (i = 0; i < XFS_INOBT_HOLEMASK_BITS; holemask >>= 1,
> +			i++, agino += XFS_INODES_PER_HOLEMASK_BIT) {
> +		if (holemask & 1) {
> +			holecount += XFS_INODES_PER_HOLEMASK_BIT;
> +			continue;
> +		}
> +
> +		err2 = xfs_scrub_iallocbt_chunk(bs, &irec, agino, len,
> +				&keep_scanning);
> +		if (!error && err2)
> +			error = err2;
> +		if (!keep_scanning)
> +			break;
> +	}
> +
> +	XFS_SCRUB_BTREC_CHECK(bs, holecount <= XFS_INODES_PER_CHUNK);
> +	XFS_SCRUB_BTREC_CHECK(bs, holecount + irec.ir_count ==
> +			XFS_INODES_PER_CHUNK);
> +
> +check_freemask:
> +	error = xfs_scrub_iallocbt_check_freemask(bs, &irec);
> +	if (error)
> +		goto out;
> +
> +out:
> +	return error;
> +}
> +
> +/* Scrub the inode btrees for some AG. */
> +STATIC int
> +xfs_scrub_iallocbt(
> +	struct xfs_scrub_context	*sc,
> +	xfs_btnum_t			which)
> +{
> +	struct xfs_btree_cur		*cur;
> +	struct xfs_owner_info		oinfo;
> +
> +	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
> +	cur = which == XFS_BTNUM_INO ? sc->sa.ino_cur : sc->sa.fino_cur;
> +	return xfs_scrub_btree(sc, cur, xfs_scrub_iallocbt_helper,
> +			&oinfo, NULL);
> +}
> +
> +int
> +xfs_scrub_inobt(
> +	struct xfs_scrub_context	*sc)
> +{
> +	return xfs_scrub_iallocbt(sc, XFS_BTNUM_INO);
> +}
> +
> +int
> +xfs_scrub_finobt(
> +	struct xfs_scrub_context	*sc)
> +{
> +	return xfs_scrub_iallocbt(sc, XFS_BTNUM_FINO);
> +}
> diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
> index 4a9a645..e2c5f99 100644
> --- a/fs/xfs/xfs_trace.h
> +++ b/fs/xfs/xfs_trace.h
> @@ -3319,7 +3319,9 @@ DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping);
>  	{ XFS_SCRUB_TYPE_AGFL,		"AGFL" }, \
>  	{ XFS_SCRUB_TYPE_AGI,		"AGI" }, \
>  	{ XFS_SCRUB_TYPE_BNOBT,		"bnobt" }, \
> -	{ XFS_SCRUB_TYPE_CNTBT,		"cntbt" }
> +	{ XFS_SCRUB_TYPE_CNTBT,		"cntbt" }, \
> +	{ XFS_SCRUB_TYPE_INOBT,		"inobt" }, \
> +	{ XFS_SCRUB_TYPE_FINOBT,	"finobt" }
>  DECLARE_EVENT_CLASS(xfs_scrub_class,
>  	TP_PROTO(struct xfs_inode *ip, struct xfs_scrub_metadata *sm,
>  		 int error),
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index ce492ee..5197bea 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -144,6 +144,7 @@  xfs-y				+= $(addprefix scrub/, \
 				   alloc.o \
 				   btree.o \
 				   common.o \
+				   ialloc.o \
 				   metabufs.o \
 				   )
 endif
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 23229f0..154c3dd 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -518,7 +518,7 @@  static inline int xfs_sb_version_hasftype(struct xfs_sb *sbp)
 		 (sbp->sb_features2 & XFS_SB_VERSION2_FTYPE));
 }
 
-static inline int xfs_sb_version_hasfinobt(xfs_sb_t *sbp)
+static inline bool xfs_sb_version_hasfinobt(xfs_sb_t *sbp)
 {
 	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) &&
 		(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT);
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index bb36acf..5120cfd 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -489,7 +489,9 @@  struct xfs_scrub_metadata {
 #define XFS_SCRUB_TYPE_AGI	5	/* AG inode header */
 #define XFS_SCRUB_TYPE_BNOBT	6	/* freesp by block btree */
 #define XFS_SCRUB_TYPE_CNTBT	7	/* freesp by length btree */
-#define XFS_SCRUB_TYPE_MAX	7
+#define XFS_SCRUB_TYPE_INOBT	8	/* inode btree */
+#define XFS_SCRUB_TYPE_FINOBT	9	/* free inode btree */
+#define XFS_SCRUB_TYPE_MAX	9
 
 /* i: repair this metadata */
 #define XFS_SCRUB_FLAG_REPAIR		(1 << 0)
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 86161b5..9a31846 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -716,6 +716,15 @@  static const struct xfs_scrub_meta_fns meta_scrub_fns[] = {
 		.setup	= xfs_scrub_setup_ag_allocbt,
 		.scrub	= xfs_scrub_cntbt,
 	},
+	{ /* inobt */
+		.setup	= xfs_scrub_setup_ag_iallocbt,
+		.scrub	= xfs_scrub_inobt,
+	},
+	{ /* finobt */
+		.setup	= xfs_scrub_setup_ag_iallocbt,
+		.scrub	= xfs_scrub_finobt,
+		.has	= xfs_sb_version_hasfinobt,
+	},
 };
 
 /* Dispatch metadata scrubbing. */
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index f14abfb..cd89bec 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -210,6 +210,7 @@  SETUP_FN(xfs_scrub_setup_fs);
 SETUP_FN(xfs_scrub_setup_metabufs);
 SETUP_FN(xfs_scrub_setup_ag_header);
 SETUP_FN(xfs_scrub_setup_ag_allocbt);
+SETUP_FN(xfs_scrub_setup_ag_iallocbt);
 #undef SETUP_FN
 
 /* Metadata scrubbers */
@@ -223,6 +224,8 @@  SCRUB_FN(xfs_scrub_agfl);
 SCRUB_FN(xfs_scrub_agi);
 SCRUB_FN(xfs_scrub_bnobt);
 SCRUB_FN(xfs_scrub_cntbt);
+SCRUB_FN(xfs_scrub_inobt);
+SCRUB_FN(xfs_scrub_finobt);
 #undef SCRUB_FN
 
 #endif	/* __XFS_REPAIR_COMMON_H__ */
diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c
new file mode 100644
index 0000000..ecf1852
--- /dev/null
+++ b/fs/xfs/scrub/ialloc.c
@@ -0,0 +1,347 @@ 
+/*
+ * Copyright (C) 2017 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_trace.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_ialloc.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_icache.h"
+#include "xfs_rmap.h"
+#include "xfs_log.h"
+#include "xfs_trans_priv.h"
+#include "scrub/common.h"
+#include "scrub/btree.h"
+
+/*
+ * Prepare to scrub the inode btrees of an AG.
+ *
+ * sc->try_harder is handed to the generic AG btree setup as its third
+ * argument.  If a discrepancy between the inobt and an inode is detected,
+ * the scrub is tried again after forcing logged inode cores out to disk.
+ */
+int
+xfs_scrub_setup_ag_iallocbt(
+	struct xfs_scrub_context	*sc,
+	struct xfs_inode		*ip)
+{
+	int				error;
+
+	error = xfs_scrub_setup_ag_btree(sc, ip, sc->try_harder);
+	return error;
+}
+
+/* Inode btree scrubber. */
+
+/*
+ * Scrub a chunk of an inobt record.
+ *
+ * Converts @agino to an AG block number and checks that the extent of @len
+ * blocks starting there begins and ends within both the superblock's
+ * sb_agblocks and the AGF's agf_length, and that bno + len is greater than
+ * bno.  The end-of-extent comparisons are done in unsigned long long.
+ *
+ * @irec is accepted but not referenced by this function.  *@keep_scanning
+ * is set to true before the checks and cleared only if @error is nonzero
+ * afterwards.  Returns @error.
+ *
+ * NOTE(review): nothing visible here assigns @error after it is initialised
+ * to zero; presumably XFS_SCRUB_BTREC_CHECK (defined elsewhere) can modify
+ * it -- confirm against the macro definition.
+ */
+STATIC int
+xfs_scrub_iallocbt_chunk(
+	struct xfs_scrub_btree		*bs,
+	struct xfs_inobt_rec_incore	*irec,
+	xfs_agino_t			agino,
+	xfs_extlen_t			len,
+	bool				*keep_scanning)
+{
+	struct xfs_mount		*mp = bs->cur->bc_mp;
+	struct xfs_agf			*agf;
+	xfs_agblock_t			eoag;
+	xfs_agblock_t			bno;
+	int				error = 0;
+
+	agf = XFS_BUF_TO_AGF(bs->sc->sa.agf_bp);
+	eoag = be32_to_cpu(agf->agf_length);	/* AG length per the AGF */
+	bno = XFS_AGINO_TO_AGBNO(mp, agino);	/* first block of the chunk */
+
+	*keep_scanning = true;
+	XFS_SCRUB_BTREC_CHECK(bs, bno < mp->m_sb.sb_agblocks);
+	XFS_SCRUB_BTREC_CHECK(bs, bno < eoag);
+	XFS_SCRUB_BTREC_CHECK(bs, bno < bno + len);
+	XFS_SCRUB_BTREC_CHECK(bs, (unsigned long long)bno + len <=
+			mp->m_sb.sb_agblocks);
+	XFS_SCRUB_BTREC_CHECK(bs, (unsigned long long)bno + len <=
+			eoag);
+	if (error) {
+		*keep_scanning = false;
+		goto out;
+	}
+
+out:
+	return error;
+}
+
+/*
+ * Count how many of the low XFS_INODES_PER_CHUNK bits of @freemask are set,
+ * examining one bit per iteration starting from bit 0.
+ */
+static unsigned int
+xfs_scrub_iallocbt_freecount(
+	xfs_inofree_t			freemask)
+{
+	unsigned int			nr_free = 0;
+	int				i;
+
+	for (i = 0; i < XFS_INODES_PER_CHUNK; i++, freemask >>= 1)
+		nr_free += (freemask & 1) ? 1 : 0;
+
+	return nr_free;
+}
+
+/*
+ * Check a particular inode with ir_free.
+ *
+ * @bp is an inode cluster buffer; the on-disk inode examined sits at byte
+ * offset @clusterino * sb_inodesize inside it.  The inode number used for
+ * the di_ino comparison and the cache lookup is @fsino + @clusterino, and
+ * the ir_free bit tested in @irec is @chunkino + @clusterino.
+ *
+ * The inode's magic number and, for di_version >= 3, its di_ino are checked
+ * first via XFS_SCRUB_BTREC_GOTO with the "out" label (presumably jumping
+ * there on failure, which skips the free-bit comparison -- confirm against
+ * the macro).  freemask_ok starts out as "the free bit is set" and is then
+ * XORed with the in-use state, so it ends up true only when exactly one of
+ * the two holds.
+ *
+ * Returns -EDEADLOCK when the cache lookup gave -ENOENT, the on-disk
+ * di_mode disagrees with ir_free and sc->try_harder is not set; returns 0
+ * in every other case.
+ */
+STATIC int
+xfs_scrub_iallocbt_check_cluster_freemask(
+	struct xfs_scrub_btree		*bs,
+	xfs_ino_t			fsino,
+	xfs_agino_t			chunkino,
+	xfs_agino_t			clusterino,
+	struct xfs_inobt_rec_incore	*irec,
+	struct xfs_buf			*bp)
+{
+	struct xfs_dinode		*dip;
+	struct xfs_mount		*mp = bs->cur->bc_mp;
+	bool				freemask_ok;
+	bool				inuse;
+	int				error;
+
+	dip = xfs_buf_offset(bp, clusterino * mp->m_sb.sb_inodesize);
+	XFS_SCRUB_BTREC_GOTO(bs,
+			be16_to_cpu(dip->di_magic) == XFS_DINODE_MAGIC,
+			out);
+	XFS_SCRUB_BTREC_GOTO(bs,
+			dip->di_version < 3 || be64_to_cpu(dip->di_ino) ==
+				fsino + clusterino,
+			out);
+	freemask_ok = !!(irec->ir_free & XFS_INOBT_MASK(chunkino + clusterino));
+	error = xfs_icache_inode_is_allocated(mp, bs->cur->bc_tp,
+			fsino + clusterino, &inuse);
+	if (error == -ENOENT) {
+		/*
+		 * Not cached, just read the disk buffer.  A nonzero di_mode
+		 * is treated as "in use".  On a mismatch without try_harder
+		 * the function bails out with -EDEADLOCK instead of
+		 * recording the result below.
+		 */
+		freemask_ok ^= !!(dip->di_mode);
+		if (!bs->sc->try_harder && !freemask_ok)
+			return -EDEADLOCK;
+	} else if (error < 0) {
+		/* Inode is only half assembled, don't bother. */
+		freemask_ok = true;
+	} else {
+		/*
+		 * Inode is all there.  The free bit must be the opposite of
+		 * the in-core allocation state.
+		 */
+		freemask_ok ^= inuse;
+	}
+	XFS_SCRUB_BTREC_CHECK(bs, freemask_ok);
+out:
+	return 0;
+}
+
+/*
+ * Make sure the free mask is consistent with what the inodes think.
+ *
+ * Walks the inode chunk described by @irec one inode cluster at a time.
+ * For every cluster whose holemask bits are all clear, the cluster buffer
+ * is read and each inode in it is compared against ir_free by
+ * xfs_scrub_iallocbt_check_cluster_freemask().
+ *
+ * Returns the first nonzero value from the per-inode check (after the
+ * buffer has been released); otherwise returns whatever @error holds when
+ * the loop finishes.
+ */
+STATIC int
+xfs_scrub_iallocbt_check_freemask(
+	struct xfs_scrub_btree		*bs,
+	struct xfs_inobt_rec_incore	*irec)
+{
+	struct xfs_owner_info		oinfo;
+	struct xfs_imap			imap;
+	struct xfs_mount		*mp = bs->cur->bc_mp;
+	struct xfs_dinode		*dip;
+	struct xfs_buf			*bp;
+	xfs_ino_t			fsino;
+	xfs_agino_t			nr_inodes;
+	xfs_agino_t			agino;
+	xfs_agino_t			chunkino;
+	xfs_agino_t			clusterino;
+	xfs_agblock_t			agbno;
+	int				blks_per_cluster;
+	uint16_t			holemask;
+	uint16_t			ir_holemask;
+	int				error = 0;
+
+	/*
+	 * Make sure the freemask matches the inode records.  nr_inodes bounds
+	 * the per-cluster inode loops below.  NOTE(review): oinfo is
+	 * initialised here but not referenced again in this function.
+	 */
+	blks_per_cluster = xfs_icluster_size_fsb(mp);
+	nr_inodes = XFS_OFFBNO_TO_AGINO(mp, blks_per_cluster, 0);
+	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
+
+	for (agino = irec->ir_startino;
+	     agino < irec->ir_startino + XFS_INODES_PER_CHUNK;
+	     agino += blks_per_cluster * mp->m_sb.sb_inopblock) {
+		fsino = XFS_AGINO_TO_INO(mp, bs->cur->bc_private.a.agno, agino);
+		chunkino = agino - irec->ir_startino;	/* offset in chunk */
+		agbno = XFS_AGINO_TO_AGBNO(mp, agino);
+
+		/*
+		 * Compute the holemask mask for this cluster: one bit for
+		 * every XFS_INODES_PER_HOLEMASK_BIT inodes the cluster spans.
+		 */
+		for (clusterino = 0, holemask = 0; clusterino < nr_inodes;
+		     clusterino += XFS_INODES_PER_HOLEMASK_BIT)
+			holemask |= XFS_INOBT_MASK((chunkino + clusterino) /
+					XFS_INODES_PER_HOLEMASK_BIT);
+
+		/* The whole cluster must be a hole or not a hole. */
+		ir_holemask = (irec->ir_holemask & holemask);
+		XFS_SCRUB_BTREC_CHECK(bs, ir_holemask == holemask ||
+				ir_holemask == 0);
+
+		/* If any part of this is a hole, skip it. */
+		if (ir_holemask)
+			continue;
+
+		/*
+		 * Grab the inode cluster buffer.  The mapping starts at the
+		 * cluster's first block and covers blks_per_cluster blocks.
+		 */
+		imap.im_blkno = XFS_AGB_TO_DADDR(mp, bs->cur->bc_private.a.agno,
+				agbno);
+		imap.im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
+		imap.im_boffset = 0;
+
+		error = xfs_imap_to_bp(mp, bs->cur->bc_tp, &imap,
+				&dip, &bp, 0, 0);
+		XFS_SCRUB_BTREC_OP_ERROR_GOTO(bs, &error, next_cluster);
+
+		/*
+		 * Which inodes are free?  Any error from the per-inode check
+		 * ends the walk, but only after the buffer is released.
+		 */
+		for (clusterino = 0; clusterino < nr_inodes; clusterino++) {
+			error = xfs_scrub_iallocbt_check_cluster_freemask(bs,
+					fsino, chunkino, clusterino, irec, bp);
+			if (error) {
+				xfs_trans_brelse(bs->cur->bc_tp, bp);
+				return error;
+			}
+		}
+
+		xfs_trans_brelse(bs->cur->bc_tp, bp);
+next_cluster:
+		;	/* empty statement for the label to attach to */
+	}
+
+	return error;
+}
+
+/*
+ * Scrub an inobt/finobt record.
+ *
+ * Decodes @rec into an in-core record and checks, in order: the inode and
+ * free-inode counters, that the number of set ir_free bits matches those
+ * counters, the location and alignment of the chunk's first block, the
+ * block extent(s) the record covers, and finally that ir_free agrees with
+ * the inodes themselves.
+ *
+ * A non-sparse record is checked as one extent covering the whole chunk.
+ * A sparse record is checked one holemask bit at a time, skipping holes.
+ *
+ * Returns 0, or the first error reported by the extent checks or by the
+ * freemask check.  An error from the extent checks skips the freemask
+ * check on both the sparse and the non-sparse path.
+ */
+STATIC int
+xfs_scrub_iallocbt_helper(
+	struct xfs_scrub_btree		*bs,
+	union xfs_btree_rec		*rec)
+{
+	struct xfs_mount		*mp = bs->cur->bc_mp;
+	struct xfs_agi			*agi;
+	struct xfs_inobt_rec_incore	irec;
+	uint64_t			holes;
+	xfs_agino_t			agino;
+	xfs_agblock_t			agbno;
+	xfs_extlen_t			len;
+	bool				keep_scanning;
+	int				holecount;
+	int				i;
+	int				error = 0;
+	int				err2 = 0;
+	unsigned int			real_freecount;
+	uint16_t			holemask;
+
+	xfs_inobt_btrec_to_irec(mp, rec, &irec);
+
+	/* Neither counter may exceed the size of a full chunk. */
+	XFS_SCRUB_BTREC_CHECK(bs, irec.ir_count <= XFS_INODES_PER_CHUNK);
+	XFS_SCRUB_BTREC_CHECK(bs, irec.ir_freecount <= XFS_INODES_PER_CHUNK);
+
+	/*
+	 * Inodes that are not part of ir_count are expected to have their
+	 * ir_free bit set as well, so add them to the expected bit count.
+	 */
+	real_freecount = irec.ir_freecount +
+			(XFS_INODES_PER_CHUNK - irec.ir_count);
+	XFS_SCRUB_BTREC_CHECK(bs, real_freecount ==
+			xfs_scrub_iallocbt_freecount(irec.ir_free));
+
+	/* The chunk must start inside the AG and be cluster aligned. */
+	agi = XFS_BUF_TO_AGI(bs->sc->sa.agi_bp);
+	agino = irec.ir_startino;
+	agbno = XFS_AGINO_TO_AGBNO(mp, irec.ir_startino);
+	XFS_SCRUB_BTREC_GOTO(bs, agbno < be32_to_cpu(agi->agi_length), out);
+	XFS_SCRUB_BTREC_CHECK(bs,
+			!(agbno & (xfs_ialloc_cluster_alignment(mp) - 1)));
+	XFS_SCRUB_BTREC_CHECK(bs, !(agbno & (xfs_icluster_size_fsb(mp) - 1)));
+
+	/* Handle non-sparse inodes */
+	if (!xfs_inobt_issparse(irec.ir_holemask)) {
+		len = XFS_B_TO_FSB(mp,
+				XFS_INODES_PER_CHUNK * mp->m_sb.sb_inodesize);
+		XFS_SCRUB_BTREC_CHECK(bs,
+				irec.ir_count == XFS_INODES_PER_CHUNK);
+
+		error = xfs_scrub_iallocbt_chunk(bs, &irec, agino, len,
+				&keep_scanning);
+		if (error)
+			goto out;
+		goto check_freemask;
+	}
+
+	/* Check each chunk of a sparse inode cluster. */
+	holemask = irec.ir_holemask;
+	holecount = 0;
+	len = XFS_B_TO_FSB(mp,
+			XFS_INODES_PER_HOLEMASK_BIT * mp->m_sb.sb_inodesize);
+
+	/* Every inode that lies in a hole must be marked free. */
+	holes = ~xfs_inobt_irec_to_allocmask(&irec);
+	XFS_SCRUB_BTREC_CHECK(bs, (holes & irec.ir_free) == holes);
+	XFS_SCRUB_BTREC_CHECK(bs, irec.ir_freecount <= irec.ir_count);
+
+	for (i = 0; i < XFS_INOBT_HOLEMASK_BITS; holemask >>= 1,
+			i++, agino += XFS_INODES_PER_HOLEMASK_BIT) {
+		if (holemask & 1) {
+			holecount += XFS_INODES_PER_HOLEMASK_BIT;
+			continue;
+		}
+
+		/* Remember only the first error, but honour keep_scanning. */
+		err2 = xfs_scrub_iallocbt_chunk(bs, &irec, agino, len,
+				&keep_scanning);
+		if (!error && err2)
+			error = err2;
+		if (!keep_scanning)
+			break;
+	}
+
+	XFS_SCRUB_BTREC_CHECK(bs, holecount <= XFS_INODES_PER_CHUNK);
+	XFS_SCRUB_BTREC_CHECK(bs, holecount + irec.ir_count ==
+			XFS_INODES_PER_CHUNK);
+
+	/*
+	 * Don't let the freemask check below overwrite an error collected
+	 * during the sparse chunk scan; bail out just as the non-sparse
+	 * path does.
+	 */
+	if (error)
+		goto out;
+
+check_freemask:
+	error = xfs_scrub_iallocbt_check_freemask(bs, &irec);
+out:
+	return error;
+}
+
+/*
+ * Scrub one of the inode btrees of an AG.
+ *
+ * @which selects the cursor to walk: XFS_BTNUM_INO picks sc->sa.ino_cur,
+ * any other value picks sc->sa.fino_cur.  Each record is passed to
+ * xfs_scrub_iallocbt_helper().  Returns what xfs_scrub_btree() returns.
+ */
+STATIC int
+xfs_scrub_iallocbt(
+	struct xfs_scrub_context	*sc,
+	xfs_btnum_t			which)
+{
+	struct xfs_owner_info		oinfo;
+	struct xfs_btree_cur		*cur;
+
+	if (which == XFS_BTNUM_INO)
+		cur = sc->sa.ino_cur;
+	else
+		cur = sc->sa.fino_cur;
+
+	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
+	return xfs_scrub_btree(sc, cur, xfs_scrub_iallocbt_helper,
+			&oinfo, NULL);
+}
+
+/* Scrub the inode btree (XFS_BTNUM_INO) of the AG attached to @sc. */
+int
+xfs_scrub_inobt(
+	struct xfs_scrub_context	*sc)
+{
+	int				error;
+
+	error = xfs_scrub_iallocbt(sc, XFS_BTNUM_INO);
+	return error;
+}
+
+/* Scrub the free inode btree (XFS_BTNUM_FINO) of the AG attached to @sc. */
+int
+xfs_scrub_finobt(
+	struct xfs_scrub_context	*sc)
+{
+	int				error;
+
+	error = xfs_scrub_iallocbt(sc, XFS_BTNUM_FINO);
+	return error;
+}
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 4a9a645..e2c5f99 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3319,7 +3319,9 @@  DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping);
 	{ XFS_SCRUB_TYPE_AGFL,		"AGFL" }, \
 	{ XFS_SCRUB_TYPE_AGI,		"AGI" }, \
 	{ XFS_SCRUB_TYPE_BNOBT,		"bnobt" }, \
-	{ XFS_SCRUB_TYPE_CNTBT,		"cntbt" }
+	{ XFS_SCRUB_TYPE_CNTBT,		"cntbt" }, \
+	{ XFS_SCRUB_TYPE_INOBT,		"inobt" }, \
+	{ XFS_SCRUB_TYPE_FINOBT,	"finobt" }
 DECLARE_EVENT_CLASS(xfs_scrub_class,
 	TP_PROTO(struct xfs_inode *ip, struct xfs_scrub_metadata *sm,
 		 int error),