diff mbox

[17/25] xfs: scrub inode block mappings

Message ID 150706335646.19351.8363960454156993724.stgit@magnolia (mailing list archive)
State Superseded
Headers show

Commit Message

Darrick J. Wong Oct. 3, 2017, 8:42 p.m. UTC
From: Darrick J. Wong <darrick.wong@oracle.com>

Scrub an individual inode's block mappings to make sure they make sense.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/Makefile        |    1 
 fs/xfs/libxfs/xfs_fs.h |    5 +
 fs/xfs/scrub/bmap.c    |  371 ++++++++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/scrub/common.h  |    5 +
 fs/xfs/scrub/scrub.c   |   12 ++
 fs/xfs/scrub/scrub.h   |    3 
 6 files changed, 395 insertions(+), 2 deletions(-)
 create mode 100644 fs/xfs/scrub/bmap.c



--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Dave Chinner Oct. 6, 2017, 2:51 a.m. UTC | #1
On Tue, Oct 03, 2017 at 01:42:36PM -0700, Darrick J. Wong wrote:
> +/* Set us up with an inode's bmap. */
> +STATIC int
> +__xfs_scrub_setup_inode_bmap(
> +	struct xfs_scrub_context	*sc,
> +	struct xfs_inode		*ip,
> +	bool				flush_data)
> +{
> +	struct xfs_mount		*mp = sc->mp;
> +	int				error;
> +
> +	error = xfs_scrub_get_inode(sc, ip);
> +	if (error)
> +		return error;
> +
> +	sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
> +	xfs_ilock(sc->ip, sc->ilock_flags);
> +
> +	/*
> +	 * We don't want any ephemeral data fork updates sitting around
> +	 * while we inspect block mappings, so wait for directio to finish
> +	 * and flush dirty data if we have delalloc reservations.
> +	 */
> +	if (S_ISREG(VFS_I(sc->ip)->i_mode) && flush_data) {
> +		inode_dio_wait(VFS_I(sc->ip));
> +		error = filemap_write_and_wait(VFS_I(sc->ip)->i_mapping);
> +		if (error)
> +			goto out_unlock;
> +		error = invalidate_inode_pages2(VFS_I(sc->ip)->i_mapping);
> +		if (error)
> +			goto out_unlock;
> +	}

The same flush and invalidate is done in xfs_fs_map_blocks and
xfs_ioctl_setattr_dax_invalidate. we used to have helper functions
to do this, but we got rid of them because we removed all the
cases where such behaviour was necessary. Now we're adding them
back, perhaps we should have a helper for this again?


> +	/* Got the inode, lock it and we're ready to go. */
> +	error = xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp);
> +	if (error)
> +		goto out_unlock;
> +	sc->ilock_flags |= XFS_ILOCK_EXCL;
> +	xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
> +
> +	return 0;
> +out_unlock:
> +	xfs_iunlock(sc->ip, sc->ilock_flags);
> +	if (sc->ip != ip)
> +		iput(VFS_I(sc->ip));
> +	sc->ip = NULL;

Slightly tricky - how many places do we end up having to do this?
If its more than one, perhaps we need a xfs_scrub_irele(sc, ip)
helper?

> +/*
> + * Inode fork block mapping (BMBT) scrubber.
> + * More complex than the others because we have to scrub
> + * all the extents regardless of whether or not the fork
> + * is in btree format.
> + */
> +
> +struct xfs_scrub_bmap_info {
> +	struct xfs_scrub_context	*sc;
> +	xfs_daddr_t			eofs;
> +	xfs_fileoff_t			lastoff;
> +	bool				is_rt;
> +	bool				is_shared;
> +	int				whichfork;
> +};
> +
> +/* Scrub a single extent record. */
> +STATIC int
> +xfs_scrub_bmap_extent(
> +	struct xfs_inode		*ip,
> +	struct xfs_btree_cur		*cur,
> +	struct xfs_scrub_bmap_info	*info,
> +	struct xfs_bmbt_irec		*irec)
> +{
> +	struct xfs_scrub_ag		sa = { 0 };
> +	struct xfs_mount		*mp = info->sc->mp;
> +	struct xfs_buf			*bp = NULL;
> +	xfs_daddr_t			daddr;
> +	xfs_daddr_t			dlen;
> +	xfs_fsblock_t			bno;
> +	xfs_agnumber_t			agno;
> +	int				error = 0;
> +
> +	if (cur)
> +		xfs_btree_get_block(cur, 0, &bp);
> +
> +	if (irec->br_startoff < info->lastoff ||
> +	    irec->br_startblock == HOLESTARTBLOCK ||
> +	    isnullstartblock(irec->br_startblock))
> +		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
> +				irec->br_startoff);

What are we checking here? that it's ordered correctly and not a
hole/delalloc record? If it is bad, shouldn't we just jump out here
because the following checks are likely to throw silly errors on
hole/delalloc mappings?

> +	/* Actual mapping, so check the block ranges. */
> +	if (info->is_rt) {
> +		daddr = XFS_FSB_TO_BB(mp, irec->br_startblock);
> +		agno = NULLAGNUMBER;
> +		bno = irec->br_startblock;
> +	} else {
> +		daddr = XFS_FSB_TO_DADDR(mp, irec->br_startblock);
> +		agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
> +		if (agno >= mp->m_sb.sb_agcount) {
> +			xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
> +				irec->br_startoff);
> +			goto out;
> +		}
> +		bno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
> +		if (bno >= mp->m_sb.sb_agblocks)
> +			xfs_scrub_fblock_set_corrupt(info->sc,
> +						     info->whichfork,
> +						     irec->br_startoff);

more verify_agbno()/verify_fsbno stuff.

> +	}
> +	dlen = XFS_FSB_TO_BB(mp, irec->br_blockcount);
> +	if (irec->br_blockcount <= 0 ||
> +	    irec->br_blockcount > MAXEXTLEN ||

irec->br_blockcount is unsigned (uint64_t).

Also needs to be checked against AG size.

> +	    daddr >= info->eofs ||
> +	    daddr + dlen > info->eofs)
> +		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
> +				irec->br_startoff);
> +
> +	if (irec->br_state == XFS_EXT_UNWRITTEN &&
> +	    !xfs_sb_version_hasextflgbit(&mp->m_sb))

Superblock scrubber should reject any filesystem without the
extflgbit as corrupt - it's always set by mkfs - so I'm not sure we
need to check this here.

> +/* Scrub an inode fork's block mappings. */
> +STATIC int
> +xfs_scrub_bmap(
> +	struct xfs_scrub_context	*sc,
> +	int				whichfork)
> +{
> +	struct xfs_bmbt_irec		irec;
> +	struct xfs_scrub_bmap_info	info = {0};
> +	struct xfs_owner_info		oinfo;
> +	struct xfs_mount		*mp = sc->mp;
> +	struct xfs_inode		*ip = sc->ip;
> +	struct xfs_ifork		*ifp;
> +	struct xfs_btree_cur		*cur;
> +	xfs_fileoff_t			endoff;
> +	xfs_extnum_t			idx;
> +	bool				found;
> +	int				error = 0;
> +
> +	ifp = XFS_IFORK_PTR(ip, whichfork);
> +
> +	info.is_rt = whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip);
> +	info.eofs = XFS_FSB_TO_BB(mp, info.is_rt ? mp->m_sb.sb_rblocks :
> +					      mp->m_sb.sb_dblocks);
> +	info.whichfork = whichfork;
> +	info.is_shared = whichfork == XFS_DATA_FORK && xfs_is_reflink_inode(ip);
> +	info.sc = sc;
> +
> +	switch (whichfork) {
> +	case XFS_COW_FORK:
> +		/* Non-existent CoW forks are ignorable. */
> +		if (!ifp)
> +			goto out_unlock;
> +		/* No CoW forks on non-reflink inodes/filesystems. */
> +		if (!xfs_is_reflink_inode(ip)) {
> +			xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL);
> +			goto out_unlock;
> +		}
> +		break;
> +	case XFS_ATTR_FORK:
> +		if (!ifp)
> +			goto out_unlock;
> +		if (!xfs_sb_version_hasattr(&mp->m_sb) &&
> +		    !xfs_sb_version_hasattr2(&mp->m_sb))
> +			xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL);
> +		break;
> +	}
> +
> +	/* Check the fork values */
> +	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
> +	case XFS_DINODE_FMT_UUID:
> +	case XFS_DINODE_FMT_DEV:
> +	case XFS_DINODE_FMT_LOCAL:
> +		/* No mappings to check. */
> +		goto out_unlock;
> +	case XFS_DINODE_FMT_EXTENTS:
> +		if (!(ifp->if_flags & XFS_IFEXTENTS)) {
> +			xfs_scrub_fblock_set_corrupt(sc, whichfork, 0);
> +			goto out_unlock;
> +		}
> +		break;
> +	case XFS_DINODE_FMT_BTREE:
> +		ASSERT(whichfork != XFS_COW_FORK);

Corruption check, jump to out?

> +
> +		/* Scan the btree records. */
> +		cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork);
> +		xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
> +		error = xfs_scrub_btree(sc, cur, xfs_scrub_bmapbt_helper,
> +				&oinfo, &info);
> +		xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR :
> +						  XFS_BTREE_NOERROR);

FYI, I missed that the on-disk bmbt was scanned here the first time
I went through this code - i had to go back and work out why the
code only appeared to scrub the incore extent list. Can you wrap
this whole chunk of code into a helper named xfs_scrub_bmbt()
so it stands out that this is where the on disk btree is scrubbed?


> +		if (error == -EDEADLOCK)
> +			return error;

Ok, why don't we go to out_unlock here?

> +		else if (error)
> +			goto out_unlock;

But do for all other errors....

> +		break;
> +	default:
> +		xfs_scrub_fblock_set_corrupt(sc, whichfork, 0);
> +		goto out_unlock;
> +	}
> +
> +	/* Extent data is in memory, so scrub that. */
> +
> +	/* Find the offset of the last extent in the mapping. */
> +	error = xfs_bmap_last_offset(ip, &endoff, whichfork);
> +	if (!xfs_scrub_fblock_op_ok(sc, whichfork, 0, &error))
> +		goto out_unlock;
> +
> +	/* Scrub extent records. */
> +	info.lastoff = 0;
> +	ifp = XFS_IFORK_PTR(ip, whichfork);
> +	for (found = xfs_iext_lookup_extent(ip, ifp, 0, &idx, &irec);
> +	     found != 0;
> +	     found = xfs_iext_get_extent(ifp, ++idx, &irec)) {
> +		if (xfs_scrub_should_terminate(sc, &error))
> +			break;
> +		if (isnullstartblock(irec.br_startblock))
> +			continue;
> +		if (irec.br_startoff >= endoff) {
> +			xfs_scrub_fblock_set_corrupt(sc, whichfork,
> +					irec.br_startoff);
> +			goto out_unlock;
> +		}
> +		error = xfs_scrub_bmap_extent(ip, NULL, &info, &irec);
> +		if (error == -EDEADLOCK)
> +			return error;
> +	}
> +
> +out_unlock:
> +	return error;

Hmmm - out_unlock doesn't unlock anything?

Cheers,

Dave.
Darrick J. Wong Oct. 6, 2017, 5 p.m. UTC | #2
On Fri, Oct 06, 2017 at 01:51:23PM +1100, Dave Chinner wrote:
> On Tue, Oct 03, 2017 at 01:42:36PM -0700, Darrick J. Wong wrote:
> > +/* Set us up with an inode's bmap. */
> > +STATIC int
> > +__xfs_scrub_setup_inode_bmap(
> > +	struct xfs_scrub_context	*sc,
> > +	struct xfs_inode		*ip,
> > +	bool				flush_data)
> > +{
> > +	struct xfs_mount		*mp = sc->mp;
> > +	int				error;
> > +
> > +	error = xfs_scrub_get_inode(sc, ip);
> > +	if (error)
> > +		return error;
> > +
> > +	sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
> > +	xfs_ilock(sc->ip, sc->ilock_flags);
> > +
> > +	/*
> > +	 * We don't want any ephemeral data fork updates sitting around
> > +	 * while we inspect block mappings, so wait for directio to finish
> > +	 * and flush dirty data if we have delalloc reservations.
> > +	 */
> > +	if (S_ISREG(VFS_I(sc->ip)->i_mode) && flush_data) {
> > +		inode_dio_wait(VFS_I(sc->ip));
> > +		error = filemap_write_and_wait(VFS_I(sc->ip)->i_mapping);
> > +		if (error)
> > +			goto out_unlock;
> > +		error = invalidate_inode_pages2(VFS_I(sc->ip)->i_mapping);
> > +		if (error)
> > +			goto out_unlock;
> > +	}
> 
> The same flush and invalidate is done in xfs_fs_map_blocks and
> xfs_ioctl_setattr_dax_invalidate. we used to have helper functions
> to do this, but we got rid of them because we removed all the
> cases where such behaviour was necessary. Now we're adding them
> back, perhaps we should have a helper for this again?

Hmmm... looking at all the filemap_write_and_wait callers in the kernel:

xfs_ioctl_setattr_dax_invalidate flushes & invalidates the page cache
xfs_vm_bmap flushes dirty pages only
xfs_reflink_unshare flushes a range of dirty pages
xfs_fs_map_blocks flushes & invalidates pagecache
xfs_setattr_size flushes between old and new EOF
xfs_getbmap flushes dirty pages
xfs_flush_unmap_range flushes and rips out the page cache

Looking at the one caller of invalidate_inode_pages* that I hadn't
already mentioned:

xfs_fs_commit_blocks invalidates a range of pagecache

And looking at the callers of truncate_pagecache_range:

xfs_file_iomap_end_delalloc truncates pagecache for a partial delalloc write

So you're right, the only usage patterns that match are the two that you
suggested.  However, I got to thinking last night, do we actually need
to invalidate pagecache if we're /only/ doing a readonly check of the
data fork mappings?  I see a good argument for doing so if we need to
rebuild the data maps, but since scrub never dirties anything, the page
cache needn't be nuked here.

I'm going to try changing this code only to invalidate if userspace set
IFLAG_REPAIR.

> > +	/* Got the inode, lock it and we're ready to go. */
> > +	error = xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp);
> > +	if (error)
> > +		goto out_unlock;
> > +	sc->ilock_flags |= XFS_ILOCK_EXCL;
> > +	xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
> > +
> > +	return 0;
> > +out_unlock:
> > +	xfs_iunlock(sc->ip, sc->ilock_flags);
> > +	if (sc->ip != ip)
> > +		iput(VFS_I(sc->ip));
> > +	sc->ip = NULL;
> 
> Slightly tricky - how many places do we end up having to do this?
> If its more than one, perhaps we need a xfs_scrub_irele(sc, ip)
> helper?

Three places, two of which are setup functions and the third is the
teardown function.  The teardown function is always called even if setup
fails, so I'll just purge those out and let xfs_scrub_teardown do this
once.  With a better comment.

> > +/*
> > + * Inode fork block mapping (BMBT) scrubber.
> > + * More complex than the others because we have to scrub
> > + * all the extents regardless of whether or not the fork
> > + * is in btree format.
> > + */
> > +
> > +struct xfs_scrub_bmap_info {
> > +	struct xfs_scrub_context	*sc;
> > +	xfs_daddr_t			eofs;
> > +	xfs_fileoff_t			lastoff;
> > +	bool				is_rt;
> > +	bool				is_shared;
> > +	int				whichfork;
> > +};
> > +
> > +/* Scrub a single extent record. */
> > +STATIC int
> > +xfs_scrub_bmap_extent(
> > +	struct xfs_inode		*ip,
> > +	struct xfs_btree_cur		*cur,
> > +	struct xfs_scrub_bmap_info	*info,
> > +	struct xfs_bmbt_irec		*irec)
> > +{
> > +	struct xfs_scrub_ag		sa = { 0 };
> > +	struct xfs_mount		*mp = info->sc->mp;
> > +	struct xfs_buf			*bp = NULL;
> > +	xfs_daddr_t			daddr;
> > +	xfs_daddr_t			dlen;
> > +	xfs_fsblock_t			bno;
> > +	xfs_agnumber_t			agno;
> > +	int				error = 0;
> > +
> > +	if (cur)
> > +		xfs_btree_get_block(cur, 0, &bp);
> > +
> > +	if (irec->br_startoff < info->lastoff ||
> > +	    irec->br_startblock == HOLESTARTBLOCK ||
> > +	    isnullstartblock(irec->br_startblock))
> > +		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
> > +				irec->br_startoff);
> 
> What are we checking here? that it's ordered correctly and not a
> hole/delalloc record? If it is bad, shouldn't we just jump out here
> because the following checks are likely to throw silly errors on
> hole/delalloc mappings?

Yes, I rewrote this section to use xfs_verify_fsbno and jump out if
corrupt.  I plan to make all the check functions skip the xref if
OFLAG_CORRUPT is set by the time we finish the record sanity checks.

> > +	/* Actual mapping, so check the block ranges. */
> > +	if (info->is_rt) {
> > +		daddr = XFS_FSB_TO_BB(mp, irec->br_startblock);
> > +		agno = NULLAGNUMBER;
> > +		bno = irec->br_startblock;
> > +	} else {
> > +		daddr = XFS_FSB_TO_DADDR(mp, irec->br_startblock);
> > +		agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
> > +		if (agno >= mp->m_sb.sb_agcount) {
> > +			xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
> > +				irec->br_startoff);
> > +			goto out;
> > +		}
> > +		bno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
> > +		if (bno >= mp->m_sb.sb_agblocks)
> > +			xfs_scrub_fblock_set_corrupt(info->sc,
> > +						     info->whichfork,
> > +						     irec->br_startoff);
> 
> more verify_agbno()/verify_fsbno stuff.
> 
> > +	}
> > +	dlen = XFS_FSB_TO_BB(mp, irec->br_blockcount);
> > +	if (irec->br_blockcount <= 0 ||
> > +	    irec->br_blockcount > MAXEXTLEN ||
> 
> irec->br_blockcount is unsigned (uint64_t).
> 
> Also needs to be checked against AG size.
> 
> > +	    daddr >= info->eofs ||
> > +	    daddr + dlen > info->eofs)
> > +		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
> > +				irec->br_startoff);
> > +
> > +	if (irec->br_state == XFS_EXT_UNWRITTEN &&
> > +	    !xfs_sb_version_hasextflgbit(&mp->m_sb))
> 
> Superblock scrubber should reject any filesystem without the
> extflgbit as corrupt - it's always set by mkfs - so I'm not sure we
> need to check this here.

What happens if scrub encounters a v4 filesystem without EXTFLGBIT?
The superblock scrubber only checks that the secondary superblocks are
consistent (geometry-wise) with sb 0, and mount doesn't prohibit
!EXTFLGBIT filesystems from mounting.  fallocate and friends even work,
albeit slower because we actually write zeroes to the disk in lieu of
setting the unwritten flag, apparently.

But, seeing as mkfs always sets EXTFLGBIT and v5 implies the feature
even if the bit isn't set; and there's no way to turn off the feature
bit (except unsupported things like xfs_db -x), are you suggesting that
we should simply end support for mounting !EXTFLGBIT v4 filesystems?

> > +/* Scrub an inode fork's block mappings. */
> > +STATIC int
> > +xfs_scrub_bmap(
> > +	struct xfs_scrub_context	*sc,
> > +	int				whichfork)
> > +{
> > +	struct xfs_bmbt_irec		irec;
> > +	struct xfs_scrub_bmap_info	info = {0};
> > +	struct xfs_owner_info		oinfo;
> > +	struct xfs_mount		*mp = sc->mp;
> > +	struct xfs_inode		*ip = sc->ip;
> > +	struct xfs_ifork		*ifp;
> > +	struct xfs_btree_cur		*cur;
> > +	xfs_fileoff_t			endoff;
> > +	xfs_extnum_t			idx;
> > +	bool				found;
> > +	int				error = 0;
> > +
> > +	ifp = XFS_IFORK_PTR(ip, whichfork);
> > +
> > +	info.is_rt = whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip);
> > +	info.eofs = XFS_FSB_TO_BB(mp, info.is_rt ? mp->m_sb.sb_rblocks :
> > +					      mp->m_sb.sb_dblocks);
> > +	info.whichfork = whichfork;
> > +	info.is_shared = whichfork == XFS_DATA_FORK && xfs_is_reflink_inode(ip);
> > +	info.sc = sc;
> > +
> > +	switch (whichfork) {
> > +	case XFS_COW_FORK:
> > +		/* Non-existent CoW forks are ignorable. */
> > +		if (!ifp)
> > +			goto out_unlock;
> > +		/* No CoW forks on non-reflink inodes/filesystems. */
> > +		if (!xfs_is_reflink_inode(ip)) {
> > +			xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL);
> > +			goto out_unlock;
> > +		}
> > +		break;
> > +	case XFS_ATTR_FORK:
> > +		if (!ifp)
> > +			goto out_unlock;
> > +		if (!xfs_sb_version_hasattr(&mp->m_sb) &&
> > +		    !xfs_sb_version_hasattr2(&mp->m_sb))
> > +			xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL);
> > +		break;
> > +	}
> > +
> > +	/* Check the fork values */
> > +	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
> > +	case XFS_DINODE_FMT_UUID:
> > +	case XFS_DINODE_FMT_DEV:
> > +	case XFS_DINODE_FMT_LOCAL:
> > +		/* No mappings to check. */
> > +		goto out_unlock;
> > +	case XFS_DINODE_FMT_EXTENTS:
> > +		if (!(ifp->if_flags & XFS_IFEXTENTS)) {
> > +			xfs_scrub_fblock_set_corrupt(sc, whichfork, 0);
> > +			goto out_unlock;
> > +		}
> > +		break;
> > +	case XFS_DINODE_FMT_BTREE:
> > +		ASSERT(whichfork != XFS_COW_FORK);
> 
> Corruption check, jump to out?

Oops, yes.

> > +
> > +		/* Scan the btree records. */
> > +		cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork);
> > +		xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
> > +		error = xfs_scrub_btree(sc, cur, xfs_scrub_bmapbt_helper,
> > +				&oinfo, &info);
> > +		xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR :
> > +						  XFS_BTREE_NOERROR);
> 
> FYI, I missed that the on-disk bmbt was scanned here the first time
> I went through this code - i had to go back and work out why the
> code only appeared to scrub the incore extent list. Can you wrap
> this whole chunk of code into a helper named xfs_scrub_bmbt()
> so it stands out that this is where the on disk btree is scrubbed?

Sure.  I'll update the xfs_scrub_bmap comment to point this out too.

> 
> > +		if (error == -EDEADLOCK)
> > +			return error;
> 
> Ok, why don't we go to out_unlock here?

<urk> bad code.

> 
> > +		else if (error)
> > +			goto out_unlock;
> 
> But do for all other errors....
> 
> > +		break;
> > +	default:
> > +		xfs_scrub_fblock_set_corrupt(sc, whichfork, 0);
> > +		goto out_unlock;
> > +	}
> > +
> > +	/* Extent data is in memory, so scrub that. */
> > +
> > +	/* Find the offset of the last extent in the mapping. */
> > +	error = xfs_bmap_last_offset(ip, &endoff, whichfork);
> > +	if (!xfs_scrub_fblock_op_ok(sc, whichfork, 0, &error))
> > +		goto out_unlock;
> > +
> > +	/* Scrub extent records. */
> > +	info.lastoff = 0;
> > +	ifp = XFS_IFORK_PTR(ip, whichfork);
> > +	for (found = xfs_iext_lookup_extent(ip, ifp, 0, &idx, &irec);
> > +	     found != 0;
> > +	     found = xfs_iext_get_extent(ifp, ++idx, &irec)) {
> > +		if (xfs_scrub_should_terminate(sc, &error))
> > +			break;
> > +		if (isnullstartblock(irec.br_startblock))
> > +			continue;
> > +		if (irec.br_startoff >= endoff) {
> > +			xfs_scrub_fblock_set_corrupt(sc, whichfork,
> > +					irec.br_startoff);
> > +			goto out_unlock;
> > +		}
> > +		error = xfs_scrub_bmap_extent(ip, NULL, &info, &irec);
> > +		if (error == -EDEADLOCK)
> > +			return error;
> > +	}
> > +
> > +out_unlock:
> > +	return error;
> 
> Hmmm - out_unlock doesn't unlock anything?

Heh, it never does.  Baaaaaaaaad label.

--D

> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dave Chinner Oct. 7, 2017, 11:10 p.m. UTC | #3
On Fri, Oct 06, 2017 at 10:00:44AM -0700, Darrick J. Wong wrote:
> On Fri, Oct 06, 2017 at 01:51:23PM +1100, Dave Chinner wrote:
> > On Tue, Oct 03, 2017 at 01:42:36PM -0700, Darrick J. Wong wrote:
> > > +	    daddr >= info->eofs ||
> > > +	    daddr + dlen > info->eofs)
> > > +		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
> > > +				irec->br_startoff);
> > > +
> > > +	if (irec->br_state == XFS_EXT_UNWRITTEN &&
> > > +	    !xfs_sb_version_hasextflgbit(&mp->m_sb))
> > 
> > Superblock scrubber should reject any filesystem without the
> > extflgbit as corrupt - it's always set by mkfs - so I'm not sure we
> > need to check this here.
> 
> What happens if scrub encounters a v4 filesystem without EXTFLGBIT?

We don't support such filesystems (user level stale data exposure
security risk), but we have allowed them to continue to mount
because such filesystems did exist in the past (*cough* SGI DMF
database partitions *cough*). I'd suggest scrub should say
filesystems without the flag are bad are too old to be scrubbed
correctly.

> The superblock scrubber only checks that the secondary superblocks are
> consistent (geometry-wise) with sb 0, and mount doesn't prohibit
> !EXTFLGBIT filesystems from mounting.  fallocate and friends even work,
> albeit slower because we actually write zeroes to the disk in lieu of
> setting the unwritten flag, apparently.

Unwritten extents were enabled by default in 2003, and the mkfs flag
was dropped completely in 2007. So no filesystem made in the past
ten years should have this set.

> But, seeing as mkfs always sets EXTFLGBIT and v5 implies the feature
> even if the bit isn't set; and there's no way to turn off the feature
> bit (except unsupported things like xfs_db -x), are you suggesting that
> we should simply end support for mounting !EXTFLGBIT v4 filesystems?

I wasn't suggesting that, but perhaps we should.

> > > +
> > > +out_unlock:
> > > +	return error;
> > 
> > Hmmm - out_unlock doesn't unlock anything?
> 
> Heh, it never does.  Baaaaaaaaad label.

:P

Cheers,

Dave.
Darrick J. Wong Oct. 8, 2017, 3:54 a.m. UTC | #4
On Sun, Oct 08, 2017 at 10:10:55AM +1100, Dave Chinner wrote:
> On Fri, Oct 06, 2017 at 10:00:44AM -0700, Darrick J. Wong wrote:
> > On Fri, Oct 06, 2017 at 01:51:23PM +1100, Dave Chinner wrote:
> > > On Tue, Oct 03, 2017 at 01:42:36PM -0700, Darrick J. Wong wrote:
> > > > +	    daddr >= info->eofs ||
> > > > +	    daddr + dlen > info->eofs)
> > > > +		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
> > > > +				irec->br_startoff);
> > > > +
> > > > +	if (irec->br_state == XFS_EXT_UNWRITTEN &&
> > > > +	    !xfs_sb_version_hasextflgbit(&mp->m_sb))
> > > 
> > > Superblock scrubber should reject any filesystem without the
> > > extflgbit as corrupt - it's always set by mkfs - so I'm not sure we
> > > need to check this here.
> > 
> > What happens if scrub encounters a v4 filesystem without EXTFLGBIT?
> 
> We don't support such filesystems (user level stale data exposure
> security risk), but we have allowed them to continue to mount
> because such filesystems did exist in the past (*cough* SGI DMF
> database partitions *cough*). I'd suggest scrub should say
> filesystems without the flag are bad are too old to be scrubbed
> correctly.

Ok, works for me.

> > The superblock scrubber only checks that the secondary superblocks are
> > consistent (geometry-wise) with sb 0, and mount doesn't prohibit
> > !EXTFLGBIT filesystems from mounting.  fallocate and friends even work,
> > albeit slower because we actually write zeroes to the disk in lieu of
> > setting the unwritten flag, apparently.
> 
> Unwritten extents were enabled by default in 2003, and the mkfs flag
> was dropped completely in 2007. So no filesystem made in the past
> ten years should have this set.
> 
> > But, seeing as mkfs always sets EXTFLGBIT and v5 implies the feature
> > even if the bit isn't set; and there's no way to turn off the feature
> > bit (except unsupported things like xfs_db -x), are you suggesting that
> > we should simply end support for mounting !EXTFLGBIT v4 filesystems?
> 
> I wasn't suggesting that, but perhaps we should.

Anyone want to send a patch to get people out of the woodwork?

:)

> > > > +
> > > > +out_unlock:
> > > > +	return error;
> > > 
> > > Hmmm - out_unlock doesn't unlock anything?
> > 
> > Heh, it never does.  Baaaaaaaaad label.

(Fixed.)

--D

> :P
> 
> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 28e14b7..5a77489 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -148,6 +148,7 @@  xfs-y				+= $(addprefix scrub/, \
 				   trace.o \
 				   agheader.o \
 				   alloc.o \
+				   bmap.o \
 				   btree.o \
 				   common.o \
 				   ialloc.o \
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index f8463e0..02ae58b 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -495,9 +495,12 @@  struct xfs_scrub_metadata {
 #define XFS_SCRUB_TYPE_RMAPBT	9	/* reverse mapping btree */
 #define XFS_SCRUB_TYPE_REFCNTBT	10	/* reference count btree */
 #define XFS_SCRUB_TYPE_INODE	11	/* inode record */
+#define XFS_SCRUB_TYPE_BMBTD	12	/* data fork block mapping */
+#define XFS_SCRUB_TYPE_BMBTA	13	/* attr fork block mapping */
+#define XFS_SCRUB_TYPE_BMBTC	14	/* CoW fork block mapping */
 
 /* Number of scrub subcommands. */
-#define XFS_SCRUB_TYPE_NR	12
+#define XFS_SCRUB_TYPE_NR	15
 
 /* i: Repair this metadata. */
 #define XFS_SCRUB_IFLAG_REPAIR		(1 << 0)
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
new file mode 100644
index 0000000..57f1bbe
--- /dev/null
+++ b/fs/xfs/scrub/bmap.c
@@ -0,0 +1,371 @@ 
+/*
+ * Copyright (C) 2017 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_inode_fork.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_util.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_rmap.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/btree.h"
+#include "scrub/trace.h"
+
+/* Set us up with an inode's bmap. */
+STATIC int
+__xfs_scrub_setup_inode_bmap(
+	struct xfs_scrub_context	*sc,
+	struct xfs_inode		*ip,
+	bool				flush_data)
+{
+	struct xfs_mount		*mp = sc->mp;
+	int				error;
+
+	error = xfs_scrub_get_inode(sc, ip);
+	if (error)
+		return error;
+
+	sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
+	xfs_ilock(sc->ip, sc->ilock_flags);
+
+	/*
+	 * We don't want any ephemeral data fork updates sitting around
+	 * while we inspect block mappings, so wait for directio to finish
+	 * and flush dirty data if we have delalloc reservations.
+	 */
+	if (S_ISREG(VFS_I(sc->ip)->i_mode) && flush_data) {
+		inode_dio_wait(VFS_I(sc->ip));
+		error = filemap_write_and_wait(VFS_I(sc->ip)->i_mapping);
+		if (error)
+			goto out_unlock;
+		error = invalidate_inode_pages2(VFS_I(sc->ip)->i_mapping);
+		if (error)
+			goto out_unlock;
+	}
+
+	/* Got the inode, lock it and we're ready to go. */
+	error = xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp);
+	if (error)
+		goto out_unlock;
+	sc->ilock_flags |= XFS_ILOCK_EXCL;
+	xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
+
+	return 0;
+out_unlock:
+	xfs_iunlock(sc->ip, sc->ilock_flags);
+	if (sc->ip != ip)
+		iput(VFS_I(sc->ip));
+	sc->ip = NULL;
+	return error;
+}
+
+/* Set us up to scrub the data fork. */
+int
+xfs_scrub_setup_inode_bmap_data(
+	struct xfs_scrub_context	*sc,
+	struct xfs_inode		*ip)
+{
+	return __xfs_scrub_setup_inode_bmap(sc, ip, true);
+}
+
+/* Set us up to scrub the attr or CoW fork. */
+int
+xfs_scrub_setup_inode_bmap(
+	struct xfs_scrub_context	*sc,
+	struct xfs_inode		*ip)
+{
+	return __xfs_scrub_setup_inode_bmap(sc, ip, false);
+}
+
+/*
+ * Inode fork block mapping (BMBT) scrubber.
+ * More complex than the others because we have to scrub
+ * all the extents regardless of whether or not the fork
+ * is in btree format.
+ */
+
+struct xfs_scrub_bmap_info {
+	struct xfs_scrub_context	*sc;
+	xfs_daddr_t			eofs;
+	xfs_fileoff_t			lastoff;
+	bool				is_rt;
+	bool				is_shared;
+	int				whichfork;
+};
+
+/* Scrub a single extent record. */
+STATIC int
+xfs_scrub_bmap_extent(
+	struct xfs_inode		*ip,
+	struct xfs_btree_cur		*cur,
+	struct xfs_scrub_bmap_info	*info,
+	struct xfs_bmbt_irec		*irec)
+{
+	struct xfs_scrub_ag		sa = { 0 };
+	struct xfs_mount		*mp = info->sc->mp;
+	struct xfs_buf			*bp = NULL;
+	xfs_daddr_t			daddr;
+	xfs_daddr_t			dlen;
+	xfs_fsblock_t			bno;
+	xfs_agnumber_t			agno;
+	int				error = 0;
+
+	if (cur)
+		xfs_btree_get_block(cur, 0, &bp);
+
+	if (irec->br_startoff < info->lastoff ||
+	    irec->br_startblock == HOLESTARTBLOCK ||
+	    isnullstartblock(irec->br_startblock))
+		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
+				irec->br_startoff);
+
+	/* Actual mapping, so check the block ranges. */
+	if (info->is_rt) {
+		daddr = XFS_FSB_TO_BB(mp, irec->br_startblock);
+		agno = NULLAGNUMBER;
+		bno = irec->br_startblock;
+	} else {
+		daddr = XFS_FSB_TO_DADDR(mp, irec->br_startblock);
+		agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
+		if (agno >= mp->m_sb.sb_agcount) {
+			xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
+				irec->br_startoff);
+			goto out;
+		}
+		bno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
+		if (bno >= mp->m_sb.sb_agblocks)
+			xfs_scrub_fblock_set_corrupt(info->sc,
+						     info->whichfork,
+						     irec->br_startoff);
+	}
+	dlen = XFS_FSB_TO_BB(mp, irec->br_blockcount);
+	if (irec->br_blockcount <= 0 ||
+	    irec->br_blockcount > MAXEXTLEN ||
+	    daddr >= info->eofs ||
+	    daddr + dlen > info->eofs)
+		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
+				irec->br_startoff);
+
+	if (irec->br_state == XFS_EXT_UNWRITTEN &&
+	    !xfs_sb_version_hasextflgbit(&mp->m_sb))
+		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
+				irec->br_startoff);
+
+	/* Set ourselves up for cross-referencing later. */
+	if (!info->is_rt) {
+		error = xfs_scrub_ag_init(info->sc, agno, &sa);
+		if (!xfs_scrub_fblock_op_ok(info->sc, info->whichfork,
+				irec->br_startoff, &error))
+			goto out;
+	}
+
+	xfs_scrub_ag_free(info->sc, &sa);
+out:
+	info->lastoff = irec->br_startoff + irec->br_blockcount;
+	return error;
+}
+
+/* Scrub a bmbt record. */
+STATIC int
+xfs_scrub_bmapbt_helper(
+	struct xfs_scrub_btree		*bs,
+	union xfs_btree_rec		*rec)
+{
+	struct xfs_bmbt_rec_host	ihost;
+	struct xfs_bmbt_irec		irec;
+	struct xfs_scrub_bmap_info	*info = bs->private;
+	struct xfs_inode		*ip = bs->cur->bc_private.b.ip;
+	struct xfs_buf			*bp = NULL;
+	struct xfs_btree_block		*block;
+	uint64_t			owner;
+	int				i;
+
+	/*
+	 * Check the owners of the btree blocks up to the level below
+	 * the root since the verifiers don't do that.
+	 */
+	if (xfs_sb_version_hascrc(&bs->cur->bc_mp->m_sb) &&
+	    bs->cur->bc_ptrs[0] == 1) {
+		for (i = 0; i < bs->cur->bc_nlevels - 1; i++) {
+			block = xfs_btree_get_block(bs->cur, i, &bp);
+			owner = be64_to_cpu(block->bb_u.l.bb_owner);
+			if (owner != ip->i_ino)
+				xfs_scrub_fblock_set_corrupt(bs->sc,
+						info->whichfork, 0);
+		}
+	}
+
+	/* Set up the in-core record and scrub it. */
+	ihost.l0 = be64_to_cpu(rec->bmbt.l0);
+	ihost.l1 = be64_to_cpu(rec->bmbt.l1);
+	xfs_bmbt_get_all(&ihost, &irec);
+	return xfs_scrub_bmap_extent(ip, bs->cur, info, &irec);
+}
+
+/* Scrub an inode fork's block mappings. */
+STATIC int
+xfs_scrub_bmap(
+	struct xfs_scrub_context	*sc,
+	int				whichfork)
+{
+	struct xfs_bmbt_irec		irec;
+	struct xfs_scrub_bmap_info	info = {0};
+	struct xfs_owner_info		oinfo;
+	struct xfs_mount		*mp = sc->mp;
+	struct xfs_inode		*ip = sc->ip;
+	struct xfs_ifork		*ifp;
+	struct xfs_btree_cur		*cur;
+	xfs_fileoff_t			endoff;
+	xfs_extnum_t			idx;
+	bool				found;
+	int				error = 0;
+
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+
+	info.is_rt = whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip);
+	info.eofs = XFS_FSB_TO_BB(mp, info.is_rt ? mp->m_sb.sb_rblocks :
+					      mp->m_sb.sb_dblocks);
+	info.whichfork = whichfork;
+	info.is_shared = whichfork == XFS_DATA_FORK && xfs_is_reflink_inode(ip);
+	info.sc = sc;
+
+	switch (whichfork) {
+	case XFS_COW_FORK:
+		/* Non-existent CoW forks are ignorable. */
+		if (!ifp)
+			goto out_unlock;
+		/* No CoW forks on non-reflink inodes/filesystems. */
+		if (!xfs_is_reflink_inode(ip)) {
+			xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL);
+			goto out_unlock;
+		}
+		break;
+	case XFS_ATTR_FORK:
+		if (!ifp)
+			goto out_unlock;
+		if (!xfs_sb_version_hasattr(&mp->m_sb) &&
+		    !xfs_sb_version_hasattr2(&mp->m_sb))
+			xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL);
+		break;
+	}
+
+	/* Check the fork values */
+	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
+	case XFS_DINODE_FMT_UUID:
+	case XFS_DINODE_FMT_DEV:
+	case XFS_DINODE_FMT_LOCAL:
+		/* No mappings to check. */
+		goto out_unlock;
+	case XFS_DINODE_FMT_EXTENTS:
+		if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+			xfs_scrub_fblock_set_corrupt(sc, whichfork, 0);
+			goto out_unlock;
+		}
+		break;
+	case XFS_DINODE_FMT_BTREE:
+		ASSERT(whichfork != XFS_COW_FORK);
+
+		/* Scan the btree records. */
+		cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork);
+		xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
+		error = xfs_scrub_btree(sc, cur, xfs_scrub_bmapbt_helper,
+				&oinfo, &info);
+		xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR :
+						  XFS_BTREE_NOERROR);
+		if (error == -EDEADLOCK)
+			return error;
+		else if (error)
+			goto out_unlock;
+		break;
+	default:
+		xfs_scrub_fblock_set_corrupt(sc, whichfork, 0);
+		goto out_unlock;
+	}
+
+	/* Extent data is in memory, so scrub that. */
+
+	/* Find the offset of the last extent in the mapping. */
+	error = xfs_bmap_last_offset(ip, &endoff, whichfork);
+	if (!xfs_scrub_fblock_op_ok(sc, whichfork, 0, &error))
+		goto out_unlock;
+
+	/* Scrub extent records. */
+	info.lastoff = 0;
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	for (found = xfs_iext_lookup_extent(ip, ifp, 0, &idx, &irec);
+	     found != 0;
+	     found = xfs_iext_get_extent(ifp, ++idx, &irec)) {
+		if (xfs_scrub_should_terminate(sc, &error))
+			break;
+		if (isnullstartblock(irec.br_startblock))
+			continue;
+		if (irec.br_startoff >= endoff) {
+			xfs_scrub_fblock_set_corrupt(sc, whichfork,
+					irec.br_startoff);
+			goto out_unlock;
+		}
+		error = xfs_scrub_bmap_extent(ip, NULL, &info, &irec);
+		if (error == -EDEADLOCK)
+			return error;
+	}
+
+out_unlock:
+	return error;
+}
+
+/* Scrub an inode's data fork. */
+int
+xfs_scrub_bmap_data(
+	struct xfs_scrub_context	*sc)
+{
+	return xfs_scrub_bmap(sc, XFS_DATA_FORK);
+}
+
+/* Scrub an inode's attr fork. */
+int
+xfs_scrub_bmap_attr(
+	struct xfs_scrub_context	*sc)
+{
+	return xfs_scrub_bmap(sc, XFS_ATTR_FORK);
+}
+
+/* Scrub an inode's CoW fork. */
+int
+xfs_scrub_bmap_cow(
+	struct xfs_scrub_context	*sc)
+{
+	if (!xfs_is_reflink_inode(sc->ip))
+		return -ENOENT;
+
+	return xfs_scrub_bmap(sc, XFS_COW_FORK);
+}
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index c8143e8..3e87e3a 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -90,7 +90,10 @@  int xfs_scrub_setup_ag_refcountbt(struct xfs_scrub_context *sc,
 				  struct xfs_inode *ip);
 int xfs_scrub_setup_inode(struct xfs_scrub_context *sc,
 			  struct xfs_inode *ip);
-
+int xfs_scrub_setup_inode_bmap(struct xfs_scrub_context *sc,
+			       struct xfs_inode *ip);
+int xfs_scrub_setup_inode_bmap_data(struct xfs_scrub_context *sc,
+				    struct xfs_inode *ip);
 
 void xfs_scrub_ag_free(struct xfs_scrub_context *sc, struct xfs_scrub_ag *sa);
 int xfs_scrub_ag_init(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index f014ef0..b10d627 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -209,6 +209,18 @@  static const struct xfs_scrub_meta_ops meta_scrub_ops[] = {
 		.setup	= xfs_scrub_setup_inode,
 		.scrub	= xfs_scrub_inode,
 	},
+	{ /* inode data fork */
+		.setup	= xfs_scrub_setup_inode_bmap_data,
+		.scrub	= xfs_scrub_bmap_data,
+	},
+	{ /* inode attr fork */
+		.setup	= xfs_scrub_setup_inode_bmap,
+		.scrub	= xfs_scrub_bmap_attr,
+	},
+	{ /* inode CoW fork */
+		.setup	= xfs_scrub_setup_inode_bmap,
+		.scrub	= xfs_scrub_bmap_cow,
+	},
 };
 
 /* This isn't a stable feature, warn once per day. */
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index ec635d4..8920ccf 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -79,5 +79,8 @@  int xfs_scrub_finobt(struct xfs_scrub_context *sc);
 int xfs_scrub_rmapbt(struct xfs_scrub_context *sc);
 int xfs_scrub_refcountbt(struct xfs_scrub_context *sc);
 int xfs_scrub_inode(struct xfs_scrub_context *sc);
+int xfs_scrub_bmap_data(struct xfs_scrub_context *sc);
+int xfs_scrub_bmap_attr(struct xfs_scrub_context *sc);
+int xfs_scrub_bmap_cow(struct xfs_scrub_context *sc);
 
 #endif	/* __XFS_SCRUB_SCRUB_H__ */