[5/9] xfs: report dir/attr block corruption errors to the health system
diff mbox series

Message ID 157375558620.3692735.5123638007449434510.stgit@magnolia
State New
Headers show
Series
  • xfs: report corruption to the health trackers
Related show

Commit Message

Darrick J. Wong Nov. 14, 2019, 6:19 p.m. UTC
From: Darrick J. Wong <darrick.wong@oracle.com>

Whenever we encounter corrupt directory or extended attribute blocks, we
should report that to the health monitoring system for later reporting.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_attr_leaf.c   |    5 ++++-
 fs/xfs/libxfs/xfs_attr_remote.c |   27 ++++++++++++++++-----------
 fs/xfs/libxfs/xfs_da_btree.c    |   29 ++++++++++++++++++++++++++---
 fs/xfs/libxfs/xfs_dir2.c        |    5 ++++-
 fs/xfs/libxfs/xfs_dir2_data.c   |    2 ++
 fs/xfs/libxfs/xfs_dir2_leaf.c   |    3 +++
 fs/xfs/libxfs/xfs_dir2_node.c   |    7 +++++++
 fs/xfs/libxfs/xfs_health.h      |    3 +++
 fs/xfs/xfs_attr_inactive.c      |    4 ++++
 fs/xfs/xfs_attr_list.c          |   16 +++++++++++++---
 fs/xfs/xfs_dir2_readdir.c       |    6 +++++-
 fs/xfs/xfs_health.c             |   39 +++++++++++++++++++++++++++++++++++++++
 12 files changed, 126 insertions(+), 20 deletions(-)

Comments

Brian Foster Nov. 20, 2019, 4:11 p.m. UTC | #1
On Thu, Nov 14, 2019 at 10:19:46AM -0800, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Whenever we encounter corrupt directory or extended attribute blocks, we
> should report that to the health monitoring system for later reporting.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/libxfs/xfs_attr_leaf.c   |    5 ++++-
>  fs/xfs/libxfs/xfs_attr_remote.c |   27 ++++++++++++++++-----------
>  fs/xfs/libxfs/xfs_da_btree.c    |   29 ++++++++++++++++++++++++++---
>  fs/xfs/libxfs/xfs_dir2.c        |    5 ++++-
>  fs/xfs/libxfs/xfs_dir2_data.c   |    2 ++
>  fs/xfs/libxfs/xfs_dir2_leaf.c   |    3 +++
>  fs/xfs/libxfs/xfs_dir2_node.c   |    7 +++++++
>  fs/xfs/libxfs/xfs_health.h      |    3 +++
>  fs/xfs/xfs_attr_inactive.c      |    4 ++++
>  fs/xfs/xfs_attr_list.c          |   16 +++++++++++++---
>  fs/xfs/xfs_dir2_readdir.c       |    6 +++++-
>  fs/xfs/xfs_health.c             |   39 +++++++++++++++++++++++++++++++++++++++
>  12 files changed, 126 insertions(+), 20 deletions(-)
> 
> 
...
> diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
> index e424b004e3cb..a17622dadf00 100644
> --- a/fs/xfs/libxfs/xfs_da_btree.c
> +++ b/fs/xfs/libxfs/xfs_da_btree.c
...
> @@ -1589,6 +1593,7 @@ xfs_da3_node_lookup_int(
>  
>  		if (magic != XFS_DA_NODE_MAGIC && magic != XFS_DA3_NODE_MAGIC) {
>  			xfs_buf_corruption_error(blk->bp);
> +			xfs_da_mark_sick(args);
>  			return -EFSCORRUPTED;
>  		}
>  
> @@ -1604,6 +1609,7 @@ xfs_da3_node_lookup_int(
>  		/* Tree taller than we can handle; bail out! */
>  		if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
>  			xfs_buf_corruption_error(blk->bp);
> +			xfs_da_mark_sick(args);
>  			return -EFSCORRUPTED;
>  		}
>  
> @@ -1612,6 +1618,7 @@ xfs_da3_node_lookup_int(
>  			expected_level = nodehdr.level - 1;
>  		else if (expected_level != nodehdr.level) {
>  			xfs_buf_corruption_error(blk->bp);
> +			xfs_da_mark_sick(args);
>  			return -EFSCORRUPTED;
>  		} else
>  			expected_level--;
> @@ -1663,12 +1670,16 @@ xfs_da3_node_lookup_int(
>  		}
>  
>  		/* We can't point back to the root. */
> -		if (XFS_IS_CORRUPT(dp->i_mount, blkno == args->geo->leafblk))
> +		if (XFS_IS_CORRUPT(dp->i_mount, blkno == args->geo->leafblk)) {
> +			xfs_da_mark_sick(args);
>  			return -EFSCORRUPTED;
> +		}
>  	}
>  
> -	if (XFS_IS_CORRUPT(dp->i_mount, expected_level != 0))
> +	if (XFS_IS_CORRUPT(dp->i_mount, expected_level != 0)) {
> +		xfs_da_mark_sick(args);
>  		return -EFSCORRUPTED;
> +	}
>  
>  	/*
>  	 * A leaf block that ends in the hashval that we are interested in
> @@ -1686,6 +1697,7 @@ xfs_da3_node_lookup_int(
>  			args->blkno = blk->blkno;
>  		} else {
>  			ASSERT(0);
> +			xfs_da_mark_sick(args);
>  			return -EFSCORRUPTED;
>  		}

I'm just kind of skimming through the rest for general feedback at this
point given previous comments, but it might be nice to start using exit
labels at some of these places where we're enlarging and duplicating the
error path for particular errors. It's not so much about the code in
these patches, but rather to hopefully ease maintaining these state bits
properly in new code where devs/reviewers might not know much about
scrub state or have it in mind. Short of having some kind of generic
helper to handle corruption state, ISTM that the combination of using
verifiers where possible and common exit labels anywhere else we
generate -EFSCORRUPTED at multiple places within some function could
shrink these patches a bit..

Brian

>  		if (((retval == -ENOENT) || (retval == -ENOATTR)) &&
> @@ -2250,8 +2262,10 @@ xfs_da3_swap_lastblock(
>  	error = xfs_bmap_last_before(tp, dp, &lastoff, w);
>  	if (error)
>  		return error;
> -	if (XFS_IS_CORRUPT(mp, lastoff == 0))
> +	if (XFS_IS_CORRUPT(mp, lastoff == 0)) {
> +		xfs_da_mark_sick(args);
>  		return -EFSCORRUPTED;
> +	}
>  	/*
>  	 * Read the last block in the btree space.
>  	 */
> @@ -2300,6 +2314,7 @@ xfs_da3_swap_lastblock(
>  		if (XFS_IS_CORRUPT(mp,
>  				   be32_to_cpu(sib_info->forw) != last_blkno ||
>  				   sib_info->magic != dead_info->magic)) {
> +			xfs_da_mark_sick(args);
>  			error = -EFSCORRUPTED;
>  			goto done;
>  		}
> @@ -2320,6 +2335,7 @@ xfs_da3_swap_lastblock(
>  		if (XFS_IS_CORRUPT(mp,
>  				   be32_to_cpu(sib_info->back) != last_blkno ||
>  				   sib_info->magic != dead_info->magic)) {
> +			xfs_da_mark_sick(args);
>  			error = -EFSCORRUPTED;
>  			goto done;
>  		}
> @@ -2342,6 +2358,7 @@ xfs_da3_swap_lastblock(
>  		xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node);
>  		if (XFS_IS_CORRUPT(mp,
>  				   level >= 0 && level != par_hdr.level + 1)) {
> +			xfs_da_mark_sick(args);
>  			error = -EFSCORRUPTED;
>  			goto done;
>  		}
> @@ -2353,6 +2370,7 @@ xfs_da3_swap_lastblock(
>  		     entno++)
>  			continue;
>  		if (XFS_IS_CORRUPT(mp, entno == par_hdr.count)) {
> +			xfs_da_mark_sick(args);
>  			error = -EFSCORRUPTED;
>  			goto done;
>  		}
> @@ -2378,6 +2396,7 @@ xfs_da3_swap_lastblock(
>  		xfs_trans_brelse(tp, par_buf);
>  		par_buf = NULL;
>  		if (XFS_IS_CORRUPT(mp, par_blkno == 0)) {
> +			xfs_da_mark_sick(args);
>  			error = -EFSCORRUPTED;
>  			goto done;
>  		}
> @@ -2387,6 +2406,7 @@ xfs_da3_swap_lastblock(
>  		par_node = par_buf->b_addr;
>  		xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node);
>  		if (XFS_IS_CORRUPT(mp, par_hdr.level != level)) {
> +			xfs_da_mark_sick(args);
>  			error = -EFSCORRUPTED;
>  			goto done;
>  		}
> @@ -2601,6 +2621,7 @@ xfs_dabuf_map(
>  					irecs[i].br_state);
>  			}
>  		}
> +		xfs_dirattr_mark_sick(dp, whichfork);
>  		error = -EFSCORRUPTED;
>  		goto out;
>  	}
> @@ -2693,6 +2714,8 @@ xfs_da_read_buf(
>  	error = xfs_trans_read_buf_map(dp->i_mount, trans,
>  					dp->i_mount->m_ddev_targp,
>  					mapp, nmap, 0, &bp, ops);
> +	if (xfs_metadata_is_sick(error))
> +		xfs_dirattr_mark_sick(dp, whichfork);
>  	if (error)
>  		goto out_free;
>  
> diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
> index 0aa87cbde49e..e1aa411a1b8b 100644
> --- a/fs/xfs/libxfs/xfs_dir2.c
> +++ b/fs/xfs/libxfs/xfs_dir2.c
> @@ -18,6 +18,7 @@
>  #include "xfs_errortag.h"
>  #include "xfs_error.h"
>  #include "xfs_trace.h"
> +#include "xfs_health.h"
>  
>  struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR };
>  
> @@ -608,8 +609,10 @@ xfs_dir2_isblock(
>  	rval = XFS_FSB_TO_B(args->dp->i_mount, last) == args->geo->blksize;
>  	if (XFS_IS_CORRUPT(args->dp->i_mount,
>  			   rval != 0 &&
> -			   args->dp->i_d.di_size != args->geo->blksize))
> +			   args->dp->i_d.di_size != args->geo->blksize)) {
> +		xfs_da_mark_sick(args);
>  		return -EFSCORRUPTED;
> +	}
>  	*vp = rval;
>  	return 0;
>  }
> diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
> index a6eb71a62b53..80cc9c7ea4e5 100644
> --- a/fs/xfs/libxfs/xfs_dir2_data.c
> +++ b/fs/xfs/libxfs/xfs_dir2_data.c
> @@ -18,6 +18,7 @@
>  #include "xfs_trans.h"
>  #include "xfs_buf_item.h"
>  #include "xfs_log.h"
> +#include "xfs_health.h"
>  
>  static xfs_failaddr_t xfs_dir2_data_freefind_verify(
>  		struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_free *bf,
> @@ -1170,6 +1171,7 @@ xfs_dir2_data_use_free(
>  corrupt:
>  	xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, args->dp->i_mount,
>  			hdr, sizeof(*hdr), __FILE__, __LINE__, fa);
> +	xfs_da_mark_sick(args);
>  	return -EFSCORRUPTED;
>  }
>  
> diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
> index 73edd96ce0ac..32d17420fff3 100644
> --- a/fs/xfs/libxfs/xfs_dir2_leaf.c
> +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
> @@ -19,6 +19,7 @@
>  #include "xfs_trace.h"
>  #include "xfs_trans.h"
>  #include "xfs_buf_item.h"
> +#include "xfs_health.h"
>  
>  /*
>   * Local function declarations.
> @@ -1386,8 +1387,10 @@ xfs_dir2_leaf_removename(
>  	bestsp = xfs_dir2_leaf_bests_p(ltp);
>  	if (be16_to_cpu(bestsp[db]) != oldbest) {
>  		xfs_buf_corruption_error(lbp);
> +		xfs_da_mark_sick(args);
>  		return -EFSCORRUPTED;
>  	}
> +
>  	/*
>  	 * Mark the former data entry unused.
>  	 */
> diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
> index 3a8b0625a08b..e0f3ab254a1a 100644
> --- a/fs/xfs/libxfs/xfs_dir2_node.c
> +++ b/fs/xfs/libxfs/xfs_dir2_node.c
> @@ -20,6 +20,7 @@
>  #include "xfs_trans.h"
>  #include "xfs_buf_item.h"
>  #include "xfs_log.h"
> +#include "xfs_health.h"
>  
>  /*
>   * Function declarations.
> @@ -228,6 +229,7 @@ __xfs_dir3_free_read(
>  	if (fa) {
>  		xfs_verifier_error(*bpp, -EFSCORRUPTED, fa);
>  		xfs_trans_brelse(tp, *bpp);
> +		xfs_dirattr_mark_sick(dp, XFS_DATA_FORK);
>  		return -EFSCORRUPTED;
>  	}
>  
> @@ -440,6 +442,7 @@ xfs_dir2_leaf_to_node(
>  	if (be32_to_cpu(ltp->bestcount) >
>  				(uint)dp->i_d.di_size / args->geo->blksize) {
>  		xfs_buf_corruption_error(lbp);
> +		xfs_da_mark_sick(args);
>  		return -EFSCORRUPTED;
>  	}
>  
> @@ -514,6 +517,7 @@ xfs_dir2_leafn_add(
>  	 */
>  	if (index < 0) {
>  		xfs_buf_corruption_error(bp);
> +		xfs_da_mark_sick(args);
>  		return -EFSCORRUPTED;
>  	}
>  
> @@ -733,6 +737,7 @@ xfs_dir2_leafn_lookup_for_addname(
>  					   cpu_to_be16(NULLDATAOFF))) {
>  				if (curfdb != newfdb)
>  					xfs_trans_brelse(tp, curbp);
> +				xfs_da_mark_sick(args);
>  				return -EFSCORRUPTED;
>  			}
>  			curfdb = newfdb;
> @@ -801,6 +806,7 @@ xfs_dir2_leafn_lookup_for_entry(
>  	xfs_dir3_leaf_check(dp, bp);
>  	if (leafhdr.count <= 0) {
>  		xfs_buf_corruption_error(bp);
> +		xfs_da_mark_sick(args);
>  		return -EFSCORRUPTED;
>  	}
>  
> @@ -1737,6 +1743,7 @@ xfs_dir2_node_add_datablk(
>  			} else {
>  				xfs_alert(mp, " ... fblk is NULL");
>  			}
> +			xfs_da_mark_sick(args);
>  			return -EFSCORRUPTED;
>  		}
>  
> diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h
> index 2049419e9555..d9404cd3d09b 100644
> --- a/fs/xfs/libxfs/xfs_health.h
> +++ b/fs/xfs/libxfs/xfs_health.h
> @@ -38,6 +38,7 @@ struct xfs_perag;
>  struct xfs_inode;
>  struct xfs_fsop_geom;
>  struct xfs_btree_cur;
> +struct xfs_da_args;
>  
>  /* Observable health issues for metadata spanning the entire filesystem. */
>  #define XFS_SICK_FS_COUNTERS	(1 << 0)  /* summary counters */
> @@ -141,6 +142,8 @@ void xfs_inode_measure_sickness(struct xfs_inode *ip, unsigned int *sick,
>  void xfs_health_unmount(struct xfs_mount *mp);
>  void xfs_bmap_mark_sick(struct xfs_inode *ip, int whichfork);
>  void xfs_btree_mark_sick(struct xfs_btree_cur *cur);
> +void xfs_dirattr_mark_sick(struct xfs_inode *ip, int whichfork);
> +void xfs_da_mark_sick(struct xfs_da_args *args);
>  
>  /* Now some helpers. */
>  
> diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
> index a78c501f6fb1..429a97494ffa 100644
> --- a/fs/xfs/xfs_attr_inactive.c
> +++ b/fs/xfs/xfs_attr_inactive.c
> @@ -23,6 +23,7 @@
>  #include "xfs_quota.h"
>  #include "xfs_dir2.h"
>  #include "xfs_error.h"
> +#include "xfs_health.h"
>  
>  /*
>   * Look at all the extents for this logical region,
> @@ -209,6 +210,7 @@ xfs_attr3_node_inactive(
>  	if (level > XFS_DA_NODE_MAXDEPTH) {
>  		xfs_trans_brelse(*trans, bp);	/* no locks for later trans */
>  		xfs_buf_corruption_error(bp);
> +		xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
>  		return -EFSCORRUPTED;
>  	}
>  
> @@ -256,6 +258,7 @@ xfs_attr3_node_inactive(
>  			error = xfs_attr3_leaf_inactive(trans, dp, child_bp);
>  			break;
>  		default:
> +			xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
>  			xfs_buf_corruption_error(child_bp);
>  			xfs_trans_brelse(*trans, child_bp);
>  			error = -EFSCORRUPTED;
> @@ -342,6 +345,7 @@ xfs_attr3_root_inactive(
>  		error = xfs_attr3_leaf_inactive(trans, dp, bp);
>  		break;
>  	default:
> +		xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
>  		error = -EFSCORRUPTED;
>  		xfs_buf_corruption_error(bp);
>  		xfs_trans_brelse(*trans, bp);
> diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
> index 7a099df88a0c..1a2a3d4ce422 100644
> --- a/fs/xfs/xfs_attr_list.c
> +++ b/fs/xfs/xfs_attr_list.c
> @@ -21,6 +21,7 @@
>  #include "xfs_error.h"
>  #include "xfs_trace.h"
>  #include "xfs_dir2.h"
> +#include "xfs_health.h"
>  
>  STATIC int
>  xfs_attr_shortform_compare(const void *a, const void *b)
> @@ -88,8 +89,10 @@ xfs_attr_shortform_list(
>  		for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
>  			if (XFS_IS_CORRUPT(context->dp->i_mount,
>  					   !xfs_attr_namecheck(sfe->nameval,
> -							       sfe->namelen)))
> +							       sfe->namelen))) {
> +				xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
>  				return -EFSCORRUPTED;
> +			}
>  			context->put_listent(context,
>  					     sfe->flags,
>  					     sfe->nameval,
> @@ -131,6 +134,7 @@ xfs_attr_shortform_list(
>  					     context->dp->i_mount, sfe,
>  					     sizeof(*sfe));
>  			kmem_free(sbuf);
> +			xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
>  			return -EFSCORRUPTED;
>  		}
>  
> @@ -181,6 +185,7 @@ xfs_attr_shortform_list(
>  		if (XFS_IS_CORRUPT(context->dp->i_mount,
>  				   !xfs_attr_namecheck(sbp->name,
>  						       sbp->namelen))) {
> +			xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
>  			error = -EFSCORRUPTED;
>  			goto out;
>  		}
> @@ -268,8 +273,10 @@ xfs_attr_node_list_lookup(
>  			return 0;
>  
>  		/* We can't point back to the root. */
> -		if (XFS_IS_CORRUPT(mp, cursor->blkno == 0))
> +		if (XFS_IS_CORRUPT(mp, cursor->blkno == 0)) {
> +			xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
>  			return -EFSCORRUPTED;
> +		}
>  	}
>  
>  	if (expected_level != 0)
> @@ -281,6 +288,7 @@ xfs_attr_node_list_lookup(
>  out_corruptbuf:
>  	xfs_buf_corruption_error(bp);
>  	xfs_trans_brelse(tp, bp);
> +	xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
>  	return -EFSCORRUPTED;
>  }
>  
> @@ -471,8 +479,10 @@ xfs_attr3_leaf_list_int(
>  		}
>  
>  		if (XFS_IS_CORRUPT(context->dp->i_mount,
> -				   !xfs_attr_namecheck(name, namelen)))
> +				   !xfs_attr_namecheck(name, namelen))) {
> +			xfs_dirattr_mark_sick(context->dp, XFS_ATTR_FORK);
>  			return -EFSCORRUPTED;
> +		}
>  		context->put_listent(context, entry->flags,
>  					      name, namelen, valuelen);
>  		if (context->seen_enough)
> diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
> index 95bc9ef8f5f9..715ded503334 100644
> --- a/fs/xfs/xfs_dir2_readdir.c
> +++ b/fs/xfs/xfs_dir2_readdir.c
> @@ -18,6 +18,7 @@
>  #include "xfs_bmap.h"
>  #include "xfs_trans.h"
>  #include "xfs_error.h"
> +#include "xfs_health.h"
>  
>  /*
>   * Directory file type support functions
> @@ -119,8 +120,10 @@ xfs_dir2_sf_getdents(
>  		ctx->pos = off & 0x7fffffff;
>  		if (XFS_IS_CORRUPT(dp->i_mount,
>  				   !xfs_dir2_namecheck(sfep->name,
> -						       sfep->namelen)))
> +						       sfep->namelen))) {
> +			xfs_dirattr_mark_sick(dp, XFS_DATA_FORK);
>  			return -EFSCORRUPTED;
> +		}
>  		if (!dir_emit(ctx, (char *)sfep->name, sfep->namelen, ino,
>  			    xfs_dir3_get_dtype(mp, filetype)))
>  			return 0;
> @@ -461,6 +464,7 @@ xfs_dir2_leaf_getdents(
>  		if (XFS_IS_CORRUPT(dp->i_mount,
>  				   !xfs_dir2_namecheck(dep->name,
>  						       dep->namelen))) {
> +			xfs_dirattr_mark_sick(dp, XFS_DATA_FORK);
>  			error = -EFSCORRUPTED;
>  			break;
>  		}
> diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c
> index 1f09027c55ad..c1b6e8fb72ec 100644
> --- a/fs/xfs/xfs_health.c
> +++ b/fs/xfs/xfs_health.c
> @@ -15,6 +15,8 @@
>  #include "xfs_trace.h"
>  #include "xfs_health.h"
>  #include "xfs_btree.h"
> +#include "xfs_da_format.h"
> +#include "xfs_da_btree.h"
>  
>  /*
>   * Warn about metadata corruption that we detected but haven't fixed, and
> @@ -517,3 +519,40 @@ xfs_btree_mark_sick(
>  
>  	xfs_agno_mark_sick(cur->bc_mp, cur->bc_private.a.agno, mask);
>  }
> +
> +/*
> + * Record observations of dir/attr btree corruption with the health tracking
> + * system.
> + */
> +void
> +xfs_dirattr_mark_sick(
> +	struct xfs_inode	*ip,
> +	int			whichfork)
> +{
> +	unsigned int		mask;
> +
> +	switch (whichfork) {
> +	case XFS_DATA_FORK:
> +		mask = XFS_SICK_INO_DIR;
> +		break;
> +	case XFS_ATTR_FORK:
> +		mask = XFS_SICK_INO_XATTR;
> +		break;
> +	default:
> +		ASSERT(0);
> +		return;
> +	}
> +
> +	xfs_inode_mark_sick(ip, mask);
> +}
> +
> +/*
> + * Record observations of dir/attr btree corruption with the health tracking
> + * system.
> + */
> +void
> +xfs_da_mark_sick(
> +	struct xfs_da_args	*args)
> +{
> +	xfs_dirattr_mark_sick(args->dp, args->whichfork);
> +}
>
Darrick J. Wong Nov. 20, 2019, 4:55 p.m. UTC | #2
On Wed, Nov 20, 2019 at 11:11:47AM -0500, Brian Foster wrote:
> On Thu, Nov 14, 2019 at 10:19:46AM -0800, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > Whenever we encounter corrupt directory or extended attribute blocks, we
> > should report that to the health monitoring system for later reporting.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> >  fs/xfs/libxfs/xfs_attr_leaf.c   |    5 ++++-
> >  fs/xfs/libxfs/xfs_attr_remote.c |   27 ++++++++++++++++-----------
> >  fs/xfs/libxfs/xfs_da_btree.c    |   29 ++++++++++++++++++++++++++---
> >  fs/xfs/libxfs/xfs_dir2.c        |    5 ++++-
> >  fs/xfs/libxfs/xfs_dir2_data.c   |    2 ++
> >  fs/xfs/libxfs/xfs_dir2_leaf.c   |    3 +++
> >  fs/xfs/libxfs/xfs_dir2_node.c   |    7 +++++++
> >  fs/xfs/libxfs/xfs_health.h      |    3 +++
> >  fs/xfs/xfs_attr_inactive.c      |    4 ++++
> >  fs/xfs/xfs_attr_list.c          |   16 +++++++++++++---
> >  fs/xfs/xfs_dir2_readdir.c       |    6 +++++-
> >  fs/xfs/xfs_health.c             |   39 +++++++++++++++++++++++++++++++++++++++
> >  12 files changed, 126 insertions(+), 20 deletions(-)
> > 
> > 
> ...
> > diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
> > index e424b004e3cb..a17622dadf00 100644
> > --- a/fs/xfs/libxfs/xfs_da_btree.c
> > +++ b/fs/xfs/libxfs/xfs_da_btree.c
> ...
> > @@ -1589,6 +1593,7 @@ xfs_da3_node_lookup_int(
> >  
> >  		if (magic != XFS_DA_NODE_MAGIC && magic != XFS_DA3_NODE_MAGIC) {
> >  			xfs_buf_corruption_error(blk->bp);
> > +			xfs_da_mark_sick(args);
> >  			return -EFSCORRUPTED;
> >  		}
> >  
> > @@ -1604,6 +1609,7 @@ xfs_da3_node_lookup_int(
> >  		/* Tree taller than we can handle; bail out! */
> >  		if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
> >  			xfs_buf_corruption_error(blk->bp);
> > +			xfs_da_mark_sick(args);
> >  			return -EFSCORRUPTED;
> >  		}
> >  
> > @@ -1612,6 +1618,7 @@ xfs_da3_node_lookup_int(
> >  			expected_level = nodehdr.level - 1;
> >  		else if (expected_level != nodehdr.level) {
> >  			xfs_buf_corruption_error(blk->bp);
> > +			xfs_da_mark_sick(args);
> >  			return -EFSCORRUPTED;
> >  		} else
> >  			expected_level--;
> > @@ -1663,12 +1670,16 @@ xfs_da3_node_lookup_int(
> >  		}
> >  
> >  		/* We can't point back to the root. */
> > -		if (XFS_IS_CORRUPT(dp->i_mount, blkno == args->geo->leafblk))
> > +		if (XFS_IS_CORRUPT(dp->i_mount, blkno == args->geo->leafblk)) {
> > +			xfs_da_mark_sick(args);
> >  			return -EFSCORRUPTED;
> > +		}
> >  	}
> >  
> > -	if (XFS_IS_CORRUPT(dp->i_mount, expected_level != 0))
> > +	if (XFS_IS_CORRUPT(dp->i_mount, expected_level != 0)) {
> > +		xfs_da_mark_sick(args);
> >  		return -EFSCORRUPTED;
> > +	}
> >  
> >  	/*
> >  	 * A leaf block that ends in the hashval that we are interested in
> > @@ -1686,6 +1697,7 @@ xfs_da3_node_lookup_int(
> >  			args->blkno = blk->blkno;
> >  		} else {
> >  			ASSERT(0);
> > +			xfs_da_mark_sick(args);
> >  			return -EFSCORRUPTED;
> >  		}
> 
> I'm just kind of skimming through the rest for general feedback at this
> point given previous comments, but it might be nice to start using exit
> labels at some of these places where we're enlarging and duplicating the
> error path for particular errors.

Yeah.  This current iteration is pretty wordy since I used coccinelle to
find all the EFSCORRUPTED clauses and inject the appropriate _mark_sick
call.

> It's not so much about the code in
> these patches, but rather to hopefully ease maintaining these state bits
> properly in new code where devs/reviewers might not know much about
> scrub state or have it in mind. Short of having some kind of generic
> helper to handle corruption state, ISTM that the combination of using
> verifiers where possible and common exit labels anywhere else we
> generate -EFSCORRUPTED at multiple places within some function could
> shrink these patches a bit..

<nod> Eric suggested on IRC that maybe the _mark_sick functions should
return EFSCORRUPTED so that we could at least collapse that to:

if (XFS_IS_CORRUPT(...)) {
	error = xfs_da_mark_sick(...);
	goto barf;
}

However, doing it the wordy way I've done it has the neat effects (IMHO)
that you can find all the places where xfs decides some metadata is
corrupt by grepping for EFSCORRUPTED, and confirm that each place it
does that also has a corresponding _mark_sick call.

I guess you could create a dorky shouty wrapper to maintain that greppy
property:

#define XFS_DA_EFSCORRUPTED(...) \
	(xfs_da_mark_sick(...), -EFSCORRUPTED)

But... that might be stylistically undesirable.  OTOH I guess it
wouldn't be so bad either to do:

	if (XFS_IS_CORRUPT(...)) {
		error = -EFSCORRUPTED;
		goto bad;
	}

	if (XFS_IS_CORRUPT(...)) {
		error = -EFSCORRUPTED;
		goto bad;
	}

	return 0;
bad:
	if (error == -EFSCORRUPTED)
		xfs_da_mark_sick(...);
	return error;

Or using the shouty macro above:

	if (XFS_IS_CORRUPT(...)) {
		error = XFS_DA_EFSCORRUPTED(...);
		goto bad;
	}

	if (XFS_IS_CORRUPT(...)) {
		error = XFS_DA_EFSCORRUPTED(...);
		goto bad;
	}

bad:
	return error;

I'll think about that.  It doesn't sound so bad when coding it up in
this email.

--D

> 
> Brian
> 
> >  		if (((retval == -ENOENT) || (retval == -ENOATTR)) &&
> > @@ -2250,8 +2262,10 @@ xfs_da3_swap_lastblock(
> >  	error = xfs_bmap_last_before(tp, dp, &lastoff, w);
> >  	if (error)
> >  		return error;
> > -	if (XFS_IS_CORRUPT(mp, lastoff == 0))
> > +	if (XFS_IS_CORRUPT(mp, lastoff == 0)) {
> > +		xfs_da_mark_sick(args);
> >  		return -EFSCORRUPTED;
> > +	}
> >  	/*
> >  	 * Read the last block in the btree space.
> >  	 */
> > @@ -2300,6 +2314,7 @@ xfs_da3_swap_lastblock(
> >  		if (XFS_IS_CORRUPT(mp,
> >  				   be32_to_cpu(sib_info->forw) != last_blkno ||
> >  				   sib_info->magic != dead_info->magic)) {
> > +			xfs_da_mark_sick(args);
> >  			error = -EFSCORRUPTED;
> >  			goto done;
> >  		}
> > @@ -2320,6 +2335,7 @@ xfs_da3_swap_lastblock(
> >  		if (XFS_IS_CORRUPT(mp,
> >  				   be32_to_cpu(sib_info->back) != last_blkno ||
> >  				   sib_info->magic != dead_info->magic)) {
> > +			xfs_da_mark_sick(args);
> >  			error = -EFSCORRUPTED;
> >  			goto done;
> >  		}
> > @@ -2342,6 +2358,7 @@ xfs_da3_swap_lastblock(
> >  		xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node);
> >  		if (XFS_IS_CORRUPT(mp,
> >  				   level >= 0 && level != par_hdr.level + 1)) {
> > +			xfs_da_mark_sick(args);
> >  			error = -EFSCORRUPTED;
> >  			goto done;
> >  		}
> > @@ -2353,6 +2370,7 @@ xfs_da3_swap_lastblock(
> >  		     entno++)
> >  			continue;
> >  		if (XFS_IS_CORRUPT(mp, entno == par_hdr.count)) {
> > +			xfs_da_mark_sick(args);
> >  			error = -EFSCORRUPTED;
> >  			goto done;
> >  		}
> > @@ -2378,6 +2396,7 @@ xfs_da3_swap_lastblock(
> >  		xfs_trans_brelse(tp, par_buf);
> >  		par_buf = NULL;
> >  		if (XFS_IS_CORRUPT(mp, par_blkno == 0)) {
> > +			xfs_da_mark_sick(args);
> >  			error = -EFSCORRUPTED;
> >  			goto done;
> >  		}
> > @@ -2387,6 +2406,7 @@ xfs_da3_swap_lastblock(
> >  		par_node = par_buf->b_addr;
> >  		xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node);
> >  		if (XFS_IS_CORRUPT(mp, par_hdr.level != level)) {
> > +			xfs_da_mark_sick(args);
> >  			error = -EFSCORRUPTED;
> >  			goto done;
> >  		}
> > @@ -2601,6 +2621,7 @@ xfs_dabuf_map(
> >  					irecs[i].br_state);
> >  			}
> >  		}
> > +		xfs_dirattr_mark_sick(dp, whichfork);
> >  		error = -EFSCORRUPTED;
> >  		goto out;
> >  	}
> > @@ -2693,6 +2714,8 @@ xfs_da_read_buf(
> >  	error = xfs_trans_read_buf_map(dp->i_mount, trans,
> >  					dp->i_mount->m_ddev_targp,
> >  					mapp, nmap, 0, &bp, ops);
> > +	if (xfs_metadata_is_sick(error))
> > +		xfs_dirattr_mark_sick(dp, whichfork);
> >  	if (error)
> >  		goto out_free;
> >  
> > diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
> > index 0aa87cbde49e..e1aa411a1b8b 100644
> > --- a/fs/xfs/libxfs/xfs_dir2.c
> > +++ b/fs/xfs/libxfs/xfs_dir2.c
> > @@ -18,6 +18,7 @@
> >  #include "xfs_errortag.h"
> >  #include "xfs_error.h"
> >  #include "xfs_trace.h"
> > +#include "xfs_health.h"
> >  
> >  struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR };
> >  
> > @@ -608,8 +609,10 @@ xfs_dir2_isblock(
> >  	rval = XFS_FSB_TO_B(args->dp->i_mount, last) == args->geo->blksize;
> >  	if (XFS_IS_CORRUPT(args->dp->i_mount,
> >  			   rval != 0 &&
> > -			   args->dp->i_d.di_size != args->geo->blksize))
> > +			   args->dp->i_d.di_size != args->geo->blksize)) {
> > +		xfs_da_mark_sick(args);
> >  		return -EFSCORRUPTED;
> > +	}
> >  	*vp = rval;
> >  	return 0;
> >  }
> > diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
> > index a6eb71a62b53..80cc9c7ea4e5 100644
> > --- a/fs/xfs/libxfs/xfs_dir2_data.c
> > +++ b/fs/xfs/libxfs/xfs_dir2_data.c
> > @@ -18,6 +18,7 @@
> >  #include "xfs_trans.h"
> >  #include "xfs_buf_item.h"
> >  #include "xfs_log.h"
> > +#include "xfs_health.h"
> >  
> >  static xfs_failaddr_t xfs_dir2_data_freefind_verify(
> >  		struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_free *bf,
> > @@ -1170,6 +1171,7 @@ xfs_dir2_data_use_free(
> >  corrupt:
> >  	xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, args->dp->i_mount,
> >  			hdr, sizeof(*hdr), __FILE__, __LINE__, fa);
> > +	xfs_da_mark_sick(args);
> >  	return -EFSCORRUPTED;
> >  }
> >  
> > diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
> > index 73edd96ce0ac..32d17420fff3 100644
> > --- a/fs/xfs/libxfs/xfs_dir2_leaf.c
> > +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
> > @@ -19,6 +19,7 @@
> >  #include "xfs_trace.h"
> >  #include "xfs_trans.h"
> >  #include "xfs_buf_item.h"
> > +#include "xfs_health.h"
> >  
> >  /*
> >   * Local function declarations.
> > @@ -1386,8 +1387,10 @@ xfs_dir2_leaf_removename(
> >  	bestsp = xfs_dir2_leaf_bests_p(ltp);
> >  	if (be16_to_cpu(bestsp[db]) != oldbest) {
> >  		xfs_buf_corruption_error(lbp);
> > +		xfs_da_mark_sick(args);
> >  		return -EFSCORRUPTED;
> >  	}
> > +
> >  	/*
> >  	 * Mark the former data entry unused.
> >  	 */
> > diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
> > index 3a8b0625a08b..e0f3ab254a1a 100644
> > --- a/fs/xfs/libxfs/xfs_dir2_node.c
> > +++ b/fs/xfs/libxfs/xfs_dir2_node.c
> > @@ -20,6 +20,7 @@
> >  #include "xfs_trans.h"
> >  #include "xfs_buf_item.h"
> >  #include "xfs_log.h"
> > +#include "xfs_health.h"
> >  
> >  /*
> >   * Function declarations.
> > @@ -228,6 +229,7 @@ __xfs_dir3_free_read(
> >  	if (fa) {
> >  		xfs_verifier_error(*bpp, -EFSCORRUPTED, fa);
> >  		xfs_trans_brelse(tp, *bpp);
> > +		xfs_dirattr_mark_sick(dp, XFS_DATA_FORK);
> >  		return -EFSCORRUPTED;
> >  	}
> >  
> > @@ -440,6 +442,7 @@ xfs_dir2_leaf_to_node(
> >  	if (be32_to_cpu(ltp->bestcount) >
> >  				(uint)dp->i_d.di_size / args->geo->blksize) {
> >  		xfs_buf_corruption_error(lbp);
> > +		xfs_da_mark_sick(args);
> >  		return -EFSCORRUPTED;
> >  	}
> >  
> > @@ -514,6 +517,7 @@ xfs_dir2_leafn_add(
> >  	 */
> >  	if (index < 0) {
> >  		xfs_buf_corruption_error(bp);
> > +		xfs_da_mark_sick(args);
> >  		return -EFSCORRUPTED;
> >  	}
> >  
> > @@ -733,6 +737,7 @@ xfs_dir2_leafn_lookup_for_addname(
> >  					   cpu_to_be16(NULLDATAOFF))) {
> >  				if (curfdb != newfdb)
> >  					xfs_trans_brelse(tp, curbp);
> > +				xfs_da_mark_sick(args);
> >  				return -EFSCORRUPTED;
> >  			}
> >  			curfdb = newfdb;
> > @@ -801,6 +806,7 @@ xfs_dir2_leafn_lookup_for_entry(
> >  	xfs_dir3_leaf_check(dp, bp);
> >  	if (leafhdr.count <= 0) {
> >  		xfs_buf_corruption_error(bp);
> > +		xfs_da_mark_sick(args);
> >  		return -EFSCORRUPTED;
> >  	}
> >  
> > @@ -1737,6 +1743,7 @@ xfs_dir2_node_add_datablk(
> >  			} else {
> >  				xfs_alert(mp, " ... fblk is NULL");
> >  			}
> > +			xfs_da_mark_sick(args);
> >  			return -EFSCORRUPTED;
> >  		}
> >  
> > diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h
> > index 2049419e9555..d9404cd3d09b 100644
> > --- a/fs/xfs/libxfs/xfs_health.h
> > +++ b/fs/xfs/libxfs/xfs_health.h
> > @@ -38,6 +38,7 @@ struct xfs_perag;
> >  struct xfs_inode;
> >  struct xfs_fsop_geom;
> >  struct xfs_btree_cur;
> > +struct xfs_da_args;
> >  
> >  /* Observable health issues for metadata spanning the entire filesystem. */
> >  #define XFS_SICK_FS_COUNTERS	(1 << 0)  /* summary counters */
> > @@ -141,6 +142,8 @@ void xfs_inode_measure_sickness(struct xfs_inode *ip, unsigned int *sick,
> >  void xfs_health_unmount(struct xfs_mount *mp);
> >  void xfs_bmap_mark_sick(struct xfs_inode *ip, int whichfork);
> >  void xfs_btree_mark_sick(struct xfs_btree_cur *cur);
> > +void xfs_dirattr_mark_sick(struct xfs_inode *ip, int whichfork);
> > +void xfs_da_mark_sick(struct xfs_da_args *args);
> >  
> >  /* Now some helpers. */
> >  
> > diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
> > index a78c501f6fb1..429a97494ffa 100644
> > --- a/fs/xfs/xfs_attr_inactive.c
> > +++ b/fs/xfs/xfs_attr_inactive.c
> > @@ -23,6 +23,7 @@
> >  #include "xfs_quota.h"
> >  #include "xfs_dir2.h"
> >  #include "xfs_error.h"
> > +#include "xfs_health.h"
> >  
> >  /*
> >   * Look at all the extents for this logical region,
> > @@ -209,6 +210,7 @@ xfs_attr3_node_inactive(
> >  	if (level > XFS_DA_NODE_MAXDEPTH) {
> >  		xfs_trans_brelse(*trans, bp);	/* no locks for later trans */
> >  		xfs_buf_corruption_error(bp);
> > +		xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> >  		return -EFSCORRUPTED;
> >  	}
> >  
> > @@ -256,6 +258,7 @@ xfs_attr3_node_inactive(
> >  			error = xfs_attr3_leaf_inactive(trans, dp, child_bp);
> >  			break;
> >  		default:
> > +			xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> >  			xfs_buf_corruption_error(child_bp);
> >  			xfs_trans_brelse(*trans, child_bp);
> >  			error = -EFSCORRUPTED;
> > @@ -342,6 +345,7 @@ xfs_attr3_root_inactive(
> >  		error = xfs_attr3_leaf_inactive(trans, dp, bp);
> >  		break;
> >  	default:
> > +		xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> >  		error = -EFSCORRUPTED;
> >  		xfs_buf_corruption_error(bp);
> >  		xfs_trans_brelse(*trans, bp);
> > diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
> > index 7a099df88a0c..1a2a3d4ce422 100644
> > --- a/fs/xfs/xfs_attr_list.c
> > +++ b/fs/xfs/xfs_attr_list.c
> > @@ -21,6 +21,7 @@
> >  #include "xfs_error.h"
> >  #include "xfs_trace.h"
> >  #include "xfs_dir2.h"
> > +#include "xfs_health.h"
> >  
> >  STATIC int
> >  xfs_attr_shortform_compare(const void *a, const void *b)
> > @@ -88,8 +89,10 @@ xfs_attr_shortform_list(
> >  		for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
> >  			if (XFS_IS_CORRUPT(context->dp->i_mount,
> >  					   !xfs_attr_namecheck(sfe->nameval,
> > -							       sfe->namelen)))
> > +							       sfe->namelen))) {
> > +				xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> >  				return -EFSCORRUPTED;
> > +			}
> >  			context->put_listent(context,
> >  					     sfe->flags,
> >  					     sfe->nameval,
> > @@ -131,6 +134,7 @@ xfs_attr_shortform_list(
> >  					     context->dp->i_mount, sfe,
> >  					     sizeof(*sfe));
> >  			kmem_free(sbuf);
> > +			xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> >  			return -EFSCORRUPTED;
> >  		}
> >  
> > @@ -181,6 +185,7 @@ xfs_attr_shortform_list(
> >  		if (XFS_IS_CORRUPT(context->dp->i_mount,
> >  				   !xfs_attr_namecheck(sbp->name,
> >  						       sbp->namelen))) {
> > +			xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> >  			error = -EFSCORRUPTED;
> >  			goto out;
> >  		}
> > @@ -268,8 +273,10 @@ xfs_attr_node_list_lookup(
> >  			return 0;
> >  
> >  		/* We can't point back to the root. */
> > -		if (XFS_IS_CORRUPT(mp, cursor->blkno == 0))
> > +		if (XFS_IS_CORRUPT(mp, cursor->blkno == 0)) {
> > +			xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> >  			return -EFSCORRUPTED;
> > +		}
> >  	}
> >  
> >  	if (expected_level != 0)
> > @@ -281,6 +288,7 @@ xfs_attr_node_list_lookup(
> >  out_corruptbuf:
> >  	xfs_buf_corruption_error(bp);
> >  	xfs_trans_brelse(tp, bp);
> > +	xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> >  	return -EFSCORRUPTED;
> >  }
> >  
> > @@ -471,8 +479,10 @@ xfs_attr3_leaf_list_int(
> >  		}
> >  
> >  		if (XFS_IS_CORRUPT(context->dp->i_mount,
> > -				   !xfs_attr_namecheck(name, namelen)))
> > +				   !xfs_attr_namecheck(name, namelen))) {
> > +			xfs_dirattr_mark_sick(context->dp, XFS_ATTR_FORK);
> >  			return -EFSCORRUPTED;
> > +		}
> >  		context->put_listent(context, entry->flags,
> >  					      name, namelen, valuelen);
> >  		if (context->seen_enough)
> > diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
> > index 95bc9ef8f5f9..715ded503334 100644
> > --- a/fs/xfs/xfs_dir2_readdir.c
> > +++ b/fs/xfs/xfs_dir2_readdir.c
> > @@ -18,6 +18,7 @@
> >  #include "xfs_bmap.h"
> >  #include "xfs_trans.h"
> >  #include "xfs_error.h"
> > +#include "xfs_health.h"
> >  
> >  /*
> >   * Directory file type support functions
> > @@ -119,8 +120,10 @@ xfs_dir2_sf_getdents(
> >  		ctx->pos = off & 0x7fffffff;
> >  		if (XFS_IS_CORRUPT(dp->i_mount,
> >  				   !xfs_dir2_namecheck(sfep->name,
> > -						       sfep->namelen)))
> > +						       sfep->namelen))) {
> > +			xfs_dirattr_mark_sick(dp, XFS_DATA_FORK);
> >  			return -EFSCORRUPTED;
> > +		}
> >  		if (!dir_emit(ctx, (char *)sfep->name, sfep->namelen, ino,
> >  			    xfs_dir3_get_dtype(mp, filetype)))
> >  			return 0;
> > @@ -461,6 +464,7 @@ xfs_dir2_leaf_getdents(
> >  		if (XFS_IS_CORRUPT(dp->i_mount,
> >  				   !xfs_dir2_namecheck(dep->name,
> >  						       dep->namelen))) {
> > +			xfs_dirattr_mark_sick(dp, XFS_DATA_FORK);
> >  			error = -EFSCORRUPTED;
> >  			break;
> >  		}
> > diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c
> > index 1f09027c55ad..c1b6e8fb72ec 100644
> > --- a/fs/xfs/xfs_health.c
> > +++ b/fs/xfs/xfs_health.c
> > @@ -15,6 +15,8 @@
> >  #include "xfs_trace.h"
> >  #include "xfs_health.h"
> >  #include "xfs_btree.h"
> > +#include "xfs_da_format.h"
> > +#include "xfs_da_btree.h"
> >  
> >  /*
> >   * Warn about metadata corruption that we detected but haven't fixed, and
> > @@ -517,3 +519,40 @@ xfs_btree_mark_sick(
> >  
> >  	xfs_agno_mark_sick(cur->bc_mp, cur->bc_private.a.agno, mask);
> >  }
> > +
> > +/*
> > + * Record observations of dir/attr btree corruption with the health tracking
> > + * system.
> > + */
> > +void
> > +xfs_dirattr_mark_sick(
> > +	struct xfs_inode	*ip,
> > +	int			whichfork)
> > +{
> > +	unsigned int		mask;
> > +
> > +	switch (whichfork) {
> > +	case XFS_DATA_FORK:
> > +		mask = XFS_SICK_INO_DIR;
> > +		break;
> > +	case XFS_ATTR_FORK:
> > +		mask = XFS_SICK_INO_XATTR;
> > +		break;
> > +	default:
> > +		ASSERT(0);
> > +		return;
> > +	}
> > +
> > +	xfs_inode_mark_sick(ip, mask);
> > +}
> > +
> > +/*
> > + * Record observations of dir/attr btree corruption with the health tracking
> > + * system.
> > + */
> > +void
> > +xfs_da_mark_sick(
> > +	struct xfs_da_args	*args)
> > +{
> > +	xfs_dirattr_mark_sick(args->dp, args->whichfork);
> > +}
> > 
>
Brian Foster Nov. 21, 2019, 1:26 p.m. UTC | #3
On Wed, Nov 20, 2019 at 08:55:08AM -0800, Darrick J. Wong wrote:
> On Wed, Nov 20, 2019 at 11:11:47AM -0500, Brian Foster wrote:
> > On Thu, Nov 14, 2019 at 10:19:46AM -0800, Darrick J. Wong wrote:
> > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > 
> > > Whenever we encounter corrupt directory or extended attribute blocks, we
> > > should report that to the health monitoring system for later reporting.
> > > 
> > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > ---
> > >  fs/xfs/libxfs/xfs_attr_leaf.c   |    5 ++++-
> > >  fs/xfs/libxfs/xfs_attr_remote.c |   27 ++++++++++++++++-----------
> > >  fs/xfs/libxfs/xfs_da_btree.c    |   29 ++++++++++++++++++++++++++---
> > >  fs/xfs/libxfs/xfs_dir2.c        |    5 ++++-
> > >  fs/xfs/libxfs/xfs_dir2_data.c   |    2 ++
> > >  fs/xfs/libxfs/xfs_dir2_leaf.c   |    3 +++
> > >  fs/xfs/libxfs/xfs_dir2_node.c   |    7 +++++++
> > >  fs/xfs/libxfs/xfs_health.h      |    3 +++
> > >  fs/xfs/xfs_attr_inactive.c      |    4 ++++
> > >  fs/xfs/xfs_attr_list.c          |   16 +++++++++++++---
> > >  fs/xfs/xfs_dir2_readdir.c       |    6 +++++-
> > >  fs/xfs/xfs_health.c             |   39 +++++++++++++++++++++++++++++++++++++++
> > >  12 files changed, 126 insertions(+), 20 deletions(-)
> > > 
> > > 
> > ...
> > > diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
> > > index e424b004e3cb..a17622dadf00 100644
> > > --- a/fs/xfs/libxfs/xfs_da_btree.c
> > > +++ b/fs/xfs/libxfs/xfs_da_btree.c
> > ...
> > > @@ -1589,6 +1593,7 @@ xfs_da3_node_lookup_int(
> > >  
> > >  		if (magic != XFS_DA_NODE_MAGIC && magic != XFS_DA3_NODE_MAGIC) {
> > >  			xfs_buf_corruption_error(blk->bp);
> > > +			xfs_da_mark_sick(args);
> > >  			return -EFSCORRUPTED;
> > >  		}
> > >  
> > > @@ -1604,6 +1609,7 @@ xfs_da3_node_lookup_int(
> > >  		/* Tree taller than we can handle; bail out! */
> > >  		if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
> > >  			xfs_buf_corruption_error(blk->bp);
> > > +			xfs_da_mark_sick(args);
> > >  			return -EFSCORRUPTED;
> > >  		}
> > >  
> > > @@ -1612,6 +1618,7 @@ xfs_da3_node_lookup_int(
> > >  			expected_level = nodehdr.level - 1;
> > >  		else if (expected_level != nodehdr.level) {
> > >  			xfs_buf_corruption_error(blk->bp);
> > > +			xfs_da_mark_sick(args);
> > >  			return -EFSCORRUPTED;
> > >  		} else
> > >  			expected_level--;
> > > @@ -1663,12 +1670,16 @@ xfs_da3_node_lookup_int(
> > >  		}
> > >  
> > >  		/* We can't point back to the root. */
> > > -		if (XFS_IS_CORRUPT(dp->i_mount, blkno == args->geo->leafblk))
> > > +		if (XFS_IS_CORRUPT(dp->i_mount, blkno == args->geo->leafblk)) {
> > > +			xfs_da_mark_sick(args);
> > >  			return -EFSCORRUPTED;
> > > +		}
> > >  	}
> > >  
> > > -	if (XFS_IS_CORRUPT(dp->i_mount, expected_level != 0))
> > > +	if (XFS_IS_CORRUPT(dp->i_mount, expected_level != 0)) {
> > > +		xfs_da_mark_sick(args);
> > >  		return -EFSCORRUPTED;
> > > +	}
> > >  
> > >  	/*
> > >  	 * A leaf block that ends in the hashval that we are interested in
> > > @@ -1686,6 +1697,7 @@ xfs_da3_node_lookup_int(
> > >  			args->blkno = blk->blkno;
> > >  		} else {
> > >  			ASSERT(0);
> > > +			xfs_da_mark_sick(args);
> > >  			return -EFSCORRUPTED;
> > >  		}
> > 
> > I'm just kind of skimming through the rest for general feedback at this
> > point given previous comments, but it might be nice to start using exit
> > labels at some of these places where we're enlarging and duplicating the
> > error path for particular errors.
> 
> Yeah.  This current iteration is pretty wordy since I used coccinelle to
> find all the EFSCORRUPTED clauses and inject the appropriate _mark_sick
> call.
> 
> > It's not so much about the code in
> > these patches, but rather to hopefully ease maintaining these state bits
> > properly in new code where devs/reviewers might not know much about
> > scrub state or have it in mind. Short of having some kind of generic
> > helper to handle corruption state, ISTM that the combination of using
> > verifiers where possible and common exit labels anywhere else we
> > generate -EFSCORRUPTED at multiple places within some function could
> > shrink these patches a bit..
> 
> <nod> Eric suggested on IRC that maybe the _mark_sick functions should
> return EFSCORRUPTED so that we could at least collapse that to:
> 
> if (XFS_IS_CORRUPT(...)) {
> 	error = xfs_da_mark_sick(...);
> 	goto barf;
> }
> 
> However, doing it the wordy way I've done it has the neat effects (IMHO)
> that you can find all the places where xfs decides some metadata is
> corrupt by grepping for EFSCORRUPTED, and confirm that each place it
> does that also has a corresponding _mark_sick call.
> 

Yeah, that was actually my thought process in suggesting pushing the
mark_sick() calls down into verifiers as well. It seems a little more
clear (and open to future cleanups) with a strict pattern of setting
sickness in the locations that generate corruption errors. Of course
that likely means some special macro or something like you propose
below, but I didn't want to quite go there until we could put the state
updates in the right places.

> I guess you could create a dorky shouty wrapper to maintain that greppy
> property:
> 
> #define XFS_DA_EFSCORRUPTED(...) \
> 	(xfs_da_mark_sick(...), -EFSCORRUPTED)
> 
> But... that might be stylistically undesirable.  OTOH I guess it
> wouldn't be so bad either to do:
> 
> 	if (XFS_IS_CORRUPT(...)) {
> 		error = -EFSCORRUPTED;
> 		goto bad;
> 	}
> 
> 	if (XFS_IS_CORRUPT(...)) {
> 		error = -EFSCORRUPTED;
> 		goto bad;
> 	}
> 
> 	return 0;
> bad:
> 	if (error == -EFSCORRUPTED)
> 		xfs_da_mark_sick(...);
> 	return error;
> 
> Or using the shouty macro above:
> 
> 	if (XFS_IS_CORRUPT(...)) {
> 		error = XFS_DA_EFSCORRUPTED(...);
> 		goto bad;
> 	}
> 
> 	if (XFS_IS_CORRUPT(...)) {
> 		error = XFS_DA_EFSCORRUPTED(...);
> 		goto bad;
> 	}
> 
> bad:
> 	return error;
> 
> I'll think about that.  It doesn't sound so bad when coding it up in
> this email.
> 

I suppose a macro is nice in that it enforces sickness is updated
wherever -EFSCORRUPTED occurs, or at least can easily be verified by
grepping. I find the separate macros pattern a little confusing, FWIW,
simply because at a glance it looks like a garbled bunch of logic to me.
I.e. I see 'if (IS_CORRUPT()) SOMETHING_CORRUPTED(); ...' and wonder wtf
that is doing, for one. It's also not immediately obvious when we should
use one or not the other, etc. This is getting into bikeshedding
territory though and I don't have much of a better suggestion atm...

Brian

> --D
> 
> > 
> > Brian
> > 
> > >  		if (((retval == -ENOENT) || (retval == -ENOATTR)) &&
> > > @@ -2250,8 +2262,10 @@ xfs_da3_swap_lastblock(
> > >  	error = xfs_bmap_last_before(tp, dp, &lastoff, w);
> > >  	if (error)
> > >  		return error;
> > > -	if (XFS_IS_CORRUPT(mp, lastoff == 0))
> > > +	if (XFS_IS_CORRUPT(mp, lastoff == 0)) {
> > > +		xfs_da_mark_sick(args);
> > >  		return -EFSCORRUPTED;
> > > +	}
> > >  	/*
> > >  	 * Read the last block in the btree space.
> > >  	 */
> > > @@ -2300,6 +2314,7 @@ xfs_da3_swap_lastblock(
> > >  		if (XFS_IS_CORRUPT(mp,
> > >  				   be32_to_cpu(sib_info->forw) != last_blkno ||
> > >  				   sib_info->magic != dead_info->magic)) {
> > > +			xfs_da_mark_sick(args);
> > >  			error = -EFSCORRUPTED;
> > >  			goto done;
> > >  		}
> > > @@ -2320,6 +2335,7 @@ xfs_da3_swap_lastblock(
> > >  		if (XFS_IS_CORRUPT(mp,
> > >  				   be32_to_cpu(sib_info->back) != last_blkno ||
> > >  				   sib_info->magic != dead_info->magic)) {
> > > +			xfs_da_mark_sick(args);
> > >  			error = -EFSCORRUPTED;
> > >  			goto done;
> > >  		}
> > > @@ -2342,6 +2358,7 @@ xfs_da3_swap_lastblock(
> > >  		xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node);
> > >  		if (XFS_IS_CORRUPT(mp,
> > >  				   level >= 0 && level != par_hdr.level + 1)) {
> > > +			xfs_da_mark_sick(args);
> > >  			error = -EFSCORRUPTED;
> > >  			goto done;
> > >  		}
> > > @@ -2353,6 +2370,7 @@ xfs_da3_swap_lastblock(
> > >  		     entno++)
> > >  			continue;
> > >  		if (XFS_IS_CORRUPT(mp, entno == par_hdr.count)) {
> > > +			xfs_da_mark_sick(args);
> > >  			error = -EFSCORRUPTED;
> > >  			goto done;
> > >  		}
> > > @@ -2378,6 +2396,7 @@ xfs_da3_swap_lastblock(
> > >  		xfs_trans_brelse(tp, par_buf);
> > >  		par_buf = NULL;
> > >  		if (XFS_IS_CORRUPT(mp, par_blkno == 0)) {
> > > +			xfs_da_mark_sick(args);
> > >  			error = -EFSCORRUPTED;
> > >  			goto done;
> > >  		}
> > > @@ -2387,6 +2406,7 @@ xfs_da3_swap_lastblock(
> > >  		par_node = par_buf->b_addr;
> > >  		xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node);
> > >  		if (XFS_IS_CORRUPT(mp, par_hdr.level != level)) {
> > > +			xfs_da_mark_sick(args);
> > >  			error = -EFSCORRUPTED;
> > >  			goto done;
> > >  		}
> > > @@ -2601,6 +2621,7 @@ xfs_dabuf_map(
> > >  					irecs[i].br_state);
> > >  			}
> > >  		}
> > > +		xfs_dirattr_mark_sick(dp, whichfork);
> > >  		error = -EFSCORRUPTED;
> > >  		goto out;
> > >  	}
> > > @@ -2693,6 +2714,8 @@ xfs_da_read_buf(
> > >  	error = xfs_trans_read_buf_map(dp->i_mount, trans,
> > >  					dp->i_mount->m_ddev_targp,
> > >  					mapp, nmap, 0, &bp, ops);
> > > +	if (xfs_metadata_is_sick(error))
> > > +		xfs_dirattr_mark_sick(dp, whichfork);
> > >  	if (error)
> > >  		goto out_free;
> > >  
> > > diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
> > > index 0aa87cbde49e..e1aa411a1b8b 100644
> > > --- a/fs/xfs/libxfs/xfs_dir2.c
> > > +++ b/fs/xfs/libxfs/xfs_dir2.c
> > > @@ -18,6 +18,7 @@
> > >  #include "xfs_errortag.h"
> > >  #include "xfs_error.h"
> > >  #include "xfs_trace.h"
> > > +#include "xfs_health.h"
> > >  
> > >  struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR };
> > >  
> > > @@ -608,8 +609,10 @@ xfs_dir2_isblock(
> > >  	rval = XFS_FSB_TO_B(args->dp->i_mount, last) == args->geo->blksize;
> > >  	if (XFS_IS_CORRUPT(args->dp->i_mount,
> > >  			   rval != 0 &&
> > > -			   args->dp->i_d.di_size != args->geo->blksize))
> > > +			   args->dp->i_d.di_size != args->geo->blksize)) {
> > > +		xfs_da_mark_sick(args);
> > >  		return -EFSCORRUPTED;
> > > +	}
> > >  	*vp = rval;
> > >  	return 0;
> > >  }
> > > diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
> > > index a6eb71a62b53..80cc9c7ea4e5 100644
> > > --- a/fs/xfs/libxfs/xfs_dir2_data.c
> > > +++ b/fs/xfs/libxfs/xfs_dir2_data.c
> > > @@ -18,6 +18,7 @@
> > >  #include "xfs_trans.h"
> > >  #include "xfs_buf_item.h"
> > >  #include "xfs_log.h"
> > > +#include "xfs_health.h"
> > >  
> > >  static xfs_failaddr_t xfs_dir2_data_freefind_verify(
> > >  		struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_free *bf,
> > > @@ -1170,6 +1171,7 @@ xfs_dir2_data_use_free(
> > >  corrupt:
> > >  	xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, args->dp->i_mount,
> > >  			hdr, sizeof(*hdr), __FILE__, __LINE__, fa);
> > > +	xfs_da_mark_sick(args);
> > >  	return -EFSCORRUPTED;
> > >  }
> > >  
> > > diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
> > > index 73edd96ce0ac..32d17420fff3 100644
> > > --- a/fs/xfs/libxfs/xfs_dir2_leaf.c
> > > +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
> > > @@ -19,6 +19,7 @@
> > >  #include "xfs_trace.h"
> > >  #include "xfs_trans.h"
> > >  #include "xfs_buf_item.h"
> > > +#include "xfs_health.h"
> > >  
> > >  /*
> > >   * Local function declarations.
> > > @@ -1386,8 +1387,10 @@ xfs_dir2_leaf_removename(
> > >  	bestsp = xfs_dir2_leaf_bests_p(ltp);
> > >  	if (be16_to_cpu(bestsp[db]) != oldbest) {
> > >  		xfs_buf_corruption_error(lbp);
> > > +		xfs_da_mark_sick(args);
> > >  		return -EFSCORRUPTED;
> > >  	}
> > > +
> > >  	/*
> > >  	 * Mark the former data entry unused.
> > >  	 */
> > > diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
> > > index 3a8b0625a08b..e0f3ab254a1a 100644
> > > --- a/fs/xfs/libxfs/xfs_dir2_node.c
> > > +++ b/fs/xfs/libxfs/xfs_dir2_node.c
> > > @@ -20,6 +20,7 @@
> > >  #include "xfs_trans.h"
> > >  #include "xfs_buf_item.h"
> > >  #include "xfs_log.h"
> > > +#include "xfs_health.h"
> > >  
> > >  /*
> > >   * Function declarations.
> > > @@ -228,6 +229,7 @@ __xfs_dir3_free_read(
> > >  	if (fa) {
> > >  		xfs_verifier_error(*bpp, -EFSCORRUPTED, fa);
> > >  		xfs_trans_brelse(tp, *bpp);
> > > +		xfs_dirattr_mark_sick(dp, XFS_DATA_FORK);
> > >  		return -EFSCORRUPTED;
> > >  	}
> > >  
> > > @@ -440,6 +442,7 @@ xfs_dir2_leaf_to_node(
> > >  	if (be32_to_cpu(ltp->bestcount) >
> > >  				(uint)dp->i_d.di_size / args->geo->blksize) {
> > >  		xfs_buf_corruption_error(lbp);
> > > +		xfs_da_mark_sick(args);
> > >  		return -EFSCORRUPTED;
> > >  	}
> > >  
> > > @@ -514,6 +517,7 @@ xfs_dir2_leafn_add(
> > >  	 */
> > >  	if (index < 0) {
> > >  		xfs_buf_corruption_error(bp);
> > > +		xfs_da_mark_sick(args);
> > >  		return -EFSCORRUPTED;
> > >  	}
> > >  
> > > @@ -733,6 +737,7 @@ xfs_dir2_leafn_lookup_for_addname(
> > >  					   cpu_to_be16(NULLDATAOFF))) {
> > >  				if (curfdb != newfdb)
> > >  					xfs_trans_brelse(tp, curbp);
> > > +				xfs_da_mark_sick(args);
> > >  				return -EFSCORRUPTED;
> > >  			}
> > >  			curfdb = newfdb;
> > > @@ -801,6 +806,7 @@ xfs_dir2_leafn_lookup_for_entry(
> > >  	xfs_dir3_leaf_check(dp, bp);
> > >  	if (leafhdr.count <= 0) {
> > >  		xfs_buf_corruption_error(bp);
> > > +		xfs_da_mark_sick(args);
> > >  		return -EFSCORRUPTED;
> > >  	}
> > >  
> > > @@ -1737,6 +1743,7 @@ xfs_dir2_node_add_datablk(
> > >  			} else {
> > >  				xfs_alert(mp, " ... fblk is NULL");
> > >  			}
> > > +			xfs_da_mark_sick(args);
> > >  			return -EFSCORRUPTED;
> > >  		}
> > >  
> > > diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h
> > > index 2049419e9555..d9404cd3d09b 100644
> > > --- a/fs/xfs/libxfs/xfs_health.h
> > > +++ b/fs/xfs/libxfs/xfs_health.h
> > > @@ -38,6 +38,7 @@ struct xfs_perag;
> > >  struct xfs_inode;
> > >  struct xfs_fsop_geom;
> > >  struct xfs_btree_cur;
> > > +struct xfs_da_args;
> > >  
> > >  /* Observable health issues for metadata spanning the entire filesystem. */
> > >  #define XFS_SICK_FS_COUNTERS	(1 << 0)  /* summary counters */
> > > @@ -141,6 +142,8 @@ void xfs_inode_measure_sickness(struct xfs_inode *ip, unsigned int *sick,
> > >  void xfs_health_unmount(struct xfs_mount *mp);
> > >  void xfs_bmap_mark_sick(struct xfs_inode *ip, int whichfork);
> > >  void xfs_btree_mark_sick(struct xfs_btree_cur *cur);
> > > +void xfs_dirattr_mark_sick(struct xfs_inode *ip, int whichfork);
> > > +void xfs_da_mark_sick(struct xfs_da_args *args);
> > >  
> > >  /* Now some helpers. */
> > >  
> > > diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
> > > index a78c501f6fb1..429a97494ffa 100644
> > > --- a/fs/xfs/xfs_attr_inactive.c
> > > +++ b/fs/xfs/xfs_attr_inactive.c
> > > @@ -23,6 +23,7 @@
> > >  #include "xfs_quota.h"
> > >  #include "xfs_dir2.h"
> > >  #include "xfs_error.h"
> > > +#include "xfs_health.h"
> > >  
> > >  /*
> > >   * Look at all the extents for this logical region,
> > > @@ -209,6 +210,7 @@ xfs_attr3_node_inactive(
> > >  	if (level > XFS_DA_NODE_MAXDEPTH) {
> > >  		xfs_trans_brelse(*trans, bp);	/* no locks for later trans */
> > >  		xfs_buf_corruption_error(bp);
> > > +		xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> > >  		return -EFSCORRUPTED;
> > >  	}
> > >  
> > > @@ -256,6 +258,7 @@ xfs_attr3_node_inactive(
> > >  			error = xfs_attr3_leaf_inactive(trans, dp, child_bp);
> > >  			break;
> > >  		default:
> > > +			xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> > >  			xfs_buf_corruption_error(child_bp);
> > >  			xfs_trans_brelse(*trans, child_bp);
> > >  			error = -EFSCORRUPTED;
> > > @@ -342,6 +345,7 @@ xfs_attr3_root_inactive(
> > >  		error = xfs_attr3_leaf_inactive(trans, dp, bp);
> > >  		break;
> > >  	default:
> > > +		xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> > >  		error = -EFSCORRUPTED;
> > >  		xfs_buf_corruption_error(bp);
> > >  		xfs_trans_brelse(*trans, bp);
> > > diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
> > > index 7a099df88a0c..1a2a3d4ce422 100644
> > > --- a/fs/xfs/xfs_attr_list.c
> > > +++ b/fs/xfs/xfs_attr_list.c
> > > @@ -21,6 +21,7 @@
> > >  #include "xfs_error.h"
> > >  #include "xfs_trace.h"
> > >  #include "xfs_dir2.h"
> > > +#include "xfs_health.h"
> > >  
> > >  STATIC int
> > >  xfs_attr_shortform_compare(const void *a, const void *b)
> > > @@ -88,8 +89,10 @@ xfs_attr_shortform_list(
> > >  		for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
> > >  			if (XFS_IS_CORRUPT(context->dp->i_mount,
> > >  					   !xfs_attr_namecheck(sfe->nameval,
> > > -							       sfe->namelen)))
> > > +							       sfe->namelen))) {
> > > +				xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> > >  				return -EFSCORRUPTED;
> > > +			}
> > >  			context->put_listent(context,
> > >  					     sfe->flags,
> > >  					     sfe->nameval,
> > > @@ -131,6 +134,7 @@ xfs_attr_shortform_list(
> > >  					     context->dp->i_mount, sfe,
> > >  					     sizeof(*sfe));
> > >  			kmem_free(sbuf);
> > > +			xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> > >  			return -EFSCORRUPTED;
> > >  		}
> > >  
> > > @@ -181,6 +185,7 @@ xfs_attr_shortform_list(
> > >  		if (XFS_IS_CORRUPT(context->dp->i_mount,
> > >  				   !xfs_attr_namecheck(sbp->name,
> > >  						       sbp->namelen))) {
> > > +			xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> > >  			error = -EFSCORRUPTED;
> > >  			goto out;
> > >  		}
> > > @@ -268,8 +273,10 @@ xfs_attr_node_list_lookup(
> > >  			return 0;
> > >  
> > >  		/* We can't point back to the root. */
> > > -		if (XFS_IS_CORRUPT(mp, cursor->blkno == 0))
> > > +		if (XFS_IS_CORRUPT(mp, cursor->blkno == 0)) {
> > > +			xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> > >  			return -EFSCORRUPTED;
> > > +		}
> > >  	}
> > >  
> > >  	if (expected_level != 0)
> > > @@ -281,6 +288,7 @@ xfs_attr_node_list_lookup(
> > >  out_corruptbuf:
> > >  	xfs_buf_corruption_error(bp);
> > >  	xfs_trans_brelse(tp, bp);
> > > +	xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> > >  	return -EFSCORRUPTED;
> > >  }
> > >  
> > > @@ -471,8 +479,10 @@ xfs_attr3_leaf_list_int(
> > >  		}
> > >  
> > >  		if (XFS_IS_CORRUPT(context->dp->i_mount,
> > > -				   !xfs_attr_namecheck(name, namelen)))
> > > +				   !xfs_attr_namecheck(name, namelen))) {
> > > +			xfs_dirattr_mark_sick(context->dp, XFS_ATTR_FORK);
> > >  			return -EFSCORRUPTED;
> > > +		}
> > >  		context->put_listent(context, entry->flags,
> > >  					      name, namelen, valuelen);
> > >  		if (context->seen_enough)
> > > diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
> > > index 95bc9ef8f5f9..715ded503334 100644
> > > --- a/fs/xfs/xfs_dir2_readdir.c
> > > +++ b/fs/xfs/xfs_dir2_readdir.c
> > > @@ -18,6 +18,7 @@
> > >  #include "xfs_bmap.h"
> > >  #include "xfs_trans.h"
> > >  #include "xfs_error.h"
> > > +#include "xfs_health.h"
> > >  
> > >  /*
> > >   * Directory file type support functions
> > > @@ -119,8 +120,10 @@ xfs_dir2_sf_getdents(
> > >  		ctx->pos = off & 0x7fffffff;
> > >  		if (XFS_IS_CORRUPT(dp->i_mount,
> > >  				   !xfs_dir2_namecheck(sfep->name,
> > > -						       sfep->namelen)))
> > > +						       sfep->namelen))) {
> > > +			xfs_dirattr_mark_sick(dp, XFS_DATA_FORK);
> > >  			return -EFSCORRUPTED;
> > > +		}
> > >  		if (!dir_emit(ctx, (char *)sfep->name, sfep->namelen, ino,
> > >  			    xfs_dir3_get_dtype(mp, filetype)))
> > >  			return 0;
> > > @@ -461,6 +464,7 @@ xfs_dir2_leaf_getdents(
> > >  		if (XFS_IS_CORRUPT(dp->i_mount,
> > >  				   !xfs_dir2_namecheck(dep->name,
> > >  						       dep->namelen))) {
> > > +			xfs_dirattr_mark_sick(dp, XFS_DATA_FORK);
> > >  			error = -EFSCORRUPTED;
> > >  			break;
> > >  		}
> > > diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c
> > > index 1f09027c55ad..c1b6e8fb72ec 100644
> > > --- a/fs/xfs/xfs_health.c
> > > +++ b/fs/xfs/xfs_health.c
> > > @@ -15,6 +15,8 @@
> > >  #include "xfs_trace.h"
> > >  #include "xfs_health.h"
> > >  #include "xfs_btree.h"
> > > +#include "xfs_da_format.h"
> > > +#include "xfs_da_btree.h"
> > >  
> > >  /*
> > >   * Warn about metadata corruption that we detected but haven't fixed, and
> > > @@ -517,3 +519,40 @@ xfs_btree_mark_sick(
> > >  
> > >  	xfs_agno_mark_sick(cur->bc_mp, cur->bc_private.a.agno, mask);
> > >  }
> > > +
> > > +/*
> > > + * Record observations of dir/attr btree corruption with the health tracking
> > > + * system.
> > > + */
> > > +void
> > > +xfs_dirattr_mark_sick(
> > > +	struct xfs_inode	*ip,
> > > +	int			whichfork)
> > > +{
> > > +	unsigned int		mask;
> > > +
> > > +	switch (whichfork) {
> > > +	case XFS_DATA_FORK:
> > > +		mask = XFS_SICK_INO_DIR;
> > > +		break;
> > > +	case XFS_ATTR_FORK:
> > > +		mask = XFS_SICK_INO_XATTR;
> > > +		break;
> > > +	default:
> > > +		ASSERT(0);
> > > +		return;
> > > +	}
> > > +
> > > +	xfs_inode_mark_sick(ip, mask);
> > > +}
> > > +
> > > +/*
> > > + * Record observations of dir/attr btree corruption with the health tracking
> > > + * system.
> > > + */
> > > +void
> > > +xfs_da_mark_sick(
> > > +	struct xfs_da_args	*args)
> > > +{
> > > +	xfs_dirattr_mark_sick(args->dp, args->whichfork);
> > > +}
> > > 
> > 
>
Darrick J. Wong Nov. 22, 2019, 1:03 a.m. UTC | #4
On Thu, Nov 21, 2019 at 08:26:27AM -0500, Brian Foster wrote:
> On Wed, Nov 20, 2019 at 08:55:08AM -0800, Darrick J. Wong wrote:
> > On Wed, Nov 20, 2019 at 11:11:47AM -0500, Brian Foster wrote:
> > > On Thu, Nov 14, 2019 at 10:19:46AM -0800, Darrick J. Wong wrote:
> > > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > > 
> > > > Whenever we encounter corrupt directory or extended attribute blocks, we
> > > > should report that to the health monitoring system for later reporting.
> > > > 
> > > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > > ---
> > > >  fs/xfs/libxfs/xfs_attr_leaf.c   |    5 ++++-
> > > >  fs/xfs/libxfs/xfs_attr_remote.c |   27 ++++++++++++++++-----------
> > > >  fs/xfs/libxfs/xfs_da_btree.c    |   29 ++++++++++++++++++++++++++---
> > > >  fs/xfs/libxfs/xfs_dir2.c        |    5 ++++-
> > > >  fs/xfs/libxfs/xfs_dir2_data.c   |    2 ++
> > > >  fs/xfs/libxfs/xfs_dir2_leaf.c   |    3 +++
> > > >  fs/xfs/libxfs/xfs_dir2_node.c   |    7 +++++++
> > > >  fs/xfs/libxfs/xfs_health.h      |    3 +++
> > > >  fs/xfs/xfs_attr_inactive.c      |    4 ++++
> > > >  fs/xfs/xfs_attr_list.c          |   16 +++++++++++++---
> > > >  fs/xfs/xfs_dir2_readdir.c       |    6 +++++-
> > > >  fs/xfs/xfs_health.c             |   39 +++++++++++++++++++++++++++++++++++++++
> > > >  12 files changed, 126 insertions(+), 20 deletions(-)
> > > > 
> > > > 
> > > ...
> > > > diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
> > > > index e424b004e3cb..a17622dadf00 100644
> > > > --- a/fs/xfs/libxfs/xfs_da_btree.c
> > > > +++ b/fs/xfs/libxfs/xfs_da_btree.c
> > > ...
> > > > @@ -1589,6 +1593,7 @@ xfs_da3_node_lookup_int(
> > > >  
> > > >  		if (magic != XFS_DA_NODE_MAGIC && magic != XFS_DA3_NODE_MAGIC) {
> > > >  			xfs_buf_corruption_error(blk->bp);
> > > > +			xfs_da_mark_sick(args);
> > > >  			return -EFSCORRUPTED;
> > > >  		}
> > > >  
> > > > @@ -1604,6 +1609,7 @@ xfs_da3_node_lookup_int(
> > > >  		/* Tree taller than we can handle; bail out! */
> > > >  		if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
> > > >  			xfs_buf_corruption_error(blk->bp);
> > > > +			xfs_da_mark_sick(args);
> > > >  			return -EFSCORRUPTED;
> > > >  		}
> > > >  
> > > > @@ -1612,6 +1618,7 @@ xfs_da3_node_lookup_int(
> > > >  			expected_level = nodehdr.level - 1;
> > > >  		else if (expected_level != nodehdr.level) {
> > > >  			xfs_buf_corruption_error(blk->bp);
> > > > +			xfs_da_mark_sick(args);
> > > >  			return -EFSCORRUPTED;
> > > >  		} else
> > > >  			expected_level--;
> > > > @@ -1663,12 +1670,16 @@ xfs_da3_node_lookup_int(
> > > >  		}
> > > >  
> > > >  		/* We can't point back to the root. */
> > > > -		if (XFS_IS_CORRUPT(dp->i_mount, blkno == args->geo->leafblk))
> > > > +		if (XFS_IS_CORRUPT(dp->i_mount, blkno == args->geo->leafblk)) {
> > > > +			xfs_da_mark_sick(args);
> > > >  			return -EFSCORRUPTED;
> > > > +		}
> > > >  	}
> > > >  
> > > > -	if (XFS_IS_CORRUPT(dp->i_mount, expected_level != 0))
> > > > +	if (XFS_IS_CORRUPT(dp->i_mount, expected_level != 0)) {
> > > > +		xfs_da_mark_sick(args);
> > > >  		return -EFSCORRUPTED;
> > > > +	}
> > > >  
> > > >  	/*
> > > >  	 * A leaf block that ends in the hashval that we are interested in
> > > > @@ -1686,6 +1697,7 @@ xfs_da3_node_lookup_int(
> > > >  			args->blkno = blk->blkno;
> > > >  		} else {
> > > >  			ASSERT(0);
> > > > +			xfs_da_mark_sick(args);
> > > >  			return -EFSCORRUPTED;
> > > >  		}
> > > 
> > > I'm just kind of skimming through the rest for general feedback at this
> > > point given previous comments, but it might be nice to start using exit
> > > labels at some of these places where we're enlarging and duplicating the
> > > error path for particular errors.
> > 
> > Yeah.  This current iteration is pretty wordy since I used coccinelle to
> > find all the EFSCORRUPTED clauses and inject the appropriate _mark_sick
> > call.
> > 
> > > It's not so much about the code in
> > > these patches, but rather to hopefully ease maintaining these state bits
> > > properly in new code where devs/reviewers might not know much about
> > > scrub state or have it in mind. Short of having some kind of generic
> > > helper to handle corruption state, ISTM that the combination of using
> > > verifiers where possible and common exit labels anywhere else we
> > > generate -EFSCORRUPTED at multiple places within some function could
> > > shrink these patches a bit..
> > 
> > <nod> Eric suggested on IRC that maybe the _mark_sick functions should
> > return EFSCORRUPTED so that we could at least collapse that to:
> > 
> > if (XFS_IS_CORRUPT(...)) {
> > 	error = xfs_da_mark_sick(...);
> > 	goto barf;
> > }
> > 
> > However, doing it the wordy way I've done it has the neat effects (IMHO)
> > that you can find all the places where xfs decides some metadata is
> > corrupt by grepping for EFSCORRUPTED, and confirm that each place it
> > does that also has a corresponding _mark_sick call.
> > 
> 
> Yeah, that was actually my thought process in suggesting pushing the
> mark_sick() calls down into verifiers as well.

<nod> It does strike me as a little odd that the verifiers are the /one/
place where EFSCORRUPTED isn't preceded or followed by a _mark_sick.

> It seems a little more clear (and open to future cleanups) with a
> strict pattern of setting sickness in the locations that generate
> corruption errors. Of course that likely means some special macro or
> something like you propose below, but I didn't want to quite go there
> until we could put the state updates in the right places.

Yeah....

> > I guess you could create a dorky shouty wrapper to maintain that greppy
> > property:
> > 
> > #define XFS_DA_EFSCORRUPTED(...) \
> > 	(xfs_da_mark_sick(...), -EFSCORRUPTED)
> > 
> > But... that might be stylistically undesirable.  OTOH I guess it
> > wouldn't be so bad either to do:
> > 
> > 	if (XFS_IS_CORRUPT(...)) {
> > 		error = -EFSCORRUPTED;
> > 		goto bad;
> > 	}
> > 
> > 	if (XFS_IS_CORRUPT(...)) {
> > 		error = -EFSCORRUPTED;
> > 		goto bad;
> > 	}
> > 
> > 	return 0;
> > bad:
> > 	if (error == -EFSCORRUPTED)
> > 		xfs_da_mark_sick(...);
> > 	return error;
> > 
> > Or using the shouty macro above:
> > 
> > 	if (XFS_IS_CORRUPT(...)) {
> > 		error = XFS_DA_EFSCORRUPTED(...);
> > 		goto bad;
> > 	}
> > 
> > 	if (XFS_IS_CORRUPT(...)) {
> > 		error = XFS_DA_EFSCORRUPTED(...);
> > 		goto bad;
> > 	}
> > 
> > bad:
> > 	return error;
> > 
> > I'll think about that.  It doesn't sound so bad when coding it up in
> > this email.
> > 
> 
> I suppose a macro is nice in that it enforces sickness is updated
> wherever -EFSCORRUPTED occurs, or at least can easily be verified by
> grepping. I find the separate macros pattern a little confusing, FWIW,
> simply because at a glance it looks like a garbled bunch of logic to me.
> I.e. I see 'if (IS_CORRUPT()) SOMETHING_CORRUPTED(); ...' and wonder wtf
> that is doing, for one. It's also not immediately obvious when we should
> use one or not the other, etc. This is getting into bikeshedding
> territory though and I don't have much of a better suggestion atm...

...one /could/ have specific IS_CORRUPT macros mapping to different
types of things.  Though I think this could easily get messy:

#define XFS_DIR_IS_CORRUPT(dp, perror, expr) \
	(unlikely(expr) ? xfs_corruption_report(#expr, ...), \
			  *(perror) = -EFSCORRUPTED, \
			  xfs_da_mark_sick(dp, XFS_DATA_FORK), true : false)

I don't want to load up these macros with too much stuff, but I guess at
least that reduces the directory code to:

	if (XFS_DIR_IS_CORRUPT(dp, &error, blah == badvalue))
		goto out;
	...
	if (XFS_DIR_IS_CORRUPT(dp, &error, ugh == NULL))
		return error;
out:
	return error;

Though now we're getting pretty far from the original intent to kill off
wonky macros.  At least these are less weird, so maybe this won't set
off a round of macro bikeshed rage?

--D

> 
> Brian
> 
> > --D
> > 
> > > 
> > > Brian
> > > 
> > > >  		if (((retval == -ENOENT) || (retval == -ENOATTR)) &&
> > > > @@ -2250,8 +2262,10 @@ xfs_da3_swap_lastblock(
> > > >  	error = xfs_bmap_last_before(tp, dp, &lastoff, w);
> > > >  	if (error)
> > > >  		return error;
> > > > -	if (XFS_IS_CORRUPT(mp, lastoff == 0))
> > > > +	if (XFS_IS_CORRUPT(mp, lastoff == 0)) {
> > > > +		xfs_da_mark_sick(args);
> > > >  		return -EFSCORRUPTED;
> > > > +	}
> > > >  	/*
> > > >  	 * Read the last block in the btree space.
> > > >  	 */
> > > > @@ -2300,6 +2314,7 @@ xfs_da3_swap_lastblock(
> > > >  		if (XFS_IS_CORRUPT(mp,
> > > >  				   be32_to_cpu(sib_info->forw) != last_blkno ||
> > > >  				   sib_info->magic != dead_info->magic)) {
> > > > +			xfs_da_mark_sick(args);
> > > >  			error = -EFSCORRUPTED;
> > > >  			goto done;
> > > >  		}
> > > > @@ -2320,6 +2335,7 @@ xfs_da3_swap_lastblock(
> > > >  		if (XFS_IS_CORRUPT(mp,
> > > >  				   be32_to_cpu(sib_info->back) != last_blkno ||
> > > >  				   sib_info->magic != dead_info->magic)) {
> > > > +			xfs_da_mark_sick(args);
> > > >  			error = -EFSCORRUPTED;
> > > >  			goto done;
> > > >  		}
> > > > @@ -2342,6 +2358,7 @@ xfs_da3_swap_lastblock(
> > > >  		xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node);
> > > >  		if (XFS_IS_CORRUPT(mp,
> > > >  				   level >= 0 && level != par_hdr.level + 1)) {
> > > > +			xfs_da_mark_sick(args);
> > > >  			error = -EFSCORRUPTED;
> > > >  			goto done;
> > > >  		}
> > > > @@ -2353,6 +2370,7 @@ xfs_da3_swap_lastblock(
> > > >  		     entno++)
> > > >  			continue;
> > > >  		if (XFS_IS_CORRUPT(mp, entno == par_hdr.count)) {
> > > > +			xfs_da_mark_sick(args);
> > > >  			error = -EFSCORRUPTED;
> > > >  			goto done;
> > > >  		}
> > > > @@ -2378,6 +2396,7 @@ xfs_da3_swap_lastblock(
> > > >  		xfs_trans_brelse(tp, par_buf);
> > > >  		par_buf = NULL;
> > > >  		if (XFS_IS_CORRUPT(mp, par_blkno == 0)) {
> > > > +			xfs_da_mark_sick(args);
> > > >  			error = -EFSCORRUPTED;
> > > >  			goto done;
> > > >  		}
> > > > @@ -2387,6 +2406,7 @@ xfs_da3_swap_lastblock(
> > > >  		par_node = par_buf->b_addr;
> > > >  		xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node);
> > > >  		if (XFS_IS_CORRUPT(mp, par_hdr.level != level)) {
> > > > +			xfs_da_mark_sick(args);
> > > >  			error = -EFSCORRUPTED;
> > > >  			goto done;
> > > >  		}
> > > > @@ -2601,6 +2621,7 @@ xfs_dabuf_map(
> > > >  					irecs[i].br_state);
> > > >  			}
> > > >  		}
> > > > +		xfs_dirattr_mark_sick(dp, whichfork);
> > > >  		error = -EFSCORRUPTED;
> > > >  		goto out;
> > > >  	}
> > > > @@ -2693,6 +2714,8 @@ xfs_da_read_buf(
> > > >  	error = xfs_trans_read_buf_map(dp->i_mount, trans,
> > > >  					dp->i_mount->m_ddev_targp,
> > > >  					mapp, nmap, 0, &bp, ops);
> > > > +	if (xfs_metadata_is_sick(error))
> > > > +		xfs_dirattr_mark_sick(dp, whichfork);
> > > >  	if (error)
> > > >  		goto out_free;
> > > >  
> > > > diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
> > > > index 0aa87cbde49e..e1aa411a1b8b 100644
> > > > --- a/fs/xfs/libxfs/xfs_dir2.c
> > > > +++ b/fs/xfs/libxfs/xfs_dir2.c
> > > > @@ -18,6 +18,7 @@
> > > >  #include "xfs_errortag.h"
> > > >  #include "xfs_error.h"
> > > >  #include "xfs_trace.h"
> > > > +#include "xfs_health.h"
> > > >  
> > > >  struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR };
> > > >  
> > > > @@ -608,8 +609,10 @@ xfs_dir2_isblock(
> > > >  	rval = XFS_FSB_TO_B(args->dp->i_mount, last) == args->geo->blksize;
> > > >  	if (XFS_IS_CORRUPT(args->dp->i_mount,
> > > >  			   rval != 0 &&
> > > > -			   args->dp->i_d.di_size != args->geo->blksize))
> > > > +			   args->dp->i_d.di_size != args->geo->blksize)) {
> > > > +		xfs_da_mark_sick(args);
> > > >  		return -EFSCORRUPTED;
> > > > +	}
> > > >  	*vp = rval;
> > > >  	return 0;
> > > >  }
> > > > diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
> > > > index a6eb71a62b53..80cc9c7ea4e5 100644
> > > > --- a/fs/xfs/libxfs/xfs_dir2_data.c
> > > > +++ b/fs/xfs/libxfs/xfs_dir2_data.c
> > > > @@ -18,6 +18,7 @@
> > > >  #include "xfs_trans.h"
> > > >  #include "xfs_buf_item.h"
> > > >  #include "xfs_log.h"
> > > > +#include "xfs_health.h"
> > > >  
> > > >  static xfs_failaddr_t xfs_dir2_data_freefind_verify(
> > > >  		struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_free *bf,
> > > > @@ -1170,6 +1171,7 @@ xfs_dir2_data_use_free(
> > > >  corrupt:
> > > >  	xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, args->dp->i_mount,
> > > >  			hdr, sizeof(*hdr), __FILE__, __LINE__, fa);
> > > > +	xfs_da_mark_sick(args);
> > > >  	return -EFSCORRUPTED;
> > > >  }
> > > >  
> > > > diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
> > > > index 73edd96ce0ac..32d17420fff3 100644
> > > > --- a/fs/xfs/libxfs/xfs_dir2_leaf.c
> > > > +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
> > > > @@ -19,6 +19,7 @@
> > > >  #include "xfs_trace.h"
> > > >  #include "xfs_trans.h"
> > > >  #include "xfs_buf_item.h"
> > > > +#include "xfs_health.h"
> > > >  
> > > >  /*
> > > >   * Local function declarations.
> > > > @@ -1386,8 +1387,10 @@ xfs_dir2_leaf_removename(
> > > >  	bestsp = xfs_dir2_leaf_bests_p(ltp);
> > > >  	if (be16_to_cpu(bestsp[db]) != oldbest) {
> > > >  		xfs_buf_corruption_error(lbp);
> > > > +		xfs_da_mark_sick(args);
> > > >  		return -EFSCORRUPTED;
> > > >  	}
> > > > +
> > > >  	/*
> > > >  	 * Mark the former data entry unused.
> > > >  	 */
> > > > diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
> > > > index 3a8b0625a08b..e0f3ab254a1a 100644
> > > > --- a/fs/xfs/libxfs/xfs_dir2_node.c
> > > > +++ b/fs/xfs/libxfs/xfs_dir2_node.c
> > > > @@ -20,6 +20,7 @@
> > > >  #include "xfs_trans.h"
> > > >  #include "xfs_buf_item.h"
> > > >  #include "xfs_log.h"
> > > > +#include "xfs_health.h"
> > > >  
> > > >  /*
> > > >   * Function declarations.
> > > > @@ -228,6 +229,7 @@ __xfs_dir3_free_read(
> > > >  	if (fa) {
> > > >  		xfs_verifier_error(*bpp, -EFSCORRUPTED, fa);
> > > >  		xfs_trans_brelse(tp, *bpp);
> > > > +		xfs_dirattr_mark_sick(dp, XFS_DATA_FORK);
> > > >  		return -EFSCORRUPTED;
> > > >  	}
> > > >  
> > > > @@ -440,6 +442,7 @@ xfs_dir2_leaf_to_node(
> > > >  	if (be32_to_cpu(ltp->bestcount) >
> > > >  				(uint)dp->i_d.di_size / args->geo->blksize) {
> > > >  		xfs_buf_corruption_error(lbp);
> > > > +		xfs_da_mark_sick(args);
> > > >  		return -EFSCORRUPTED;
> > > >  	}
> > > >  
> > > > @@ -514,6 +517,7 @@ xfs_dir2_leafn_add(
> > > >  	 */
> > > >  	if (index < 0) {
> > > >  		xfs_buf_corruption_error(bp);
> > > > +		xfs_da_mark_sick(args);
> > > >  		return -EFSCORRUPTED;
> > > >  	}
> > > >  
> > > > @@ -733,6 +737,7 @@ xfs_dir2_leafn_lookup_for_addname(
> > > >  					   cpu_to_be16(NULLDATAOFF))) {
> > > >  				if (curfdb != newfdb)
> > > >  					xfs_trans_brelse(tp, curbp);
> > > > +				xfs_da_mark_sick(args);
> > > >  				return -EFSCORRUPTED;
> > > >  			}
> > > >  			curfdb = newfdb;
> > > > @@ -801,6 +806,7 @@ xfs_dir2_leafn_lookup_for_entry(
> > > >  	xfs_dir3_leaf_check(dp, bp);
> > > >  	if (leafhdr.count <= 0) {
> > > >  		xfs_buf_corruption_error(bp);
> > > > +		xfs_da_mark_sick(args);
> > > >  		return -EFSCORRUPTED;
> > > >  	}
> > > >  
> > > > @@ -1737,6 +1743,7 @@ xfs_dir2_node_add_datablk(
> > > >  			} else {
> > > >  				xfs_alert(mp, " ... fblk is NULL");
> > > >  			}
> > > > +			xfs_da_mark_sick(args);
> > > >  			return -EFSCORRUPTED;
> > > >  		}
> > > >  
> > > > diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h
> > > > index 2049419e9555..d9404cd3d09b 100644
> > > > --- a/fs/xfs/libxfs/xfs_health.h
> > > > +++ b/fs/xfs/libxfs/xfs_health.h
> > > > @@ -38,6 +38,7 @@ struct xfs_perag;
> > > >  struct xfs_inode;
> > > >  struct xfs_fsop_geom;
> > > >  struct xfs_btree_cur;
> > > > +struct xfs_da_args;
> > > >  
> > > >  /* Observable health issues for metadata spanning the entire filesystem. */
> > > >  #define XFS_SICK_FS_COUNTERS	(1 << 0)  /* summary counters */
> > > > @@ -141,6 +142,8 @@ void xfs_inode_measure_sickness(struct xfs_inode *ip, unsigned int *sick,
> > > >  void xfs_health_unmount(struct xfs_mount *mp);
> > > >  void xfs_bmap_mark_sick(struct xfs_inode *ip, int whichfork);
> > > >  void xfs_btree_mark_sick(struct xfs_btree_cur *cur);
> > > > +void xfs_dirattr_mark_sick(struct xfs_inode *ip, int whichfork);
> > > > +void xfs_da_mark_sick(struct xfs_da_args *args);
> > > >  
> > > >  /* Now some helpers. */
> > > >  
> > > > diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
> > > > index a78c501f6fb1..429a97494ffa 100644
> > > > --- a/fs/xfs/xfs_attr_inactive.c
> > > > +++ b/fs/xfs/xfs_attr_inactive.c
> > > > @@ -23,6 +23,7 @@
> > > >  #include "xfs_quota.h"
> > > >  #include "xfs_dir2.h"
> > > >  #include "xfs_error.h"
> > > > +#include "xfs_health.h"
> > > >  
> > > >  /*
> > > >   * Look at all the extents for this logical region,
> > > > @@ -209,6 +210,7 @@ xfs_attr3_node_inactive(
> > > >  	if (level > XFS_DA_NODE_MAXDEPTH) {
> > > >  		xfs_trans_brelse(*trans, bp);	/* no locks for later trans */
> > > >  		xfs_buf_corruption_error(bp);
> > > > +		xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> > > >  		return -EFSCORRUPTED;
> > > >  	}
> > > >  
> > > > @@ -256,6 +258,7 @@ xfs_attr3_node_inactive(
> > > >  			error = xfs_attr3_leaf_inactive(trans, dp, child_bp);
> > > >  			break;
> > > >  		default:
> > > > +			xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> > > >  			xfs_buf_corruption_error(child_bp);
> > > >  			xfs_trans_brelse(*trans, child_bp);
> > > >  			error = -EFSCORRUPTED;
> > > > @@ -342,6 +345,7 @@ xfs_attr3_root_inactive(
> > > >  		error = xfs_attr3_leaf_inactive(trans, dp, bp);
> > > >  		break;
> > > >  	default:
> > > > +		xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> > > >  		error = -EFSCORRUPTED;
> > > >  		xfs_buf_corruption_error(bp);
> > > >  		xfs_trans_brelse(*trans, bp);
> > > > diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
> > > > index 7a099df88a0c..1a2a3d4ce422 100644
> > > > --- a/fs/xfs/xfs_attr_list.c
> > > > +++ b/fs/xfs/xfs_attr_list.c
> > > > @@ -21,6 +21,7 @@
> > > >  #include "xfs_error.h"
> > > >  #include "xfs_trace.h"
> > > >  #include "xfs_dir2.h"
> > > > +#include "xfs_health.h"
> > > >  
> > > >  STATIC int
> > > >  xfs_attr_shortform_compare(const void *a, const void *b)
> > > > @@ -88,8 +89,10 @@ xfs_attr_shortform_list(
> > > >  		for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
> > > >  			if (XFS_IS_CORRUPT(context->dp->i_mount,
> > > >  					   !xfs_attr_namecheck(sfe->nameval,
> > > > -							       sfe->namelen)))
> > > > +							       sfe->namelen))) {
> > > > +				xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> > > >  				return -EFSCORRUPTED;
> > > > +			}
> > > >  			context->put_listent(context,
> > > >  					     sfe->flags,
> > > >  					     sfe->nameval,
> > > > @@ -131,6 +134,7 @@ xfs_attr_shortform_list(
> > > >  					     context->dp->i_mount, sfe,
> > > >  					     sizeof(*sfe));
> > > >  			kmem_free(sbuf);
> > > > +			xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> > > >  			return -EFSCORRUPTED;
> > > >  		}
> > > >  
> > > > @@ -181,6 +185,7 @@ xfs_attr_shortform_list(
> > > >  		if (XFS_IS_CORRUPT(context->dp->i_mount,
> > > >  				   !xfs_attr_namecheck(sbp->name,
> > > >  						       sbp->namelen))) {
> > > > +			xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> > > >  			error = -EFSCORRUPTED;
> > > >  			goto out;
> > > >  		}
> > > > @@ -268,8 +273,10 @@ xfs_attr_node_list_lookup(
> > > >  			return 0;
> > > >  
> > > >  		/* We can't point back to the root. */
> > > > -		if (XFS_IS_CORRUPT(mp, cursor->blkno == 0))
> > > > +		if (XFS_IS_CORRUPT(mp, cursor->blkno == 0)) {
> > > > +			xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> > > >  			return -EFSCORRUPTED;
> > > > +		}
> > > >  	}
> > > >  
> > > >  	if (expected_level != 0)
> > > > @@ -281,6 +288,7 @@ xfs_attr_node_list_lookup(
> > > >  out_corruptbuf:
> > > >  	xfs_buf_corruption_error(bp);
> > > >  	xfs_trans_brelse(tp, bp);
> > > > +	xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> > > >  	return -EFSCORRUPTED;
> > > >  }
> > > >  
> > > > @@ -471,8 +479,10 @@ xfs_attr3_leaf_list_int(
> > > >  		}
> > > >  
> > > >  		if (XFS_IS_CORRUPT(context->dp->i_mount,
> > > > -				   !xfs_attr_namecheck(name, namelen)))
> > > > +				   !xfs_attr_namecheck(name, namelen))) {
> > > > +			xfs_dirattr_mark_sick(context->dp, XFS_ATTR_FORK);
> > > >  			return -EFSCORRUPTED;
> > > > +		}
> > > >  		context->put_listent(context, entry->flags,
> > > >  					      name, namelen, valuelen);
> > > >  		if (context->seen_enough)
> > > > diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
> > > > index 95bc9ef8f5f9..715ded503334 100644
> > > > --- a/fs/xfs/xfs_dir2_readdir.c
> > > > +++ b/fs/xfs/xfs_dir2_readdir.c
> > > > @@ -18,6 +18,7 @@
> > > >  #include "xfs_bmap.h"
> > > >  #include "xfs_trans.h"
> > > >  #include "xfs_error.h"
> > > > +#include "xfs_health.h"
> > > >  
> > > >  /*
> > > >   * Directory file type support functions
> > > > @@ -119,8 +120,10 @@ xfs_dir2_sf_getdents(
> > > >  		ctx->pos = off & 0x7fffffff;
> > > >  		if (XFS_IS_CORRUPT(dp->i_mount,
> > > >  				   !xfs_dir2_namecheck(sfep->name,
> > > > -						       sfep->namelen)))
> > > > +						       sfep->namelen))) {
> > > > +			xfs_dirattr_mark_sick(dp, XFS_DATA_FORK);
> > > >  			return -EFSCORRUPTED;
> > > > +		}
> > > >  		if (!dir_emit(ctx, (char *)sfep->name, sfep->namelen, ino,
> > > >  			    xfs_dir3_get_dtype(mp, filetype)))
> > > >  			return 0;
> > > > @@ -461,6 +464,7 @@ xfs_dir2_leaf_getdents(
> > > >  		if (XFS_IS_CORRUPT(dp->i_mount,
> > > >  				   !xfs_dir2_namecheck(dep->name,
> > > >  						       dep->namelen))) {
> > > > +			xfs_dirattr_mark_sick(dp, XFS_DATA_FORK);
> > > >  			error = -EFSCORRUPTED;
> > > >  			break;
> > > >  		}
> > > > diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c
> > > > index 1f09027c55ad..c1b6e8fb72ec 100644
> > > > --- a/fs/xfs/xfs_health.c
> > > > +++ b/fs/xfs/xfs_health.c
> > > > @@ -15,6 +15,8 @@
> > > >  #include "xfs_trace.h"
> > > >  #include "xfs_health.h"
> > > >  #include "xfs_btree.h"
> > > > +#include "xfs_da_format.h"
> > > > +#include "xfs_da_btree.h"
> > > >  
> > > >  /*
> > > >   * Warn about metadata corruption that we detected but haven't fixed, and
> > > > @@ -517,3 +519,40 @@ xfs_btree_mark_sick(
> > > >  
> > > >  	xfs_agno_mark_sick(cur->bc_mp, cur->bc_private.a.agno, mask);
> > > >  }
> > > > +
> > > > +/*
> > > > + * Record observations of dir/attr btree corruption with the health tracking
> > > > + * system.
> > > > + */
> > > > +void
> > > > +xfs_dirattr_mark_sick(
> > > > +	struct xfs_inode	*ip,
> > > > +	int			whichfork)
> > > > +{
> > > > +	unsigned int		mask;
> > > > +
> > > > +	switch (whichfork) {
> > > > +	case XFS_DATA_FORK:
> > > > +		mask = XFS_SICK_INO_DIR;
> > > > +		break;
> > > > +	case XFS_ATTR_FORK:
> > > > +		mask = XFS_SICK_INO_XATTR;
> > > > +		break;
> > > > +	default:
> > > > +		ASSERT(0);
> > > > +		return;
> > > > +	}
> > > > +
> > > > +	xfs_inode_mark_sick(ip, mask);
> > > > +}
> > > > +
> > > > +/*
> > > > + * Record observations of dir/attr btree corruption with the health tracking
> > > > + * system.
> > > > + */
> > > > +void
> > > > +xfs_da_mark_sick(
> > > > +	struct xfs_da_args	*args)
> > > > +{
> > > > +	xfs_dirattr_mark_sick(args->dp, args->whichfork);
> > > > +}
> > > > 
> > > 
> > 
>
Brian Foster Nov. 22, 2019, 12:28 p.m. UTC | #5
On Thu, Nov 21, 2019 at 05:03:32PM -0800, Darrick J. Wong wrote:
> On Thu, Nov 21, 2019 at 08:26:27AM -0500, Brian Foster wrote:
> > On Wed, Nov 20, 2019 at 08:55:08AM -0800, Darrick J. Wong wrote:
> > > On Wed, Nov 20, 2019 at 11:11:47AM -0500, Brian Foster wrote:
> > > > On Thu, Nov 14, 2019 at 10:19:46AM -0800, Darrick J. Wong wrote:
> > > > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > > > 
> > > > > Whenever we encounter corrupt directory or extended attribute blocks, we
> > > > > should report that to the health monitoring system for later reporting.
> > > > > 
> > > > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > > > ---
> > > > >  fs/xfs/libxfs/xfs_attr_leaf.c   |    5 ++++-
> > > > >  fs/xfs/libxfs/xfs_attr_remote.c |   27 ++++++++++++++++-----------
> > > > >  fs/xfs/libxfs/xfs_da_btree.c    |   29 ++++++++++++++++++++++++++---
> > > > >  fs/xfs/libxfs/xfs_dir2.c        |    5 ++++-
> > > > >  fs/xfs/libxfs/xfs_dir2_data.c   |    2 ++
> > > > >  fs/xfs/libxfs/xfs_dir2_leaf.c   |    3 +++
> > > > >  fs/xfs/libxfs/xfs_dir2_node.c   |    7 +++++++
> > > > >  fs/xfs/libxfs/xfs_health.h      |    3 +++
> > > > >  fs/xfs/xfs_attr_inactive.c      |    4 ++++
> > > > >  fs/xfs/xfs_attr_list.c          |   16 +++++++++++++---
> > > > >  fs/xfs/xfs_dir2_readdir.c       |    6 +++++-
> > > > >  fs/xfs/xfs_health.c             |   39 +++++++++++++++++++++++++++++++++++++++
> > > > >  12 files changed, 126 insertions(+), 20 deletions(-)
> > > > > 
> > > > > 
> > > > ...
> > > > > diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
> > > > > index e424b004e3cb..a17622dadf00 100644
> > > > > --- a/fs/xfs/libxfs/xfs_da_btree.c
> > > > > +++ b/fs/xfs/libxfs/xfs_da_btree.c
> > > > ...
> > > > > @@ -1589,6 +1593,7 @@ xfs_da3_node_lookup_int(
> > > > >  
> > > > >  		if (magic != XFS_DA_NODE_MAGIC && magic != XFS_DA3_NODE_MAGIC) {
> > > > >  			xfs_buf_corruption_error(blk->bp);
> > > > > +			xfs_da_mark_sick(args);
> > > > >  			return -EFSCORRUPTED;
> > > > >  		}
> > > > >  
> > > > > @@ -1604,6 +1609,7 @@ xfs_da3_node_lookup_int(
> > > > >  		/* Tree taller than we can handle; bail out! */
> > > > >  		if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
> > > > >  			xfs_buf_corruption_error(blk->bp);
> > > > > +			xfs_da_mark_sick(args);
> > > > >  			return -EFSCORRUPTED;
> > > > >  		}
> > > > >  
> > > > > @@ -1612,6 +1618,7 @@ xfs_da3_node_lookup_int(
> > > > >  			expected_level = nodehdr.level - 1;
> > > > >  		else if (expected_level != nodehdr.level) {
> > > > >  			xfs_buf_corruption_error(blk->bp);
> > > > > +			xfs_da_mark_sick(args);
> > > > >  			return -EFSCORRUPTED;
> > > > >  		} else
> > > > >  			expected_level--;
> > > > > @@ -1663,12 +1670,16 @@ xfs_da3_node_lookup_int(
> > > > >  		}
> > > > >  
> > > > >  		/* We can't point back to the root. */
> > > > > -		if (XFS_IS_CORRUPT(dp->i_mount, blkno == args->geo->leafblk))
> > > > > +		if (XFS_IS_CORRUPT(dp->i_mount, blkno == args->geo->leafblk)) {
> > > > > +			xfs_da_mark_sick(args);
> > > > >  			return -EFSCORRUPTED;
> > > > > +		}
> > > > >  	}
> > > > >  
> > > > > -	if (XFS_IS_CORRUPT(dp->i_mount, expected_level != 0))
> > > > > +	if (XFS_IS_CORRUPT(dp->i_mount, expected_level != 0)) {
> > > > > +		xfs_da_mark_sick(args);
> > > > >  		return -EFSCORRUPTED;
> > > > > +	}
> > > > >  
> > > > >  	/*
> > > > >  	 * A leaf block that ends in the hashval that we are interested in
> > > > > @@ -1686,6 +1697,7 @@ xfs_da3_node_lookup_int(
> > > > >  			args->blkno = blk->blkno;
> > > > >  		} else {
> > > > >  			ASSERT(0);
> > > > > +			xfs_da_mark_sick(args);
> > > > >  			return -EFSCORRUPTED;
> > > > >  		}
> > > > 
> > > > I'm just kind of skimming through the rest for general feedback at this
> > > > point given previous comments, but it might be nice to start using exit
> > > > labels at some of these places where we're enlarging and duplicating the
> > > > error path for particular errors.
> > > 
> > > Yeah.  This current iteration is pretty wordy since I used coccinelle to
> > > find all the EFSCORRUPTED clauses and inject the appropriate _mark_sick
> > > call.
> > > 
> > > > It's not so much about the code in
> > > > these patches, but rather to hopefully ease maintaining these state bits
> > > > properly in new code where devs/reviewers might not know much about
> > > > scrub state or have it in mind. Short of having some kind of generic
> > > > helper to handle corruption state, ISTM that the combination of using
> > > > verifiers where possible and common exit labels anywhere else we
> > > > generate -EFSCORRUPTED at multiple places within some function could
> > > > shrink these patches a bit..
> > > 
> > > <nod> Eric suggested on IRC that maybe the _mark_sick functions should
> > > return EFSCORRUPTED so that we could at least collapse that to:
> > > 
> > > if (XFS_IS_CORRUPT(...)) {
> > > 	error = xfs_da_mark_sick(...);
> > > 	goto barf;
> > > }
> > > 
> > > However, doing it the wordy way I've done it has the neat effects (IMHO)
> > > that you can find all the places where xfs decides some metadata is
> > > corrupt by grepping for EFSCORRUPTED, and confirm that each place it
> > > does that also has a corresponding _mark_sick call.
> > > 
> > 
> > Yeah, that was actually my thought process in suggesting pushing the
> > mark_sick() calls down into verifiers as well.
> 
> <nod> It does strike me as a little odd that the verifiers are the /one/
> place where EFSCORRUPTED isn't preceded or followed by a _mark_sick.
> 
> > It seems a little more clear (and open to future cleanups) with a
> > strict pattern of setting sickness in the locations that generate
> > corruption errors. Of course that likely means some special macro or
> > something like you propose below, but I didn't want to quite go there
> > until we could put the state updates in the right places.
> 
> Yeah....
> 
> > > I guess you could create a dorky shouty wrapper to maintain that greppy
> > > property:
> > > 
> > > #define XFS_DA_EFSCORRUPTED(...) \
> > > 	(xfs_da_mark_sick(...), -EFSCORRUPTED)
> > > 
> > > But... that might be stylistically undesirable.  OTOH I guess it
> > > wouldn't be so bad either to do:
> > > 
> > > 	if (XFS_IS_CORRUPT(...)) {
> > > 		error = -EFSCORRUPTED;
> > > 		goto bad;
> > > 	}
> > > 
> > > 	if (XFS_IS_CORRUPT(...)) {
> > > 		error = -EFSCORRUPTED;
> > > 		goto bad;
> > > 	}
> > > 
> > > 	return 0;
> > > bad:
> > > 	if (error == -EFSCORRUPTED)
> > > 		xfs_da_mark_sick(...);
> > > 	return error;
> > > 
> > > Or using the shouty macro above:
> > > 
> > > 	if (XFS_IS_CORRUPT(...)) {
> > > 		error = XFS_DA_EFSCORRUPTED(...);
> > > 		goto bad;
> > > 	}
> > > 
> > > 	if (XFS_IS_CORRUPT(...)) {
> > > 		error = XFS_DA_EFSCORRUPTED(...);
> > > 		goto bad;
> > > 	}
> > > 
> > > bad:
> > > 	return error;
> > > 
> > > I'll think about that.  It doesn't sound so bad when coding it up in
> > > this email.
> > > 
> > 
> > I suppose a macro is nice in that it enforces sickness is updated
> > wherever -EFSCORRUPTED occurs, or at least can easily be verified by
> > grepping. I find the separate macros pattern a little confusing, FWIW,
> > simply because at a glance it looks like a garbled bunch of logic to me.
> > I.e. I see 'if (IS_CORRUPT()) SOMETHING_CORRUPTED(); ...' and wonder wtf
> > that is doing, for one. It's also not immediately obvious when we should
> > use one or not the other, etc. This is getting into bikeshedding
> > territory though and I don't have much of a better suggestion atm...
> 
> ...one /could/ have specific IS_CORRUPT macros mapping to different
> types of things.  Though I think this could easily get messy:
> 

Yep.

> #define XFS_DIR_IS_CORRUPT(dp, perror, expr) \
> 	(unlikely(expr) ? xfs_corruption_report(#expr, ...), \
> 			  *(perror) = -EFSCORRUPTED, \
> 			  xfs_da_mark_sick(dp, XFS_DATA_FORK), true : false)
> 
> I don't want to load up these macros with too much stuff, but I guess at
> least that reduces the directory code to:
> 
> 	if (XFS_DIR_IS_CORRUPT(dp, &error, blah == badvalue))
> 		goto out;
> 	...
> 	if (XFS_DIR_IS_CORRUPT(dp, &error, ugh == NULL))
> 		return error;
> out:
> 	return error;
> 
> Though now we're getting pretty far from the original intent to kill off
> wonky macros.  At least these are less weird, so maybe this won't set
> off a round of macro bikeshed rage?
> 

I dunno.. I'm trying to find an opinion beyond a waffley sense of "is it
worth changing?" on the whole macro thing. While I agree that the
original macros are ugly, they never really confused me or affected
readability so I didn't care too much whether they stay or go TBH.

In general, I think having usable interfaces for the developer and
readable functional code is more important than how ugly/bloated the
macro might be. That's why I really don't like the previous example that
combines multiple "simple" macros and turns that into some reusable
pattern. The resulting user code is not really readable IMO.

The DIR_IS_CORRUPT() example above reminds me a little more of the
original macros in that it is easy to use and makes the user code
concise. Indeed, it somewhat overloads the macro, but that seems
advantageous to me if the intent of this series is to add more
boilerplate associated with how we handle corruption errors generically.
In that regard, I find the DIR_IS_CORRUPT() approach preferable to
alternatives discussed so far (though I'd probably name it XFS_DA_*()
for consistency with the underlying health state type). Just my .02
though.. ;)

Brian

> --D
> 
> > 
> > Brian
> > 
> > > --D
> > > 
> > > > 
> > > > Brian
> > > > 
> > > > >  		if (((retval == -ENOENT) || (retval == -ENOATTR)) &&
> > > > > @@ -2250,8 +2262,10 @@ xfs_da3_swap_lastblock(
> > > > >  	error = xfs_bmap_last_before(tp, dp, &lastoff, w);
> > > > >  	if (error)
> > > > >  		return error;
> > > > > -	if (XFS_IS_CORRUPT(mp, lastoff == 0))
> > > > > +	if (XFS_IS_CORRUPT(mp, lastoff == 0)) {
> > > > > +		xfs_da_mark_sick(args);
> > > > >  		return -EFSCORRUPTED;
> > > > > +	}
> > > > >  	/*
> > > > >  	 * Read the last block in the btree space.
> > > > >  	 */
> > > > > @@ -2300,6 +2314,7 @@ xfs_da3_swap_lastblock(
> > > > >  		if (XFS_IS_CORRUPT(mp,
> > > > >  				   be32_to_cpu(sib_info->forw) != last_blkno ||
> > > > >  				   sib_info->magic != dead_info->magic)) {
> > > > > +			xfs_da_mark_sick(args);
> > > > >  			error = -EFSCORRUPTED;
> > > > >  			goto done;
> > > > >  		}
> > > > > @@ -2320,6 +2335,7 @@ xfs_da3_swap_lastblock(
> > > > >  		if (XFS_IS_CORRUPT(mp,
> > > > >  				   be32_to_cpu(sib_info->back) != last_blkno ||
> > > > >  				   sib_info->magic != dead_info->magic)) {
> > > > > +			xfs_da_mark_sick(args);
> > > > >  			error = -EFSCORRUPTED;
> > > > >  			goto done;
> > > > >  		}
> > > > > @@ -2342,6 +2358,7 @@ xfs_da3_swap_lastblock(
> > > > >  		xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node);
> > > > >  		if (XFS_IS_CORRUPT(mp,
> > > > >  				   level >= 0 && level != par_hdr.level + 1)) {
> > > > > +			xfs_da_mark_sick(args);
> > > > >  			error = -EFSCORRUPTED;
> > > > >  			goto done;
> > > > >  		}
> > > > > @@ -2353,6 +2370,7 @@ xfs_da3_swap_lastblock(
> > > > >  		     entno++)
> > > > >  			continue;
> > > > >  		if (XFS_IS_CORRUPT(mp, entno == par_hdr.count)) {
> > > > > +			xfs_da_mark_sick(args);
> > > > >  			error = -EFSCORRUPTED;
> > > > >  			goto done;
> > > > >  		}
> > > > > @@ -2378,6 +2396,7 @@ xfs_da3_swap_lastblock(
> > > > >  		xfs_trans_brelse(tp, par_buf);
> > > > >  		par_buf = NULL;
> > > > >  		if (XFS_IS_CORRUPT(mp, par_blkno == 0)) {
> > > > > +			xfs_da_mark_sick(args);
> > > > >  			error = -EFSCORRUPTED;
> > > > >  			goto done;
> > > > >  		}
> > > > > @@ -2387,6 +2406,7 @@ xfs_da3_swap_lastblock(
> > > > >  		par_node = par_buf->b_addr;
> > > > >  		xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node);
> > > > >  		if (XFS_IS_CORRUPT(mp, par_hdr.level != level)) {
> > > > > +			xfs_da_mark_sick(args);
> > > > >  			error = -EFSCORRUPTED;
> > > > >  			goto done;
> > > > >  		}
> > > > > @@ -2601,6 +2621,7 @@ xfs_dabuf_map(
> > > > >  					irecs[i].br_state);
> > > > >  			}
> > > > >  		}
> > > > > +		xfs_dirattr_mark_sick(dp, whichfork);
> > > > >  		error = -EFSCORRUPTED;
> > > > >  		goto out;
> > > > >  	}
> > > > > @@ -2693,6 +2714,8 @@ xfs_da_read_buf(
> > > > >  	error = xfs_trans_read_buf_map(dp->i_mount, trans,
> > > > >  					dp->i_mount->m_ddev_targp,
> > > > >  					mapp, nmap, 0, &bp, ops);
> > > > > +	if (xfs_metadata_is_sick(error))
> > > > > +		xfs_dirattr_mark_sick(dp, whichfork);
> > > > >  	if (error)
> > > > >  		goto out_free;
> > > > >  
> > > > > diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
> > > > > index 0aa87cbde49e..e1aa411a1b8b 100644
> > > > > --- a/fs/xfs/libxfs/xfs_dir2.c
> > > > > +++ b/fs/xfs/libxfs/xfs_dir2.c
> > > > > @@ -18,6 +18,7 @@
> > > > >  #include "xfs_errortag.h"
> > > > >  #include "xfs_error.h"
> > > > >  #include "xfs_trace.h"
> > > > > +#include "xfs_health.h"
> > > > >  
> > > > >  struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR };
> > > > >  
> > > > > @@ -608,8 +609,10 @@ xfs_dir2_isblock(
> > > > >  	rval = XFS_FSB_TO_B(args->dp->i_mount, last) == args->geo->blksize;
> > > > >  	if (XFS_IS_CORRUPT(args->dp->i_mount,
> > > > >  			   rval != 0 &&
> > > > > -			   args->dp->i_d.di_size != args->geo->blksize))
> > > > > +			   args->dp->i_d.di_size != args->geo->blksize)) {
> > > > > +		xfs_da_mark_sick(args);
> > > > >  		return -EFSCORRUPTED;
> > > > > +	}
> > > > >  	*vp = rval;
> > > > >  	return 0;
> > > > >  }
> > > > > diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
> > > > > index a6eb71a62b53..80cc9c7ea4e5 100644
> > > > > --- a/fs/xfs/libxfs/xfs_dir2_data.c
> > > > > +++ b/fs/xfs/libxfs/xfs_dir2_data.c
> > > > > @@ -18,6 +18,7 @@
> > > > >  #include "xfs_trans.h"
> > > > >  #include "xfs_buf_item.h"
> > > > >  #include "xfs_log.h"
> > > > > +#include "xfs_health.h"
> > > > >  
> > > > >  static xfs_failaddr_t xfs_dir2_data_freefind_verify(
> > > > >  		struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_free *bf,
> > > > > @@ -1170,6 +1171,7 @@ xfs_dir2_data_use_free(
> > > > >  corrupt:
> > > > >  	xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, args->dp->i_mount,
> > > > >  			hdr, sizeof(*hdr), __FILE__, __LINE__, fa);
> > > > > +	xfs_da_mark_sick(args);
> > > > >  	return -EFSCORRUPTED;
> > > > >  }
> > > > >  
> > > > > diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
> > > > > index 73edd96ce0ac..32d17420fff3 100644
> > > > > --- a/fs/xfs/libxfs/xfs_dir2_leaf.c
> > > > > +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
> > > > > @@ -19,6 +19,7 @@
> > > > >  #include "xfs_trace.h"
> > > > >  #include "xfs_trans.h"
> > > > >  #include "xfs_buf_item.h"
> > > > > +#include "xfs_health.h"
> > > > >  
> > > > >  /*
> > > > >   * Local function declarations.
> > > > > @@ -1386,8 +1387,10 @@ xfs_dir2_leaf_removename(
> > > > >  	bestsp = xfs_dir2_leaf_bests_p(ltp);
> > > > >  	if (be16_to_cpu(bestsp[db]) != oldbest) {
> > > > >  		xfs_buf_corruption_error(lbp);
> > > > > +		xfs_da_mark_sick(args);
> > > > >  		return -EFSCORRUPTED;
> > > > >  	}
> > > > > +
> > > > >  	/*
> > > > >  	 * Mark the former data entry unused.
> > > > >  	 */
> > > > > diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
> > > > > index 3a8b0625a08b..e0f3ab254a1a 100644
> > > > > --- a/fs/xfs/libxfs/xfs_dir2_node.c
> > > > > +++ b/fs/xfs/libxfs/xfs_dir2_node.c
> > > > > @@ -20,6 +20,7 @@
> > > > >  #include "xfs_trans.h"
> > > > >  #include "xfs_buf_item.h"
> > > > >  #include "xfs_log.h"
> > > > > +#include "xfs_health.h"
> > > > >  
> > > > >  /*
> > > > >   * Function declarations.
> > > > > @@ -228,6 +229,7 @@ __xfs_dir3_free_read(
> > > > >  	if (fa) {
> > > > >  		xfs_verifier_error(*bpp, -EFSCORRUPTED, fa);
> > > > >  		xfs_trans_brelse(tp, *bpp);
> > > > > +		xfs_dirattr_mark_sick(dp, XFS_DATA_FORK);
> > > > >  		return -EFSCORRUPTED;
> > > > >  	}
> > > > >  
> > > > > @@ -440,6 +442,7 @@ xfs_dir2_leaf_to_node(
> > > > >  	if (be32_to_cpu(ltp->bestcount) >
> > > > >  				(uint)dp->i_d.di_size / args->geo->blksize) {
> > > > >  		xfs_buf_corruption_error(lbp);
> > > > > +		xfs_da_mark_sick(args);
> > > > >  		return -EFSCORRUPTED;
> > > > >  	}
> > > > >  
> > > > > @@ -514,6 +517,7 @@ xfs_dir2_leafn_add(
> > > > >  	 */
> > > > >  	if (index < 0) {
> > > > >  		xfs_buf_corruption_error(bp);
> > > > > +		xfs_da_mark_sick(args);
> > > > >  		return -EFSCORRUPTED;
> > > > >  	}
> > > > >  
> > > > > @@ -733,6 +737,7 @@ xfs_dir2_leafn_lookup_for_addname(
> > > > >  					   cpu_to_be16(NULLDATAOFF))) {
> > > > >  				if (curfdb != newfdb)
> > > > >  					xfs_trans_brelse(tp, curbp);
> > > > > +				xfs_da_mark_sick(args);
> > > > >  				return -EFSCORRUPTED;
> > > > >  			}
> > > > >  			curfdb = newfdb;
> > > > > @@ -801,6 +806,7 @@ xfs_dir2_leafn_lookup_for_entry(
> > > > >  	xfs_dir3_leaf_check(dp, bp);
> > > > >  	if (leafhdr.count <= 0) {
> > > > >  		xfs_buf_corruption_error(bp);
> > > > > +		xfs_da_mark_sick(args);
> > > > >  		return -EFSCORRUPTED;
> > > > >  	}
> > > > >  
> > > > > @@ -1737,6 +1743,7 @@ xfs_dir2_node_add_datablk(
> > > > >  			} else {
> > > > >  				xfs_alert(mp, " ... fblk is NULL");
> > > > >  			}
> > > > > +			xfs_da_mark_sick(args);
> > > > >  			return -EFSCORRUPTED;
> > > > >  		}
> > > > >  
> > > > > diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h
> > > > > index 2049419e9555..d9404cd3d09b 100644
> > > > > --- a/fs/xfs/libxfs/xfs_health.h
> > > > > +++ b/fs/xfs/libxfs/xfs_health.h
> > > > > @@ -38,6 +38,7 @@ struct xfs_perag;
> > > > >  struct xfs_inode;
> > > > >  struct xfs_fsop_geom;
> > > > >  struct xfs_btree_cur;
> > > > > +struct xfs_da_args;
> > > > >  
> > > > >  /* Observable health issues for metadata spanning the entire filesystem. */
> > > > >  #define XFS_SICK_FS_COUNTERS	(1 << 0)  /* summary counters */
> > > > > @@ -141,6 +142,8 @@ void xfs_inode_measure_sickness(struct xfs_inode *ip, unsigned int *sick,
> > > > >  void xfs_health_unmount(struct xfs_mount *mp);
> > > > >  void xfs_bmap_mark_sick(struct xfs_inode *ip, int whichfork);
> > > > >  void xfs_btree_mark_sick(struct xfs_btree_cur *cur);
> > > > > +void xfs_dirattr_mark_sick(struct xfs_inode *ip, int whichfork);
> > > > > +void xfs_da_mark_sick(struct xfs_da_args *args);
> > > > >  
> > > > >  /* Now some helpers. */
> > > > >  
> > > > > diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
> > > > > index a78c501f6fb1..429a97494ffa 100644
> > > > > --- a/fs/xfs/xfs_attr_inactive.c
> > > > > +++ b/fs/xfs/xfs_attr_inactive.c
> > > > > @@ -23,6 +23,7 @@
> > > > >  #include "xfs_quota.h"
> > > > >  #include "xfs_dir2.h"
> > > > >  #include "xfs_error.h"
> > > > > +#include "xfs_health.h"
> > > > >  
> > > > >  /*
> > > > >   * Look at all the extents for this logical region,
> > > > > @@ -209,6 +210,7 @@ xfs_attr3_node_inactive(
> > > > >  	if (level > XFS_DA_NODE_MAXDEPTH) {
> > > > >  		xfs_trans_brelse(*trans, bp);	/* no locks for later trans */
> > > > >  		xfs_buf_corruption_error(bp);
> > > > > +		xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> > > > >  		return -EFSCORRUPTED;
> > > > >  	}
> > > > >  
> > > > > @@ -256,6 +258,7 @@ xfs_attr3_node_inactive(
> > > > >  			error = xfs_attr3_leaf_inactive(trans, dp, child_bp);
> > > > >  			break;
> > > > >  		default:
> > > > > +			xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> > > > >  			xfs_buf_corruption_error(child_bp);
> > > > >  			xfs_trans_brelse(*trans, child_bp);
> > > > >  			error = -EFSCORRUPTED;
> > > > > @@ -342,6 +345,7 @@ xfs_attr3_root_inactive(
> > > > >  		error = xfs_attr3_leaf_inactive(trans, dp, bp);
> > > > >  		break;
> > > > >  	default:
> > > > > +		xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> > > > >  		error = -EFSCORRUPTED;
> > > > >  		xfs_buf_corruption_error(bp);
> > > > >  		xfs_trans_brelse(*trans, bp);
> > > > > diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
> > > > > index 7a099df88a0c..1a2a3d4ce422 100644
> > > > > --- a/fs/xfs/xfs_attr_list.c
> > > > > +++ b/fs/xfs/xfs_attr_list.c
> > > > > @@ -21,6 +21,7 @@
> > > > >  #include "xfs_error.h"
> > > > >  #include "xfs_trace.h"
> > > > >  #include "xfs_dir2.h"
> > > > > +#include "xfs_health.h"
> > > > >  
> > > > >  STATIC int
> > > > >  xfs_attr_shortform_compare(const void *a, const void *b)
> > > > > @@ -88,8 +89,10 @@ xfs_attr_shortform_list(
> > > > >  		for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
> > > > >  			if (XFS_IS_CORRUPT(context->dp->i_mount,
> > > > >  					   !xfs_attr_namecheck(sfe->nameval,
> > > > > -							       sfe->namelen)))
> > > > > +							       sfe->namelen))) {
> > > > > +				xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> > > > >  				return -EFSCORRUPTED;
> > > > > +			}
> > > > >  			context->put_listent(context,
> > > > >  					     sfe->flags,
> > > > >  					     sfe->nameval,
> > > > > @@ -131,6 +134,7 @@ xfs_attr_shortform_list(
> > > > >  					     context->dp->i_mount, sfe,
> > > > >  					     sizeof(*sfe));
> > > > >  			kmem_free(sbuf);
> > > > > +			xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> > > > >  			return -EFSCORRUPTED;
> > > > >  		}
> > > > >  
> > > > > @@ -181,6 +185,7 @@ xfs_attr_shortform_list(
> > > > >  		if (XFS_IS_CORRUPT(context->dp->i_mount,
> > > > >  				   !xfs_attr_namecheck(sbp->name,
> > > > >  						       sbp->namelen))) {
> > > > > +			xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> > > > >  			error = -EFSCORRUPTED;
> > > > >  			goto out;
> > > > >  		}
> > > > > @@ -268,8 +273,10 @@ xfs_attr_node_list_lookup(
> > > > >  			return 0;
> > > > >  
> > > > >  		/* We can't point back to the root. */
> > > > > -		if (XFS_IS_CORRUPT(mp, cursor->blkno == 0))
> > > > > +		if (XFS_IS_CORRUPT(mp, cursor->blkno == 0)) {
> > > > > +			xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> > > > >  			return -EFSCORRUPTED;
> > > > > +		}
> > > > >  	}
> > > > >  
> > > > >  	if (expected_level != 0)
> > > > > @@ -281,6 +288,7 @@ xfs_attr_node_list_lookup(
> > > > >  out_corruptbuf:
> > > > >  	xfs_buf_corruption_error(bp);
> > > > >  	xfs_trans_brelse(tp, bp);
> > > > > +	xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> > > > >  	return -EFSCORRUPTED;
> > > > >  }
> > > > >  
> > > > > @@ -471,8 +479,10 @@ xfs_attr3_leaf_list_int(
> > > > >  		}
> > > > >  
> > > > >  		if (XFS_IS_CORRUPT(context->dp->i_mount,
> > > > > -				   !xfs_attr_namecheck(name, namelen)))
> > > > > +				   !xfs_attr_namecheck(name, namelen))) {
> > > > > +			xfs_dirattr_mark_sick(context->dp, XFS_ATTR_FORK);
> > > > >  			return -EFSCORRUPTED;
> > > > > +		}
> > > > >  		context->put_listent(context, entry->flags,
> > > > >  					      name, namelen, valuelen);
> > > > >  		if (context->seen_enough)
> > > > > diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
> > > > > index 95bc9ef8f5f9..715ded503334 100644
> > > > > --- a/fs/xfs/xfs_dir2_readdir.c
> > > > > +++ b/fs/xfs/xfs_dir2_readdir.c
> > > > > @@ -18,6 +18,7 @@
> > > > >  #include "xfs_bmap.h"
> > > > >  #include "xfs_trans.h"
> > > > >  #include "xfs_error.h"
> > > > > +#include "xfs_health.h"
> > > > >  
> > > > >  /*
> > > > >   * Directory file type support functions
> > > > > @@ -119,8 +120,10 @@ xfs_dir2_sf_getdents(
> > > > >  		ctx->pos = off & 0x7fffffff;
> > > > >  		if (XFS_IS_CORRUPT(dp->i_mount,
> > > > >  				   !xfs_dir2_namecheck(sfep->name,
> > > > > -						       sfep->namelen)))
> > > > > +						       sfep->namelen))) {
> > > > > +			xfs_dirattr_mark_sick(dp, XFS_DATA_FORK);
> > > > >  			return -EFSCORRUPTED;
> > > > > +		}
> > > > >  		if (!dir_emit(ctx, (char *)sfep->name, sfep->namelen, ino,
> > > > >  			    xfs_dir3_get_dtype(mp, filetype)))
> > > > >  			return 0;
> > > > > @@ -461,6 +464,7 @@ xfs_dir2_leaf_getdents(
> > > > >  		if (XFS_IS_CORRUPT(dp->i_mount,
> > > > >  				   !xfs_dir2_namecheck(dep->name,
> > > > >  						       dep->namelen))) {
> > > > > +			xfs_dirattr_mark_sick(dp, XFS_DATA_FORK);
> > > > >  			error = -EFSCORRUPTED;
> > > > >  			break;
> > > > >  		}
> > > > > diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c
> > > > > index 1f09027c55ad..c1b6e8fb72ec 100644
> > > > > --- a/fs/xfs/xfs_health.c
> > > > > +++ b/fs/xfs/xfs_health.c
> > > > > @@ -15,6 +15,8 @@
> > > > >  #include "xfs_trace.h"
> > > > >  #include "xfs_health.h"
> > > > >  #include "xfs_btree.h"
> > > > > +#include "xfs_da_format.h"
> > > > > +#include "xfs_da_btree.h"
> > > > >  
> > > > >  /*
> > > > >   * Warn about metadata corruption that we detected but haven't fixed, and
> > > > > @@ -517,3 +519,40 @@ xfs_btree_mark_sick(
> > > > >  
> > > > >  	xfs_agno_mark_sick(cur->bc_mp, cur->bc_private.a.agno, mask);
> > > > >  }
> > > > > +
> > > > > +/*
> > > > > + * Record observations of dir/attr btree corruption with the health tracking
> > > > > + * system.
> > > > > + */
> > > > > +void
> > > > > +xfs_dirattr_mark_sick(
> > > > > +	struct xfs_inode	*ip,
> > > > > +	int			whichfork)
> > > > > +{
> > > > > +	unsigned int		mask;
> > > > > +
> > > > > +	switch (whichfork) {
> > > > > +	case XFS_DATA_FORK:
> > > > > +		mask = XFS_SICK_INO_DIR;
> > > > > +		break;
> > > > > +	case XFS_ATTR_FORK:
> > > > > +		mask = XFS_SICK_INO_XATTR;
> > > > > +		break;
> > > > > +	default:
> > > > > +		ASSERT(0);
> > > > > +		return;
> > > > > +	}
> > > > > +
> > > > > +	xfs_inode_mark_sick(ip, mask);
> > > > > +}
> > > > > +
> > > > > +/*
> > > > > + * Record observations of dir/attr btree corruption with the health tracking
> > > > > + * system.
> > > > > + */
> > > > > +void
> > > > > +xfs_da_mark_sick(
> > > > > +	struct xfs_da_args	*args)
> > > > > +{
> > > > > +	xfs_dirattr_mark_sick(args->dp, args->whichfork);
> > > > > +}
> > > > > 
> > > > 
> > > 
> > 
>
Darrick J. Wong Nov. 22, 2019, 6:35 p.m. UTC | #6
On Fri, Nov 22, 2019 at 07:28:49AM -0500, Brian Foster wrote:
> On Thu, Nov 21, 2019 at 05:03:32PM -0800, Darrick J. Wong wrote:
> > On Thu, Nov 21, 2019 at 08:26:27AM -0500, Brian Foster wrote:
> > > On Wed, Nov 20, 2019 at 08:55:08AM -0800, Darrick J. Wong wrote:
> > > > On Wed, Nov 20, 2019 at 11:11:47AM -0500, Brian Foster wrote:
> > > > > On Thu, Nov 14, 2019 at 10:19:46AM -0800, Darrick J. Wong wrote:
> > > > > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > > > > 
> > > > > > Whenever we encounter corrupt directory or extended attribute blocks, we
> > > > > > should report that to the health monitoring system for later reporting.
> > > > > > 
> > > > > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > > > > ---
> > > > > >  fs/xfs/libxfs/xfs_attr_leaf.c   |    5 ++++-
> > > > > >  fs/xfs/libxfs/xfs_attr_remote.c |   27 ++++++++++++++++-----------
> > > > > >  fs/xfs/libxfs/xfs_da_btree.c    |   29 ++++++++++++++++++++++++++---
> > > > > >  fs/xfs/libxfs/xfs_dir2.c        |    5 ++++-
> > > > > >  fs/xfs/libxfs/xfs_dir2_data.c   |    2 ++
> > > > > >  fs/xfs/libxfs/xfs_dir2_leaf.c   |    3 +++
> > > > > >  fs/xfs/libxfs/xfs_dir2_node.c   |    7 +++++++
> > > > > >  fs/xfs/libxfs/xfs_health.h      |    3 +++
> > > > > >  fs/xfs/xfs_attr_inactive.c      |    4 ++++
> > > > > >  fs/xfs/xfs_attr_list.c          |   16 +++++++++++++---
> > > > > >  fs/xfs/xfs_dir2_readdir.c       |    6 +++++-
> > > > > >  fs/xfs/xfs_health.c             |   39 +++++++++++++++++++++++++++++++++++++++
> > > > > >  12 files changed, 126 insertions(+), 20 deletions(-)
> > > > > > 
> > > > > > 
> > > > > ...
> > > > > > diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
> > > > > > index e424b004e3cb..a17622dadf00 100644
> > > > > > --- a/fs/xfs/libxfs/xfs_da_btree.c
> > > > > > +++ b/fs/xfs/libxfs/xfs_da_btree.c
> > > > > ...
> > > > > > @@ -1589,6 +1593,7 @@ xfs_da3_node_lookup_int(
> > > > > >  
> > > > > >  		if (magic != XFS_DA_NODE_MAGIC && magic != XFS_DA3_NODE_MAGIC) {
> > > > > >  			xfs_buf_corruption_error(blk->bp);
> > > > > > +			xfs_da_mark_sick(args);
> > > > > >  			return -EFSCORRUPTED;
> > > > > >  		}
> > > > > >  
> > > > > > @@ -1604,6 +1609,7 @@ xfs_da3_node_lookup_int(
> > > > > >  		/* Tree taller than we can handle; bail out! */
> > > > > >  		if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
> > > > > >  			xfs_buf_corruption_error(blk->bp);
> > > > > > +			xfs_da_mark_sick(args);
> > > > > >  			return -EFSCORRUPTED;
> > > > > >  		}
> > > > > >  
> > > > > > @@ -1612,6 +1618,7 @@ xfs_da3_node_lookup_int(
> > > > > >  			expected_level = nodehdr.level - 1;
> > > > > >  		else if (expected_level != nodehdr.level) {
> > > > > >  			xfs_buf_corruption_error(blk->bp);
> > > > > > +			xfs_da_mark_sick(args);
> > > > > >  			return -EFSCORRUPTED;
> > > > > >  		} else
> > > > > >  			expected_level--;
> > > > > > @@ -1663,12 +1670,16 @@ xfs_da3_node_lookup_int(
> > > > > >  		}
> > > > > >  
> > > > > >  		/* We can't point back to the root. */
> > > > > > -		if (XFS_IS_CORRUPT(dp->i_mount, blkno == args->geo->leafblk))
> > > > > > +		if (XFS_IS_CORRUPT(dp->i_mount, blkno == args->geo->leafblk)) {
> > > > > > +			xfs_da_mark_sick(args);
> > > > > >  			return -EFSCORRUPTED;
> > > > > > +		}
> > > > > >  	}
> > > > > >  
> > > > > > -	if (XFS_IS_CORRUPT(dp->i_mount, expected_level != 0))
> > > > > > +	if (XFS_IS_CORRUPT(dp->i_mount, expected_level != 0)) {
> > > > > > +		xfs_da_mark_sick(args);
> > > > > >  		return -EFSCORRUPTED;
> > > > > > +	}
> > > > > >  
> > > > > >  	/*
> > > > > >  	 * A leaf block that ends in the hashval that we are interested in
> > > > > > @@ -1686,6 +1697,7 @@ xfs_da3_node_lookup_int(
> > > > > >  			args->blkno = blk->blkno;
> > > > > >  		} else {
> > > > > >  			ASSERT(0);
> > > > > > +			xfs_da_mark_sick(args);
> > > > > >  			return -EFSCORRUPTED;
> > > > > >  		}
> > > > > 
> > > > > I'm just kind of skimming through the rest for general feedback at this
> > > > > point given previous comments, but it might be nice to start using exit
> > > > > labels at some of these places where we're enlarging and duplicating the
> > > > > error path for particular errors.
> > > > 
> > > > Yeah.  This current iteration is pretty wordy since I used coccinelle to
> > > > find all the EFSCORRUPTED clauses and inject the appropriate _mark_sick
> > > > call.
> > > > 
> > > > > It's not so much about the code in
> > > > > these patches, but rather to hopefully ease maintaining these state bits
> > > > > properly in new code where devs/reviewers might not know much about
> > > > > scrub state or have it in mind. Short of having some kind of generic
> > > > > helper to handle corruption state, ISTM that the combination of using
> > > > > verifiers where possible and common exit labels anywhere else we
> > > > > generate -EFSCORRUPTED at multiple places within some function could
> > > > > shrink these patches a bit..
> > > > 
> > > > <nod> Eric suggested on IRC that maybe the _mark_sick functions should
> > > > return EFSCORRUPTED so that we could at least collapse that to:
> > > > 
> > > > if (XFS_IS_CORRUPT(...)) {
> > > > 	error = xfs_da_mark_sick(...);
> > > > 	goto barf;
> > > > }
> > > > 
> > > > However, doing it the wordy way I've done it has the neat effects (IMHO)
> > > > that you can find all the places where xfs decides some metadata is
> > > > corrupt by grepping for EFSCORRUPTED, and confirm that each place it
> > > > does that also has a corresponding _mark_sick call.
> > > > 
> > > 
> > > Yeah, that was actually my thought process in suggesting pushing the
> > > mark_sick() calls down into verifiers as well.
> > 
> > <nod> It does strike me as a little odd that the verifiers are the /one/
> > place where EFSCORRUPTED isn't preceded or followed by a _mark_sick.
> > 
> > > It seems a little more clear (and open to future cleanups) with a
> > > strict pattern of setting sickness in the locations that generate
> > > corruption errors. Of course that likely means some special macro or
> > > something like you propose below, but I didn't want to quite go there
> > > until we could put the state updates in the right places.
> > 
> > Yeah....
> > 
> > > > I guess you could create a dorky shouty wrapper to maintain that greppy
> > > > property:
> > > > 
> > > > #define XFS_DA_EFSCORRUPTED(...) \
> > > > 	(xfs_da_mark_sick(...), -EFSCORRUPTED)
> > > > 
> > > > But... that might be stylistically undesirable.  OTOH I guess it
> > > > wouldn't be so bad either to do:
> > > > 
> > > > 	if (XFS_IS_CORRUPT(...)) {
> > > > 		error = -EFSCORRUPTED;
> > > > 		goto bad;
> > > > 	}
> > > > 
> > > > 	if (XFS_IS_CORRUPT(...)) {
> > > > 		error = -EFSCORRUPTED;
> > > > 		goto bad;
> > > > 	}
> > > > 
> > > > 	return 0;
> > > > bad:
> > > > 	if (error == -EFSCORRUPTED)
> > > > 		xfs_da_mark_sick(...);
> > > > 	return error;
> > > > 
> > > > Or using the shouty macro above:
> > > > 
> > > > 	if (XFS_IS_CORRUPT(...)) {
> > > > 		error = XFS_DA_EFSCORRUPTED(...);
> > > > 		goto bad;
> > > > 	}
> > > > 
> > > > 	if (XFS_IS_CORRUPT(...)) {
> > > > 		error = XFS_DA_EFSCORRUPTED(...);
> > > > 		goto bad;
> > > > 	}
> > > > 
> > > > bad:
> > > > 	return error;
> > > > 
> > > > I'll think about that.  It doesn't sound so bad when coding it up in
> > > > this email.
> > > > 
> > > 
> > > I suppose a macro is nice in that it enforces sickness is updated
> > > wherever -EFSCORRUPTED occurs, or at least can easily be verified by
> > > grepping. I find the separate macros pattern a little confusing, FWIW,
> > > simply because at a glance it looks like a garbled bunch of logic to me.
> > > I.e. I see 'if (IS_CORRUPT()) SOMETHING_CORRUPTED(); ...' and wonder wtf
> > > that is doing, for one. It's also not immediately obvious when we should
> > > use one or not the other, etc. This is getting into bikeshedding
> > > territory though and I don't have much of a better suggestion atm...
> > 
> > ...one /could/ have specific IS_CORRUPT macros mapping to different
> > types of things.  Though I think this could easily get messy:
> > 
> 
> Yep.
> 
> > #define XFS_DIR_IS_CORRUPT(dp, perror, expr) \
> > 	(unlikely(expr) ? xfs_corruption_report(#expr, ...), \
> > 			  *(perror) = -EFSCORRUPTED, \
> > 			  xfs_da_mark_sick(dp, XFS_DATA_FORK), true : false)
> > 
> > I don't want to load up these macros with too much stuff, but I guess at
> > least that reduces the directory code to:
> > 
> > 	if (XFS_DIR_IS_CORRUPT(dp, &error, blah == badvalue))
> > 		goto out;
> > 	...
> > 	if (XFS_DIR_IS_CORRUPT(dp, &error, ugh == NULL))
> > 		return error;
> > out:
> > 	return error;
> > 
> > Though now we're getting pretty far from the original intent to kill off
> > wonky macros.  At least these are less weird, so maybe this won't set
> > off a round of macro bikeshed rage?
> > 
> 
> I dunno.. I'm trying to find an opinion beyond a waffley sense of "is it
> worth changing?" on the whole macro thing. While I agree that the
> original macros are ugly, they never really confused me or affected
> readability so I didn't care too much whether they stay or go TBH.

Same here.

> In general, I think having usable interfaces for the developer and
> readable functional code is more important than how ugly/bloated the
> macro might be. That's why I really don't like the previous example that
> combines multiple "simple" macros and turns that into some reusable
> pattern. The resulting user code is not really readable IMO.

Yeah, now that I've gone through several rounds of reworking things,
inflating those error handling clauses isn't much of an improvement.
I promise I'm not being paid by the kLOC.

> The DIR_IS_CORRUPT() example above reminds me a little more of the
> original macros in that it is easy to use and makes the user code
> concise. Indeed, it somewhat overloads the macro, but that seems
> advantageous to me if the intent of this series is to add more
> boilerplate associated with how we handle corruption errors generically.
> In that regard, I find the DIR_IS_CORRUPT() approach preferable to
> alternatives discussed so far (though I'd probably name it XFS_DA_*()
> for consistency with the underlying health state type). Just my .02
> though.. ;)

Hm, yeah.  I think I'll rework the rest of this series to do that...

--D

> 
> Brian
> 
> > --D
> > 
> > > 
> > > Brian
> > > 
> > > > --D
> > > > 
> > > > > 
> > > > > Brian
> > > > > 
> > > > > >  		if (((retval == -ENOENT) || (retval == -ENOATTR)) &&
> > > > > > @@ -2250,8 +2262,10 @@ xfs_da3_swap_lastblock(
> > > > > >  	error = xfs_bmap_last_before(tp, dp, &lastoff, w);
> > > > > >  	if (error)
> > > > > >  		return error;
> > > > > > -	if (XFS_IS_CORRUPT(mp, lastoff == 0))
> > > > > > +	if (XFS_IS_CORRUPT(mp, lastoff == 0)) {
> > > > > > +		xfs_da_mark_sick(args);
> > > > > >  		return -EFSCORRUPTED;
> > > > > > +	}
> > > > > >  	/*
> > > > > >  	 * Read the last block in the btree space.
> > > > > >  	 */
> > > > > > @@ -2300,6 +2314,7 @@ xfs_da3_swap_lastblock(
> > > > > >  		if (XFS_IS_CORRUPT(mp,
> > > > > >  				   be32_to_cpu(sib_info->forw) != last_blkno ||
> > > > > >  				   sib_info->magic != dead_info->magic)) {
> > > > > > +			xfs_da_mark_sick(args);
> > > > > >  			error = -EFSCORRUPTED;
> > > > > >  			goto done;
> > > > > >  		}
> > > > > > @@ -2320,6 +2335,7 @@ xfs_da3_swap_lastblock(
> > > > > >  		if (XFS_IS_CORRUPT(mp,
> > > > > >  				   be32_to_cpu(sib_info->back) != last_blkno ||
> > > > > >  				   sib_info->magic != dead_info->magic)) {
> > > > > > +			xfs_da_mark_sick(args);
> > > > > >  			error = -EFSCORRUPTED;
> > > > > >  			goto done;
> > > > > >  		}
> > > > > > @@ -2342,6 +2358,7 @@ xfs_da3_swap_lastblock(
> > > > > >  		xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node);
> > > > > >  		if (XFS_IS_CORRUPT(mp,
> > > > > >  				   level >= 0 && level != par_hdr.level + 1)) {
> > > > > > +			xfs_da_mark_sick(args);
> > > > > >  			error = -EFSCORRUPTED;
> > > > > >  			goto done;
> > > > > >  		}
> > > > > > @@ -2353,6 +2370,7 @@ xfs_da3_swap_lastblock(
> > > > > >  		     entno++)
> > > > > >  			continue;
> > > > > >  		if (XFS_IS_CORRUPT(mp, entno == par_hdr.count)) {
> > > > > > +			xfs_da_mark_sick(args);
> > > > > >  			error = -EFSCORRUPTED;
> > > > > >  			goto done;
> > > > > >  		}
> > > > > > @@ -2378,6 +2396,7 @@ xfs_da3_swap_lastblock(
> > > > > >  		xfs_trans_brelse(tp, par_buf);
> > > > > >  		par_buf = NULL;
> > > > > >  		if (XFS_IS_CORRUPT(mp, par_blkno == 0)) {
> > > > > > +			xfs_da_mark_sick(args);
> > > > > >  			error = -EFSCORRUPTED;
> > > > > >  			goto done;
> > > > > >  		}
> > > > > > @@ -2387,6 +2406,7 @@ xfs_da3_swap_lastblock(
> > > > > >  		par_node = par_buf->b_addr;
> > > > > >  		xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node);
> > > > > >  		if (XFS_IS_CORRUPT(mp, par_hdr.level != level)) {
> > > > > > +			xfs_da_mark_sick(args);
> > > > > >  			error = -EFSCORRUPTED;
> > > > > >  			goto done;
> > > > > >  		}
> > > > > > @@ -2601,6 +2621,7 @@ xfs_dabuf_map(
> > > > > >  					irecs[i].br_state);
> > > > > >  			}
> > > > > >  		}
> > > > > > +		xfs_dirattr_mark_sick(dp, whichfork);
> > > > > >  		error = -EFSCORRUPTED;
> > > > > >  		goto out;
> > > > > >  	}
> > > > > > @@ -2693,6 +2714,8 @@ xfs_da_read_buf(
> > > > > >  	error = xfs_trans_read_buf_map(dp->i_mount, trans,
> > > > > >  					dp->i_mount->m_ddev_targp,
> > > > > >  					mapp, nmap, 0, &bp, ops);
> > > > > > +	if (xfs_metadata_is_sick(error))
> > > > > > +		xfs_dirattr_mark_sick(dp, whichfork);
> > > > > >  	if (error)
> > > > > >  		goto out_free;
> > > > > >  
> > > > > > diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
> > > > > > index 0aa87cbde49e..e1aa411a1b8b 100644
> > > > > > --- a/fs/xfs/libxfs/xfs_dir2.c
> > > > > > +++ b/fs/xfs/libxfs/xfs_dir2.c
> > > > > > @@ -18,6 +18,7 @@
> > > > > >  #include "xfs_errortag.h"
> > > > > >  #include "xfs_error.h"
> > > > > >  #include "xfs_trace.h"
> > > > > > +#include "xfs_health.h"
> > > > > >  
> > > > > >  struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR };
> > > > > >  
> > > > > > @@ -608,8 +609,10 @@ xfs_dir2_isblock(
> > > > > >  	rval = XFS_FSB_TO_B(args->dp->i_mount, last) == args->geo->blksize;
> > > > > >  	if (XFS_IS_CORRUPT(args->dp->i_mount,
> > > > > >  			   rval != 0 &&
> > > > > > -			   args->dp->i_d.di_size != args->geo->blksize))
> > > > > > +			   args->dp->i_d.di_size != args->geo->blksize)) {
> > > > > > +		xfs_da_mark_sick(args);
> > > > > >  		return -EFSCORRUPTED;
> > > > > > +	}
> > > > > >  	*vp = rval;
> > > > > >  	return 0;
> > > > > >  }
> > > > > > diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
> > > > > > index a6eb71a62b53..80cc9c7ea4e5 100644
> > > > > > --- a/fs/xfs/libxfs/xfs_dir2_data.c
> > > > > > +++ b/fs/xfs/libxfs/xfs_dir2_data.c
> > > > > > @@ -18,6 +18,7 @@
> > > > > >  #include "xfs_trans.h"
> > > > > >  #include "xfs_buf_item.h"
> > > > > >  #include "xfs_log.h"
> > > > > > +#include "xfs_health.h"
> > > > > >  
> > > > > >  static xfs_failaddr_t xfs_dir2_data_freefind_verify(
> > > > > >  		struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_free *bf,
> > > > > > @@ -1170,6 +1171,7 @@ xfs_dir2_data_use_free(
> > > > > >  corrupt:
> > > > > >  	xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, args->dp->i_mount,
> > > > > >  			hdr, sizeof(*hdr), __FILE__, __LINE__, fa);
> > > > > > +	xfs_da_mark_sick(args);
> > > > > >  	return -EFSCORRUPTED;
> > > > > >  }
> > > > > >  
> > > > > > diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
> > > > > > index 73edd96ce0ac..32d17420fff3 100644
> > > > > > --- a/fs/xfs/libxfs/xfs_dir2_leaf.c
> > > > > > +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
> > > > > > @@ -19,6 +19,7 @@
> > > > > >  #include "xfs_trace.h"
> > > > > >  #include "xfs_trans.h"
> > > > > >  #include "xfs_buf_item.h"
> > > > > > +#include "xfs_health.h"
> > > > > >  
> > > > > >  /*
> > > > > >   * Local function declarations.
> > > > > > @@ -1386,8 +1387,10 @@ xfs_dir2_leaf_removename(
> > > > > >  	bestsp = xfs_dir2_leaf_bests_p(ltp);
> > > > > >  	if (be16_to_cpu(bestsp[db]) != oldbest) {
> > > > > >  		xfs_buf_corruption_error(lbp);
> > > > > > +		xfs_da_mark_sick(args);
> > > > > >  		return -EFSCORRUPTED;
> > > > > >  	}
> > > > > > +
> > > > > >  	/*
> > > > > >  	 * Mark the former data entry unused.
> > > > > >  	 */
> > > > > > diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
> > > > > > index 3a8b0625a08b..e0f3ab254a1a 100644
> > > > > > --- a/fs/xfs/libxfs/xfs_dir2_node.c
> > > > > > +++ b/fs/xfs/libxfs/xfs_dir2_node.c
> > > > > > @@ -20,6 +20,7 @@
> > > > > >  #include "xfs_trans.h"
> > > > > >  #include "xfs_buf_item.h"
> > > > > >  #include "xfs_log.h"
> > > > > > +#include "xfs_health.h"
> > > > > >  
> > > > > >  /*
> > > > > >   * Function declarations.
> > > > > > @@ -228,6 +229,7 @@ __xfs_dir3_free_read(
> > > > > >  	if (fa) {
> > > > > >  		xfs_verifier_error(*bpp, -EFSCORRUPTED, fa);
> > > > > >  		xfs_trans_brelse(tp, *bpp);
> > > > > > +		xfs_dirattr_mark_sick(dp, XFS_DATA_FORK);
> > > > > >  		return -EFSCORRUPTED;
> > > > > >  	}
> > > > > >  
> > > > > > @@ -440,6 +442,7 @@ xfs_dir2_leaf_to_node(
> > > > > >  	if (be32_to_cpu(ltp->bestcount) >
> > > > > >  				(uint)dp->i_d.di_size / args->geo->blksize) {
> > > > > >  		xfs_buf_corruption_error(lbp);
> > > > > > +		xfs_da_mark_sick(args);
> > > > > >  		return -EFSCORRUPTED;
> > > > > >  	}
> > > > > >  
> > > > > > @@ -514,6 +517,7 @@ xfs_dir2_leafn_add(
> > > > > >  	 */
> > > > > >  	if (index < 0) {
> > > > > >  		xfs_buf_corruption_error(bp);
> > > > > > +		xfs_da_mark_sick(args);
> > > > > >  		return -EFSCORRUPTED;
> > > > > >  	}
> > > > > >  
> > > > > > @@ -733,6 +737,7 @@ xfs_dir2_leafn_lookup_for_addname(
> > > > > >  					   cpu_to_be16(NULLDATAOFF))) {
> > > > > >  				if (curfdb != newfdb)
> > > > > >  					xfs_trans_brelse(tp, curbp);
> > > > > > +				xfs_da_mark_sick(args);
> > > > > >  				return -EFSCORRUPTED;
> > > > > >  			}
> > > > > >  			curfdb = newfdb;
> > > > > > @@ -801,6 +806,7 @@ xfs_dir2_leafn_lookup_for_entry(
> > > > > >  	xfs_dir3_leaf_check(dp, bp);
> > > > > >  	if (leafhdr.count <= 0) {
> > > > > >  		xfs_buf_corruption_error(bp);
> > > > > > +		xfs_da_mark_sick(args);
> > > > > >  		return -EFSCORRUPTED;
> > > > > >  	}
> > > > > >  
> > > > > > @@ -1737,6 +1743,7 @@ xfs_dir2_node_add_datablk(
> > > > > >  			} else {
> > > > > >  				xfs_alert(mp, " ... fblk is NULL");
> > > > > >  			}
> > > > > > +			xfs_da_mark_sick(args);
> > > > > >  			return -EFSCORRUPTED;
> > > > > >  		}
> > > > > >  
> > > > > > diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h
> > > > > > index 2049419e9555..d9404cd3d09b 100644
> > > > > > --- a/fs/xfs/libxfs/xfs_health.h
> > > > > > +++ b/fs/xfs/libxfs/xfs_health.h
> > > > > > @@ -38,6 +38,7 @@ struct xfs_perag;
> > > > > >  struct xfs_inode;
> > > > > >  struct xfs_fsop_geom;
> > > > > >  struct xfs_btree_cur;
> > > > > > +struct xfs_da_args;
> > > > > >  
> > > > > >  /* Observable health issues for metadata spanning the entire filesystem. */
> > > > > >  #define XFS_SICK_FS_COUNTERS	(1 << 0)  /* summary counters */
> > > > > > @@ -141,6 +142,8 @@ void xfs_inode_measure_sickness(struct xfs_inode *ip, unsigned int *sick,
> > > > > >  void xfs_health_unmount(struct xfs_mount *mp);
> > > > > >  void xfs_bmap_mark_sick(struct xfs_inode *ip, int whichfork);
> > > > > >  void xfs_btree_mark_sick(struct xfs_btree_cur *cur);
> > > > > > +void xfs_dirattr_mark_sick(struct xfs_inode *ip, int whichfork);
> > > > > > +void xfs_da_mark_sick(struct xfs_da_args *args);
> > > > > >  
> > > > > >  /* Now some helpers. */
> > > > > >  
> > > > > > diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
> > > > > > index a78c501f6fb1..429a97494ffa 100644
> > > > > > --- a/fs/xfs/xfs_attr_inactive.c
> > > > > > +++ b/fs/xfs/xfs_attr_inactive.c
> > > > > > @@ -23,6 +23,7 @@
> > > > > >  #include "xfs_quota.h"
> > > > > >  #include "xfs_dir2.h"
> > > > > >  #include "xfs_error.h"
> > > > > > +#include "xfs_health.h"
> > > > > >  
> > > > > >  /*
> > > > > >   * Look at all the extents for this logical region,
> > > > > > @@ -209,6 +210,7 @@ xfs_attr3_node_inactive(
> > > > > >  	if (level > XFS_DA_NODE_MAXDEPTH) {
> > > > > >  		xfs_trans_brelse(*trans, bp);	/* no locks for later trans */
> > > > > >  		xfs_buf_corruption_error(bp);
> > > > > > +		xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> > > > > >  		return -EFSCORRUPTED;
> > > > > >  	}
> > > > > >  
> > > > > > @@ -256,6 +258,7 @@ xfs_attr3_node_inactive(
> > > > > >  			error = xfs_attr3_leaf_inactive(trans, dp, child_bp);
> > > > > >  			break;
> > > > > >  		default:
> > > > > > +			xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> > > > > >  			xfs_buf_corruption_error(child_bp);
> > > > > >  			xfs_trans_brelse(*trans, child_bp);
> > > > > >  			error = -EFSCORRUPTED;
> > > > > > @@ -342,6 +345,7 @@ xfs_attr3_root_inactive(
> > > > > >  		error = xfs_attr3_leaf_inactive(trans, dp, bp);
> > > > > >  		break;
> > > > > >  	default:
> > > > > > +		xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> > > > > >  		error = -EFSCORRUPTED;
> > > > > >  		xfs_buf_corruption_error(bp);
> > > > > >  		xfs_trans_brelse(*trans, bp);
> > > > > > diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
> > > > > > index 7a099df88a0c..1a2a3d4ce422 100644
> > > > > > --- a/fs/xfs/xfs_attr_list.c
> > > > > > +++ b/fs/xfs/xfs_attr_list.c
> > > > > > @@ -21,6 +21,7 @@
> > > > > >  #include "xfs_error.h"
> > > > > >  #include "xfs_trace.h"
> > > > > >  #include "xfs_dir2.h"
> > > > > > +#include "xfs_health.h"
> > > > > >  
> > > > > >  STATIC int
> > > > > >  xfs_attr_shortform_compare(const void *a, const void *b)
> > > > > > @@ -88,8 +89,10 @@ xfs_attr_shortform_list(
> > > > > >  		for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
> > > > > >  			if (XFS_IS_CORRUPT(context->dp->i_mount,
> > > > > >  					   !xfs_attr_namecheck(sfe->nameval,
> > > > > > -							       sfe->namelen)))
> > > > > > +							       sfe->namelen))) {
> > > > > > +				xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> > > > > >  				return -EFSCORRUPTED;
> > > > > > +			}
> > > > > >  			context->put_listent(context,
> > > > > >  					     sfe->flags,
> > > > > >  					     sfe->nameval,
> > > > > > @@ -131,6 +134,7 @@ xfs_attr_shortform_list(
> > > > > >  					     context->dp->i_mount, sfe,
> > > > > >  					     sizeof(*sfe));
> > > > > >  			kmem_free(sbuf);
> > > > > > +			xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> > > > > >  			return -EFSCORRUPTED;
> > > > > >  		}
> > > > > >  
> > > > > > @@ -181,6 +185,7 @@ xfs_attr_shortform_list(
> > > > > >  		if (XFS_IS_CORRUPT(context->dp->i_mount,
> > > > > >  				   !xfs_attr_namecheck(sbp->name,
> > > > > >  						       sbp->namelen))) {
> > > > > > +			xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> > > > > >  			error = -EFSCORRUPTED;
> > > > > >  			goto out;
> > > > > >  		}
> > > > > > @@ -268,8 +273,10 @@ xfs_attr_node_list_lookup(
> > > > > >  			return 0;
> > > > > >  
> > > > > >  		/* We can't point back to the root. */
> > > > > > -		if (XFS_IS_CORRUPT(mp, cursor->blkno == 0))
> > > > > > +		if (XFS_IS_CORRUPT(mp, cursor->blkno == 0)) {
> > > > > > +			xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> > > > > >  			return -EFSCORRUPTED;
> > > > > > +		}
> > > > > >  	}
> > > > > >  
> > > > > >  	if (expected_level != 0)
> > > > > > @@ -281,6 +288,7 @@ xfs_attr_node_list_lookup(
> > > > > >  out_corruptbuf:
> > > > > >  	xfs_buf_corruption_error(bp);
> > > > > >  	xfs_trans_brelse(tp, bp);
> > > > > > +	xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
> > > > > >  	return -EFSCORRUPTED;
> > > > > >  }
> > > > > >  
> > > > > > @@ -471,8 +479,10 @@ xfs_attr3_leaf_list_int(
> > > > > >  		}
> > > > > >  
> > > > > >  		if (XFS_IS_CORRUPT(context->dp->i_mount,
> > > > > > -				   !xfs_attr_namecheck(name, namelen)))
> > > > > > +				   !xfs_attr_namecheck(name, namelen))) {
> > > > > > +			xfs_dirattr_mark_sick(context->dp, XFS_ATTR_FORK);
> > > > > >  			return -EFSCORRUPTED;
> > > > > > +		}
> > > > > >  		context->put_listent(context, entry->flags,
> > > > > >  					      name, namelen, valuelen);
> > > > > >  		if (context->seen_enough)
> > > > > > diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
> > > > > > index 95bc9ef8f5f9..715ded503334 100644
> > > > > > --- a/fs/xfs/xfs_dir2_readdir.c
> > > > > > +++ b/fs/xfs/xfs_dir2_readdir.c
> > > > > > @@ -18,6 +18,7 @@
> > > > > >  #include "xfs_bmap.h"
> > > > > >  #include "xfs_trans.h"
> > > > > >  #include "xfs_error.h"
> > > > > > +#include "xfs_health.h"
> > > > > >  
> > > > > >  /*
> > > > > >   * Directory file type support functions
> > > > > > @@ -119,8 +120,10 @@ xfs_dir2_sf_getdents(
> > > > > >  		ctx->pos = off & 0x7fffffff;
> > > > > >  		if (XFS_IS_CORRUPT(dp->i_mount,
> > > > > >  				   !xfs_dir2_namecheck(sfep->name,
> > > > > > -						       sfep->namelen)))
> > > > > > +						       sfep->namelen))) {
> > > > > > +			xfs_dirattr_mark_sick(dp, XFS_DATA_FORK);
> > > > > >  			return -EFSCORRUPTED;
> > > > > > +		}
> > > > > >  		if (!dir_emit(ctx, (char *)sfep->name, sfep->namelen, ino,
> > > > > >  			    xfs_dir3_get_dtype(mp, filetype)))
> > > > > >  			return 0;
> > > > > > @@ -461,6 +464,7 @@ xfs_dir2_leaf_getdents(
> > > > > >  		if (XFS_IS_CORRUPT(dp->i_mount,
> > > > > >  				   !xfs_dir2_namecheck(dep->name,
> > > > > >  						       dep->namelen))) {
> > > > > > +			xfs_dirattr_mark_sick(dp, XFS_DATA_FORK);
> > > > > >  			error = -EFSCORRUPTED;
> > > > > >  			break;
> > > > > >  		}
> > > > > > diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c
> > > > > > index 1f09027c55ad..c1b6e8fb72ec 100644
> > > > > > --- a/fs/xfs/xfs_health.c
> > > > > > +++ b/fs/xfs/xfs_health.c
> > > > > > @@ -15,6 +15,8 @@
> > > > > >  #include "xfs_trace.h"
> > > > > >  #include "xfs_health.h"
> > > > > >  #include "xfs_btree.h"
> > > > > > +#include "xfs_da_format.h"
> > > > > > +#include "xfs_da_btree.h"
> > > > > >  
> > > > > >  /*
> > > > > >   * Warn about metadata corruption that we detected but haven't fixed, and
> > > > > > @@ -517,3 +519,40 @@ xfs_btree_mark_sick(
> > > > > >  
> > > > > >  	xfs_agno_mark_sick(cur->bc_mp, cur->bc_private.a.agno, mask);
> > > > > >  }
> > > > > > +
> > > > > > +/*
> > > > > > + * Record observations of dir/attr btree corruption with the health tracking
> > > > > > + * system.
> > > > > > + */
> > > > > > +void
> > > > > > +xfs_dirattr_mark_sick(
> > > > > > +	struct xfs_inode	*ip,
> > > > > > +	int			whichfork)
> > > > > > +{
> > > > > > +	unsigned int		mask;
> > > > > > +
> > > > > > +	switch (whichfork) {
> > > > > > +	case XFS_DATA_FORK:
> > > > > > +		mask = XFS_SICK_INO_DIR;
> > > > > > +		break;
> > > > > > +	case XFS_ATTR_FORK:
> > > > > > +		mask = XFS_SICK_INO_XATTR;
> > > > > > +		break;
> > > > > > +	default:
> > > > > > +		ASSERT(0);
> > > > > > +		return;
> > > > > > +	}
> > > > > > +
> > > > > > +	xfs_inode_mark_sick(ip, mask);
> > > > > > +}
> > > > > > +
> > > > > > +/*
> > > > > > + * Record observations of dir/attr btree corruption with the health tracking
> > > > > > + * system.
> > > > > > + */
> > > > > > +void
> > > > > > +xfs_da_mark_sick(
> > > > > > +	struct xfs_da_args	*args)
> > > > > > +{
> > > > > > +	xfs_dirattr_mark_sick(args->dp, args->whichfork);
> > > > > > +}
> > > > > > 
> > > > > 
> > > > 
> > > 
> > 
>

Patch
diff mbox series

diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 85ec5945d29f..e347b5dabded 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -27,7 +27,7 @@ 
 #include "xfs_buf_item.h"
 #include "xfs_dir2.h"
 #include "xfs_log.h"
-
+#include "xfs_health.h"
 
 /*
  * xfs_attr_leaf.c
@@ -2346,6 +2346,7 @@  xfs_attr3_leaf_lookup_int(
 	entries = xfs_attr3_leaf_entryp(leaf);
 	if (ichdr.count >= args->geo->blksize / 8) {
 		xfs_buf_corruption_error(bp);
+		xfs_da_mark_sick(args);
 		return -EFSCORRUPTED;
 	}
 
@@ -2365,10 +2366,12 @@  xfs_attr3_leaf_lookup_int(
 	}
 	if (!(probe >= 0 && (!ichdr.count || probe < ichdr.count))) {
 		xfs_buf_corruption_error(bp);
+		xfs_da_mark_sick(args);
 		return -EFSCORRUPTED;
 	}
 	if (!(span <= 4 || be32_to_cpu(entry->hashval) == hashval)) {
 		xfs_buf_corruption_error(bp);
+		xfs_da_mark_sick(args);
 		return -EFSCORRUPTED;
 	}
 
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index a6ef5df42669..ef34d90501a7 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -22,6 +22,7 @@ 
 #include "xfs_attr_remote.h"
 #include "xfs_trace.h"
 #include "xfs_error.h"
+#include "xfs_health.h"
 
 #define ATTR_RMTVALUE_MAPSIZE	1	/* # of map entries at once */
 
@@ -261,17 +262,18 @@  xfs_attr3_rmt_hdr_set(
  */
 STATIC int
 xfs_attr_rmtval_copyout(
-	struct xfs_mount *mp,
-	struct xfs_buf	*bp,
-	xfs_ino_t	ino,
-	int		*offset,
-	int		*valuelen,
-	uint8_t		**dst)
+	struct xfs_mount	*mp,
+	struct xfs_buf		*bp,
+	struct xfs_inode	*dp,
+	int			*offset,
+	int			*valuelen,
+	uint8_t			**dst)
 {
-	char		*src = bp->b_addr;
-	xfs_daddr_t	bno = bp->b_bn;
-	int		len = BBTOB(bp->b_length);
-	int		blksize = mp->m_attr_geo->blksize;
+	char			*src = bp->b_addr;
+	xfs_ino_t		ino = dp->i_ino;
+	xfs_daddr_t		bno = bp->b_bn;
+	int			len = BBTOB(bp->b_length);
+	int			blksize = mp->m_attr_geo->blksize;
 
 	ASSERT(len >= blksize);
 
@@ -287,6 +289,7 @@  xfs_attr_rmtval_copyout(
 				xfs_alert(mp,
 "remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)",
 					bno, *offset, byte_cnt, ino);
+				xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
 				return -EFSCORRUPTED;
 			}
 			hdr_size = sizeof(struct xfs_attr3_rmt_hdr);
@@ -405,10 +408,12 @@  xfs_attr_rmtval_get(
 						   mp->m_ddev_targp,
 						   dblkno, dblkcnt, 0, &bp,
 						   &xfs_attr3_rmt_buf_ops);
+			if (xfs_metadata_is_sick(error))
+				xfs_da_mark_sick(args);
 			if (error)
 				return error;
 
-			error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino,
+			error = xfs_attr_rmtval_copyout(mp, bp, args->dp,
 							&offset, &valuelen,
 							&dst);
 			xfs_trans_brelse(args->trans, bp);
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index e424b004e3cb..a17622dadf00 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -22,6 +22,7 @@ 
 #include "xfs_trace.h"
 #include "xfs_buf_item.h"
 #include "xfs_log.h"
+#include "xfs_health.h"
 
 /*
  * xfs_da_btree.c
@@ -359,6 +360,7 @@  xfs_da3_node_read(
 					tp->t_mountp, info, sizeof(*info));
 			xfs_trans_brelse(tp, *bpp);
 			*bpp = NULL;
+			xfs_dirattr_mark_sick(dp, which_fork);
 			return -EFSCORRUPTED;
 		}
 		xfs_trans_buf_set_type(tp, *bpp, type);
@@ -554,6 +556,7 @@  xfs_da3_split(
 	if (node->hdr.info.forw) {
 		if (be32_to_cpu(node->hdr.info.forw) != addblk->blkno) {
 			xfs_buf_corruption_error(oldblk->bp);
+			xfs_da_mark_sick(state->args);
 			error = -EFSCORRUPTED;
 			goto out;
 		}
@@ -567,6 +570,7 @@  xfs_da3_split(
 	if (node->hdr.info.back) {
 		if (be32_to_cpu(node->hdr.info.back) != addblk->blkno) {
 			xfs_buf_corruption_error(oldblk->bp);
+			xfs_da_mark_sick(state->args);
 			error = -EFSCORRUPTED;
 			goto out;
 		}
@@ -1589,6 +1593,7 @@  xfs_da3_node_lookup_int(
 
 		if (magic != XFS_DA_NODE_MAGIC && magic != XFS_DA3_NODE_MAGIC) {
 			xfs_buf_corruption_error(blk->bp);
+			xfs_da_mark_sick(args);
 			return -EFSCORRUPTED;
 		}
 
@@ -1604,6 +1609,7 @@  xfs_da3_node_lookup_int(
 		/* Tree taller than we can handle; bail out! */
 		if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
 			xfs_buf_corruption_error(blk->bp);
+			xfs_da_mark_sick(args);
 			return -EFSCORRUPTED;
 		}
 
@@ -1612,6 +1618,7 @@  xfs_da3_node_lookup_int(
 			expected_level = nodehdr.level - 1;
 		else if (expected_level != nodehdr.level) {
 			xfs_buf_corruption_error(blk->bp);
+			xfs_da_mark_sick(args);
 			return -EFSCORRUPTED;
 		} else
 			expected_level--;
@@ -1663,12 +1670,16 @@  xfs_da3_node_lookup_int(
 		}
 
 		/* We can't point back to the root. */
-		if (XFS_IS_CORRUPT(dp->i_mount, blkno == args->geo->leafblk))
+		if (XFS_IS_CORRUPT(dp->i_mount, blkno == args->geo->leafblk)) {
+			xfs_da_mark_sick(args);
 			return -EFSCORRUPTED;
+		}
 	}
 
-	if (XFS_IS_CORRUPT(dp->i_mount, expected_level != 0))
+	if (XFS_IS_CORRUPT(dp->i_mount, expected_level != 0)) {
+		xfs_da_mark_sick(args);
 		return -EFSCORRUPTED;
+	}
 
 	/*
 	 * A leaf block that ends in the hashval that we are interested in
@@ -1686,6 +1697,7 @@  xfs_da3_node_lookup_int(
 			args->blkno = blk->blkno;
 		} else {
 			ASSERT(0);
+			xfs_da_mark_sick(args);
 			return -EFSCORRUPTED;
 		}
 		if (((retval == -ENOENT) || (retval == -ENOATTR)) &&
@@ -2250,8 +2262,10 @@  xfs_da3_swap_lastblock(
 	error = xfs_bmap_last_before(tp, dp, &lastoff, w);
 	if (error)
 		return error;
-	if (XFS_IS_CORRUPT(mp, lastoff == 0))
+	if (XFS_IS_CORRUPT(mp, lastoff == 0)) {
+		xfs_da_mark_sick(args);
 		return -EFSCORRUPTED;
+	}
 	/*
 	 * Read the last block in the btree space.
 	 */
@@ -2300,6 +2314,7 @@  xfs_da3_swap_lastblock(
 		if (XFS_IS_CORRUPT(mp,
 				   be32_to_cpu(sib_info->forw) != last_blkno ||
 				   sib_info->magic != dead_info->magic)) {
+			xfs_da_mark_sick(args);
 			error = -EFSCORRUPTED;
 			goto done;
 		}
@@ -2320,6 +2335,7 @@  xfs_da3_swap_lastblock(
 		if (XFS_IS_CORRUPT(mp,
 				   be32_to_cpu(sib_info->back) != last_blkno ||
 				   sib_info->magic != dead_info->magic)) {
+			xfs_da_mark_sick(args);
 			error = -EFSCORRUPTED;
 			goto done;
 		}
@@ -2342,6 +2358,7 @@  xfs_da3_swap_lastblock(
 		xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node);
 		if (XFS_IS_CORRUPT(mp,
 				   level >= 0 && level != par_hdr.level + 1)) {
+			xfs_da_mark_sick(args);
 			error = -EFSCORRUPTED;
 			goto done;
 		}
@@ -2353,6 +2370,7 @@  xfs_da3_swap_lastblock(
 		     entno++)
 			continue;
 		if (XFS_IS_CORRUPT(mp, entno == par_hdr.count)) {
+			xfs_da_mark_sick(args);
 			error = -EFSCORRUPTED;
 			goto done;
 		}
@@ -2378,6 +2396,7 @@  xfs_da3_swap_lastblock(
 		xfs_trans_brelse(tp, par_buf);
 		par_buf = NULL;
 		if (XFS_IS_CORRUPT(mp, par_blkno == 0)) {
+			xfs_da_mark_sick(args);
 			error = -EFSCORRUPTED;
 			goto done;
 		}
@@ -2387,6 +2406,7 @@  xfs_da3_swap_lastblock(
 		par_node = par_buf->b_addr;
 		xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node);
 		if (XFS_IS_CORRUPT(mp, par_hdr.level != level)) {
+			xfs_da_mark_sick(args);
 			error = -EFSCORRUPTED;
 			goto done;
 		}
@@ -2601,6 +2621,7 @@  xfs_dabuf_map(
 					irecs[i].br_state);
 			}
 		}
+		xfs_dirattr_mark_sick(dp, whichfork);
 		error = -EFSCORRUPTED;
 		goto out;
 	}
@@ -2693,6 +2714,8 @@  xfs_da_read_buf(
 	error = xfs_trans_read_buf_map(dp->i_mount, trans,
 					dp->i_mount->m_ddev_targp,
 					mapp, nmap, 0, &bp, ops);
+	if (xfs_metadata_is_sick(error))
+		xfs_dirattr_mark_sick(dp, whichfork);
 	if (error)
 		goto out_free;
 
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 0aa87cbde49e..e1aa411a1b8b 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -18,6 +18,7 @@ 
 #include "xfs_errortag.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
+#include "xfs_health.h"
 
 struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR };
 
@@ -608,8 +609,10 @@  xfs_dir2_isblock(
 	rval = XFS_FSB_TO_B(args->dp->i_mount, last) == args->geo->blksize;
 	if (XFS_IS_CORRUPT(args->dp->i_mount,
 			   rval != 0 &&
-			   args->dp->i_d.di_size != args->geo->blksize))
+			   args->dp->i_d.di_size != args->geo->blksize)) {
+		xfs_da_mark_sick(args);
 		return -EFSCORRUPTED;
+	}
 	*vp = rval;
 	return 0;
 }
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
index a6eb71a62b53..80cc9c7ea4e5 100644
--- a/fs/xfs/libxfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -18,6 +18,7 @@ 
 #include "xfs_trans.h"
 #include "xfs_buf_item.h"
 #include "xfs_log.h"
+#include "xfs_health.h"
 
 static xfs_failaddr_t xfs_dir2_data_freefind_verify(
 		struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_free *bf,
@@ -1170,6 +1171,7 @@  xfs_dir2_data_use_free(
 corrupt:
 	xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, args->dp->i_mount,
 			hdr, sizeof(*hdr), __FILE__, __LINE__, fa);
+	xfs_da_mark_sick(args);
 	return -EFSCORRUPTED;
 }
 
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index 73edd96ce0ac..32d17420fff3 100644
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -19,6 +19,7 @@ 
 #include "xfs_trace.h"
 #include "xfs_trans.h"
 #include "xfs_buf_item.h"
+#include "xfs_health.h"
 
 /*
  * Local function declarations.
@@ -1386,8 +1387,10 @@  xfs_dir2_leaf_removename(
 	bestsp = xfs_dir2_leaf_bests_p(ltp);
 	if (be16_to_cpu(bestsp[db]) != oldbest) {
 		xfs_buf_corruption_error(lbp);
+		xfs_da_mark_sick(args);
 		return -EFSCORRUPTED;
 	}
+
 	/*
 	 * Mark the former data entry unused.
 	 */
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index 3a8b0625a08b..e0f3ab254a1a 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -20,6 +20,7 @@ 
 #include "xfs_trans.h"
 #include "xfs_buf_item.h"
 #include "xfs_log.h"
+#include "xfs_health.h"
 
 /*
  * Function declarations.
@@ -228,6 +229,7 @@  __xfs_dir3_free_read(
 	if (fa) {
 		xfs_verifier_error(*bpp, -EFSCORRUPTED, fa);
 		xfs_trans_brelse(tp, *bpp);
+		xfs_dirattr_mark_sick(dp, XFS_DATA_FORK);
 		return -EFSCORRUPTED;
 	}
 
@@ -440,6 +442,7 @@  xfs_dir2_leaf_to_node(
 	if (be32_to_cpu(ltp->bestcount) >
 				(uint)dp->i_d.di_size / args->geo->blksize) {
 		xfs_buf_corruption_error(lbp);
+		xfs_da_mark_sick(args);
 		return -EFSCORRUPTED;
 	}
 
@@ -514,6 +517,7 @@  xfs_dir2_leafn_add(
 	 */
 	if (index < 0) {
 		xfs_buf_corruption_error(bp);
+		xfs_da_mark_sick(args);
 		return -EFSCORRUPTED;
 	}
 
@@ -733,6 +737,7 @@  xfs_dir2_leafn_lookup_for_addname(
 					   cpu_to_be16(NULLDATAOFF))) {
 				if (curfdb != newfdb)
 					xfs_trans_brelse(tp, curbp);
+				xfs_da_mark_sick(args);
 				return -EFSCORRUPTED;
 			}
 			curfdb = newfdb;
@@ -801,6 +806,7 @@  xfs_dir2_leafn_lookup_for_entry(
 	xfs_dir3_leaf_check(dp, bp);
 	if (leafhdr.count <= 0) {
 		xfs_buf_corruption_error(bp);
+		xfs_da_mark_sick(args);
 		return -EFSCORRUPTED;
 	}
 
@@ -1737,6 +1743,7 @@  xfs_dir2_node_add_datablk(
 			} else {
 				xfs_alert(mp, " ... fblk is NULL");
 			}
+			xfs_da_mark_sick(args);
 			return -EFSCORRUPTED;
 		}
 
diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h
index 2049419e9555..d9404cd3d09b 100644
--- a/fs/xfs/libxfs/xfs_health.h
+++ b/fs/xfs/libxfs/xfs_health.h
@@ -38,6 +38,7 @@  struct xfs_perag;
 struct xfs_inode;
 struct xfs_fsop_geom;
 struct xfs_btree_cur;
+struct xfs_da_args;
 
 /* Observable health issues for metadata spanning the entire filesystem. */
 #define XFS_SICK_FS_COUNTERS	(1 << 0)  /* summary counters */
@@ -141,6 +142,8 @@  void xfs_inode_measure_sickness(struct xfs_inode *ip, unsigned int *sick,
 void xfs_health_unmount(struct xfs_mount *mp);
 void xfs_bmap_mark_sick(struct xfs_inode *ip, int whichfork);
 void xfs_btree_mark_sick(struct xfs_btree_cur *cur);
+void xfs_dirattr_mark_sick(struct xfs_inode *ip, int whichfork);
+void xfs_da_mark_sick(struct xfs_da_args *args);
 
 /* Now some helpers. */
 
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
index a78c501f6fb1..429a97494ffa 100644
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -23,6 +23,7 @@ 
 #include "xfs_quota.h"
 #include "xfs_dir2.h"
 #include "xfs_error.h"
+#include "xfs_health.h"
 
 /*
  * Look at all the extents for this logical region,
@@ -209,6 +210,7 @@  xfs_attr3_node_inactive(
 	if (level > XFS_DA_NODE_MAXDEPTH) {
 		xfs_trans_brelse(*trans, bp);	/* no locks for later trans */
 		xfs_buf_corruption_error(bp);
+		xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
 		return -EFSCORRUPTED;
 	}
 
@@ -256,6 +258,7 @@  xfs_attr3_node_inactive(
 			error = xfs_attr3_leaf_inactive(trans, dp, child_bp);
 			break;
 		default:
+			xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
 			xfs_buf_corruption_error(child_bp);
 			xfs_trans_brelse(*trans, child_bp);
 			error = -EFSCORRUPTED;
@@ -342,6 +345,7 @@  xfs_attr3_root_inactive(
 		error = xfs_attr3_leaf_inactive(trans, dp, bp);
 		break;
 	default:
+		xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
 		error = -EFSCORRUPTED;
 		xfs_buf_corruption_error(bp);
 		xfs_trans_brelse(*trans, bp);
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 7a099df88a0c..1a2a3d4ce422 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -21,6 +21,7 @@ 
 #include "xfs_error.h"
 #include "xfs_trace.h"
 #include "xfs_dir2.h"
+#include "xfs_health.h"
 
 STATIC int
 xfs_attr_shortform_compare(const void *a, const void *b)
@@ -88,8 +89,10 @@  xfs_attr_shortform_list(
 		for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
 			if (XFS_IS_CORRUPT(context->dp->i_mount,
 					   !xfs_attr_namecheck(sfe->nameval,
-							       sfe->namelen)))
+							       sfe->namelen))) {
+				xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
 				return -EFSCORRUPTED;
+			}
 			context->put_listent(context,
 					     sfe->flags,
 					     sfe->nameval,
@@ -131,6 +134,7 @@  xfs_attr_shortform_list(
 					     context->dp->i_mount, sfe,
 					     sizeof(*sfe));
 			kmem_free(sbuf);
+			xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
 			return -EFSCORRUPTED;
 		}
 
@@ -181,6 +185,7 @@  xfs_attr_shortform_list(
 		if (XFS_IS_CORRUPT(context->dp->i_mount,
 				   !xfs_attr_namecheck(sbp->name,
 						       sbp->namelen))) {
+			xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
 			error = -EFSCORRUPTED;
 			goto out;
 		}
@@ -268,8 +273,10 @@  xfs_attr_node_list_lookup(
 			return 0;
 
 		/* We can't point back to the root. */
-		if (XFS_IS_CORRUPT(mp, cursor->blkno == 0))
+		if (XFS_IS_CORRUPT(mp, cursor->blkno == 0)) {
+			xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
 			return -EFSCORRUPTED;
+		}
 	}
 
 	if (expected_level != 0)
@@ -281,6 +288,7 @@  xfs_attr_node_list_lookup(
 out_corruptbuf:
 	xfs_buf_corruption_error(bp);
 	xfs_trans_brelse(tp, bp);
+	xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
 	return -EFSCORRUPTED;
 }
 
@@ -471,8 +479,10 @@  xfs_attr3_leaf_list_int(
 		}
 
 		if (XFS_IS_CORRUPT(context->dp->i_mount,
-				   !xfs_attr_namecheck(name, namelen)))
+				   !xfs_attr_namecheck(name, namelen))) {
+			xfs_dirattr_mark_sick(context->dp, XFS_ATTR_FORK);
 			return -EFSCORRUPTED;
+		}
 		context->put_listent(context, entry->flags,
 					      name, namelen, valuelen);
 		if (context->seen_enough)
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 95bc9ef8f5f9..715ded503334 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -18,6 +18,7 @@ 
 #include "xfs_bmap.h"
 #include "xfs_trans.h"
 #include "xfs_error.h"
+#include "xfs_health.h"
 
 /*
  * Directory file type support functions
@@ -119,8 +120,10 @@  xfs_dir2_sf_getdents(
 		ctx->pos = off & 0x7fffffff;
 		if (XFS_IS_CORRUPT(dp->i_mount,
 				   !xfs_dir2_namecheck(sfep->name,
-						       sfep->namelen)))
+						       sfep->namelen))) {
+			xfs_dirattr_mark_sick(dp, XFS_DATA_FORK);
 			return -EFSCORRUPTED;
+		}
 		if (!dir_emit(ctx, (char *)sfep->name, sfep->namelen, ino,
 			    xfs_dir3_get_dtype(mp, filetype)))
 			return 0;
@@ -461,6 +464,7 @@  xfs_dir2_leaf_getdents(
 		if (XFS_IS_CORRUPT(dp->i_mount,
 				   !xfs_dir2_namecheck(dep->name,
 						       dep->namelen))) {
+			xfs_dirattr_mark_sick(dp, XFS_DATA_FORK);
 			error = -EFSCORRUPTED;
 			break;
 		}
diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c
index 1f09027c55ad..c1b6e8fb72ec 100644
--- a/fs/xfs/xfs_health.c
+++ b/fs/xfs/xfs_health.c
@@ -15,6 +15,8 @@ 
 #include "xfs_trace.h"
 #include "xfs_health.h"
 #include "xfs_btree.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
 
 /*
  * Warn about metadata corruption that we detected but haven't fixed, and
@@ -517,3 +519,40 @@  xfs_btree_mark_sick(
 
 	xfs_agno_mark_sick(cur->bc_mp, cur->bc_private.a.agno, mask);
 }
+
+/*
+ * Record observations of dir/attr btree corruption with the health tracking
+ * system.
+ */
+void
+xfs_dirattr_mark_sick(
+	struct xfs_inode	*ip,
+	int			whichfork)
+{
+	unsigned int		mask;
+
+	switch (whichfork) {
+	case XFS_DATA_FORK:
+		mask = XFS_SICK_INO_DIR;
+		break;
+	case XFS_ATTR_FORK:
+		mask = XFS_SICK_INO_XATTR;
+		break;
+	default:
+		ASSERT(0);
+		return;
+	}
+
+	xfs_inode_mark_sick(ip, mask);
+}
+
+/*
+ * Record observations of dir/attr btree corruption with the health tracking
+ * system.
+ */
+void
+xfs_da_mark_sick(
+	struct xfs_da_args	*args)
+{
+	xfs_dirattr_mark_sick(args->dp, args->whichfork);
+}