[RFC] xfs: consolidate local format inode fork verifiers
diff mbox

Message ID 20170719160318.GP4224@magnolia
State New
Headers show

Commit Message

Darrick J. Wong July 19, 2017, 4:03 p.m. UTC
Create a centralized function to verify local format inode forks, then
migrate the existing short format directory verifier to use this
framework and add verifiers for the other two things we support (attrs
and symlinks).

Obviously this will get broken up into smaller pieces (one patch to
refactor/move the existing verifier calls, another one to add the two
new verifiers), but what do people think?

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_attr_leaf.c      |   70 ++++++++++++++++++++++++++++++++++++
 fs/xfs/libxfs/xfs_attr_leaf.h      |    1 +
 fs/xfs/libxfs/xfs_inode_fork.c     |   10 -----
 fs/xfs/libxfs/xfs_shared.h         |    1 +
 fs/xfs/libxfs/xfs_symlink_remote.c |   29 +++++++++++++++
 fs/xfs/xfs_icache.c                |   46 ++++++++++++++++++++++++
 fs/xfs/xfs_icache.h                |    1 +
 fs/xfs/xfs_inode.c                 |    6 +--
 8 files changed, 150 insertions(+), 14 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Dave Chinner July 20, 2017, 2:26 a.m. UTC | #1
On Wed, Jul 19, 2017 at 09:03:18AM -0700, Darrick J. Wong wrote:
> Create a centralized function to verify local format inode forks, then
> migrate the existing short format directory verifier to use this
> framework and add verifiers for the other two things we support (attrs
> and symlinks).
> 
> Obviously this will get broken up into smaller pieces (one patch to
> refactor/move the existing verifier calls, another one to add the two
> new verifiers), but what do people think?
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/libxfs/xfs_attr_leaf.c      |   70 ++++++++++++++++++++++++++++++++++++
>  fs/xfs/libxfs/xfs_attr_leaf.h      |    1 +
>  fs/xfs/libxfs/xfs_inode_fork.c     |   10 -----
>  fs/xfs/libxfs/xfs_shared.h         |    1 +
>  fs/xfs/libxfs/xfs_symlink_remote.c |   29 +++++++++++++++
>  fs/xfs/xfs_icache.c                |   46 ++++++++++++++++++++++++
>  fs/xfs/xfs_icache.h                |    1 +
>  fs/xfs/xfs_inode.c                 |    6 +--
>  8 files changed, 150 insertions(+), 14 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
> index c6c15e5..43b881e 100644
> --- a/fs/xfs/libxfs/xfs_attr_leaf.c
> +++ b/fs/xfs/libxfs/xfs_attr_leaf.c
> @@ -871,6 +871,76 @@ xfs_attr_shortform_allfit(
>  	return xfs_attr_shortform_bytesfit(dp, bytes);
>  }
>  
> +/* Verify the consistency of an inline attribute fork. */
> +int
> +xfs_attr_shortform_verify(
> +	struct xfs_inode		*ip)
> +{
> +	struct xfs_attr_shortform	*sfp;
> +	struct xfs_attr_sf_entry	*sfep;
> +	struct xfs_attr_sf_entry	*next_sfep;
> +	char				*endp;
> +	struct xfs_ifork		*ifp;
> +	int				i;
> +	int				size;
> +
> +	ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL);
> +	ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK);
> +	sfp = (struct xfs_attr_shortform *)ifp->if_u1.if_data;
> +	size = ifp->if_bytes;
> +
> +	/*
> +	 * Give up if the attribute is way too short.
> +	 */
> +	if (size < sizeof (struct xfs_attr_sf_hdr))
> +		return -EFSCORRUPTED;

This seems to already be checked in xfs_iformat_fork() for
attr forks. There are also inconsistent checks between data/attr
forks whether size is valid prior formatting the fork, so there's
more cleanup needed there, and if we are checking attr/dir validity
in these functions, we shouldn't be peeking inside the fork
in xfs_iformat_fork().

> +	endp = (char *)sfp + size;
> +
> +	/* Check all reported entries */
> +	sfep = &sfp->list[0];
> +	for (i = 0; i < sfp->hdr.count; i++) {
> +		/*
> +		 * struct xfs_attr_sf_entry has a variable length.
> +		 * Check the fixed-offset parts of the structure are
> +		 * within the data buffer.
> +		 */
> +		if (((char *)sfep + sizeof(*sfep)) >= endp)
> +			return -EFSCORRUPTED;
> +
> +		/* Don't allow names with known bad length. */
> +		if (sfep->namelen == 0)
> +			return -EFSCORRUPTED;
> +
> +		/*
> +		 * Check that the variable-length part of the structure is
> +		 * within the data buffer.  The next entry starts after the
> +		 * name component, so nextentry is an acceptable test.
> +		 */
> +		next_sfep = XFS_ATTR_SF_NEXTENTRY(sfep);
> +		if (endp < (char *)next_sfep)
> +			return -EFSCORRUPTED;

Can we make the "past endp" checks consistent in variable order and
operators? i.e

		if ((char *)next_sfep >= endp)


[snip]

> diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
> index c484877..96e2957 100644
> --- a/fs/xfs/libxfs/xfs_symlink_remote.c
> +++ b/fs/xfs/libxfs/xfs_symlink_remote.c
> @@ -207,3 +207,32 @@ xfs_symlink_local_to_remote(
>  	xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsymlink_hdr) +
>  					ifp->if_bytes - 1);
>  }
> +
> +/* Verify the consistency of an inline symlink. */
> +int
> +xfs_symlink_sf_verify(
> +	struct xfs_inode	*ip)
> +{
> +	char			*sfp;
> +	char			*endp;
> +	struct xfs_ifork	*ifp;
> +	int			size;
> +
> +	ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL);
> +	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
> +	sfp = (char *)ifp->if_u1.if_data;
> +	size = ifp->if_bytes;
> +	endp = sfp + size;
> +
> +	/* No negative sizes or overly long symlink targets. */
> +	if (size < 0 || size > XFS_SYMLINK_MAXLEN)
> +		return -EFSCORRUPTED;
> +
> +	/* No NULLs in the target either. */
> +	for (; sfp < endp; sfp++) {
> +		if (*sfp == 0)
> +			return -EFSCORRUPTED;
> +	}

Hmmm - do we need to check that the symlink has been correctly null
terminated in memory (i.e. that *endp == 0) because the VFS requires
this and we add the zero termination before checking fork contents
validity?

> +/* Check inline fork contents. */
> +int
> +xfs_iget_verify_forks(

*_verify_inline_content()?

> +	struct xfs_inode		*ip)
> +{
> +	int				error;

Too much indenting :P

> +
> +	/* Check the attribute fork if there is one. */
> +	if (XFS_IFORK_PTR(ip, XFS_ATTR_FORK) &&
> +	    ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {

If there is no attr fork, then ip->i_d.di_aformat should be set to
XFS_DINODE_FMT_EXTENTS. Hence we can just do the same check as
for the data fork....

OTOH, having a value of XFS_DINODE_FMT_LOCAL in di_aformat without
an attr fork indicates corruption, so perhaps we need to catch that
here as it's not checked in xfs_ifork_format() or xfs_iflush_int().
Indeed, there are partial attr/data fork format/size checks in
xfs_ifork_format() and xfs_iflush_int(), but we don't do
comprehensive checks in either place. Maybe they should all be moved
inside this function and expanded to check that all the fork format
information is valid?

Cheers,

Dave.
Darrick J. Wong July 20, 2017, 5:50 a.m. UTC | #2
On Thu, Jul 20, 2017 at 12:26:02PM +1000, Dave Chinner wrote:
> On Wed, Jul 19, 2017 at 09:03:18AM -0700, Darrick J. Wong wrote:
> > Create a centralized function to verify local format inode forks, then
> > migrate the existing short format directory verifier to use this
> > framework and add verifiers for the other two things we support (attrs
> > and symlinks).
> > 
> > Obviously this will get broken up into smaller pieces (one patch to
> > refactor/move the existing verifier calls, another one to add the two
> > new verifiers), but what do people think?
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> >  fs/xfs/libxfs/xfs_attr_leaf.c      |   70 ++++++++++++++++++++++++++++++++++++
> >  fs/xfs/libxfs/xfs_attr_leaf.h      |    1 +
> >  fs/xfs/libxfs/xfs_inode_fork.c     |   10 -----
> >  fs/xfs/libxfs/xfs_shared.h         |    1 +
> >  fs/xfs/libxfs/xfs_symlink_remote.c |   29 +++++++++++++++
> >  fs/xfs/xfs_icache.c                |   46 ++++++++++++++++++++++++
> >  fs/xfs/xfs_icache.h                |    1 +
> >  fs/xfs/xfs_inode.c                 |    6 +--
> >  8 files changed, 150 insertions(+), 14 deletions(-)
> > 
> > diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
> > index c6c15e5..43b881e 100644
> > --- a/fs/xfs/libxfs/xfs_attr_leaf.c
> > +++ b/fs/xfs/libxfs/xfs_attr_leaf.c
> > @@ -871,6 +871,76 @@ xfs_attr_shortform_allfit(
> >  	return xfs_attr_shortform_bytesfit(dp, bytes);
> >  }
> >  
> > +/* Verify the consistency of an inline attribute fork. */
> > +int
> > +xfs_attr_shortform_verify(
> > +	struct xfs_inode		*ip)
> > +{
> > +	struct xfs_attr_shortform	*sfp;
> > +	struct xfs_attr_sf_entry	*sfep;
> > +	struct xfs_attr_sf_entry	*next_sfep;
> > +	char				*endp;
> > +	struct xfs_ifork		*ifp;
> > +	int				i;
> > +	int				size;
> > +
> > +	ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL);
> > +	ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK);
> > +	sfp = (struct xfs_attr_shortform *)ifp->if_u1.if_data;
> > +	size = ifp->if_bytes;
> > +
> > +	/*
> > +	 * Give up if the attribute is way too short.
> > +	 */
> > +	if (size < sizeof (struct xfs_attr_sf_hdr))
> > +		return -EFSCORRUPTED;
> 
> This seems to already be checked in xfs_iformat_fork() for
> attr forks. There are also inconsistent checks between data/attr
> forks whether size is valid prior formatting the fork, so there's
> more cleanup needed there, and if we are checking attr/dir validity
> in these functions, we shouldn't be peeking inside the fork
> in xfs_iformat_fork().

Hm.  Right now iformat_fork seems to want to use sf attr header totsize
as the size of the attr fork, but that only works if the attr fork area
is large enough to have a full sf attr header, and... (keep reading)

> > +	endp = (char *)sfp + size;
> > +
> > +	/* Check all reported entries */
> > +	sfep = &sfp->list[0];
> > +	for (i = 0; i < sfp->hdr.count; i++) {
> > +		/*
> > +		 * struct xfs_attr_sf_entry has a variable length.
> > +		 * Check the fixed-offset parts of the structure are
> > +		 * within the data buffer.
> > +		 */
> > +		if (((char *)sfep + sizeof(*sfep)) >= endp)
> > +			return -EFSCORRUPTED;
> > +
> > +		/* Don't allow names with known bad length. */
> > +		if (sfep->namelen == 0)
> > +			return -EFSCORRUPTED;
> > +
> > +		/*
> > +		 * Check that the variable-length part of the structure is
> > +		 * within the data buffer.  The next entry starts after the
> > +		 * name component, so nextentry is an acceptable test.
> > +		 */
> > +		next_sfep = XFS_ATTR_SF_NEXTENTRY(sfep);
> > +		if (endp < (char *)next_sfep)
> > +			return -EFSCORRUPTED;
> 
> Can we make the "past endp" checks consistent in variable order and
> operators? i.e
> 
> 		if ((char *)next_sfep >= endp)

Sure.

> 
> [snip]
> 
> > diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
> > index c484877..96e2957 100644
> > --- a/fs/xfs/libxfs/xfs_symlink_remote.c
> > +++ b/fs/xfs/libxfs/xfs_symlink_remote.c
> > @@ -207,3 +207,32 @@ xfs_symlink_local_to_remote(
> >  	xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsymlink_hdr) +
> >  					ifp->if_bytes - 1);
> >  }
> > +
> > +/* Verify the consistency of an inline symlink. */
> > +int
> > +xfs_symlink_sf_verify(
> > +	struct xfs_inode	*ip)
> > +{
> > +	char			*sfp;
> > +	char			*endp;
> > +	struct xfs_ifork	*ifp;
> > +	int			size;
> > +
> > +	ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL);
> > +	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
> > +	sfp = (char *)ifp->if_u1.if_data;
> > +	size = ifp->if_bytes;
> > +	endp = sfp + size;
> > +
> > +	/* No negative sizes or overly long symlink targets. */
> > +	if (size < 0 || size > XFS_SYMLINK_MAXLEN)
> > +		return -EFSCORRUPTED;
> > +
> > +	/* No NULLs in the target either. */
> > +	for (; sfp < endp; sfp++) {
> > +		if (*sfp == 0)
> > +			return -EFSCORRUPTED;
> > +	}
> 
> Hmmm - do we need to check that the symlink has been correctly null
> terminated in memory (i.e. that *endp == 0) because the VFS requires
> this and we add the zero termination before checking fork contents
> validity?
> 
> > +/* Check inline fork contents. */
> > +int
> > +xfs_iget_verify_forks(
> 
> *_verify_inline_content()?
> 
> > +	struct xfs_inode		*ip)
> > +{
> > +	int				error;
> 
> Too much indenting :P
> 
> > +
> > +	/* Check the attribute fork if there is one. */
> > +	if (XFS_IFORK_PTR(ip, XFS_ATTR_FORK) &&
> > +	    ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
> 
> If there is no attr fork, then ip->i_d.di_aformat should be set to
> XFS_DINODE_FMT_EXTENTS. Hence we can just do the same check as
> for the data fork....
> 
> OTOH, having a value of XFS_DINODE_FMT_LOCAL in di_aformat without
> an attr fork indicates corruption, so perhaps we need to catch that

Hm, the documentation ought to reflect that. :)

> here as it's not checked in xfs_ifork_format() or xfs_iflush_int().
> Indeed, there are partial attr/data fork format/size checks in
> xfs_ifork_format() and xfs_iflush_int(), but we don't do
> comprehensive checks in either place. Maybe they should all be moved
> inside this function and expanded to check that all the fork format
> information is valid?

Yes, this could get cleaned up this way...  What if we make
_iformat_fork check that the sizes requested aren't insane, allocate
memory, and load contents from the dinode; then we later use
_verify_ifork_content to pick all the bugs out and destroy the inode if
we finds any.

Hm.  The bmbt root actually needs numrecs read out of the header.  The
sf attr header has the total size, though we're never going to need more
than (inodesize - forkoff) bytes for it.  The bmdr thing could
complicate this.

(Maybe I'll sleep on this...)

Also, I thought di_forkoff was multiplied by 8 to calculate the distance
of the attr fork from the data fork?  If that's the case, then isn't
this verifier wrong?

    if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {

<rambling off topic now>

While we're on the subject of verifiers, Eric Sandeen has been wishing
that we could make it easier to figure out which buffer verifier test
failed, and it would seem that the XFS_CORRUPTION_ERROR macro is used to
highlight bad inode fork contents.  Perhaps we should create a similar
macro that we could use to log exactly which buffer verifier test
failed?

--D

> 
> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Brian Foster July 20, 2017, 11:51 a.m. UTC | #3
On Wed, Jul 19, 2017 at 09:03:18AM -0700, Darrick J. Wong wrote:
> Create a centralized function to verify local format inode forks, then
> migrate the existing short format directory verifier to use this
> framework and add verifiers for the other two things we support (attrs
> and symlinks).
> 
> Obviously this will get broken up into smaller pieces (one patch to
> refactor/move the existing verifier calls, another one to add the two
> new verifiers), but what do people think?
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---

I haven't combed through the details of the verifiers, but just a few
very quick thoughts..

>  fs/xfs/libxfs/xfs_attr_leaf.c      |   70 ++++++++++++++++++++++++++++++++++++
>  fs/xfs/libxfs/xfs_attr_leaf.h      |    1 +
>  fs/xfs/libxfs/xfs_inode_fork.c     |   10 -----
>  fs/xfs/libxfs/xfs_shared.h         |    1 +
>  fs/xfs/libxfs/xfs_symlink_remote.c |   29 +++++++++++++++
>  fs/xfs/xfs_icache.c                |   46 ++++++++++++++++++++++++
>  fs/xfs/xfs_icache.h                |    1 +
>  fs/xfs/xfs_inode.c                 |    6 +--
>  8 files changed, 150 insertions(+), 14 deletions(-)
> 
...
> diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
> index 0e80f34..5484211 100644
> --- a/fs/xfs/libxfs/xfs_inode_fork.c
> +++ b/fs/xfs/libxfs/xfs_inode_fork.c
> @@ -183,16 +183,6 @@ xfs_iformat_fork(
>  	if (error)
>  		return error;
>  
> -	/* Check inline dir contents. */
> -	if (S_ISDIR(VFS_I(ip)->i_mode) &&
> -	    dip->di_format == XFS_DINODE_FMT_LOCAL) {
> -		error = xfs_dir2_sf_verify(ip);
> -		if (error) {
> -			xfs_idestroy_fork(ip, XFS_DATA_FORK);
> -			return error;
> -		}
> -	}
> -

What's the purpose of moving this? Just to accommodate userspace? As it
is, it looks like this leaves a coverage gap in a log recovery case
where xfs_iget() is not used (xfs_recover_inode_owner_change()).

Also, I think this is best split up into two or three smaller patches.
E.g., one to move/rename the existing mechanism (with justification as
to why), one or more to add the additional verifiers for the other
formats.

>  	if (xfs_is_reflink_inode(ip)) {
>  		ASSERT(ip->i_cowfp == NULL);
>  		xfs_ifork_init_cow(ip);
...
> diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
> index c484877..96e2957 100644
> --- a/fs/xfs/libxfs/xfs_symlink_remote.c
> +++ b/fs/xfs/libxfs/xfs_symlink_remote.c
> @@ -207,3 +207,32 @@ xfs_symlink_local_to_remote(
>  	xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsymlink_hdr) +
>  					ifp->if_bytes - 1);
>  }
> +
> +/* Verify the consistency of an inline symlink. */
> +int
> +xfs_symlink_sf_verify(
> +	struct xfs_inode	*ip)
> +{
> +	char			*sfp;
> +	char			*endp;
> +	struct xfs_ifork	*ifp;
> +	int			size;
> +
> +	ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL);
> +	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
> +	sfp = (char *)ifp->if_u1.if_data;
> +	size = ifp->if_bytes;
> +	endp = sfp + size;
> +
> +	/* No negative sizes or overly long symlink targets. */
> +	if (size < 0 || size > XFS_SYMLINK_MAXLEN)
> +		return -EFSCORRUPTED;
> +
> +	/* No NULLs in the target either. */
> +	for (; sfp < endp; sfp++) {
> +		if (*sfp == 0)
> +			return -EFSCORRUPTED;
> +	}

	memchr() ?

Brian

> +
> +	return 0;
> +}
> diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
> index 0a9e698..fd1d430 100644
> --- a/fs/xfs/xfs_icache.c
> +++ b/fs/xfs/xfs_icache.c
> @@ -34,6 +34,11 @@
>  #include "xfs_dquot_item.h"
>  #include "xfs_dquot.h"
>  #include "xfs_reflink.h"
> +#include "xfs_da_format.h"
> +#include "xfs_da_btree.h"
> +#include "xfs_dir2_priv.h"
> +#include "xfs_attr_leaf.h"
> +#include "xfs_shared.h"
>  
>  #include <linux/kthread.h>
>  #include <linux/freezer.h>
> @@ -449,6 +454,43 @@ xfs_iget_cache_hit(
>  	return error;
>  }
>  
> +/* Check inline fork contents. */
> +int
> +xfs_iget_verify_forks(
> +	struct xfs_inode		*ip)
> +{
> +	int				error;
> +
> +	/* Check the attribute fork if there is one. */
> +	if (XFS_IFORK_PTR(ip, XFS_ATTR_FORK) &&
> +	    ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
> +		error = xfs_attr_shortform_verify(ip);
> +		if (error) {
> +			xfs_alert(ip->i_mount,
> +					"%s: bad inode %Lu inline attr fork",
> +					__func__, ip->i_ino);
> +			return error;
> +		}
> +	}
> +
> +	/* Non-local data fork, jump out. */
> +	if (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
> +		return 0;
> +
> +	/* Check the inline data fork if there is one. */
> +	if (S_ISDIR(VFS_I(ip)->i_mode))
> +		error = xfs_dir2_sf_verify(ip);
> +	else if (S_ISLNK(VFS_I(ip)->i_mode))
> +		error = xfs_symlink_sf_verify(ip);
> +	else
> +		error = -EFSCORRUPTED;
> +
> +	if (error)
> +		xfs_alert(ip->i_mount, "%s: bad inode %Lu inline data fork",
> +				__func__, ip->i_ino);
> +	return error;
> +}
> +
>  
>  static int
>  xfs_iget_cache_miss(
> @@ -473,6 +515,10 @@ xfs_iget_cache_miss(
>  	if (error)
>  		goto out_destroy;
>  
> +	error = xfs_iget_verify_forks(ip);
> +	if (error)
> +		goto out_destroy;
> +
>  	trace_xfs_iget_miss(ip);
>  
>  	if ((VFS_I(ip)->i_mode == 0) && !(flags & XFS_IGET_CREATE)) {
> diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
> index bff4d85..abf1cff 100644
> --- a/fs/xfs/xfs_icache.h
> +++ b/fs/xfs/xfs_icache.h
> @@ -129,5 +129,6 @@ xfs_fs_eofblocks_from_user(
>  
>  int xfs_icache_inode_is_allocated(struct xfs_mount *mp, struct xfs_trans *tp,
>  				  xfs_ino_t ino, bool *inuse);
> +int xfs_iget_verify_forks(struct xfs_inode *ip);
>  
>  #endif
> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> index ceef77c..7ca8336 100644
> --- a/fs/xfs/xfs_inode.c
> +++ b/fs/xfs/xfs_inode.c
> @@ -3547,10 +3547,8 @@ xfs_iflush_int(
>  	if (ip->i_d.di_version < 3)
>  		ip->i_d.di_flushiter++;
>  
> -	/* Check the inline directory data. */
> -	if (S_ISDIR(VFS_I(ip)->i_mode) &&
> -	    ip->i_d.di_format == XFS_DINODE_FMT_LOCAL &&
> -	    xfs_dir2_sf_verify(ip))
> +	/* Check the inline fork data. */
> +	if (xfs_iget_verify_forks(ip))
>  		goto corrupt_out;
>  
>  	/*
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Darrick J. Wong July 20, 2017, 8:20 p.m. UTC | #4
On Thu, Jul 20, 2017 at 07:51:46AM -0400, Brian Foster wrote:
> On Wed, Jul 19, 2017 at 09:03:18AM -0700, Darrick J. Wong wrote:
> > Create a centralized function to verify local format inode forks, then
> > migrate the existing short format directory verifier to use this
> > framework and add verifiers for the other two things we support (attrs
> > and symlinks).
> > 
> > Obviously this will get broken up into smaller pieces (one patch to
> > refactor/move the existing verifier calls, another one to add the two
> > new verifiers), but what do people think?
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> 
> I haven't combed through the details of the verifiers, but just a few
> very quick thoughts..
> 
> >  fs/xfs/libxfs/xfs_attr_leaf.c      |   70 ++++++++++++++++++++++++++++++++++++
> >  fs/xfs/libxfs/xfs_attr_leaf.h      |    1 +
> >  fs/xfs/libxfs/xfs_inode_fork.c     |   10 -----
> >  fs/xfs/libxfs/xfs_shared.h         |    1 +
> >  fs/xfs/libxfs/xfs_symlink_remote.c |   29 +++++++++++++++
> >  fs/xfs/xfs_icache.c                |   46 ++++++++++++++++++++++++
> >  fs/xfs/xfs_icache.h                |    1 +
> >  fs/xfs/xfs_inode.c                 |    6 +--
> >  8 files changed, 150 insertions(+), 14 deletions(-)
> > 
> ...
> > diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
> > index 0e80f34..5484211 100644
> > --- a/fs/xfs/libxfs/xfs_inode_fork.c
> > +++ b/fs/xfs/libxfs/xfs_inode_fork.c
> > @@ -183,16 +183,6 @@ xfs_iformat_fork(
> >  	if (error)
> >  		return error;
> >  
> > -	/* Check inline dir contents. */
> > -	if (S_ISDIR(VFS_I(ip)->i_mode) &&
> > -	    dip->di_format == XFS_DINODE_FMT_LOCAL) {
> > -		error = xfs_dir2_sf_verify(ip);
> > -		if (error) {
> > -			xfs_idestroy_fork(ip, XFS_DATA_FORK);
> > -			return error;
> > -		}
> > -	}
> > -
> 
> What's the purpose of moving this? Just to accommodate userspace?

Yes, it's part of letting repair supply its own ifork verifiers so that
phase 6 can libxfs_iget to inspect and correct shortform directories.

> As it is, it looks like this leaves a coverage gap in a log recovery
> case where xfs_iget() is not used (xfs_recover_inode_owner_change()).

Ugh, I didn't even notice that.

As it stands today, xfs_recover_inode_owner_change also won't notice
corrupt attr forks or inline symlinks.  Seeing as we're (theoretically)
only changing the owner fields in v5 bmbt tree blocks this might not
have any practical consequence.  At that point we've already replayed
everything else in that inode that was dirty and are about to write
things out to disk, so everything has to be correct.

> Also, I think this is best split up into two or three smaller patches.
> E.g., one to move/rename the existing mechanism (with justification as
> to why), one or more to add the additional verifiers for the other
> formats.

(I mentioned that I'd do that before sending a real series...)

> >  	if (xfs_is_reflink_inode(ip)) {
> >  		ASSERT(ip->i_cowfp == NULL);
> >  		xfs_ifork_init_cow(ip);
> ...
> > diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
> > index c484877..96e2957 100644
> > --- a/fs/xfs/libxfs/xfs_symlink_remote.c
> > +++ b/fs/xfs/libxfs/xfs_symlink_remote.c
> > @@ -207,3 +207,32 @@ xfs_symlink_local_to_remote(
> >  	xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsymlink_hdr) +
> >  					ifp->if_bytes - 1);
> >  }
> > +
> > +/* Verify the consistency of an inline symlink. */
> > +int
> > +xfs_symlink_sf_verify(
> > +	struct xfs_inode	*ip)
> > +{
> > +	char			*sfp;
> > +	char			*endp;
> > +	struct xfs_ifork	*ifp;
> > +	int			size;
> > +
> > +	ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL);
> > +	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
> > +	sfp = (char *)ifp->if_u1.if_data;
> > +	size = ifp->if_bytes;
> > +	endp = sfp + size;
> > +
> > +	/* No negative sizes or overly long symlink targets. */
> > +	if (size < 0 || size > XFS_SYMLINK_MAXLEN)
> > +		return -EFSCORRUPTED;
> > +
> > +	/* No NULLs in the target either. */
> > +	for (; sfp < endp; sfp++) {
> > +		if (*sfp == 0)
> > +			return -EFSCORRUPTED;
> > +	}
> 
> 	memchr() ?

<nod> Will do.  I should also check the padding fields too...

--D

> Brian
> 
> > +
> > +	return 0;
> > +}
> > diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
> > index 0a9e698..fd1d430 100644
> > --- a/fs/xfs/xfs_icache.c
> > +++ b/fs/xfs/xfs_icache.c
> > @@ -34,6 +34,11 @@
> >  #include "xfs_dquot_item.h"
> >  #include "xfs_dquot.h"
> >  #include "xfs_reflink.h"
> > +#include "xfs_da_format.h"
> > +#include "xfs_da_btree.h"
> > +#include "xfs_dir2_priv.h"
> > +#include "xfs_attr_leaf.h"
> > +#include "xfs_shared.h"
> >  
> >  #include <linux/kthread.h>
> >  #include <linux/freezer.h>
> > @@ -449,6 +454,43 @@ xfs_iget_cache_hit(
> >  	return error;
> >  }
> >  
> > +/* Check inline fork contents. */
> > +int
> > +xfs_iget_verify_forks(
> > +	struct xfs_inode		*ip)
> > +{
> > +	int				error;
> > +
> > +	/* Check the attribute fork if there is one. */
> > +	if (XFS_IFORK_PTR(ip, XFS_ATTR_FORK) &&
> > +	    ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
> > +		error = xfs_attr_shortform_verify(ip);
> > +		if (error) {
> > +			xfs_alert(ip->i_mount,
> > +					"%s: bad inode %Lu inline attr fork",
> > +					__func__, ip->i_ino);
> > +			return error;
> > +		}
> > +	}
> > +
> > +	/* Non-local data fork, jump out. */
> > +	if (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
> > +		return 0;
> > +
> > +	/* Check the inline data fork if there is one. */
> > +	if (S_ISDIR(VFS_I(ip)->i_mode))
> > +		error = xfs_dir2_sf_verify(ip);
> > +	else if (S_ISLNK(VFS_I(ip)->i_mode))
> > +		error = xfs_symlink_sf_verify(ip);
> > +	else
> > +		error = -EFSCORRUPTED;
> > +
> > +	if (error)
> > +		xfs_alert(ip->i_mount, "%s: bad inode %Lu inline data fork",
> > +				__func__, ip->i_ino);
> > +	return error;
> > +}
> > +
> >  
> >  static int
> >  xfs_iget_cache_miss(
> > @@ -473,6 +515,10 @@ xfs_iget_cache_miss(
> >  	if (error)
> >  		goto out_destroy;
> >  
> > +	error = xfs_iget_verify_forks(ip);
> > +	if (error)
> > +		goto out_destroy;
> > +
> >  	trace_xfs_iget_miss(ip);
> >  
> >  	if ((VFS_I(ip)->i_mode == 0) && !(flags & XFS_IGET_CREATE)) {
> > diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
> > index bff4d85..abf1cff 100644
> > --- a/fs/xfs/xfs_icache.h
> > +++ b/fs/xfs/xfs_icache.h
> > @@ -129,5 +129,6 @@ xfs_fs_eofblocks_from_user(
> >  
> >  int xfs_icache_inode_is_allocated(struct xfs_mount *mp, struct xfs_trans *tp,
> >  				  xfs_ino_t ino, bool *inuse);
> > +int xfs_iget_verify_forks(struct xfs_inode *ip);
> >  
> >  #endif
> > diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> > index ceef77c..7ca8336 100644
> > --- a/fs/xfs/xfs_inode.c
> > +++ b/fs/xfs/xfs_inode.c
> > @@ -3547,10 +3547,8 @@ xfs_iflush_int(
> >  	if (ip->i_d.di_version < 3)
> >  		ip->i_d.di_flushiter++;
> >  
> > -	/* Check the inline directory data. */
> > -	if (S_ISDIR(VFS_I(ip)->i_mode) &&
> > -	    ip->i_d.di_format == XFS_DINODE_FMT_LOCAL &&
> > -	    xfs_dir2_sf_verify(ip))
> > +	/* Check the inline fork data. */
> > +	if (xfs_iget_verify_forks(ip))
> >  		goto corrupt_out;
> >  
> >  	/*
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dave Chinner July 20, 2017, 10:49 p.m. UTC | #5
On Wed, Jul 19, 2017 at 10:50:57PM -0700, Darrick J. Wong wrote:
> On Thu, Jul 20, 2017 at 12:26:02PM +1000, Dave Chinner wrote:
> > On Wed, Jul 19, 2017 at 09:03:18AM -0700, Darrick J. Wong wrote:
> > > +
> > > +	/* Check the attribute fork if there is one. */
> > > +	if (XFS_IFORK_PTR(ip, XFS_ATTR_FORK) &&
> > > +	    ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
> > 
> > If there is no attr fork, then ip->i_d.di_aformat should be set to
> > XFS_DINODE_FMT_EXTENTS. Hence we can just do the same check as
> > for the data fork....
> > 
> > OTOH, having a value of XFS_DINODE_FMT_LOCAL in di_aformat without
> > an attr fork indicates corruption, so perhaps we need to catch that
> 
> Hm, the documentation ought to reflect that. :)
> 
> > here as it's not checked in xfs_ifork_format() or xfs_iflush_int().
> > Indeed, there are partial attr/data fork format/size checks in
> > xfs_ifork_format() and xfs_iflush_int(), but we don't do
> > comprehensive checks in either place. Maybe they should all be moved
> > inside this function and expanded to check that all the fork format
> > information is valid?
> 
> Yes, this could get cleaned up this way...  What if we make
> _iformat_fork check that the sizes requested aren't insane, allocate
> memory, and load contents from the dinode; then we later use
> _verify_ifork_content to pick all the bugs out and destroy the inode if
> we finds any.
> 
> Hm.  The bmbt root actually needs numrecs read out of the header.  The
> sf attr header has the total size, though we're never going to need more
> than (inodesize - forkoff) bytes for it.  The bmdr thing could
> complicate this.

Right, but that's only when the fork format is in
XFS_DINODE_FMT_BTREE, so it's not an issue for local format
validation.

> (Maybe I'll sleep on this...)
> 
> Also, I thought di_forkoff was multiplied by 8 to calculate the distance
> of the attr fork from the data fork?  If that's the case, then isn't
> this verifier wrong?
> 
>     if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {

Yeah, but rather than "wrong" it's better to think of it as "not
exact". It'll catch most errors, but not really small ones. ISTR
that quite a few of the initial verifiers I wrote did things like
this because it wasn't possible or not clear how to do exact range
checks on some values.

In this case, if we want exact checks then we have to consider that
the di_forkoff has both a minimum and a maximum valid value - the
minimum valid value is dependent on data fork contents, the maximum
is inodesize - inode core size....

> <rambling off topic now>
> 
> While we're on the subject of verifiers, Eric Sandeen has been wishing
> that we could make it easier to figure out which buffer verifier test
> failed, and it would seem that the XFS_CORRUPTION_ERROR macro is used to
> highlight bad inode fork contents.  Perhaps we should create a similar
> macro that we could use to log exactly which buffer verifier test
> failed?

I don't want to put some shouty macro on every second line of a
verifier. Think differently - we currently return a true/false
from the internal verifier functions to trigger a call to
xfs_verifier_error(). How about they return __line__
on error and 0 on success and then pass that returned value into
xfs_verifier_error() and add that to the error output?

That tells us which check failed without adding more code to every
single verifier check - use the compiler to give us what we need
without any additional code, maintenance or runtime overhead.  All
we need to know is the kernel version so we can translate the line
number to a failed check...

Cheers,

Dave.
Brian Foster July 21, 2017, 12:58 p.m. UTC | #6
On Thu, Jul 20, 2017 at 01:20:37PM -0700, Darrick J. Wong wrote:
> On Thu, Jul 20, 2017 at 07:51:46AM -0400, Brian Foster wrote:
> > On Wed, Jul 19, 2017 at 09:03:18AM -0700, Darrick J. Wong wrote:
> > > Create a centralized function to verify local format inode forks, then
> > > migrate the existing short format directory verifier to use this
> > > framework and add verifiers for the other two things we support (attrs
> > > and symlinks).
> > > 
> > > Obviously this will get broken up into smaller pieces (one patch to
> > > refactor/move the existing verifier calls, another one to add the two
> > > new verifiers), but what do people think?
> > > 
> > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > ---
> > 
> > I haven't combed through the details of the verifiers, but just a few
> > very quick thoughts..
> > 
> > >  fs/xfs/libxfs/xfs_attr_leaf.c      |   70 ++++++++++++++++++++++++++++++++++++
> > >  fs/xfs/libxfs/xfs_attr_leaf.h      |    1 +
> > >  fs/xfs/libxfs/xfs_inode_fork.c     |   10 -----
> > >  fs/xfs/libxfs/xfs_shared.h         |    1 +
> > >  fs/xfs/libxfs/xfs_symlink_remote.c |   29 +++++++++++++++
> > >  fs/xfs/xfs_icache.c                |   46 ++++++++++++++++++++++++
> > >  fs/xfs/xfs_icache.h                |    1 +
> > >  fs/xfs/xfs_inode.c                 |    6 +--
> > >  8 files changed, 150 insertions(+), 14 deletions(-)
> > > 
> > ...
> > > diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
> > > index 0e80f34..5484211 100644
> > > --- a/fs/xfs/libxfs/xfs_inode_fork.c
> > > +++ b/fs/xfs/libxfs/xfs_inode_fork.c
> > > @@ -183,16 +183,6 @@ xfs_iformat_fork(
> > >  	if (error)
> > >  		return error;
> > >  
> > > -	/* Check inline dir contents. */
> > > -	if (S_ISDIR(VFS_I(ip)->i_mode) &&
> > > -	    dip->di_format == XFS_DINODE_FMT_LOCAL) {
> > > -		error = xfs_dir2_sf_verify(ip);
> > > -		if (error) {
> > > -			xfs_idestroy_fork(ip, XFS_DATA_FORK);
> > > -			return error;
> > > -		}
> > > -	}
> > > -
> > 
> > What's the purpose of moving this? Just to accommodate userspace?
> 
> Yes, it's part of letting repair supply its own ifork verifiers so that
> phase 6 can libxfs_iget to inspect and correct shortform directories.
> 

Ok. FWIW, that seems like the wrong reason to shuffle the location of
the verifier around to me. IMO, we should invoke the verifier in a
common location with respect to where the target object is
read/initialized and then implement it in such a way that allows
conditional verification if that is a requirement of other, higher level
(outside of libxfs) contexts.

I suppose this is a bit more tricky because we are verifying the in-core
fork rather than the on-disk format as other verifiers do at the
read/write boundaries, but otherwise the flag approach (or something
like it) still makes more sense to me. Just my .02.

BTW, did we ever consider attempting to verify local format forks at
read/write time rather than ifork init/flush time? I don't recall
whether there was a problem/drawback there or we just didn't consider
it.

> > As it is, it looks like this leaves a coverage gap in a log recovery
> > case where xfs_iget() is not used (xfs_recover_inode_owner_change()).
> 
> Ugh, I didn't even notice that.
> 
> As it stands today, xfs_recover_inode_owner_change also won't notice
> corrupt attr forks or inline symlinks.  Seeing as we're (theoretically)
> only changing the owner fields in v5 bmbt tree blocks this might not
> have any practical consequence.  At that point we've already replayed
> everything else in that inode that was dirty and are about to write
> things out to disk, so everything has to be correct.
> 

It's not the particular case I was worried about (it looked like
recovery of an operation associated with extent swap during log
recovery), just that it's easier to miss the requirement that the inline
fork format has to be verified explicitly (if so desired) anywhere it is
initialized.

> > Also, I think this is best split up into two or three smaller patches.
> > E.g., one to move/rename the existing mechanism (with justification as
> > to why), one or more to add the additional verifiers for the other
> > formats.
> 
> (I mentioned that I'd do that before sending a real series...)
> 

Oops, missed that. Thanks.

Brian

> > >  	if (xfs_is_reflink_inode(ip)) {
> > >  		ASSERT(ip->i_cowfp == NULL);
> > >  		xfs_ifork_init_cow(ip);
> > ...
> > > diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
> > > index c484877..96e2957 100644
> > > --- a/fs/xfs/libxfs/xfs_symlink_remote.c
> > > +++ b/fs/xfs/libxfs/xfs_symlink_remote.c
> > > @@ -207,3 +207,32 @@ xfs_symlink_local_to_remote(
> > >  	xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsymlink_hdr) +
> > >  					ifp->if_bytes - 1);
> > >  }
> > > +
> > > +/* Verify the consistency of an inline symlink. */
> > > +int
> > > +xfs_symlink_sf_verify(
> > > +	struct xfs_inode	*ip)
> > > +{
> > > +	char			*sfp;
> > > +	char			*endp;
> > > +	struct xfs_ifork	*ifp;
> > > +	int			size;
> > > +
> > > +	ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL);
> > > +	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
> > > +	sfp = (char *)ifp->if_u1.if_data;
> > > +	size = ifp->if_bytes;
> > > +	endp = sfp + size;
> > > +
> > > +	/* No negative sizes or overly long symlink targets. */
> > > +	if (size < 0 || size > XFS_SYMLINK_MAXLEN)
> > > +		return -EFSCORRUPTED;
> > > +
> > > +	/* No NULLs in the target either. */
> > > +	for (; sfp < endp; sfp++) {
> > > +		if (*sfp == 0)
> > > +			return -EFSCORRUPTED;
> > > +	}
> > 
> > 	memchr() ?
> 
> <nod> Will do.  I should also check the padding fields too...
> 
> --D
> 
> > Brian
> > 
> > > +
> > > +	return 0;
> > > +}
> > > diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
> > > index 0a9e698..fd1d430 100644
> > > --- a/fs/xfs/xfs_icache.c
> > > +++ b/fs/xfs/xfs_icache.c
> > > @@ -34,6 +34,11 @@
> > >  #include "xfs_dquot_item.h"
> > >  #include "xfs_dquot.h"
> > >  #include "xfs_reflink.h"
> > > +#include "xfs_da_format.h"
> > > +#include "xfs_da_btree.h"
> > > +#include "xfs_dir2_priv.h"
> > > +#include "xfs_attr_leaf.h"
> > > +#include "xfs_shared.h"
> > >  
> > >  #include <linux/kthread.h>
> > >  #include <linux/freezer.h>
> > > @@ -449,6 +454,43 @@ xfs_iget_cache_hit(
> > >  	return error;
> > >  }
> > >  
> > > +/* Check inline fork contents. */
> > > +int
> > > +xfs_iget_verify_forks(
> > > +	struct xfs_inode		*ip)
> > > +{
> > > +	int				error;
> > > +
> > > +	/* Check the attribute fork if there is one. */
> > > +	if (XFS_IFORK_PTR(ip, XFS_ATTR_FORK) &&
> > > +	    ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
> > > +		error = xfs_attr_shortform_verify(ip);
> > > +		if (error) {
> > > +			xfs_alert(ip->i_mount,
> > > +					"%s: bad inode %Lu inline attr fork",
> > > +					__func__, ip->i_ino);
> > > +			return error;
> > > +		}
> > > +	}
> > > +
> > > +	/* Non-local data fork, jump out. */
> > > +	if (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
> > > +		return 0;
> > > +
> > > +	/* Check the inline data fork if there is one. */
> > > +	if (S_ISDIR(VFS_I(ip)->i_mode))
> > > +		error = xfs_dir2_sf_verify(ip);
> > > +	else if (S_ISLNK(VFS_I(ip)->i_mode))
> > > +		error = xfs_symlink_sf_verify(ip);
> > > +	else
> > > +		error = -EFSCORRUPTED;
> > > +
> > > +	if (error)
> > > +		xfs_alert(ip->i_mount, "%s: bad inode %Lu inline data fork",
> > > +				__func__, ip->i_ino);
> > > +	return error;
> > > +}
> > > +
> > >  
> > >  static int
> > >  xfs_iget_cache_miss(
> > > @@ -473,6 +515,10 @@ xfs_iget_cache_miss(
> > >  	if (error)
> > >  		goto out_destroy;
> > >  
> > > +	error = xfs_iget_verify_forks(ip);
> > > +	if (error)
> > > +		goto out_destroy;
> > > +
> > >  	trace_xfs_iget_miss(ip);
> > >  
> > >  	if ((VFS_I(ip)->i_mode == 0) && !(flags & XFS_IGET_CREATE)) {
> > > diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
> > > index bff4d85..abf1cff 100644
> > > --- a/fs/xfs/xfs_icache.h
> > > +++ b/fs/xfs/xfs_icache.h
> > > @@ -129,5 +129,6 @@ xfs_fs_eofblocks_from_user(
> > >  
> > >  int xfs_icache_inode_is_allocated(struct xfs_mount *mp, struct xfs_trans *tp,
> > >  				  xfs_ino_t ino, bool *inuse);
> > > +int xfs_iget_verify_forks(struct xfs_inode *ip);
> > >  
> > >  #endif
> > > diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> > > index ceef77c..7ca8336 100644
> > > --- a/fs/xfs/xfs_inode.c
> > > +++ b/fs/xfs/xfs_inode.c
> > > @@ -3547,10 +3547,8 @@ xfs_iflush_int(
> > >  	if (ip->i_d.di_version < 3)
> > >  		ip->i_d.di_flushiter++;
> > >  
> > > -	/* Check the inline directory data. */
> > > -	if (S_ISDIR(VFS_I(ip)->i_mode) &&
> > > -	    ip->i_d.di_format == XFS_DINODE_FMT_LOCAL &&
> > > -	    xfs_dir2_sf_verify(ip))
> > > +	/* Check the inline fork data. */
> > > +	if (xfs_iget_verify_forks(ip))
> > >  		goto corrupt_out;
> > >  
> > >  	/*
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > the body of a message to majordomo@vger.kernel.org
> > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Brian Foster July 21, 2017, 1:54 p.m. UTC | #7
On Fri, Jul 21, 2017 at 08:49:34AM +1000, Dave Chinner wrote:
> On Wed, Jul 19, 2017 at 10:50:57PM -0700, Darrick J. Wong wrote:
> > On Thu, Jul 20, 2017 at 12:26:02PM +1000, Dave Chinner wrote:
> > > On Wed, Jul 19, 2017 at 09:03:18AM -0700, Darrick J. Wong wrote:
...
> > <rambling off topic now>
> > 
> > While we're on the subject of verifiers, Eric Sandeen has been wishing
> > that we could make it easier to figure out which buffer verifier test
> > failed, and it would seem that the XFS_CORRUPTION_ERROR macro is used to
> > highlight bad inode fork contents.  Perhaps we should create a similar
> > macro that we could use to log exactly which buffer verifier test
> > failed?
> 
> I don't want to put some shouty macro on every second line of a
> verifier. Think differently - we currently return a true/false
> from the internal verifier functions to trigger a call to
> xfs_verifier_error(). How about they return __line__
> on error and 0 on success and then pass that returned value into
> xfs_verifier_error() and add that to the error output?
> 
> That tells us which check failed without adding more code to every
> single verifier check - use the compiler to give us what we need
> without any additional code, maintenance or runtime overhead.  All
> we need to know is the kernel version so we can translate the line
> number to a failed check...
> 

I think the ideal situation is the verifier error prints the check that
failed, similar to an assert failure. I'm not aware of any way to do
that without a macro, but I'm also not against crafting a new, verifier
specific one to accomplish that. Technically, it doesn't have to be
shouty :), but IMO, the diagnostic/usability benefit outweighs the
aesthetic cost.

Beyond that, I'm not against dumping a line number but it would seem
kind of unusual to dump a line number without at least a filename. FWIW,
the generic verifier error reporting function also dumps an instruction
address for where the report is generated:

 XFS (...): Metadata corruption detected at xfs_symlink_read_verify+0xcd/0x100 [xfs], xfs_symlink block 0x58

We obviously want to have information about which verifier failed, but
I'm not sure we need the actual address of the xfs_verifier_error()
caller. It would be nice if we could replace (the address, not
necessarily the function name) that with, or add to it, an address that
refers to the particular check that failed. Granted, that may require
some kind of noinline context setting helper function if
__return_address is the only option to get that information or if we
wanted to include multiple bits of data. Just a thought.

Brian

> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dave Chinner July 21, 2017, 11 p.m. UTC | #8
On Fri, Jul 21, 2017 at 09:54:05AM -0400, Brian Foster wrote:
> On Fri, Jul 21, 2017 at 08:49:34AM +1000, Dave Chinner wrote:
> > On Wed, Jul 19, 2017 at 10:50:57PM -0700, Darrick J. Wong wrote:
> > > On Thu, Jul 20, 2017 at 12:26:02PM +1000, Dave Chinner wrote:
> > > > On Wed, Jul 19, 2017 at 09:03:18AM -0700, Darrick J. Wong wrote:
> ...
> > > <rambling off topic now>
> > > 
> > > While we're on the subject of verifiers, Eric Sandeen has been wishing
> > > that we could make it easier to figure out which buffer verifier test
> > > failed, and it would seem that the XFS_CORRUPTION_ERROR macro is used to
> > > highlight bad inode fork contents.  Perhaps we should create a similar
> > > macro that we could use to log exactly which buffer verifier test
> > > failed?
> > 
> > I don't want to put some shouty macro on every second line of a
> > verifier. Think differently - we currently return a true/false
> > from the internal verifier functions to trigger a call to
> > xfs_verifier_error(). How about they return __line__
> > on error and 0 on success and then pass that returned value into
> > xfs_verifier_error() and add that to the error output?
> > 
> > That tells us which check failed without adding more code to every
> > single verifier check - use the compiler to give us what we need
> > without any additional code, maintenance or runtime overhead.  All
> > we need to know is the kernel version so we can translate the line
> > number to a failed check...
> > 
> 
> I think the ideal situation is the verifier error prints the check that
> failed, similar to an assert failure.

Well, that comes from a macro that feeds the assert failure message
__func__ and __line__.

> I'm not aware of any way to do
> that without a macro,

I just outlined how to do it above.

> but I'm also not against crafting a new, verifier
> specific one to accomplish that. Technically, it doesn't have to be
> shouty :), but IMO, the diagnostic/usability benefit outweighs the
> aesthetic cost.

I disagree - there are so many verifier checks (and we only grow
more as time goes on) so whatever we do needs them to be
easily maintainable and not compromise the readabilty of the verifer
code.

> Beyond that, I'm not against dumping a line number but it would seem
> kind of unusual to dump a line number without at least a filename. FWIW,

We don't really need the filename because we have the name of the
verifier that failed in the ops structure. Hence we can print a
{verfier name, line number} tuple which is effectively the same as
{filename, line number}.

> the generic verifier error reporting function also dumps an instruction
> address for where the report is generated:
> 
>  XFS (...): Metadata corruption detected at xfs_symlink_read_verify+0xcd/0x100 [xfs], xfs_symlink block 0x58
>
> We obviously want to have information about which verifier failed, but
> I'm not sure we need the actual address of the xfs_verifier_error()
> caller. It would be nice if we could replace (the address, not
> necessarily the function name) that with, or add to it, an address that
> refers to the particular check that failed.

Yes, that's exactly what I'm proposing we do.  What we really need to know is

	1. the block that was corrupted (from bp)
	2. the verifier that detected the corruption (from bp)
	3. the IO type (read/write from bp)
	4. the verifier check that failed (returned from verifier)

We already have 1-3, but we don't have 4. We need to replace the
replace __return_address used in the error message with __line__ or
__THIS_IP__ that is returned from the if() branch that failed and
from that we can then easily track the cause of the failure back to
the source.

Returning __line__ or __THIS_IP__ from the verifier doesn't require
new macros, or really any significant code change as most verifiers
arelady return a true/false. All we need to do is plumb it into
xfs_verifier_error().

With that extra info we can output a slightly different message,
say:

XFS (...): Metadata corruption detected during read of block 0xx58 by xfs_symlink verifier, line 132

Cheers,

Dave.
Brian Foster July 22, 2017, 12:29 p.m. UTC | #9
On Sat, Jul 22, 2017 at 09:00:58AM +1000, Dave Chinner wrote:
> On Fri, Jul 21, 2017 at 09:54:05AM -0400, Brian Foster wrote:
> > On Fri, Jul 21, 2017 at 08:49:34AM +1000, Dave Chinner wrote:
> > > On Wed, Jul 19, 2017 at 10:50:57PM -0700, Darrick J. Wong wrote:
> > > > On Thu, Jul 20, 2017 at 12:26:02PM +1000, Dave Chinner wrote:
> > > > > On Wed, Jul 19, 2017 at 09:03:18AM -0700, Darrick J. Wong wrote:
> > ...
> > > > <rambling off topic now>
> > > > 
> > > > While we're on the subject of verifiers, Eric Sandeen has been wishing
> > > > that we could make it easier to figure out which buffer verifier test
> > > > failed, and it would seem that the XFS_CORRUPTION_ERROR macro is used to
> > > > highlight bad inode fork contents.  Perhaps we should create a similar
> > > > macro that we could use to log exactly which buffer verifier test
> > > > failed?
> > > 
> > > I don't want to put some shouty macro on every second line of a
> > > verifier. Think differently - we currently return a true/false
> > > from the internal verifier functions to trigger a call to
> > > xfs_verifier_error(). How about they return __line__
> > > on error and 0 on success and then pass that returned value into
> > > xfs_verifier_error() and add that to the error output?
> > > 
> > > That tells us which check failed without adding more code to every
> > > single verifier check - use the compiler to give us what we need
> > > without any additional code, maintenance or runtime overhead.  All
> > > we need to know is the kernel version so we can translate the line
> > > number to a failed check...
> > > 
> > 
> > I think the ideal situation is the verifier error prints the check that
> > failed, similar to an assert failure.
> 
> Well, that comes from a macro that feeds the assert failure message
> __func__ and __line__.
> 
> > I'm not aware of any way to do
> > that without a macro,
> 
> I just outlined how to do it above.
> 

What I mean is that assert failures (warnings, etc.) print the actual
expression that caused the report along with the file/line information.
What you've described here doesn't accomplish that.

> > but I'm also not against crafting a new, verifier
> > specific one to accomplish that. Technically, it doesn't have to be
> > shouty :), but IMO, the diagnostic/usability benefit outweighs the
> > aesthetic cost.
> 
> I disagree - there are so many verifier checks (and we only grow
> more as time goes on) so whatever we do needs them to be
> easily maintainable and not compromise the readabilty of the verifer
> code.
> 

I agree. ;) But I disagree that using a macro somehow automatically
negatively affects the readability of verifers. Anybody who can make
sense of this:

        if (!xfs_sb_version_hascrc(&mp->m_sb))
                return __line__;
        if (dsl->sl_magic != cpu_to_be32(XFS_SYMLINK_MAGIC))
                return __line__;
        if (!uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_meta_uuid))
                return __line__;

... could just as easily make sense of something like this:

	xfs_verify_assert(xfs_sb_version_hascrc(&mp->m_sb));
	xfs_verify_assert(dsl->sl_magic == cpu_to_be32(XFS_SYMLINK_MAGIC));
	xfs_verify_assert(uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_meta_uuid));

... where xfs_verify_assert() is some macro that magically packages up
the error information for the report to userspace and affects code
execution appropriately (i.e., function exit).

The macros themselves may be ugly, but nobody concerned with the error
report needs to know anything about how the error reporting mechanism
works. Even then, there is no real complexity here IMO. This is a simple
abstraction to package up error state information. The verifiers
themselves remain just as straightforward (and may very well end up more
compact). Further, we already use these kinds of macros all over the
place today as they seem to be suited for this purpose. 

> > Beyond that, I'm not against dumping a line number but it would seem
> > kind of unusual to dump a line number without at least a filename. FWIW,
> 
> We don't really need the filename because we have the name of the
> verifier that failed in the ops structure. Hence we can print a
> {verfier name, line number} tuple which is effectively the same as
> {filename, line number}.
> 

Technically, better than nothing...

> > the generic verifier error reporting function also dumps an instruction
> > address for where the report is generated:
> > 
> >  XFS (...): Metadata corruption detected at xfs_symlink_read_verify+0xcd/0x100 [xfs], xfs_symlink block 0x58
> >
> > We obviously want to have information about which verifier failed, but
> > I'm not sure we need the actual address of the xfs_verifier_error()
> > caller. It would be nice if we could replace (the address, not
> > necessarily the function name) that with, or add to it, an address that
> > refers to the particular check that failed.
> 
> Yes, that's exactly what I'm proposing we do.  What we really need to know is
> 
> 	1. the block that was corrupted (from bp)
> 	2. the verifier that detected the corruption (from bp)
> 	3. the IO type (read/write from bp)
> 	4. the verifier check that failed (returned from verifier)
> 
> We already have 1-3, but we don't have 4. We need to replace the
> replace __return_address used in the error message with __line__ or
> __THIS_IP__ that is returned from the if() branch that failed and
> from that we can then easily track the cause of the failure back to
> the source.
> 
> Returning __line__ or __THIS_IP__ from the verifier doesn't require
> new macros, or really any significant code change as most verifiers
> arelady return a true/false. All we need to do is plumb it into
> xfs_verifier_error().
> 
> With that extra info we can output a slightly different message,
> say:
> 
> XFS (...): Metadata corruption detected during read of block 0xx58 by xfs_symlink verifier, line 132
> 

It sounds to me you're more preoccupied with making a particular code
change here than focusing on what the ideal error report should look
like, and then figuring out the best way to implement it. The above is
incrementally more useful than the original, but still not sufficiently
usable IMO.

Consider the case where a bug report includes a logfile with tens of
(unique) such corruption errors. Nobody wants to go fishing through
verifier callchains to validate that the file of the specific failure
matches the top-level verifier function and then subsequently that the
reported line is sane with respect to the failure ("Oh, did that
verifier call any other functions in separate files with line numbers
that I have to rule out? Let me double check.." "Oh you're on for-next?
I hope my for-next line numbers match up.." "If they don't, hopefully
it's a severe enough mismatch not to point to another test in the same
function." :/). Compound that further with cases where reports may
involve sosreports across 5 or 6 different physical nodes, all with
similar corruption reports (and maybe even running slightly different
kernels, perhaps some with test patches that render all previous line
numbers invalid, etc.) and this all becomes rather painful.

There is a simple solution to this problem: just include all of the
readily accessible state information in the error report. If the
messages above looked something like the following:

  XFS (...): Metadata corruption detected at xfs_symlink_read_verify [xfs], xfs_symlink block 0x58
  XFS (...): Read verifier failed: xfs_log_check_lsn(mp, be64_to_cpu(dsl->sl_lsn)), xfs_symlink_remote.c:114

... we wouldn't have to decode what the error means for every possible
kernel. The error report is self contained like most of the other
errors/assert reports in XFS and throughout the kernel.

Brian

> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dave Chinner July 24, 2017, 5:26 a.m. UTC | #10
On Sat, Jul 22, 2017 at 08:29:44AM -0400, Brian Foster wrote:
> On Sat, Jul 22, 2017 at 09:00:58AM +1000, Dave Chinner wrote:
> > On Fri, Jul 21, 2017 at 09:54:05AM -0400, Brian Foster wrote:
> > > On Fri, Jul 21, 2017 at 08:49:34AM +1000, Dave Chinner wrote:
> > > but I'm also not against crafting a new, verifier
> > > specific one to accomplish that. Technically, it doesn't have to be
> > > shouty :), but IMO, the diagnostic/usability benefit outweighs the
> > > aesthetic cost.
> > 
> > I disagree - there are so many verifier checks (and we only grow
> > more as time goes on) so whatever we do needs them to be
> > easily maintainable and not compromise the readabilty of the verifer
> > code.
> > 
> 
> I agree. ;) But I disagree that using a macro somehow automatically
> negatively affects the readability of verifers. Anybody who can make
> sense of this:
> 
>         if (!xfs_sb_version_hascrc(&mp->m_sb))
>                 return __line__;
>         if (dsl->sl_magic != cpu_to_be32(XFS_SYMLINK_MAGIC))
>                 return __line__;
>         if (!uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_meta_uuid))
>                 return __line__;
> 
> ... could just as easily make sense of something like this:
> 
> 	xfs_verify_assert(xfs_sb_version_hascrc(&mp->m_sb));
> 	xfs_verify_assert(dsl->sl_magic == cpu_to_be32(XFS_SYMLINK_MAGIC));
> 	xfs_verify_assert(uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_meta_uuid));

Still needs branches to return on error so the verifier code
functions correctly. So it simply isn't as clean as your example
suggests. And, no hiding goto/return/errno variables inside a macro
is not an option - that makes for horrible code and we should not
repeat those past mistakes we inherited from the old Irix
codebase.

And we need to be really careful here - making the code more complex
for pretty errors blows out the size of the code. That means it runs
slower, even though we never execute most of the pretty error print
stuff.  This is an important consideration as verifiers are a
performance critical fast path, both in the kernel and in userspace.

> There is a simple solution to this problem: just include all of the
> readily accessible state information in the error report. If the
> messages above looked something like the following:
> 
>   XFS (...): Metadata corruption detected at xfs_symlink_read_verify [xfs], xfs_symlink block 0x58
>   XFS (...): Read verifier failed: xfs_log_check_lsn(mp, be64_to_cpu(dsl->sl_lsn)), xfs_symlink_remote.c:114
>
> ... we wouldn't have to decode what the error means for every possible
> kernel. The error report is self contained like most of the other
> errors/assert reports in XFS and throughout the kernel.

Just like an assert/warning, we've still got to go match it to the
exact kernel source tree to determine the context in which the
message was emitted and what it actually means.  Making it pretty
doesn't change the amount of work a developer needs to do to
classify or analyse errors in log files, all it does is change the
amount of overhead needed to generate the error message,

Really, verifiers are performance critical fast paths, so the code
that runs inside them needs to be implemented with this in mind. I'm
not against making messages pretty, but let's not do it based on the
on the premise that pretty error messages are more important than
runtime performance when there are no errors...

Cheers,

Dave.
Brian Foster July 24, 2017, 1:07 p.m. UTC | #11
On Mon, Jul 24, 2017 at 03:26:38PM +1000, Dave Chinner wrote:
> On Sat, Jul 22, 2017 at 08:29:44AM -0400, Brian Foster wrote:
> > On Sat, Jul 22, 2017 at 09:00:58AM +1000, Dave Chinner wrote:
> > > On Fri, Jul 21, 2017 at 09:54:05AM -0400, Brian Foster wrote:
> > > > On Fri, Jul 21, 2017 at 08:49:34AM +1000, Dave Chinner wrote:
> > > > but I'm also not against crafting a new, verifier
> > > > specific one to accomplish that. Technically, it doesn't have to be
> > > > shouty :), but IMO, the diagnostic/usability benefit outweighs the
> > > > aesthetic cost.
> > > 
> > > I disagree - there are so many verifier checks (and we only grow
> > > more as time goes on) so whatever we do needs them to be
> > > easily maintainable and not compromise the readabilty of the verifer
> > > code.
> > > 
> > 
> > I agree. ;) But I disagree that using a macro somehow automatically
> > negatively affects the readability of verifers. Anybody who can make
> > sense of this:
> > 
> >         if (!xfs_sb_version_hascrc(&mp->m_sb))
> >                 return __line__;
> >         if (dsl->sl_magic != cpu_to_be32(XFS_SYMLINK_MAGIC))
> >                 return __line__;
> >         if (!uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_meta_uuid))
> >                 return __line__;
> > 
> > ... could just as easily make sense of something like this:
> > 
> > 	xfs_verify_assert(xfs_sb_version_hascrc(&mp->m_sb));
> > 	xfs_verify_assert(dsl->sl_magic == cpu_to_be32(XFS_SYMLINK_MAGIC));
> > 	xfs_verify_assert(uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_meta_uuid));
> 
> Still needs branches to return on error so the verifier code
> functions correctly. So it simply isn't as clean as your example
> suggests. And, no hiding goto/return/errno variables inside a macro
> is not an option - that makes for horrible code and we should not
> repeat those past mistakes we inherited from the old Irix
> codebase.
> 

Clearly the example implies (and the supporting text explains) that the
macro would lead to function exit. I don't really see the problem with
doing that. Again, we already do that all over the place. If we don't
want to alter function execution via the macro, then have it return an
expression that resolves to false (or an error code or some such) and
leave the function execution logic in function context. It doesn't
really matter to me either way.

> And we need to be really careful here - making the code more complex
> for pretty errors blows out the size of the code. That means it runs
> slower, even though we never execute most of the pretty error print
> stuff.  This is an important consideration as verifiers are a
> performance critical fast path, both in the kernel and in userspace.
> 

I guess we'd increase the size of the code for strings that represent
the associated checks and whatever extra the macro code may end up
duplicating. Once an error is detected, I think much of the latter could
be packaged into a noinline helper. It's not immediately clear to me
what we're looking at for a size increase, so I don't think "blows out"
is a fair characterization without some actual data [1]. I do think this
is a fair/valid concern however, we'd just need to try and experiment
and see how much it costs to convert over a verifier or two, for
example.

And I'm not sure how you make the leap from there to an automatic
performance degradation. I also agree that the verifiers are performance
critical and wouldn't suggest taking an approach if I thought we'd
measurably affect performance. As it is, I'd expect that additional
overhead of storing error context (hell, we could always just dump it
from where the problem was originally detected rather than pass it to
the generic verifier report function) would essentially be nil up until
a verification check fails.

Would you consider such an approach if we could demonstrate a reasonable
code size tradeoff and that runtime performance is not affected? If you
simply hate macros then I guess there isn't much we can do but agree to
disagree (and I'm not against a non-macro approach if one exists, I just
don't know what it would be). But otherwise I wouldn't mind running some
experiments (unless Darrick wanted to..? he brought this up after all..
:P) if the results will be fairly considered.

> > There is a simple solution to this problem: just include all of the
> > readily accessible state information in the error report. If the
> > messages above looked something like the following:
> > 
> >   XFS (...): Metadata corruption detected at xfs_symlink_read_verify [xfs], xfs_symlink block 0x58
> >   XFS (...): Read verifier failed: xfs_log_check_lsn(mp, be64_to_cpu(dsl->sl_lsn)), xfs_symlink_remote.c:114
> >
> > ... we wouldn't have to decode what the error means for every possible
> > kernel. The error report is self contained like most of the other
> > errors/assert reports in XFS and throughout the kernel.
> 
> Just like an assert/warning, we've still got to go match it to the
> exact kernel source tree to determine the context in which the
> message was emitted and what it actually means.  Making it pretty
> doesn't change the amount of work a developer needs to do to
> classify or analyse errors in log files, all it does is change the
> amount of overhead needed to generate the error message,
> 

Yes, you're still going to want to ultimately dig down into the code and
do complex root cause analysis. Unfortunately, we can't fix that with
error reporting. :) I've laid out several situations above as to why I
think the "pretty" format makes it significantly easier to make sense of
corruption reports that may be more involved than a single instance or a
single physical node. A broad assessment can be made from the actual
corruption report without the need to decode tens or more cryptic
messages. It potentially saves significant time spent doing unnecessary
busy work. It also helps reduce unnecessary confusion with general
reports that might arise from users on distro kernels, etc., where the
line numbers might not be sufficient or easily misaligned.

Of course, once we get into debugging an actual problem, we've kind of
moved outside the scope of this discussion...

> Really, verifiers are performance critical fast paths, so the code
> that runs inside them needs to be implemented with this in mind. I'm
> not against making messages pretty, but let's not do it based on the
> on the premise that pretty error messages are more important than
> runtime performance when there are no errors...
> 

I agree. As noted above, I would not expect any more overhead than the
original suggestion to return a line number in the normal runtime case.
The difference is primarily what we do in the code between when a
verifier check fails and we report the failure.

FWIW, an approach that might be a reasonable compromise is to use the
original suggestion to pass along the line number, but also wrap each
verifier check in a thinner macro that simply asserts[2] on the verifier
expression. The new macro could be ifdef'd out completely in production
builds to eliminate code size concerns. Then if we have reports with
significant enough corruption issues to warrant it, we collect a
metadump and use any local debug kernel to access the fs and decode the
errors for us (or ask the user to run a debug kernel). We can expand the
verifier macro in the future with production build support if warranted.
Thoughts?

Brian

[1] A simple test that we can do at the moment is to compare a
production xfs.ko with one with asserts enabled. My local for-next
branch builds a module of 40104000 bytes without XFS_WARN and 40540208
bytes with XFS_WARN enabled. That makes for a difference of ~425k and
roughly a 1% code size increase (accounting for ~1750 asserts and
supporting code).

[2] We may not want to use ASSERT() here, but perhaps define a new
variant to omit the stack trace and otherwise customize to a verifier
report.

> Cheers,
> 
> Dave.
> 
> -- 
> Dave Chinner
> david@fromorbit.com
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Darrick J. Wong July 24, 2017, 5:15 p.m. UTC | #12
On Fri, Jul 21, 2017 at 08:49:34AM +1000, Dave Chinner wrote:
> On Wed, Jul 19, 2017 at 10:50:57PM -0700, Darrick J. Wong wrote:
> > On Thu, Jul 20, 2017 at 12:26:02PM +1000, Dave Chinner wrote:
> > > On Wed, Jul 19, 2017 at 09:03:18AM -0700, Darrick J. Wong wrote:
> > > > +
> > > > +	/* Check the attribute fork if there is one. */
> > > > +	if (XFS_IFORK_PTR(ip, XFS_ATTR_FORK) &&
> > > > +	    ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
> > > 
> > > If there is no attr fork, then ip->i_d.di_aformat should be set to
> > > XFS_DINODE_FMT_EXTENTS. Hence we can just do the same check as
> > > for the data fork....
> > > 
> > > OTOH, having a value of XFS_DINODE_FMT_LOCAL in di_aformat without
> > > an attr fork indicates corruption, so perhaps we need to catch that
> > 
> > Hm, the documentation ought to reflect that. :)
> > 
> > > here as it's not checked in xfs_ifork_format() or xfs_iflush_int().
> > > Indeed, there are partial attr/data fork format/size checks in
> > > xfs_ifork_format() and xfs_iflush_int(), but we don't do
> > > comprehensive checks in either place. Maybe they should all be moved
> > > inside this function and expanded to check that all the fork format
> > > information is valid?
> > 
> > Yes, this could get cleaned up this way...  What if we make
> > _iformat_fork check that the sizes requested aren't insane, allocate
> > memory, and load contents from the dinode; then we later use
> > _verify_ifork_content to pick all the bugs out and destroy the inode if
> > we finds any.
> > 
> > Hm.  The bmbt root actually needs numrecs read out of the header.  The
> > sf attr header has the total size, though we're never going to need more
> > than (inodesize - forkoff) bytes for it.  The bmdr thing could
> > complicate this.
> 
> Right, but that's only when the fork format is in
> XFS_DINODE_FMT_BTREE, so it's not an issue for local format
> validation.
> 
> > (Maybe I'll sleep on this...)

Several sleeps later, it occurs to me that both of these header types
put the size fields within the first eight bytes of the header struct.
So long as di_forkoff isn't zero or the maximum, it should be fine to
read that length out of the header, so we actually can do all the size
sanity checks in _iformat_fork.

> > Also, I thought di_forkoff was multiplied by 8 to calculate the distance
> > of the attr fork from the data fork?  If that's the case, then isn't
> > this verifier wrong?
> > 
> >     if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {
> 
> Yeah, but rather than "wrong" it's better to think of it as "not
> exact". It'll catch most errors, but not really small ones. ISTR
> that quite a few of the initial verifiers I wrote did things like
> this because it wasn't possible or not clear how to do exact range
> checks on some values.
> 
> In this case, if we want exact checks then we have to consider that
> the di_forkoff has both a minimum and a maximum valid value - the
> minimum valid value is dependent on data fork contents, the maximum
> is inodesize - inode core size....

<nod> Maybe we push that off to scrub/inode.c then?

> > <rambling off topic now>
> > 
> > While we're on the subject of verifiers, Eric Sandeen has been wishing
> > that we could make it easier to figure out which buffer verifier test
> > failed, and it would seem that the XFS_CORRUPTION_ERROR macro is used to
> > highlight bad inode fork contents.  Perhaps we should create a similar
> > macro that we could use to log exactly which buffer verifier test
> > failed?
> 
> I don't want to put some shouty macro on every second line of a
> verifier. Think differently - we currently return a true/false
> from the internal verifier functions to trigger a call to
> xfs_verifier_error(). How about they return __line__
> on error and 0 on success and then pass that returned value into
> xfs_verifier_error() and add that to the error output?
> 
> That tells us which check failed without adding more code to every
> single verifier check - use the compiler to give us what we need
> without any additional code, maintenance or runtime overhead.  All
> we need to know is the kernel version so we can translate the line
> number to a failed check...

We could do that, though if verifier functionality spreads across more
than one file it'll become difficult to disambiguate, which is already
difficult since we have to match the dmesg with the kernel source to
figure out which check it was that failed.  OTOH I do see that you're
aiming for minimal verifier overhead, since success happens most often.

Heh, what if we just spit back _THIS_IP_ instead?  It has no more
overhead than passing a pointer around, and we can use it to identify 
exactly which function/line/instruction failed.

(Ok, continuing down the thread...)

--D

> 
> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Brian Foster July 24, 2017, 6:36 p.m. UTC | #13
On Mon, Jul 24, 2017 at 09:07:52AM -0400, Brian Foster wrote:
> On Mon, Jul 24, 2017 at 03:26:38PM +1000, Dave Chinner wrote:
> > On Sat, Jul 22, 2017 at 08:29:44AM -0400, Brian Foster wrote:
> > > On Sat, Jul 22, 2017 at 09:00:58AM +1000, Dave Chinner wrote:
> > > > On Fri, Jul 21, 2017 at 09:54:05AM -0400, Brian Foster wrote:
> > > > > On Fri, Jul 21, 2017 at 08:49:34AM +1000, Dave Chinner wrote:
...
> 
> [1] A simple test that we can do at the moment is to compare a
> production xfs.ko with one with asserts enabled. My local for-next
> branch builds a module of 40104000 bytes without XFS_WARN and 40540208
> bytes with XFS_WARN enabled. That makes for a difference of ~425k and
> roughly a 1% code size increase (accounting for ~1750 asserts and
> supporting code).
> 

FWIW, the huge size here didn't register to me until Darrick mentioned
it on irc. I'm guessing I have a bunch of debug enabled in my kernel
config (that I don't really want to disable right now). If I strip the
resulting module binaries, I end up with 1076736 bytes vs. 1210624 bytes
(for a 130kb delta and ~12% increase), which I'm guessing is a more
accurate assessment.

Brian

> [2] We may not want to use ASSERT() here, but perhaps define a new
> variant to omit the stack trace and otherwise customize to a verifier
> report.
> 
> > Cheers,
> > 
> > Dave.
> > 
> > -- 
> > Dave Chinner
> > david@fromorbit.com
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Darrick J. Wong July 24, 2017, 6:44 p.m. UTC | #14
On Mon, Jul 24, 2017 at 09:07:52AM -0400, Brian Foster wrote:
> On Mon, Jul 24, 2017 at 03:26:38PM +1000, Dave Chinner wrote:
> > On Sat, Jul 22, 2017 at 08:29:44AM -0400, Brian Foster wrote:
> > > On Sat, Jul 22, 2017 at 09:00:58AM +1000, Dave Chinner wrote:
> > > > On Fri, Jul 21, 2017 at 09:54:05AM -0400, Brian Foster wrote:
> > > > > On Fri, Jul 21, 2017 at 08:49:34AM +1000, Dave Chinner wrote:
> > > > > but I'm also not against crafting a new, verifier
> > > > > specific one to accomplish that. Technically, it doesn't have to be
> > > > > shouty :), but IMO, the diagnostic/usability benefit outweighs the
> > > > > aesthetic cost.
> > > > 
> > > > I disagree - there are so many verifier checks (and we only grow
> > > > more as time goes on) so whatever we do needs them to be
> > > > easily maintainable and not compromise the readabilty of the verifer
> > > > code.
> > > > 
> > > 
> > > I agree. ;) But I disagree that using a macro somehow automatically
> > > negatively affects the readability of verifers. Anybody who can make
> > > sense of this:
> > > 
> > >         if (!xfs_sb_version_hascrc(&mp->m_sb))
> > >                 return __line__;
> > >         if (dsl->sl_magic != cpu_to_be32(XFS_SYMLINK_MAGIC))
> > >                 return __line__;
> > >         if (!uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_meta_uuid))
> > >                 return __line__;
> > > 
> > > ... could just as easily make sense of something like this:
> > > 
> > > 	xfs_verify_assert(xfs_sb_version_hascrc(&mp->m_sb));
> > > 	xfs_verify_assert(dsl->sl_magic == cpu_to_be32(XFS_SYMLINK_MAGIC));
> > > 	xfs_verify_assert(uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_meta_uuid));
> > 
> > Still needs branches to return on error so the verifier code
> > functions correctly. So it simply isn't as clean as your example
> > suggests. And, no hiding goto/return/errno variables inside a macro
> > is not an option - that makes for horrible code and we should not
> > repeat those past mistakes we inherited from the old Irix
> > codebase.
> > 
> 
> Clearly the example implies (and the supporting text explains) that the
> macro would lead to function exit.

It does?  Every time I see an *assert() I assume that execution either
continues past it or stops dead in its tracks.  The _GOTO macros make
that clear by pairing the word goto with a label, though I concede that
it feels odd to pass a label as an argument.

> I don't really see the problem with doing that. Again, we already do
> that all over the place. If we don't want to alter function execution
> via the macro, then have it return an expression that resolves to
> false (or an error code or some such) and leave the function execution
> logic in function context. It doesn't really matter to me either way.

I get that hiding branches inside macros makes it too easy for later
readers to be mislead, but if there's one aspect of them that I /do/
like, its that (provided the naming makes it clear that we're branching
somewhere) we /can/ use them to ensure that the return values are
consistent and avoid screwing up repetitive if tests.

Anyway, I was picturing something like this:

#define XFS_VERIFY_FAIL_UNLESS(expr) do { if (expr) return _THIS_IP_; } while(0)

static unsigned long
xfs_blah_verify(...)
{
	XFS_VERIFY_FAIL_UNLESS(blah1 == GOOD1);
	XFS_VERIFY_FAIL_UNLESS(blah2 == GOOD2);
	XFS_VERIFY_FAIL_UNLESS(blah3 == GOOD3);
	...
	return 0;
}

Granted, with code review to spot inconsistencies maybe the macro really
is pointless, and we can just open-code everything.  I dunno.  To be
honest I actually do prefer the macro in this particular case.

static unsigned long
xfs_blah_verify(...)
{
	if (!(blah1 == GOOD1))
		return _THIS_IP_;
	if (!(blah2 == GOOD2))
		return _THIS_IP_;
	if (!(blah3 == GOOD3))
		return _THIS_IP_;
	...
	return 0;
}

static void
xfs_blah_read_verify(bp)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	unsigned long		x;

	if (xfs_sb_version_hascrc(&mp->m_sb) &&
	    !xfs_buf_verify_cksum(bp, XFS_BLAH_CRC_OFF))
		xfs_buf_ioerror(bp, -EFSBADCRC);
	x = xfs_blah_verify(mp, bp);
	if (x) {
		xfs_err(mp, "Read verifier failed on daddr %llu at %pF within %s",
				bp->b_bn, x, __func__);
		xfs_buf_ioerror(bp, -EFSCORRUPTED);
	}

	if (bp->b_error)
		xfs_verifier_error(bp);

}

> > And we need to be really careful here - making the code more complex
> > for pretty errors blows out the size of the code. That means it runs
> > slower, even though we never execute most of the pretty error print
> > stuff.  This is an important consideration as verifiers are a
> > performance critical fast path, both in the kernel and in userspace.

Fair enough.  Adding more stuff to facilitate prettyprinting could
indeed bloat the size of the verifiers, but let's convert one of the
verifiers and compare before and after numbers.

> I guess we'd increase the size of the code for strings that represent
> the associated checks and whatever extra the macro code may end up
> duplicating. Once an error is detected, I think much of the latter could
> be packaged into a noinline helper. It's not immediately clear to me
> what we're looking at for a size increase, so I don't think "blows out"
> is a fair characterization without some actual data [1]. I do think this
> is a fair/valid concern however, we'd just need to try and experiment
> and see how much it costs to convert over a verifier or two, for
> example.
> 
> And I'm not sure how you make the leap from there to an automatic
> performance degradation. I also agree that the verifiers are performance
> critical and wouldn't suggest taking an approach if I thought we'd
> measurably affect performance. As it is, I'd expect that additional
> overhead of storing error context (hell, we could always just dump it
> from where the problem was originally detected rather than pass it to
> the generic verifier report function) would essentially be nil up until
> a verification check fails.

I don't think there's much difference in the generated code between
opencoded verifier tests and a macro that more or less does the same
thing but also helps us to avoid copy-pasta mistakes.  TBH I'm more
worried about us making a mistake in the verifiers that screw up what we
accept coming from / allow to be written to disk than I am about the
exact size of each verifier function.

That said, I've warmed to Dave's idea of passing back _THIS_IP_ as a
compromise between __line__ (not quite enough info) and throwing
(#fs_ok, __func__, __line__) back to callers.

> Would you consider such an approach if we could demonstrate a reasonable
> code size tradeoff and that runtime performance is not affected? If you
> simply hate macros then I guess there isn't much we can do but agree to
> disagree (and I'm not against a non-macro approach if one exists, I just
> don't know what it would be). But otherwise I wouldn't mind running some
> experiments (unless Darrick wanted to..? he brought this up after all..
> :P) if the results will be fairly considered.

So.... the online scrub code actually /does/ stringify every single
metadata object field check so that it can record exactly which check
failed in the ftrace output.  Given that online scrub has less stringent
performance requirements and does expensive cross referencing with other
metadata, one could argue that for scrub the code/performance tradeoff
is acceptable.

I just built a 4.13-rc2 kernel with the scrub/repair patches, and then
summed up the section sizes of the resulting .o files:

note 0
bss 1
comment 1007
mcount 1568
jump 1584
rodata 23503
data 23815
text 130569
debug 3318092

Most of rodata seems to be the stringified text that we save, and rodata
seems to constitute ~18% of the built module size.  If I modify the code
not to stringify anything, the sizes become:

note 0
bss 1
comment 1007
mcount 1568
jump 1584
rodata 6657
data 6969
text 130347
debug 3318092

Now the rodata section is only ~5% of the module size.  I conclude that
there is a definite cost to stringifying things, though I'm actually
surprised that it's only 17KB.

> > > There is a simple solution to this problem: just include all of
> > > the
> > > readily accessible state information in the error report. If the
> > > messages above looked something like the following:
> > > 
> > >   XFS (...): Metadata corruption detected at xfs_symlink_read_verify [xfs], xfs_symlink block 0x58
> > >   XFS (...): Read verifier failed: xfs_log_check_lsn(mp, be64_to_cpu(dsl->sl_lsn)), xfs_symlink_remote.c:114
> > >
> > > ... we wouldn't have to decode what the error means for every possible
> > > kernel. The error report is self contained like most of the other
> > > errors/assert reports in XFS and throughout the kernel.
> > 
> > Just like an assert/warning, we've still got to go match it to the
> > exact kernel source tree to determine the context in which the
> > message was emitted and what it actually means.  Making it pretty
> > doesn't change the amount of work a developer needs to do to
> > classify or analyse errors in log files, all it does is change the
> > amount of overhead needed to generate the error message,
> > 
> 
> Yes, you're still going to want to ultimately dig down into the code and
> do complex root cause analysis. Unfortunately, we can't fix that with
> error reporting. :) I've laid out several situations above as to why I
> think the "pretty" format makes it significantly easier to make sense of
> corruption reports that may be more involved than a single instance or a
> single physical node. A broad assessment can be made from the actual
> corruption report without the need to decode tens or more cryptic
> messages. It potentially saves significant time spent doing unnecessary
> busy work. It also helps reduce unnecessary confusion with general
> reports that might arise from users on distro kernels, etc., where the
> line numbers might not be sufficient or easily misaligned.

Admittedly, bloating up the scrub module with stringified stuff has made
it way quicker for me to figure out what got flagged as a failure and
then decide if the check I wrote is incorrect or if we really did find
something broken.  One more upside to the macro is that we could easily
make each verifier test spit out more information if CONFIG_XFS_DEBUG is
set.  This won't help us with customers encountering production
problems, obviously, but I think there's still value.

> Of course, once we get into debugging an actual problem, we've kind of
> moved outside the scope of this discussion...
> 
> > Really, verifiers are performance critical fast paths, so the code
> > that runs inside them needs to be implemented with this in mind. I'm
> > not against making messages pretty, but let's not do it based on the
> > on the premise that pretty error messages are more important than
> > runtime performance when there are no errors...
> > 
> 
> I agree. As noted above, I would not expect any more overhead than the
> original suggestion to return a line number in the normal runtime case.
> The difference is primarily what we do in the code between when a
> verifier check fails and we report the failure.
> 
> FWIW, an approach that might be a reasonable compromise is to use the
> original suggestion to pass along the line number, but also wrap each
> verifier check in a thinner macro that simply asserts[2] on the verifier
> expression. The new macro could be ifdef'd out completely in production
> builds to eliminate code size concerns. Then if we have reports with
> significant enough corruption issues to warrant it, we collect a
> metadump and use any local debug kernel to access the fs and decode the
> errors for us (or ask the user to run a debug kernel). We can expand the
> verifier macro in the future with production build support if warranted.
> Thoughts?

<shrug> _THIS_IP_ ?

(Sorry for starting to sound like a broken record...)

> Brian
> 
> [1] A simple test that we can do at the moment is to compare a
> production xfs.ko with one with asserts enabled. My local for-next
> branch builds a module of 40104000 bytes without XFS_WARN and 40540208
> bytes with XFS_WARN enabled. That makes for a difference of ~425k and
> roughly a 1% code size increase (accounting for ~1750 asserts and
> supporting code).
> 
> [2] We may not want to use ASSERT() here, but perhaps define a new
> variant to omit the stack trace and otherwise customize to a verifier
> report.

Speaking of ASSERTs, I was going to propose removing the ASSERT calls
from the XFS_WANT_CORRUPTED_* macros since they're used in some of the
directory verifiers and we shouldn't really BUG_ON corrupted disk
contents.  An XFS_ERROR_REPORT follows soon after the ASSERT, so imho I
think we could get by with only one round of complaining.

--D

> 
> > Cheers,
> > 
> > Dave.
> > 
> > -- 
> > Dave Chinner
> > david@fromorbit.com
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Brian Foster July 24, 2017, 7:34 p.m. UTC | #15
On Mon, Jul 24, 2017 at 11:44:56AM -0700, Darrick J. Wong wrote:
> On Mon, Jul 24, 2017 at 09:07:52AM -0400, Brian Foster wrote:
> > On Mon, Jul 24, 2017 at 03:26:38PM +1000, Dave Chinner wrote:
> > > On Sat, Jul 22, 2017 at 08:29:44AM -0400, Brian Foster wrote:
> > > > On Sat, Jul 22, 2017 at 09:00:58AM +1000, Dave Chinner wrote:
> > > > > On Fri, Jul 21, 2017 at 09:54:05AM -0400, Brian Foster wrote:
> > > > > > On Fri, Jul 21, 2017 at 08:49:34AM +1000, Dave Chinner wrote:
> > > > > > but I'm also not against crafting a new, verifier
> > > > > > specific one to accomplish that. Technically, it doesn't have to be
> > > > > > shouty :), but IMO, the diagnostic/usability benefit outweighs the
> > > > > > aesthetic cost.
> > > > > 
> > > > > I disagree - there are so many verifier checks (and we only grow
> > > > > more as time goes on) so whatever we do needs them to be
> > > > > easily maintainable and not compromise the readabilty of the verifer
> > > > > code.
> > > > > 
> > > > 
> > > > I agree. ;) But I disagree that using a macro somehow automatically
> > > > negatively affects the readability of verifers. Anybody who can make
> > > > sense of this:
> > > > 
> > > >         if (!xfs_sb_version_hascrc(&mp->m_sb))
> > > >                 return __line__;
> > > >         if (dsl->sl_magic != cpu_to_be32(XFS_SYMLINK_MAGIC))
> > > >                 return __line__;
> > > >         if (!uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_meta_uuid))
> > > >                 return __line__;
> > > > 
> > > > ... could just as easily make sense of something like this:
> > > > 
> > > > 	xfs_verify_assert(xfs_sb_version_hascrc(&mp->m_sb));
> > > > 	xfs_verify_assert(dsl->sl_magic == cpu_to_be32(XFS_SYMLINK_MAGIC));
> > > > 	xfs_verify_assert(uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_meta_uuid));
> > > 
> > > Still needs branches to return on error so the verifier code
> > > functions correctly. So it simply isn't as clean as your example
> > > suggests. And, no hiding goto/return/errno variables inside a macro
> > > is not an option - that makes for horrible code and we should not
> > > repeat those past mistakes we inherited from the old Irix
> > > codebase.
> > > 
> > 
> > Clearly the example implies (and the supporting text explains) that the
> > macro would lead to function exit.
> 
> It does?  Every time I see an *assert() I assume that execution either
> continues past it or stops dead in its tracks.  The _GOTO macros make
> that clear by pairing the word goto with a label, though I concede that
> it feels odd to pass a label as an argument.
> 

I wasn't referring to the macro name, I just picked that out of a hat. I
mentioned that the macro would return in the sentence immediately
following the example. _GOTO/_RETURN or something like that certainly
makes sense for a legitimate name in this case.

> > I don't really see the problem with doing that. Again, we already do
> > that all over the place. If we don't want to alter function execution
> > via the macro, then have it return an expression that resolves to
> > false (or an error code or some such) and leave the function execution
> > logic in function context. It doesn't really matter to me either way.
> 
> I get that hiding branches inside macros makes it too easy for later
> readers to be mislead, but if there's one aspect of them that I /do/
> like, its that (provided the naming makes it clear that we're branching
> somewhere) we /can/ use them to ensure that the return values are
> consistent and avoid screwing up repetitive if tests.
> 
> Anyway, I was picturing something like this:
> 
> #define XFS_VERIFY_FAIL_UNLESS(expr) do { if (expr) return _THIS_IP_; } while(0)
> 
> static unsigned long
> xfs_blah_verify(...)
> {
> 	XFS_VERIFY_FAIL_UNLESS(blah1 == GOOD1);
> 	XFS_VERIFY_FAIL_UNLESS(blah2 == GOOD2);
> 	XFS_VERIFY_FAIL_UNLESS(blah3 == GOOD3);
> 	...
> 	return 0;
> }
> 
> Granted, with code review to spot inconsistencies maybe the macro really
> is pointless, and we can just open-code everything.  I dunno.  To be
> honest I actually do prefer the macro in this particular case.
> 

I think you could argue it actually cleans up the code by hiding the
fact that the code does something unusual by returning a line number or
instruction pointer). That's not something you typically see in !DEBUG
code in general.

> static unsigned long
> xfs_blah_verify(...)
> {
> 	if (!(blah1 == GOOD1))
> 		return _THIS_IP_;
> 	if (!(blah2 == GOOD2))
> 		return _THIS_IP_;
> 	if (!(blah3 == GOOD3))
> 		return _THIS_IP_;
> 	...
> 	return 0;
> }
> 
...
> 
> I don't think there's much difference in the generated code between
> opencoded verifier tests and a macro that more or less does the same
> thing but also helps us to avoid copy-pasta mistakes.  TBH I'm more
> worried about us making a mistake in the verifiers that screw up what we
> accept coming from / allow to be written to disk than I am about the
> exact size of each verifier function.
> 
> That said, I've warmed to Dave's idea of passing back _THIS_IP_ as a
> compromise between __line__ (not quite enough info) and throwing
> (#fs_ok, __func__, __line__) back to callers.
> 

I like the idea of using _THIS_IP_ over __LINE__ because the former at
least returns a precise reference straight from the kernel (as opposed
to one that is partly subject to interpretation). It can be easily
converted to a file/line combination with existing tools as well.

...
> 
> Admittedly, bloating up the scrub module with stringified stuff has made
> it way quicker for me to figure out what got flagged as a failure and
> then decide if the check I wrote is incorrect or if we really did find
> something broken.  One more upside to the macro is that we could easily
> make each verifier test spit out more information if CONFIG_XFS_DEBUG is
> set.  This won't help us with customers encountering production
> problems, obviously, but I think there's still value.
> 

I agree that there is value here...

> > Of course, once we get into debugging an actual problem, we've kind of
> > moved outside the scope of this discussion...
> > 
> > > Really, verifiers are performance critical fast paths, so the code
> > > that runs inside them needs to be implemented with this in mind. I'm
> > > not against making messages pretty, but let's not do it based on the
> > > on the premise that pretty error messages are more important than
> > > runtime performance when there are no errors...
> > > 
> > 
> > I agree. As noted above, I would not expect any more overhead than the
> > original suggestion to return a line number in the normal runtime case.
> > The difference is primarily what we do in the code between when a
> > verifier check fails and we report the failure.
> > 
> > FWIW, an approach that might be a reasonable compromise is to use the
> > original suggestion to pass along the line number, but also wrap each
> > verifier check in a thinner macro that simply asserts[2] on the verifier
> > expression. The new macro could be ifdef'd out completely in production
> > builds to eliminate code size concerns. Then if we have reports with
> > significant enough corruption issues to warrant it, we collect a
> > metadump and use any local debug kernel to access the fs and decode the
> > errors for us (or ask the user to run a debug kernel). We can expand the
> > verifier macro in the future with production build support if warranted.
> > Thoughts?
> 
> <shrug> _THIS_IP_ ?
> 

I'm not following how _THIS_IP_ is relevant to this point..? The
compromise I suggested above is very similar to what you've suggested
earlier with regard to CONFIG_XFS_DEBUG. We'd basically automatically
print assert failures for failed verifier checks (it would require at
least XFS_WARN and perhaps skip the backtrace, but that's getting into
details) via use of a macro.

The value of the macro is that we can at least conditionally stringify
the checks to support the ability to use a debug kernel to process
metadumps or targeted environments that might have too many corruption
errors to resolve to specific LOC manually. It kind of sounds like we
fundamentally agree on that point. :)

Brian

> (Sorry for starting to sound like a broken record...)
> 
> > Brian
> > 
> > [1] A simple test that we can do at the moment is to compare a
> > production xfs.ko with one with asserts enabled. My local for-next
> > branch builds a module of 40104000 bytes without XFS_WARN and 40540208
> > bytes with XFS_WARN enabled. That makes for a difference of ~425k and
> > roughly a 1% code size increase (accounting for ~1750 asserts and
> > supporting code).
> > 
> > [2] We may not want to use ASSERT() here, but perhaps define a new
> > variant to omit the stack trace and otherwise customize to a verifier
> > report.
> 
> Speaking of ASSERTs, I was going to propose removing the ASSERT calls
> from the XFS_WANT_CORRUPTED_* macros since they're used in some of the
> directory verifiers and we shouldn't really BUG_ON corrupted disk
> contents.  An XFS_ERROR_REPORT follows soon after the ASSERT, so imho I
> think we could get by with only one round of complaining.
> 
> --D
> 
> > 
> > > Cheers,
> > > 
> > > Dave.
> > > 
> > > -- 
> > > Dave Chinner
> > > david@fromorbit.com
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > the body of a message to majordomo@vger.kernel.org
> > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch
diff mbox

diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index c6c15e5..43b881e 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -871,6 +871,76 @@  xfs_attr_shortform_allfit(
 	return xfs_attr_shortform_bytesfit(dp, bytes);
 }
 
+/* Verify the consistency of an inline attribute fork. */
+int
+xfs_attr_shortform_verify(
+	struct xfs_inode		*ip)
+{
+	struct xfs_attr_shortform	*sfp;
+	struct xfs_attr_sf_entry	*sfep;
+	struct xfs_attr_sf_entry	*next_sfep;
+	char				*endp;
+	struct xfs_ifork		*ifp;
+	int				i;
+	int				size;
+
+	ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL);
+	ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK);
+	sfp = (struct xfs_attr_shortform *)ifp->if_u1.if_data;
+	size = ifp->if_bytes;
+
+	/*
+	 * Give up if the attribute is way too short.
+	 */
+	if (size < sizeof (struct xfs_attr_sf_hdr))
+		return -EFSCORRUPTED;
+
+	endp = (char *)sfp + size;
+
+	/* Check all reported entries */
+	sfep = &sfp->list[0];
+	for (i = 0; i < sfp->hdr.count; i++) {
+		/*
+		 * struct xfs_attr_sf_entry has a variable length.
+		 * Check the fixed-offset parts of the structure are
+		 * within the data buffer.
+		 */
+		if (((char *)sfep + sizeof(*sfep)) >= endp)
+			return -EFSCORRUPTED;
+
+		/* Don't allow names with known bad length. */
+		if (sfep->namelen == 0)
+			return -EFSCORRUPTED;
+
+		/*
+		 * Check that the variable-length part of the structure is
+		 * within the data buffer.  The next entry starts after the
+		 * name component, so nextentry is an acceptable test.
+		 */
+		next_sfep = XFS_ATTR_SF_NEXTENTRY(sfep);
+		if (endp < (char *)next_sfep)
+			return -EFSCORRUPTED;
+
+		/*
+		 * Check for unknown flags.  Short form doesn't support
+		 * the incomplete or local bits.
+		 */
+		if (sfep->flags & ~(XFS_ATTR_ROOT | XFS_ATTR_SECURE))
+			return -EFSCORRUPTED;
+
+		/* Check for invalid namespace combinations. */
+		if ((sfep->flags & XFS_ATTR_ROOT) &&
+		    (sfep->flags & XFS_ATTR_SECURE))
+			return -EFSCORRUPTED;
+
+		sfep = next_sfep;
+	}
+	if ((void *)sfep != (void *)endp)
+		return -EFSCORRUPTED;
+
+	return 0;
+}
+
 /*
  * Convert a leaf attribute list to shortform attribute list
  */
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h
index f7dda0c..ec7291d 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.h
+++ b/fs/xfs/libxfs/xfs_attr_leaf.h
@@ -52,6 +52,7 @@  int	xfs_attr_shortform_to_leaf(struct xfs_da_args *args);
 int	xfs_attr_shortform_remove(struct xfs_da_args *args);
 int	xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);
 int	xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes);
+int	xfs_attr_shortform_verify(struct xfs_inode *ip);
 void	xfs_attr_fork_remove(struct xfs_inode *ip, struct xfs_trans *tp);
 
 /*
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 0e80f34..5484211 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -183,16 +183,6 @@  xfs_iformat_fork(
 	if (error)
 		return error;
 
-	/* Check inline dir contents. */
-	if (S_ISDIR(VFS_I(ip)->i_mode) &&
-	    dip->di_format == XFS_DINODE_FMT_LOCAL) {
-		error = xfs_dir2_sf_verify(ip);
-		if (error) {
-			xfs_idestroy_fork(ip, XFS_DATA_FORK);
-			return error;
-		}
-	}
-
 	if (xfs_is_reflink_inode(ip)) {
 		ASSERT(ip->i_cowfp == NULL);
 		xfs_ifork_init_cow(ip);
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index c6f4eb4..7e35a16 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -143,5 +143,6 @@  bool xfs_symlink_hdr_ok(xfs_ino_t ino, uint32_t offset,
 			uint32_t size, struct xfs_buf *bp);
 void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp,
 				 struct xfs_inode *ip, struct xfs_ifork *ifp);
+int xfs_symlink_sf_verify(struct xfs_inode *ip);
 
 #endif /* __XFS_SHARED_H__ */
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
index c484877..96e2957 100644
--- a/fs/xfs/libxfs/xfs_symlink_remote.c
+++ b/fs/xfs/libxfs/xfs_symlink_remote.c
@@ -207,3 +207,32 @@  xfs_symlink_local_to_remote(
 	xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsymlink_hdr) +
 					ifp->if_bytes - 1);
 }
+
+/* Verify the consistency of an inline symlink. */
+int
+xfs_symlink_sf_verify(
+	struct xfs_inode	*ip)
+{
+	char			*sfp;
+	char			*endp;
+	struct xfs_ifork	*ifp;
+	int			size;
+
+	ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL);
+	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+	sfp = (char *)ifp->if_u1.if_data;
+	size = ifp->if_bytes;
+	endp = sfp + size;
+
+	/* No negative sizes or overly long symlink targets. */
+	if (size < 0 || size > XFS_SYMLINK_MAXLEN)
+		return -EFSCORRUPTED;
+
+	/* No NULLs in the target either. */
+	for (; sfp < endp; sfp++) {
+		if (*sfp == 0)
+			return -EFSCORRUPTED;
+	}
+
+	return 0;
+}
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 0a9e698..fd1d430 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -34,6 +34,11 @@ 
 #include "xfs_dquot_item.h"
 #include "xfs_dquot.h"
 #include "xfs_reflink.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2_priv.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_shared.h"
 
 #include <linux/kthread.h>
 #include <linux/freezer.h>
@@ -449,6 +454,43 @@  xfs_iget_cache_hit(
 	return error;
 }
 
+/* Check inline fork contents. */
+int
+xfs_iget_verify_forks(
+	struct xfs_inode		*ip)
+{
+	int				error;
+
+	/* Check the attribute fork if there is one. */
+	if (XFS_IFORK_PTR(ip, XFS_ATTR_FORK) &&
+	    ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
+		error = xfs_attr_shortform_verify(ip);
+		if (error) {
+			xfs_alert(ip->i_mount,
+					"%s: bad inode %Lu inline attr fork",
+					__func__, ip->i_ino);
+			return error;
+		}
+	}
+
+	/* Non-local data fork, jump out. */
+	if (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
+		return 0;
+
+	/* Check the inline data fork if there is one. */
+	if (S_ISDIR(VFS_I(ip)->i_mode))
+		error = xfs_dir2_sf_verify(ip);
+	else if (S_ISLNK(VFS_I(ip)->i_mode))
+		error = xfs_symlink_sf_verify(ip);
+	else
+		error = -EFSCORRUPTED;
+
+	if (error)
+		xfs_alert(ip->i_mount, "%s: bad inode %Lu inline data fork",
+				__func__, ip->i_ino);
+	return error;
+}
+
 
 static int
 xfs_iget_cache_miss(
@@ -473,6 +515,10 @@  xfs_iget_cache_miss(
 	if (error)
 		goto out_destroy;
 
+	error = xfs_iget_verify_forks(ip);
+	if (error)
+		goto out_destroy;
+
 	trace_xfs_iget_miss(ip);
 
 	if ((VFS_I(ip)->i_mode == 0) && !(flags & XFS_IGET_CREATE)) {
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index bff4d85..abf1cff 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -129,5 +129,6 @@  xfs_fs_eofblocks_from_user(
 
 int xfs_icache_inode_is_allocated(struct xfs_mount *mp, struct xfs_trans *tp,
 				  xfs_ino_t ino, bool *inuse);
+int xfs_iget_verify_forks(struct xfs_inode *ip);
 
 #endif
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ceef77c..7ca8336 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3547,10 +3547,8 @@  xfs_iflush_int(
 	if (ip->i_d.di_version < 3)
 		ip->i_d.di_flushiter++;
 
-	/* Check the inline directory data. */
-	if (S_ISDIR(VFS_I(ip)->i_mode) &&
-	    ip->i_d.di_format == XFS_DINODE_FMT_LOCAL &&
-	    xfs_dir2_sf_verify(ip))
+	/* Check the inline fork data. */
+	if (xfs_iget_verify_forks(ip))
 		goto corrupt_out;
 
 	/*