diff mbox series

[08/12] xfs: remove xfs_ifork_ops

Message ID 20200501081424.2598914-9-hch@lst.de (mailing list archive)
State Superseded
Headers show
Series [01/12] xfs: xfs_bmapi_read doesn't take a fork id as the last argument | expand

Commit Message

Christoph Hellwig May 1, 2020, 8:14 a.m. UTC
xfs_ifork_ops add up to two indirect calls per inode read and flush,
despite just having a single instance in the kernel.  In xfsprogs
phase6 in xfs_repair overrides the verify_dir method to deal with inodes
that do not have a valid parent.  Instead of the costly indirection just
life the repair code into xfs_dir2_sf.c under a condition that ensures
it is compiled as part of a kernel build, but instantly eliminated as
it is unreachable.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/libxfs/xfs_dir2_sf.c    | 64 ++++++++++++++++++++++++++++++++--
 fs/xfs/libxfs/xfs_inode_fork.c | 19 +++-------
 fs/xfs/libxfs/xfs_inode_fork.h | 15 ++------
 fs/xfs/xfs_inode.c             |  4 +--
 4 files changed, 71 insertions(+), 31 deletions(-)

Comments

Brian Foster May 1, 2020, 3:56 p.m. UTC | #1
On Fri, May 01, 2020 at 10:14:20AM +0200, Christoph Hellwig wrote:
> xfs_ifork_ops add up to two indirect calls per inode read and flush,
> despite just having a single instance in the kernel.  In xfsprogs
> phase6 in xfs_repair overrides the verify_dir method to deal with inodes
> that do not have a valid parent.  Instead of the costly indirection just
> life the repair code into xfs_dir2_sf.c under a condition that ensures
> it is compiled as part of a kernel build, but instantly eliminated as
> it is unreachable.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/libxfs/xfs_dir2_sf.c    | 64 ++++++++++++++++++++++++++++++++--
>  fs/xfs/libxfs/xfs_inode_fork.c | 19 +++-------
>  fs/xfs/libxfs/xfs_inode_fork.h | 15 ++------
>  fs/xfs/xfs_inode.c             |  4 +--
>  4 files changed, 71 insertions(+), 31 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
> index 7b7f6fb2ea3b2..1f6c30b68917c 100644
> --- a/fs/xfs/libxfs/xfs_dir2_sf.c
> +++ b/fs/xfs/libxfs/xfs_dir2_sf.c
...
> @@ -804,6 +804,66 @@ xfs_dir2_sf_verify(
>  	return NULL;
>  }
>  
> +/*
> + * When we're checking directory inodes, we're allowed to set a directory's
> + * dotdot entry to zero to signal that the parent needs to be reconnected
> + * during xfs_repair phase 6.  If we're handling a shortform directory the ifork
> + * verifiers will fail, so temporarily patch out this canary so that we can
> + * verify the rest of the fork and move on to fixing the dir.
> + */
> +static xfs_failaddr_t
> +xfs_dir2_sf_verify_dir_check(
> +	struct xfs_inode		*ip)
> +{
> +	struct xfs_mount		*mp = ip->i_mount;
> +	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
> +	struct xfs_dir2_sf_hdr		*sfp =
> +		(struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data;
> +	int				size = ifp->if_bytes;
> +	bool				parent_bypass = false;
> +	xfs_ino_t			old_parent;
> +	xfs_failaddr_t			fa;
> +
> +	/*
> +	 * If this is a shortform directory, phase4 in xfs_repair may have set
> +	 * the parent inode to zero to indicate that it must be fixed.
> +	 * Temporarily set a valid parent so that the directory verifier will
> +	 * pass.
> +	 */
> +	if (size > offsetof(struct xfs_dir2_sf_hdr, parent) &&
> +	    size >= xfs_dir2_sf_hdr_size(sfp->i8count)) {
> +		old_parent = xfs_dir2_sf_get_parent_ino(sfp);
> +		if (!old_parent) {
> +			xfs_dir2_sf_put_parent_ino(sfp, mp->m_sb.sb_rootino);
> +			parent_bypass = true;
> +		}
> +	}
> +
> +	fa = __xfs_dir2_sf_verify(ip);
> +
> +	/* Put it back. */
> +	if (parent_bypass)
> +		xfs_dir2_sf_put_parent_ino(sfp, old_parent);
> +	return fa;
> +}

I'm not sure the cleanup is worth the kludge of including repair code in
the kernel like this. It might be better to reduce or replace ifork_ops
to a single directory function pointer until there's a reason for this
to become common. I dunno, maybe others have thoughts...

Brian

> +
> +/*
> + * Allow xfs_repair to enable the parent bypass mode.  For now this is entirely
> + * unused in the kernel, but might come in useful for online repair eventually.
> + */
> +#ifndef xfs_inode_parent_bypass
> +#define xfs_inode_parent_bypass(ip)	0
> +#endif
> +
> +xfs_failaddr_t
> +xfs_dir2_sf_verify(
> +	struct xfs_inode		*ip)
> +{
> +	if (xfs_inode_parent_bypass(ip))
> +		return xfs_dir2_sf_verify_dir_check(ip);
> +	return __xfs_dir2_sf_verify(ip);
> +}
> +
>  /*
>   * Create a new (shortform) directory.
>   */
> diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
> index f30d43364aa92..f6dcee919f59e 100644
> --- a/fs/xfs/libxfs/xfs_inode_fork.c
> +++ b/fs/xfs/libxfs/xfs_inode_fork.c
> @@ -673,18 +673,10 @@ xfs_ifork_init_cow(
>  	ip->i_cnextents = 0;
>  }
>  
> -/* Default fork content verifiers. */
> -struct xfs_ifork_ops xfs_default_ifork_ops = {
> -	.verify_attr	= xfs_attr_shortform_verify,
> -	.verify_dir	= xfs_dir2_sf_verify,
> -	.verify_symlink	= xfs_symlink_shortform_verify,
> -};
> -
>  /* Verify the inline contents of the data fork of an inode. */
>  xfs_failaddr_t
>  xfs_ifork_verify_data(
> -	struct xfs_inode	*ip,
> -	struct xfs_ifork_ops	*ops)
> +	struct xfs_inode	*ip)
>  {
>  	/* Non-local data fork, we're done. */
>  	if (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
> @@ -693,9 +685,9 @@ xfs_ifork_verify_data(
>  	/* Check the inline data fork if there is one. */
>  	switch (VFS_I(ip)->i_mode & S_IFMT) {
>  	case S_IFDIR:
> -		return ops->verify_dir(ip);
> +		return xfs_dir2_sf_verify(ip);
>  	case S_IFLNK:
> -		return ops->verify_symlink(ip);
> +		return xfs_symlink_shortform_verify(ip);
>  	default:
>  		return NULL;
>  	}
> @@ -704,13 +696,12 @@ xfs_ifork_verify_data(
>  /* Verify the inline contents of the attr fork of an inode. */
>  xfs_failaddr_t
>  xfs_ifork_verify_attr(
> -	struct xfs_inode	*ip,
> -	struct xfs_ifork_ops	*ops)
> +	struct xfs_inode	*ip)
>  {
>  	/* There has to be an attr fork allocated if aformat is local. */
>  	if (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
>  		return NULL;
>  	if (!XFS_IFORK_PTR(ip, XFS_ATTR_FORK))
>  		return __this_address;
> -	return ops->verify_attr(ip);
> +	return xfs_attr_shortform_verify(ip);
>  }
> diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
> index 8487b0c88a75e..3f84d33abd3b7 100644
> --- a/fs/xfs/libxfs/xfs_inode_fork.h
> +++ b/fs/xfs/libxfs/xfs_inode_fork.h
> @@ -176,18 +176,7 @@ extern struct kmem_zone	*xfs_ifork_zone;
>  
>  extern void xfs_ifork_init_cow(struct xfs_inode *ip);
>  
> -typedef xfs_failaddr_t (*xfs_ifork_verifier_t)(struct xfs_inode *);
> -
> -struct xfs_ifork_ops {
> -	xfs_ifork_verifier_t	verify_symlink;
> -	xfs_ifork_verifier_t	verify_dir;
> -	xfs_ifork_verifier_t	verify_attr;
> -};
> -extern struct xfs_ifork_ops	xfs_default_ifork_ops;
> -
> -xfs_failaddr_t xfs_ifork_verify_data(struct xfs_inode *ip,
> -		struct xfs_ifork_ops *ops);
> -xfs_failaddr_t xfs_ifork_verify_attr(struct xfs_inode *ip,
> -		struct xfs_ifork_ops *ops);
> +xfs_failaddr_t xfs_ifork_verify_data(struct xfs_inode *ip);
> +xfs_failaddr_t xfs_ifork_verify_attr(struct xfs_inode *ip);
>  
>  #endif	/* __XFS_INODE_FORK_H__ */
> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> index d1772786af29d..93967278355de 100644
> --- a/fs/xfs/xfs_inode.c
> +++ b/fs/xfs/xfs_inode.c
> @@ -3769,7 +3769,7 @@ xfs_inode_verify_forks(
>  	struct xfs_ifork	*ifp;
>  	xfs_failaddr_t		fa;
>  
> -	fa = xfs_ifork_verify_data(ip, &xfs_default_ifork_ops);
> +	fa = xfs_ifork_verify_data(ip);
>  	if (fa) {
>  		ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
>  		xfs_inode_verifier_error(ip, -EFSCORRUPTED, "data fork",
> @@ -3777,7 +3777,7 @@ xfs_inode_verify_forks(
>  		return false;
>  	}
>  
> -	fa = xfs_ifork_verify_attr(ip, &xfs_default_ifork_ops);
> +	fa = xfs_ifork_verify_attr(ip);
>  	if (fa) {
>  		ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK);
>  		xfs_inode_verifier_error(ip, -EFSCORRUPTED, "attr fork",
> -- 
> 2.26.2
>
Darrick J. Wong May 1, 2020, 4:08 p.m. UTC | #2
On Fri, May 01, 2020 at 11:56:49AM -0400, Brian Foster wrote:
> On Fri, May 01, 2020 at 10:14:20AM +0200, Christoph Hellwig wrote:
> > xfs_ifork_ops add up to two indirect calls per inode read and flush,
> > despite just having a single instance in the kernel.  In xfsprogs
> > phase6 in xfs_repair overrides the verify_dir method to deal with inodes
> > that do not have a valid parent.  Instead of the costly indirection just
> > life the repair code into xfs_dir2_sf.c under a condition that ensures
> > it is compiled as part of a kernel build, but instantly eliminated as
> > it is unreachable.
> > 
> > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > ---
> >  fs/xfs/libxfs/xfs_dir2_sf.c    | 64 ++++++++++++++++++++++++++++++++--
> >  fs/xfs/libxfs/xfs_inode_fork.c | 19 +++-------
> >  fs/xfs/libxfs/xfs_inode_fork.h | 15 ++------
> >  fs/xfs/xfs_inode.c             |  4 +--
> >  4 files changed, 71 insertions(+), 31 deletions(-)
> > 
> > diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
> > index 7b7f6fb2ea3b2..1f6c30b68917c 100644
> > --- a/fs/xfs/libxfs/xfs_dir2_sf.c
> > +++ b/fs/xfs/libxfs/xfs_dir2_sf.c
> ...
> > @@ -804,6 +804,66 @@ xfs_dir2_sf_verify(
> >  	return NULL;
> >  }
> >  
> > +/*
> > + * When we're checking directory inodes, we're allowed to set a directory's
> > + * dotdot entry to zero to signal that the parent needs to be reconnected
> > + * during xfs_repair phase 6.  If we're handling a shortform directory the ifork
> > + * verifiers will fail, so temporarily patch out this canary so that we can
> > + * verify the rest of the fork and move on to fixing the dir.
> > + */
> > +static xfs_failaddr_t
> > +xfs_dir2_sf_verify_dir_check(
> > +	struct xfs_inode		*ip)
> > +{
> > +	struct xfs_mount		*mp = ip->i_mount;
> > +	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
> > +	struct xfs_dir2_sf_hdr		*sfp =
> > +		(struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data;
> > +	int				size = ifp->if_bytes;
> > +	bool				parent_bypass = false;
> > +	xfs_ino_t			old_parent;
> > +	xfs_failaddr_t			fa;
> > +
> > +	/*
> > +	 * If this is a shortform directory, phase4 in xfs_repair may have set
> > +	 * the parent inode to zero to indicate that it must be fixed.
> > +	 * Temporarily set a valid parent so that the directory verifier will
> > +	 * pass.
> > +	 */
> > +	if (size > offsetof(struct xfs_dir2_sf_hdr, parent) &&
> > +	    size >= xfs_dir2_sf_hdr_size(sfp->i8count)) {
> > +		old_parent = xfs_dir2_sf_get_parent_ino(sfp);
> > +		if (!old_parent) {
> > +			xfs_dir2_sf_put_parent_ino(sfp, mp->m_sb.sb_rootino);
> > +			parent_bypass = true;
> > +		}
> > +	}
> > +
> > +	fa = __xfs_dir2_sf_verify(ip);
> > +
> > +	/* Put it back. */
> > +	if (parent_bypass)
> > +		xfs_dir2_sf_put_parent_ino(sfp, old_parent);
> > +	return fa;
> > +}
> 
> I'm not sure the cleanup is worth the kludge of including repair code in
> the kernel like this. It might be better to reduce or replace ifork_ops
> to a single directory function pointer until there's a reason for this
> to become common. I dunno, maybe others have thoughts...

One of the online repair gaps I haven't figured out how to close yet is
what to do when there's a short format directory that fails validation
(such that iget fails).  The inode repairer gets stuck with the job of
fixing the sf dir, but the (future) directory repair code will have all
the expertise in fixing directories.  Regrettably, it also requires a
working xfs_inode.

So I could just set the sf parent to some obviously garbage value (like
repair does) to make the verifiers pass and then trip the directory
repair, and then this hunk would be useful to have in the kernel.  OTOH
that means more special case flags and other junk, just to end up with
this kludge that sucks even for xfs_repair.

OTOH I have spent quite a bit of time trying to figure out how to kill
that stupid kludge of repair's, and come up emptyhanded, so <shrug>?

--D

> Brian
> 
> > +
> > +/*
> > + * Allow xfs_repair to enable the parent bypass mode.  For now this is entirely
> > + * unused in the kernel, but might come in useful for online repair eventually.
> > + */
> > +#ifndef xfs_inode_parent_bypass
> > +#define xfs_inode_parent_bypass(ip)	0
> > +#endif
> > +
> > +xfs_failaddr_t
> > +xfs_dir2_sf_verify(
> > +	struct xfs_inode		*ip)
> > +{
> > +	if (xfs_inode_parent_bypass(ip))
> > +		return xfs_dir2_sf_verify_dir_check(ip);
> > +	return __xfs_dir2_sf_verify(ip);
> > +}
> > +
> >  /*
> >   * Create a new (shortform) directory.
> >   */
> > diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
> > index f30d43364aa92..f6dcee919f59e 100644
> > --- a/fs/xfs/libxfs/xfs_inode_fork.c
> > +++ b/fs/xfs/libxfs/xfs_inode_fork.c
> > @@ -673,18 +673,10 @@ xfs_ifork_init_cow(
> >  	ip->i_cnextents = 0;
> >  }
> >  
> > -/* Default fork content verifiers. */
> > -struct xfs_ifork_ops xfs_default_ifork_ops = {
> > -	.verify_attr	= xfs_attr_shortform_verify,
> > -	.verify_dir	= xfs_dir2_sf_verify,
> > -	.verify_symlink	= xfs_symlink_shortform_verify,
> > -};
> > -
> >  /* Verify the inline contents of the data fork of an inode. */
> >  xfs_failaddr_t
> >  xfs_ifork_verify_data(
> > -	struct xfs_inode	*ip,
> > -	struct xfs_ifork_ops	*ops)
> > +	struct xfs_inode	*ip)
> >  {
> >  	/* Non-local data fork, we're done. */
> >  	if (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
> > @@ -693,9 +685,9 @@ xfs_ifork_verify_data(
> >  	/* Check the inline data fork if there is one. */
> >  	switch (VFS_I(ip)->i_mode & S_IFMT) {
> >  	case S_IFDIR:
> > -		return ops->verify_dir(ip);
> > +		return xfs_dir2_sf_verify(ip);
> >  	case S_IFLNK:
> > -		return ops->verify_symlink(ip);
> > +		return xfs_symlink_shortform_verify(ip);
> >  	default:
> >  		return NULL;
> >  	}
> > @@ -704,13 +696,12 @@ xfs_ifork_verify_data(
> >  /* Verify the inline contents of the attr fork of an inode. */
> >  xfs_failaddr_t
> >  xfs_ifork_verify_attr(
> > -	struct xfs_inode	*ip,
> > -	struct xfs_ifork_ops	*ops)
> > +	struct xfs_inode	*ip)
> >  {
> >  	/* There has to be an attr fork allocated if aformat is local. */
> >  	if (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
> >  		return NULL;
> >  	if (!XFS_IFORK_PTR(ip, XFS_ATTR_FORK))
> >  		return __this_address;
> > -	return ops->verify_attr(ip);
> > +	return xfs_attr_shortform_verify(ip);
> >  }
> > diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
> > index 8487b0c88a75e..3f84d33abd3b7 100644
> > --- a/fs/xfs/libxfs/xfs_inode_fork.h
> > +++ b/fs/xfs/libxfs/xfs_inode_fork.h
> > @@ -176,18 +176,7 @@ extern struct kmem_zone	*xfs_ifork_zone;
> >  
> >  extern void xfs_ifork_init_cow(struct xfs_inode *ip);
> >  
> > -typedef xfs_failaddr_t (*xfs_ifork_verifier_t)(struct xfs_inode *);
> > -
> > -struct xfs_ifork_ops {
> > -	xfs_ifork_verifier_t	verify_symlink;
> > -	xfs_ifork_verifier_t	verify_dir;
> > -	xfs_ifork_verifier_t	verify_attr;
> > -};
> > -extern struct xfs_ifork_ops	xfs_default_ifork_ops;
> > -
> > -xfs_failaddr_t xfs_ifork_verify_data(struct xfs_inode *ip,
> > -		struct xfs_ifork_ops *ops);
> > -xfs_failaddr_t xfs_ifork_verify_attr(struct xfs_inode *ip,
> > -		struct xfs_ifork_ops *ops);
> > +xfs_failaddr_t xfs_ifork_verify_data(struct xfs_inode *ip);
> > +xfs_failaddr_t xfs_ifork_verify_attr(struct xfs_inode *ip);
> >  
> >  #endif	/* __XFS_INODE_FORK_H__ */
> > diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> > index d1772786af29d..93967278355de 100644
> > --- a/fs/xfs/xfs_inode.c
> > +++ b/fs/xfs/xfs_inode.c
> > @@ -3769,7 +3769,7 @@ xfs_inode_verify_forks(
> >  	struct xfs_ifork	*ifp;
> >  	xfs_failaddr_t		fa;
> >  
> > -	fa = xfs_ifork_verify_data(ip, &xfs_default_ifork_ops);
> > +	fa = xfs_ifork_verify_data(ip);
> >  	if (fa) {
> >  		ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
> >  		xfs_inode_verifier_error(ip, -EFSCORRUPTED, "data fork",
> > @@ -3777,7 +3777,7 @@ xfs_inode_verify_forks(
> >  		return false;
> >  	}
> >  
> > -	fa = xfs_ifork_verify_attr(ip, &xfs_default_ifork_ops);
> > +	fa = xfs_ifork_verify_attr(ip);
> >  	if (fa) {
> >  		ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK);
> >  		xfs_inode_verifier_error(ip, -EFSCORRUPTED, "attr fork",
> > -- 
> > 2.26.2
> > 
>
Christoph Hellwig May 1, 2020, 4:38 p.m. UTC | #3
On Fri, May 01, 2020 at 09:08:09AM -0700, Darrick J. Wong wrote:
> > I'm not sure the cleanup is worth the kludge of including repair code in
> > the kernel like this. It might be better to reduce or replace ifork_ops
> > to a single directory function pointer until there's a reason for this
> > to become common. I dunno, maybe others have thoughts...

The whole point is trying to avoid calling function pointers and
keeping that cruft around.

> 
> One of the online repair gaps I haven't figured out how to close yet is
> what to do when there's a short format directory that fails validation
> (such that iget fails).  The inode repairer gets stuck with the job of
> fixing the sf dir, but the (future) directory repair code will have all
> the expertise in fixing directories.  Regrettably, it also requires a
> working xfs_inode.
> 
> So I could just set the sf parent to some obviously garbage value (like
> repair does) to make the verifiers pass and then trip the directory
> repair, and then this hunk would be useful to have in the kernel.  OTOH
> that means more special case flags and other junk, just to end up with
> this kludge that sucks even for xfs_repair.

That being said my approach here was a little too dumb.  Once we are
all in the same code base we can stop the stupid patching of the
parent and just handle the case directly.  Something like this
incremental diff on top of the sent out version (not actually tested).

Total diffstate with the original patch is:

 4 files changed, 37 insertions(+), 35 deletions(-)

and this should also help with online repair while killing a horrible
kludge.


diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index 1f6c30b68917c..b4195fafe2172 100644
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -704,9 +704,17 @@ xfs_dir2_sf_check(
 }
 #endif	/* DEBUG */
 
+/*
+ * Allow xfs_repair to enable the parent bypass mode.  For now this is entirely
+ * unused in the kernel, but might come in useful for online repair eventually.
+ */
+#ifndef xfs_inode_parent_bypass
+#define xfs_inode_parent_bypass(ip)	0
+#endif
+
 /* Verify the consistency of an inline directory. */
-static xfs_failaddr_t
-__xfs_dir2_sf_verify(
+xfs_failaddr_t
+xfs_dir2_sf_verify(
 	struct xfs_inode		*ip)
 {
 	struct xfs_mount		*mp = ip->i_mount;
@@ -738,12 +746,26 @@ __xfs_dir2_sf_verify(
 
 	endp = (char *)sfp + size;
 
-	/* Check .. entry */
-	ino = xfs_dir2_sf_get_parent_ino(sfp);
-	i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
-	error = xfs_dir_ino_validate(mp, ino);
-	if (error)
-		return __this_address;
+	/*
+	 * Check the .. entry.
+	 *
+	 * If we are running a repair, phase4 may have set the parent inode to
+	 * zero to indicate that it must be fixed.  Skip validating the parent
+	 * in that case.
+	 */
+	if (likely(!xfs_inode_parent_bypass(ip))) {
+		ino = xfs_dir2_sf_get_parent_ino(sfp);
+		i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
+		error = xfs_dir_ino_validate(mp, ino);
+		if (error)
+			return __this_address;
+	} else {
+		/*
+		 * Ensure we account the missing parent as in the right format.
+		 */
+		if (sfp->i8count)
+			i8count++;
+	}
 	offset = mp->m_dir_geo->data_first_offset;
 
 	/* Check all reported entries */
@@ -804,66 +826,6 @@ __xfs_dir2_sf_verify(
 	return NULL;
 }
 
-/*
- * When we're checking directory inodes, we're allowed to set a directory's
- * dotdot entry to zero to signal that the parent needs to be reconnected
- * during xfs_repair phase 6.  If we're handling a shortform directory the ifork
- * verifiers will fail, so temporarily patch out this canary so that we can
- * verify the rest of the fork and move on to fixing the dir.
- */
-static xfs_failaddr_t
-xfs_dir2_sf_verify_dir_check(
-	struct xfs_inode		*ip)
-{
-	struct xfs_mount		*mp = ip->i_mount;
-	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
-	struct xfs_dir2_sf_hdr		*sfp =
-		(struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data;
-	int				size = ifp->if_bytes;
-	bool				parent_bypass = false;
-	xfs_ino_t			old_parent;
-	xfs_failaddr_t			fa;
-
-	/*
-	 * If this is a shortform directory, phase4 in xfs_repair may have set
-	 * the parent inode to zero to indicate that it must be fixed.
-	 * Temporarily set a valid parent so that the directory verifier will
-	 * pass.
-	 */
-	if (size > offsetof(struct xfs_dir2_sf_hdr, parent) &&
-	    size >= xfs_dir2_sf_hdr_size(sfp->i8count)) {
-		old_parent = xfs_dir2_sf_get_parent_ino(sfp);
-		if (!old_parent) {
-			xfs_dir2_sf_put_parent_ino(sfp, mp->m_sb.sb_rootino);
-			parent_bypass = true;
-		}
-	}
-
-	fa = __xfs_dir2_sf_verify(ip);
-
-	/* Put it back. */
-	if (parent_bypass)
-		xfs_dir2_sf_put_parent_ino(sfp, old_parent);
-	return fa;
-}
-
-/*
- * Allow xfs_repair to enable the parent bypass mode.  For now this is entirely
- * unused in the kernel, but might come in useful for online repair eventually.
- */
-#ifndef xfs_inode_parent_bypass
-#define xfs_inode_parent_bypass(ip)	0
-#endif
-
-xfs_failaddr_t
-xfs_dir2_sf_verify(
-	struct xfs_inode		*ip)
-{
-	if (xfs_inode_parent_bypass(ip))
-		return xfs_dir2_sf_verify_dir_check(ip);
-	return __xfs_dir2_sf_verify(ip);
-}
-
 /*
  * Create a new (shortform) directory.
  */
Christoph Hellwig May 1, 2020, 4:50 p.m. UTC | #4
On Fri, May 01, 2020 at 06:38:09PM +0200, Christoph Hellwig wrote:
> That being said my approach here was a little too dumb.  Once we are
> all in the same code base we can stop the stupid patching of the
> parent and just handle the case directly.  Something like this
> incremental diff on top of the sent out version (not actually tested).
> 
> Total diffstate with the original patch is:
> 
>  4 files changed, 37 insertions(+), 35 deletions(-)
> 
> and this should also help with online repair while killing a horrible
> kludge.

Btw, І wonder if for repair / online repair just skipping the verifiers
entirely would make more sense.  But I think we can go there
incrementally and just keep the existing repair behavior for now.
Brian Foster May 1, 2020, 6:23 p.m. UTC | #5
On Fri, May 01, 2020 at 06:50:17PM +0200, Christoph Hellwig wrote:
> On Fri, May 01, 2020 at 06:38:09PM +0200, Christoph Hellwig wrote:
> > That being said my approach here was a little too dumb.  Once we are
> > all in the same code base we can stop the stupid patching of the
> > parent and just handle the case directly.  Something like this
> > incremental diff on top of the sent out version (not actually tested).
> > 
> > Total diffstate with the original patch is:
> > 
> >  4 files changed, 37 insertions(+), 35 deletions(-)
> > 
> > and this should also help with online repair while killing a horrible
> > kludge.
> 
> Btw, І wonder if for repair / online repair just skipping the verifiers
> entirely would make more sense.  But I think we can go there
> incrementally and just keep the existing repair behavior for now.
> 

Can we use another dummy parent inode value in xfs_repair? It looks to
me that we set it to zero in phase 4 if it fails verification and set
the parent to NULLFSINO (i.e. unknown) in repair's in-core tracking.
Phase 6 walks the directory entries and explicitly sets the parent inode
number of entries with an unknown parent (according to the in-core
tracking). IOW, I don't see where we actually rely on the directory
header having a parent inode of zero outside of detecting it in the
custom verifier. If that's the only functional purpose, I wonder if we
could do something like set the bogus parent field of a sf dir to the
root inode or to itself, that way the default verifier wouldn't trip
over it..

Brian
Christoph Hellwig May 7, 2020, 12:34 p.m. UTC | #6
On Fri, May 01, 2020 at 02:23:16PM -0400, Brian Foster wrote:
> Can we use another dummy parent inode value in xfs_repair? It looks to
> me that we set it to zero in phase 4 if it fails verification and set
> the parent to NULLFSINO (i.e. unknown) in repair's in-core tracking.
> Phase 6 walks the directory entries and explicitly sets the parent inode
> number of entries with an unknown parent (according to the in-core
> tracking). IOW, I don't see where we actually rely on the directory
> header having a parent inode of zero outside of detecting it in the
> custom verifier. If that's the only functional purpose, I wonder if we
> could do something like set the bogus parent field of a sf dir to the
> root inode or to itself, that way the default verifier wouldn't trip
> over it..

I don't think we need a dummy parent at all - we can just skip the
parent validation entirely, which is what my incremental patch does.
Brian Foster May 7, 2020, 1:43 p.m. UTC | #7
On Thu, May 07, 2020 at 02:34:11PM +0200, Christoph Hellwig wrote:
> On Fri, May 01, 2020 at 02:23:16PM -0400, Brian Foster wrote:
> > Can we use another dummy parent inode value in xfs_repair? It looks to
> > me that we set it to zero in phase 4 if it fails verification and set
> > the parent to NULLFSINO (i.e. unknown) in repair's in-core tracking.
> > Phase 6 walks the directory entries and explicitly sets the parent inode
> > number of entries with an unknown parent (according to the in-core
> > tracking). IOW, I don't see where we actually rely on the directory
> > header having a parent inode of zero outside of detecting it in the
> > custom verifier. If that's the only functional purpose, I wonder if we
> > could do something like set the bogus parent field of a sf dir to the
> > root inode or to itself, that way the default verifier wouldn't trip
> > over it..
> 
> I don't think we need a dummy parent at all - we can just skip the
> parent validation entirely, which is what my incremental patch does.
> 

xfs_repair already skips the parent validation, this patch just
refactors it. What I was considering above is whether repair uses the
current dummy value of zero for any functional reason. If not, it kind
of looks like the earlier phase of repair checks the parent, sees that
it would fail a verifier, replaces it with zero (which would also fail
the verifier) and then eventually replaces zero with a valid parent or
ditches the entry in phase 6. If we placed a temporary parent value in
the early phase that wouldn't explicitly fail a verifier by being an
invalid inode number (instead of using 0 to notify the verifier to skip
the validation), then we wouldn't need to skip the parent validation in
phase 6 when we look up the inode again.

Brian
Brian Foster May 7, 2020, 4:28 p.m. UTC | #8
On Thu, May 07, 2020 at 09:43:55AM -0400, Brian Foster wrote:
> On Thu, May 07, 2020 at 02:34:11PM +0200, Christoph Hellwig wrote:
> > On Fri, May 01, 2020 at 02:23:16PM -0400, Brian Foster wrote:
> > > Can we use another dummy parent inode value in xfs_repair? It looks to
> > > me that we set it to zero in phase 4 if it fails verification and set
> > > the parent to NULLFSINO (i.e. unknown) in repair's in-core tracking.
> > > Phase 6 walks the directory entries and explicitly sets the parent inode
> > > number of entries with an unknown parent (according to the in-core
> > > tracking). IOW, I don't see where we actually rely on the directory
> > > header having a parent inode of zero outside of detecting it in the
> > > custom verifier. If that's the only functional purpose, I wonder if we
> > > could do something like set the bogus parent field of a sf dir to the
> > > root inode or to itself, that way the default verifier wouldn't trip
> > > over it..
> > 
> > I don't think we need a dummy parent at all - we can just skip the
> > parent validation entirely, which is what my incremental patch does.
> > 
> 
> xfs_repair already skips the parent validation, this patch just
> refactors it. What I was considering above is whether repair uses the
> current dummy value of zero for any functional reason. If not, it kind
> of looks like the earlier phase of repair checks the parent, sees that
> it would fail a verifier, replaces it with zero (which would also fail
> the verifier) and then eventually replaces zero with a valid parent or
> ditches the entry in phase 6. If we placed a temporary parent value in
> the early phase that wouldn't explicitly fail a verifier by being an
> invalid inode number (instead of using 0 to notify the verifier to skip
> the validation), then we wouldn't need to skip the parent validation in
> phase 6 when we look up the inode again.
> 
...

To demonstrate, I hacked on repair a bit using an fs with an
intentionally corrupted shortform parent inode and had to make the
following tweaks to work around the custom fork verifier. The
ino_discovery checks were added because phases 3 and 4 toggle that flag
such that the former clears the parent value in the inode, but the
latter actually updates the external parent tracking. IOW, setting a
"valid" inode in phase 3 would otherwise trick phase 4 into using it.
I'd probably try to think of something cleaner for that issue if we were
to take such an approach.

Brian

diff --git a/repair/dir2.c b/repair/dir2.c
index cbbce601..c30ccb37 100644
--- a/repair/dir2.c
+++ b/repair/dir2.c
@@ -165,7 +165,7 @@ process_sf_dir2(
 	int			tmp_elen;
 	int			tmp_len;
 	xfs_dir2_sf_entry_t	*tmp_sfep;
-	xfs_ino_t		zero = 0;
+	xfs_ino_t		zero = mp->m_sb.sb_rootino;
 
 	sfp = (struct xfs_dir2_sf_hdr *)XFS_DFORK_DPTR(dip);
 	max_size = XFS_DFORK_DSIZE(dip, mp);
@@ -494,7 +494,8 @@ _("bogus .. inode number (%" PRIu64 ") in directory inode %" PRIu64 ", "),
 		if (!no_modify)  {
 			do_warn(_("clearing inode number\n"));
 
-			libxfs_dir2_sf_put_parent_ino(sfp, zero);
+			if (!ino_discovery)
+				libxfs_dir2_sf_put_parent_ino(sfp, zero);
 			*dino_dirty = 1;
 			*repair = 1;
 		} else  {
@@ -528,8 +529,8 @@ _("bad .. entry in directory inode %" PRIu64 ", points to self, "),
 			ino);
 		if (!no_modify)  {
 			do_warn(_("clearing inode number\n"));
-
-			libxfs_dir2_sf_put_parent_ino(sfp, zero);
+			if (!ino_discovery)
+				libxfs_dir2_sf_put_parent_ino(sfp, zero);
 			*dino_dirty = 1;
 			*repair = 1;
 		} else  {
diff --git a/repair/phase6.c b/repair/phase6.c
index beceea9a..613ca578 100644
--- a/repair/phase6.c
+++ b/repair/phase6.c
@@ -1104,7 +1104,7 @@ mv_orphanage(
 					(unsigned long long)ino, ++incr);
 
 	/* Orphans may not have a proper parent, so use custom ops here */
-	err = -libxfs_iget(mp, NULL, ino, 0, &ino_p, &phase6_ifork_ops);
+	err = -libxfs_iget(mp, NULL, ino, 0, &ino_p, &xfs_default_ifork_ops);
 	if (err)
 		do_error(_("%d - couldn't iget disconnected inode\n"), err);
 
@@ -2875,7 +2875,7 @@ process_dir_inode(
 
 	ASSERT(!is_inode_refchecked(irec, ino_offset) || dotdot_update);
 
-	error = -libxfs_iget(mp, NULL, ino, 0, &ip, &phase6_ifork_ops);
+	error = -libxfs_iget(mp, NULL, ino, 0, &ip, &xfs_default_ifork_ops);
 	if (error) {
 		if (!no_modify)
 			do_error(
Christoph Hellwig May 7, 2020, 5:18 p.m. UTC | #9
On Thu, May 07, 2020 at 12:28:46PM -0400, Brian Foster wrote:
> To demonstrate, I hacked on repair a bit using an fs with an
> intentionally corrupted shortform parent inode and had to make the
> following tweaks to work around the custom fork verifier. The
> ino_discovery checks were added because phases 3 and 4 toggle that flag
> such that the former clears the parent value in the inode, but the
> latter actually updates the external parent tracking. IOW, setting a
> "valid" inode in phase 3 would otherwise trick phase 4 into using it.
> I'd probably try to think of something cleaner for that issue if we were
> to take such an approach.

Ok, so instead of clearing the parent we'll set it to a guaranteed good
value (the root ino).  That could kill the workaround I had entirely.
Darrick J. Wong May 12, 2020, 11:50 p.m. UTC | #10
On Thu, May 07, 2020 at 07:18:57PM +0200, Christoph Hellwig wrote:
> On Thu, May 07, 2020 at 12:28:46PM -0400, Brian Foster wrote:
> > To demonstrate, I hacked on repair a bit using an fs with an
> > intentionally corrupted shortform parent inode and had to make the
> > following tweaks to work around the custom fork verifier. The
> > ino_discovery checks were added because phases 3 and 4 toggle that flag
> > such that the former clears the parent value in the inode, but the
> > latter actually updates the external parent tracking. IOW, setting a
> > "valid" inode in phase 3 would otherwise trick phase 4 into using it.
> > I'd probably try to think of something cleaner for that issue if we were
> > to take such an approach.
> 
> Ok, so instead of clearing the parent we'll set it to a guaranteed good
> value (the root ino).  That could kill the workaround I had entirely.

Seems reasonable to me, but someone should try it and see how xfs_repair
reacts.  I think it ought to be fine since it will detect the lack of a
rootdir entry pointing to the damaged dir and move it to lost+found.

--D
diff mbox series

Patch

diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index 7b7f6fb2ea3b2..1f6c30b68917c 100644
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -705,8 +705,8 @@  xfs_dir2_sf_check(
 #endif	/* DEBUG */
 
 /* Verify the consistency of an inline directory. */
-xfs_failaddr_t
-xfs_dir2_sf_verify(
+static xfs_failaddr_t
+__xfs_dir2_sf_verify(
 	struct xfs_inode		*ip)
 {
 	struct xfs_mount		*mp = ip->i_mount;
@@ -804,6 +804,66 @@  xfs_dir2_sf_verify(
 	return NULL;
 }
 
+/*
+ * When we're checking directory inodes, we're allowed to set a directory's
+ * dotdot entry to zero to signal that the parent needs to be reconnected
+ * during xfs_repair phase 6.  If we're handling a shortform directory the ifork
+ * verifiers will fail, so temporarily patch out this canary so that we can
+ * verify the rest of the fork and move on to fixing the dir.
+ */
+static xfs_failaddr_t
+xfs_dir2_sf_verify_dir_check(
+	struct xfs_inode		*ip)
+{
+	struct xfs_mount		*mp = ip->i_mount;
+	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+	struct xfs_dir2_sf_hdr		*sfp =
+		(struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data;
+	int				size = ifp->if_bytes;
+	bool				parent_bypass = false;
+	xfs_ino_t			old_parent;
+	xfs_failaddr_t			fa;
+
+	/*
+	 * If this is a shortform directory, phase4 in xfs_repair may have set
+	 * the parent inode to zero to indicate that it must be fixed.
+	 * Temporarily set a valid parent so that the directory verifier will
+	 * pass.
+	 */
+	if (size > offsetof(struct xfs_dir2_sf_hdr, parent) &&
+	    size >= xfs_dir2_sf_hdr_size(sfp->i8count)) {
+		old_parent = xfs_dir2_sf_get_parent_ino(sfp);
+		if (!old_parent) {
+			xfs_dir2_sf_put_parent_ino(sfp, mp->m_sb.sb_rootino);
+			parent_bypass = true;
+		}
+	}
+
+	fa = __xfs_dir2_sf_verify(ip);
+
+	/* Put it back. */
+	if (parent_bypass)
+		xfs_dir2_sf_put_parent_ino(sfp, old_parent);
+	return fa;
+}
+
+/*
+ * Allow xfs_repair to enable the parent bypass mode.  For now this is entirely
+ * unused in the kernel, but might come in useful for online repair eventually.
+ */
+#ifndef xfs_inode_parent_bypass
+#define xfs_inode_parent_bypass(ip)	0
+#endif
+
+xfs_failaddr_t
+xfs_dir2_sf_verify(
+	struct xfs_inode		*ip)
+{
+	if (xfs_inode_parent_bypass(ip))
+		return xfs_dir2_sf_verify_dir_check(ip);
+	return __xfs_dir2_sf_verify(ip);
+}
+
 /*
  * Create a new (shortform) directory.
  */
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index f30d43364aa92..f6dcee919f59e 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -673,18 +673,10 @@  xfs_ifork_init_cow(
 	ip->i_cnextents = 0;
 }
 
-/* Default fork content verifiers. */
-struct xfs_ifork_ops xfs_default_ifork_ops = {
-	.verify_attr	= xfs_attr_shortform_verify,
-	.verify_dir	= xfs_dir2_sf_verify,
-	.verify_symlink	= xfs_symlink_shortform_verify,
-};
-
 /* Verify the inline contents of the data fork of an inode. */
 xfs_failaddr_t
 xfs_ifork_verify_data(
-	struct xfs_inode	*ip,
-	struct xfs_ifork_ops	*ops)
+	struct xfs_inode	*ip)
 {
 	/* Non-local data fork, we're done. */
 	if (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
@@ -693,9 +685,9 @@  xfs_ifork_verify_data(
 	/* Check the inline data fork if there is one. */
 	switch (VFS_I(ip)->i_mode & S_IFMT) {
 	case S_IFDIR:
-		return ops->verify_dir(ip);
+		return xfs_dir2_sf_verify(ip);
 	case S_IFLNK:
-		return ops->verify_symlink(ip);
+		return xfs_symlink_shortform_verify(ip);
 	default:
 		return NULL;
 	}
@@ -704,13 +696,12 @@  xfs_ifork_verify_data(
 /* Verify the inline contents of the attr fork of an inode. */
 xfs_failaddr_t
 xfs_ifork_verify_attr(
-	struct xfs_inode	*ip,
-	struct xfs_ifork_ops	*ops)
+	struct xfs_inode	*ip)
 {
 	/* There has to be an attr fork allocated if aformat is local. */
 	if (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
 		return NULL;
 	if (!XFS_IFORK_PTR(ip, XFS_ATTR_FORK))
 		return __this_address;
-	return ops->verify_attr(ip);
+	return xfs_attr_shortform_verify(ip);
 }
diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
index 8487b0c88a75e..3f84d33abd3b7 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.h
+++ b/fs/xfs/libxfs/xfs_inode_fork.h
@@ -176,18 +176,7 @@  extern struct kmem_zone	*xfs_ifork_zone;
 
 extern void xfs_ifork_init_cow(struct xfs_inode *ip);
 
-typedef xfs_failaddr_t (*xfs_ifork_verifier_t)(struct xfs_inode *);
-
-struct xfs_ifork_ops {
-	xfs_ifork_verifier_t	verify_symlink;
-	xfs_ifork_verifier_t	verify_dir;
-	xfs_ifork_verifier_t	verify_attr;
-};
-extern struct xfs_ifork_ops	xfs_default_ifork_ops;
-
-xfs_failaddr_t xfs_ifork_verify_data(struct xfs_inode *ip,
-		struct xfs_ifork_ops *ops);
-xfs_failaddr_t xfs_ifork_verify_attr(struct xfs_inode *ip,
-		struct xfs_ifork_ops *ops);
+xfs_failaddr_t xfs_ifork_verify_data(struct xfs_inode *ip);
+xfs_failaddr_t xfs_ifork_verify_attr(struct xfs_inode *ip);
 
 #endif	/* __XFS_INODE_FORK_H__ */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index d1772786af29d..93967278355de 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3769,7 +3769,7 @@  xfs_inode_verify_forks(
 	struct xfs_ifork	*ifp;
 	xfs_failaddr_t		fa;
 
-	fa = xfs_ifork_verify_data(ip, &xfs_default_ifork_ops);
+	fa = xfs_ifork_verify_data(ip);
 	if (fa) {
 		ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
 		xfs_inode_verifier_error(ip, -EFSCORRUPTED, "data fork",
@@ -3777,7 +3777,7 @@  xfs_inode_verify_forks(
 		return false;
 	}
 
-	fa = xfs_ifork_verify_attr(ip, &xfs_default_ifork_ops);
+	fa = xfs_ifork_verify_attr(ip);
 	if (fa) {
 		ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK);
 		xfs_inode_verifier_error(ip, -EFSCORRUPTED, "attr fork",