diff mbox series

[RFC,2/2] overlayfs: propagate errors from upper to overlay sb in sync_fs

Message ID 20201213132713.66864-3-jlayton@kernel.org (mailing list archive)
State New, archived
Headers show
Series errseq+overlayfs: accomodate the volatile upper layer use-case | expand

Commit Message

Jeff Layton Dec. 13, 2020, 1:27 p.m. UTC
Peek at the upper layer's errseq_t at mount time for volatile mounts,
and record it in the per-sb info. In sync_fs, check for an error since
the recorded point and set it in the overlayfs superblock if there was
one.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/overlayfs/ovl_entry.h |  1 +
 fs/overlayfs/super.c     | 14 +++++++++++---
 2 files changed, 12 insertions(+), 3 deletions(-)

Comments

Vivek Goyal Dec. 14, 2020, 9:38 p.m. UTC | #1
On Sun, Dec 13, 2020 at 08:27:13AM -0500, Jeff Layton wrote:
> Peek at the upper layer's errseq_t at mount time for volatile mounts,
> and record it in the per-sb info. In sync_fs, check for an error since
> the recorded point and set it in the overlayfs superblock if there was
> one.
> 
> Signed-off-by: Jeff Layton <jlayton@kernel.org>
> ---

While we are solving problem for volatile overlay mount, I also
started thinking, what about non-volatile overlay syncfs() writeback errors.
Looks like these will not be reported to user space at all as of now
(because we never update overlay_sb->s_wb_err ever).

A patch like this might fix it. (compile tested only).

overlayfs: Report syncfs() errors to user space

Currently, syncfs(), calls filesystem ->sync_fs() method but ignores the
return code. But certain writeback errors can still be reported on 
syncfs() by checking errors on super block.

ret2 = errseq_check_and_advance(&sb->s_wb_err, &f.file->f_sb_err);

For the case of overlayfs, we never set overlayfs super block s_wb_err. That
means sync() will never report writeback errors on overlayfs upon syncfs().

Fix this by updating overlay sb->s_wb_err upon ->sync_fs() call. And that
should mean that user space syncfs() call should see writeback errors.

ovl_fsync() does not need anything special because if there are writeback
errors underlying filesystem will report it through vfs_fsync_range() return
code and user space will see it.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
---
 fs/overlayfs/ovl_entry.h |    1 +
 fs/overlayfs/super.c     |   14 +++++++++++---
 2 files changed, 12 insertions(+), 3 deletions(-)

Index: redhat-linux/fs/overlayfs/super.c
===================================================================
--- redhat-linux.orig/fs/overlayfs/super.c	2020-12-14 15:33:43.934400880 -0500
+++ redhat-linux/fs/overlayfs/super.c	2020-12-14 16:15:07.127400880 -0500
@@ -259,7 +259,7 @@ static int ovl_sync_fs(struct super_bloc
 {
 	struct ovl_fs *ofs = sb->s_fs_info;
 	struct super_block *upper_sb;
-	int ret;
+	int ret, ret2;
 
 	if (!ovl_upper_mnt(ofs))
 		return 0;
@@ -283,7 +283,14 @@ static int ovl_sync_fs(struct super_bloc
 	ret = sync_filesystem(upper_sb);
 	up_read(&upper_sb->s_umount);
 
-	return ret;
+	if (errseq_check(&upper_sb->s_wb_err, sb->s_wb_err)) {
+		/* Upper sb has errors since last time */
+		spin_lock(&ofs->errseq_lock);
+		ret2 = errseq_check_and_advance(&upper_sb->s_wb_err,
+						&sb->s_wb_err);
+		spin_unlock(&ofs->errseq_lock);
+	}
+	return ret ? ret : ret2;
 }
 
 /**
@@ -1873,6 +1880,7 @@ static int ovl_fill_super(struct super_b
 	if (!cred)
 		goto out_err;
 
+	spin_lock_init(&ofs->errseq_lock);
 	/* Is there a reason anyone would want not to share whiteouts? */
 	ofs->share_whiteout = true;
 
@@ -1945,7 +1953,7 @@ static int ovl_fill_super(struct super_b
 
 		sb->s_stack_depth = ovl_upper_mnt(ofs)->mnt_sb->s_stack_depth;
 		sb->s_time_gran = ovl_upper_mnt(ofs)->mnt_sb->s_time_gran;
-
+		sb->s_wb_err = errseq_sample(&ovl_upper_mnt(ofs)->mnt_sb->s_wb_err);
 	}
 	oe = ovl_get_lowerstack(sb, splitlower, numlower, ofs, layers);
 	err = PTR_ERR(oe);
Index: redhat-linux/fs/overlayfs/ovl_entry.h
===================================================================
--- redhat-linux.orig/fs/overlayfs/ovl_entry.h	2020-12-14 15:33:43.934400880 -0500
+++ redhat-linux/fs/overlayfs/ovl_entry.h	2020-12-14 15:34:13.509400880 -0500
@@ -79,6 +79,7 @@ struct ovl_fs {
 	atomic_long_t last_ino;
 	/* Whiteout dentry cache */
 	struct dentry *whiteout;
+	spinlock_t errseq_lock;
 };
 
 static inline struct vfsmount *ovl_upper_mnt(struct ovl_fs *ofs)
Sargun Dhillon Dec. 14, 2020, 10:04 p.m. UTC | #2
On Mon, Dec 14, 2020 at 04:38:43PM -0500, Vivek Goyal wrote:
> On Sun, Dec 13, 2020 at 08:27:13AM -0500, Jeff Layton wrote:
> > Peek at the upper layer's errseq_t at mount time for volatile mounts,
> > and record it in the per-sb info. In sync_fs, check for an error since
> > the recorded point and set it in the overlayfs superblock if there was
> > one.
> > 
> > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > ---
> 
> While we are solving problem for non-volatile overlay mount, I also
> started thinking, what about non-volatile overlay syncfs() writeback errors.
> Looks like these will not be reported to user space at all as of now
> (because we never update overlay_sb->s_wb_err ever).
> 
> A patch like this might fix it. (compile tested only).
> 
> overlayfs: Report syncfs() errors to user space
> 
> Currently, syncfs(), calls filesystem ->sync_fs() method but ignores the
> return code. But certain writeback errors can still be reported on 
> syncfs() by checking errors on super block.
> 
> ret2 = errseq_check_and_advance(&sb->s_wb_err, &f.file->f_sb_err);
> 
> For the case of overlayfs, we never set overlayfs super block s_wb_err. That
> means sync() will never report writeback errors on overlayfs uppon syncfs().
> 
> Fix this by updating overlay sb->sb_wb_err upon ->sync_fs() call. And that
> should mean that user space syncfs() call should see writeback errors.
> 
> ovl_fsync() does not need anything special because if there are writeback
> errors underlying filesystem will report it through vfs_fsync_range() return
> code and user space will see it.
> 
> Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
> ---
>  fs/overlayfs/ovl_entry.h |    1 +
>  fs/overlayfs/super.c     |   14 +++++++++++---
>  2 files changed, 12 insertions(+), 3 deletions(-)
> 
> Index: redhat-linux/fs/overlayfs/super.c
> ===================================================================
> --- redhat-linux.orig/fs/overlayfs/super.c	2020-12-14 15:33:43.934400880 -0500
> +++ redhat-linux/fs/overlayfs/super.c	2020-12-14 16:15:07.127400880 -0500
> @@ -259,7 +259,7 @@ static int ovl_sync_fs(struct super_bloc
>  {
>  	struct ovl_fs *ofs = sb->s_fs_info;
>  	struct super_block *upper_sb;
> -	int ret;
> +	int ret, ret2;
>  
>  	if (!ovl_upper_mnt(ofs))
>  		return 0;
> @@ -283,7 +283,14 @@ static int ovl_sync_fs(struct super_bloc
>  	ret = sync_filesystem(upper_sb);
>  	up_read(&upper_sb->s_umount);
>  
> -	return ret;
> +	if (errseq_check(&upper_sb->s_wb_err, sb->s_wb_err)) {
> +		/* Upper sb has errors since last time */
> +		spin_lock(&ofs->errseq_lock);
> +		ret2 = errseq_check_and_advance(&upper_sb->s_wb_err,
> +						&sb->s_wb_err);
> +		spin_unlock(&ofs->errseq_lock);
> +	}
> +	return ret ? ret : ret2;
>  }
>  
>  /**
> @@ -1873,6 +1880,7 @@ static int ovl_fill_super(struct super_b
>  	if (!cred)
>  		goto out_err;
>  
> +	spin_lock_init(&ofs->errseq_lock);
>  	/* Is there a reason anyone would want not to share whiteouts? */
>  	ofs->share_whiteout = true;
>  
> @@ -1945,7 +1953,7 @@ static int ovl_fill_super(struct super_b
>  
>  		sb->s_stack_depth = ovl_upper_mnt(ofs)->mnt_sb->s_stack_depth;
>  		sb->s_time_gran = ovl_upper_mnt(ofs)->mnt_sb->s_time_gran;
> -
> +		sb->s_wb_err = errseq_sample(&ovl_upper_mnt(ofs)->mnt_sb->s_wb_err);
>  	}
>  	oe = ovl_get_lowerstack(sb, splitlower, numlower, ofs, layers);
>  	err = PTR_ERR(oe);
> Index: redhat-linux/fs/overlayfs/ovl_entry.h
> ===================================================================
> --- redhat-linux.orig/fs/overlayfs/ovl_entry.h	2020-12-14 15:33:43.934400880 -0500
> +++ redhat-linux/fs/overlayfs/ovl_entry.h	2020-12-14 15:34:13.509400880 -0500
> @@ -79,6 +79,7 @@ struct ovl_fs {
>  	atomic_long_t last_ino;
>  	/* Whiteout dentry cache */
>  	struct dentry *whiteout;
> +	spinlock_t errseq_lock;
>  };
>  
>  static inline struct vfsmount *ovl_upper_mnt(struct ovl_fs *ofs)
> 

This was on my list of things to look at. I don't think we can / should use 
errseq_check_and_advance because it will hide errors from userspace. I think we 
need something like:

At startup, call errseq_peek and stash that value somewhere. This sets the 
MUSTINC flag.

At syncfs time: call errseq check, if it says there is an error, call 
errseq_peek again, and store the error in our superblock. Take the error value 
from the difference between the previous one and the new one, and copy it up to 
the superblock.

Either way, I think Jeff's work of making it so other kernel subsystems can 
interact with errseq on a superblock bears fruit elsewhere. If the first patch 
gets merged, I can put together the patches to do the standard error bubble
up for normal syncfs, volatile syncfs, and volatile remount.
Vivek Goyal Dec. 14, 2020, 11:01 p.m. UTC | #3
On Mon, Dec 14, 2020 at 10:04:14PM +0000, Sargun Dhillon wrote:
> On Mon, Dec 14, 2020 at 04:38:43PM -0500, Vivek Goyal wrote:
> > On Sun, Dec 13, 2020 at 08:27:13AM -0500, Jeff Layton wrote:
> > > Peek at the upper layer's errseq_t at mount time for volatile mounts,
> > > and record it in the per-sb info. In sync_fs, check for an error since
> > > the recorded point and set it in the overlayfs superblock if there was
> > > one.
> > > 
> > > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > > ---
> > 
> > While we are solving problem for non-volatile overlay mount, I also
> > started thinking, what about non-volatile overlay syncfs() writeback errors.
> > Looks like these will not be reported to user space at all as of now
> > (because we never update overlay_sb->s_wb_err ever).
> > 
> > A patch like this might fix it. (compile tested only).
> > 
> > overlayfs: Report syncfs() errors to user space
> > 
> > Currently, syncfs(), calls filesystem ->sync_fs() method but ignores the
> > return code. But certain writeback errors can still be reported on 
> > syncfs() by checking errors on super block.
> > 
> > ret2 = errseq_check_and_advance(&sb->s_wb_err, &f.file->f_sb_err);
> > 
> > For the case of overlayfs, we never set overlayfs super block s_wb_err. That
> > means sync() will never report writeback errors on overlayfs uppon syncfs().
> > 
> > Fix this by updating overlay sb->sb_wb_err upon ->sync_fs() call. And that
> > should mean that user space syncfs() call should see writeback errors.
> > 
> > ovl_fsync() does not need anything special because if there are writeback
> > errors underlying filesystem will report it through vfs_fsync_range() return
> > code and user space will see it.
> > 
> > Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
> > ---
> >  fs/overlayfs/ovl_entry.h |    1 +
> >  fs/overlayfs/super.c     |   14 +++++++++++---
> >  2 files changed, 12 insertions(+), 3 deletions(-)
> > 
> > Index: redhat-linux/fs/overlayfs/super.c
> > ===================================================================
> > --- redhat-linux.orig/fs/overlayfs/super.c	2020-12-14 15:33:43.934400880 -0500
> > +++ redhat-linux/fs/overlayfs/super.c	2020-12-14 16:15:07.127400880 -0500
> > @@ -259,7 +259,7 @@ static int ovl_sync_fs(struct super_bloc
> >  {
> >  	struct ovl_fs *ofs = sb->s_fs_info;
> >  	struct super_block *upper_sb;
> > -	int ret;
> > +	int ret, ret2;
> >  
> >  	if (!ovl_upper_mnt(ofs))
> >  		return 0;
> > @@ -283,7 +283,14 @@ static int ovl_sync_fs(struct super_bloc
> >  	ret = sync_filesystem(upper_sb);
> >  	up_read(&upper_sb->s_umount);
> >  
> > -	return ret;
> > +	if (errseq_check(&upper_sb->s_wb_err, sb->s_wb_err)) {
> > +		/* Upper sb has errors since last time */
> > +		spin_lock(&ofs->errseq_lock);
> > +		ret2 = errseq_check_and_advance(&upper_sb->s_wb_err,
> > +						&sb->s_wb_err);
> > +		spin_unlock(&ofs->errseq_lock);
> > +	}
> > +	return ret ? ret : ret2;
> >  }
> >  
> >  /**
> > @@ -1873,6 +1880,7 @@ static int ovl_fill_super(struct super_b
> >  	if (!cred)
> >  		goto out_err;
> >  
> > +	spin_lock_init(&ofs->errseq_lock);
> >  	/* Is there a reason anyone would want not to share whiteouts? */
> >  	ofs->share_whiteout = true;
> >  
> > @@ -1945,7 +1953,7 @@ static int ovl_fill_super(struct super_b
> >  
> >  		sb->s_stack_depth = ovl_upper_mnt(ofs)->mnt_sb->s_stack_depth;
> >  		sb->s_time_gran = ovl_upper_mnt(ofs)->mnt_sb->s_time_gran;
> > -
> > +		sb->s_wb_err = errseq_sample(&ovl_upper_mnt(ofs)->mnt_sb->s_wb_err);
> >  	}
> >  	oe = ovl_get_lowerstack(sb, splitlower, numlower, ofs, layers);
> >  	err = PTR_ERR(oe);
> > Index: redhat-linux/fs/overlayfs/ovl_entry.h
> > ===================================================================
> > --- redhat-linux.orig/fs/overlayfs/ovl_entry.h	2020-12-14 15:33:43.934400880 -0500
> > +++ redhat-linux/fs/overlayfs/ovl_entry.h	2020-12-14 15:34:13.509400880 -0500
> > @@ -79,6 +79,7 @@ struct ovl_fs {
> >  	atomic_long_t last_ino;
> >  	/* Whiteout dentry cache */
> >  	struct dentry *whiteout;
> > +	spinlock_t errseq_lock;
> >  };
> >  
> >  static inline struct vfsmount *ovl_upper_mnt(struct ovl_fs *ofs)
> > 
> 
> This was on my list of things to look at. I don't think we can / should use 
> errseq_check_and_advance because it will hide errors from userspace.

Hi Sargun,

I have been struggling to figure out when to use
errseq_check_and_advance() and when to use this errseq_check() and
errseq_set() combination.

My rationale for using errseq_check_and_advance() is that say there
is an unseen error on upper super block, and if overlayfs calls
syncfs(), then this call should set SEEN flag on upper super
block, isn't it. This is equivalent of an app directly calling
syncfs() on upper.

If we use errseq_check() and errseq_set() combination, then we
are just reading state of upper superblock but really not impacting
it despite the fact overlay apps are calling syncfs().

Compare it with fsync(). For non-volatile overlay, an fsync() overlay
call will set SEEN flag on upper file. And I believe same thing
should happen for ovl_syncfs() call as well. It makes more sense to me.

Wondering how will it hide errors from user space. ovl "struct file"
will have its own f->f_sb_err initialized from overlay superblock. And
if overlay super block gets updated with error, a later
errseq_check_and_advance(&sb->s_wb_err, &f.file->f_sb_err) should
still return an error.

What am I missing?

Vivek


> I think we 
> need something like:
> 
> At startup, call errseq_peek and stash that value somewhere. This sets the 
> MUSTINC flag.

So this errseq_peek() stuff is required only if we don't want to 
consume error while checking for error. In fact consuming error
is not bad as long as we do it only ovl_syncfs() path. In fact
it will match current semantics of syncfs().

But issue we have is that we want to check for error outside syncfs()
path too and don't want to consume it (otherwise it breaks the semantics
that it will be marked seen in syncfs() path).

So that's why this notion of checking error without consuming it
so that we can check it in remount path.

But in syncfs() path, it should be ok to consume unseen error and
we should be able to call errseq_check_and_advance(), both for
volatile and non-volatile mounts, isn't it?

For other paths, like remount, we probably can stash away on persistent
storage and compare that value on remount and fail remount without
actually consuming unseen error (because it is not syncfs path). This
possibly can be used in other paths like read/write as well to
make sure we can notice error without consuming it.

IOW, we seem to have two paths we want to check errors in. In ovl_syncfs()
path we should be able consume existing unseen error on upper, so
errseq_check_and_advance() makes sense. In rest of the paths, we
should use new semantics to check for errors.

Vivek

> 
> At syncfs time: call errseq check, if it says there is an error, call 
> errseq_peek again, and store the error in our superblock. Take the error value 
> from the differenceb between the previous one and the new one, and copy it up to 
> the superblock.
> 
> Either way, I think Jeff's work of making it so other kernel subsytems can 
> interact with errseq on a superblock bears fruit elsewhere. If the first patch 
> gets merged, I can put together the patches to do the standard error bubble
> up for normal syncfs, volatile syncfs, and volatile remount.
>
Jeff Layton Dec. 14, 2020, 11:53 p.m. UTC | #4
On Mon, 2020-12-14 at 16:38 -0500, Vivek Goyal wrote:
> On Sun, Dec 13, 2020 at 08:27:13AM -0500, Jeff Layton wrote:
> > Peek at the upper layer's errseq_t at mount time for volatile mounts,
> > and record it in the per-sb info. In sync_fs, check for an error since
> > the recorded point and set it in the overlayfs superblock if there was
> > one.
> > 
> > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > ---
> 
> While we are solving problem for non-volatile overlay mount, I also
> started thinking, what about non-volatile overlay syncfs() writeback errors.
> Looks like these will not be reported to user space at all as of now
> (because we never update overlay_sb->s_wb_err ever).
> 
> A patch like this might fix it. (compile tested only).
> 
> overlayfs: Report syncfs() errors to user space
> 
> Currently, syncfs(), calls filesystem ->sync_fs() method but ignores the
> return code. But certain writeback errors can still be reported on 
> syncfs() by checking errors on super block.
> 
> ret2 = errseq_check_and_advance(&sb->s_wb_err, &f.file->f_sb_err);
> 
> For the case of overlayfs, we never set overlayfs super block s_wb_err. That
> means sync() will never report writeback errors on overlayfs uppon syncfs().
> 
> Fix this by updating overlay sb->sb_wb_err upon ->sync_fs() call. And that
> should mean that user space syncfs() call should see writeback errors.
> 
> ovl_fsync() does not need anything special because if there are writeback
> errors underlying filesystem will report it through vfs_fsync_range() return
> code and user space will see it.
> 
> Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
> ---
>  fs/overlayfs/ovl_entry.h |    1 +
>  fs/overlayfs/super.c     |   14 +++++++++++---
>  2 files changed, 12 insertions(+), 3 deletions(-)
> 
> Index: redhat-linux/fs/overlayfs/super.c
> ===================================================================
> --- redhat-linux.orig/fs/overlayfs/super.c	2020-12-14 15:33:43.934400880 -0500
> +++ redhat-linux/fs/overlayfs/super.c	2020-12-14 16:15:07.127400880 -0500
> @@ -259,7 +259,7 @@ static int ovl_sync_fs(struct super_bloc
>  {
>  	struct ovl_fs *ofs = sb->s_fs_info;
>  	struct super_block *upper_sb;
> -	int ret;
> +	int ret, ret2;
>  
> 
> 
> 
>  	if (!ovl_upper_mnt(ofs))
>  		return 0;
> @@ -283,7 +283,14 @@ static int ovl_sync_fs(struct super_bloc
>  	ret = sync_filesystem(upper_sb);
>  	up_read(&upper_sb->s_umount);
>  
> 
> 
> 
> -	return ret;
> +	if (errseq_check(&upper_sb->s_wb_err, sb->s_wb_err)) {
> +		/* Upper sb has errors since last time */
> +		spin_lock(&ofs->errseq_lock);
> +		ret2 = errseq_check_and_advance(&upper_sb->s_wb_err,
> +						&sb->s_wb_err);
> +		spin_unlock(&ofs->errseq_lock);
> +	}
> +	return ret ? ret : ret2;

I think this is probably not quite right.

The problem I think is that the SEEN flag is always going to end up
being set in sb->s_wb_err, and that is going to violate the desired
semantics. If the writeback error occurred after all fd's were closed,
then the next opener wouldn't see it and you'd lose the error.

We probably need a function to cleanly propagate the error from one
errseq_t to another so that that doesn't occur. I'll have to think about
it.

>  }
>  
> 
> 
> 
>  /**
> @@ -1873,6 +1880,7 @@ static int ovl_fill_super(struct super_b
>  	if (!cred)
>  		goto out_err;
>  
> 
> 
> 
> +	spin_lock_init(&ofs->errseq_lock);
>  	/* Is there a reason anyone would want not to share whiteouts? */
>  	ofs->share_whiteout = true;
>  
> 
> 
> 
> @@ -1945,7 +1953,7 @@ static int ovl_fill_super(struct super_b
>  
> 
> 
> 
>  		sb->s_stack_depth = ovl_upper_mnt(ofs)->mnt_sb->s_stack_depth;
>  		sb->s_time_gran = ovl_upper_mnt(ofs)->mnt_sb->s_time_gran;
> -
> +		sb->s_wb_err = errseq_sample(&ovl_upper_mnt(ofs)->mnt_sb->s_wb_err);
>  	}
>  	oe = ovl_get_lowerstack(sb, splitlower, numlower, ofs, layers);
>  	err = PTR_ERR(oe);
> Index: redhat-linux/fs/overlayfs/ovl_entry.h
> ===================================================================
> --- redhat-linux.orig/fs/overlayfs/ovl_entry.h	2020-12-14 15:33:43.934400880 -0500
> +++ redhat-linux/fs/overlayfs/ovl_entry.h	2020-12-14 15:34:13.509400880 -0500
> @@ -79,6 +79,7 @@ struct ovl_fs {
>  	atomic_long_t last_ino;
>  	/* Whiteout dentry cache */
>  	struct dentry *whiteout;
> +	spinlock_t errseq_lock;
>  };
>  
> 
> 
> 
>  static inline struct vfsmount *ovl_upper_mnt(struct ovl_fs *ofs)
>
Jeff Layton Dec. 15, 2020, 1:16 p.m. UTC | #5
On Mon, 2020-12-14 at 18:53 -0500, Jeff Layton wrote:
> On Mon, 2020-12-14 at 16:38 -0500, Vivek Goyal wrote:
> > On Sun, Dec 13, 2020 at 08:27:13AM -0500, Jeff Layton wrote:
> > > Peek at the upper layer's errseq_t at mount time for volatile mounts,
> > > and record it in the per-sb info. In sync_fs, check for an error since
> > > the recorded point and set it in the overlayfs superblock if there was
> > > one.
> > > 
> > > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > > ---
> > 
> > While we are solving problem for non-volatile overlay mount, I also
> > started thinking, what about non-volatile overlay syncfs() writeback errors.
> > Looks like these will not be reported to user space at all as of now
> > (because we never update overlay_sb->s_wb_err ever).
> > 
> > A patch like this might fix it. (compile tested only).
> > 
> > overlayfs: Report syncfs() errors to user space
> > 
> > Currently, syncfs(), calls filesystem ->sync_fs() method but ignores the
> > return code. But certain writeback errors can still be reported on 
> > syncfs() by checking errors on super block.
> > 
> > ret2 = errseq_check_and_advance(&sb->s_wb_err, &f.file->f_sb_err);
> > 
> > For the case of overlayfs, we never set overlayfs super block s_wb_err. That
> > means sync() will never report writeback errors on overlayfs uppon syncfs().
> > 
> > Fix this by updating overlay sb->sb_wb_err upon ->sync_fs() call. And that
> > should mean that user space syncfs() call should see writeback errors.
> > 
> > ovl_fsync() does not need anything special because if there are writeback
> > errors underlying filesystem will report it through vfs_fsync_range() return
> > code and user space will see it.
> > 
> > Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
> > ---
> >  fs/overlayfs/ovl_entry.h |    1 +
> >  fs/overlayfs/super.c     |   14 +++++++++++---
> >  2 files changed, 12 insertions(+), 3 deletions(-)
> > 
> > Index: redhat-linux/fs/overlayfs/super.c
> > ===================================================================
> > --- redhat-linux.orig/fs/overlayfs/super.c	2020-12-14 15:33:43.934400880 -0500
> > +++ redhat-linux/fs/overlayfs/super.c	2020-12-14 16:15:07.127400880 -0500
> > @@ -259,7 +259,7 @@ static int ovl_sync_fs(struct super_bloc
> >  {
> >  	struct ovl_fs *ofs = sb->s_fs_info;
> >  	struct super_block *upper_sb;
> > -	int ret;
> > +	int ret, ret2;
> >  
> > 
> > 
> > 
> >  	if (!ovl_upper_mnt(ofs))
> >  		return 0;
> > @@ -283,7 +283,14 @@ static int ovl_sync_fs(struct super_bloc
> >  	ret = sync_filesystem(upper_sb);
> >  	up_read(&upper_sb->s_umount);
> >  
> > 
> > 
> > 
> > -	return ret;
> > +	if (errseq_check(&upper_sb->s_wb_err, sb->s_wb_err)) {
> > +		/* Upper sb has errors since last time */
> > +		spin_lock(&ofs->errseq_lock);
> > +		ret2 = errseq_check_and_advance(&upper_sb->s_wb_err,
> > +						&sb->s_wb_err);
> > +		spin_unlock(&ofs->errseq_lock);
> > +	}
> > +	return ret ? ret : ret2;
> 
> I think this is probably not quite right.
> 
> The problem I think is that the SEEN flag is always going to end up
> being set in sb->s_wb_err, and that is going to violate the desired
> semantics. If the writeback error occurred after all fd's were closed,
> then the next opener wouldn't see it and you'd lose the error.
> 
> We probably need a function to cleanly propagate the error from one
> errseq_t to another so that that doesn't occur. I'll have to think about
> it.
> 

So, the problem is that we can't guarantee that we'll have an open file
when sync_fs is called. So if you do the check_and_advance in the
context of a sync() syscall, you'll effectively ensure that a later
opener on the upper layer won't see the error (since the upper_sb's
errseq_t will be marked SEEN).

It's not clear to me what semantics you want in the following situation:

mount upper layer
mount overlayfs with non-volatile upper layer
do "stuff" on overlayfs, and close all files on overlayfs
get a writeback error on upper layer
call sync() (sync_fs gets run)
open file on upper layer mount
call syncfs() on upper-layer fd

Should that last syncfs() call report an error?

Also, suppose if at the end we instead opened a file on overlayfs and
issued the syncfs() there -- should we see the error in that case? 

> >  }
> >  
> > 
> > 
> > 
> >  /**
> > @@ -1873,6 +1880,7 @@ static int ovl_fill_super(struct super_b
> >  	if (!cred)
> >  		goto out_err;
> >  
> > 
> > 
> > 
> > +	spin_lock_init(&ofs->errseq_lock);
> >  	/* Is there a reason anyone would want not to share whiteouts? */
> >  	ofs->share_whiteout = true;
> >  
> > 
> > 
> > 
> > @@ -1945,7 +1953,7 @@ static int ovl_fill_super(struct super_b
> >  
> > 
> > 
> > 
> >  		sb->s_stack_depth = ovl_upper_mnt(ofs)->mnt_sb->s_stack_depth;
> >  		sb->s_time_gran = ovl_upper_mnt(ofs)->mnt_sb->s_time_gran;
> > -
> > +		sb->s_wb_err = errseq_sample(&ovl_upper_mnt(ofs)->mnt_sb->s_wb_err);
> >  	}
> >  	oe = ovl_get_lowerstack(sb, splitlower, numlower, ofs, layers);
> >  	err = PTR_ERR(oe);
> > Index: redhat-linux/fs/overlayfs/ovl_entry.h
> > ===================================================================
> > --- redhat-linux.orig/fs/overlayfs/ovl_entry.h	2020-12-14 15:33:43.934400880 -0500
> > +++ redhat-linux/fs/overlayfs/ovl_entry.h	2020-12-14 15:34:13.509400880 -0500
> > @@ -79,6 +79,7 @@ struct ovl_fs {
> >  	atomic_long_t last_ino;
> >  	/* Whiteout dentry cache */
> >  	struct dentry *whiteout;
> > +	spinlock_t errseq_lock;
> >  };
> >  
> > 
> > 
> > 
> >  static inline struct vfsmount *ovl_upper_mnt(struct ovl_fs *ofs)
> > 
>
Vivek Goyal Dec. 15, 2020, 2:59 p.m. UTC | #6
On Tue, Dec 15, 2020 at 08:16:12AM -0500, Jeff Layton wrote:
> On Mon, 2020-12-14 at 18:53 -0500, Jeff Layton wrote:
> > On Mon, 2020-12-14 at 16:38 -0500, Vivek Goyal wrote:
> > > On Sun, Dec 13, 2020 at 08:27:13AM -0500, Jeff Layton wrote:
> > > > Peek at the upper layer's errseq_t at mount time for volatile mounts,
> > > > and record it in the per-sb info. In sync_fs, check for an error since
> > > > the recorded point and set it in the overlayfs superblock if there was
> > > > one.
> > > > 
> > > > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > > > ---
> > > 
> > > While we are solving problem for non-volatile overlay mount, I also
> > > started thinking, what about non-volatile overlay syncfs() writeback errors.
> > > Looks like these will not be reported to user space at all as of now
> > > (because we never update overlay_sb->s_wb_err ever).
> > > 
> > > A patch like this might fix it. (compile tested only).
> > > 
> > > overlayfs: Report syncfs() errors to user space
> > > 
> > > Currently, syncfs(), calls filesystem ->sync_fs() method but ignores the
> > > return code. But certain writeback errors can still be reported on 
> > > syncfs() by checking errors on super block.
> > > 
> > > ret2 = errseq_check_and_advance(&sb->s_wb_err, &f.file->f_sb_err);
> > > 
> > > For the case of overlayfs, we never set overlayfs super block s_wb_err. That
> > > means sync() will never report writeback errors on overlayfs uppon syncfs().
> > > 
> > > Fix this by updating overlay sb->sb_wb_err upon ->sync_fs() call. And that
> > > should mean that user space syncfs() call should see writeback errors.
> > > 
> > > ovl_fsync() does not need anything special because if there are writeback
> > > errors underlying filesystem will report it through vfs_fsync_range() return
> > > code and user space will see it.
> > > 
> > > Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
> > > ---
> > >  fs/overlayfs/ovl_entry.h |    1 +
> > >  fs/overlayfs/super.c     |   14 +++++++++++---
> > >  2 files changed, 12 insertions(+), 3 deletions(-)
> > > 
> > > Index: redhat-linux/fs/overlayfs/super.c
> > > ===================================================================
> > > --- redhat-linux.orig/fs/overlayfs/super.c	2020-12-14 15:33:43.934400880 -0500
> > > +++ redhat-linux/fs/overlayfs/super.c	2020-12-14 16:15:07.127400880 -0500
> > > @@ -259,7 +259,7 @@ static int ovl_sync_fs(struct super_bloc
> > >  {
> > >  	struct ovl_fs *ofs = sb->s_fs_info;
> > >  	struct super_block *upper_sb;
> > > -	int ret;
> > > +	int ret, ret2;
> > >  
> > > 
> > > 
> > > 
> > >  	if (!ovl_upper_mnt(ofs))
> > >  		return 0;
> > > @@ -283,7 +283,14 @@ static int ovl_sync_fs(struct super_bloc
> > >  	ret = sync_filesystem(upper_sb);
> > >  	up_read(&upper_sb->s_umount);
> > >  
> > > 
> > > 
> > > 
> > > -	return ret;
> > > +	if (errseq_check(&upper_sb->s_wb_err, sb->s_wb_err)) {
> > > +		/* Upper sb has errors since last time */
> > > +		spin_lock(&ofs->errseq_lock);
> > > +		ret2 = errseq_check_and_advance(&upper_sb->s_wb_err,
> > > +						&sb->s_wb_err);
> > > +		spin_unlock(&ofs->errseq_lock);
> > > +	}
> > > +	return ret ? ret : ret2;
> > 
> > I think this is probably not quite right.
> > 
> > The problem I think is that the SEEN flag is always going to end up
> > being set in sb->s_wb_err, and that is going to violate the desired
> > semantics. If the writeback error occurred after all fd's were closed,
> > then the next opener wouldn't see it and you'd lose the error.
> > 
> > We probably need a function to cleanly propagate the error from one
> > errseq_t to another so that that doesn't occur. I'll have to think about
> > it.
> > 
> 
> So, the problem is that we can't guarantee that we'll have an open file
> when sync_fs is called. So if you do the check_and_advance in the
> context of a sync() syscall, you'll effectively ensure that a later
> opener on the upper layer won't see the error (since the upper_sb's
> errseq_t will be marked SEEN.

Aha.., I assumed that when ->sync_fs() is called, we always have a
valid fd open. But that's only true if ->sync_fs() is being called
through syncfs(fd) syscall. For the case of plain sync() syscall,
this is not true.

So it leads us back to need of passing "struct file" in ->sync_fs().
And fetching the writeback error from upper can be done only
if a file is open on which syncfs() has been called.

> 
> It's not clear to me what semantics you want in the following situation:
> 
> mount upper layer
> mount overlayfs with non-volatile upper layer
> do "stuff" on overlayfs, and close all files on overlayfs
> get a writeback error on upper layer
> call sync() (sync_fs gets run)
> open file on upper layer mount
> call syncfs() on upper-layer fd
> 
> Should that last syncfs error report an error?

Actually, I was thinking of following.
- mount upper layer
- mount overlayfs (non-volatile)
- Do bunch of writes.
- A writeback error happens on upper file and gets recorded in
  upper fs sb.
- overlay application calls syncfs(fd) and gets the error back. IIUC,
  the way currently things are written, syncfs(fd) will not return
  writeback errors on overlayfs.

> 
> Also, suppose if at the end we instead opened a file on overlayfs and
> issued the syncfs() there -- should we see the error in that case? 

I am thinking that the behavior should be the same as if two file
descriptors had been opened on a regular filesystem. So if I open
fd1 on overlay and fd2 on upper, and both were opened before the
writeback error happened, then syncfs(fd1) and syncfs(fd2) should
both see the error.

And any of syncfs(fd1) and syncfs(fd2) should set the SEEN flag in 
upper_sb so that new errors can continue to be reported.

IOW, it looks like the major problem with this patch is that we need
to propagate the error from upper_sb to the overlay sb only if a valid
file descriptor is open. That is, do this in the syncfs(fd) path and not
the sync() path. And to distinguish between the two, we probably need to
pass an additional parameter to ->sync_fs().

Am I missing something? Just trying to make sure that if we are
solving the problem of syncfs error propagation in overlay, let's
solve it for both the volatile and the non-volatile case so that
there is less confusion later.

Vivek
Vivek Goyal Dec. 15, 2020, 3:06 p.m. UTC | #7
On Mon, Dec 14, 2020 at 06:53:10PM -0500, Jeff Layton wrote:
> On Mon, 2020-12-14 at 16:38 -0500, Vivek Goyal wrote:
> > On Sun, Dec 13, 2020 at 08:27:13AM -0500, Jeff Layton wrote:
> > > Peek at the upper layer's errseq_t at mount time for volatile mounts,
> > > and record it in the per-sb info. In sync_fs, check for an error since
> > > the recorded point and set it in the overlayfs superblock if there was
> > > one.
> > > 
> > > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > > ---
> > 
> > While we are solving problem for non-volatile overlay mount, I also
> > started thinking, what about non-volatile overlay syncfs() writeback errors.
> > Looks like these will not be reported to user space at all as of now
> > (because we never update overlay_sb->s_wb_err ever).
> > 
> > A patch like this might fix it. (compile tested only).
> > 
> > overlayfs: Report syncfs() errors to user space
> > 
> > Currently, syncfs(), calls filesystem ->sync_fs() method but ignores the
> > return code. But certain writeback errors can still be reported on 
> > syncfs() by checking errors on super block.
> > 
> > ret2 = errseq_check_and_advance(&sb->s_wb_err, &f.file->f_sb_err);
> > 
> > For the case of overlayfs, we never set overlayfs super block s_wb_err. That
> > means sync() will never report writeback errors on overlayfs uppon syncfs().
> > 
> > Fix this by updating overlay sb->sb_wb_err upon ->sync_fs() call. And that
> > should mean that user space syncfs() call should see writeback errors.
> > 
> > ovl_fsync() does not need anything special because if there are writeback
> > errors underlying filesystem will report it through vfs_fsync_range() return
> > code and user space will see it.
> > 
> > Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
> > ---
> >  fs/overlayfs/ovl_entry.h |    1 +
> >  fs/overlayfs/super.c     |   14 +++++++++++---
> >  2 files changed, 12 insertions(+), 3 deletions(-)
> > 
> > Index: redhat-linux/fs/overlayfs/super.c
> > ===================================================================
> > --- redhat-linux.orig/fs/overlayfs/super.c	2020-12-14 15:33:43.934400880 -0500
> > +++ redhat-linux/fs/overlayfs/super.c	2020-12-14 16:15:07.127400880 -0500
> > @@ -259,7 +259,7 @@ static int ovl_sync_fs(struct super_bloc
> >  {
> >  	struct ovl_fs *ofs = sb->s_fs_info;
> >  	struct super_block *upper_sb;
> > -	int ret;
> > +	int ret, ret2;
> >  
> > 
> > 
> > 
> >  	if (!ovl_upper_mnt(ofs))
> >  		return 0;
> > @@ -283,7 +283,14 @@ static int ovl_sync_fs(struct super_bloc
> >  	ret = sync_filesystem(upper_sb);
> >  	up_read(&upper_sb->s_umount);
> >  
> > 
> > 
> > 
> > -	return ret;
> > +	if (errseq_check(&upper_sb->s_wb_err, sb->s_wb_err)) {
> > +		/* Upper sb has errors since last time */
> > +		spin_lock(&ofs->errseq_lock);
> > +		ret2 = errseq_check_and_advance(&upper_sb->s_wb_err,
> > +						&sb->s_wb_err);
> > +		spin_unlock(&ofs->errseq_lock);
> > +	}
> > +	return ret ? ret : ret2;
> 
> I think this is probably not quite right.
> 
> The problem I think is that the SEEN flag is always going to end up
> being set in sb->s_wb_err, and that is going to violate the desired
> semantics. If the writeback error occurred after all fd's were closed,
> then the next opener wouldn't see it and you'd lose the error.

So this will happen only due to the sync() path and not the syncfs() path, right?
If we avoid calling errseq_check_and_advance() in the sync() path in
overlayfs, then we always have a valid fd, and marking the error SEEN
on upper is perfectly valid?

> 
> We probably need a function to cleanly propagate the error from one
> errseq_t to another so that that doesn't occur. I'll have to think about
> it.

Thanks
Vivek

> 
> >  }
> >  
> > 
> > 
> > 
> >  /**
> > @@ -1873,6 +1880,7 @@ static int ovl_fill_super(struct super_b
> >  	if (!cred)
> >  		goto out_err;
> >  
> > 
> > 
> > 
> > +	spin_lock_init(&ofs->errseq_lock);
> >  	/* Is there a reason anyone would want not to share whiteouts? */
> >  	ofs->share_whiteout = true;
> >  
> > 
> > 
> > 
> > @@ -1945,7 +1953,7 @@ static int ovl_fill_super(struct super_b
> >  
> > 
> > 
> > 
> >  		sb->s_stack_depth = ovl_upper_mnt(ofs)->mnt_sb->s_stack_depth;
> >  		sb->s_time_gran = ovl_upper_mnt(ofs)->mnt_sb->s_time_gran;
> > -
> > +		sb->s_wb_err = errseq_sample(&ovl_upper_mnt(ofs)->mnt_sb->s_wb_err);
> >  	}
> >  	oe = ovl_get_lowerstack(sb, splitlower, numlower, ofs, layers);
> >  	err = PTR_ERR(oe);
> > Index: redhat-linux/fs/overlayfs/ovl_entry.h
> > ===================================================================
> > --- redhat-linux.orig/fs/overlayfs/ovl_entry.h	2020-12-14 15:33:43.934400880 -0500
> > +++ redhat-linux/fs/overlayfs/ovl_entry.h	2020-12-14 15:34:13.509400880 -0500
> > @@ -79,6 +79,7 @@ struct ovl_fs {
> >  	atomic_long_t last_ino;
> >  	/* Whiteout dentry cache */
> >  	struct dentry *whiteout;
> > +	spinlock_t errseq_lock;
> >  };
> >  
> > 
> > 
> > 
> >  static inline struct vfsmount *ovl_upper_mnt(struct ovl_fs *ofs)
> > 
> 
> -- 
> Jeff Layton <jlayton@kernel.org>
>
Jeff Layton Dec. 15, 2020, 3:23 p.m. UTC | #8
On Tue, 2020-12-15 at 09:59 -0500, Vivek Goyal wrote:
> On Tue, Dec 15, 2020 at 08:16:12AM -0500, Jeff Layton wrote:
> > On Mon, 2020-12-14 at 18:53 -0500, Jeff Layton wrote:
> > > On Mon, 2020-12-14 at 16:38 -0500, Vivek Goyal wrote:
> > > > On Sun, Dec 13, 2020 at 08:27:13AM -0500, Jeff Layton wrote:
> > > > > Peek at the upper layer's errseq_t at mount time for volatile mounts,
> > > > > and record it in the per-sb info. In sync_fs, check for an error since
> > > > > the recorded point and set it in the overlayfs superblock if there was
> > > > > one.
> > > > > 
> > > > > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > > > > ---
> > > > 
> > > > While we are solving problem for non-volatile overlay mount, I also
> > > > started thinking, what about non-volatile overlay syncfs() writeback errors.
> > > > Looks like these will not be reported to user space at all as of now
> > > > (because we never update overlay_sb->s_wb_err ever).
> > > > 
> > > > A patch like this might fix it. (compile tested only).
> > > > 
> > > > overlayfs: Report syncfs() errors to user space
> > > > 
> > > > Currently, syncfs(), calls filesystem ->sync_fs() method but ignores the
> > > > return code. But certain writeback errors can still be reported on 
> > > > syncfs() by checking errors on super block.
> > > > 
> > > > ret2 = errseq_check_and_advance(&sb->s_wb_err, &f.file->f_sb_err);
> > > > 
> > > > For the case of overlayfs, we never set overlayfs super block s_wb_err. That
> > > > means sync() will never report writeback errors on overlayfs uppon syncfs().
> > > > 
> > > > Fix this by updating overlay sb->sb_wb_err upon ->sync_fs() call. And that
> > > > should mean that user space syncfs() call should see writeback errors.
> > > > 
> > > > ovl_fsync() does not need anything special because if there are writeback
> > > > errors underlying filesystem will report it through vfs_fsync_range() return
> > > > code and user space will see it.
> > > > 
> > > > Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
> > > > ---
> > > >  fs/overlayfs/ovl_entry.h |    1 +
> > > >  fs/overlayfs/super.c     |   14 +++++++++++---
> > > >  2 files changed, 12 insertions(+), 3 deletions(-)
> > > > 
> > > > Index: redhat-linux/fs/overlayfs/super.c
> > > > ===================================================================
> > > > --- redhat-linux.orig/fs/overlayfs/super.c	2020-12-14 15:33:43.934400880 -0500
> > > > +++ redhat-linux/fs/overlayfs/super.c	2020-12-14 16:15:07.127400880 -0500
> > > > @@ -259,7 +259,7 @@ static int ovl_sync_fs(struct super_bloc
> > > >  {
> > > >  	struct ovl_fs *ofs = sb->s_fs_info;
> > > >  	struct super_block *upper_sb;
> > > > -	int ret;
> > > > +	int ret, ret2;
> > > >  
> > > > 
> > > > 
> > > > 
> > > >  	if (!ovl_upper_mnt(ofs))
> > > >  		return 0;
> > > > @@ -283,7 +283,14 @@ static int ovl_sync_fs(struct super_bloc
> > > >  	ret = sync_filesystem(upper_sb);
> > > >  	up_read(&upper_sb->s_umount);
> > > >  
> > > > 
> > > > 
> > > > 
> > > > -	return ret;
> > > > +	if (errseq_check(&upper_sb->s_wb_err, sb->s_wb_err)) {
> > > > +		/* Upper sb has errors since last time */
> > > > +		spin_lock(&ofs->errseq_lock);
> > > > +		ret2 = errseq_check_and_advance(&upper_sb->s_wb_err,
> > > > +						&sb->s_wb_err);
> > > > +		spin_unlock(&ofs->errseq_lock);
> > > > +	}
> > > > +	return ret ? ret : ret2;
> > > 
> > > I think this is probably not quite right.
> > > 
> > > The problem I think is that the SEEN flag is always going to end up
> > > being set in sb->s_wb_err, and that is going to violate the desired
> > > semantics. If the writeback error occurred after all fd's were closed,
> > > then the next opener wouldn't see it and you'd lose the error.
> > > 
> > > We probably need a function to cleanly propagate the error from one
> > > errseq_t to another so that that doesn't occur. I'll have to think about
> > > it.
> > > 
> > 
> > So, the problem is that we can't guarantee that we'll have an open file
> > when sync_fs is called. So if you do the check_and_advance in the
> > context of a sync() syscall, you'll effectively ensure that a later
> > opener on the upper layer won't see the error (since the upper_sb's
> > errseq_t will be marked SEEN.
> 
> Aha.., I assumed that when ->sync_fs() is called, we always have a
> valid fd open. But that's only true if ->sync_fs() is being called
> through syncfs(fd) syscall. For the case of plain sync() syscall,
> this is not true.
> 
> So it leads us back to need of passing "struct file" in ->sync_fs().
> And fetching the writeback error from upper can be done only
> if a file is open on which syncfs() has been called.
> 
> > 
> > It's not clear to me what semantics you want in the following situation:
> > 
> > mount upper layer
> > mount overlayfs with non-volatile upper layer
> > do "stuff" on overlayfs, and close all files on overlayfs
> > get a writeback error on upper layer
> > call sync() (sync_fs gets run)
> > open file on upper layer mount
> > call syncfs() on upper-layer fd
> > 
> > Should that last syncfs error report an error?
> 
> Actually, I was thinking of following.
> - mount upper layer
> - mount overlayfs (non-volatile)
> - Do bunch of writes.
> - A writeback error happens on upper file and gets recorded in
>   upper fs sb.
> - overlay application calls syncfs(fd) and gets the error back. IIUC,
>   the way currently things are written, syncfs(fd) will not return
>   writeback errors on overlayfs.
> 
> > 
> > Also, suppose if at the end we instead opened a file on overlayfs and
> > issued the syncfs() there -- should we see the error in that case? 
> 
> I am thinking that behavior should be similar to as if two file
> descriptors have been opened on a regular filesystem. So if I open
> one fd1 on overlay and one fd2 on upper and they both were opened
> before writeback error happend, then syncfs(fd1) and syncfs(fd2),
> both should see the error.
> 


Yes, that will happen as a matter of course.

> And any of syncfs(fd1) and syncfs(fd2) should set the SEEN flag in 
> upper_sb so that new errors can continue to be reported.
> 

The SEEN flag indicates whether a later opener should see an error that
predated the open. Currently, it will iff no one else has scraped the
error when the open is done.

Once we start dealing with overlayfs though, things are a bit more
murky. Suppose someone issues a sync on the upper sb and that triggers a
writeback error. If I then do an open+syncfs on the overlay, should I
see the error?

What about in the reverse case?

> IOW, so looks like major problem with this patch is that we need
> to propagate error from upper_sb to overlaysb only if a valid
> file descriptor is open. IOW, do this in syncfs(fd) path and not
> sync() path. And to distinguish between two, we probably need to
> pass additional parameter in ->sync_fs().
> 
> Am I missing somehting. Just trying to make sure that if we are
> solving the problem of syncfs error propagation in overlay, lets
> solve it both for volatile as well as non-volatile case so that
> there is less confusion later.
> 

It may be possible to propagate the errors in some fashion, but it's
starting to sound pretty complex. I think we'd probably be better served
by cleaning things up so that overlayfs can just return an error of its
choosing to syncfs().

What may actually be best is to add a new ->syncfs op to struct
file_operations, and turn the current syncfs syscall wrapper into a
generic_syncfs or something. Then you could just define a syncfs op for
overlayfs and do what you like in there.
Vivek Goyal Dec. 15, 2020, 3:39 p.m. UTC | #9
On Tue, Dec 15, 2020 at 10:23:08AM -0500, Jeff Layton wrote:
> On Tue, 2020-12-15 at 09:59 -0500, Vivek Goyal wrote:
> > On Tue, Dec 15, 2020 at 08:16:12AM -0500, Jeff Layton wrote:
> > > On Mon, 2020-12-14 at 18:53 -0500, Jeff Layton wrote:
> > > > On Mon, 2020-12-14 at 16:38 -0500, Vivek Goyal wrote:
> > > > > On Sun, Dec 13, 2020 at 08:27:13AM -0500, Jeff Layton wrote:
> > > > > > Peek at the upper layer's errseq_t at mount time for volatile mounts,
> > > > > > and record it in the per-sb info. In sync_fs, check for an error since
> > > > > > the recorded point and set it in the overlayfs superblock if there was
> > > > > > one.
> > > > > > 
> > > > > > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > > > > > ---
> > > > > 
> > > > > While we are solving problem for non-volatile overlay mount, I also
> > > > > started thinking, what about non-volatile overlay syncfs() writeback errors.
> > > > > Looks like these will not be reported to user space at all as of now
> > > > > (because we never update overlay_sb->s_wb_err ever).
> > > > > 
> > > > > A patch like this might fix it. (compile tested only).
> > > > > 
> > > > > overlayfs: Report syncfs() errors to user space
> > > > > 
> > > > > Currently, syncfs(), calls filesystem ->sync_fs() method but ignores the
> > > > > return code. But certain writeback errors can still be reported on 
> > > > > syncfs() by checking errors on super block.
> > > > > 
> > > > > ret2 = errseq_check_and_advance(&sb->s_wb_err, &f.file->f_sb_err);
> > > > > 
> > > > > For the case of overlayfs, we never set overlayfs super block s_wb_err. That
> > > > > means sync() will never report writeback errors on overlayfs uppon syncfs().
> > > > > 
> > > > > Fix this by updating overlay sb->sb_wb_err upon ->sync_fs() call. And that
> > > > > should mean that user space syncfs() call should see writeback errors.
> > > > > 
> > > > > ovl_fsync() does not need anything special because if there are writeback
> > > > > errors underlying filesystem will report it through vfs_fsync_range() return
> > > > > code and user space will see it.
> > > > > 
> > > > > Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
> > > > > ---
> > > > >  fs/overlayfs/ovl_entry.h |    1 +
> > > > >  fs/overlayfs/super.c     |   14 +++++++++++---
> > > > >  2 files changed, 12 insertions(+), 3 deletions(-)
> > > > > 
> > > > > Index: redhat-linux/fs/overlayfs/super.c
> > > > > ===================================================================
> > > > > --- redhat-linux.orig/fs/overlayfs/super.c	2020-12-14 15:33:43.934400880 -0500
> > > > > +++ redhat-linux/fs/overlayfs/super.c	2020-12-14 16:15:07.127400880 -0500
> > > > > @@ -259,7 +259,7 @@ static int ovl_sync_fs(struct super_bloc
> > > > >  {
> > > > >  	struct ovl_fs *ofs = sb->s_fs_info;
> > > > >  	struct super_block *upper_sb;
> > > > > -	int ret;
> > > > > +	int ret, ret2;
> > > > >  
> > > > > 
> > > > > 
> > > > > 
> > > > >  	if (!ovl_upper_mnt(ofs))
> > > > >  		return 0;
> > > > > @@ -283,7 +283,14 @@ static int ovl_sync_fs(struct super_bloc
> > > > >  	ret = sync_filesystem(upper_sb);
> > > > >  	up_read(&upper_sb->s_umount);
> > > > >  
> > > > > 
> > > > > 
> > > > > 
> > > > > -	return ret;
> > > > > +	if (errseq_check(&upper_sb->s_wb_err, sb->s_wb_err)) {
> > > > > +		/* Upper sb has errors since last time */
> > > > > +		spin_lock(&ofs->errseq_lock);
> > > > > +		ret2 = errseq_check_and_advance(&upper_sb->s_wb_err,
> > > > > +						&sb->s_wb_err);
> > > > > +		spin_unlock(&ofs->errseq_lock);
> > > > > +	}
> > > > > +	return ret ? ret : ret2;
> > > > 
> > > > I think this is probably not quite right.
> > > > 
> > > > The problem I think is that the SEEN flag is always going to end up
> > > > being set in sb->s_wb_err, and that is going to violate the desired
> > > > semantics. If the writeback error occurred after all fd's were closed,
> > > > then the next opener wouldn't see it and you'd lose the error.
> > > > 
> > > > We probably need a function to cleanly propagate the error from one
> > > > errseq_t to another so that that doesn't occur. I'll have to think about
> > > > it.
> > > > 
> > > 
> > > So, the problem is that we can't guarantee that we'll have an open file
> > > when sync_fs is called. So if you do the check_and_advance in the
> > > context of a sync() syscall, you'll effectively ensure that a later
> > > opener on the upper layer won't see the error (since the upper_sb's
> > > errseq_t will be marked SEEN.
> > 
> > Aha.., I assumed that when ->sync_fs() is called, we always have a
> > valid fd open. But that's only true if ->sync_fs() is being called
> > through syncfs(fd) syscall. For the case of plain sync() syscall,
> > this is not true.
> > 
> > So it leads us back to need of passing "struct file" in ->sync_fs().
> > And fetching the writeback error from upper can be done only
> > if a file is open on which syncfs() has been called.
> > 
> > > 
> > > It's not clear to me what semantics you want in the following situation:
> > > 
> > > mount upper layer
> > > mount overlayfs with non-volatile upper layer
> > > do "stuff" on overlayfs, and close all files on overlayfs
> > > get a writeback error on upper layer
> > > call sync() (sync_fs gets run)
> > > open file on upper layer mount
> > > call syncfs() on upper-layer fd
> > > 
> > > Should that last syncfs error report an error?
> > 
> > Actually, I was thinking of following.
> > - mount upper layer
> > - mount overlayfs (non-volatile)
> > - Do bunch of writes.
> > - A writeback error happens on upper file and gets recorded in
> >   upper fs sb.
> > - overlay application calls syncfs(fd) and gets the error back. IIUC,
> >   the way currently things are written, syncfs(fd) will not return
> >   writeback errors on overlayfs.
> > 
> > > 
> > > Also, suppose if at the end we instead opened a file on overlayfs and
> > > issued the syncfs() there -- should we see the error in that case? 
> > 
> > I am thinking that behavior should be similar to as if two file
> > descriptors have been opened on a regular filesystem. So if I open
> > one fd1 on overlay and one fd2 on upper and they both were opened
> > before writeback error happend, then syncfs(fd1) and syncfs(fd2),
> > both should see the error.
> > 
> 
> 
> Yes, that will happen as a matter of course.
> 
> > And any of syncfs(fd1) and syncfs(fd2) should set the SEEN flag in 
> > upper_sb so that new errors can continue to be reported.
> > 
> 
> The SEEN flag indicates whether a later opener should see an error that
> predated the open. Currently, it will iff no one else has scraped the
> error when the open is done.
> 
> Once we start dealing with overlayfs though, things are a bit more
> murky. If someone issues a sync on the upper sb and that triggers a
> writeback error. If I then do an open+syncfs on the overlay, should I
> see the error?

I think that yes, open+syncfs on the overlay should see this UNSEEN error.
IOW, this will be similar to as if somebody did an open+syncfs on upper
and scraped the UNSEEN error.

> 
> What about in the reverse case?

Same for the reverse case. If overlayfs triggered a sync that resulted
in an unseen error on upper sb, then a later open+syncfs on upper should
see the error.

Thanks
Vivek

> 
> > IOW, so looks like major problem with this patch is that we need
> > to propagate error from upper_sb to overlaysb only if a valid
> > file descriptor is open. IOW, do this in syncfs(fd) path and not
> > sync() path. And to distinguish between two, we probably need to
> > pass additional parameter in ->sync_fs().
> > 
> > Am I missing somehting. Just trying to make sure that if we are
> > solving the problem of syncfs error propagation in overlay, lets
> > solve it both for volatile as well as non-volatile case so that
> > there is less confusion later.
> > 
> 
> It may be possible to propagate the errors in some fashion, but it's
> starting to sound pretty complex. I think we'd probably be better served
> by cleaning things up so that overlayfs can just return an error of its
> choosing to syncfs().
> 
> What may actually be best is to add a new ->syncfs op to struct
> file_operations, and turn the current syncfs syscall wrapper into a
> generic_syncfs or something. Then you could just define a syncfs op for
> overlayfs and do what you like in there.
> 
> -- 
> Jeff Layton <jlayton@kernel.org>
>
Sargun Dhillon Dec. 17, 2020, 7:28 p.m. UTC | #10
On Sun, Dec 13, 2020 at 08:27:13AM -0500, Jeff Layton wrote:
> Peek at the upper layer's errseq_t at mount time for volatile mounts,
> and record it in the per-sb info. In sync_fs, check for an error since
> the recorded point and set it in the overlayfs superblock if there was
> one.
> 
> Signed-off-by: Jeff Layton <jlayton@kernel.org>
> ---
>  fs/overlayfs/ovl_entry.h |  1 +
>  fs/overlayfs/super.c     | 14 +++++++++++---
>  2 files changed, 12 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
> index 1b5a2094df8e..fcfcc3951973 100644
> --- a/fs/overlayfs/ovl_entry.h
> +++ b/fs/overlayfs/ovl_entry.h
> @@ -79,6 +79,7 @@ struct ovl_fs {
>  	atomic_long_t last_ino;
>  	/* Whiteout dentry cache */
>  	struct dentry *whiteout;
> +	errseq_t err_mark;
>  };
>  
>  static inline struct vfsmount *ovl_upper_mnt(struct ovl_fs *ofs)
> diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
> index 290983bcfbb3..2985d2752970 100644
> --- a/fs/overlayfs/super.c
> +++ b/fs/overlayfs/super.c
> @@ -264,8 +264,13 @@ static int ovl_sync_fs(struct super_block *sb, int wait)
>  	if (!ovl_upper_mnt(ofs))
>  		return 0;
>  
> -	if (!ovl_should_sync(ofs))
> -		return 0;
> +	if (!ovl_should_sync(ofs)) {
> +		/* Propagate errors from upper to overlayfs */
> +		ret = errseq_check(&upper_sb->s_wb_err, ofs->err_mark);
> +		errseq_set(&sb->s_wb_err, ret);
> +		return ret;
> +	}
> +
>  	/*
>  	 * Not called for sync(2) call or an emergency sync (SB_I_SKIP_SYNC).
>  	 * All the super blocks will be iterated, including upper_sb.
> @@ -1945,8 +1950,11 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
>  
>  		sb->s_stack_depth = ovl_upper_mnt(ofs)->mnt_sb->s_stack_depth;
>  		sb->s_time_gran = ovl_upper_mnt(ofs)->mnt_sb->s_time_gran;
> -
>  	}
> +
> +	if (ofs->config.ovl_volatile)
> +		ofs->err_mark = errseq_peek(&ovl_upper_mnt(ofs)->mnt_sb->s_wb_err);
> +
>  	oe = ovl_get_lowerstack(sb, splitlower, numlower, ofs, layers);
>  	err = PTR_ERR(oe);
>  	if (IS_ERR(oe))
> -- 
> 2.29.2
> 

I've tested this with the following scenarios, seems to work:
Test:
1. Mount ext2 on /mnt/loop, and cause a writeback error
2. Verify syncfs on /mnt/loop shows error
3. Mount volatile filesystem  
4. Create file on volatile filesystem, and verify that I can syncfs it without error
---
Fork:

5a. Create a file on overlayfs, and generate a writeback error
6a. Syncfs overlayfs.
7a. Create a new file on overlayfs, and syncfs, and verify it returns error

---
5b. Create a file on loop back, and generate a writeback error
6b. Sync said file
7b. Verify syncfs on loop returns error once, and then success on next attempts
8b. Verify all syncfs on overlayfs now fail

---
5c. Create file on overlayfs, and generate a writeback error
6c. Sync overlayfs, and verify all syncs are failures               
7c. Verify syncfs on loop fails once.
diff mbox series

Patch

diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 1b5a2094df8e..fcfcc3951973 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -79,6 +79,7 @@  struct ovl_fs {
 	atomic_long_t last_ino;
 	/* Whiteout dentry cache */
 	struct dentry *whiteout;
+	errseq_t err_mark;
 };
 
 static inline struct vfsmount *ovl_upper_mnt(struct ovl_fs *ofs)
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 290983bcfbb3..2985d2752970 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -264,8 +264,13 @@  static int ovl_sync_fs(struct super_block *sb, int wait)
 	if (!ovl_upper_mnt(ofs))
 		return 0;
 
-	if (!ovl_should_sync(ofs))
-		return 0;
+	if (!ovl_should_sync(ofs)) {
+		/* Propagate errors from upper to overlayfs */
+		ret = errseq_check(&upper_sb->s_wb_err, ofs->err_mark);
+		errseq_set(&sb->s_wb_err, ret);
+		return ret;
+	}
+
 	/*
 	 * Not called for sync(2) call or an emergency sync (SB_I_SKIP_SYNC).
 	 * All the super blocks will be iterated, including upper_sb.
@@ -1945,8 +1950,11 @@  static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 
 		sb->s_stack_depth = ovl_upper_mnt(ofs)->mnt_sb->s_stack_depth;
 		sb->s_time_gran = ovl_upper_mnt(ofs)->mnt_sb->s_time_gran;
-
 	}
+
+	if (ofs->config.ovl_volatile)
+		ofs->err_mark = errseq_peek(&ovl_upper_mnt(ofs)->mnt_sb->s_wb_err);
+
 	oe = ovl_get_lowerstack(sb, splitlower, numlower, ofs, layers);
 	err = PTR_ERR(oe);
 	if (IS_ERR(oe))