diff mbox series

[RFC,v2] fuse: In fuse_flush only wait if someone wants the return code

Message ID 875yjfdw3a.fsf_-_@email.froward.int.ebiederm.org (mailing list archive)
State New, archived
Headers show
Series [RFC,v2] fuse: In fuse_flush only wait if someone wants the return code | expand

Commit Message

Eric W. Biederman July 30, 2022, 5:10 a.m. UTC
In my very light testing this resolves a hang where a thread of the
fuse server was accessing the fuse filesystem (the fuse server is
serving up), when the fuse server is killed.

The practical problem is that the fuse server file descriptor was
being closed after the file descriptor into the fuse filesystem so
that the fuse filesystem operations were being blocked for instead of
being aborted.  Simply skipping the unnecessary wait resolves this
issue.

This is just a proof of concept and someone should look to see if the
fuse max_background limit could cause a problem with this approach.

Additionally testing PF_EXITING is a very crude way to tell if someone
wants the return code from the vfs flush operation.  As such in the
long run it probably makes sense to get some direct vfs support for
knowing if flush needs to block until all of the flushing is complete
and a status/return code can be returned.

Unless I have missed something this is a generic optimization that can
apply to many network filesystems.

Al, vfs folks? (igrab/iput sorted so as not to be distractions).

Perhaps a .flush_async method without a return code and a
filp_close_async function without a return code to take advantage of
this in the general sense.

Waiting potentially indefinitely for user space in do_exit seems like a
bad idea.  Especially when all that the wait is for is to get a return
code that will never be examined.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/fuse/file.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

Comments

Tycho Andersen Aug. 1, 2022, 3:16 p.m. UTC | #1
On Sat, Jul 30, 2022 at 12:10:33AM -0500, Eric W. Biederman wrote:
> +static int fuse_flush_async(struct file *file, fl_owner_t id)
> +{
> +	struct inode *inode = file_inode(file);
> +	struct fuse_mount *fm = get_fuse_mount(inode);
> +	struct fuse_file *ff = file->private_data;
> +	struct fuse_flush_args *fa;
> +	int err;
> +
> +	fa = kzalloc(sizeof(*fa), GFP_KERNEL);
> +	if (!fa)
> +		return -ENOMEM;
> +
> +	fa->inarg.fh = ff->fh;
> +	fa->inarg.lock_owner = fuse_lock_owner_id(fm->fc, id);
> +	fa->args.opcode = FUSE_FLUSH;
> +	fa->args.nodeid = get_node_id(inode);
> +	fa->args.in_numargs = 1;
> +	fa->args.in_args[0].size = sizeof(fa->inarg);
> +	fa->args.in_args[0].value = &fa->inarg;
> +	fa->args.force = true;

Seems like you need a

    fa->args.nocreds = true;

here or you'll hit the WARN() in fuse_simple_background().

Tycho
Miklos Szeredi Aug. 2, 2022, 12:50 p.m. UTC | #2
On Sat, 30 Jul 2022 at 07:11, Eric W. Biederman <ebiederm@xmission.com> wrote:
>
>
> In my very light testing this resolves a hang where a thread of the
> fuse server was accessing the fuse filesystem (the fuse server is
> serving up), when the fuse server is killed.
>
> The practical problem is that the fuse server file descriptor was
> being closed after the file descriptor into the fuse filesystem so
> that the fuse filesystem operations were being blocked for instead of
> being aborted.  Simply skipping the unnecessary wait resolves this
> issue.
>
> This is just a proof of concept and someone should look to see if the
> fuse max_background limit could cause a problem with this approach.

max_background just throttles the number of background requests that
the userspace filesystem can *unqueue*.   It doesn't affect queuing in
any way.

> Additionally testing PF_EXITING is a very crude way to tell if someone
> wants the return code from the vfs flush operation.  As such in the
> long run it probably makes sense to get some direct vfs support for
> knowing if flush needs to block until all of the flushing is complete
> and a status/return code can be returned.
>
> Unless I have missed something this is a generic optimization that can
> apply to many network filesystems.
>
> Al, vfs folks? (igrab/iput sorted so as not to be distractions).
>
> Perhaps a .flush_async method without a return code and a
> filp_close_async function without a return code to take advantage of
> this in the general sense.
>
> Waiting potentially indefinitely for user space in do_exit seems like a
> bad idea.  Especially when all that the wait is for is to get a return
> code that will never be examined.

The wait is for posix locks to get unlocked.  But "remote" posix locks
are almost never used due to problems like this, so I think it's safe
to do this.

>
> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
> ---
>  fs/fuse/file.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 59 insertions(+)
>
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index 05caa2b9272e..2bd94acd761f 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -464,6 +464,62 @@ static void fuse_sync_writes(struct inode *inode)
>         fuse_release_nowrite(inode);
>  }
>
> +struct fuse_flush_args {
> +       struct fuse_args args;
> +       struct fuse_flush_in inarg;
> +       struct inode *inode;
> +};
> +
> +static void fuse_flush_end(struct fuse_mount *fm, struct fuse_args *args, int err)
> +{
> +       struct fuse_flush_args *fa = container_of(args, typeof(*fa), args);
> +
> +       if (err == -ENOSYS) {
> +               fm->fc->no_flush = 1;
> +               err = 0;
> +       }
> +
> +       /*
> +        * In memory i_blocks is not maintained by fuse, if writeback cache is
> +        * enabled, i_blocks from cached attr may not be accurate.
> +        */
> +       if (!err && fm->fc->writeback_cache)
> +               fuse_invalidate_attr_mask(fa->inode, STATX_BLOCKS);
> +
> +       iput(fa->inode);

Filesystems might expect not just he inode to not be destroyed but
also the file, so do what other file operations do, keep a ref on ff:

fuse_file_put(fa->ff, false, false);

> +       kfree(fa);
> +}
> +
> +static int fuse_flush_async(struct file *file, fl_owner_t id)
> +{
> +       struct inode *inode = file_inode(file);
> +       struct fuse_mount *fm = get_fuse_mount(inode);
> +       struct fuse_file *ff = file->private_data;
> +       struct fuse_flush_args *fa;
> +       int err;
> +
> +       fa = kzalloc(sizeof(*fa), GFP_KERNEL);
> +       if (!fa)
> +               return -ENOMEM;
> +
> +       fa->inarg.fh = ff->fh;
> +       fa->inarg.lock_owner = fuse_lock_owner_id(fm->fc, id);
> +       fa->args.opcode = FUSE_FLUSH;
> +       fa->args.nodeid = get_node_id(inode);
> +       fa->args.in_numargs = 1;
> +       fa->args.in_args[0].size = sizeof(fa->inarg);
> +       fa->args.in_args[0].value = &fa->inarg;
> +       fa->args.force = true;
> +       fa->args.end = fuse_flush_end;
> +       fa->inode = igrab(inode);

fa->ff = fuse_file_get(ff);

> +
> +       err = fuse_simple_background(fm, &fa->args, GFP_KERNEL);
> +       if (err)
> +               fuse_flush_end(fm, &fa->args, err);
> +
> +       return err;
> +}
> +
>  static int fuse_flush(struct file *file, fl_owner_t id)
>  {
>         struct inode *inode = file_inode(file);
> @@ -495,6 +551,9 @@ static int fuse_flush(struct file *file, fl_owner_t id)
>         if (fm->fc->no_flush)
>                 goto inval_attr_out;
>
> +       if (current->flags & PF_EXITING)
> +               return fuse_flush_async(file, id);
> +
>         memset(&inarg, 0, sizeof(inarg));
>         inarg.fh = ff->fh;
>         inarg.lock_owner = fuse_lock_owner_id(fm->fc, id);
> --
> 2.35.3
>
Tycho Andersen Aug. 15, 2022, 1:59 p.m. UTC | #3
Hi,

On Sat, Jul 30, 2022 at 12:10:33AM -0500, Eric W. Biederman wrote:
> Al, vfs folks? (igrab/iput sorted so as not to be distractions).

Any movement on this? Can you resend (or I can) the patch with the
fixes for fuse at the very least?

Thanks,
Serge Hallyn Aug. 15, 2022, 5:55 p.m. UTC | #4
On Mon, Aug 15, 2022 at 07:59:08AM -0600, Tycho Andersen wrote:
> Hi,
> 
> On Sat, Jul 30, 2022 at 12:10:33AM -0500, Eric W. Biederman wrote:
> > Al, vfs folks? (igrab/iput sorted so as not to be distractions).
> 
> Any movement on this? Can you resend (or I can) the patch with the
> fixes for fuse at the very least?
> 
> Thanks,

If you resend with the fixes, I'd like to do a bit of testing with it.
Eric W. Biederman Sept. 19, 2022, 3:46 p.m. UTC | #5
Tycho Andersen <tycho@tycho.pizza> writes:

> Hi,
>
> On Sat, Jul 30, 2022 at 12:10:33AM -0500, Eric W. Biederman wrote:
>> Al, vfs folks? (igrab/iput sorted so as not to be distractions).
>
> Any movement on this? Can you resend (or I can) the patch with the
> fixes for fuse at the very least?

Sorry for not replying earlier.  Thank you for taking this.

I had really meant to suggest something like that.  At the moment I have
a bit too much on my plate, so I am glad to see this moving forward.

I am a bit sad that I didn't succeed in starting a general vfs
discussion about this.  Oh well.  As long as we get weird bugs
like this fixed.

Eric
diff mbox series

Patch

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 05caa2b9272e..2bd94acd761f 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -464,6 +464,62 @@  static void fuse_sync_writes(struct inode *inode)
 	fuse_release_nowrite(inode);
 }
 
+struct fuse_flush_args {
+	struct fuse_args args;
+	struct fuse_flush_in inarg;
+	struct inode *inode;
+};
+
+static void fuse_flush_end(struct fuse_mount *fm, struct fuse_args *args, int err)
+{
+	struct fuse_flush_args *fa = container_of(args, typeof(*fa), args);
+
+	if (err == -ENOSYS) {
+		fm->fc->no_flush = 1;
+		err = 0;
+	}
+
+	/*
+	 * In memory i_blocks is not maintained by fuse, if writeback cache is
+	 * enabled, i_blocks from cached attr may not be accurate.
+	 */
+	if (!err && fm->fc->writeback_cache)
+		fuse_invalidate_attr_mask(fa->inode, STATX_BLOCKS);
+
+	iput(fa->inode);
+	kfree(fa);
+}
+
+static int fuse_flush_async(struct file *file, fl_owner_t id)
+{
+	struct inode *inode = file_inode(file);
+	struct fuse_mount *fm = get_fuse_mount(inode);
+	struct fuse_file *ff = file->private_data;
+	struct fuse_flush_args *fa;
+	int err;
+
+	fa = kzalloc(sizeof(*fa), GFP_KERNEL);
+	if (!fa)
+		return -ENOMEM;
+
+	fa->inarg.fh = ff->fh;
+	fa->inarg.lock_owner = fuse_lock_owner_id(fm->fc, id);
+	fa->args.opcode = FUSE_FLUSH;
+	fa->args.nodeid = get_node_id(inode);
+	fa->args.in_numargs = 1;
+	fa->args.in_args[0].size = sizeof(fa->inarg);
+	fa->args.in_args[0].value = &fa->inarg;
+	fa->args.force = true;
+	fa->args.end = fuse_flush_end;
+	fa->inode = igrab(inode);
+
+	err = fuse_simple_background(fm, &fa->args, GFP_KERNEL);
+	if (err)
+		fuse_flush_end(fm, &fa->args, err);
+
+	return err;
+}
+
 static int fuse_flush(struct file *file, fl_owner_t id)
 {
 	struct inode *inode = file_inode(file);
@@ -495,6 +551,9 @@  static int fuse_flush(struct file *file, fl_owner_t id)
 	if (fm->fc->no_flush)
 		goto inval_attr_out;
 
+	if (current->flags & PF_EXITING)
+		return fuse_flush_async(file, id);
+
 	memset(&inarg, 0, sizeof(inarg));
 	inarg.fh = ff->fh;
 	inarg.lock_owner = fuse_lock_owner_id(fm->fc, id);