diff mbox series

[for-6.1,v3,3/3] virtiofsd: Add support for FUSE_SYNCFS request

Message ID 20210510155539.998747-4-groug@kaod.org (mailing list archive)
State New, archived
Headers show
Series virtiofsd: Add support for FUSE_SYNCFS request | expand

Commit Message

Greg Kurz May 10, 2021, 3:55 p.m. UTC
Honor the expected behavior of syncfs() to synchronously flush all data
and metadata on linux systems. Simply loop on all known submounts and
call syncfs() on them.

Note that syncfs() might suffer from a time penalty if the submounts
are being hammered by some unrelated workload on the host. The only
solution to avoid that is to avoid shared submounts.

Signed-off-by: Greg Kurz <groug@kaod.org>
---
 tools/virtiofsd/fuse_lowlevel.c       | 11 ++++++++
 tools/virtiofsd/fuse_lowlevel.h       | 12 +++++++++
 tools/virtiofsd/passthrough_ll.c      | 38 +++++++++++++++++++++++++++
 tools/virtiofsd/passthrough_seccomp.c |  1 +
 4 files changed, 62 insertions(+)

Comments

Vivek Goyal May 10, 2021, 7:15 p.m. UTC | #1
On Mon, May 10, 2021 at 05:55:39PM +0200, Greg Kurz wrote:
> Honor the expected behavior of syncfs() to synchronously flush all data
> and metadata on linux systems. Simply loop on all known submounts and
> call syncfs() on them.
> 
> Note that syncfs() might suffer from a time penalty if the submounts
> are being hammered by some unrelated workload on the host. The only
> solution to avoid that is to avoid shared submounts.
> 
> Signed-off-by: Greg Kurz <groug@kaod.org>
> ---
>  tools/virtiofsd/fuse_lowlevel.c       | 11 ++++++++
>  tools/virtiofsd/fuse_lowlevel.h       | 12 +++++++++
>  tools/virtiofsd/passthrough_ll.c      | 38 +++++++++++++++++++++++++++
>  tools/virtiofsd/passthrough_seccomp.c |  1 +
>  4 files changed, 62 insertions(+)
> 
> diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
> index 58e32fc96369..3be95ec903c9 100644
> --- a/tools/virtiofsd/fuse_lowlevel.c
> +++ b/tools/virtiofsd/fuse_lowlevel.c
> @@ -1870,6 +1870,16 @@ static void do_lseek(fuse_req_t req, fuse_ino_t nodeid,
>      }
>  }
>  
> +static void do_syncfs(fuse_req_t req, fuse_ino_t nodeid,
> +                      struct fuse_mbuf_iter *iter)
> +{
> +    if (req->se->op.syncfs) {
> +        req->se->op.syncfs(req);
> +    } else {
> +        fuse_reply_err(req, ENOSYS);
> +    }
> +}
> +
>  static void do_init(fuse_req_t req, fuse_ino_t nodeid,
>                      struct fuse_mbuf_iter *iter)
>  {
> @@ -2267,6 +2277,7 @@ static struct {
>      [FUSE_RENAME2] = { do_rename2, "RENAME2" },
>      [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" },
>      [FUSE_LSEEK] = { do_lseek, "LSEEK" },
> +    [FUSE_SYNCFS] = { do_syncfs, "SYNCFS" },
>  };
>  
>  #define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0]))
> diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h
> index 3bf786b03485..890c520b195a 100644
> --- a/tools/virtiofsd/fuse_lowlevel.h
> +++ b/tools/virtiofsd/fuse_lowlevel.h
> @@ -1225,6 +1225,18 @@ struct fuse_lowlevel_ops {
>       */
>      void (*lseek)(fuse_req_t req, fuse_ino_t ino, off_t off, int whence,
>                    struct fuse_file_info *fi);
> +
> +    /**
> +     * Synchronize file system content
> +     *
> +     * If this request is answered with an error code of ENOSYS,
> +     * this is treated as success and future calls to syncfs() will
> +     * succeed automatically without being sent to the filesystem
> +     * process.
> +     *
> +     * @param req request handle
> +     */
> +    void (*syncfs)(fuse_req_t req);
>  };
>  
>  /**
> diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
> index dc940a1d048b..289900c6d274 100644
> --- a/tools/virtiofsd/passthrough_ll.c
> +++ b/tools/virtiofsd/passthrough_ll.c
> @@ -3153,6 +3153,43 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence,
>      }
>  }
>  
> +static void lo_syncfs(fuse_req_t req)
> +{
> +    struct lo_data *lo = lo_data(req);
> +    GHashTableIter iter;
> +    gpointer key, value;
> +    int err = 0;
> +
> +    pthread_mutex_lock(&lo->mutex);
> +
> +    g_hash_table_iter_init(&iter, lo->mnt_inodes);
> +    while (g_hash_table_iter_next(&iter, &key, &value)) {
> +        struct lo_inode *inode = value;
> +        int fd;
> +
> +        fuse_log(FUSE_LOG_DEBUG, "lo_syncfs(ino=%" PRIu64 ")\n",
> +                 inode->fuse_ino);
> +
> +        fd = lo_inode_open(lo, inode, O_RDONLY);
> +        if (fd < 0) {
> +            err = -fd;
> +            break;
> +        }
> +
> +        if (syncfs(fd) < 0) {

I don't have a good feeling about calling syncfs() with lo->mutex held.
This seems to be that global mutex which is held at so many places
and will serialize everything else. I think we agreed that syncfs()
can take 10s of seconds if fs is busy. And that means we will stall
other filesystem operations too.

So will be good if we can call syncfs() outside the lock. May be prepare
a list of inodes which are there, take a reference and drop the lock.
call syncfs and then drop the reference on inode.

Vivek

> +            err = errno;
> +            close(fd);
> +            break;
> +        }
> +
> +        close(fd);
> +    }
> +
> +    pthread_mutex_unlock(&lo->mutex);
> +
> +    fuse_reply_err(req, err);
> +}
> +
>  static void lo_destroy(void *userdata)
>  {
>      struct lo_data *lo = (struct lo_data *)userdata;
> @@ -3214,6 +3251,7 @@ static struct fuse_lowlevel_ops lo_oper = {
>      .copy_file_range = lo_copy_file_range,
>  #endif
>      .lseek = lo_lseek,
> +    .syncfs = lo_syncfs,
>      .destroy = lo_destroy,
>  };
>  
> diff --git a/tools/virtiofsd/passthrough_seccomp.c b/tools/virtiofsd/passthrough_seccomp.c
> index 62441cfcdb95..343188447901 100644
> --- a/tools/virtiofsd/passthrough_seccomp.c
> +++ b/tools/virtiofsd/passthrough_seccomp.c
> @@ -107,6 +107,7 @@ static const int syscall_allowlist[] = {
>      SCMP_SYS(set_robust_list),
>      SCMP_SYS(setxattr),
>      SCMP_SYS(symlinkat),
> +    SCMP_SYS(syncfs),
>      SCMP_SYS(time), /* Rarely needed, except on static builds */
>      SCMP_SYS(tgkill),
>      SCMP_SYS(unlinkat),
> -- 
> 2.26.3
>
Greg Kurz May 11, 2021, 10:09 a.m. UTC | #2
On Mon, 10 May 2021 15:15:02 -0400
Vivek Goyal <vgoyal@redhat.com> wrote:

> On Mon, May 10, 2021 at 05:55:39PM +0200, Greg Kurz wrote:
> > Honor the expected behavior of syncfs() to synchronously flush all data
> > and metadata on linux systems. Simply loop on all known submounts and
> > call syncfs() on them.
> > 
> > Note that syncfs() might suffer from a time penalty if the submounts
> > are being hammered by some unrelated workload on the host. The only
> > solution to avoid that is to avoid shared submounts.
> > 
> > Signed-off-by: Greg Kurz <groug@kaod.org>
> > ---
> >  tools/virtiofsd/fuse_lowlevel.c       | 11 ++++++++
> >  tools/virtiofsd/fuse_lowlevel.h       | 12 +++++++++
> >  tools/virtiofsd/passthrough_ll.c      | 38 +++++++++++++++++++++++++++
> >  tools/virtiofsd/passthrough_seccomp.c |  1 +
> >  4 files changed, 62 insertions(+)
> > 
> > diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
> > index 58e32fc96369..3be95ec903c9 100644
> > --- a/tools/virtiofsd/fuse_lowlevel.c
> > +++ b/tools/virtiofsd/fuse_lowlevel.c
> > @@ -1870,6 +1870,16 @@ static void do_lseek(fuse_req_t req, fuse_ino_t nodeid,
> >      }
> >  }
> >  
> > +static void do_syncfs(fuse_req_t req, fuse_ino_t nodeid,
> > +                      struct fuse_mbuf_iter *iter)
> > +{
> > +    if (req->se->op.syncfs) {
> > +        req->se->op.syncfs(req);
> > +    } else {
> > +        fuse_reply_err(req, ENOSYS);
> > +    }
> > +}
> > +
> >  static void do_init(fuse_req_t req, fuse_ino_t nodeid,
> >                      struct fuse_mbuf_iter *iter)
> >  {
> > @@ -2267,6 +2277,7 @@ static struct {
> >      [FUSE_RENAME2] = { do_rename2, "RENAME2" },
> >      [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" },
> >      [FUSE_LSEEK] = { do_lseek, "LSEEK" },
> > +    [FUSE_SYNCFS] = { do_syncfs, "SYNCFS" },
> >  };
> >  
> >  #define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0]))
> > diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h
> > index 3bf786b03485..890c520b195a 100644
> > --- a/tools/virtiofsd/fuse_lowlevel.h
> > +++ b/tools/virtiofsd/fuse_lowlevel.h
> > @@ -1225,6 +1225,18 @@ struct fuse_lowlevel_ops {
> >       */
> >      void (*lseek)(fuse_req_t req, fuse_ino_t ino, off_t off, int whence,
> >                    struct fuse_file_info *fi);
> > +
> > +    /**
> > +     * Synchronize file system content
> > +     *
> > +     * If this request is answered with an error code of ENOSYS,
> > +     * this is treated as success and future calls to syncfs() will
> > +     * succeed automatically without being sent to the filesystem
> > +     * process.
> > +     *
> > +     * @param req request handle
> > +     */
> > +    void (*syncfs)(fuse_req_t req);
> >  };
> >  
> >  /**
> > diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
> > index dc940a1d048b..289900c6d274 100644
> > --- a/tools/virtiofsd/passthrough_ll.c
> > +++ b/tools/virtiofsd/passthrough_ll.c
> > @@ -3153,6 +3153,43 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence,
> >      }
> >  }
> >  
> > +static void lo_syncfs(fuse_req_t req)
> > +{
> > +    struct lo_data *lo = lo_data(req);
> > +    GHashTableIter iter;
> > +    gpointer key, value;
> > +    int err = 0;
> > +
> > +    pthread_mutex_lock(&lo->mutex);
> > +
> > +    g_hash_table_iter_init(&iter, lo->mnt_inodes);
> > +    while (g_hash_table_iter_next(&iter, &key, &value)) {
> > +        struct lo_inode *inode = value;
> > +        int fd;
> > +
> > +        fuse_log(FUSE_LOG_DEBUG, "lo_syncfs(ino=%" PRIu64 ")\n",
> > +                 inode->fuse_ino);
> > +
> > +        fd = lo_inode_open(lo, inode, O_RDONLY);
> > +        if (fd < 0) {
> > +            err = -fd;
> > +            break;
> > +        }
> > +
> > +        if (syncfs(fd) < 0) {
> 
> I don't have a good feeling about calling syncfs() with lo->mutex held.
> This seems to be that global mutex which is held at so many places
> and will serialize everything else. I think we agreed that syncfs()
> can take 10s of seconds if fs is busy. And that means we will stall
> other filesystem operations too.
> 
> So will be good if we can call syncfs() outside the lock. May be prepare
> a list of inodes which are there, take a reference and drop the lock.
> call syncfs and then drop the reference on inode.
> 

You're right. I'll do that.

> Vivek
> 
> > +            err = errno;
> > +            close(fd);
> > +            break;
> > +        }
> > +
> > +        close(fd);
> > +    }
> > +
> > +    pthread_mutex_unlock(&lo->mutex);
> > +
> > +    fuse_reply_err(req, err);
> > +}
> > +
> >  static void lo_destroy(void *userdata)
> >  {
> >      struct lo_data *lo = (struct lo_data *)userdata;
> > @@ -3214,6 +3251,7 @@ static struct fuse_lowlevel_ops lo_oper = {
> >      .copy_file_range = lo_copy_file_range,
> >  #endif
> >      .lseek = lo_lseek,
> > +    .syncfs = lo_syncfs,
> >      .destroy = lo_destroy,
> >  };
> >  
> > diff --git a/tools/virtiofsd/passthrough_seccomp.c b/tools/virtiofsd/passthrough_seccomp.c
> > index 62441cfcdb95..343188447901 100644
> > --- a/tools/virtiofsd/passthrough_seccomp.c
> > +++ b/tools/virtiofsd/passthrough_seccomp.c
> > @@ -107,6 +107,7 @@ static const int syscall_allowlist[] = {
> >      SCMP_SYS(set_robust_list),
> >      SCMP_SYS(setxattr),
> >      SCMP_SYS(symlinkat),
> > +    SCMP_SYS(syncfs),
> >      SCMP_SYS(time), /* Rarely needed, except on static builds */
> >      SCMP_SYS(tgkill),
> >      SCMP_SYS(unlinkat),
> > -- 
> > 2.26.3
> > 
>
Miklos Szeredi May 11, 2021, 12:31 p.m. UTC | #3
On Mon, May 10, 2021 at 5:55 PM Greg Kurz <groug@kaod.org> wrote:
>
> Honor the expected behavior of syncfs() to synchronously flush all data
> and metadata on linux systems. Simply loop on all known submounts and
> call syncfs() on them.

Why not pass the submount's root to the server, so it can do just one
targeted syncfs?

E.g. somehting like this in fuse_sync_fs():

args.nodeid = get_node_id(sb->s_root->d_inode);

Thanks,
Miklos
Vivek Goyal May 11, 2021, 12:54 p.m. UTC | #4
On Tue, May 11, 2021 at 02:31:14PM +0200, Miklos Szeredi wrote:
> On Mon, May 10, 2021 at 5:55 PM Greg Kurz <groug@kaod.org> wrote:
> >
> > Honor the expected behavior of syncfs() to synchronously flush all data
> > and metadata on linux systems. Simply loop on all known submounts and
> > call syncfs() on them.
> 
> Why not pass the submount's root to the server, so it can do just one
> targeted syncfs?
> 
> E.g. somehting like this in fuse_sync_fs():
> 
> args.nodeid = get_node_id(sb->s_root->d_inode);

Hi Miklos,

I think current proposal was due to lack of full understanding on my part.
I was assuming we have one super block in client and that's not the case
looks like. For every submount, we will have another superblock known
to vfs, IIUC. That means when sync() happens, we will receive ->syncfs()
for each of those super blocks. And that means file server does not
have to keep track of submounts explicitly and it will either receive
a single targeted SYNCFS (for the case of syncfs(fd)) or receive
multile SYNCFS calls (one for each submount when sync() is called).

If that's the case, it makes sense to send nodeid of the root dentry
of superblock and file server can just call syncfs(inode->fd).

Thanks
Vivek
Vivek Goyal May 11, 2021, 2:49 p.m. UTC | #5
On Tue, May 11, 2021 at 08:54:09AM -0400, Vivek Goyal wrote:
> On Tue, May 11, 2021 at 02:31:14PM +0200, Miklos Szeredi wrote:
> > On Mon, May 10, 2021 at 5:55 PM Greg Kurz <groug@kaod.org> wrote:
> > >
> > > Honor the expected behavior of syncfs() to synchronously flush all data
> > > and metadata on linux systems. Simply loop on all known submounts and
> > > call syncfs() on them.
> > 
> > Why not pass the submount's root to the server, so it can do just one
> > targeted syncfs?
> > 
> > E.g. somehting like this in fuse_sync_fs():
> > 
> > args.nodeid = get_node_id(sb->s_root->d_inode);
> 
> Hi Miklos,
> 
> I think current proposal was due to lack of full understanding on my part.
> I was assuming we have one super block in client and that's not the case
> looks like. For every submount, we will have another superblock known
> to vfs, IIUC. That means when sync() happens, we will receive ->syncfs()
> for each of those super blocks. And that means file server does not
> have to keep track of submounts explicitly and it will either receive
> a single targeted SYNCFS (for the case of syncfs(fd)) or receive
> multile SYNCFS calls (one for each submount when sync() is called).

Tried sync() with submounts enabled and we are seeing a SYNCFS call
only for top level super block and not for submounts.

Greg noticed that it probably is due to the fact that iterate_super()
skips super blocks which don't have SB_BORN flag set. 

Only vfs_get_tree() seems to set SB_BORN and for our submounts we
are not calling vfs_get_tree(), hence SB_BORN is not set. NFS seems
to call vfs_get_tree() and hence SB_BORN must be set for submounts.

Maybe we need to modify virtio_fs_get_tree() so that it can deal with
mount as well as submounts and then fuse_dentry_automount() should
probably call vfs_get_tree() and that should set SB_BORN and hopefully
sync() will work with it. Greg is planning to give it a try.

Does it sound reasonable.

Thanks
Vivek
Greg Kurz May 11, 2021, 3:08 p.m. UTC | #6
On Tue, 11 May 2021 14:31:14 +0200
Miklos Szeredi <mszeredi@redhat.com> wrote:

> On Mon, May 10, 2021 at 5:55 PM Greg Kurz <groug@kaod.org> wrote:
> >
> > Honor the expected behavior of syncfs() to synchronously flush all data
> > and metadata on linux systems. Simply loop on all known submounts and
> > call syncfs() on them.
> 
> Why not pass the submount's root to the server, so it can do just one
> targeted syncfs?
> 
> E.g. somehting like this in fuse_sync_fs():
> 
> args.nodeid = get_node_id(sb->s_root->d_inode);
> 
> Thanks,
> Miklos
> 

As Vivek already pointed out, there was some misunderstanding on
how submounts were supposed to work. Things got clearer since then :)

So, basically, we have two cases:

1) virtiofsd announces submounts : the d_automount implementation
   creates a new super block and mounts the submount

2) virtiofsd doesn't announce submounts: the client only knows
   about the top-level super block

You suggestion is for case 1) while this series was made with
case 2) in mind, hence the tracking of the super blocks in
the server.

Vivek and I discussed and agreed to address 2) later and
to just focus on 1) for now.

Your suggestion doesn't work with the current code base
because ->sync_fs() is never called on our submounts'
super blocks. This is because they don't have SB_BORN
set, which looks incorrect. A call to vfs_get_tree() would
fix it, but some code refactoring is needed in
fuse_dentry_automount() and virtio_fs_get_tree() for that.

Cheers,

--
Greg
Miklos Szeredi May 11, 2021, 3:08 p.m. UTC | #7
On Tue, May 11, 2021 at 4:49 PM Vivek Goyal <vgoyal@redhat.com> wrote:
>
> On Tue, May 11, 2021 at 08:54:09AM -0400, Vivek Goyal wrote:
> > On Tue, May 11, 2021 at 02:31:14PM +0200, Miklos Szeredi wrote:
> > > On Mon, May 10, 2021 at 5:55 PM Greg Kurz <groug@kaod.org> wrote:
> > > >
> > > > Honor the expected behavior of syncfs() to synchronously flush all data
> > > > and metadata on linux systems. Simply loop on all known submounts and
> > > > call syncfs() on them.
> > >
> > > Why not pass the submount's root to the server, so it can do just one
> > > targeted syncfs?
> > >
> > > E.g. somehting like this in fuse_sync_fs():
> > >
> > > args.nodeid = get_node_id(sb->s_root->d_inode);
> >
> > Hi Miklos,
> >
> > I think current proposal was due to lack of full understanding on my part.
> > I was assuming we have one super block in client and that's not the case
> > looks like. For every submount, we will have another superblock known
> > to vfs, IIUC. That means when sync() happens, we will receive ->syncfs()
> > for each of those super blocks. And that means file server does not
> > have to keep track of submounts explicitly and it will either receive
> > a single targeted SYNCFS (for the case of syncfs(fd)) or receive
> > multile SYNCFS calls (one for each submount when sync() is called).
>
> Tried sync() with submounts enabled and we are seeing a SYNCFS call
> only for top level super block and not for submounts.
>
> Greg noticed that it probably is due to the fact that iterate_super()
> skips super blocks which don't have SB_BORN flag set.
>
> Only vfs_get_tree() seems to set SB_BORN and for our submounts we
> are not calling vfs_get_tree(), hence SB_BORN is not set. NFS seems
> to call vfs_get_tree() and hence SB_BORN must be set for submounts.
>
> Maybe we need to modify virtio_fs_get_tree() so that it can deal with
> mount as well as submounts and then fuse_dentry_automount() should
> probably call vfs_get_tree() and that should set SB_BORN and hopefully
> sync() will work with it. Greg is planning to give it a try.
>
> Does it sound reasonable.

Just setting SB_BORN sounds much simpler.  What's the disadvantage?

Thanks,
Miklos
Vivek Goyal May 11, 2021, 3:16 p.m. UTC | #8
On Tue, May 11, 2021 at 05:08:42PM +0200, Miklos Szeredi wrote:
> On Tue, May 11, 2021 at 4:49 PM Vivek Goyal <vgoyal@redhat.com> wrote:
> >
> > On Tue, May 11, 2021 at 08:54:09AM -0400, Vivek Goyal wrote:
> > > On Tue, May 11, 2021 at 02:31:14PM +0200, Miklos Szeredi wrote:
> > > > On Mon, May 10, 2021 at 5:55 PM Greg Kurz <groug@kaod.org> wrote:
> > > > >
> > > > > Honor the expected behavior of syncfs() to synchronously flush all data
> > > > > and metadata on linux systems. Simply loop on all known submounts and
> > > > > call syncfs() on them.
> > > >
> > > > Why not pass the submount's root to the server, so it can do just one
> > > > targeted syncfs?
> > > >
> > > > E.g. somehting like this in fuse_sync_fs():
> > > >
> > > > args.nodeid = get_node_id(sb->s_root->d_inode);
> > >
> > > Hi Miklos,
> > >
> > > I think current proposal was due to lack of full understanding on my part.
> > > I was assuming we have one super block in client and that's not the case
> > > looks like. For every submount, we will have another superblock known
> > > to vfs, IIUC. That means when sync() happens, we will receive ->syncfs()
> > > for each of those super blocks. And that means file server does not
> > > have to keep track of submounts explicitly and it will either receive
> > > a single targeted SYNCFS (for the case of syncfs(fd)) or receive
> > > multile SYNCFS calls (one for each submount when sync() is called).
> >
> > Tried sync() with submounts enabled and we are seeing a SYNCFS call
> > only for top level super block and not for submounts.
> >
> > Greg noticed that it probably is due to the fact that iterate_super()
> > skips super blocks which don't have SB_BORN flag set.
> >
> > Only vfs_get_tree() seems to set SB_BORN and for our submounts we
> > are not calling vfs_get_tree(), hence SB_BORN is not set. NFS seems
> > to call vfs_get_tree() and hence SB_BORN must be set for submounts.
> >
> > Maybe we need to modify virtio_fs_get_tree() so that it can deal with
> > mount as well as submounts and then fuse_dentry_automount() should
> > probably call vfs_get_tree() and that should set SB_BORN and hopefully
> > sync() will work with it. Greg is planning to give it a try.
> >
> > Does it sound reasonable.
> 
> Just setting SB_BORN sounds much simpler.  What's the disadvantage?

I was little hesitant to set it directly because no other filesystem
seems to be doing it. Hence I assumed that VFS expects filesystems to
not set SB_BORN.

But I do agree that setting SB_BORN in automount code is much simpler
solution.

Thanks
Vivek
diff mbox series

Patch

diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
index 58e32fc96369..3be95ec903c9 100644
--- a/tools/virtiofsd/fuse_lowlevel.c
+++ b/tools/virtiofsd/fuse_lowlevel.c
@@ -1870,6 +1870,16 @@  static void do_lseek(fuse_req_t req, fuse_ino_t nodeid,
     }
 }
 
+static void do_syncfs(fuse_req_t req, fuse_ino_t nodeid,
+                      struct fuse_mbuf_iter *iter)
+{
+    if (req->se->op.syncfs) {
+        req->se->op.syncfs(req);
+    } else {
+        fuse_reply_err(req, ENOSYS);
+    }
+}
+
 static void do_init(fuse_req_t req, fuse_ino_t nodeid,
                     struct fuse_mbuf_iter *iter)
 {
@@ -2267,6 +2277,7 @@  static struct {
     [FUSE_RENAME2] = { do_rename2, "RENAME2" },
     [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" },
     [FUSE_LSEEK] = { do_lseek, "LSEEK" },
+    [FUSE_SYNCFS] = { do_syncfs, "SYNCFS" },
 };
 
 #define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0]))
diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h
index 3bf786b03485..890c520b195a 100644
--- a/tools/virtiofsd/fuse_lowlevel.h
+++ b/tools/virtiofsd/fuse_lowlevel.h
@@ -1225,6 +1225,18 @@  struct fuse_lowlevel_ops {
      */
     void (*lseek)(fuse_req_t req, fuse_ino_t ino, off_t off, int whence,
                   struct fuse_file_info *fi);
+
+    /**
+     * Synchronize file system content
+     *
+     * If this request is answered with an error code of ENOSYS,
+     * this is treated as success and future calls to syncfs() will
+     * succeed automatically without being sent to the filesystem
+     * process.
+     *
+     * @param req request handle
+     */
+    void (*syncfs)(fuse_req_t req);
 };
 
 /**
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index dc940a1d048b..289900c6d274 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -3153,6 +3153,43 @@  static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence,
     }
 }
 
+static void lo_syncfs(fuse_req_t req)
+{
+    struct lo_data *lo = lo_data(req);
+    GHashTableIter iter;
+    gpointer key, value;
+    int err = 0;
+
+    pthread_mutex_lock(&lo->mutex);
+
+    g_hash_table_iter_init(&iter, lo->mnt_inodes);
+    while (g_hash_table_iter_next(&iter, &key, &value)) {
+        struct lo_inode *inode = value;
+        int fd;
+
+        fuse_log(FUSE_LOG_DEBUG, "lo_syncfs(ino=%" PRIu64 ")\n",
+                 inode->fuse_ino);
+
+        fd = lo_inode_open(lo, inode, O_RDONLY);
+        if (fd < 0) {
+            err = -fd;
+            break;
+        }
+
+        if (syncfs(fd) < 0) {
+            err = errno;
+            close(fd);
+            break;
+        }
+
+        close(fd);
+    }
+
+    pthread_mutex_unlock(&lo->mutex);
+
+    fuse_reply_err(req, err);
+}
+
 static void lo_destroy(void *userdata)
 {
     struct lo_data *lo = (struct lo_data *)userdata;
@@ -3214,6 +3251,7 @@  static struct fuse_lowlevel_ops lo_oper = {
     .copy_file_range = lo_copy_file_range,
 #endif
     .lseek = lo_lseek,
+    .syncfs = lo_syncfs,
     .destroy = lo_destroy,
 };
 
diff --git a/tools/virtiofsd/passthrough_seccomp.c b/tools/virtiofsd/passthrough_seccomp.c
index 62441cfcdb95..343188447901 100644
--- a/tools/virtiofsd/passthrough_seccomp.c
+++ b/tools/virtiofsd/passthrough_seccomp.c
@@ -107,6 +107,7 @@  static const int syscall_allowlist[] = {
     SCMP_SYS(set_robust_list),
     SCMP_SYS(setxattr),
     SCMP_SYS(symlinkat),
+    SCMP_SYS(syncfs),
     SCMP_SYS(time), /* Rarely needed, except on static builds */
     SCMP_SYS(tgkill),
     SCMP_SYS(unlinkat),