diff mbox series

[v5,1/3] virtiofsd: Add support for FUSE_SYNCFS request with announce_submounts

Message ID 20220214135820.43897-2-groug@kaod.org (mailing list archive)
State New, archived
Headers show
Series virtiofsd: Add support for FUSE_SYNCFS request | expand

Commit Message

Greg Kurz Feb. 14, 2022, 1:58 p.m. UTC
Honor the expected behavior of syncfs() to synchronously flush all data
and metadata to disk on linux systems.

If virtiofsd is started with '-o announce_submounts', the client is
expected to send a FUSE_SYNCFS request for each individual submount.
In this case, we just create a new file descriptor on the submount
inode with lo_inode_open(), call syncfs() on it and close it. The
intermediary file is needed because O_PATH descriptors aren't
backed by an actual file and syncfs() would fail with EBADF.

If virtiofsd is started without '-o announce_submounts' or if the
client doesn't have the FUSE_CAP_SUBMOUNTS capability, the client
only sends a single FUSE_SYNCFS request for the root inode. The
server would thus need to track submounts internally and call
syncfs() on each of them. This will be implemented later.

Note that syncfs() might suffer from a time penalty if the submounts
are being hammered by some unrelated workload on the host. The only
solution to prevent that is to avoid shared mounts.

Signed-off-by: Greg Kurz <groug@kaod.org>
---
 tools/virtiofsd/fuse_lowlevel.c       | 11 +++++++
 tools/virtiofsd/fuse_lowlevel.h       | 13 ++++++++
 tools/virtiofsd/passthrough_ll.c      | 45 +++++++++++++++++++++++++++
 tools/virtiofsd/passthrough_seccomp.c |  1 +
 4 files changed, 70 insertions(+)

Comments

German Maglione Feb. 14, 2022, 3:43 p.m. UTC | #1
On Mon, Feb 14, 2022 at 3:00 PM Greg Kurz <groug@kaod.org> wrote:

> Honor the expected behavior of syncfs() to synchronously flush all data
> and metadata to disk on linux systems.
>
> If virtiofsd is started with '-o announce_submounts', the client is
> expected to send a FUSE_SYNCFS request for each individual submount.
> In this case, we just create a new file descriptor on the submount
> inode with lo_inode_open(), call syncfs() on it and close it. The
> intermediary file is needed because O_PATH descriptors aren't
> backed by an actual file and syncfs() would fail with EBADF.
>
> If virtiofsd is started without '-o announce_submounts' or if the
> client doesn't have the FUSE_CAP_SUBMOUNTS capability, the client
> only sends a single FUSE_SYNCFS request for the root inode. The
> server would thus need to track submounts internally and call
> syncfs() on each of them. This will be implemented later.
>
> Note that syncfs() might suffer from a time penalty if the submounts
> are being hammered by some unrelated workload on the host. The only
> solution to prevent that is to avoid shared mounts.
>
> Signed-off-by: Greg Kurz <groug@kaod.org>
> ---
>  tools/virtiofsd/fuse_lowlevel.c       | 11 +++++++
>  tools/virtiofsd/fuse_lowlevel.h       | 13 ++++++++
>  tools/virtiofsd/passthrough_ll.c      | 45 +++++++++++++++++++++++++++
>  tools/virtiofsd/passthrough_seccomp.c |  1 +
>  4 files changed, 70 insertions(+)
>
> diff --git a/tools/virtiofsd/fuse_lowlevel.c
> b/tools/virtiofsd/fuse_lowlevel.c
> index e4679c73abc2..e02d8b25a5f6 100644
> --- a/tools/virtiofsd/fuse_lowlevel.c
> +++ b/tools/virtiofsd/fuse_lowlevel.c
> @@ -1876,6 +1876,16 @@ static void do_lseek(fuse_req_t req, fuse_ino_t
> nodeid,
>      }
>  }
>
> +static void do_syncfs(fuse_req_t req, fuse_ino_t nodeid,
> +                      struct fuse_mbuf_iter *iter)
> +{
> +    if (req->se->op.syncfs) {
> +        req->se->op.syncfs(req, nodeid);
> +    } else {
> +        fuse_reply_err(req, ENOSYS);
> +    }
> +}
> +
>  static void do_init(fuse_req_t req, fuse_ino_t nodeid,
>                      struct fuse_mbuf_iter *iter)
>  {
> @@ -2280,6 +2290,7 @@ static struct {
>      [FUSE_RENAME2] = { do_rename2, "RENAME2" },
>      [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" },
>      [FUSE_LSEEK] = { do_lseek, "LSEEK" },
> +    [FUSE_SYNCFS] = { do_syncfs, "SYNCFS" },
>  };
>
>  #define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0]))
> diff --git a/tools/virtiofsd/fuse_lowlevel.h
> b/tools/virtiofsd/fuse_lowlevel.h
> index c55c0ca2fc1c..b889dae4de0e 100644
> --- a/tools/virtiofsd/fuse_lowlevel.h
> +++ b/tools/virtiofsd/fuse_lowlevel.h
> @@ -1226,6 +1226,19 @@ struct fuse_lowlevel_ops {
>       */
>      void (*lseek)(fuse_req_t req, fuse_ino_t ino, off_t off, int whence,
>                    struct fuse_file_info *fi);
> +
> +    /**
> +     * Synchronize file system content
> +     *
> +     * If this request is answered with an error code of ENOSYS,
> +     * this is treated as success and future calls to syncfs() will
> +     * succeed automatically without being sent to the filesystem
> +     * process.
> +     *
> +     * @param req request handle
> +     * @param ino the inode number
> +     */
> +    void (*syncfs)(fuse_req_t req, fuse_ino_t ino);
>  };
>
>  /**
> diff --git a/tools/virtiofsd/passthrough_ll.c
> b/tools/virtiofsd/passthrough_ll.c
> index b3d0674f6d2f..2bf5d40df531 100644
> --- a/tools/virtiofsd/passthrough_ll.c
> +++ b/tools/virtiofsd/passthrough_ll.c
> @@ -3357,6 +3357,50 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t
> ino, off_t off, int whence,
>      }
>  }
>
> +static int lo_do_syncfs(struct lo_data *lo, struct lo_inode *inode)
> +{
> +    int fd, ret = 0;
> +
> +    fuse_log(FUSE_LOG_DEBUG, "lo_do_syncfs(ino=%" PRIu64 ")\n",
> +             inode->fuse_ino);
> +
> +    fd = lo_inode_open(lo, inode, O_RDONLY);
> +    if (fd < 0) {
> +        return -fd;
> +    }
> +
> +    if (syncfs(fd) < 0) {
> +        ret = errno;
> +    }
> +
> +    close(fd);
> +    return ret;
> +}
> +
> +static void lo_syncfs(fuse_req_t req, fuse_ino_t ino)
> +{
> +    struct lo_data *lo = lo_data(req);
> +    int err;
> +
> +    if (lo->announce_submounts) {
> +        struct lo_inode *inode;
> +
> +        inode = lo_inode(req, ino);
> +        if (!inode) {
> +            fuse_reply_err(req, EBADF);
> +            return;
> +        }
> +
> +        err = lo_do_syncfs(lo, inode);
> +        lo_inode_put(lo, &inode);
> +    } else {
> +        /* Requires the sever to track submounts. Not implemented yet */
> +        err = ENOSYS;
> +    }
>

In the current rust version we don't check if announce_submount is enabled,
with and without it lo_syncfs do the same thing. So, if announce_submount
is not enabled, at least the root shared dir get synced.



> +
> +    fuse_reply_err(req, err);
> +}
> +
>  static void lo_destroy(void *userdata)
>  {
>      struct lo_data *lo = (struct lo_data *)userdata;
> @@ -3417,6 +3461,7 @@ static struct fuse_lowlevel_ops lo_oper = {
>      .copy_file_range = lo_copy_file_range,
>  #endif
>      .lseek = lo_lseek,
> +    .syncfs = lo_syncfs,
>      .destroy = lo_destroy,
>  };
>
> diff --git a/tools/virtiofsd/passthrough_seccomp.c
> b/tools/virtiofsd/passthrough_seccomp.c
> index a3ce9f898d2d..3e9d6181dc69 100644
> --- a/tools/virtiofsd/passthrough_seccomp.c
> +++ b/tools/virtiofsd/passthrough_seccomp.c
> @@ -108,6 +108,7 @@ static const int syscall_allowlist[] = {
>      SCMP_SYS(set_robust_list),
>      SCMP_SYS(setxattr),
>      SCMP_SYS(symlinkat),
> +    SCMP_SYS(syncfs),
>      SCMP_SYS(time), /* Rarely needed, except on static builds */
>      SCMP_SYS(tgkill),
>      SCMP_SYS(unlinkat),
> --
> 2.34.1
>
> _______________________________________________
> Virtio-fs mailing list
> Virtio-fs@redhat.com
> https://listman.redhat.com/mailman/listinfo/virtio-fs
>
>
Greg Kurz Feb. 14, 2022, 4:02 p.m. UTC | #2
On Mon, 14 Feb 2022 16:43:15 +0100
German Maglione <gmaglione@redhat.com> wrote:

> On Mon, Feb 14, 2022 at 3:00 PM Greg Kurz <groug@kaod.org> wrote:
> 
> > Honor the expected behavior of syncfs() to synchronously flush all data
> > and metadata to disk on linux systems.
> >
> > If virtiofsd is started with '-o announce_submounts', the client is
> > expected to send a FUSE_SYNCFS request for each individual submount.
> > In this case, we just create a new file descriptor on the submount
> > inode with lo_inode_open(), call syncfs() on it and close it. The
> > intermediary file is needed because O_PATH descriptors aren't
> > backed by an actual file and syncfs() would fail with EBADF.
> >
> > If virtiofsd is started without '-o announce_submounts' or if the
> > client doesn't have the FUSE_CAP_SUBMOUNTS capability, the client
> > only sends a single FUSE_SYNCFS request for the root inode. The
> > server would thus need to track submounts internally and call
> > syncfs() on each of them. This will be implemented later.
> >
> > Note that syncfs() might suffer from a time penalty if the submounts
> > are being hammered by some unrelated workload on the host. The only
> > solution to prevent that is to avoid shared mounts.
> >
> > Signed-off-by: Greg Kurz <groug@kaod.org>
> > ---
> >  tools/virtiofsd/fuse_lowlevel.c       | 11 +++++++
> >  tools/virtiofsd/fuse_lowlevel.h       | 13 ++++++++
> >  tools/virtiofsd/passthrough_ll.c      | 45 +++++++++++++++++++++++++++
> >  tools/virtiofsd/passthrough_seccomp.c |  1 +
> >  4 files changed, 70 insertions(+)
> >
> > diff --git a/tools/virtiofsd/fuse_lowlevel.c
> > b/tools/virtiofsd/fuse_lowlevel.c
> > index e4679c73abc2..e02d8b25a5f6 100644
> > --- a/tools/virtiofsd/fuse_lowlevel.c
> > +++ b/tools/virtiofsd/fuse_lowlevel.c
> > @@ -1876,6 +1876,16 @@ static void do_lseek(fuse_req_t req, fuse_ino_t
> > nodeid,
> >      }
> >  }
> >
> > +static void do_syncfs(fuse_req_t req, fuse_ino_t nodeid,
> > +                      struct fuse_mbuf_iter *iter)
> > +{
> > +    if (req->se->op.syncfs) {
> > +        req->se->op.syncfs(req, nodeid);
> > +    } else {
> > +        fuse_reply_err(req, ENOSYS);
> > +    }
> > +}
> > +
> >  static void do_init(fuse_req_t req, fuse_ino_t nodeid,
> >                      struct fuse_mbuf_iter *iter)
> >  {
> > @@ -2280,6 +2290,7 @@ static struct {
> >      [FUSE_RENAME2] = { do_rename2, "RENAME2" },
> >      [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" },
> >      [FUSE_LSEEK] = { do_lseek, "LSEEK" },
> > +    [FUSE_SYNCFS] = { do_syncfs, "SYNCFS" },
> >  };
> >
> >  #define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0]))
> > diff --git a/tools/virtiofsd/fuse_lowlevel.h
> > b/tools/virtiofsd/fuse_lowlevel.h
> > index c55c0ca2fc1c..b889dae4de0e 100644
> > --- a/tools/virtiofsd/fuse_lowlevel.h
> > +++ b/tools/virtiofsd/fuse_lowlevel.h
> > @@ -1226,6 +1226,19 @@ struct fuse_lowlevel_ops {
> >       */
> >      void (*lseek)(fuse_req_t req, fuse_ino_t ino, off_t off, int whence,
> >                    struct fuse_file_info *fi);
> > +
> > +    /**
> > +     * Synchronize file system content
> > +     *
> > +     * If this request is answered with an error code of ENOSYS,
> > +     * this is treated as success and future calls to syncfs() will
> > +     * succeed automatically without being sent to the filesystem
> > +     * process.
> > +     *
> > +     * @param req request handle
> > +     * @param ino the inode number
> > +     */
> > +    void (*syncfs)(fuse_req_t req, fuse_ino_t ino);
> >  };
> >
> >  /**
> > diff --git a/tools/virtiofsd/passthrough_ll.c
> > b/tools/virtiofsd/passthrough_ll.c
> > index b3d0674f6d2f..2bf5d40df531 100644
> > --- a/tools/virtiofsd/passthrough_ll.c
> > +++ b/tools/virtiofsd/passthrough_ll.c
> > @@ -3357,6 +3357,50 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t
> > ino, off_t off, int whence,
> >      }
> >  }
> >
> > +static int lo_do_syncfs(struct lo_data *lo, struct lo_inode *inode)
> > +{
> > +    int fd, ret = 0;
> > +
> > +    fuse_log(FUSE_LOG_DEBUG, "lo_do_syncfs(ino=%" PRIu64 ")\n",
> > +             inode->fuse_ino);
> > +
> > +    fd = lo_inode_open(lo, inode, O_RDONLY);
> > +    if (fd < 0) {
> > +        return -fd;
> > +    }
> > +
> > +    if (syncfs(fd) < 0) {
> > +        ret = errno;
> > +    }
> > +
> > +    close(fd);
> > +    return ret;
> > +}
> > +
> > +static void lo_syncfs(fuse_req_t req, fuse_ino_t ino)
> > +{
> > +    struct lo_data *lo = lo_data(req);
> > +    int err;
> > +
> > +    if (lo->announce_submounts) {
> > +        struct lo_inode *inode;
> > +
> > +        inode = lo_inode(req, ino);
> > +        if (!inode) {
> > +            fuse_reply_err(req, EBADF);
> > +            return;
> > +        }
> > +
> > +        err = lo_do_syncfs(lo, inode);
> > +        lo_inode_put(lo, &inode);
> > +    } else {
> > +        /* Requires the sever to track submounts. Not implemented yet */
> > +        err = ENOSYS;
> > +    }
> >
> 
> In the current rust version we don't check if announce_submount is enabled,
> with and without it lo_syncfs do the same thing. So, if announce_submount
> is not enabled, at least the root shared dir get synced.
> 

I hesitated to do that but since the announce_submount case is
handled in patch 3, I kept it simple. Anyway, I'm fine to post
a new version that matches what the rust implementation does.

> 
> 
> > +
> > +    fuse_reply_err(req, err);
> > +}
> > +
> >  static void lo_destroy(void *userdata)
> >  {
> >      struct lo_data *lo = (struct lo_data *)userdata;
> > @@ -3417,6 +3461,7 @@ static struct fuse_lowlevel_ops lo_oper = {
> >      .copy_file_range = lo_copy_file_range,
> >  #endif
> >      .lseek = lo_lseek,
> > +    .syncfs = lo_syncfs,
> >      .destroy = lo_destroy,
> >  };
> >
> > diff --git a/tools/virtiofsd/passthrough_seccomp.c
> > b/tools/virtiofsd/passthrough_seccomp.c
> > index a3ce9f898d2d..3e9d6181dc69 100644
> > --- a/tools/virtiofsd/passthrough_seccomp.c
> > +++ b/tools/virtiofsd/passthrough_seccomp.c
> > @@ -108,6 +108,7 @@ static const int syscall_allowlist[] = {
> >      SCMP_SYS(set_robust_list),
> >      SCMP_SYS(setxattr),
> >      SCMP_SYS(symlinkat),
> > +    SCMP_SYS(syncfs),
> >      SCMP_SYS(time), /* Rarely needed, except on static builds */
> >      SCMP_SYS(tgkill),
> >      SCMP_SYS(unlinkat),
> > --
> > 2.34.1
> >
> > _______________________________________________
> > Virtio-fs mailing list
> > Virtio-fs@redhat.com
> > https://listman.redhat.com/mailman/listinfo/virtio-fs
> >
> >
>
diff mbox series

Patch

diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
index e4679c73abc2..e02d8b25a5f6 100644
--- a/tools/virtiofsd/fuse_lowlevel.c
+++ b/tools/virtiofsd/fuse_lowlevel.c
@@ -1876,6 +1876,16 @@  static void do_lseek(fuse_req_t req, fuse_ino_t nodeid,
     }
 }
 
+static void do_syncfs(fuse_req_t req, fuse_ino_t nodeid,
+                      struct fuse_mbuf_iter *iter)
+{
+    if (req->se->op.syncfs) {
+        req->se->op.syncfs(req, nodeid);
+    } else {
+        fuse_reply_err(req, ENOSYS);
+    }
+}
+
 static void do_init(fuse_req_t req, fuse_ino_t nodeid,
                     struct fuse_mbuf_iter *iter)
 {
@@ -2280,6 +2290,7 @@  static struct {
     [FUSE_RENAME2] = { do_rename2, "RENAME2" },
     [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" },
     [FUSE_LSEEK] = { do_lseek, "LSEEK" },
+    [FUSE_SYNCFS] = { do_syncfs, "SYNCFS" },
 };
 
 #define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0]))
diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h
index c55c0ca2fc1c..b889dae4de0e 100644
--- a/tools/virtiofsd/fuse_lowlevel.h
+++ b/tools/virtiofsd/fuse_lowlevel.h
@@ -1226,6 +1226,19 @@  struct fuse_lowlevel_ops {
      */
     void (*lseek)(fuse_req_t req, fuse_ino_t ino, off_t off, int whence,
                   struct fuse_file_info *fi);
+
+    /**
+     * Synchronize file system content
+     *
+     * If this request is answered with an error code of ENOSYS,
+     * this is treated as success and future calls to syncfs() will
+     * succeed automatically without being sent to the filesystem
+     * process.
+     *
+     * @param req request handle
+     * @param ino the inode number
+     */
+    void (*syncfs)(fuse_req_t req, fuse_ino_t ino);
 };
 
 /**
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index b3d0674f6d2f..2bf5d40df531 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -3357,6 +3357,50 @@  static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence,
     }
 }
 
+static int lo_do_syncfs(struct lo_data *lo, struct lo_inode *inode)
+{
+    int fd, ret = 0;
+
+    fuse_log(FUSE_LOG_DEBUG, "lo_do_syncfs(ino=%" PRIu64 ")\n",
+             inode->fuse_ino);
+
+    fd = lo_inode_open(lo, inode, O_RDONLY);
+    if (fd < 0) {
+        return -fd;
+    }
+
+    if (syncfs(fd) < 0) {
+        ret = errno;
+    }
+
+    close(fd);
+    return ret;
+}
+
+static void lo_syncfs(fuse_req_t req, fuse_ino_t ino)
+{
+    struct lo_data *lo = lo_data(req);
+    int err;
+
+    if (lo->announce_submounts) {
+        struct lo_inode *inode;
+
+        inode = lo_inode(req, ino);
+        if (!inode) {
+            fuse_reply_err(req, EBADF);
+            return;
+        }
+
+        err = lo_do_syncfs(lo, inode);
+        lo_inode_put(lo, &inode);
+    } else {
+        /* Requires the sever to track submounts. Not implemented yet */
+        err = ENOSYS;
+    }
+
+    fuse_reply_err(req, err);
+}
+
 static void lo_destroy(void *userdata)
 {
     struct lo_data *lo = (struct lo_data *)userdata;
@@ -3417,6 +3461,7 @@  static struct fuse_lowlevel_ops lo_oper = {
     .copy_file_range = lo_copy_file_range,
 #endif
     .lseek = lo_lseek,
+    .syncfs = lo_syncfs,
     .destroy = lo_destroy,
 };
 
diff --git a/tools/virtiofsd/passthrough_seccomp.c b/tools/virtiofsd/passthrough_seccomp.c
index a3ce9f898d2d..3e9d6181dc69 100644
--- a/tools/virtiofsd/passthrough_seccomp.c
+++ b/tools/virtiofsd/passthrough_seccomp.c
@@ -108,6 +108,7 @@  static const int syscall_allowlist[] = {
     SCMP_SYS(set_robust_list),
     SCMP_SYS(setxattr),
     SCMP_SYS(symlinkat),
+    SCMP_SYS(syncfs),
     SCMP_SYS(time), /* Rarely needed, except on static builds */
     SCMP_SYS(tgkill),
     SCMP_SYS(unlinkat),