Message ID | 20210510155539.998747-4-groug@kaod.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | virtiofsd: Add support for FUSE_SYNCFS request | expand |
On Mon, May 10, 2021 at 05:55:39PM +0200, Greg Kurz wrote: > Honor the expected behavior of syncfs() to synchronously flush all data > and metadata on linux systems. Simply loop on all known submounts and > call syncfs() on them. > > Note that syncfs() might suffer from a time penalty if the submounts > are being hammered by some unrelated workload on the host. The only > solution to avoid that is to avoid shared submounts. > > Signed-off-by: Greg Kurz <groug@kaod.org> > --- > tools/virtiofsd/fuse_lowlevel.c | 11 ++++++++ > tools/virtiofsd/fuse_lowlevel.h | 12 +++++++++ > tools/virtiofsd/passthrough_ll.c | 38 +++++++++++++++++++++++++++ > tools/virtiofsd/passthrough_seccomp.c | 1 + > 4 files changed, 62 insertions(+) > > diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c > index 58e32fc96369..3be95ec903c9 100644 > --- a/tools/virtiofsd/fuse_lowlevel.c > +++ b/tools/virtiofsd/fuse_lowlevel.c > @@ -1870,6 +1870,16 @@ static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, > } > } > > +static void do_syncfs(fuse_req_t req, fuse_ino_t nodeid, > + struct fuse_mbuf_iter *iter) > +{ > + if (req->se->op.syncfs) { > + req->se->op.syncfs(req); > + } else { > + fuse_reply_err(req, ENOSYS); > + } > +} > + > static void do_init(fuse_req_t req, fuse_ino_t nodeid, > struct fuse_mbuf_iter *iter) > { > @@ -2267,6 +2277,7 @@ static struct { > [FUSE_RENAME2] = { do_rename2, "RENAME2" }, > [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" }, > [FUSE_LSEEK] = { do_lseek, "LSEEK" }, > + [FUSE_SYNCFS] = { do_syncfs, "SYNCFS" }, > }; > > #define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0])) > diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h > index 3bf786b03485..890c520b195a 100644 > --- a/tools/virtiofsd/fuse_lowlevel.h > +++ b/tools/virtiofsd/fuse_lowlevel.h > @@ -1225,6 +1225,18 @@ struct fuse_lowlevel_ops { > */ > void (*lseek)(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, > struct 
fuse_file_info *fi); > + > + /** > + * Synchronize file system content > + * > + * If this request is answered with an error code of ENOSYS, > + * this is treated as success and future calls to syncfs() will > + * succeed automatically without being sent to the filesystem > + * process. > + * > + * @param req request handle > + */ > + void (*syncfs)(fuse_req_t req); > }; > > /** > diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c > index dc940a1d048b..289900c6d274 100644 > --- a/tools/virtiofsd/passthrough_ll.c > +++ b/tools/virtiofsd/passthrough_ll.c > @@ -3153,6 +3153,43 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, > } > } > > +static void lo_syncfs(fuse_req_t req) > +{ > + struct lo_data *lo = lo_data(req); > + GHashTableIter iter; > + gpointer key, value; > + int err = 0; > + > + pthread_mutex_lock(&lo->mutex); > + > + g_hash_table_iter_init(&iter, lo->mnt_inodes); > + while (g_hash_table_iter_next(&iter, &key, &value)) { > + struct lo_inode *inode = value; > + int fd; > + > + fuse_log(FUSE_LOG_DEBUG, "lo_syncfs(ino=%" PRIu64 ")\n", > + inode->fuse_ino); > + > + fd = lo_inode_open(lo, inode, O_RDONLY); > + if (fd < 0) { > + err = -fd; > + break; > + } > + > + if (syncfs(fd) < 0) { I don't have a good feeling about calling syncfs() with lo->mutex held. This seems to be that global mutex which is held at so many places and will serialize everything else. I think we agreed that syncfs() can take 10s of seconds if fs is busy. And that means we will stall other filesystem operations too. So will be good if we can call syncfs() outside the lock. May be prepare a list of inodes which are there, take a reference and drop the lock. call syncfs and then drop the reference on inode. 
Vivek > + err = errno; > + close(fd); > + break; > + } > + > + close(fd); > + } > + > + pthread_mutex_unlock(&lo->mutex); > + > + fuse_reply_err(req, err); > +} > + > static void lo_destroy(void *userdata) > { > struct lo_data *lo = (struct lo_data *)userdata; > @@ -3214,6 +3251,7 @@ static struct fuse_lowlevel_ops lo_oper = { > .copy_file_range = lo_copy_file_range, > #endif > .lseek = lo_lseek, > + .syncfs = lo_syncfs, > .destroy = lo_destroy, > }; > > diff --git a/tools/virtiofsd/passthrough_seccomp.c b/tools/virtiofsd/passthrough_seccomp.c > index 62441cfcdb95..343188447901 100644 > --- a/tools/virtiofsd/passthrough_seccomp.c > +++ b/tools/virtiofsd/passthrough_seccomp.c > @@ -107,6 +107,7 @@ static const int syscall_allowlist[] = { > SCMP_SYS(set_robust_list), > SCMP_SYS(setxattr), > SCMP_SYS(symlinkat), > + SCMP_SYS(syncfs), > SCMP_SYS(time), /* Rarely needed, except on static builds */ > SCMP_SYS(tgkill), > SCMP_SYS(unlinkat), > -- > 2.26.3 >
On Mon, 10 May 2021 15:15:02 -0400 Vivek Goyal <vgoyal@redhat.com> wrote: > On Mon, May 10, 2021 at 05:55:39PM +0200, Greg Kurz wrote: > > Honor the expected behavior of syncfs() to synchronously flush all data > > and metadata on linux systems. Simply loop on all known submounts and > > call syncfs() on them. > > > > Note that syncfs() might suffer from a time penalty if the submounts > > are being hammered by some unrelated workload on the host. The only > > solution to avoid that is to avoid shared submounts. > > > > Signed-off-by: Greg Kurz <groug@kaod.org> > > --- > > tools/virtiofsd/fuse_lowlevel.c | 11 ++++++++ > > tools/virtiofsd/fuse_lowlevel.h | 12 +++++++++ > > tools/virtiofsd/passthrough_ll.c | 38 +++++++++++++++++++++++++++ > > tools/virtiofsd/passthrough_seccomp.c | 1 + > > 4 files changed, 62 insertions(+) > > > > diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c > > index 58e32fc96369..3be95ec903c9 100644 > > --- a/tools/virtiofsd/fuse_lowlevel.c > > +++ b/tools/virtiofsd/fuse_lowlevel.c > > @@ -1870,6 +1870,16 @@ static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, > > } > > } > > > > +static void do_syncfs(fuse_req_t req, fuse_ino_t nodeid, > > + struct fuse_mbuf_iter *iter) > > +{ > > + if (req->se->op.syncfs) { > > + req->se->op.syncfs(req); > > + } else { > > + fuse_reply_err(req, ENOSYS); > > + } > > +} > > + > > static void do_init(fuse_req_t req, fuse_ino_t nodeid, > > struct fuse_mbuf_iter *iter) > > { > > @@ -2267,6 +2277,7 @@ static struct { > > [FUSE_RENAME2] = { do_rename2, "RENAME2" }, > > [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" }, > > [FUSE_LSEEK] = { do_lseek, "LSEEK" }, > > + [FUSE_SYNCFS] = { do_syncfs, "SYNCFS" }, > > }; > > > > #define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0])) > > diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h > > index 3bf786b03485..890c520b195a 100644 > > --- a/tools/virtiofsd/fuse_lowlevel.h > > +++ 
b/tools/virtiofsd/fuse_lowlevel.h > > @@ -1225,6 +1225,18 @@ struct fuse_lowlevel_ops { > > */ > > void (*lseek)(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, > > struct fuse_file_info *fi); > > + > > + /** > > + * Synchronize file system content > > + * > > + * If this request is answered with an error code of ENOSYS, > > + * this is treated as success and future calls to syncfs() will > > + * succeed automatically without being sent to the filesystem > > + * process. > > + * > > + * @param req request handle > > + */ > > + void (*syncfs)(fuse_req_t req); > > }; > > > > /** > > diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c > > index dc940a1d048b..289900c6d274 100644 > > --- a/tools/virtiofsd/passthrough_ll.c > > +++ b/tools/virtiofsd/passthrough_ll.c > > @@ -3153,6 +3153,43 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, > > } > > } > > > > +static void lo_syncfs(fuse_req_t req) > > +{ > > + struct lo_data *lo = lo_data(req); > > + GHashTableIter iter; > > + gpointer key, value; > > + int err = 0; > > + > > + pthread_mutex_lock(&lo->mutex); > > + > > + g_hash_table_iter_init(&iter, lo->mnt_inodes); > > + while (g_hash_table_iter_next(&iter, &key, &value)) { > > + struct lo_inode *inode = value; > > + int fd; > > + > > + fuse_log(FUSE_LOG_DEBUG, "lo_syncfs(ino=%" PRIu64 ")\n", > > + inode->fuse_ino); > > + > > + fd = lo_inode_open(lo, inode, O_RDONLY); > > + if (fd < 0) { > > + err = -fd; > > + break; > > + } > > + > > + if (syncfs(fd) < 0) { > > I don't have a good feeling about calling syncfs() with lo->mutex held. > This seems to be that global mutex which is held at so many places > and will serialize everything else. I think we agreed that syncfs() > can take 10s of seconds if fs is busy. And that means we will stall > other filesystem operations too. > > So will be good if we can call syncfs() outside the lock. 
May be prepare > a list of inodes which are there, take a reference and drop the lock. > call syncfs and then drop the reference on inode. > You're right. I'll do that. > Vivek > > > + err = errno; > > + close(fd); > > + break; > > + } > > + > > + close(fd); > > + } > > + > > + pthread_mutex_unlock(&lo->mutex); > > + > > + fuse_reply_err(req, err); > > +} > > + > > static void lo_destroy(void *userdata) > > { > > struct lo_data *lo = (struct lo_data *)userdata; > > @@ -3214,6 +3251,7 @@ static struct fuse_lowlevel_ops lo_oper = { > > .copy_file_range = lo_copy_file_range, > > #endif > > .lseek = lo_lseek, > > + .syncfs = lo_syncfs, > > .destroy = lo_destroy, > > }; > > > > diff --git a/tools/virtiofsd/passthrough_seccomp.c b/tools/virtiofsd/passthrough_seccomp.c > > index 62441cfcdb95..343188447901 100644 > > --- a/tools/virtiofsd/passthrough_seccomp.c > > +++ b/tools/virtiofsd/passthrough_seccomp.c > > @@ -107,6 +107,7 @@ static const int syscall_allowlist[] = { > > SCMP_SYS(set_robust_list), > > SCMP_SYS(setxattr), > > SCMP_SYS(symlinkat), > > + SCMP_SYS(syncfs), > > SCMP_SYS(time), /* Rarely needed, except on static builds */ > > SCMP_SYS(tgkill), > > SCMP_SYS(unlinkat), > > -- > > 2.26.3 > > >
On Mon, May 10, 2021 at 5:55 PM Greg Kurz <groug@kaod.org> wrote: > > Honor the expected behavior of syncfs() to synchronously flush all data > and metadata on linux systems. Simply loop on all known submounts and > call syncfs() on them. Why not pass the submount's root to the server, so it can do just one targeted syncfs? E.g. something like this in fuse_sync_fs(): args.nodeid = get_node_id(sb->s_root->d_inode); Thanks, Miklos
On Tue, May 11, 2021 at 02:31:14PM +0200, Miklos Szeredi wrote: > On Mon, May 10, 2021 at 5:55 PM Greg Kurz <groug@kaod.org> wrote: > > > > Honor the expected behavior of syncfs() to synchronously flush all data > > and metadata on linux systems. Simply loop on all known submounts and > > call syncfs() on them. > > Why not pass the submount's root to the server, so it can do just one > targeted syncfs? > > E.g. somehting like this in fuse_sync_fs(): > > args.nodeid = get_node_id(sb->s_root->d_inode); Hi Miklos, I think the current proposal was due to lack of full understanding on my part. I was assuming we have one super block in client and it looks like that's not the case. For every submount, we will have another superblock known to vfs, IIUC. That means when sync() happens, we will receive ->syncfs() for each of those super blocks. And that means file server does not have to keep track of submounts explicitly and it will either receive a single targeted SYNCFS (for the case of syncfs(fd)) or receive multiple SYNCFS calls (one for each submount when sync() is called). If that's the case, it makes sense to send nodeid of the root dentry of superblock and file server can just call syncfs(inode->fd). Thanks Vivek
On Tue, May 11, 2021 at 08:54:09AM -0400, Vivek Goyal wrote: > On Tue, May 11, 2021 at 02:31:14PM +0200, Miklos Szeredi wrote: > > On Mon, May 10, 2021 at 5:55 PM Greg Kurz <groug@kaod.org> wrote: > > > > > > Honor the expected behavior of syncfs() to synchronously flush all data > > > and metadata on linux systems. Simply loop on all known submounts and > > > call syncfs() on them. > > > > Why not pass the submount's root to the server, so it can do just one > > targeted syncfs? > > > > E.g. somehting like this in fuse_sync_fs(): > > > > args.nodeid = get_node_id(sb->s_root->d_inode); > > Hi Miklos, > > I think current proposal was due to lack of full understanding on my part. > I was assuming we have one super block in client and that's not the case > looks like. For every submount, we will have another superblock known > to vfs, IIUC. That means when sync() happens, we will receive ->syncfs() > for each of those super blocks. And that means file server does not > have to keep track of submounts explicitly and it will either receive > a single targeted SYNCFS (for the case of syncfs(fd)) or receive > multile SYNCFS calls (one for each submount when sync() is called). Tried sync() with submounts enabled and we are seeing a SYNCFS call only for the top-level super block and not for submounts. Greg noticed that it probably is due to the fact that iterate_supers() skips super blocks which don't have the SB_BORN flag set. Only vfs_get_tree() seems to set SB_BORN and for our submounts we are not calling vfs_get_tree(), hence SB_BORN is not set. NFS seems to call vfs_get_tree() and hence SB_BORN must be set for submounts. Maybe we need to modify virtio_fs_get_tree() so that it can deal with mount as well as submounts and then fuse_dentry_automount() should probably call vfs_get_tree() and that should set SB_BORN and hopefully sync() will work with it. Greg is planning to give it a try. Does that sound reasonable? Thanks Vivek
On Tue, 11 May 2021 14:31:14 +0200 Miklos Szeredi <mszeredi@redhat.com> wrote: > On Mon, May 10, 2021 at 5:55 PM Greg Kurz <groug@kaod.org> wrote: > > > > Honor the expected behavior of syncfs() to synchronously flush all data > > and metadata on linux systems. Simply loop on all known submounts and > > call syncfs() on them. > > Why not pass the submount's root to the server, so it can do just one > targeted syncfs? > > E.g. somehting like this in fuse_sync_fs(): > > args.nodeid = get_node_id(sb->s_root->d_inode); > > Thanks, > Miklos > As Vivek already pointed out, there was some misunderstanding on how submounts were supposed to work. Things got clearer since then :) So, basically, we have two cases: 1) virtiofsd announces submounts: the d_automount implementation creates a new super block and mounts the submount 2) virtiofsd doesn't announce submounts: the client only knows about the top-level super block Your suggestion is for case 1) while this series was made with case 2) in mind, hence the tracking of the super blocks in the server. Vivek and I discussed and agreed to address 2) later and to just focus on 1) for now. Your suggestion doesn't work with the current code base because ->sync_fs() is never called on our submounts' super blocks. This is because they don't have SB_BORN set, which looks incorrect. A call to vfs_get_tree() would fix it, but some code refactoring is needed in fuse_dentry_automount() and virtio_fs_get_tree() for that. Cheers, -- Greg
On Tue, May 11, 2021 at 4:49 PM Vivek Goyal <vgoyal@redhat.com> wrote: > > On Tue, May 11, 2021 at 08:54:09AM -0400, Vivek Goyal wrote: > > On Tue, May 11, 2021 at 02:31:14PM +0200, Miklos Szeredi wrote: > > > On Mon, May 10, 2021 at 5:55 PM Greg Kurz <groug@kaod.org> wrote: > > > > > > > > Honor the expected behavior of syncfs() to synchronously flush all data > > > > and metadata on linux systems. Simply loop on all known submounts and > > > > call syncfs() on them. > > > > > > Why not pass the submount's root to the server, so it can do just one > > > targeted syncfs? > > > > > > E.g. somehting like this in fuse_sync_fs(): > > > > > > args.nodeid = get_node_id(sb->s_root->d_inode); > > > > Hi Miklos, > > > > I think current proposal was due to lack of full understanding on my part. > > I was assuming we have one super block in client and that's not the case > > looks like. For every submount, we will have another superblock known > > to vfs, IIUC. That means when sync() happens, we will receive ->syncfs() > > for each of those super blocks. And that means file server does not > > have to keep track of submounts explicitly and it will either receive > > a single targeted SYNCFS (for the case of syncfs(fd)) or receive > > multile SYNCFS calls (one for each submount when sync() is called). > > Tried sync() with submounts enabled and we are seeing a SYNCFS call > only for top level super block and not for submounts. > > Greg noticed that it probably is due to the fact that iterate_super() > skips super blocks which don't have SB_BORN flag set. > > Only vfs_get_tree() seems to set SB_BORN and for our submounts we > are not calling vfs_get_tree(), hence SB_BORN is not set. NFS seems > to call vfs_get_tree() and hence SB_BORN must be set for submounts. 
> > Maybe we need to modify virtio_fs_get_tree() so that it can deal with > mount as well as submounts and then fuse_dentry_automount() should > probably call vfs_get_tree() and that should set SB_BORN and hopefully > sync() will work with it. Greg is planning to give it a try. > > Does it sound reasonable. Just setting SB_BORN sounds much simpler. What's the disadvantage? Thanks, Miklos
On Tue, May 11, 2021 at 05:08:42PM +0200, Miklos Szeredi wrote: > On Tue, May 11, 2021 at 4:49 PM Vivek Goyal <vgoyal@redhat.com> wrote: > > > > On Tue, May 11, 2021 at 08:54:09AM -0400, Vivek Goyal wrote: > > > On Tue, May 11, 2021 at 02:31:14PM +0200, Miklos Szeredi wrote: > > > > On Mon, May 10, 2021 at 5:55 PM Greg Kurz <groug@kaod.org> wrote: > > > > > > > > > > Honor the expected behavior of syncfs() to synchronously flush all data > > > > > and metadata on linux systems. Simply loop on all known submounts and > > > > > call syncfs() on them. > > > > > > > > Why not pass the submount's root to the server, so it can do just one > > > > targeted syncfs? > > > > > > > > E.g. somehting like this in fuse_sync_fs(): > > > > > > > > args.nodeid = get_node_id(sb->s_root->d_inode); > > > > > > Hi Miklos, > > > > > > I think current proposal was due to lack of full understanding on my part. > > > I was assuming we have one super block in client and that's not the case > > > looks like. For every submount, we will have another superblock known > > > to vfs, IIUC. That means when sync() happens, we will receive ->syncfs() > > > for each of those super blocks. And that means file server does not > > > have to keep track of submounts explicitly and it will either receive > > > a single targeted SYNCFS (for the case of syncfs(fd)) or receive > > > multile SYNCFS calls (one for each submount when sync() is called). > > > > Tried sync() with submounts enabled and we are seeing a SYNCFS call > > only for top level super block and not for submounts. > > > > Greg noticed that it probably is due to the fact that iterate_super() > > skips super blocks which don't have SB_BORN flag set. > > > > Only vfs_get_tree() seems to set SB_BORN and for our submounts we > > are not calling vfs_get_tree(), hence SB_BORN is not set. NFS seems > > to call vfs_get_tree() and hence SB_BORN must be set for submounts. 
> > > > Maybe we need to modify virtio_fs_get_tree() so that it can deal with > > mount as well as submounts and then fuse_dentry_automount() should > > probably call vfs_get_tree() and that should set SB_BORN and hopefully > > sync() will work with it. Greg is planning to give it a try. > > > > Does it sound reasonable. > > Just setting SB_BORN sounds much simpler. What's the disadvantage? I was a little hesitant to set it directly because no other filesystem seems to be doing it. Hence I assumed that VFS expects filesystems to not set SB_BORN. But I do agree that setting SB_BORN in the automount code is a much simpler solution. Thanks Vivek
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c index 58e32fc96369..3be95ec903c9 100644 --- a/tools/virtiofsd/fuse_lowlevel.c +++ b/tools/virtiofsd/fuse_lowlevel.c @@ -1870,6 +1870,16 @@ static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, } } +static void do_syncfs(fuse_req_t req, fuse_ino_t nodeid, + struct fuse_mbuf_iter *iter) +{ + if (req->se->op.syncfs) { + req->se->op.syncfs(req); + } else { + fuse_reply_err(req, ENOSYS); + } +} + static void do_init(fuse_req_t req, fuse_ino_t nodeid, struct fuse_mbuf_iter *iter) { @@ -2267,6 +2277,7 @@ static struct { [FUSE_RENAME2] = { do_rename2, "RENAME2" }, [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" }, [FUSE_LSEEK] = { do_lseek, "LSEEK" }, + [FUSE_SYNCFS] = { do_syncfs, "SYNCFS" }, }; #define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0])) diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h index 3bf786b03485..890c520b195a 100644 --- a/tools/virtiofsd/fuse_lowlevel.h +++ b/tools/virtiofsd/fuse_lowlevel.h @@ -1225,6 +1225,18 @@ struct fuse_lowlevel_ops { */ void (*lseek)(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, struct fuse_file_info *fi); + + /** + * Synchronize file system content + * + * If this request is answered with an error code of ENOSYS, + * this is treated as success and future calls to syncfs() will + * succeed automatically without being sent to the filesystem + * process. 
+ * + * @param req request handle + */ + void (*syncfs)(fuse_req_t req); }; /** diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c index dc940a1d048b..289900c6d274 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c @@ -3153,6 +3153,43 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, } } +static void lo_syncfs(fuse_req_t req) +{ + struct lo_data *lo = lo_data(req); + GHashTableIter iter; + gpointer key, value; + int err = 0; + + pthread_mutex_lock(&lo->mutex); + + g_hash_table_iter_init(&iter, lo->mnt_inodes); + while (g_hash_table_iter_next(&iter, &key, &value)) { + struct lo_inode *inode = value; + int fd; + + fuse_log(FUSE_LOG_DEBUG, "lo_syncfs(ino=%" PRIu64 ")\n", + inode->fuse_ino); + + fd = lo_inode_open(lo, inode, O_RDONLY); + if (fd < 0) { + err = -fd; + break; + } + + if (syncfs(fd) < 0) { + err = errno; + close(fd); + break; + } + + close(fd); + } + + pthread_mutex_unlock(&lo->mutex); + + fuse_reply_err(req, err); +} + static void lo_destroy(void *userdata) { struct lo_data *lo = (struct lo_data *)userdata; @@ -3214,6 +3251,7 @@ static struct fuse_lowlevel_ops lo_oper = { .copy_file_range = lo_copy_file_range, #endif .lseek = lo_lseek, + .syncfs = lo_syncfs, .destroy = lo_destroy, }; diff --git a/tools/virtiofsd/passthrough_seccomp.c b/tools/virtiofsd/passthrough_seccomp.c index 62441cfcdb95..343188447901 100644 --- a/tools/virtiofsd/passthrough_seccomp.c +++ b/tools/virtiofsd/passthrough_seccomp.c @@ -107,6 +107,7 @@ static const int syscall_allowlist[] = { SCMP_SYS(set_robust_list), SCMP_SYS(setxattr), SCMP_SYS(symlinkat), + SCMP_SYS(syncfs), SCMP_SYS(time), /* Rarely needed, except on static builds */ SCMP_SYS(tgkill), SCMP_SYS(unlinkat),
Honor the expected behavior of syncfs() to synchronously flush all data and metadata on linux systems. Simply loop on all known submounts and call syncfs() on them. Note that syncfs() might suffer from a time penalty if the submounts are being hammered by some unrelated workload on the host. The only solution to avoid that is to avoid shared submounts. Signed-off-by: Greg Kurz <groug@kaod.org> --- tools/virtiofsd/fuse_lowlevel.c | 11 ++++++++ tools/virtiofsd/fuse_lowlevel.h | 12 +++++++++ tools/virtiofsd/passthrough_ll.c | 38 +++++++++++++++++++++++++++ tools/virtiofsd/passthrough_seccomp.c | 1 + 4 files changed, 62 insertions(+)