Message ID | 20191212163904.159893-51-dgilbert@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | virtiofs daemon [all] | expand |
On Thu, Dec 12, 2019 at 04:38:10PM +0000, Dr. David Alan Gilbert (git) wrote: > From: Stefan Hajnoczi <stefanha@redhat.com> > > Only allow system calls that are needed by virtiofsd. All other system > calls cause SIGSYS to be directed at the thread and the process will > coredump. > > Restricting system calls reduces the kernel attack surface and limits > what the process can do when compromised. > > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> > with additional entries by: > Signed-off-by: Ganesh Maharaj Mahalingam <ganesh.mahalingam@intel.com> > Signed-off-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com> > Signed-off-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com> > Signed-off-by: piaojun <piaojun@huawei.com> > Signed-off-by: Vivek Goyal <vgoyal@redhat.com> > Signed-off-by: Eric Ren <renzhen@linux.alibaba.com> > --- > Makefile | 2 + > tools/virtiofsd/Makefile.objs | 5 +- > tools/virtiofsd/passthrough_ll.c | 2 + > tools/virtiofsd/seccomp.c | 141 +++++++++++++++++++++++++++++++ > tools/virtiofsd/seccomp.h | 14 +++ > 5 files changed, 163 insertions(+), 1 deletion(-) > create mode 100644 tools/virtiofsd/seccomp.c > create mode 100644 tools/virtiofsd/seccomp.h > > diff --git a/Makefile b/Makefile > index 8a5746d8a0..3f5d04e1f7 100644 > --- a/Makefile > +++ b/Makefile > @@ -322,8 +322,10 @@ HELPERS-y = > HELPERS-$(call land,$(CONFIG_SOFTMMU),$(CONFIG_LINUX)) = qemu-bridge-helper$(EXESUF) > > ifdef CONFIG_LINUX > +ifdef CONFIG_SECCOMP > HELPERS-y += virtiofsd$(EXESUF) > vhost-user-json-y += tools/virtiofsd/50-qemu-virtiofsd.json > +endif > > ifdef CONFIG_VIRGL > ifdef CONFIG_GBM > diff --git a/tools/virtiofsd/Makefile.objs b/tools/virtiofsd/Makefile.objs > index 67be16332c..941b19f18e 100644 > --- a/tools/virtiofsd/Makefile.objs > +++ b/tools/virtiofsd/Makefile.objs > @@ -6,5 +6,8 @@ virtiofsd-obj-y = buffer.o \ > fuse_signals.o \ > fuse_virtio.o \ > helper.o \ > - passthrough_ll.o > + passthrough_ll.o \ > + seccomp.o > > +seccomp.o-cflags := $(SECCOMP_CFLAGS) > +seccomp.o-libs := $(SECCOMP_LIBS) > diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c > index 754ef2618b..701608c6df 100644 > --- a/tools/virtiofsd/passthrough_ll.c > +++ b/tools/virtiofsd/passthrough_ll.c > @@ -58,6 +58,7 @@ > #include <unistd.h> > > #include "passthrough_helpers.h" > +#include "seccomp.h" > > #define HAVE_POSIX_FALLOCATE 1 > struct lo_map_elem { > @@ -2073,6 +2074,7 @@ static void setup_sandbox(struct lo_data *lo, struct fuse_session *se) > { > setup_namespaces(lo, se); > setup_mounts(lo->source); > + setup_seccomp(); > } > > int main(int argc, char *argv[]) > diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c > new file mode 100644 > index 0000000000..6359bb55bb > --- /dev/null > +++ b/tools/virtiofsd/seccomp.c > @@ -0,0 +1,141 @@ > +/* > + * Seccomp sandboxing for virtiofsd > + * > + * Copyright (C) 2019 Red Hat, Inc. > + * > + * SPDX-License-Identifier: GPL-2.0-or-later > + */ > + > +#include "seccomp.h" > +#include "fuse_i.h" > +#include "fuse_log.h" > +#include <errno.h> > +#include <glib.h> > +#include <seccomp.h> > +#include <stdlib.h> > + > +/* Bodge for libseccomp 2.4.2 which broke ppoll */ > +#if !defined(__SNR_ppoll) && defined(__SNR_brk) > +#ifdef __NR_ppoll > +#define __SNR_ppoll __NR_ppoll > +#else > +#define __SNR_ppoll __PNR_ppoll > +#endif > +#endif > + > +static const int syscall_whitelist[] = { > + /* TODO ireg sem*() syscalls */ > + SCMP_SYS(brk), > + SCMP_SYS(capget), /* For CAP_FSETID */ > + SCMP_SYS(capset), > + SCMP_SYS(clock_gettime), > + SCMP_SYS(clone), clone2 ? clone3 ? IIC some archs in Linux will require the newer variants. > + SCMP_SYS(close), > + SCMP_SYS(copy_file_range), > + SCMP_SYS(dup), > + SCMP_SYS(eventfd2), > + SCMP_SYS(exit), > + SCMP_SYS(exit_group), > + SCMP_SYS(fallocate), > + SCMP_SYS(fchmodat), > + SCMP_SYS(fchownat), > + SCMP_SYS(fcntl), > + SCMP_SYS(fdatasync), > + SCMP_SYS(fgetxattr), > + SCMP_SYS(flistxattr), > + SCMP_SYS(flock), > + SCMP_SYS(fremovexattr), > + SCMP_SYS(fsetxattr), > + SCMP_SYS(fstat), > + SCMP_SYS(fstatfs), > + SCMP_SYS(fsync), > + SCMP_SYS(ftruncate), > + SCMP_SYS(futex), > + SCMP_SYS(getdents), > + SCMP_SYS(getdents64), > + SCMP_SYS(getegid), > + SCMP_SYS(geteuid), > + SCMP_SYS(getpid), > + SCMP_SYS(gettid), > + SCMP_SYS(gettimeofday), > + SCMP_SYS(linkat), > + SCMP_SYS(lseek), > + SCMP_SYS(madvise), > + SCMP_SYS(mkdirat), > + SCMP_SYS(mknodat), > + SCMP_SYS(mmap), > + SCMP_SYS(mprotect), > + SCMP_SYS(mremap), > + SCMP_SYS(munmap), > + SCMP_SYS(newfstatat), > + SCMP_SYS(open), > + SCMP_SYS(openat), > + SCMP_SYS(ppoll), > + SCMP_SYS(prctl), /* TODO restrict to just PR_SET_NAME? */ > + SCMP_SYS(preadv), > + SCMP_SYS(pread64), > + SCMP_SYS(pwritev), > + SCMP_SYS(pwrite64), > + SCMP_SYS(read), > + SCMP_SYS(readlinkat), > + SCMP_SYS(recvmsg), > + SCMP_SYS(renameat), > + SCMP_SYS(renameat2), > + SCMP_SYS(rt_sigaction), > + SCMP_SYS(rt_sigprocmask), > + SCMP_SYS(rt_sigreturn), > + SCMP_SYS(sendmsg), > + SCMP_SYS(setresgid), Should be setresgid32 instead I think. We don't want the legacy syscall that's limted to 16-bit GIDs Needs the code fix I mention in an earlier patch too. > + SCMP_SYS(setresuid), Same as above > + SCMP_SYS(set_robust_list), > + SCMP_SYS(symlinkat), > + SCMP_SYS(time), /* Rarely needed, except on static builds */ > + SCMP_SYS(tgkill), > + SCMP_SYS(unlinkat), > + SCMP_SYS(utimensat), > + SCMP_SYS(write), > + SCMP_SYS(writev), > +}; Regards, Daniel
* Daniel P. Berrangé (berrange@redhat.com) wrote: > On Thu, Dec 12, 2019 at 04:38:10PM +0000, Dr. David Alan Gilbert (git) wrote: > > From: Stefan Hajnoczi <stefanha@redhat.com> > > > > Only allow system calls that are needed by virtiofsd. All other system > > calls cause SIGSYS to be directed at the thread and the process will > > coredump. > > > > Restricting system calls reduces the kernel attack surface and limits > > what the process can do when compromised. > > > > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> > > with additional entries by: > > Signed-off-by: Ganesh Maharaj Mahalingam <ganesh.mahalingam@intel.com> > > Signed-off-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com> > > Signed-off-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com> > > Signed-off-by: piaojun <piaojun@huawei.com> > > Signed-off-by: Vivek Goyal <vgoyal@redhat.com> > > Signed-off-by: Eric Ren <renzhen@linux.alibaba.com> > > --- > > Makefile | 2 + > > tools/virtiofsd/Makefile.objs | 5 +- > > tools/virtiofsd/passthrough_ll.c | 2 + > > tools/virtiofsd/seccomp.c | 141 +++++++++++++++++++++++++++++++ > > tools/virtiofsd/seccomp.h | 14 +++ > > 5 files changed, 163 insertions(+), 1 deletion(-) > > create mode 100644 tools/virtiofsd/seccomp.c > > create mode 100644 tools/virtiofsd/seccomp.h > > > > diff --git a/Makefile b/Makefile > > index 8a5746d8a0..3f5d04e1f7 100644 > > --- a/Makefile > > +++ b/Makefile > > @@ -322,8 +322,10 @@ HELPERS-y = > > HELPERS-$(call land,$(CONFIG_SOFTMMU),$(CONFIG_LINUX)) = qemu-bridge-helper$(EXESUF) > > > > ifdef CONFIG_LINUX > > +ifdef CONFIG_SECCOMP > > HELPERS-y += virtiofsd$(EXESUF) > > vhost-user-json-y += tools/virtiofsd/50-qemu-virtiofsd.json > > +endif > > > > ifdef CONFIG_VIRGL > > ifdef CONFIG_GBM > > diff --git a/tools/virtiofsd/Makefile.objs b/tools/virtiofsd/Makefile.objs > > index 67be16332c..941b19f18e 100644 > > --- a/tools/virtiofsd/Makefile.objs > > +++ b/tools/virtiofsd/Makefile.objs > > @@ -6,5 +6,8 @@ virtiofsd-obj-y = buffer.o \ > > fuse_signals.o \ > > fuse_virtio.o \ > > helper.o \ > > - passthrough_ll.o > > + passthrough_ll.o \ > > + seccomp.o > > > > +seccomp.o-cflags := $(SECCOMP_CFLAGS) > > +seccomp.o-libs := $(SECCOMP_LIBS) > > diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c > > index 754ef2618b..701608c6df 100644 > > --- a/tools/virtiofsd/passthrough_ll.c > > +++ b/tools/virtiofsd/passthrough_ll.c > > @@ -58,6 +58,7 @@ > > #include <unistd.h> > > > > #include "passthrough_helpers.h" > > +#include "seccomp.h" > > > > #define HAVE_POSIX_FALLOCATE 1 > > struct lo_map_elem { > > @@ -2073,6 +2074,7 @@ static void setup_sandbox(struct lo_data *lo, struct fuse_session *se) > > { > > setup_namespaces(lo, se); > > setup_mounts(lo->source); > > + setup_seccomp(); > > } > > > > int main(int argc, char *argv[]) > > diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c > > new file mode 100644 > > index 0000000000..6359bb55bb > > --- /dev/null > > +++ b/tools/virtiofsd/seccomp.c > > @@ -0,0 +1,141 @@ > > +/* > > + * Seccomp sandboxing for virtiofsd > > + * > > + * Copyright (C) 2019 Red Hat, Inc. > > + * > > + * SPDX-License-Identifier: GPL-2.0-or-later > > + */ > > + > > +#include "seccomp.h" > > +#include "fuse_i.h" > > +#include "fuse_log.h" > > +#include <errno.h> > > +#include <glib.h> > > +#include <seccomp.h> > > +#include <stdlib.h> > > + > > +/* Bodge for libseccomp 2.4.2 which broke ppoll */ > > +#if !defined(__SNR_ppoll) && defined(__SNR_brk) > > +#ifdef __NR_ppoll > > +#define __SNR_ppoll __NR_ppoll > > +#else > > +#define __SNR_ppoll __PNR_ppoll > > +#endif > > +#endif > > + > > +static const int syscall_whitelist[] = { > > + /* TODO ireg sem*() syscalls */ > > + SCMP_SYS(brk), > > + SCMP_SYS(capget), /* For CAP_FSETID */ > > + SCMP_SYS(capset), > > + SCMP_SYS(clock_gettime), > > + SCMP_SYS(clone), > > clone2 ? clone3 ? IIC some archs in Linux > will require the newer variants. It looks like clone2 was an Itanium only thing; lets ignore that. Clone3 is very new; so we're going to have to do: #ifdef __NR_clone3 SCMP_SYS(clone3); #endif > > + SCMP_SYS(close), > > + SCMP_SYS(copy_file_range), > > + SCMP_SYS(dup), > > + SCMP_SYS(eventfd2), > > + SCMP_SYS(exit), > > + SCMP_SYS(exit_group), > > + SCMP_SYS(fallocate), > > + SCMP_SYS(fchmodat), > > + SCMP_SYS(fchownat), > > + SCMP_SYS(fcntl), > > + SCMP_SYS(fdatasync), > > + SCMP_SYS(fgetxattr), > > + SCMP_SYS(flistxattr), > > + SCMP_SYS(flock), > > + SCMP_SYS(fremovexattr), > > + SCMP_SYS(fsetxattr), > > + SCMP_SYS(fstat), > > + SCMP_SYS(fstatfs), > > + SCMP_SYS(fsync), > > + SCMP_SYS(ftruncate), > > + SCMP_SYS(futex), > > + SCMP_SYS(getdents), > > + SCMP_SYS(getdents64), > > + SCMP_SYS(getegid), > > + SCMP_SYS(geteuid), > > + SCMP_SYS(getpid), > > + SCMP_SYS(gettid), > > + SCMP_SYS(gettimeofday), > > + SCMP_SYS(linkat), > > + SCMP_SYS(lseek), > > + SCMP_SYS(madvise), > > + SCMP_SYS(mkdirat), > > + SCMP_SYS(mknodat), > > + SCMP_SYS(mmap), > > + SCMP_SYS(mprotect), > > + SCMP_SYS(mremap), > > + SCMP_SYS(munmap), > > + SCMP_SYS(newfstatat), > > + SCMP_SYS(open), > > + SCMP_SYS(openat), > > + SCMP_SYS(ppoll), > > + SCMP_SYS(prctl), /* TODO restrict to just PR_SET_NAME? */ > > + SCMP_SYS(preadv), > > + SCMP_SYS(pread64), > > + SCMP_SYS(pwritev), > > + SCMP_SYS(pwrite64), > > + SCMP_SYS(read), > > + SCMP_SYS(readlinkat), > > + SCMP_SYS(recvmsg), > > + SCMP_SYS(renameat), > > + SCMP_SYS(renameat2), > > + SCMP_SYS(rt_sigaction), > > + SCMP_SYS(rt_sigprocmask), > > + SCMP_SYS(rt_sigreturn), > > + SCMP_SYS(sendmsg), > > + SCMP_SYS(setresgid), > > Should be setresgid32 instead I think. We don't > want the legacy syscall that's limted to 16-bit GIDs > > Needs the code fix I mention in an earlier patch too. > > > + SCMP_SYS(setresuid), > > Same as above OK. Interestingly I see setresuid/setresgid blacklisted as SET_PRIVILEGED in qemu's qemu-secomp.c but not the 32 versions; perhaps those should be added - but then I don't understand why qemu would ever allow them to be enabled. Dave > > + SCMP_SYS(set_robust_list), > > + SCMP_SYS(symlinkat), > > + SCMP_SYS(time), /* Rarely needed, except on static builds */ > > + SCMP_SYS(tgkill), > > + SCMP_SYS(unlinkat), > > + SCMP_SYS(utimensat), > > + SCMP_SYS(write), > > + SCMP_SYS(writev), > > +}; > > > Regards, > Daniel > -- > |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| > |: https://libvirt.org -o- https://fstop138.berrange.com :| > |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :| -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
diff --git a/Makefile b/Makefile index 8a5746d8a0..3f5d04e1f7 100644 --- a/Makefile +++ b/Makefile @@ -322,8 +322,10 @@ HELPERS-y = HELPERS-$(call land,$(CONFIG_SOFTMMU),$(CONFIG_LINUX)) = qemu-bridge-helper$(EXESUF) ifdef CONFIG_LINUX +ifdef CONFIG_SECCOMP HELPERS-y += virtiofsd$(EXESUF) vhost-user-json-y += tools/virtiofsd/50-qemu-virtiofsd.json +endif ifdef CONFIG_VIRGL ifdef CONFIG_GBM diff --git a/tools/virtiofsd/Makefile.objs b/tools/virtiofsd/Makefile.objs index 67be16332c..941b19f18e 100644 --- a/tools/virtiofsd/Makefile.objs +++ b/tools/virtiofsd/Makefile.objs @@ -6,5 +6,8 @@ virtiofsd-obj-y = buffer.o \ fuse_signals.o \ fuse_virtio.o \ helper.o \ - passthrough_ll.o + passthrough_ll.o \ + seccomp.o +seccomp.o-cflags := $(SECCOMP_CFLAGS) +seccomp.o-libs := $(SECCOMP_LIBS) diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c index 754ef2618b..701608c6df 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c @@ -58,6 +58,7 @@ #include <unistd.h> #include "passthrough_helpers.h" +#include "seccomp.h" #define HAVE_POSIX_FALLOCATE 1 struct lo_map_elem { @@ -2073,6 +2074,7 @@ static void setup_sandbox(struct lo_data *lo, struct fuse_session *se) { setup_namespaces(lo, se); setup_mounts(lo->source); + setup_seccomp(); } int main(int argc, char *argv[]) diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c new file mode 100644 index 0000000000..6359bb55bb --- /dev/null +++ b/tools/virtiofsd/seccomp.c @@ -0,0 +1,141 @@ +/* + * Seccomp sandboxing for virtiofsd + * + * Copyright (C) 2019 Red Hat, Inc. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "seccomp.h" +#include "fuse_i.h" +#include "fuse_log.h" +#include <errno.h> +#include <glib.h> +#include <seccomp.h> +#include <stdlib.h> + +/* Bodge for libseccomp 2.4.2 which broke ppoll */ +#if !defined(__SNR_ppoll) && defined(__SNR_brk) +#ifdef __NR_ppoll +#define __SNR_ppoll __NR_ppoll +#else +#define __SNR_ppoll __PNR_ppoll +#endif +#endif + +static const int syscall_whitelist[] = { + /* TODO ireg sem*() syscalls */ + SCMP_SYS(brk), + SCMP_SYS(capget), /* For CAP_FSETID */ + SCMP_SYS(capset), + SCMP_SYS(clock_gettime), + SCMP_SYS(clone), + SCMP_SYS(close), + SCMP_SYS(copy_file_range), + SCMP_SYS(dup), + SCMP_SYS(eventfd2), + SCMP_SYS(exit), + SCMP_SYS(exit_group), + SCMP_SYS(fallocate), + SCMP_SYS(fchmodat), + SCMP_SYS(fchownat), + SCMP_SYS(fcntl), + SCMP_SYS(fdatasync), + SCMP_SYS(fgetxattr), + SCMP_SYS(flistxattr), + SCMP_SYS(flock), + SCMP_SYS(fremovexattr), + SCMP_SYS(fsetxattr), + SCMP_SYS(fstat), + SCMP_SYS(fstatfs), + SCMP_SYS(fsync), + SCMP_SYS(ftruncate), + SCMP_SYS(futex), + SCMP_SYS(getdents), + SCMP_SYS(getdents64), + SCMP_SYS(getegid), + SCMP_SYS(geteuid), + SCMP_SYS(getpid), + SCMP_SYS(gettid), + SCMP_SYS(gettimeofday), + SCMP_SYS(linkat), + SCMP_SYS(lseek), + SCMP_SYS(madvise), + SCMP_SYS(mkdirat), + SCMP_SYS(mknodat), + SCMP_SYS(mmap), + SCMP_SYS(mprotect), + SCMP_SYS(mremap), + SCMP_SYS(munmap), + SCMP_SYS(newfstatat), + SCMP_SYS(open), + SCMP_SYS(openat), + SCMP_SYS(ppoll), + SCMP_SYS(prctl), /* TODO restrict to just PR_SET_NAME? */ + SCMP_SYS(preadv), + SCMP_SYS(pread64), + SCMP_SYS(pwritev), + SCMP_SYS(pwrite64), + SCMP_SYS(read), + SCMP_SYS(readlinkat), + SCMP_SYS(recvmsg), + SCMP_SYS(renameat), + SCMP_SYS(renameat2), + SCMP_SYS(rt_sigaction), + SCMP_SYS(rt_sigprocmask), + SCMP_SYS(rt_sigreturn), + SCMP_SYS(sendmsg), + SCMP_SYS(setresgid), + SCMP_SYS(setresuid), + SCMP_SYS(set_robust_list), + SCMP_SYS(symlinkat), + SCMP_SYS(time), /* Rarely needed, except on static builds */ + SCMP_SYS(tgkill), + SCMP_SYS(unlinkat), + SCMP_SYS(utimensat), + SCMP_SYS(write), + SCMP_SYS(writev), +}; + +void setup_seccomp(void) +{ + scmp_filter_ctx ctx; + size_t i; + +#ifdef SCMP_ACT_KILL_PROCESS + ctx = seccomp_init(SCMP_ACT_KILL_PROCESS); + /* Handle a newer libseccomp but an older kernel */ + if (!ctx && errno == EOPNOTSUPP) { + ctx = seccomp_init(SCMP_ACT_TRAP); + } +#else + ctx = seccomp_init(SCMP_ACT_TRAP); +#endif + if (!ctx) { + fuse_log(FUSE_LOG_ERR, "seccomp_init() failed\n"); + exit(1); + } + + for (i = 0; i < G_N_ELEMENTS(syscall_whitelist); i++) { + if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, + syscall_whitelist[i], 0) != 0) { + fuse_log(FUSE_LOG_ERR, "seccomp_rule_add syscall %d", + syscall_whitelist[i]); + exit(1); + } + } + + /* libvhost-user calls this for post-copy migration, we don't need it */ + if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOSYS), + SCMP_SYS(userfaultfd), 0) != 0) { + fuse_log(FUSE_LOG_ERR, "seccomp_rule_add userfaultfd failed\n"); + exit(1); + } + + if (seccomp_load(ctx) < 0) { + fuse_log(FUSE_LOG_ERR, "seccomp_load() failed\n"); + exit(1); + } + + seccomp_release(ctx); +} diff --git a/tools/virtiofsd/seccomp.h b/tools/virtiofsd/seccomp.h new file mode 100644 index 0000000000..86bce72652 --- /dev/null +++ b/tools/virtiofsd/seccomp.h @@ -0,0 +1,14 @@ +/* + * Seccomp sandboxing for virtiofsd + * + * Copyright (C) 2019 Red Hat, Inc. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef VIRTIOFSD_SECCOMP_H +#define VIRTIOFSD_SECCOMP_H + +void setup_seccomp(void); + +#endif /* VIRTIOFSD_SECCOMP_H */