diff mbox series

[v2,051/109] virtiofsd: add seccomp whitelist

Message ID 20200121122433.50803-52-dgilbert@redhat.com (mailing list archive)
State New, archived
Headers show
Series virtiofs daemon [all] | expand

Commit Message

Dr. David Alan Gilbert Jan. 21, 2020, 12:23 p.m. UTC
From: Stefan Hajnoczi <stefanha@redhat.com>

Only allow system calls that are needed by virtiofsd.  All other system
calls cause SIGSYS to be directed at the thread and the process will
coredump.

Restricting system calls reduces the kernel attack surface and limits
what the process can do when compromised.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
with additional entries by:
Signed-off-by: Ganesh Maharaj Mahalingam <ganesh.mahalingam@intel.com>
Signed-off-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Signed-off-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Signed-off-by: piaojun <piaojun@huawei.com>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Eric Ren <renzhen@linux.alibaba.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
 Makefile                         |   2 +-
 tools/virtiofsd/Makefile.objs    |   5 +-
 tools/virtiofsd/passthrough_ll.c |   2 +
 tools/virtiofsd/seccomp.c        | 150 +++++++++++++++++++++++++++++++
 tools/virtiofsd/seccomp.h        |  14 +++
 5 files changed, 171 insertions(+), 2 deletions(-)
 create mode 100644 tools/virtiofsd/seccomp.c
 create mode 100644 tools/virtiofsd/seccomp.h

Comments

Philippe Mathieu-Daudé Jan. 21, 2020, 3:54 p.m. UTC | #1
On 1/21/20 1:23 PM, Dr. David Alan Gilbert (git) wrote:
> From: Stefan Hajnoczi <stefanha@redhat.com>
> 
> Only allow system calls that are needed by virtiofsd.  All other system
> calls cause SIGSYS to be directed at the thread and the process will
> coredump.
> 
> Restricting system calls reduces the kernel attack surface and limits
> what the process can do when compromised.
> 
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> with additional entries by:
> Signed-off-by: Ganesh Maharaj Mahalingam <ganesh.mahalingam@intel.com>
> Signed-off-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
> Signed-off-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
> Signed-off-by: piaojun <piaojun@huawei.com>
> Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
> Signed-off-by: Eric Ren <renzhen@linux.alibaba.com>
> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> ---
>   Makefile                         |   2 +-
>   tools/virtiofsd/Makefile.objs    |   5 +-
>   tools/virtiofsd/passthrough_ll.c |   2 +
>   tools/virtiofsd/seccomp.c        | 150 +++++++++++++++++++++++++++++++
>   tools/virtiofsd/seccomp.h        |  14 +++
>   5 files changed, 171 insertions(+), 2 deletions(-)
>   create mode 100644 tools/virtiofsd/seccomp.c
>   create mode 100644 tools/virtiofsd/seccomp.h
> 
> diff --git a/Makefile b/Makefile
> index a87e06ad93..967d59c98a 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -327,7 +327,7 @@ HELPERS-y += vhost-user-gpu$(EXESUF)
>   vhost-user-json-y += contrib/vhost-user-gpu/50-qemu-gpu.json
>   endif
>   
> -ifdef CONFIG_LINUX
> +ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP),yy)
>   HELPERS-y += virtiofsd$(EXESUF)

Something is weird here, because I got:

$ make virtiofsd
   ...
   CC      tools/virtiofsd/seccomp.o
tools/virtiofsd/seccomp.c:14:21: fatal error: seccomp.h: No such file or 
directory
  #include <seccomp.h>
                      ^

Indeed I don't have libseccomp installed, ./configure reported:

...
QGA MSI support   no
seccomp support   no
coroutine backend ucontext
coroutine pool    yes
debug stack usage no
...

Note also:

$ make print-CONFIG_LINUX
CONFIG_LINUX=y
$ make print-CONFIG_SECCOMP
CONFIG_SECCOMP=
$ make print-CONFIG_LIBCAP_NG
CONFIG_LIBCAP_NG=y
$ make print-HELPERS-y
HELPERS-y=qemu-bridge-helper

>   vhost-user-json-y += tools/virtiofsd/50-qemu-virtiofsd.json
>   endif
> diff --git a/tools/virtiofsd/Makefile.objs b/tools/virtiofsd/Makefile.objs
> index 45a807500d..076f667e46 100644
> --- a/tools/virtiofsd/Makefile.objs
> +++ b/tools/virtiofsd/Makefile.objs
> @@ -5,5 +5,8 @@ virtiofsd-obj-y = buffer.o \
>                     fuse_signals.o \
>                     fuse_virtio.o \
>                     helper.o \
> -                  passthrough_ll.o
> +                  passthrough_ll.o \
> +                  seccomp.o
>   
> +seccomp.o-cflags := $(SECCOMP_CFLAGS)
> +seccomp.o-libs := $(SECCOMP_LIBS)
> diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
> index 752beb459a..8748e64f33 100644
> --- a/tools/virtiofsd/passthrough_ll.c
> +++ b/tools/virtiofsd/passthrough_ll.c
> @@ -58,6 +58,7 @@
>   #include <unistd.h>
>   
>   #include "passthrough_helpers.h"
> +#include "seccomp.h"
>   
>   struct lo_map_elem {
>       union {
> @@ -2090,6 +2091,7 @@ static void setup_sandbox(struct lo_data *lo, struct fuse_session *se)
>   {
>       setup_namespaces(lo, se);
>       setup_mounts(lo->source);
> +    setup_seccomp();
>   }
>   
>   int main(int argc, char *argv[])
> diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c
> new file mode 100644
> index 0000000000..2aa4d3cc66
> --- /dev/null
> +++ b/tools/virtiofsd/seccomp.c
> @@ -0,0 +1,150 @@
> +/*
> + * Seccomp sandboxing for virtiofsd
> + *
> + * Copyright (C) 2019 Red Hat, Inc.
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#include "seccomp.h"
> +#include "fuse_i.h"
> +#include "fuse_log.h"
> +#include <errno.h>
> +#include <glib.h>
> +#include <seccomp.h>
> +#include <stdlib.h>
> +
> +/* Bodge for libseccomp 2.4.2 which broke ppoll */
> +#if !defined(__SNR_ppoll) && defined(__SNR_brk)
> +#ifdef __NR_ppoll
> +#define __SNR_ppoll __NR_ppoll
> +#else
> +#define __SNR_ppoll __PNR_ppoll
> +#endif
> +#endif
> +
> +static const int syscall_whitelist[] = {
> +    /* TODO ireg sem*() syscalls */
> +    SCMP_SYS(brk),
> +    SCMP_SYS(capget), /* For CAP_FSETID */
> +    SCMP_SYS(capset),
> +    SCMP_SYS(clock_gettime),
> +    SCMP_SYS(clone),
> +#ifdef __NR_clone3
> +    SCMP_SYS(clone3),
> +#endif
> +    SCMP_SYS(close),
> +    SCMP_SYS(copy_file_range),
> +    SCMP_SYS(dup),
> +    SCMP_SYS(eventfd2),
> +    SCMP_SYS(exit),
> +    SCMP_SYS(exit_group),
> +    SCMP_SYS(fallocate),
> +    SCMP_SYS(fchmodat),
> +    SCMP_SYS(fchownat),
> +    SCMP_SYS(fcntl),
> +    SCMP_SYS(fdatasync),
> +    SCMP_SYS(fgetxattr),
> +    SCMP_SYS(flistxattr),
> +    SCMP_SYS(flock),
> +    SCMP_SYS(fremovexattr),
> +    SCMP_SYS(fsetxattr),
> +    SCMP_SYS(fstat),
> +    SCMP_SYS(fstatfs),
> +    SCMP_SYS(fsync),
> +    SCMP_SYS(ftruncate),
> +    SCMP_SYS(futex),
> +    SCMP_SYS(getdents),
> +    SCMP_SYS(getdents64),
> +    SCMP_SYS(getegid),
> +    SCMP_SYS(geteuid),
> +    SCMP_SYS(getpid),
> +    SCMP_SYS(gettid),
> +    SCMP_SYS(gettimeofday),
> +    SCMP_SYS(linkat),
> +    SCMP_SYS(lseek),
> +    SCMP_SYS(madvise),
> +    SCMP_SYS(mkdirat),
> +    SCMP_SYS(mknodat),
> +    SCMP_SYS(mmap),
> +    SCMP_SYS(mprotect),
> +    SCMP_SYS(mremap),
> +    SCMP_SYS(munmap),
> +    SCMP_SYS(newfstatat),
> +    SCMP_SYS(open),
> +    SCMP_SYS(openat),
> +    SCMP_SYS(ppoll),
> +    SCMP_SYS(prctl), /* TODO restrict to just PR_SET_NAME? */
> +    SCMP_SYS(preadv),
> +    SCMP_SYS(pread64),
> +    SCMP_SYS(pwritev),
> +    SCMP_SYS(pwrite64),
> +    SCMP_SYS(read),
> +    SCMP_SYS(readlinkat),
> +    SCMP_SYS(recvmsg),
> +    SCMP_SYS(renameat),
> +    SCMP_SYS(renameat2),
> +    SCMP_SYS(rt_sigaction),
> +    SCMP_SYS(rt_sigprocmask),
> +    SCMP_SYS(rt_sigreturn),
> +    SCMP_SYS(sendmsg),
> +    SCMP_SYS(setresgid),
> +    SCMP_SYS(setresuid),
> +#ifdef __NR_setresgid32
> +    SCMP_SYS(setresgid32),
> +#endif
> +#ifdef __NR_setresuid32
> +    SCMP_SYS(setresuid32),
> +#endif
> +    SCMP_SYS(set_robust_list),
> +    SCMP_SYS(symlinkat),
> +    SCMP_SYS(time), /* Rarely needed, except on static builds */
> +    SCMP_SYS(tgkill),
> +    SCMP_SYS(unlinkat),
> +    SCMP_SYS(utimensat),
> +    SCMP_SYS(write),
> +    SCMP_SYS(writev),
> +};
> +
> +void setup_seccomp(void)
> +{
> +    scmp_filter_ctx ctx;
> +    size_t i;
> +
> +#ifdef SCMP_ACT_KILL_PROCESS
> +    ctx = seccomp_init(SCMP_ACT_KILL_PROCESS);
> +    /* Handle a newer libseccomp but an older kernel */
> +    if (!ctx && errno == EOPNOTSUPP) {
> +        ctx = seccomp_init(SCMP_ACT_TRAP);
> +    }
> +#else
> +    ctx = seccomp_init(SCMP_ACT_TRAP);
> +#endif
> +    if (!ctx) {
> +        fuse_log(FUSE_LOG_ERR, "seccomp_init() failed\n");
> +        exit(1);
> +    }
> +
> +    for (i = 0; i < G_N_ELEMENTS(syscall_whitelist); i++) {
> +        if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW,
> +                             syscall_whitelist[i], 0) != 0) {
> +            fuse_log(FUSE_LOG_ERR, "seccomp_rule_add syscall %d",
> +                     syscall_whitelist[i]);
> +            exit(1);
> +        }
> +    }
> +
> +    /* libvhost-user calls this for post-copy migration, we don't need it */
> +    if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOSYS),
> +                         SCMP_SYS(userfaultfd), 0) != 0) {
> +        fuse_log(FUSE_LOG_ERR, "seccomp_rule_add userfaultfd failed\n");
> +        exit(1);
> +    }
> +
> +    if (seccomp_load(ctx) < 0) {
> +        fuse_log(FUSE_LOG_ERR, "seccomp_load() failed\n");
> +        exit(1);
> +    }
> +
> +    seccomp_release(ctx);
> +}
> diff --git a/tools/virtiofsd/seccomp.h b/tools/virtiofsd/seccomp.h
> new file mode 100644
> index 0000000000..86bce72652
> --- /dev/null
> +++ b/tools/virtiofsd/seccomp.h
> @@ -0,0 +1,14 @@
> +/*
> + * Seccomp sandboxing for virtiofsd
> + *
> + * Copyright (C) 2019 Red Hat, Inc.
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#ifndef VIRTIOFSD_SECCOMP_H
> +#define VIRTIOFSD_SECCOMP_H
> +
> +void setup_seccomp(void);
> +
> +#endif /* VIRTIOFSD_SECCOMP_H */
>
Dr. David Alan Gilbert Jan. 21, 2020, 7:49 p.m. UTC | #2
* Philippe Mathieu-Daudé (philmd@redhat.com) wrote:
> On 1/21/20 1:23 PM, Dr. David Alan Gilbert (git) wrote:
> > From: Stefan Hajnoczi <stefanha@redhat.com>
> > 
> > Only allow system calls that are needed by virtiofsd.  All other system
> > calls cause SIGSYS to be directed at the thread and the process will
> > coredump.
> > 
> > Restricting system calls reduces the kernel attack surface and limits
> > what the process can do when compromised.
> > 
> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > with additional entries by:
> > Signed-off-by: Ganesh Maharaj Mahalingam <ganesh.mahalingam@intel.com>
> > Signed-off-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
> > Signed-off-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
> > Signed-off-by: piaojun <piaojun@huawei.com>
> > Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
> > Signed-off-by: Eric Ren <renzhen@linux.alibaba.com>
> > Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > ---
> >   Makefile                         |   2 +-
> >   tools/virtiofsd/Makefile.objs    |   5 +-
> >   tools/virtiofsd/passthrough_ll.c |   2 +
> >   tools/virtiofsd/seccomp.c        | 150 +++++++++++++++++++++++++++++++
> >   tools/virtiofsd/seccomp.h        |  14 +++
> >   5 files changed, 171 insertions(+), 2 deletions(-)
> >   create mode 100644 tools/virtiofsd/seccomp.c
> >   create mode 100644 tools/virtiofsd/seccomp.h
> > 
> > diff --git a/Makefile b/Makefile
> > index a87e06ad93..967d59c98a 100644
> > --- a/Makefile
> > +++ b/Makefile
> > @@ -327,7 +327,7 @@ HELPERS-y += vhost-user-gpu$(EXESUF)
> >   vhost-user-json-y += contrib/vhost-user-gpu/50-qemu-gpu.json
> >   endif
> > -ifdef CONFIG_LINUX
> > +ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP),yy)
> >   HELPERS-y += virtiofsd$(EXESUF)
> 
> Something is weird here, because I got:
> 
> $ make virtiofsd
>   ...
>   CC      tools/virtiofsd/seccomp.o
> tools/virtiofsd/seccomp.c:14:21: fatal error: seccomp.h: No such file or
> directory
>  #include <seccomp.h>
>                      ^
> 
> Indeed I don't have libseccomp installed, ./configure reported:
> 
> ...
> QGA MSI support   no
> seccomp support   no
> coroutine backend ucontext
> coroutine pool    yes
> debug stack usage no
> ...
> 
> Note also:
> 
> $ make print-CONFIG_LINUX
> CONFIG_LINUX=y
> $ make print-CONFIG_SECCOMP
> CONFIG_SECCOMP=
> $ make print-CONFIG_LIBCAP_NG
> CONFIG_LIBCAP_NG=y
> $ make print-HELPERS-y
> HELPERS-y=qemu-bridge-helper

The same thing happens if you uninstall mesa-libgbm-devel and do a
'make vhost-user-gpu'

These ifeq's don't remove the definition of the target, they just remove
it from the HELPERS-y list, so stop it being built on an unqualified
'make' but don't change the behaviour when you explicitly ask for the
target.

Can you try:

diff --git a/Makefile b/Makefile
index ba7e2e5ebc..346a981f0e 100644
--- a/Makefile
+++ b/Makefile
@@ -676,8 +676,9 @@ vhost-user-blk$(EXESUF): $(vhost-user-blk-obj-y) libvhost-user.a
 rdmacm-mux$(EXESUF): LIBS += "-libumad"
 rdmacm-mux$(EXESUF): $(rdmacm-mux-obj-y) $(COMMON_LDADDS)
        $(call LINK, $^)
-
-ifdef CONFIG_LINUX # relies on Linux-specific syscalls
+#
+# relies on Linux specific syscalls
+ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP)$(CONFIG_LIBCAP_NG),yyy)
 virtiofsd$(EXESUF): $(virtiofsd-obj-y) libvhost-user.a $(COMMON_LDADDS)
        $(call LINK, $^)
 endif


> >   vhost-user-json-y += tools/virtiofsd/50-qemu-virtiofsd.json
> >   endif
> > diff --git a/tools/virtiofsd/Makefile.objs b/tools/virtiofsd/Makefile.objs
> > index 45a807500d..076f667e46 100644
> > --- a/tools/virtiofsd/Makefile.objs
> > +++ b/tools/virtiofsd/Makefile.objs
> > @@ -5,5 +5,8 @@ virtiofsd-obj-y = buffer.o \
> >                     fuse_signals.o \
> >                     fuse_virtio.o \
> >                     helper.o \
> > -                  passthrough_ll.o
> > +                  passthrough_ll.o \
> > +                  seccomp.o
> > +seccomp.o-cflags := $(SECCOMP_CFLAGS)
> > +seccomp.o-libs := $(SECCOMP_LIBS)
> > diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
> > index 752beb459a..8748e64f33 100644
> > --- a/tools/virtiofsd/passthrough_ll.c
> > +++ b/tools/virtiofsd/passthrough_ll.c
> > @@ -58,6 +58,7 @@
> >   #include <unistd.h>
> >   #include "passthrough_helpers.h"
> > +#include "seccomp.h"
> >   struct lo_map_elem {
> >       union {
> > @@ -2090,6 +2091,7 @@ static void setup_sandbox(struct lo_data *lo, struct fuse_session *se)
> >   {
> >       setup_namespaces(lo, se);
> >       setup_mounts(lo->source);
> > +    setup_seccomp();
> >   }
> >   int main(int argc, char *argv[])
> > diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c
> > new file mode 100644
> > index 0000000000..2aa4d3cc66
> > --- /dev/null
> > +++ b/tools/virtiofsd/seccomp.c
> > @@ -0,0 +1,150 @@
> > +/*
> > + * Seccomp sandboxing for virtiofsd
> > + *
> > + * Copyright (C) 2019 Red Hat, Inc.
> > + *
> > + * SPDX-License-Identifier: GPL-2.0-or-later
> > + */
> > +
> > +#include "seccomp.h"
> > +#include "fuse_i.h"
> > +#include "fuse_log.h"
> > +#include <errno.h>
> > +#include <glib.h>
> > +#include <seccomp.h>
> > +#include <stdlib.h>
> > +
> > +/* Bodge for libseccomp 2.4.2 which broke ppoll */
> > +#if !defined(__SNR_ppoll) && defined(__SNR_brk)
> > +#ifdef __NR_ppoll
> > +#define __SNR_ppoll __NR_ppoll
> > +#else
> > +#define __SNR_ppoll __PNR_ppoll
> > +#endif
> > +#endif
> > +
> > +static const int syscall_whitelist[] = {
> > +    /* TODO ireg sem*() syscalls */
> > +    SCMP_SYS(brk),
> > +    SCMP_SYS(capget), /* For CAP_FSETID */
> > +    SCMP_SYS(capset),
> > +    SCMP_SYS(clock_gettime),
> > +    SCMP_SYS(clone),
> > +#ifdef __NR_clone3
> > +    SCMP_SYS(clone3),
> > +#endif
> > +    SCMP_SYS(close),
> > +    SCMP_SYS(copy_file_range),
> > +    SCMP_SYS(dup),
> > +    SCMP_SYS(eventfd2),
> > +    SCMP_SYS(exit),
> > +    SCMP_SYS(exit_group),
> > +    SCMP_SYS(fallocate),
> > +    SCMP_SYS(fchmodat),
> > +    SCMP_SYS(fchownat),
> > +    SCMP_SYS(fcntl),
> > +    SCMP_SYS(fdatasync),
> > +    SCMP_SYS(fgetxattr),
> > +    SCMP_SYS(flistxattr),
> > +    SCMP_SYS(flock),
> > +    SCMP_SYS(fremovexattr),
> > +    SCMP_SYS(fsetxattr),
> > +    SCMP_SYS(fstat),
> > +    SCMP_SYS(fstatfs),
> > +    SCMP_SYS(fsync),
> > +    SCMP_SYS(ftruncate),
> > +    SCMP_SYS(futex),
> > +    SCMP_SYS(getdents),
> > +    SCMP_SYS(getdents64),
> > +    SCMP_SYS(getegid),
> > +    SCMP_SYS(geteuid),
> > +    SCMP_SYS(getpid),
> > +    SCMP_SYS(gettid),
> > +    SCMP_SYS(gettimeofday),
> > +    SCMP_SYS(linkat),
> > +    SCMP_SYS(lseek),
> > +    SCMP_SYS(madvise),
> > +    SCMP_SYS(mkdirat),
> > +    SCMP_SYS(mknodat),
> > +    SCMP_SYS(mmap),
> > +    SCMP_SYS(mprotect),
> > +    SCMP_SYS(mremap),
> > +    SCMP_SYS(munmap),
> > +    SCMP_SYS(newfstatat),
> > +    SCMP_SYS(open),
> > +    SCMP_SYS(openat),
> > +    SCMP_SYS(ppoll),
> > +    SCMP_SYS(prctl), /* TODO restrict to just PR_SET_NAME? */
> > +    SCMP_SYS(preadv),
> > +    SCMP_SYS(pread64),
> > +    SCMP_SYS(pwritev),
> > +    SCMP_SYS(pwrite64),
> > +    SCMP_SYS(read),
> > +    SCMP_SYS(readlinkat),
> > +    SCMP_SYS(recvmsg),
> > +    SCMP_SYS(renameat),
> > +    SCMP_SYS(renameat2),
> > +    SCMP_SYS(rt_sigaction),
> > +    SCMP_SYS(rt_sigprocmask),
> > +    SCMP_SYS(rt_sigreturn),
> > +    SCMP_SYS(sendmsg),
> > +    SCMP_SYS(setresgid),
> > +    SCMP_SYS(setresuid),
> > +#ifdef __NR_setresgid32
> > +    SCMP_SYS(setresgid32),
> > +#endif
> > +#ifdef __NR_setresuid32
> > +    SCMP_SYS(setresuid32),
> > +#endif
> > +    SCMP_SYS(set_robust_list),
> > +    SCMP_SYS(symlinkat),
> > +    SCMP_SYS(time), /* Rarely needed, except on static builds */
> > +    SCMP_SYS(tgkill),
> > +    SCMP_SYS(unlinkat),
> > +    SCMP_SYS(utimensat),
> > +    SCMP_SYS(write),
> > +    SCMP_SYS(writev),
> > +};
> > +
> > +void setup_seccomp(void)
> > +{
> > +    scmp_filter_ctx ctx;
> > +    size_t i;
> > +
> > +#ifdef SCMP_ACT_KILL_PROCESS
> > +    ctx = seccomp_init(SCMP_ACT_KILL_PROCESS);
> > +    /* Handle a newer libseccomp but an older kernel */
> > +    if (!ctx && errno == EOPNOTSUPP) {
> > +        ctx = seccomp_init(SCMP_ACT_TRAP);
> > +    }
> > +#else
> > +    ctx = seccomp_init(SCMP_ACT_TRAP);
> > +#endif
> > +    if (!ctx) {
> > +        fuse_log(FUSE_LOG_ERR, "seccomp_init() failed\n");
> > +        exit(1);
> > +    }
> > +
> > +    for (i = 0; i < G_N_ELEMENTS(syscall_whitelist); i++) {
> > +        if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW,
> > +                             syscall_whitelist[i], 0) != 0) {
> > +            fuse_log(FUSE_LOG_ERR, "seccomp_rule_add syscall %d",
> > +                     syscall_whitelist[i]);
> > +            exit(1);
> > +        }
> > +    }
> > +
> > +    /* libvhost-user calls this for post-copy migration, we don't need it */
> > +    if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOSYS),
> > +                         SCMP_SYS(userfaultfd), 0) != 0) {
> > +        fuse_log(FUSE_LOG_ERR, "seccomp_rule_add userfaultfd failed\n");
> > +        exit(1);
> > +    }
> > +
> > +    if (seccomp_load(ctx) < 0) {
> > +        fuse_log(FUSE_LOG_ERR, "seccomp_load() failed\n");
> > +        exit(1);
> > +    }
> > +
> > +    seccomp_release(ctx);
> > +}
> > diff --git a/tools/virtiofsd/seccomp.h b/tools/virtiofsd/seccomp.h
> > new file mode 100644
> > index 0000000000..86bce72652
> > --- /dev/null
> > +++ b/tools/virtiofsd/seccomp.h
> > @@ -0,0 +1,14 @@
> > +/*
> > + * Seccomp sandboxing for virtiofsd
> > + *
> > + * Copyright (C) 2019 Red Hat, Inc.
> > + *
> > + * SPDX-License-Identifier: GPL-2.0-or-later
> > + */
> > +
> > +#ifndef VIRTIOFSD_SECCOMP_H
> > +#define VIRTIOFSD_SECCOMP_H
> > +
> > +void setup_seccomp(void);
> > +
> > +#endif /* VIRTIOFSD_SECCOMP_H */
> > 
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Philippe Mathieu-Daudé Jan. 21, 2020, 8:53 p.m. UTC | #3
On 1/21/20 8:49 PM, Dr. David Alan Gilbert wrote:
> * Philippe Mathieu-Daudé (philmd@redhat.com) wrote:
>> On 1/21/20 1:23 PM, Dr. David Alan Gilbert (git) wrote:
>>> From: Stefan Hajnoczi <stefanha@redhat.com>
>>>
>>> Only allow system calls that are needed by virtiofsd.  All other system
>>> calls cause SIGSYS to be directed at the thread and the process will
>>> coredump.
>>>
>>> Restricting system calls reduces the kernel attack surface and limits
>>> what the process can do when compromised.
>>>
>>> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
>>> with additional entries by:
>>> Signed-off-by: Ganesh Maharaj Mahalingam <ganesh.mahalingam@intel.com>
>>> Signed-off-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
>>> Signed-off-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
>>> Signed-off-by: piaojun <piaojun@huawei.com>
>>> Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
>>> Signed-off-by: Eric Ren <renzhen@linux.alibaba.com>
>>> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
>>> ---
>>>    Makefile                         |   2 +-
>>>    tools/virtiofsd/Makefile.objs    |   5 +-
>>>    tools/virtiofsd/passthrough_ll.c |   2 +
>>>    tools/virtiofsd/seccomp.c        | 150 +++++++++++++++++++++++++++++++
>>>    tools/virtiofsd/seccomp.h        |  14 +++
>>>    5 files changed, 171 insertions(+), 2 deletions(-)
>>>    create mode 100644 tools/virtiofsd/seccomp.c
>>>    create mode 100644 tools/virtiofsd/seccomp.h
>>>
>>> diff --git a/Makefile b/Makefile
>>> index a87e06ad93..967d59c98a 100644
>>> --- a/Makefile
>>> +++ b/Makefile
>>> @@ -327,7 +327,7 @@ HELPERS-y += vhost-user-gpu$(EXESUF)
>>>    vhost-user-json-y += contrib/vhost-user-gpu/50-qemu-gpu.json
>>>    endif
>>> -ifdef CONFIG_LINUX
>>> +ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP),yy)
>>>    HELPERS-y += virtiofsd$(EXESUF)
>>
>> Something is weird here, because I got:
>>
>> $ make virtiofsd
>>    ...
>>    CC      tools/virtiofsd/seccomp.o
>> tools/virtiofsd/seccomp.c:14:21: fatal error: seccomp.h: No such file or
>> directory
>>   #include <seccomp.h>
>>                       ^
>>
>> Indeed I don't have libseccomp installed, ./configure reported:
>>
>> ...
>> QGA MSI support   no
>> seccomp support   no
>> coroutine backend ucontext
>> coroutine pool    yes
>> debug stack usage no
>> ...
>>
>> Note also:
>>
>> $ make print-CONFIG_LINUX
>> CONFIG_LINUX=y
>> $ make print-CONFIG_SECCOMP
>> CONFIG_SECCOMP=
>> $ make print-CONFIG_LIBCAP_NG
>> CONFIG_LIBCAP_NG=y
>> $ make print-HELPERS-y
>> HELPERS-y=qemu-bridge-helper
> 
> The same thing happens if you uninstall mesa-libgbm-devel and do a
> 'make vhost-user-gpu'
> 
> These ifeq's don't remove the definition of the target, they just remove
> it from the HELPERS-y list, so stop it being built on an unqualified
> 'make' but don't change the behaviour when you explicitly ask for the
> target.
> 
> Can you try:
> 
> diff --git a/Makefile b/Makefile
> index ba7e2e5ebc..346a981f0e 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -676,8 +676,9 @@ vhost-user-blk$(EXESUF): $(vhost-user-blk-obj-y) libvhost-user.a
>   rdmacm-mux$(EXESUF): LIBS += "-libumad"
>   rdmacm-mux$(EXESUF): $(rdmacm-mux-obj-y) $(COMMON_LDADDS)
>          $(call LINK, $^)
> -
> -ifdef CONFIG_LINUX # relies on Linux-specific syscalls
> +#
> +# relies on Linux specific syscalls
> +ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP)$(CONFIG_LIBCAP_NG),yyy)
>   virtiofsd$(EXESUF): $(virtiofsd-obj-y) libvhost-user.a $(COMMON_LDADDS)
>          $(call LINK, $^)
>   endif

$ make print-HELPERS-y
HELPERS-y=qemu-bridge-helper

$ make virtiofsd
   GEN     config-host.h
make[1]: Entering directory 'slirp'
   CC      slirp/src/state.o
   CC      slirp/src/tcp_timer.o
   CC      slirp/src/dhcpv6.o
   CC      slirp/src/ip_input.o
   CC      slirp/src/ip6_icmp.o
   CC      slirp/src/bootp.o
   CC      slirp/src/ip6_input.o
   CC      slirp/src/slirp.o
   CC      slirp/src/vmstate.o
   CC      slirp/src/ip_output.o
   CC      slirp/src/ncsi.o
   CC      slirp/src/tcp_output.o
   CC      slirp/src/ndp_table.o
   CC      slirp/src/version.o
   CC      slirp/src/misc.o
   CC      slirp/src/ip6_output.o
   CC      slirp/src/mbuf.o
   CC      slirp/src/tftp.o
   CC      slirp/src/arp_table.o
   CC      slirp/src/util.o
   CC      slirp/src/socket.o
   CC      slirp/src/sbuf.o
   CC      slirp/src/stream.o
   CC      slirp/src/dnssearch.o
   CC      slirp/src/udp.o
   CC      slirp/src/tcp_input.o
   CC      slirp/src/if.o
   CC      slirp/src/cksum.o
   CC      slirp/src/tcp_subr.o
   CC      slirp/src/udp6.o
   CC      slirp/src/ip_icmp.o
   AR      slirp/libslirp.a
make[1]: Leaving directory 'slirp'
          DEP dtc/libfdt/fdt_overlay.c
          DEP dtc/libfdt/fdt_addresses.c
          DEP dtc/libfdt/fdt_empty_tree.c
          DEP dtc/libfdt/fdt_strerror.c
          DEP dtc/libfdt/fdt_rw.c
          DEP dtc/libfdt/fdt_sw.c
          DEP dtc/libfdt/fdt_wip.c
          DEP dtc/libfdt/fdt_ro.c
          DEP dtc/libfdt/fdt.c
          DEP dtc/util.c
          DEP dtc/fdtoverlay.c
          DEP dtc/fdtput.c
          DEP dtc/fdtget.c
          DEP dtc/fdtdump.c
          DEP convert-dtsv0-lexer.lex.c
          DEP dtc/srcpos.c
          DEP dtc-parser.tab.c
          DEP dtc-lexer.lex.c
          DEP dtc/treesource.c
          DEP dtc/livetree.c
          DEP dtc/fstree.c
          DEP dtc/flattree.c
          DEP dtc/dtc.c
          DEP dtc/data.c
          DEP dtc/checks.c
          CC libfdt/fdt.o
          CC libfdt/fdt_ro.o
          CC libfdt/fdt_wip.o
          CC libfdt/fdt_sw.o
          CC libfdt/fdt_rw.o
          CC libfdt/fdt_strerror.o
          CC libfdt/fdt_empty_tree.o
          CC libfdt/fdt_addresses.o
          CC libfdt/fdt_overlay.o
          AR libfdt/libfdt.a
r - libfdt/fdt.o
r - libfdt/fdt_ro.o
r - libfdt/fdt_wip.o
r - libfdt/fdt_sw.o
r - libfdt/fdt_rw.o
r - libfdt/fdt_strerror.o
r - libfdt/fdt_empty_tree.o
r - libfdt/fdt_addresses.o
r - libfdt/fdt_overlay.o
   CC      cs.o
   CC      utils.o
   CC      SStream.o
   CC      MCInstrDesc.o
   CC      MCRegisterInfo.o
   CC      arch/ARM/ARMDisassembler.o
   CC      arch/ARM/ARMInstPrinter.o
   CC      arch/ARM/ARMMapping.o
   CC      arch/ARM/ARMModule.o
   CC      arch/AArch64/AArch64BaseInfo.o
   CC      arch/AArch64/AArch64Disassembler.o
   CC      arch/AArch64/AArch64InstPrinter.o
   CC      arch/AArch64/AArch64Mapping.o
   CC      arch/AArch64/AArch64Module.o
   CC      arch/Mips/MipsDisassembler.o
   CC      arch/Mips/MipsInstPrinter.o
   CC      arch/Mips/MipsMapping.o
   CC      arch/Mips/MipsModule.o
   CC      arch/PowerPC/PPCDisassembler.o
   CC      arch/PowerPC/PPCInstPrinter.o
   CC      arch/PowerPC/PPCMapping.o
   CC      arch/PowerPC/PPCModule.o
   CC      arch/Sparc/SparcDisassembler.o
   CC      arch/Sparc/SparcInstPrinter.o
   CC      arch/Sparc/SparcMapping.o
   CC      arch/Sparc/SparcModule.o
   CC      arch/SystemZ/SystemZDisassembler.o
   CC      arch/SystemZ/SystemZInstPrinter.o
   CC      arch/SystemZ/SystemZMapping.o
   CC      arch/SystemZ/SystemZModule.o
   CC      arch/SystemZ/SystemZMCTargetDesc.o
   CC      arch/X86/X86DisassemblerDecoder.o
   CC      arch/X86/X86Disassembler.o
   CC      arch/X86/X86IntelInstPrinter.o
   CC      arch/X86/X86ATTInstPrinter.o
   CC      arch/X86/X86Mapping.o
   CC      arch/X86/X86Module.o
   CC      arch/XCore/XCoreDisassembler.o
   CC      arch/XCore/XCoreInstPrinter.o
   CC      arch/XCore/XCoreMapping.o
   CC      arch/XCore/XCoreModule.o
   CC      MCInst.o
   AR      libcapstone.a
   GEN     trace/generated-tcg-tracers.h
   GEN     trace/generated-helpers-wrappers.h
   GEN     trace/generated-helpers.h
   GEN     trace/generated-helpers.c
   GEN     module_block.h
   GEN     trace-root.h
   GEN     accel/kvm/trace.h
   GEN     accel/tcg/trace.h
   GEN     backends/trace.h
   GEN     crypto/trace.h
   GEN     monitor/trace.h
   GEN     linux-user/trace.h
   GEN     authz/trace.h
   GEN     block/trace.h
   GEN     io/trace.h
   GEN     nbd/trace.h
   GEN     scsi/trace.h
   GEN     chardev/trace.h
   GEN     audio/trace.h
   GEN     hw/9pfs/trace.h
   GEN     hw/acpi/trace.h
   GEN     hw/alpha/trace.h
   GEN     hw/arm/trace.h
   GEN     hw/audio/trace.h
   GEN     hw/block/trace.h
   GEN     hw/block/dataplane/trace.h
   GEN     hw/char/trace.h
   GEN     hw/dma/trace.h
   GEN     hw/hppa/trace.h
   GEN     hw/i2c/trace.h
   GEN     hw/i386/trace.h
   GEN     hw/i386/xen/trace.h
   GEN     hw/ide/trace.h
   GEN     hw/input/trace.h
   GEN     hw/intc/trace.h
   GEN     hw/isa/trace.h
   GEN     hw/mem/trace.h
   GEN     hw/mips/trace.h
   GEN     hw/misc/trace.h
   GEN     hw/misc/macio/trace.h
   GEN     hw/net/trace.h
   GEN     hw/nvram/trace.h
   GEN     hw/pci/trace.h
   GEN     hw/pci-host/trace.h
   GEN     hw/ppc/trace.h
   GEN     hw/rdma/trace.h
   GEN     hw/rdma/vmw/trace.h
   GEN     hw/rtc/trace.h
   GEN     hw/s390x/trace.h
   GEN     hw/scsi/trace.h
   GEN     hw/sd/trace.h
   GEN     hw/sparc/trace.h
   GEN     hw/sparc64/trace.h
   GEN     hw/timer/trace.h
   GEN     hw/tpm/trace.h
   GEN     hw/usb/trace.h
   GEN     hw/vfio/trace.h
   GEN     hw/virtio/trace.h
   GEN     hw/watchdog/trace.h
   GEN     hw/xen/trace.h
   GEN     hw/gpio/trace.h
   GEN     hw/riscv/trace.h
   GEN     migration/trace.h
   GEN     net/trace.h
   GEN     ui/trace.h
   GEN     hw/display/trace.h
   GEN     qapi/trace.h
   GEN     qom/trace.h
   GEN     target/arm/trace.h
   GEN     target/hppa/trace.h
   GEN     target/i386/trace.h
   GEN     target/mips/trace.h
   GEN     target/ppc/trace.h
   GEN     target/riscv/trace.h
   GEN     target/s390x/trace.h
   GEN     target/sparc/trace.h
   GEN     util/trace.h
   GEN     hw/core/trace.h
   GEN     trace-root.c
   GEN     accel/kvm/trace.c
   GEN     accel/tcg/trace.c
   GEN     backends/trace.c
   GEN     crypto/trace.c
   GEN     monitor/trace.c
   GEN     linux-user/trace.c
   GEN     authz/trace.c
   GEN     block/trace.c
   GEN     io/trace.c
   GEN     nbd/trace.c
   GEN     scsi/trace.c
   GEN     chardev/trace.c
   GEN     audio/trace.c
   GEN     hw/9pfs/trace.c
   GEN     hw/acpi/trace.c
   GEN     hw/alpha/trace.c
   GEN     hw/arm/trace.c
   GEN     hw/audio/trace.c
   GEN     hw/block/trace.c
   GEN     hw/block/dataplane/trace.c
   GEN     hw/char/trace.c
   GEN     hw/dma/trace.c
   GEN     hw/hppa/trace.c
   GEN     hw/i2c/trace.c
   GEN     hw/i386/trace.c
   GEN     hw/i386/xen/trace.c
   GEN     hw/ide/trace.c
   GEN     hw/input/trace.c
   GEN     hw/intc/trace.c
   GEN     hw/isa/trace.c
   GEN     hw/mem/trace.c
   GEN     hw/mips/trace.c
   GEN     hw/misc/trace.c
   GEN     hw/misc/macio/trace.c
   GEN     hw/net/trace.c
   GEN     hw/nvram/trace.c
   GEN     hw/pci/trace.c
   GEN     hw/pci-host/trace.c
   GEN     hw/ppc/trace.c
   GEN     hw/rdma/trace.c
   GEN     hw/rdma/vmw/trace.c
   GEN     hw/rtc/trace.c
   GEN     hw/s390x/trace.c
   GEN     hw/scsi/trace.c
   GEN     hw/sd/trace.c
   GEN     hw/sparc/trace.c
   GEN     hw/sparc64/trace.c
   GEN     hw/timer/trace.c
   GEN     hw/tpm/trace.c
   GEN     hw/usb/trace.c
   GEN     hw/vfio/trace.c
   GEN     hw/virtio/trace.c
   GEN     hw/watchdog/trace.c
   GEN     hw/xen/trace.c
   GEN     hw/gpio/trace.c
   GEN     hw/riscv/trace.c
   GEN     migration/trace.c
   GEN     net/trace.c
   GEN     ui/trace.c
   GEN     hw/display/trace.c
   GEN     qapi/trace.c
   GEN     qom/trace.c
   GEN     target/arm/trace.c
   GEN     target/hppa/trace.c
   GEN     target/i386/trace.c
   GEN     target/mips/trace.c
   GEN     target/ppc/trace.c
   GEN     target/riscv/trace.c
   GEN     target/s390x/trace.c
   GEN     target/sparc/trace.c
   GEN     util/trace.c
   GEN     hw/core/trace.c
make: *** No rule to make target 'virtiofsd'.  Stop.

This is better, thanks!

(We should also fix vhost-user-gpu, but this is out of the scope of this 
series).

>>>    vhost-user-json-y += tools/virtiofsd/50-qemu-virtiofsd.json
>>>    endif
>>> diff --git a/tools/virtiofsd/Makefile.objs b/tools/virtiofsd/Makefile.objs
>>> index 45a807500d..076f667e46 100644
>>> --- a/tools/virtiofsd/Makefile.objs
>>> +++ b/tools/virtiofsd/Makefile.objs
>>> @@ -5,5 +5,8 @@ virtiofsd-obj-y = buffer.o \
>>>                      fuse_signals.o \
>>>                      fuse_virtio.o \
>>>                      helper.o \
>>> -                  passthrough_ll.o
>>> +                  passthrough_ll.o \
>>> +                  seccomp.o
>>> +seccomp.o-cflags := $(SECCOMP_CFLAGS)
>>> +seccomp.o-libs := $(SECCOMP_LIBS)
>>> diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
>>> index 752beb459a..8748e64f33 100644
>>> --- a/tools/virtiofsd/passthrough_ll.c
>>> +++ b/tools/virtiofsd/passthrough_ll.c
>>> @@ -58,6 +58,7 @@
>>>    #include <unistd.h>
>>>    #include "passthrough_helpers.h"
>>> +#include "seccomp.h"
>>>    struct lo_map_elem {
>>>        union {
>>> @@ -2090,6 +2091,7 @@ static void setup_sandbox(struct lo_data *lo, struct fuse_session *se)
>>>    {
>>>        setup_namespaces(lo, se);
>>>        setup_mounts(lo->source);
>>> +    setup_seccomp();
>>>    }
>>>    int main(int argc, char *argv[])
>>> diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c
>>> new file mode 100644
>>> index 0000000000..2aa4d3cc66
>>> --- /dev/null
>>> +++ b/tools/virtiofsd/seccomp.c
>>> @@ -0,0 +1,150 @@
>>> +/*
>>> + * Seccomp sandboxing for virtiofsd
>>> + *
>>> + * Copyright (C) 2019 Red Hat, Inc.
>>> + *
>>> + * SPDX-License-Identifier: GPL-2.0-or-later
>>> + */
>>> +
>>> +#include "seccomp.h"
>>> +#include "fuse_i.h"
>>> +#include "fuse_log.h"
>>> +#include <errno.h>
>>> +#include <glib.h>
>>> +#include <seccomp.h>
>>> +#include <stdlib.h>
>>> +
>>> +/* Bodge for libseccomp 2.4.2 which broke ppoll */
>>> +#if !defined(__SNR_ppoll) && defined(__SNR_brk)
>>> +#ifdef __NR_ppoll
>>> +#define __SNR_ppoll __NR_ppoll
>>> +#else
>>> +#define __SNR_ppoll __PNR_ppoll
>>> +#endif
>>> +#endif
>>> +
>>> +static const int syscall_whitelist[] = {
>>> +    /* TODO ireg sem*() syscalls */
>>> +    SCMP_SYS(brk),
>>> +    SCMP_SYS(capget), /* For CAP_FSETID */
>>> +    SCMP_SYS(capset),
>>> +    SCMP_SYS(clock_gettime),
>>> +    SCMP_SYS(clone),
>>> +#ifdef __NR_clone3
>>> +    SCMP_SYS(clone3),
>>> +#endif
>>> +    SCMP_SYS(close),
>>> +    SCMP_SYS(copy_file_range),
>>> +    SCMP_SYS(dup),
>>> +    SCMP_SYS(eventfd2),
>>> +    SCMP_SYS(exit),
>>> +    SCMP_SYS(exit_group),
>>> +    SCMP_SYS(fallocate),
>>> +    SCMP_SYS(fchmodat),
>>> +    SCMP_SYS(fchownat),
>>> +    SCMP_SYS(fcntl),
>>> +    SCMP_SYS(fdatasync),
>>> +    SCMP_SYS(fgetxattr),
>>> +    SCMP_SYS(flistxattr),
>>> +    SCMP_SYS(flock),
>>> +    SCMP_SYS(fremovexattr),
>>> +    SCMP_SYS(fsetxattr),
>>> +    SCMP_SYS(fstat),
>>> +    SCMP_SYS(fstatfs),
>>> +    SCMP_SYS(fsync),
>>> +    SCMP_SYS(ftruncate),
>>> +    SCMP_SYS(futex),
>>> +    SCMP_SYS(getdents),
>>> +    SCMP_SYS(getdents64),
>>> +    SCMP_SYS(getegid),
>>> +    SCMP_SYS(geteuid),
>>> +    SCMP_SYS(getpid),
>>> +    SCMP_SYS(gettid),
>>> +    SCMP_SYS(gettimeofday),
>>> +    SCMP_SYS(linkat),
>>> +    SCMP_SYS(lseek),
>>> +    SCMP_SYS(madvise),
>>> +    SCMP_SYS(mkdirat),
>>> +    SCMP_SYS(mknodat),
>>> +    SCMP_SYS(mmap),
>>> +    SCMP_SYS(mprotect),
>>> +    SCMP_SYS(mremap),
>>> +    SCMP_SYS(munmap),
>>> +    SCMP_SYS(newfstatat),
>>> +    SCMP_SYS(open),
>>> +    SCMP_SYS(openat),
>>> +    SCMP_SYS(ppoll),
>>> +    SCMP_SYS(prctl), /* TODO restrict to just PR_SET_NAME? */
>>> +    SCMP_SYS(preadv),
>>> +    SCMP_SYS(pread64),
>>> +    SCMP_SYS(pwritev),
>>> +    SCMP_SYS(pwrite64),
>>> +    SCMP_SYS(read),
>>> +    SCMP_SYS(readlinkat),
>>> +    SCMP_SYS(recvmsg),
>>> +    SCMP_SYS(renameat),
>>> +    SCMP_SYS(renameat2),
>>> +    SCMP_SYS(rt_sigaction),
>>> +    SCMP_SYS(rt_sigprocmask),
>>> +    SCMP_SYS(rt_sigreturn),
>>> +    SCMP_SYS(sendmsg),
>>> +    SCMP_SYS(setresgid),
>>> +    SCMP_SYS(setresuid),
>>> +#ifdef __NR_setresgid32
>>> +    SCMP_SYS(setresgid32),
>>> +#endif
>>> +#ifdef __NR_setresuid32
>>> +    SCMP_SYS(setresuid32),
>>> +#endif
>>> +    SCMP_SYS(set_robust_list),
>>> +    SCMP_SYS(symlinkat),
>>> +    SCMP_SYS(time), /* Rarely needed, except on static builds */
>>> +    SCMP_SYS(tgkill),
>>> +    SCMP_SYS(unlinkat),
>>> +    SCMP_SYS(utimensat),
>>> +    SCMP_SYS(write),
>>> +    SCMP_SYS(writev),
>>> +};
>>> +
>>> +void setup_seccomp(void)
>>> +{
>>> +    scmp_filter_ctx ctx;
>>> +    size_t i;
>>> +
>>> +#ifdef SCMP_ACT_KILL_PROCESS
>>> +    ctx = seccomp_init(SCMP_ACT_KILL_PROCESS);
>>> +    /* Handle a newer libseccomp but an older kernel */
>>> +    if (!ctx && errno == EOPNOTSUPP) {
>>> +        ctx = seccomp_init(SCMP_ACT_TRAP);
>>> +    }
>>> +#else
>>> +    ctx = seccomp_init(SCMP_ACT_TRAP);
>>> +#endif
>>> +    if (!ctx) {
>>> +        fuse_log(FUSE_LOG_ERR, "seccomp_init() failed\n");
>>> +        exit(1);
>>> +    }
>>> +
>>> +    for (i = 0; i < G_N_ELEMENTS(syscall_whitelist); i++) {
>>> +        if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW,
>>> +                             syscall_whitelist[i], 0) != 0) {
>>> +            fuse_log(FUSE_LOG_ERR, "seccomp_rule_add syscall %d",
>>> +                     syscall_whitelist[i]);
>>> +            exit(1);
>>> +        }
>>> +    }
>>> +
>>> +    /* libvhost-user calls this for post-copy migration, we don't need it */
>>> +    if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOSYS),
>>> +                         SCMP_SYS(userfaultfd), 0) != 0) {
>>> +        fuse_log(FUSE_LOG_ERR, "seccomp_rule_add userfaultfd failed\n");
>>> +        exit(1);
>>> +    }
>>> +
>>> +    if (seccomp_load(ctx) < 0) {
>>> +        fuse_log(FUSE_LOG_ERR, "seccomp_load() failed\n");
>>> +        exit(1);
>>> +    }
>>> +
>>> +    seccomp_release(ctx);
>>> +}
>>> diff --git a/tools/virtiofsd/seccomp.h b/tools/virtiofsd/seccomp.h
>>> new file mode 100644
>>> index 0000000000..86bce72652
>>> --- /dev/null
>>> +++ b/tools/virtiofsd/seccomp.h
>>> @@ -0,0 +1,14 @@
>>> +/*
>>> + * Seccomp sandboxing for virtiofsd
>>> + *
>>> + * Copyright (C) 2019 Red Hat, Inc.
>>> + *
>>> + * SPDX-License-Identifier: GPL-2.0-or-later
>>> + */
>>> +
>>> +#ifndef VIRTIOFSD_SECCOMP_H
>>> +#define VIRTIOFSD_SECCOMP_H
>>> +
>>> +void setup_seccomp(void);
>>> +
>>> +#endif /* VIRTIOFSD_SECCOMP_H */
>>>
>>
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>
Florian Weimer Jan. 24, 2020, 9:46 a.m. UTC | #4
* David Alan Gilbert:

> +static const int syscall_whitelist[] = {
> +    /* TODO ireg sem*() syscalls */
> +    SCMP_SYS(brk),
> +    SCMP_SYS(capget), /* For CAP_FSETID */
> +    SCMP_SYS(capset),
> +    SCMP_SYS(clock_gettime),

> +    SCMP_SYS(gettimeofday),

Is this to suppose to work on 32-bit architectures?  Then you need to
add the time64 system call variants as well.

Thanks,
Florian
Dr. David Alan Gilbert Jan. 24, 2020, 9:51 a.m. UTC | #5
* Florian Weimer (fweimer@redhat.com) wrote:
> * David Alan Gilbert:
> 
> > +static const int syscall_whitelist[] = {
> > +    /* TODO ireg sem*() syscalls */
> > +    SCMP_SYS(brk),
> > +    SCMP_SYS(capget), /* For CAP_FSETID */
> > +    SCMP_SYS(capset),
> > +    SCMP_SYS(clock_gettime),
> 
> > +    SCMP_SYS(gettimeofday),
> 
> Is this to suppose to work on 32-bit architectures?  Then you need to
> add the time64 system call variants as well.

I've build tested on 32 but not tried running it; I'd added time(2) after
hitting it on a static build but didn't know of time64 (not that it has
a manpage!).

I'll do a follow up to fix it.

Dave

> Thanks,
> Florian
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Dr. David Alan Gilbert Jan. 24, 2020, 9:57 a.m. UTC | #6
* Florian Weimer (fweimer@redhat.com) wrote:
> * David Alan Gilbert:
> 
> > +static const int syscall_whitelist[] = {
> > +    /* TODO ireg sem*() syscalls */
> > +    SCMP_SYS(brk),
> > +    SCMP_SYS(capget), /* For CAP_FSETID */
> > +    SCMP_SYS(capset),
> > +    SCMP_SYS(clock_gettime),
> 
> > +    SCMP_SYS(gettimeofday),
> 
> Is this to suppose to work on 32-bit architectures?  Then you need to
> add the time64 system call variants as well.

Trying SCMP_SYS(time64) gives me an error for an undefined __NR_time64
on both 64 and 32 bit.

Dave

> Thanks,
> Florian
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Florian Weimer Jan. 24, 2020, 10:06 a.m. UTC | #7
* David Alan Gilbert:

> * Florian Weimer (fweimer@redhat.com) wrote:
>> * David Alan Gilbert:
>> 
>> > +static const int syscall_whitelist[] = {
>> > +    /* TODO ireg sem*() syscalls */
>> > +    SCMP_SYS(brk),
>> > +    SCMP_SYS(capget), /* For CAP_FSETID */
>> > +    SCMP_SYS(capset),
>> > +    SCMP_SYS(clock_gettime),
>> 
>> > +    SCMP_SYS(gettimeofday),
>> 
>> Is this to suppose to work on 32-bit architectures?  Then you need to
>> add the time64 system call variants as well.
>
> Trying SCMP_SYS(time64) gives me an error for an undefined __NR_time64
> on both 64 and 32 bit.

Sorry, time64 does not exist, Userspace is supposed to use
clock_gettime64 with CLOCK_REALTIME_COARSE.

I actually meant that you'll also need futex_time64, ppoll_time64,
recvmmsg_time64, utimensat_time64.  (Based on cursory checking against
the permit list you posted.)

And for a port to 32-bit RISC-V, I think the 32-bit syscalls need to be
protected by #ifdef because new 32-bit architectures do not have them
anymore.

Thanks,
Florian
diff mbox series

Patch

diff --git a/Makefile b/Makefile
index a87e06ad93..967d59c98a 100644
--- a/Makefile
+++ b/Makefile
@@ -327,7 +327,7 @@  HELPERS-y += vhost-user-gpu$(EXESUF)
 vhost-user-json-y += contrib/vhost-user-gpu/50-qemu-gpu.json
 endif
 
-ifdef CONFIG_LINUX
+ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP),yy)
 HELPERS-y += virtiofsd$(EXESUF)
 vhost-user-json-y += tools/virtiofsd/50-qemu-virtiofsd.json
 endif
diff --git a/tools/virtiofsd/Makefile.objs b/tools/virtiofsd/Makefile.objs
index 45a807500d..076f667e46 100644
--- a/tools/virtiofsd/Makefile.objs
+++ b/tools/virtiofsd/Makefile.objs
@@ -5,5 +5,8 @@  virtiofsd-obj-y = buffer.o \
                   fuse_signals.o \
                   fuse_virtio.o \
                   helper.o \
-                  passthrough_ll.o
+                  passthrough_ll.o \
+                  seccomp.o
 
+seccomp.o-cflags := $(SECCOMP_CFLAGS)
+seccomp.o-libs := $(SECCOMP_LIBS)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 752beb459a..8748e64f33 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -58,6 +58,7 @@ 
 #include <unistd.h>
 
 #include "passthrough_helpers.h"
+#include "seccomp.h"
 
 struct lo_map_elem {
     union {
@@ -2090,6 +2091,7 @@  static void setup_sandbox(struct lo_data *lo, struct fuse_session *se)
 {
     setup_namespaces(lo, se);
     setup_mounts(lo->source);
+    setup_seccomp();
 }
 
 int main(int argc, char *argv[])
diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c
new file mode 100644
index 0000000000..2aa4d3cc66
--- /dev/null
+++ b/tools/virtiofsd/seccomp.c
@@ -0,0 +1,150 @@ 
+/*
+ * Seccomp sandboxing for virtiofsd
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "seccomp.h"
+#include "fuse_i.h"
+#include "fuse_log.h"
+#include <errno.h>
+#include <glib.h>
+#include <seccomp.h>
+#include <stdlib.h>
+
+/* Bodge for libseccomp 2.4.2 which broke ppoll */
+#if !defined(__SNR_ppoll) && defined(__SNR_brk)
+#ifdef __NR_ppoll
+#define __SNR_ppoll __NR_ppoll
+#else
+#define __SNR_ppoll __PNR_ppoll
+#endif
+#endif
+
+static const int syscall_whitelist[] = {
+    /* TODO ireg sem*() syscalls */
+    SCMP_SYS(brk),
+    SCMP_SYS(capget), /* For CAP_FSETID */
+    SCMP_SYS(capset),
+    SCMP_SYS(clock_gettime),
+    SCMP_SYS(clone),
+#ifdef __NR_clone3
+    SCMP_SYS(clone3),
+#endif
+    SCMP_SYS(close),
+    SCMP_SYS(copy_file_range),
+    SCMP_SYS(dup),
+    SCMP_SYS(eventfd2),
+    SCMP_SYS(exit),
+    SCMP_SYS(exit_group),
+    SCMP_SYS(fallocate),
+    SCMP_SYS(fchmodat),
+    SCMP_SYS(fchownat),
+    SCMP_SYS(fcntl),
+    SCMP_SYS(fdatasync),
+    SCMP_SYS(fgetxattr),
+    SCMP_SYS(flistxattr),
+    SCMP_SYS(flock),
+    SCMP_SYS(fremovexattr),
+    SCMP_SYS(fsetxattr),
+    SCMP_SYS(fstat),
+    SCMP_SYS(fstatfs),
+    SCMP_SYS(fsync),
+    SCMP_SYS(ftruncate),
+    SCMP_SYS(futex),
+    SCMP_SYS(getdents),
+    SCMP_SYS(getdents64),
+    SCMP_SYS(getegid),
+    SCMP_SYS(geteuid),
+    SCMP_SYS(getpid),
+    SCMP_SYS(gettid),
+    SCMP_SYS(gettimeofday),
+    SCMP_SYS(linkat),
+    SCMP_SYS(lseek),
+    SCMP_SYS(madvise),
+    SCMP_SYS(mkdirat),
+    SCMP_SYS(mknodat),
+    SCMP_SYS(mmap),
+    SCMP_SYS(mprotect),
+    SCMP_SYS(mremap),
+    SCMP_SYS(munmap),
+    SCMP_SYS(newfstatat),
+    SCMP_SYS(open),
+    SCMP_SYS(openat),
+    SCMP_SYS(ppoll),
+    SCMP_SYS(prctl), /* TODO restrict to just PR_SET_NAME? */
+    SCMP_SYS(preadv),
+    SCMP_SYS(pread64),
+    SCMP_SYS(pwritev),
+    SCMP_SYS(pwrite64),
+    SCMP_SYS(read),
+    SCMP_SYS(readlinkat),
+    SCMP_SYS(recvmsg),
+    SCMP_SYS(renameat),
+    SCMP_SYS(renameat2),
+    SCMP_SYS(rt_sigaction),
+    SCMP_SYS(rt_sigprocmask),
+    SCMP_SYS(rt_sigreturn),
+    SCMP_SYS(sendmsg),
+    SCMP_SYS(setresgid),
+    SCMP_SYS(setresuid),
+#ifdef __NR_setresgid32
+    SCMP_SYS(setresgid32),
+#endif
+#ifdef __NR_setresuid32
+    SCMP_SYS(setresuid32),
+#endif
+    SCMP_SYS(set_robust_list),
+    SCMP_SYS(symlinkat),
+    SCMP_SYS(time), /* Rarely needed, except on static builds */
+    SCMP_SYS(tgkill),
+    SCMP_SYS(unlinkat),
+    SCMP_SYS(utimensat),
+    SCMP_SYS(write),
+    SCMP_SYS(writev),
+};
+
+void setup_seccomp(void)
+{
+    scmp_filter_ctx ctx;
+    size_t i;
+
+#ifdef SCMP_ACT_KILL_PROCESS
+    ctx = seccomp_init(SCMP_ACT_KILL_PROCESS);
+    /* Handle a newer libseccomp but an older kernel */
+    if (!ctx && errno == EOPNOTSUPP) {
+        ctx = seccomp_init(SCMP_ACT_TRAP);
+    }
+#else
+    ctx = seccomp_init(SCMP_ACT_TRAP);
+#endif
+    if (!ctx) {
+        fuse_log(FUSE_LOG_ERR, "seccomp_init() failed\n");
+        exit(1);
+    }
+
+    for (i = 0; i < G_N_ELEMENTS(syscall_whitelist); i++) {
+        if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW,
+                             syscall_whitelist[i], 0) != 0) {
+            fuse_log(FUSE_LOG_ERR, "seccomp_rule_add syscall %d",
+                     syscall_whitelist[i]);
+            exit(1);
+        }
+    }
+
+    /* libvhost-user calls this for post-copy migration, we don't need it */
+    if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOSYS),
+                         SCMP_SYS(userfaultfd), 0) != 0) {
+        fuse_log(FUSE_LOG_ERR, "seccomp_rule_add userfaultfd failed\n");
+        exit(1);
+    }
+
+    if (seccomp_load(ctx) < 0) {
+        fuse_log(FUSE_LOG_ERR, "seccomp_load() failed\n");
+        exit(1);
+    }
+
+    seccomp_release(ctx);
+}
diff --git a/tools/virtiofsd/seccomp.h b/tools/virtiofsd/seccomp.h
new file mode 100644
index 0000000000..86bce72652
--- /dev/null
+++ b/tools/virtiofsd/seccomp.h
@@ -0,0 +1,14 @@ 
+/*
+ * Seccomp sandboxing for virtiofsd
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef VIRTIOFSD_SECCOMP_H
+#define VIRTIOFSD_SECCOMP_H
+
+void setup_seccomp(void);
+
+#endif /* VIRTIOFSD_SECCOMP_H */