Message ID | 20170728121040.631-4-otubo@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Fri, Jul 28, 2017 at 02:10:37PM +0200, Eduardo Otubo wrote: > This patch introduces the new argument > [,elevateprivileges=allow|deny|children] to the `-sandbox on'. It allows > or denies Qemu process to elevate its privileges by blacklisting all > set*uid|gid system calls. The 'children' option will let forks and > execves run unprivileged. > > Signed-off-by: Eduardo Otubo <otubo@redhat.com> > --- > include/sysemu/seccomp.h | 1 + > qemu-options.hx | 9 ++++++--- > qemu-seccomp.c | 29 +++++++++++++++++++++++++++++ > vl.c | 22 ++++++++++++++++++++++ > 4 files changed, 58 insertions(+), 3 deletions(-) > > diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h > index 7a7bde246b..e6e78d85ce 100644 > --- a/include/sysemu/seccomp.h > +++ b/include/sysemu/seccomp.h > @@ -16,6 +16,7 @@ > #define QEMU_SECCOMP_H > > #define OBSOLETE 0x0001 > +#define PRIVILEGED 0x0010 Err, this is hex, but you seem to be treating it as a binary string. It would be better expressed as #define OBSOLETE (1 << 0) #define PRIVILEGED (1 << 1) #define .... (1 << 2) #define .... (1 << 3) #define .... (1 << 4) > > + value = qemu_opt_get(opts, "elevateprivileges"); > + if (value) { > + if (strcmp(value, "deny") == 0) { > + seccomp_opts |= PRIVILEGED; > + } > + if (strcmp(value, "children") == 0) { > + seccomp_opts |= PRIVILEGED; > + > + /* calling prctl directly because we're > + * not sure if host has CAP_SYS_ADMIN set*/ > + if (prctl(PR_SET_NO_NEW_PRIVS, 1)) { > + error_report("failed to set no_new_privs " > + "aborting"); > + } The prctl() really ought to be done in seccomp_start IMHO. > + } Also it should report an error for invalid 'value' strings. > + } > + > if (seccomp_start(seccomp_opts) < 0) { > error_report("failed to install seccomp syscall filter " > "in the kernel"); > -- > 2.13.3 > Regards, Daniel
On 28.07.2017 14:10, Eduardo Otubo wrote: > This patch introduces the new argument > [,elevateprivileges=allow|deny|children] to the `-sandbox on'. It allows > or denies Qemu process to elevate its privileges by blacklisting all > set*uid|gid system calls. The 'children' option will let forks and > execves run unprivileged. > > Signed-off-by: Eduardo Otubo <otubo@redhat.com> > --- > include/sysemu/seccomp.h | 1 + > qemu-options.hx | 9 ++++++--- > qemu-seccomp.c | 29 +++++++++++++++++++++++++++++ > vl.c | 22 ++++++++++++++++++++++ > 4 files changed, 58 insertions(+), 3 deletions(-) > > diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h > index 7a7bde246b..e6e78d85ce 100644 > --- a/include/sysemu/seccomp.h > +++ b/include/sysemu/seccomp.h > @@ -16,6 +16,7 @@ > #define QEMU_SECCOMP_H > > #define OBSOLETE 0x0001 > +#define PRIVILEGED 0x0010 > > #include <seccomp.h> > > diff --git a/qemu-options.hx b/qemu-options.hx > index 54e492f36a..34d33a812e 100644 > --- a/qemu-options.hx > +++ b/qemu-options.hx > @@ -4004,17 +4004,20 @@ Old param mode (ARM only). > ETEXI > > DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \ > - "-sandbox on[,obsolete=allow] Enable seccomp mode 2 system call filter (default 'off').\n" \ > - " obsolete: Allow obsolete system calls\n", > + "-sandbox on[,obsolete=allow][,elevateprivileges=allow|deny|children] Enable seccomp mode 2 system call filter (default 'off').\n" \ Most other boolean-like options use "on|off" as possible values ... maybe it would be nicer to use "on|off" instead of "allow|deny" here, too? > + " obsolete: Allow obsolete system calls\n" > + " elevateprivileges: allows or denies Qemu process to elevate its privileges by blacklisting all set*uid|gid system calls. 
'children' will deny set*uid|gid system calls for main Qemu process but will allow forks and execves to run unprivileged\n", Correct spelling is "QEMU" with all capital letters, not "Qemu" > QEMU_ARCH_ALL) > STEXI > -@item -sandbox @var{arg}[,obsolete=@var{string}] > +@item -sandbox @var{arg}[,obsolete=@var{string}][,elevateprivileges=@var{string}] > @findex -sandbox > Enable Seccomp mode 2 system call filter. 'on' will enable syscall filtering and 'off' will > disable it. The default is 'off'. > @table @option > @item obsolete=@var{string} > Enable Obsolete system calls > +@item elevateprivileges=@var{string} > +Disable set*uid|gid systema calls s/systema/system/ Thomas
diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h index 7a7bde246b..e6e78d85ce 100644 --- a/include/sysemu/seccomp.h +++ b/include/sysemu/seccomp.h @@ -16,6 +16,7 @@ #define QEMU_SECCOMP_H #define OBSOLETE 0x0001 +#define PRIVILEGED 0x0010 #include <seccomp.h> diff --git a/qemu-options.hx b/qemu-options.hx index 54e492f36a..34d33a812e 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -4004,17 +4004,20 @@ Old param mode (ARM only). ETEXI DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \ - "-sandbox on[,obsolete=allow] Enable seccomp mode 2 system call filter (default 'off').\n" \ - " obsolete: Allow obsolete system calls\n", + "-sandbox on[,obsolete=allow][,elevateprivileges=allow|deny|children] Enable seccomp mode 2 system call filter (default 'off').\n" \ + " obsolete: Allow obsolete system calls\n" + " elevateprivileges: allows or denies Qemu process to elevate its privileges by blacklisting all set*uid|gid system calls. 'children' will deny set*uid|gid system calls for main Qemu process but will allow forks and execves to run unprivileged\n", QEMU_ARCH_ALL) STEXI -@item -sandbox @var{arg}[,obsolete=@var{string}] +@item -sandbox @var{arg}[,obsolete=@var{string}][,elevateprivileges=@var{string}] @findex -sandbox Enable Seccomp mode 2 system call filter. 'on' will enable syscall filtering and 'off' will disable it. The default is 'off'. 
@table @option @item obsolete=@var{string} Enable Obsolete system calls +@item elevateprivileges=@var{string} +Disable set*uid|gid systema calls @end table ETEXI diff --git a/qemu-seccomp.c b/qemu-seccomp.c index c6a8b28260..6caa513edd 100644 --- a/qemu-seccomp.c +++ b/qemu-seccomp.c @@ -31,6 +31,19 @@ struct QemuSeccompSyscall { uint8_t priority; }; +static const struct QemuSeccompSyscall privileged_syscalls[] = { + { SCMP_SYS(setuid), 255 }, + { SCMP_SYS(setgid), 255 }, + { SCMP_SYS(setpgid), 255 }, + { SCMP_SYS(setsid), 255 }, + { SCMP_SYS(setreuid), 255 }, + { SCMP_SYS(setregid), 255 }, + { SCMP_SYS(setresuid), 255 }, + { SCMP_SYS(setresgid), 255 }, + { SCMP_SYS(setfsuid), 255 }, + { SCMP_SYS(setfsgid), 255 }, +}; + static const struct QemuSeccompSyscall obsolete[] = { { SCMP_SYS(readdir), 255 }, { SCMP_SYS(_sysctl), 255 }, @@ -110,6 +123,22 @@ int seccomp_start(uint8_t seccomp_opts) } } + if (seccomp_opts & PRIVILEGED) { + for (i = 0; i < ARRAY_SIZE(privileged_syscalls); i++) { + rc = seccomp_rule_add(ctx, SCMP_ACT_KILL, + privileged_syscalls[i].num, 0); + if (rc < 0) { + goto seccomp_return; + } + rc = seccomp_syscall_priority(ctx, privileged_syscalls[i].num, + privileged_syscalls[i].priority); + if (rc < 0) { + goto seccomp_return; + } + } + } + + rc = seccomp_load(ctx); seccomp_return: diff --git a/vl.c b/vl.c index cbe09c94af..800e2b573d 100644 --- a/vl.c +++ b/vl.c @@ -29,6 +29,7 @@ #ifdef CONFIG_SECCOMP #include "sysemu/seccomp.h" +#include "sys/prctl.h" #endif #if defined(CONFIG_VDE) @@ -275,6 +276,10 @@ static QemuOptsList qemu_sandbox_opts = { .name = "obsolete", .type = QEMU_OPT_STRING, }, + { + .name = "elevateprivileges", + .type = QEMU_OPT_STRING, + }, { /* end of list */ } }, }; @@ -1046,6 +1051,23 @@ static int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp) } } + value = qemu_opt_get(opts, "elevateprivileges"); + if (value) { + if (strcmp(value, "deny") == 0) { + seccomp_opts |= PRIVILEGED; + } + if (strcmp(value, "children") == 0) { 
+ seccomp_opts |= PRIVILEGED; + + /* calling prctl directly because we're + * not sure if host has CAP_SYS_ADMIN set*/ + if (prctl(PR_SET_NO_NEW_PRIVS, 1)) { + error_report("failed to set no_new_privs " + "aborting"); + } + } + } + if (seccomp_start(seccomp_opts) < 0) { error_report("failed to install seccomp syscall filter " "in the kernel");
This patch introduces the new argument [,elevateprivileges=allow|deny|children] to the `-sandbox on' option. It controls whether the QEMU process may elevate its privileges; denying is implemented by blacklisting all set*uid|gid system calls. The 'children' option will let forks and execves run unprivileged. Signed-off-by: Eduardo Otubo <otubo@redhat.com> --- include/sysemu/seccomp.h | 1 + qemu-options.hx | 9 ++++++--- qemu-seccomp.c | 29 +++++++++++++++++++++++++++++ vl.c | 22 ++++++++++++++++++++++ 4 files changed, 58 insertions(+), 3 deletions(-)