@@ -17,6 +17,7 @@
#define QEMU_SECCOMP_SET_DEFAULT (1 << 0)
#define QEMU_SECCOMP_SET_OBSOLETE (1 << 1)
+#define QEMU_SECCOMP_SET_PRIVILEGED (1 << 2)
#include <seccomp.h>
@@ -4017,20 +4017,26 @@ Old param mode (ARM only).
ETEXI
DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
- "-sandbox on[,obsolete=allow|deny]\n" \
+ "-sandbox on[,obsolete=allow|deny][,elevateprivileges=allow|deny|children]\n" \
" Enable seccomp mode 2 system call filter (default 'off').\n" \
" use 'obsolete' to allow obsolete system calls that are provided\n" \
" by the kernel, but typically no longer used by modern\n" \
- " C library implementations.\n",
+ " C library implementations.\n" \
+ " use 'elevateprivileges' to allow or deny QEMU process to elevate\n" \
+ " its privileges by blacklisting all set*uid|gid system calls.\n" \
+ " The value 'children' will deny set*uid|gid system calls for\n" \
+ " main QEMU process but will allow forks and execves to run unprivileged\n",
QEMU_ARCH_ALL)
STEXI
-@item -sandbox @var{arg}[,obsolete=@var{string}]
+@item -sandbox @var{arg}[,obsolete=@var{string}][,elevateprivileges=@var{string}]
@findex -sandbox
Enable Seccomp mode 2 system call filter. 'on' will enable syscall filtering and 'off' will
disable it. The default is 'off'.
@table @option
@item obsolete=@var{string}
Enable Obsolete system calls
+@item elevateprivileges=@var{string}
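+Control the set*uid|gid system calls: 'allow' (the default) permits them, 'deny' blacklists them, and 'children' denies them for the main QEMU process while letting forks and execves run unprivileged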
@end table
ETEXI
@@ -57,17 +57,16 @@ static const struct QemuSeccompSyscall blacklist[] = {
{ SCMP_SYS(ulimit), 1, QEMU_SECCOMP_SET_DEFAULT },
{ SCMP_SYS(vserver), 1, QEMU_SECCOMP_SET_DEFAULT },
/* obsolete */
- { SCMP_SYS(readdir), 2, QEMU_SECCOMP_SET_OBSOLETE },
- { SCMP_SYS(_sysctl), 2, QEMU_SECCOMP_SET_OBSOLETE },
- { SCMP_SYS(bdflush), 2, QEMU_SECCOMP_SET_OBSOLETE },
- { SCMP_SYS(create_module), 2, QEMU_SECCOMP_SET_OBSOLETE },
- { SCMP_SYS(get_kernel_syms), 2, QEMU_SECCOMP_SET_OBSOLETE },
- { SCMP_SYS(query_module), 2, QEMU_SECCOMP_SET_OBSOLETE },
- { SCMP_SYS(sgetmask), 2, QEMU_SECCOMP_SET_OBSOLETE },
- { SCMP_SYS(ssetmask), 2, QEMU_SECCOMP_SET_OBSOLETE },
- { SCMP_SYS(sysfs), 2, QEMU_SECCOMP_SET_OBSOLETE },
- { SCMP_SYS(uselib), 2, QEMU_SECCOMP_SET_OBSOLETE },
- { SCMP_SYS(ustat), 2, QEMU_SECCOMP_SET_OBSOLETE },
+ { SCMP_SYS(setuid), 4, QEMU_SECCOMP_SET_PRIVILEGED },
+ { SCMP_SYS(setgid), 4, QEMU_SECCOMP_SET_PRIVILEGED },
+ { SCMP_SYS(setpgid), 4, QEMU_SECCOMP_SET_PRIVILEGED },
+ { SCMP_SYS(setsid), 4, QEMU_SECCOMP_SET_PRIVILEGED },
+ { SCMP_SYS(setreuid), 4, QEMU_SECCOMP_SET_PRIVILEGED },
+ { SCMP_SYS(setregid), 4, QEMU_SECCOMP_SET_PRIVILEGED },
+ { SCMP_SYS(setresuid), 4, QEMU_SECCOMP_SET_PRIVILEGED },
+ { SCMP_SYS(setresgid), 4, QEMU_SECCOMP_SET_PRIVILEGED },
+ { SCMP_SYS(setfsuid), 4, QEMU_SECCOMP_SET_PRIVILEGED },
+ { SCMP_SYS(setfsgid), 4, QEMU_SECCOMP_SET_PRIVILEGED },
};
@@ -93,6 +92,14 @@ int seccomp_start(uint32_t seccomp_opts)
}
break;
+ case QEMU_SECCOMP_SET_PRIVILEGED:
+ if (seccomp_opts & QEMU_SECCOMP_SET_PRIVILEGED) {
+ goto add_syscall;
+ } else {
+ continue;
+ }
+
+ break;
default:
goto add_syscall;
}
@@ -29,6 +29,7 @@
#ifdef CONFIG_SECCOMP
#include "sysemu/seccomp.h"
+#include "sys/prctl.h"
#endif
#if defined(CONFIG_VDE)
@@ -275,6 +276,10 @@ static QemuOptsList qemu_sandbox_opts = {
.name = "obsolete",
.type = QEMU_OPT_STRING,
},
+ {
+ .name = "elevateprivileges",
+ .type = QEMU_OPT_STRING,
+ },
{ /* end of list */ }
},
};
@@ -1052,6 +1057,28 @@ static int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp)
}
}
+ value = qemu_opt_get(opts, "elevateprivileges");
+ if (value) {
+ if (strcmp(value, "deny") == 0) {
+ seccomp_opts |= QEMU_SECCOMP_SET_PRIVILEGED;
+ } else if (strcmp(value, "children") == 0) {
+ seccomp_opts |= QEMU_SECCOMP_SET_PRIVILEGED;
+
+ /* Call prctl() directly because we cannot be sure
+ * that the host has CAP_SYS_ADMIN set. */
+ if (prctl(PR_SET_NO_NEW_PRIVS, 1)) {
+ error_report("failed to set no_new_privs "
+ "aborting");
+ return -1;
+ }
+ } else if (strcmp(value, "allow") == 0) {
+ /* default value */
+ } else {
+ error_report("invalid argument for elevateprivileges");
+ return -1;
+ }
+ }
+
if (seccomp_start(seccomp_opts) < 0) {
error_report("failed to install seccomp syscall filter "
"in the kernel");
This patch introduces the new argument [,elevateprivileges=allow|deny|children] to `-sandbox on'. It controls whether the QEMU process may elevate its privileges: 'deny' blacklists all set*uid|gid system calls, while 'allow' (the default) leaves them available. The 'children' option denies those system calls for the main QEMU process but lets forks and execves run unprivileged. Signed-off-by: Eduardo Otubo <otubo@redhat.com> --- include/sysemu/seccomp.h | 1 + qemu-options.hx | 12 +++++++++--- qemu-seccomp.c | 29 ++++++++++++++++++----------- vl.c | 27 +++++++++++++++++++++++++++ 4 files changed, 55 insertions(+), 14 deletions(-)