Message ID | 20170314113209.12025-4-eduardo.otubo@profitbricks.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 14/03/2017 12:32, Eduardo Otubo wrote: > This patch introduces the new argument [,elevateprivileges=deny] to the > `-sandbox on'. It avoids Qemu process to elevate its privileges by > blacklisting all set*uid|gid system calls > > Signed-off-by: Eduardo Otubo <eduardo.otubo@profitbricks.com> > --- > include/sysemu/seccomp.h | 1 + > qemu-options.hx | 8 ++++++-- > qemu-seccomp.c | 28 ++++++++++++++++++++++++++++ > vl.c | 11 +++++++++++ > 4 files changed, 46 insertions(+), 2 deletions(-) > > diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h > index 7a7bde246b..e6e78d85ce 100644 > --- a/include/sysemu/seccomp.h > +++ b/include/sysemu/seccomp.h > @@ -16,6 +16,7 @@ > #define QEMU_SECCOMP_H > > #define OBSOLETE 0x0001 > +#define PRIVILEGED 0x0010 > > #include <seccomp.h> > > diff --git a/qemu-options.hx b/qemu-options.hx > index 1403d0c85f..47018db5aa 100644 > --- a/qemu-options.hx > +++ b/qemu-options.hx > @@ -3732,8 +3732,10 @@ Old param mode (ARM only). > ETEXI > > DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \ > - "-sandbox on[,obsolete=allow] Enable seccomp mode 2 system call filter (default 'off').\n" \ > - " obsolete: Allow obsolete system calls", > + "-sandbox on[,obsolete=allow][,elevateprivileges=deny]\n" \ > + " Enable seccomp mode 2 system call filter (default 'off').\n" \ > + " obsolete: Allow obsolete system calls\n" \ > + " elevateprivileges: avoids Qemu process to elevate its privileges by blacklisting all set*uid|gid system calls", Why allow these by default? Paolo > QEMU_ARCH_ALL) > STEXI > @item -sandbox @var{arg}[,obsolete=@var{string}] > @@ -3743,6 +3745,8 @@ disable it. The default is 'off'. > @table @option > @item obsolete=@var{string} > Enable Obsolete system calls > +@item elevateprivileges=@var{string} > +Disable set*uid|gid systema calls > @end table > ETEXI > > diff --git a/qemu-seccomp.c b/qemu-seccomp.c > index 5ef36890da..5aa6590386 100644 > --- a/qemu-seccomp.c > +++ b/qemu-seccomp.c > @@ -31,6 +31,19 @@ struct QemuSeccompSyscall { > uint8_t priority; > }; > > +static const struct QemuSeccompSyscall privileged_syscalls[] = { > + { SCMP_SYS(setuid), 255 }, > + { SCMP_SYS(setgid), 255 }, > + { SCMP_SYS(setpgid), 255 }, > + { SCMP_SYS(setsid), 255 }, > + { SCMP_SYS(setreuid), 255 }, > + { SCMP_SYS(setregid), 255 }, > + { SCMP_SYS(setresuid), 255 }, > + { SCMP_SYS(setresgid), 255 }, > + { SCMP_SYS(setfsuid), 255 }, > + { SCMP_SYS(setfsgid), 255 }, > +}; > + > static const struct QemuSeccompSyscall obsolete[] = { > { SCMP_SYS(readdir), 255 }, > { SCMP_SYS(_sysctl), 255 }, > @@ -125,6 +138,21 @@ int seccomp_start(uint8_t seccomp_opts) > } > } > > + if (seccomp_opts & PRIVILEGED) { > + for (i = 0; i < ARRAY_SIZE(privileged_syscalls); i++) { > + rc = seccomp_rule_add(ctx, SCMP_ACT_KILL, privileged_syscalls[i].num, 0); > + if (rc < 0) { > + goto seccomp_return; > + } > + rc = seccomp_syscall_priority(ctx, privileged_syscalls[i].num, > + privileged_syscalls[i].priority); > + if (rc < 0) { > + goto seccomp_return; > + } > + } > + } > + > + > rc = seccomp_load(ctx); > > seccomp_return: > diff --git a/vl.c b/vl.c > index 7b08b3383b..d071e240b0 100644 > --- a/vl.c > +++ b/vl.c > @@ -273,6 +273,10 @@ static QemuOptsList qemu_sandbox_opts = { > .name = "obsolete", > .type = QEMU_OPT_STRING, > }, > + { > + .name = "elevateprivileges", > + .type = QEMU_OPT_STRING, > + }, > { /* end of list */ } > }, > }; > @@ -1045,6 +1049,13 @@ static int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp) > } > } > > + value = qemu_opt_get(opts,"elevateprivileges"); > + if (value) { > + if (strcmp(value, "deny") == 0) { > + seccomp_opts |= PRIVILEGED; > + } > + } > + > if (seccomp_start(seccomp_opts) < 0) { > error_report("failed to install seccomp syscall filter " > "in the kernel"); >
On Tue, Mar 14, 2017 at 12:47:10PM +0100, Paolo Bonzini wrote: > > > On 14/03/2017 12:32, Eduardo Otubo wrote: > > This patch introduces the new argument [,elevateprivileges=deny] to the > > `-sandbox on'. It avoids Qemu process to elevate its privileges by > > blacklisting all set*uid|gid system calls > > > > Signed-off-by: Eduardo Otubo <eduardo.otubo@profitbricks.com> > > --- > > include/sysemu/seccomp.h | 1 + > > qemu-options.hx | 8 ++++++-- > > qemu-seccomp.c | 28 ++++++++++++++++++++++++++++ > > vl.c | 11 +++++++++++ > > 4 files changed, 46 insertions(+), 2 deletions(-) > > > > diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h > > index 7a7bde246b..e6e78d85ce 100644 > > --- a/include/sysemu/seccomp.h > > +++ b/include/sysemu/seccomp.h > > @@ -16,6 +16,7 @@ > > #define QEMU_SECCOMP_H > > > > #define OBSOLETE 0x0001 > > +#define PRIVILEGED 0x0010 > > > > #include <seccomp.h> > > > > diff --git a/qemu-options.hx b/qemu-options.hx > > index 1403d0c85f..47018db5aa 100644 > > --- a/qemu-options.hx > > +++ b/qemu-options.hx > > @@ -3732,8 +3732,10 @@ Old param mode (ARM only). > > ETEXI > > > > DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \ > > - "-sandbox on[,obsolete=allow] Enable seccomp mode 2 system call filter (default 'off').\n" \ > > - " obsolete: Allow obsolete system calls", > > + "-sandbox on[,obsolete=allow][,elevateprivileges=deny]\n" \ > > + " Enable seccomp mode 2 system call filter (default 'off').\n" \ > > + " obsolete: Allow obsolete system calls\n" \ > > + " elevateprivileges: avoids Qemu process to elevate its privileges by blacklisting all set*uid|gid system calls", > > Why allow these by default? The goal is that '-sandbox on' should not break *any* QEMU feature. It needs to be a safe thing that people can unconditionally turn on without thinking about it. The QEMU bridge helper requires setuid privs, hence elevated privileges needs to be permitted by default. Similarly I could see the qemu ifup scripts, or migrate 'exec' command wanting to elevate privs via setuid binaries if QEMU was running unprivileged. Regards, Daniel
On 14/03/2017 12:52, Daniel P. Berrange wrote: >>> DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \ >>> - "-sandbox on[,obsolete=allow] Enable seccomp mode 2 system call filter (default 'off').\n" \ >>> - " obsolete: Allow obsolete system calls", >>> + "-sandbox on[,obsolete=allow][,elevateprivileges=deny]\n" \ >>> + " Enable seccomp mode 2 system call filter (default 'off').\n" \ >>> + " obsolete: Allow obsolete system calls\n" \ >>> + " elevateprivileges: avoids Qemu process to elevate its privileges by blacklisting all set*uid|gid system calls", >> Why allow these by default? > The goal is that '-sandbox on' should not break *any* QEMU feature. It > needs to be a safe thing that people can unconditionally turn on without > thinking about it. Sure, but what advantages would it provide if the default blacklist does not block anything meaningful? At the very least, spawn=deny should default elevateprivileges to deny too. I think there should be a list (as small as possible) of features that are sacrificed by "-sandbox on". > The QEMU bridge helper requires setuid privs, hence > elevated privileges needs to be permitted by default. QEMU itself should not be getting additional privileges, only the helper and in turn the helper or ifup scripts can be limited through MAC. The issue is that seccomp persists across execve. Currently, unprivileged users are only allowed to install seccomp filters if no_new_privs is set. Would it make sense if seccomp filters without no_new_privs succeeded, except that the filter would not persist across execve of binaries with setuid, setgid or file capabilities? Then the spawn option could be a tri-state with the choice of allow, deny and no_new_privs: - elevateprivileges=allow,spawn=allow: legacy for old kernels - elevateprivileges=deny,spawn=allow: can run privileged helpers - elevateprivileges=deny,spawn=deny: cannot run helpers at all - elevateprivileges=deny,spawn=no_new_privs: can run unprivileged helpers only Paolo > Similarly I could > see the qemu ifup scripts, or migrate 'exec' command wanting to elevate > privs via setuid binaries if QEMU was running unprivileged.
On Tue, Mar 14, 2017 at 01:13:15PM +0100, Paolo Bonzini wrote: > > > On 14/03/2017 12:52, Daniel P. Berrange wrote: > >>> DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \ > >>> - "-sandbox on[,obsolete=allow] Enable seccomp mode 2 system call filter (default 'off').\n" \ > >>> - " obsolete: Allow obsolete system calls", > >>> + "-sandbox on[,obsolete=allow][,elevateprivileges=deny]\n" \ > >>> + " Enable seccomp mode 2 system call filter (default 'off').\n" \ > >>> + " obsolete: Allow obsolete system calls\n" \ > >>> + " elevateprivileges: avoids Qemu process to elevate its privileges by blacklisting all set*uid|gid system calls", > >> Why allow these by default? > > The goal is that '-sandbox on' should not break *any* QEMU feature. It > > needs to be a safe thing that people can unconditionally turn on without > > thinking about it. > > Sure, but what advantages would it provide if the default blacklist does > not block anything meaningful? At the very least, spawn=deny should > default elevateprivileges to deny too. Yep, having spawn=deny imply elevateprivileges=deny is reasonable IMHO. > I think there should be a list (as small as possible) of features that > are sacrificed by "-sandbox on". That breaks the key goal that '-sandbox on' should never break a valid QEMU configuration, no matter how obscure, and would thus continue to discourage people from turning it on by default. Yes, a bare '-sandbox on' is very loose, but think of it as just being a building block. 90% of the time the user or mgmt app would want to turn on extra flags to lock it down more meaningfully, by explicitly blocking ability to use feature they know won't be needed. > > The QEMU bridge helper requires setuid privs, hence > > elevated privileges needs to be permitted by default. > > QEMU itself should not be getting additional privileges, only the helper > and in turn the helper or ifup scripts can be limited through MAC. The > issue is that seccomp persists across execve. That's true. > Currently, unprivileged users are only allowed to install seccomp > filters if no_new_privs is set. Would it make sense if seccomp filters > without no_new_privs succeeded, except that the filter would not persist > across execve of binaries with setuid, setgid or file capabilities? > > Then the spawn option could be a tri-state with the choice of allow, > deny and no_new_privs: > > - elevateprivileges=allow,spawn=allow: legacy for old kernels > - elevateprivileges=deny,spawn=allow: can run privileged helpers > - elevateprivileges=deny,spawn=deny: cannot run helpers at all > - elevateprivileges=deny,spawn=no_new_privs: can run unprivileged > helpers only That could work, but I think that syntax is making it uneccessarily complex to understand. I don't like how it introduces a semantic dependancy between the elevateprivileges & spawn flags i.e. the interpretation of elevateprivileges=deny, varies according to what you set for spawn= option. I'd be more inclined to make elevateprivileges be a tri-state instead e.g. elevateprivileges=allow|deny|children Regards, Daniel
On Tue, Mar 14, 2017 at 01=13=15PM +0100, Paolo Bonzini wrote: > > > On 14/03/2017 12:52, Daniel P. Berrange wrote: > >>> DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \ > >>> - "-sandbox on[,obsolete=allow] Enable seccomp mode 2 system call filter (default 'off').\n" \ > >>> - " obsolete: Allow obsolete system calls", > >>> + "-sandbox on[,obsolete=allow][,elevateprivileges=deny]\n" \ > >>> + " Enable seccomp mode 2 system call filter (default 'off').\n" \ > >>> + " obsolete: Allow obsolete system calls\n" \ > >>> + " elevateprivileges: avoids Qemu process to elevate its privileges by blacklisting all set*uid|gid system calls", > >> Why allow these by default? > > The goal is that '-sandbox on' should not break *any* QEMU feature. It > > needs to be a safe thing that people can unconditionally turn on without > > thinking about it. > > Sure, but what advantages would it provide if the default blacklist does > not block anything meaningful? At the very least, spawn=deny should > default elevateprivileges to deny too. > > I think there should be a list (as small as possible) of features that > are sacrificed by "-sandbox on". Can you give an example of such a list? > > > The QEMU bridge helper requires setuid privs, hence > > elevated privileges needs to be permitted by default. > > QEMU itself should not be getting additional privileges, only the helper > and in turn the helper or ifup scripts can be limited through MAC. The > issue is that seccomp persists across execve. > > Currently, unprivileged users are only allowed to install seccomp > filters if no_new_privs is set. Would it make sense if seccomp filters > without no_new_privs succeeded, except that the filter would not persist > across execve of binaries with setuid, setgid or file capabilities? Yes, it does make sense. Using seccomp_attr_set() and the flag SCMP_FLTATR_CTL_NNP we can disable NO_NEW_PRIVS. > > Then the spawn option could be a tri-state with the choice of allow, > deny and no_new_privs: > > - elevateprivileges=allow,spawn=allow: legacy for old kernels > - elevateprivileges=deny,spawn=allow: can run privileged helpers > - elevateprivileges=deny,spawn=deny: cannot run helpers at all > - elevateprivileges=deny,spawn=no_new_privs: can run unprivileged > helpers only I think it does look interesting to me this model.
On Tue, Mar 14, 2017 at 12=24=55PM +0000, Daniel P. Berrange wrote: > On Tue, Mar 14, 2017 at 01:13:15PM +0100, Paolo Bonzini wrote: > > > > > > On 14/03/2017 12:52, Daniel P. Berrange wrote: > > >>> DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \ > > >>> - "-sandbox on[,obsolete=allow] Enable seccomp mode 2 system call filter (default 'off').\n" \ > > >>> - " obsolete: Allow obsolete system calls", > > >>> + "-sandbox on[,obsolete=allow][,elevateprivileges=deny]\n" \ > > >>> + " Enable seccomp mode 2 system call filter (default 'off').\n" \ > > >>> + " obsolete: Allow obsolete system calls\n" \ > > >>> + " elevateprivileges: avoids Qemu process to elevate its privileges by blacklisting all set*uid|gid system calls", > > >> Why allow these by default? > > > The goal is that '-sandbox on' should not break *any* QEMU feature. It > > > needs to be a safe thing that people can unconditionally turn on without > > > thinking about it. > > > > Sure, but what advantages would it provide if the default blacklist does > > not block anything meaningful? At the very least, spawn=deny should > > default elevateprivileges to deny too. > > Yep, having spawn=deny imply elevateprivileges=deny is reasonable IMHO. > > > I think there should be a list (as small as possible) of features that > > are sacrificed by "-sandbox on". > > That breaks the key goal that '-sandbox on' should never break a valid > QEMU configuration, no matter how obscure, and would thus continue to > discourage people from turning it on by default. > > Yes, a bare '-sandbox on' is very loose, but think of it as just being > a building block. 90% of the time the user or mgmt app would want to > turn on extra flags to lock it down more meaningfully, by explicitly > blocking ability to use feature they know won't be needed. > > > > The QEMU bridge helper requires setuid privs, hence > > > elevated privileges needs to be permitted by default. > > > > QEMU itself should not be getting additional privileges, only the helper > > and in turn the helper or ifup scripts can be limited through MAC. The > > issue is that seccomp persists across execve. > > That's true. > > > Currently, unprivileged users are only allowed to install seccomp > > filters if no_new_privs is set. Would it make sense if seccomp filters > > without no_new_privs succeeded, except that the filter would not persist > > across execve of binaries with setuid, setgid or file capabilities? > > > > Then the spawn option could be a tri-state with the choice of allow, > > deny and no_new_privs: > > > > - elevateprivileges=allow,spawn=allow: legacy for old kernels > > - elevateprivileges=deny,spawn=allow: can run privileged helpers > > - elevateprivileges=deny,spawn=deny: cannot run helpers at all > > - elevateprivileges=deny,spawn=no_new_privs: can run unprivileged > > helpers only > > That could work, but I think that syntax is making it uneccessarily > complex to understand. I don't like how it introduces a semantic > dependancy between the elevateprivileges & spawn flags i.e. the > interpretation of elevateprivileges=deny, varies according to what > you set for spawn= option. > > I'd be more inclined to make elevateprivileges be a tri-state instead > e.g. > > elevateprivileges=allow|deny|children > Still weird but better than the combination of elevateprivileges and spawn. Perhaps this has a better semantics? elevateprivileges=allow|deny|no_new_privs
On 14/03/2017 13:32, Eduardo Otubo wrote: > On Tue, Mar 14, 2017 at 01=13=15PM +0100, Paolo Bonzini wrote: >> >> >> On 14/03/2017 12:52, Daniel P. Berrange wrote: >>>>> DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \ >>>>> - "-sandbox on[,obsolete=allow] Enable seccomp mode 2 system call filter (default 'off').\n" \ >>>>> - " obsolete: Allow obsolete system calls", >>>>> + "-sandbox on[,obsolete=allow][,elevateprivileges=deny]\n" \ >>>>> + " Enable seccomp mode 2 system call filter (default 'off').\n" \ >>>>> + " obsolete: Allow obsolete system calls\n" \ >>>>> + " elevateprivileges: avoids Qemu process to elevate its privileges by blacklisting all set*uid|gid system calls", >>>> Why allow these by default? >>> The goal is that '-sandbox on' should not break *any* QEMU feature. It >>> needs to be a safe thing that people can unconditionally turn on without >>> thinking about it. >> >> Sure, but what advantages would it provide if the default blacklist does >> not block anything meaningful? At the very least, spawn=deny should >> default elevateprivileges to deny too. >> >> I think there should be a list (as small as possible) of features that >> are sacrificed by "-sandbox on". > > Can you give an example of such a list? Well, anything that makes "-sandbox on" more than security theater... I would consider it a necessary evil, not a good thing to have such a list. Due to NO_NEW_PRIVS, I think non-migration fork/exec should be on the list, but hopefully nothing else. >>> The QEMU bridge helper requires setuid privs, hence >>> elevated privileges needs to be permitted by default. >> >> QEMU itself should not be getting additional privileges, only the helper >> and in turn the helper or ifup scripts can be limited through MAC. The >> issue is that seccomp persists across execve. >> >> Currently, unprivileged users are only allowed to install seccomp >> filters if no_new_privs is set. Would it make sense if seccomp filters >> without no_new_privs succeeded, except that the filter would not persist >> across execve of binaries with setuid, setgid or file capabilities? > > Yes, it does make sense. Using seccomp_attr_set() and the flag > SCMP_FLTATR_CTL_NNP we can disable NO_NEW_PRIVS. That however in turn requires CAP_SYS_ADMIN. Without kernel changes, it's not possible. Paolo >> >> Then the spawn option could be a tri-state with the choice of allow, >> deny and no_new_privs: >> >> - elevateprivileges=allow,spawn=allow: legacy for old kernels >> - elevateprivileges=deny,spawn=allow: can run privileged helpers >> - elevateprivileges=deny,spawn=deny: cannot run helpers at all >> - elevateprivileges=deny,spawn=no_new_privs: can run unprivileged >> helpers only > > I think it does look interesting to me this model. >
On Tue, Mar 14, 2017 at 01:48:59PM +0100, Paolo Bonzini wrote: > > > On 14/03/2017 13:32, Eduardo Otubo wrote: > > On Tue, Mar 14, 2017 at 01=13=15PM +0100, Paolo Bonzini wrote: > >> > >> > >> On 14/03/2017 12:52, Daniel P. Berrange wrote: > >>>>> DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \ > >>>>> - "-sandbox on[,obsolete=allow] Enable seccomp mode 2 system call filter (default 'off').\n" \ > >>>>> - " obsolete: Allow obsolete system calls", > >>>>> + "-sandbox on[,obsolete=allow][,elevateprivileges=deny]\n" \ > >>>>> + " Enable seccomp mode 2 system call filter (default 'off').\n" \ > >>>>> + " obsolete: Allow obsolete system calls\n" \ > >>>>> + " elevateprivileges: avoids Qemu process to elevate its privileges by blacklisting all set*uid|gid system calls", > >>>> Why allow these by default? > >>> The goal is that '-sandbox on' should not break *any* QEMU feature. It > >>> needs to be a safe thing that people can unconditionally turn on without > >>> thinking about it. > >> > >> Sure, but what advantages would it provide if the default blacklist does > >> not block anything meaningful? At the very least, spawn=deny should > >> default elevateprivileges to deny too. > >> > >> I think there should be a list (as small as possible) of features that > >> are sacrificed by "-sandbox on". > > > > Can you give an example of such a list? > > Well, anything that makes "-sandbox on" more than security theater... I > would consider it a necessary evil, not a good thing to have such a list. > Due to NO_NEW_PRIVS, I think non-migration fork/exec should be on the > list, but hopefully nothing else. The current semantics of '-sandbox on' are that it is documented to not break any valid QEMU features. By blocking fork/exec, we make a semantic change to the existing behaviour of '-sandbox on'. So any application which has been using '-sandbox on' historically, is at risk of being broken when upgrading to QEMU 2.10. It would force the application to generate a different CLI for new QEMU - ie to avoid being broken they would have to now add elevateprivs=allow, spawn=allow. I think we have to maintain semantic compat with existing usage, which means that '-sandbox on' has to remain security theatre. So if we want a default config to be more restrictive, then I think you realistically have to turn the default parameter into a tri-state instead of bool, ie allow '-sandbox default' as an alternative to '-sandbox on' that was slightly stronger by blocking fork/exec. Regards, Daniel
On 14/03/2017 14:02, Daniel P. Berrange wrote: >>> Can you give an example of such a list? >> >> Well, anything that makes "-sandbox on" more than security theater... I >> would consider it a necessary evil, not a good thing to have such a list. >> >> Due to NO_NEW_PRIVS, I think non-migration fork/exec should be on the >> list, but hopefully nothing else. > > The current semantics of '-sandbox on' are that it is documented to not > break any valid QEMU features. By blocking fork/exec, we make a semantic > change to the existing behaviour of '-sandbox on'. I don't propose to block fork/exec. I propose not to whitelist setuid, as is the case now too. It wouldn't be backwards-incompatible. > So any application > which has been using '-sandbox on' historically, is at risk of being > broken when upgrading to QEMU 2.10. It would force the application to > generate a different CLI for new QEMU - ie to avoid being broken they > would have to now add elevateprivs=allow, spawn=allow. I think we have > to maintain semantic compat with existing usage, which means that > '-sandbox on' has to remain security theatre. > > So if we want a default config to be more restrictive, then I think you > realistically have to turn the default parameter into a tri-state > instead of bool, ie allow '-sandbox default' as an alternative to > '-sandbox on' that was slightly stronger by blocking fork/exec. Or just print a warning that "-sandbox on" is useless. I guess that would be okay too if we want to be "stronger" than backwards-compatible. Paolo
On Tue, Mar 14, 2017 at 02:05:32PM +0100, Paolo Bonzini wrote: > > > On 14/03/2017 14:02, Daniel P. Berrange wrote: > >>> Can you give an example of such a list? > >> > >> Well, anything that makes "-sandbox on" more than security theater... I > >> would consider it a necessary evil, not a good thing to have such a list. > >> > >> Due to NO_NEW_PRIVS, I think non-migration fork/exec should be on the > >> list, but hopefully nothing else. > > > > The current semantics of '-sandbox on' are that it is documented to not > > break any valid QEMU features. By blocking fork/exec, we make a semantic > > change to the existing behaviour of '-sandbox on'. > > I don't propose to block fork/exec. I propose not to whitelist setuid, > as is the case now too. It wouldn't be backwards-incompatible. That we don't whitelist setuid is a bug in the current impl vs the intended semantics of '-sandbox on', where we are denying valid features. > > So any application > > which has been using '-sandbox on' historically, is at risk of being > > broken when upgrading to QEMU 2.10. It would force the application to > > generate a different CLI for new QEMU - ie to avoid being broken they > > would have to now add elevateprivs=allow, spawn=allow. I think we have > > to maintain semantic compat with existing usage, which means that > > '-sandbox on' has to remain security theatre. > > > > So if we want a default config to be more restrictive, then I think you > > realistically have to turn the default parameter into a tri-state > > instead of bool, ie allow '-sandbox default' as an alternative to > > '-sandbox on' that was slightly stronger by blocking fork/exec. > > Or just print a warning that "-sandbox on" is useless. I guess that > would be okay too if we want to be "stronger" than backwards-compatible. It isn't totally useless - it is blocking access to a number of system calls that QEMU should never use. eg things like reboot, load_module, etc. In the absence of other security protections it is useless, but if you combine with other mechanisms like DAC or LSM, then the default seccomp rules offer some extra protection. Admittedly not alot, but it is non-zero. The extra opt-in protecton flags then increase the value at cost of killing some features. Regards, Daniel
diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h index 7a7bde246b..e6e78d85ce 100644 --- a/include/sysemu/seccomp.h +++ b/include/sysemu/seccomp.h @@ -16,6 +16,7 @@ #define QEMU_SECCOMP_H #define OBSOLETE 0x0001 +#define PRIVILEGED 0x0010 #include <seccomp.h> diff --git a/qemu-options.hx b/qemu-options.hx index 1403d0c85f..47018db5aa 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -3732,8 +3732,10 @@ Old param mode (ARM only). ETEXI DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \ - "-sandbox on[,obsolete=allow] Enable seccomp mode 2 system call filter (default 'off').\n" \ - " obsolete: Allow obsolete system calls", + "-sandbox on[,obsolete=allow][,elevateprivileges=deny]\n" \ + " Enable seccomp mode 2 system call filter (default 'off').\n" \ + " obsolete: Allow obsolete system calls\n" \ + " elevateprivileges: avoids Qemu process to elevate its privileges by blacklisting all set*uid|gid system calls", QEMU_ARCH_ALL) STEXI @item -sandbox @var{arg}[,obsolete=@var{string}] @@ -3743,6 +3745,8 @@ disable it. The default is 'off'. @table @option @item obsolete=@var{string} Enable Obsolete system calls +@item elevateprivileges=@var{string} +Disable set*uid|gid systema calls @end table ETEXI diff --git a/qemu-seccomp.c b/qemu-seccomp.c index 5ef36890da..5aa6590386 100644 --- a/qemu-seccomp.c +++ b/qemu-seccomp.c @@ -31,6 +31,19 @@ struct QemuSeccompSyscall { uint8_t priority; }; +static const struct QemuSeccompSyscall privileged_syscalls[] = { + { SCMP_SYS(setuid), 255 }, + { SCMP_SYS(setgid), 255 }, + { SCMP_SYS(setpgid), 255 }, + { SCMP_SYS(setsid), 255 }, + { SCMP_SYS(setreuid), 255 }, + { SCMP_SYS(setregid), 255 }, + { SCMP_SYS(setresuid), 255 }, + { SCMP_SYS(setresgid), 255 }, + { SCMP_SYS(setfsuid), 255 }, + { SCMP_SYS(setfsgid), 255 }, +}; + static const struct QemuSeccompSyscall obsolete[] = { { SCMP_SYS(readdir), 255 }, { SCMP_SYS(_sysctl), 255 }, @@ -125,6 +138,21 @@ int seccomp_start(uint8_t seccomp_opts) } } + if (seccomp_opts & PRIVILEGED) { + for (i = 0; i < ARRAY_SIZE(privileged_syscalls); i++) { + rc = seccomp_rule_add(ctx, SCMP_ACT_KILL, privileged_syscalls[i].num, 0); + if (rc < 0) { + goto seccomp_return; + } + rc = seccomp_syscall_priority(ctx, privileged_syscalls[i].num, + privileged_syscalls[i].priority); + if (rc < 0) { + goto seccomp_return; + } + } + } + + rc = seccomp_load(ctx); seccomp_return: diff --git a/vl.c b/vl.c index 7b08b3383b..d071e240b0 100644 --- a/vl.c +++ b/vl.c @@ -273,6 +273,10 @@ static QemuOptsList qemu_sandbox_opts = { .name = "obsolete", .type = QEMU_OPT_STRING, }, + { + .name = "elevateprivileges", + .type = QEMU_OPT_STRING, + }, { /* end of list */ } }, }; @@ -1045,6 +1049,13 @@ static int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp) } } + value = qemu_opt_get(opts,"elevateprivileges"); + if (value) { + if (strcmp(value, "deny") == 0) { + seccomp_opts |= PRIVILEGED; + } + } + if (seccomp_start(seccomp_opts) < 0) { error_report("failed to install seccomp syscall filter " "in the kernel");
This patch introduces the new argument [,elevateprivileges=deny] to the `-sandbox on'. It avoids Qemu process to elevate its privileges by blacklisting all set*uid|gid system calls Signed-off-by: Eduardo Otubo <eduardo.otubo@profitbricks.com> --- include/sysemu/seccomp.h | 1 + qemu-options.hx | 8 ++++++-- qemu-seccomp.c | 28 ++++++++++++++++++++++++++++ vl.c | 11 +++++++++++ 4 files changed, 46 insertions(+), 2 deletions(-)