diff mbox

[3/5] seccomp: add elevateprivileges argument to command line

Message ID 20170314113209.12025-4-eduardo.otubo@profitbricks.com (mailing list archive)
State New, archived
Headers show

Commit Message

Eduardo Otubo March 14, 2017, 11:32 a.m. UTC
This patch introduces the new argument [,elevateprivileges=deny] to the
`-sandbox on'. It avoids Qemu process to elevate its privileges by
blacklisting all set*uid|gid system calls

Signed-off-by: Eduardo Otubo <eduardo.otubo@profitbricks.com>
---
 include/sysemu/seccomp.h |  1 +
 qemu-options.hx          |  8 ++++++--
 qemu-seccomp.c           | 28 ++++++++++++++++++++++++++++
 vl.c                     | 11 +++++++++++
 4 files changed, 46 insertions(+), 2 deletions(-)

Comments

Paolo Bonzini March 14, 2017, 11:47 a.m. UTC | #1
On 14/03/2017 12:32, Eduardo Otubo wrote:
> This patch introduces the new argument [,elevateprivileges=deny] to the
> `-sandbox on'. It avoids Qemu process to elevate its privileges by
> blacklisting all set*uid|gid system calls
> 
> Signed-off-by: Eduardo Otubo <eduardo.otubo@profitbricks.com>
> ---
>  include/sysemu/seccomp.h |  1 +
>  qemu-options.hx          |  8 ++++++--
>  qemu-seccomp.c           | 28 ++++++++++++++++++++++++++++
>  vl.c                     | 11 +++++++++++
>  4 files changed, 46 insertions(+), 2 deletions(-)
> 
> diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h
> index 7a7bde246b..e6e78d85ce 100644
> --- a/include/sysemu/seccomp.h
> +++ b/include/sysemu/seccomp.h
> @@ -16,6 +16,7 @@
>  #define QEMU_SECCOMP_H
>  
>  #define OBSOLETE    0x0001
> +#define PRIVILEGED  0x0010
>  
>  #include <seccomp.h>
>  
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 1403d0c85f..47018db5aa 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -3732,8 +3732,10 @@ Old param mode (ARM only).
>  ETEXI
>  
>  DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
> -    "-sandbox on[,obsolete=allow]  Enable seccomp mode 2 system call filter (default 'off').\n" \
> -    "                              obsolete: Allow obsolete system calls",
> +    "-sandbox on[,obsolete=allow][,elevateprivileges=deny]\n" \
> +    "                               Enable seccomp mode 2 system call filter (default 'off').\n" \
> +    "                               obsolete: Allow obsolete system calls\n" \
> +    "                               elevateprivileges: avoids Qemu process to elevate its privileges by blacklisting all set*uid|gid system calls",

Why allow these by default?

Paolo

>      QEMU_ARCH_ALL)
>  STEXI
>  @item -sandbox @var{arg}[,obsolete=@var{string}]
> @@ -3743,6 +3745,8 @@ disable it.  The default is 'off'.
>  @table @option
>  @item obsolete=@var{string}
>  Enable Obsolete system calls
> +@item elevateprivileges=@var{string}
> +Disable set*uid|gid systema calls
>  @end table
>  ETEXI
>  
> diff --git a/qemu-seccomp.c b/qemu-seccomp.c
> index 5ef36890da..5aa6590386 100644
> --- a/qemu-seccomp.c
> +++ b/qemu-seccomp.c
> @@ -31,6 +31,19 @@ struct QemuSeccompSyscall {
>      uint8_t priority;
>  };
>  
> +static const struct QemuSeccompSyscall privileged_syscalls[] = {
> +    { SCMP_SYS(setuid), 255 },
> +    { SCMP_SYS(setgid), 255 },
> +    { SCMP_SYS(setpgid), 255 },
> +    { SCMP_SYS(setsid), 255 },
> +    { SCMP_SYS(setreuid), 255 },
> +    { SCMP_SYS(setregid), 255 },
> +    { SCMP_SYS(setresuid), 255 },
> +    { SCMP_SYS(setresgid), 255 },
> +    { SCMP_SYS(setfsuid), 255 },
> +    { SCMP_SYS(setfsgid), 255 },
> +};
> +
>  static const struct QemuSeccompSyscall obsolete[] = {
>      { SCMP_SYS(readdir), 255 },
>      { SCMP_SYS(_sysctl), 255 },
> @@ -125,6 +138,21 @@ int seccomp_start(uint8_t seccomp_opts)
>          }
>      }
>  
> +    if (seccomp_opts & PRIVILEGED) {
> +        for (i = 0; i < ARRAY_SIZE(privileged_syscalls); i++) {
> +            rc = seccomp_rule_add(ctx, SCMP_ACT_KILL, privileged_syscalls[i].num, 0);
> +            if (rc < 0) {
> +                goto seccomp_return;
> +            }
> +            rc = seccomp_syscall_priority(ctx, privileged_syscalls[i].num,
> +                    privileged_syscalls[i].priority);
> +            if (rc < 0) {
> +                goto seccomp_return;
> +            }
> +        }
> +    }
> +
> +
>      rc = seccomp_load(ctx);
>  
>    seccomp_return:
> diff --git a/vl.c b/vl.c
> index 7b08b3383b..d071e240b0 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -273,6 +273,10 @@ static QemuOptsList qemu_sandbox_opts = {
>              .name = "obsolete",
>              .type = QEMU_OPT_STRING,
>          },
> +        {
> +            .name = "elevateprivileges",
> +            .type = QEMU_OPT_STRING,
> +        },
>          { /* end of list */ }
>      },
>  };
> @@ -1045,6 +1049,13 @@ static int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp)
>              }
>          }
>  
> +        value = qemu_opt_get(opts,"elevateprivileges");
> +        if (value) {
> +            if (strcmp(value, "deny") == 0) {
> +                seccomp_opts |= PRIVILEGED;
> +            }
> +        }
> +
>          if (seccomp_start(seccomp_opts) < 0) {
>              error_report("failed to install seccomp syscall filter "
>                           "in the kernel");
>
Daniel P. Berrangé March 14, 2017, 11:52 a.m. UTC | #2
On Tue, Mar 14, 2017 at 12:47:10PM +0100, Paolo Bonzini wrote:
> 
> 
> On 14/03/2017 12:32, Eduardo Otubo wrote:
> > This patch introduces the new argument [,elevateprivileges=deny] to the
> > `-sandbox on'. It avoids Qemu process to elevate its privileges by
> > blacklisting all set*uid|gid system calls
> > 
> > Signed-off-by: Eduardo Otubo <eduardo.otubo@profitbricks.com>
> > ---
> >  include/sysemu/seccomp.h |  1 +
> >  qemu-options.hx          |  8 ++++++--
> >  qemu-seccomp.c           | 28 ++++++++++++++++++++++++++++
> >  vl.c                     | 11 +++++++++++
> >  4 files changed, 46 insertions(+), 2 deletions(-)
> > 
> > diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h
> > index 7a7bde246b..e6e78d85ce 100644
> > --- a/include/sysemu/seccomp.h
> > +++ b/include/sysemu/seccomp.h
> > @@ -16,6 +16,7 @@
> >  #define QEMU_SECCOMP_H
> >  
> >  #define OBSOLETE    0x0001
> > +#define PRIVILEGED  0x0010
> >  
> >  #include <seccomp.h>
> >  
> > diff --git a/qemu-options.hx b/qemu-options.hx
> > index 1403d0c85f..47018db5aa 100644
> > --- a/qemu-options.hx
> > +++ b/qemu-options.hx
> > @@ -3732,8 +3732,10 @@ Old param mode (ARM only).
> >  ETEXI
> >  
> >  DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
> > -    "-sandbox on[,obsolete=allow]  Enable seccomp mode 2 system call filter (default 'off').\n" \
> > -    "                              obsolete: Allow obsolete system calls",
> > +    "-sandbox on[,obsolete=allow][,elevateprivileges=deny]\n" \
> > +    "                               Enable seccomp mode 2 system call filter (default 'off').\n" \
> > +    "                               obsolete: Allow obsolete system calls\n" \
> > +    "                               elevateprivileges: avoids Qemu process to elevate its privileges by blacklisting all set*uid|gid system calls",
> 
> Why allow these by default?

The goal is that '-sandbox on' should not break *any* QEMU feature. It
needs to be a safe thing that people can unconditionally turn on without
thinking about it.  The QEMU bridge helper  requires setuid privs, hence
elevated privileges needs to be permitted by default. Similarly I could
see the qemu ifup  scripts, or migrate 'exec' command wanting to elevate
privs via setuid binaries if QEMU was running unprivileged.

Regards,
Daniel
Paolo Bonzini March 14, 2017, 12:13 p.m. UTC | #3
On 14/03/2017 12:52, Daniel P. Berrange wrote:
>>>  DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
>>> -    "-sandbox on[,obsolete=allow]  Enable seccomp mode 2 system call filter (default 'off').\n" \
>>> -    "                              obsolete: Allow obsolete system calls",
>>> +    "-sandbox on[,obsolete=allow][,elevateprivileges=deny]\n" \
>>> +    "                               Enable seccomp mode 2 system call filter (default 'off').\n" \
>>> +    "                               obsolete: Allow obsolete system calls\n" \
>>> +    "                               elevateprivileges: avoids Qemu process to elevate its privileges by blacklisting all set*uid|gid system calls",
>> Why allow these by default?
> The goal is that '-sandbox on' should not break *any* QEMU feature. It
> needs to be a safe thing that people can unconditionally turn on without
> thinking about it.

Sure, but what advantages would it provide if the default blacklist does
not block anything meaningful?  At the very least, spawn=deny should
default elevateprivileges to deny too.

I think there should be a list (as small as possible) of features that
are sacrificed by "-sandbox on".

> The QEMU bridge helper  requires setuid privs, hence
> elevated privileges needs to be permitted by default.

QEMU itself should not be getting additional privileges, only the helper
and in turn the helper or ifup scripts can be limited through MAC.  The
issue is that seccomp persists across execve.

Currently, unprivileged users are only allowed to install seccomp
filters if no_new_privs is set.  Would it make sense if seccomp filters
without no_new_privs succeeded, except that the filter would not persist
across execve of binaries with setuid, setgid or file capabilities?

Then the spawn option could be a tri-state with the choice of allow,
deny and no_new_privs:

- elevateprivileges=allow,spawn=allow: legacy for old kernels
- elevateprivileges=deny,spawn=allow: can run privileged helpers
- elevateprivileges=deny,spawn=deny: cannot run helpers at all
- elevateprivileges=deny,spawn=no_new_privs: can run unprivileged
helpers only

Paolo

> Similarly I could
> see the qemu ifup  scripts, or migrate 'exec' command wanting to elevate
> privs via setuid binaries if QEMU was running unprivileged.
Daniel P. Berrangé March 14, 2017, 12:24 p.m. UTC | #4
On Tue, Mar 14, 2017 at 01:13:15PM +0100, Paolo Bonzini wrote:
> 
> 
> On 14/03/2017 12:52, Daniel P. Berrange wrote:
> >>>  DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
> >>> -    "-sandbox on[,obsolete=allow]  Enable seccomp mode 2 system call filter (default 'off').\n" \
> >>> -    "                              obsolete: Allow obsolete system calls",
> >>> +    "-sandbox on[,obsolete=allow][,elevateprivileges=deny]\n" \
> >>> +    "                               Enable seccomp mode 2 system call filter (default 'off').\n" \
> >>> +    "                               obsolete: Allow obsolete system calls\n" \
> >>> +    "                               elevateprivileges: avoids Qemu process to elevate its privileges by blacklisting all set*uid|gid system calls",
> >> Why allow these by default?
> > The goal is that '-sandbox on' should not break *any* QEMU feature. It
> > needs to be a safe thing that people can unconditionally turn on without
> > thinking about it.
> 
> Sure, but what advantages would it provide if the default blacklist does
> not block anything meaningful?  At the very least, spawn=deny should
> default elevateprivileges to deny too.

Yep, having spawn=deny imply elevateprivileges=deny is reasonable IMHO.

> I think there should be a list (as small as possible) of features that
> are sacrificed by "-sandbox on".

That breaks the key goal that '-sandbox on' should never break a valid
QEMU configuration, no matter how obscure, and would thus continue to
discourage people from turning it on by default.

Yes, a bare '-sandbox on' is very loose, but think of it as just being
a building block. 90% of the time the user or mgmt app would want to
turn on extra flags to lock it down more meaningfully, by explicitly
blocking ability to use feature they know won't be needed. 

> > The QEMU bridge helper  requires setuid privs, hence
> > elevated privileges needs to be permitted by default.
> 
> QEMU itself should not be getting additional privileges, only the helper
> and in turn the helper or ifup scripts can be limited through MAC.  The
> issue is that seccomp persists across execve.

That's true.

> Currently, unprivileged users are only allowed to install seccomp
> filters if no_new_privs is set.  Would it make sense if seccomp filters
> without no_new_privs succeeded, except that the filter would not persist
> across execve of binaries with setuid, setgid or file capabilities?
> 
> Then the spawn option could be a tri-state with the choice of allow,
> deny and no_new_privs:
> 
> - elevateprivileges=allow,spawn=allow: legacy for old kernels
> - elevateprivileges=deny,spawn=allow: can run privileged helpers
> - elevateprivileges=deny,spawn=deny: cannot run helpers at all
> - elevateprivileges=deny,spawn=no_new_privs: can run unprivileged
> helpers only

That could work, but I think that syntax is making it uneccessarily
complex to understand. I don't like how it introduces a semantic
dependancy between the elevateprivileges & spawn flags i.e. the
interpretation of elevateprivileges=deny, varies according to what
you set for spawn= option.

I'd be more inclined to make elevateprivileges be a tri-state instead
e.g.

  elevateprivileges=allow|deny|children

Regards,
Daniel
Eduardo Otubo March 14, 2017, 12:32 p.m. UTC | #5
On Tue, Mar 14, 2017 at 01=13=15PM +0100, Paolo Bonzini wrote:
> 
> 
> On 14/03/2017 12:52, Daniel P. Berrange wrote:
> >>>  DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
> >>> -    "-sandbox on[,obsolete=allow]  Enable seccomp mode 2 system call filter (default 'off').\n" \
> >>> -    "                              obsolete: Allow obsolete system calls",
> >>> +    "-sandbox on[,obsolete=allow][,elevateprivileges=deny]\n" \
> >>> +    "                               Enable seccomp mode 2 system call filter (default 'off').\n" \
> >>> +    "                               obsolete: Allow obsolete system calls\n" \
> >>> +    "                               elevateprivileges: avoids Qemu process to elevate its privileges by blacklisting all set*uid|gid system calls",
> >> Why allow these by default?
> > The goal is that '-sandbox on' should not break *any* QEMU feature. It
> > needs to be a safe thing that people can unconditionally turn on without
> > thinking about it.
> 
> Sure, but what advantages would it provide if the default blacklist does
> not block anything meaningful?  At the very least, spawn=deny should
> default elevateprivileges to deny too.
> 
> I think there should be a list (as small as possible) of features that
> are sacrificed by "-sandbox on".

Can you give an example of such a list?

> 
> > The QEMU bridge helper  requires setuid privs, hence
> > elevated privileges needs to be permitted by default.
> 
> QEMU itself should not be getting additional privileges, only the helper
> and in turn the helper or ifup scripts can be limited through MAC.  The
> issue is that seccomp persists across execve.
> 
> Currently, unprivileged users are only allowed to install seccomp
> filters if no_new_privs is set.  Would it make sense if seccomp filters
> without no_new_privs succeeded, except that the filter would not persist
> across execve of binaries with setuid, setgid or file capabilities?

Yes, it does make sense. Using seccomp_attr_set() and the flag
SCMP_FLTATR_CTL_NNP we can disable NO_NEW_PRIVS. 

> 
> Then the spawn option could be a tri-state with the choice of allow,
> deny and no_new_privs:
> 
> - elevateprivileges=allow,spawn=allow: legacy for old kernels
> - elevateprivileges=deny,spawn=allow: can run privileged helpers
> - elevateprivileges=deny,spawn=deny: cannot run helpers at all
> - elevateprivileges=deny,spawn=no_new_privs: can run unprivileged
> helpers only

I think it does look interesting to me this model.
Eduardo Otubo March 14, 2017, 12:42 p.m. UTC | #6
On Tue, Mar 14, 2017 at 12=24=55PM +0000, Daniel P. Berrange wrote:
> On Tue, Mar 14, 2017 at 01:13:15PM +0100, Paolo Bonzini wrote:
> > 
> > 
> > On 14/03/2017 12:52, Daniel P. Berrange wrote:
> > >>>  DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
> > >>> -    "-sandbox on[,obsolete=allow]  Enable seccomp mode 2 system call filter (default 'off').\n" \
> > >>> -    "                              obsolete: Allow obsolete system calls",
> > >>> +    "-sandbox on[,obsolete=allow][,elevateprivileges=deny]\n" \
> > >>> +    "                               Enable seccomp mode 2 system call filter (default 'off').\n" \
> > >>> +    "                               obsolete: Allow obsolete system calls\n" \
> > >>> +    "                               elevateprivileges: avoids Qemu process to elevate its privileges by blacklisting all set*uid|gid system calls",
> > >> Why allow these by default?
> > > The goal is that '-sandbox on' should not break *any* QEMU feature. It
> > > needs to be a safe thing that people can unconditionally turn on without
> > > thinking about it.
> > 
> > Sure, but what advantages would it provide if the default blacklist does
> > not block anything meaningful?  At the very least, spawn=deny should
> > default elevateprivileges to deny too.
> 
> Yep, having spawn=deny imply elevateprivileges=deny is reasonable IMHO.
> 
> > I think there should be a list (as small as possible) of features that
> > are sacrificed by "-sandbox on".
> 
> That breaks the key goal that '-sandbox on' should never break a valid
> QEMU configuration, no matter how obscure, and would thus continue to
> discourage people from turning it on by default.
> 
> Yes, a bare '-sandbox on' is very loose, but think of it as just being
> a building block. 90% of the time the user or mgmt app would want to
> turn on extra flags to lock it down more meaningfully, by explicitly
> blocking ability to use feature they know won't be needed. 
> 
> > > The QEMU bridge helper  requires setuid privs, hence
> > > elevated privileges needs to be permitted by default.
> > 
> > QEMU itself should not be getting additional privileges, only the helper
> > and in turn the helper or ifup scripts can be limited through MAC.  The
> > issue is that seccomp persists across execve.
> 
> That's true.
> 
> > Currently, unprivileged users are only allowed to install seccomp
> > filters if no_new_privs is set.  Would it make sense if seccomp filters
> > without no_new_privs succeeded, except that the filter would not persist
> > across execve of binaries with setuid, setgid or file capabilities?
> > 
> > Then the spawn option could be a tri-state with the choice of allow,
> > deny and no_new_privs:
> > 
> > - elevateprivileges=allow,spawn=allow: legacy for old kernels
> > - elevateprivileges=deny,spawn=allow: can run privileged helpers
> > - elevateprivileges=deny,spawn=deny: cannot run helpers at all
> > - elevateprivileges=deny,spawn=no_new_privs: can run unprivileged
> > helpers only
> 
> That could work, but I think that syntax is making it uneccessarily
> complex to understand. I don't like how it introduces a semantic
> dependancy between the elevateprivileges & spawn flags i.e. the
> interpretation of elevateprivileges=deny, varies according to what
> you set for spawn= option.
> 
> I'd be more inclined to make elevateprivileges be a tri-state instead
> e.g.
> 
>   elevateprivileges=allow|deny|children
> 

Still weird but better than the combination of elevateprivileges and
spawn. Perhaps this has a better semantics?

    elevateprivileges=allow|deny|no_new_privs
Paolo Bonzini March 14, 2017, 12:48 p.m. UTC | #7
On 14/03/2017 13:32, Eduardo Otubo wrote:
> On Tue, Mar 14, 2017 at 01=13=15PM +0100, Paolo Bonzini wrote:
>>
>>
>> On 14/03/2017 12:52, Daniel P. Berrange wrote:
>>>>>  DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
>>>>> -    "-sandbox on[,obsolete=allow]  Enable seccomp mode 2 system call filter (default 'off').\n" \
>>>>> -    "                              obsolete: Allow obsolete system calls",
>>>>> +    "-sandbox on[,obsolete=allow][,elevateprivileges=deny]\n" \
>>>>> +    "                               Enable seccomp mode 2 system call filter (default 'off').\n" \
>>>>> +    "                               obsolete: Allow obsolete system calls\n" \
>>>>> +    "                               elevateprivileges: avoids Qemu process to elevate its privileges by blacklisting all set*uid|gid system calls",
>>>> Why allow these by default?
>>> The goal is that '-sandbox on' should not break *any* QEMU feature. It
>>> needs to be a safe thing that people can unconditionally turn on without
>>> thinking about it.
>>
>> Sure, but what advantages would it provide if the default blacklist does
>> not block anything meaningful?  At the very least, spawn=deny should
>> default elevateprivileges to deny too.
>>
>> I think there should be a list (as small as possible) of features that
>> are sacrificed by "-sandbox on".
> 
> Can you give an example of such a list?

Well, anything that makes "-sandbox on" more than security theater...  I
would consider it a necessary evil, not a good thing to have such a list.

Due to NO_NEW_PRIVS, I think non-migration fork/exec should be on the
list, but hopefully nothing else.

>>> The QEMU bridge helper  requires setuid privs, hence
>>> elevated privileges needs to be permitted by default.
>>
>> QEMU itself should not be getting additional privileges, only the helper
>> and in turn the helper or ifup scripts can be limited through MAC.  The
>> issue is that seccomp persists across execve.
>>
>> Currently, unprivileged users are only allowed to install seccomp
>> filters if no_new_privs is set.  Would it make sense if seccomp filters
>> without no_new_privs succeeded, except that the filter would not persist
>> across execve of binaries with setuid, setgid or file capabilities?
> 
> Yes, it does make sense. Using seccomp_attr_set() and the flag
> SCMP_FLTATR_CTL_NNP we can disable NO_NEW_PRIVS. 

That however in turn requires CAP_SYS_ADMIN.  Without kernel changes,
it's not possible.

Paolo

>>
>> Then the spawn option could be a tri-state with the choice of allow,
>> deny and no_new_privs:
>>
>> - elevateprivileges=allow,spawn=allow: legacy for old kernels
>> - elevateprivileges=deny,spawn=allow: can run privileged helpers
>> - elevateprivileges=deny,spawn=deny: cannot run helpers at all
>> - elevateprivileges=deny,spawn=no_new_privs: can run unprivileged
>> helpers only
> 
> I think it does look interesting to me this model.
>
Daniel P. Berrangé March 14, 2017, 1:02 p.m. UTC | #8
On Tue, Mar 14, 2017 at 01:48:59PM +0100, Paolo Bonzini wrote:
> 
> 
> On 14/03/2017 13:32, Eduardo Otubo wrote:
> > On Tue, Mar 14, 2017 at 01=13=15PM +0100, Paolo Bonzini wrote:
> >>
> >>
> >> On 14/03/2017 12:52, Daniel P. Berrange wrote:
> >>>>>  DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
> >>>>> -    "-sandbox on[,obsolete=allow]  Enable seccomp mode 2 system call filter (default 'off').\n" \
> >>>>> -    "                              obsolete: Allow obsolete system calls",
> >>>>> +    "-sandbox on[,obsolete=allow][,elevateprivileges=deny]\n" \
> >>>>> +    "                               Enable seccomp mode 2 system call filter (default 'off').\n" \
> >>>>> +    "                               obsolete: Allow obsolete system calls\n" \
> >>>>> +    "                               elevateprivileges: avoids Qemu process to elevate its privileges by blacklisting all set*uid|gid system calls",
> >>>> Why allow these by default?
> >>> The goal is that '-sandbox on' should not break *any* QEMU feature. It
> >>> needs to be a safe thing that people can unconditionally turn on without
> >>> thinking about it.
> >>
> >> Sure, but what advantages would it provide if the default blacklist does
> >> not block anything meaningful?  At the very least, spawn=deny should
> >> default elevateprivileges to deny too.
> >>
> >> I think there should be a list (as small as possible) of features that
> >> are sacrificed by "-sandbox on".
> > 
> > Can you give an example of such a list?
> 
> Well, anything that makes "-sandbox on" more than security theater...  I
> would consider it a necessary evil, not a good thing to have such a list.

> Due to NO_NEW_PRIVS, I think non-migration fork/exec should be on the
> list, but hopefully nothing else.

The current semantics of '-sandbox on' are that it is documented to not
break any valid QEMU features. By blocking fork/exec, we make a semantic
change to the existing behaviour of '-sandbox on'. So any application
which has been using '-sandbox on' historically, is at risk of being
broken when upgrading to QEMU 2.10. It would force the application to
generate a different CLI for new QEMU - ie to avoid being broken they
would have to now  add elevateprivs=allow, spawn=allow. I think we have
to maintain semantic compat with existing usage, which means that
'-sandbox on' has to remain security theatre.

So if we want a default config to be more restrictive, then I think you
realistically have to turn the default parameter into a tri-state
instead of bool, ie allow '-sandbox default' as an alternative to
'-sandbox on' that was slightly stronger by blocking fork/exec.


Regards,
Daniel
Paolo Bonzini March 14, 2017, 1:05 p.m. UTC | #9
On 14/03/2017 14:02, Daniel P. Berrange wrote:
>>> Can you give an example of such a list?
>> 
>> Well, anything that makes "-sandbox on" more than security theater...  I
>> would consider it a necessary evil, not a good thing to have such a list.
>> 
>> Due to NO_NEW_PRIVS, I think non-migration fork/exec should be on the
>> list, but hopefully nothing else.
>
> The current semantics of '-sandbox on' are that it is documented to not
> break any valid QEMU features. By blocking fork/exec, we make a semantic
> change to the existing behaviour of '-sandbox on'.

I don't propose to block fork/exec.  I propose not to whitelist setuid,
as is the case now too.  It wouldn't be backwards-incompatible.

> So any application
> which has been using '-sandbox on' historically, is at risk of being
> broken when upgrading to QEMU 2.10. It would force the application to
> generate a different CLI for new QEMU - ie to avoid being broken they
> would have to now  add elevateprivs=allow, spawn=allow. I think we have
> to maintain semantic compat with existing usage, which means that
> '-sandbox on' has to remain security theatre.
> 
> So if we want a default config to be more restrictive, then I think you
> realistically have to turn the default parameter into a tri-state
> instead of bool, ie allow '-sandbox default' as an alternative to
> '-sandbox on' that was slightly stronger by blocking fork/exec.

Or just print a warning that "-sandbox on" is useless.  I guess that
would be okay too if we want to be "stronger" than backwards-compatible.

Paolo
Daniel P. Berrangé March 14, 2017, 1:19 p.m. UTC | #10
On Tue, Mar 14, 2017 at 02:05:32PM +0100, Paolo Bonzini wrote:
> 
> 
> On 14/03/2017 14:02, Daniel P. Berrange wrote:
> >>> Can you give an example of such a list?
> >> 
> >> Well, anything that makes "-sandbox on" more than security theater...  I
> >> would consider it a necessary evil, not a good thing to have such a list.
> >> 
> >> Due to NO_NEW_PRIVS, I think non-migration fork/exec should be on the
> >> list, but hopefully nothing else.
> >
> > The current semantics of '-sandbox on' are that it is documented to not
> > break any valid QEMU features. By blocking fork/exec, we make a semantic
> > change to the existing behaviour of '-sandbox on'.
> 
> I don't propose to block fork/exec.  I propose not to whitelist setuid,
> as is the case now too.  It wouldn't be backwards-incompatible.

That we don't whitelist setuid is a bug in the current impl vs the intended
semantics of '-sandbox on', where we are denying valid features. 

> > So any application
> > which has been using '-sandbox on' historically, is at risk of being
> > broken when upgrading to QEMU 2.10. It would force the application to
> > generate a different CLI for new QEMU - ie to avoid being broken they
> > would have to now  add elevateprivs=allow, spawn=allow. I think we have
> > to maintain semantic compat with existing usage, which means that
> > '-sandbox on' has to remain security theatre.
> > 
> > So if we want a default config to be more restrictive, then I think you
> > realistically have to turn the default parameter into a tri-state
> > instead of bool, ie allow '-sandbox default' as an alternative to
> > '-sandbox on' that was slightly stronger by blocking fork/exec.
> 
> Or just print a warning that "-sandbox on" is useless.  I guess that
> would be okay too if we want to be "stronger" than backwards-compatible.

It isn't totally useless - it is blocking access to a number of system
calls that QEMU should never use. eg things like reboot, load_module,
etc. In the absence of other security protections it is useless, but
if you combine with other mechanisms like DAC or LSM, then the default
seccomp rules offer some extra protection. Admittedly not alot, but
it is non-zero. The extra opt-in protecton flags then increase the
value at cost of killing some features.

Regards,
Daniel
diff mbox

Patch

diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h
index 7a7bde246b..e6e78d85ce 100644
--- a/include/sysemu/seccomp.h
+++ b/include/sysemu/seccomp.h
@@ -16,6 +16,7 @@ 
 #define QEMU_SECCOMP_H
 
 #define OBSOLETE    0x0001
+#define PRIVILEGED  0x0010
 
 #include <seccomp.h>
 
diff --git a/qemu-options.hx b/qemu-options.hx
index 1403d0c85f..47018db5aa 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -3732,8 +3732,10 @@  Old param mode (ARM only).
 ETEXI
 
 DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
-    "-sandbox on[,obsolete=allow]  Enable seccomp mode 2 system call filter (default 'off').\n" \
-    "                              obsolete: Allow obsolete system calls",
+    "-sandbox on[,obsolete=allow][,elevateprivileges=deny]\n" \
+    "                               Enable seccomp mode 2 system call filter (default 'off').\n" \
+    "                               obsolete: Allow obsolete system calls\n" \
+    "                               elevateprivileges: avoids Qemu process to elevate its privileges by blacklisting all set*uid|gid system calls",
     QEMU_ARCH_ALL)
 STEXI
 @item -sandbox @var{arg}[,obsolete=@var{string}]
@@ -3743,6 +3745,8 @@  disable it.  The default is 'off'.
 @table @option
 @item obsolete=@var{string}
 Enable Obsolete system calls
+@item elevateprivileges=@var{string}
+Disable set*uid|gid systema calls
 @end table
 ETEXI
 
diff --git a/qemu-seccomp.c b/qemu-seccomp.c
index 5ef36890da..5aa6590386 100644
--- a/qemu-seccomp.c
+++ b/qemu-seccomp.c
@@ -31,6 +31,19 @@  struct QemuSeccompSyscall {
     uint8_t priority;
 };
 
+static const struct QemuSeccompSyscall privileged_syscalls[] = {
+    { SCMP_SYS(setuid), 255 },
+    { SCMP_SYS(setgid), 255 },
+    { SCMP_SYS(setpgid), 255 },
+    { SCMP_SYS(setsid), 255 },
+    { SCMP_SYS(setreuid), 255 },
+    { SCMP_SYS(setregid), 255 },
+    { SCMP_SYS(setresuid), 255 },
+    { SCMP_SYS(setresgid), 255 },
+    { SCMP_SYS(setfsuid), 255 },
+    { SCMP_SYS(setfsgid), 255 },
+};
+
 static const struct QemuSeccompSyscall obsolete[] = {
     { SCMP_SYS(readdir), 255 },
     { SCMP_SYS(_sysctl), 255 },
@@ -125,6 +138,21 @@  int seccomp_start(uint8_t seccomp_opts)
         }
     }
 
+    if (seccomp_opts & PRIVILEGED) {
+        for (i = 0; i < ARRAY_SIZE(privileged_syscalls); i++) {
+            rc = seccomp_rule_add(ctx, SCMP_ACT_KILL, privileged_syscalls[i].num, 0);
+            if (rc < 0) {
+                goto seccomp_return;
+            }
+            rc = seccomp_syscall_priority(ctx, privileged_syscalls[i].num,
+                    privileged_syscalls[i].priority);
+            if (rc < 0) {
+                goto seccomp_return;
+            }
+        }
+    }
+
+
     rc = seccomp_load(ctx);
 
   seccomp_return:
diff --git a/vl.c b/vl.c
index 7b08b3383b..d071e240b0 100644
--- a/vl.c
+++ b/vl.c
@@ -273,6 +273,10 @@  static QemuOptsList qemu_sandbox_opts = {
             .name = "obsolete",
             .type = QEMU_OPT_STRING,
         },
+        {
+            .name = "elevateprivileges",
+            .type = QEMU_OPT_STRING,
+        },
         { /* end of list */ }
     },
 };
@@ -1045,6 +1049,13 @@  static int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp)
             }
         }
 
+        value = qemu_opt_get(opts,"elevateprivileges");
+        if (value) {
+            if (strcmp(value, "deny") == 0) {
+                seccomp_opts |= PRIVILEGED;
+            }
+        }
+
         if (seccomp_start(seccomp_opts) < 0) {
             error_report("failed to install seccomp syscall filter "
                          "in the kernel");