diff mbox

[v3,2/6] seccomp: add obsolete argument to command line

Message ID 20170728121040.631-3-otubo@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Eduardo Otubo July 28, 2017, 12:10 p.m. UTC
This patch introduces the argument [,obsolete=allow] to the `-sandbox on'
option. It allows QEMU to run safely on old systems that still rely on
old system calls.

Signed-off-by: Eduardo Otubo <otubo@redhat.com>
---
 include/sysemu/seccomp.h |  4 +++-
 qemu-options.hx          |  9 +++++++--
 qemu-seccomp.c           | 32 +++++++++++++++++++++++++++++++-
 vl.c                     | 16 +++++++++++++++-
 4 files changed, 56 insertions(+), 5 deletions(-)

Comments

Daniel P. Berrangé Aug. 2, 2017, 12:33 p.m. UTC | #1
On Fri, Jul 28, 2017 at 02:10:36PM +0200, Eduardo Otubo wrote:
> This patch introduces the argument [,obsolete=allow] to the `-sandbox on'
> option. It allows Qemu to run safely on old system that still relies on
> old system calls.
> 
> Signed-off-by: Eduardo Otubo <otubo@redhat.com>
> ---
>  include/sysemu/seccomp.h |  4 +++-
>  qemu-options.hx          |  9 +++++++--
>  qemu-seccomp.c           | 32 +++++++++++++++++++++++++++++++-
>  vl.c                     | 16 +++++++++++++++-
>  4 files changed, 56 insertions(+), 5 deletions(-)
> 
> diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h
> index cfc06008cb..7a7bde246b 100644
> --- a/include/sysemu/seccomp.h
> +++ b/include/sysemu/seccomp.h
> @@ -15,7 +15,9 @@
>  #ifndef QEMU_SECCOMP_H
>  #define QEMU_SECCOMP_H
>  
> +#define OBSOLETE    0x0001

Please namespace this - it's far too generic a term to expose to other
source files. I'd suggest

  QEMU_SECCOMP_SET_OBSOLETE

> -int seccomp_start(void);
> +int seccomp_start(uint8_t seccomp_opts);

This only allows for 8 sets. Perhaps it's enough, but I'd suggest
just using a uint32_t straight away.

> diff --git a/qemu-options.hx b/qemu-options.hx
> index 746b5fa75d..54e492f36a 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -4004,13 +4004,18 @@ Old param mode (ARM only).
>  ETEXI
>  
>  DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
> -    "-sandbox <arg>  Enable seccomp mode 2 system call filter (default 'off').\n",
> +    "-sandbox on[,obsolete=allow]  Enable seccomp mode 2 system call filter (default 'off').\n" \
> +    "                obsolete: Allow obsolete system calls\n",
>      QEMU_ARCH_ALL)
>  STEXI
> -@item -sandbox @var{arg}
> +@item -sandbox @var{arg}[,obsolete=@var{string}]
>  @findex -sandbox
>  Enable Seccomp mode 2 system call filter. 'on' will enable syscall filtering and 'off' will
>  disable it.  The default is 'off'.
> +@table @option
> +@item obsolete=@var{string}
> +Enable Obsolete system calls

Let's explain this a bit more.

Enable obsolete system calls that are provided by the kernel, but typically no
longer used by modern C library implementations.

> +@end table
>  ETEXI
>  
>  DEF("readconfig", HAS_ARG, QEMU_OPTION_readconfig,
> diff --git a/qemu-seccomp.c b/qemu-seccomp.c
> index f8877b07b5..c6a8b28260 100644
> --- a/qemu-seccomp.c
> +++ b/qemu-seccomp.c
> @@ -31,6 +31,20 @@ struct QemuSeccompSyscall {
>      uint8_t priority;
>  };
>  
> +static const struct QemuSeccompSyscall obsolete[] = {
> +    { SCMP_SYS(readdir), 255 },
> +    { SCMP_SYS(_sysctl), 255 },
> +    { SCMP_SYS(bdflush), 255 },
> +    { SCMP_SYS(create_module), 255 },
> +    { SCMP_SYS(get_kernel_syms), 255 },
> +    { SCMP_SYS(query_module), 255 },
> +    { SCMP_SYS(sgetmask), 255 },
> +    { SCMP_SYS(ssetmask), 255 },
> +    { SCMP_SYS(sysfs), 255 },
> +    { SCMP_SYS(uselib), 255 },
> +    { SCMP_SYS(ustat), 255 },
> +};
> +
>  static const struct QemuSeccompSyscall blacklist[] = {
>      { SCMP_SYS(reboot), 255 },
>      { SCMP_SYS(swapon), 255 },
> @@ -56,7 +70,20 @@ static const struct QemuSeccompSyscall blacklist[] = {
>      { SCMP_SYS(vserver), 255 },
>  };
>  
> -int seccomp_start(void)
> +static int is_obsolete(int syscall)
> +{
> +    unsigned int i = 0;
> +
> +    for (i = 0; i < ARRAY_SIZE(obsolete); i++) {
> +        if (syscall == obsolete[i].num) {
> +            return 1;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +int seccomp_start(uint8_t seccomp_opts)
>  {
>      int rc = 0;
>      unsigned int i = 0;
> @@ -69,6 +96,9 @@ int seccomp_start(void)
>      }
>  
>      for (i = 0; i < ARRAY_SIZE(blacklist); i++) {
> +        if ((seccomp_opts & OBSOLETE) && is_obsolete(blacklist[i].num)) {
> +            continue;
> +        }

IMHO this is leading to a rather inefficient approach. Why not extend
the QemuSeccompSyscall struct so that it has another field to list which
set it belongs to? Then you can do


  static const struct QemuSeccompSyscall blacklist[] = {
    { SCMP_SYS(reboot), 255, QEMU_SECCOMP_SET_DEFAULT },
    { SCMP_SYS(swapon), 255, QEMU_SECCOMP_SET_DEFAULT },
     ....
    { SCMP_SYS(readdir), 255, QEMU_SECCOMP_SET_OBSOLETE },
    { SCMP_SYS(_sysctl), 255, QEMU_SECCOMP_SET_OBSOLETE },
    ...

And then to process this you can do

      for (i = 0; i < ARRAY_SIZE(blacklist); i++) {
          if (blacklist[i].set != QEMU_SECCOMP_SET_OBSOLETE &&
              blacklist[i].set & seccomp_opts) {
	      continue;
	  }


>          rc = seccomp_rule_add(ctx, SCMP_ACT_KILL, blacklist[i].num, 0);
>          if (rc < 0) {
>              goto seccomp_return;
> diff --git a/vl.c b/vl.c
> index 15b98800e9..cbe09c94af 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -271,6 +271,10 @@ static QemuOptsList qemu_sandbox_opts = {
>              .name = "enable",
>              .type = QEMU_OPT_BOOL,
>          },
> +        {
> +            .name = "obsolete",
> +            .type = QEMU_OPT_STRING,
> +        },
>          { /* end of list */ }
>      },
>  };
> @@ -1032,7 +1036,17 @@ static int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp)
>  {
>      if (qemu_opt_get_bool(opts, "enable", false)) {
>  #ifdef CONFIG_SECCOMP
> -        if (seccomp_start() < 0) {
> +        uint8_t seccomp_opts = 0x0000;
> +        const char *value = NULL;
> +
> +        value = qemu_opt_get(opts, "obsolete");
> +        if (value) {
> +            if (strcmp(value, "allow") == 0) {
> +                seccomp_opts |= OBSOLETE;
> +            }
> +        }

IIUC, the values will all be booleans, so we should just use

   if (qemu_opt_get_bool(opts, "obsolete", false))
       seccomp_opts |= OBSOLETE;

> +
> +        if (seccomp_start(seccomp_opts) < 0) {
>              error_report("failed to install seccomp syscall filter "
>                           "in the kernel");
>              return -1;

Regards,
Daniel
Daniel P. Berrangé Aug. 2, 2017, 12:38 p.m. UTC | #2
On Wed, Aug 02, 2017 at 01:33:56PM +0100, Daniel P. Berrange wrote:
> On Fri, Jul 28, 2017 at 02:10:36PM +0200, Eduardo Otubo wrote:
> > This patch introduces the argument [,obsolete=allow] to the `-sandbox on'
> > option. It allows Qemu to run safely on old system that still relies on
> > old system calls.
> > 
> > Signed-off-by: Eduardo Otubo <otubo@redhat.com>
> > ---
> >  include/sysemu/seccomp.h |  4 +++-
> >  qemu-options.hx          |  9 +++++++--
> >  qemu-seccomp.c           | 32 +++++++++++++++++++++++++++++++-
> >  vl.c                     | 16 +++++++++++++++-
> >  4 files changed, 56 insertions(+), 5 deletions(-)


> > @@ -1032,7 +1036,17 @@ static int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp)
> >  {
> >      if (qemu_opt_get_bool(opts, "enable", false)) {
> >  #ifdef CONFIG_SECCOMP
> > -        if (seccomp_start() < 0) {
> > +        uint8_t seccomp_opts = 0x0000;
> > +        const char *value = NULL;
> > +
> > +        value = qemu_opt_get(opts, "obsolete");
> > +        if (value) {
> > +            if (strcmp(value, "allow") == 0) {
> > +                seccomp_opts |= OBSOLETE;
> > +            }
> > +        }
> 
> IIUC, the values will all be booleans, so we should just use
> 
>    if (qemu_opt_get_bool(opts, "obsolete", false))
>        seccomp_opts |= OBSOLETE;

Oh ignore this. I see from the next patch, we can't treat it as a boolean.

We should, however, explicitly look for 'value == deny', and then reject
all other values with an error message.

> 
> > +
> > +        if (seccomp_start(seccomp_opts) < 0) {
> >              error_report("failed to install seccomp syscall filter "
> >                           "in the kernel");
> >              return -1;

Regards,
Daniel
Eduardo Otubo Aug. 11, 2017, 9:12 a.m. UTC | #3
On Wed, Aug 02, 2017 at 01:33:56PM +0100, Daniel P. Berrange wrote:
> On Fri, Jul 28, 2017 at 02:10:36PM +0200, Eduardo Otubo wrote:
> > This patch introduces the argument [,obsolete=allow] to the `-sandbox on'
> > option. It allows Qemu to run safely on old system that still relies on
> > old system calls.
> > 
> > Signed-off-by: Eduardo Otubo <otubo@redhat.com>
> > ---
> >  include/sysemu/seccomp.h |  4 +++-
> >  qemu-options.hx          |  9 +++++++--
> >  qemu-seccomp.c           | 32 +++++++++++++++++++++++++++++++-
> >  vl.c                     | 16 +++++++++++++++-
> >  4 files changed, 56 insertions(+), 5 deletions(-)
> > 
> > diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h
> > index cfc06008cb..7a7bde246b 100644
> > --- a/include/sysemu/seccomp.h
> > +++ b/include/sysemu/seccomp.h
> > @@ -15,7 +15,9 @@
> >  #ifndef QEMU_SECCOMP_H
> >  #define QEMU_SECCOMP_H
> >  
> > +#define OBSOLETE    0x0001
> 
> Please namespace this - its far too generic a term to expose to other
> source files. I'd suggest 
> 
>   QEMU_SECCOMP_SET_OBSOLETE
> 
> > -int seccomp_start(void);
> > +int seccomp_start(uint8_t seccomp_opts);
> 
> This only allows for 8 sets. Perhaps its enough, but I'd suggest
> just using a uint32_t straight away.
> 
> > diff --git a/qemu-options.hx b/qemu-options.hx
> > index 746b5fa75d..54e492f36a 100644
> > --- a/qemu-options.hx
> > +++ b/qemu-options.hx
> > @@ -4004,13 +4004,18 @@ Old param mode (ARM only).
> >  ETEXI
> >  
> >  DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
> > -    "-sandbox <arg>  Enable seccomp mode 2 system call filter (default 'off').\n",
> > +    "-sandbox on[,obsolete=allow]  Enable seccomp mode 2 system call filter (default 'off').\n" \
> > +    "                obsolete: Allow obsolete system calls\n",
> >      QEMU_ARCH_ALL)
> >  STEXI
> > -@item -sandbox @var{arg}
> > +@item -sandbox @var{arg}[,obsolete=@var{string}]
> >  @findex -sandbox
> >  Enable Seccomp mode 2 system call filter. 'on' will enable syscall filtering and 'off' will
> >  disable it.  The default is 'off'.
> > +@table @option
> > +@item obsolete=@var{string}
> > +Enable Obsolete system calls
> 
> Lets explain this a bit more.
> 
> E obsolete system calls that are provided by the kernel, but typically no
> longer used by modern C library implementations. 
> 
> > +@end table
> >  ETEXI
> >  
> >  DEF("readconfig", HAS_ARG, QEMU_OPTION_readconfig,
> > diff --git a/qemu-seccomp.c b/qemu-seccomp.c
> > index f8877b07b5..c6a8b28260 100644
> > --- a/qemu-seccomp.c
> > +++ b/qemu-seccomp.c
> > @@ -31,6 +31,20 @@ struct QemuSeccompSyscall {
> >      uint8_t priority;
> >  };
> >  
> > +static const struct QemuSeccompSyscall obsolete[] = {
> > +    { SCMP_SYS(readdir), 255 },
> > +    { SCMP_SYS(_sysctl), 255 },
> > +    { SCMP_SYS(bdflush), 255 },
> > +    { SCMP_SYS(create_module), 255 },
> > +    { SCMP_SYS(get_kernel_syms), 255 },
> > +    { SCMP_SYS(query_module), 255 },
> > +    { SCMP_SYS(sgetmask), 255 },
> > +    { SCMP_SYS(ssetmask), 255 },
> > +    { SCMP_SYS(sysfs), 255 },
> > +    { SCMP_SYS(uselib), 255 },
> > +    { SCMP_SYS(ustat), 255 },
> > +};
> > +
> >  static const struct QemuSeccompSyscall blacklist[] = {
> >      { SCMP_SYS(reboot), 255 },
> >      { SCMP_SYS(swapon), 255 },
> > @@ -56,7 +70,20 @@ static const struct QemuSeccompSyscall blacklist[] = {
> >      { SCMP_SYS(vserver), 255 },
> >  };
> >  
> > -int seccomp_start(void)
> > +static int is_obsolete(int syscall)
> > +{
> > +    unsigned int i = 0;
> > +
> > +    for (i = 0; i < ARRAY_SIZE(obsolete); i++) {
> > +        if (syscall == obsolete[i].num) {
> > +            return 1;
> > +        }
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +int seccomp_start(uint8_t seccomp_opts)
> >  {
> >      int rc = 0;
> >      unsigned int i = 0;
> > @@ -69,6 +96,9 @@ int seccomp_start(void)
> >      }
> >  
> >      for (i = 0; i < ARRAY_SIZE(blacklist); i++) {
> > +        if ((seccomp_opts & OBSOLETE) && is_obsolete(blacklist[i].num)) {
> > +            continue;
> > +        }
> 
> IMHO this is leading to a rather inefficient approach. Why not extend
> QemuSeccompSyscall struct so that it has another field to list which
> set it belongs to. Then you can do
> 
> 
>   static const struct QemuSeccompSyscall blacklist[] = {
>     { SCMP_SYS(reboot), 255, QEMU_SECCOMP_SET_DEFAULT },
>     { SCMP_SYS(swapon), 255, QEMU_SECCOMP_SET_DEFAULT },
>      ....
>     { SCMP_SYS(readdir), 255, QEMU_SECCOMP_SET_OBSOLETE },
>     { SCMP_SYS(_sysctl), 255, QEMU_SECCOMP_SET_OBSOLETE },
>     ...
> 
> And then to process this you can do
> 
>       for (i = 0; i < ARRAY_SIZE(blacklist); i++) {
>           if (blacklist[i].set != QEMU_SECCOMP_SET_OBSOLETE &&
>               blacklist[i].set & seccomp_opts) {
> 	      continue;

I agree with all the rest except this one. This would require a
change to libseccomp itself. Not sure a change to the library would be
suitable for now.

I'm working and reviewing all the comments today, so I'll try to post
a new version later today.

> 	  }
> 
> 
> >          rc = seccomp_rule_add(ctx, SCMP_ACT_KILL, blacklist[i].num, 0);
> >          if (rc < 0) {
> >              goto seccomp_return;
> > diff --git a/vl.c b/vl.c
> > index 15b98800e9..cbe09c94af 100644
> > --- a/vl.c
> > +++ b/vl.c
> > @@ -271,6 +271,10 @@ static QemuOptsList qemu_sandbox_opts = {
> >              .name = "enable",
> >              .type = QEMU_OPT_BOOL,
> >          },
> > +        {
> > +            .name = "obsolete",
> > +            .type = QEMU_OPT_STRING,
> > +        },
> >          { /* end of list */ }
> >      },
> >  };
> > @@ -1032,7 +1036,17 @@ static int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp)
> >  {
> >      if (qemu_opt_get_bool(opts, "enable", false)) {
> >  #ifdef CONFIG_SECCOMP
> > -        if (seccomp_start() < 0) {
> > +        uint8_t seccomp_opts = 0x0000;
> > +        const char *value = NULL;
> > +
> > +        value = qemu_opt_get(opts, "obsolete");
> > +        if (value) {
> > +            if (strcmp(value, "allow") == 0) {
> > +                seccomp_opts |= OBSOLETE;
> > +            }
> > +        }
> 
> IIUC, the values will all be booleans, so we should just use
> 
>    if (qemu_opt_get_bool(opts, "obsolete", false))
>        seccomp_opts |= OBSOLETE;
> 
> > +
> > +        if (seccomp_start(seccomp_opts) < 0) {
> >              error_report("failed to install seccomp syscall filter "
> >                           "in the kernel");
> >              return -1;
> 
> Regards,
> Daniel
> -- 
> |: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org         -o-            https://fstop138.berrange.com :|
> |: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
Daniel P. Berrangé Aug. 11, 2017, 9:25 a.m. UTC | #4
On Fri, Aug 11, 2017 at 11:12:48AM +0200, Eduardo Otubo wrote:
> On Wed, Aug 02, 2017 at 01:33:56PM +0100, Daniel P. Berrange wrote:
> > On Fri, Jul 28, 2017 at 02:10:36PM +0200, Eduardo Otubo wrote:
> > > This patch introduces the argument [,obsolete=allow] to the `-sandbox on'
> > > option. It allows Qemu to run safely on old system that still relies on
> > > old system calls.
> > > 
> > > Signed-off-by: Eduardo Otubo <otubo@redhat.com>
> > > ---
> > >  include/sysemu/seccomp.h |  4 +++-
> > >  qemu-options.hx          |  9 +++++++--
> > >  qemu-seccomp.c           | 32 +++++++++++++++++++++++++++++++-
> > >  vl.c                     | 16 +++++++++++++++-
> > >  4 files changed, 56 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h
> > > index cfc06008cb..7a7bde246b 100644
> > > --- a/include/sysemu/seccomp.h
> > > +++ b/include/sysemu/seccomp.h
> > > @@ -15,7 +15,9 @@
> > >  #ifndef QEMU_SECCOMP_H
> > >  #define QEMU_SECCOMP_H
> > >  
> > > +#define OBSOLETE    0x0001
> > 
> > Please namespace this - its far too generic a term to expose to other
> > source files. I'd suggest 
> > 
> >   QEMU_SECCOMP_SET_OBSOLETE
> > 
> > > -int seccomp_start(void);
> > > +int seccomp_start(uint8_t seccomp_opts);
> > 
> > This only allows for 8 sets. Perhaps its enough, but I'd suggest
> > just using a uint32_t straight away.
> > 
> > > diff --git a/qemu-options.hx b/qemu-options.hx
> > > index 746b5fa75d..54e492f36a 100644
> > > --- a/qemu-options.hx
> > > +++ b/qemu-options.hx
> > > @@ -4004,13 +4004,18 @@ Old param mode (ARM only).
> > >  ETEXI
> > >  
> > >  DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
> > > -    "-sandbox <arg>  Enable seccomp mode 2 system call filter (default 'off').\n",
> > > +    "-sandbox on[,obsolete=allow]  Enable seccomp mode 2 system call filter (default 'off').\n" \
> > > +    "                obsolete: Allow obsolete system calls\n",
> > >      QEMU_ARCH_ALL)
> > >  STEXI
> > > -@item -sandbox @var{arg}
> > > +@item -sandbox @var{arg}[,obsolete=@var{string}]
> > >  @findex -sandbox
> > >  Enable Seccomp mode 2 system call filter. 'on' will enable syscall filtering and 'off' will
> > >  disable it.  The default is 'off'.
> > > +@table @option
> > > +@item obsolete=@var{string}
> > > +Enable Obsolete system calls
> > 
> > Lets explain this a bit more.
> > 
> > E obsolete system calls that are provided by the kernel, but typically no
> > longer used by modern C library implementations. 
> > 
> > > +@end table
> > >  ETEXI
> > >  
> > >  DEF("readconfig", HAS_ARG, QEMU_OPTION_readconfig,
> > > diff --git a/qemu-seccomp.c b/qemu-seccomp.c
> > > index f8877b07b5..c6a8b28260 100644
> > > --- a/qemu-seccomp.c
> > > +++ b/qemu-seccomp.c
> > > @@ -31,6 +31,20 @@ struct QemuSeccompSyscall {
> > >      uint8_t priority;
> > >  };
> > >  
> > > +static const struct QemuSeccompSyscall obsolete[] = {
> > > +    { SCMP_SYS(readdir), 255 },
> > > +    { SCMP_SYS(_sysctl), 255 },
> > > +    { SCMP_SYS(bdflush), 255 },
> > > +    { SCMP_SYS(create_module), 255 },
> > > +    { SCMP_SYS(get_kernel_syms), 255 },
> > > +    { SCMP_SYS(query_module), 255 },
> > > +    { SCMP_SYS(sgetmask), 255 },
> > > +    { SCMP_SYS(ssetmask), 255 },
> > > +    { SCMP_SYS(sysfs), 255 },
> > > +    { SCMP_SYS(uselib), 255 },
> > > +    { SCMP_SYS(ustat), 255 },
> > > +};
> > > +
> > >  static const struct QemuSeccompSyscall blacklist[] = {
> > >      { SCMP_SYS(reboot), 255 },
> > >      { SCMP_SYS(swapon), 255 },
> > > @@ -56,7 +70,20 @@ static const struct QemuSeccompSyscall blacklist[] = {
> > >      { SCMP_SYS(vserver), 255 },
> > >  };
> > >  
> > > -int seccomp_start(void)
> > > +static int is_obsolete(int syscall)
> > > +{
> > > +    unsigned int i = 0;
> > > +
> > > +    for (i = 0; i < ARRAY_SIZE(obsolete); i++) {
> > > +        if (syscall == obsolete[i].num) {
> > > +            return 1;
> > > +        }
> > > +    }
> > > +
> > > +    return 0;
> > > +}
> > > +
> > > +int seccomp_start(uint8_t seccomp_opts)
> > >  {
> > >      int rc = 0;
> > >      unsigned int i = 0;
> > > @@ -69,6 +96,9 @@ int seccomp_start(void)
> > >      }
> > >  
> > >      for (i = 0; i < ARRAY_SIZE(blacklist); i++) {
> > > +        if ((seccomp_opts & OBSOLETE) && is_obsolete(blacklist[i].num)) {
> > > +            continue;
> > > +        }
> > 
> > IMHO this is leading to a rather inefficient approach. Why not extend
> > QemuSeccompSyscall struct so that it has another field to list which
> > set it belongs to. Then you can do
> > 
> > 
> >   static const struct QemuSeccompSyscall blacklist[] = {
> >     { SCMP_SYS(reboot), 255, QEMU_SECCOMP_SET_DEFAULT },
> >     { SCMP_SYS(swapon), 255, QEMU_SECCOMP_SET_DEFAULT },
> >      ....
> >     { SCMP_SYS(readdir), 255, QEMU_SECCOMP_SET_OBSOLETE },
> >     { SCMP_SYS(_sysctl), 255, QEMU_SECCOMP_SET_OBSOLETE },
> >     ...
> > 
> > And then to process this you can do
> > 
> >       for (i = 0; i < ARRAY_SIZE(blacklist); i++) {
> >           if (blacklist[i].set != QEMU_SECCOMP_SET_OBSOLETE &&
> >               blacklist[i].set & seccomp_opts) {
> > 	      continue;
> 
> I agree with all the rest except with this one. This would require a
> change on libseccomp itself. Not sure a change on the library would be
> suited for now.

Huh?  QemuSeccompSyscall is a QEMU-defined struct, and this is
QEMU code. The change I describe here takes place before we even
call libseccomp and doesn't affect any APIs we call in the future.


Regards,
Daniel
Eduardo Otubo Aug. 11, 2017, 9:49 a.m. UTC | #5
On Fri, Aug 11, 2017 at 11:12:48AM +0200, Eduardo Otubo wrote:
> On Wed, Aug 02, 2017 at 01:33:56PM +0100, Daniel P. Berrange wrote:
> > On Fri, Jul 28, 2017 at 02:10:36PM +0200, Eduardo Otubo wrote:
> > > This patch introduces the argument [,obsolete=allow] to the `-sandbox on'
> > > option. It allows Qemu to run safely on old system that still relies on
> > > old system calls.
> > > 
> > > Signed-off-by: Eduardo Otubo <otubo@redhat.com>
> > > ---
> > >  include/sysemu/seccomp.h |  4 +++-
> > >  qemu-options.hx          |  9 +++++++--
> > >  qemu-seccomp.c           | 32 +++++++++++++++++++++++++++++++-
> > >  vl.c                     | 16 +++++++++++++++-
> > >  4 files changed, 56 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h
> > > index cfc06008cb..7a7bde246b 100644
> > > --- a/include/sysemu/seccomp.h
> > > +++ b/include/sysemu/seccomp.h
> > > @@ -15,7 +15,9 @@
> > >  #ifndef QEMU_SECCOMP_H
> > >  #define QEMU_SECCOMP_H
> > >  
> > > +#define OBSOLETE    0x0001
> > 
> > Please namespace this - its far too generic a term to expose to other
> > source files. I'd suggest 
> > 
> >   QEMU_SECCOMP_SET_OBSOLETE
> > 
> > > -int seccomp_start(void);
> > > +int seccomp_start(uint8_t seccomp_opts);
> > 
> > This only allows for 8 sets. Perhaps its enough, but I'd suggest
> > just using a uint32_t straight away.
> > 
> > > diff --git a/qemu-options.hx b/qemu-options.hx
> > > index 746b5fa75d..54e492f36a 100644
> > > --- a/qemu-options.hx
> > > +++ b/qemu-options.hx
> > > @@ -4004,13 +4004,18 @@ Old param mode (ARM only).
> > >  ETEXI
> > >  
> > >  DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
> > > -    "-sandbox <arg>  Enable seccomp mode 2 system call filter (default 'off').\n",
> > > +    "-sandbox on[,obsolete=allow]  Enable seccomp mode 2 system call filter (default 'off').\n" \
> > > +    "                obsolete: Allow obsolete system calls\n",
> > >      QEMU_ARCH_ALL)
> > >  STEXI
> > > -@item -sandbox @var{arg}
> > > +@item -sandbox @var{arg}[,obsolete=@var{string}]
> > >  @findex -sandbox
> > >  Enable Seccomp mode 2 system call filter. 'on' will enable syscall filtering and 'off' will
> > >  disable it.  The default is 'off'.
> > > +@table @option
> > > +@item obsolete=@var{string}
> > > +Enable Obsolete system calls
> > 
> > Lets explain this a bit more.
> > 
> > E obsolete system calls that are provided by the kernel, but typically no
> > longer used by modern C library implementations. 
> > 
> > > +@end table
> > >  ETEXI
> > >  
> > >  DEF("readconfig", HAS_ARG, QEMU_OPTION_readconfig,
> > > diff --git a/qemu-seccomp.c b/qemu-seccomp.c
> > > index f8877b07b5..c6a8b28260 100644
> > > --- a/qemu-seccomp.c
> > > +++ b/qemu-seccomp.c
> > > @@ -31,6 +31,20 @@ struct QemuSeccompSyscall {
> > >      uint8_t priority;
> > >  };
> > >  
> > > +static const struct QemuSeccompSyscall obsolete[] = {
> > > +    { SCMP_SYS(readdir), 255 },
> > > +    { SCMP_SYS(_sysctl), 255 },
> > > +    { SCMP_SYS(bdflush), 255 },
> > > +    { SCMP_SYS(create_module), 255 },
> > > +    { SCMP_SYS(get_kernel_syms), 255 },
> > > +    { SCMP_SYS(query_module), 255 },
> > > +    { SCMP_SYS(sgetmask), 255 },
> > > +    { SCMP_SYS(ssetmask), 255 },
> > > +    { SCMP_SYS(sysfs), 255 },
> > > +    { SCMP_SYS(uselib), 255 },
> > > +    { SCMP_SYS(ustat), 255 },
> > > +};
> > > +
> > >  static const struct QemuSeccompSyscall blacklist[] = {
> > >      { SCMP_SYS(reboot), 255 },
> > >      { SCMP_SYS(swapon), 255 },
> > > @@ -56,7 +70,20 @@ static const struct QemuSeccompSyscall blacklist[] = {
> > >      { SCMP_SYS(vserver), 255 },
> > >  };
> > >  
> > > -int seccomp_start(void)
> > > +static int is_obsolete(int syscall)
> > > +{
> > > +    unsigned int i = 0;
> > > +
> > > +    for (i = 0; i < ARRAY_SIZE(obsolete); i++) {
> > > +        if (syscall == obsolete[i].num) {
> > > +            return 1;
> > > +        }
> > > +    }
> > > +
> > > +    return 0;
> > > +}
> > > +
> > > +int seccomp_start(uint8_t seccomp_opts)
> > >  {
> > >      int rc = 0;
> > >      unsigned int i = 0;
> > > @@ -69,6 +96,9 @@ int seccomp_start(void)
> > >      }
> > >  
> > >      for (i = 0; i < ARRAY_SIZE(blacklist); i++) {
> > > +        if ((seccomp_opts & OBSOLETE) && is_obsolete(blacklist[i].num)) {
> > > +            continue;
> > > +        }
> > 
> > IMHO this is leading to a rather inefficient approach. Why not extend
> > QemuSeccompSyscall struct so that it has another field to list which
> > set it belongs to. Then you can do
> > 
> > 
> >   static const struct QemuSeccompSyscall blacklist[] = {
> >     { SCMP_SYS(reboot), 255, QEMU_SECCOMP_SET_DEFAULT },
> >     { SCMP_SYS(swapon), 255, QEMU_SECCOMP_SET_DEFAULT },
> >      ....
> >     { SCMP_SYS(readdir), 255, QEMU_SECCOMP_SET_OBSOLETE },
> >     { SCMP_SYS(_sysctl), 255, QEMU_SECCOMP_SET_OBSOLETE },
> >     ...
> > 
> > And then to process this you can do
> > 
> >       for (i = 0; i < ARRAY_SIZE(blacklist); i++) {
> >           if (blacklist[i].set != QEMU_SECCOMP_SET_OBSOLETE &&
> >               blacklist[i].set & seccomp_opts) {
> > 	      continue;
> 
> I agree with all the rest except with this one. This would require a
> change on libseccomp itself. Not sure a change on the library would be
> suited for now.
> 
> I'm working and reviewing all the comments today, so I'll try to post
> a new version later today.

Let me reply to this before you do: My mistake, this structure model does
not belong to libseccomp. My bad. So yeah, I can change it to optimize
things here. Thanks for the idea :)

> 
> > 	  }
> > 
> > 
> > >          rc = seccomp_rule_add(ctx, SCMP_ACT_KILL, blacklist[i].num, 0);
> > >          if (rc < 0) {
> > >              goto seccomp_return;
> > > diff --git a/vl.c b/vl.c
> > > index 15b98800e9..cbe09c94af 100644
> > > --- a/vl.c
> > > +++ b/vl.c
> > > @@ -271,6 +271,10 @@ static QemuOptsList qemu_sandbox_opts = {
> > >              .name = "enable",
> > >              .type = QEMU_OPT_BOOL,
> > >          },
> > > +        {
> > > +            .name = "obsolete",
> > > +            .type = QEMU_OPT_STRING,
> > > +        },
> > >          { /* end of list */ }
> > >      },
> > >  };
> > > @@ -1032,7 +1036,17 @@ static int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp)
> > >  {
> > >      if (qemu_opt_get_bool(opts, "enable", false)) {
> > >  #ifdef CONFIG_SECCOMP
> > > -        if (seccomp_start() < 0) {
> > > +        uint8_t seccomp_opts = 0x0000;
> > > +        const char *value = NULL;
> > > +
> > > +        value = qemu_opt_get(opts, "obsolete");
> > > +        if (value) {
> > > +            if (strcmp(value, "allow") == 0) {
> > > +                seccomp_opts |= OBSOLETE;
> > > +            }
> > > +        }
> > 
> > IIUC, the values will all be booleans, so we should just use
> > 
> >    if (qemu_opt_get_bool(opts, "obsolete", false))
> >        seccomp_opts |= OBSOLETE;
> > 
> > > +
> > > +        if (seccomp_start(seccomp_opts) < 0) {
> > >              error_report("failed to install seccomp syscall filter "
> > >                           "in the kernel");
> > >              return -1;
> > 
> > Regards,
> > Daniel
> > -- 
> > |: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
> > |: https://libvirt.org         -o-            https://fstop138.berrange.com :|
> > |: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
> 
> -- 
> Eduardo Otubo
> Senior Software Engineer @ RedHat
diff mbox

Patch

diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h
index cfc06008cb..7a7bde246b 100644
--- a/include/sysemu/seccomp.h
+++ b/include/sysemu/seccomp.h
@@ -15,7 +15,9 @@ 
 #ifndef QEMU_SECCOMP_H
 #define QEMU_SECCOMP_H
 
+#define OBSOLETE    0x0001
+
 #include <seccomp.h>
 
-int seccomp_start(void);
+int seccomp_start(uint8_t seccomp_opts);
 #endif
diff --git a/qemu-options.hx b/qemu-options.hx
index 746b5fa75d..54e492f36a 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -4004,13 +4004,18 @@  Old param mode (ARM only).
 ETEXI
 
 DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
-    "-sandbox <arg>  Enable seccomp mode 2 system call filter (default 'off').\n",
+    "-sandbox on[,obsolete=allow]  Enable seccomp mode 2 system call filter (default 'off').\n" \
+    "                obsolete: Allow obsolete system calls\n",
     QEMU_ARCH_ALL)
 STEXI
-@item -sandbox @var{arg}
+@item -sandbox @var{arg}[,obsolete=@var{string}]
 @findex -sandbox
 Enable Seccomp mode 2 system call filter. 'on' will enable syscall filtering and 'off' will
 disable it.  The default is 'off'.
+@table @option
+@item obsolete=@var{string}
+Enable Obsolete system calls
+@end table
 ETEXI
 
 DEF("readconfig", HAS_ARG, QEMU_OPTION_readconfig,
diff --git a/qemu-seccomp.c b/qemu-seccomp.c
index f8877b07b5..c6a8b28260 100644
--- a/qemu-seccomp.c
+++ b/qemu-seccomp.c
@@ -31,6 +31,20 @@  struct QemuSeccompSyscall {
     uint8_t priority;
 };
 
+static const struct QemuSeccompSyscall obsolete[] = {
+    { SCMP_SYS(readdir), 255 },
+    { SCMP_SYS(_sysctl), 255 },
+    { SCMP_SYS(bdflush), 255 },
+    { SCMP_SYS(create_module), 255 },
+    { SCMP_SYS(get_kernel_syms), 255 },
+    { SCMP_SYS(query_module), 255 },
+    { SCMP_SYS(sgetmask), 255 },
+    { SCMP_SYS(ssetmask), 255 },
+    { SCMP_SYS(sysfs), 255 },
+    { SCMP_SYS(uselib), 255 },
+    { SCMP_SYS(ustat), 255 },
+};
+
 static const struct QemuSeccompSyscall blacklist[] = {
     { SCMP_SYS(reboot), 255 },
     { SCMP_SYS(swapon), 255 },
@@ -56,7 +70,20 @@  static const struct QemuSeccompSyscall blacklist[] = {
     { SCMP_SYS(vserver), 255 },
 };
 
-int seccomp_start(void)
+static int is_obsolete(int syscall)
+{
+    unsigned int i = 0;
+
+    for (i = 0; i < ARRAY_SIZE(obsolete); i++) {
+        if (syscall == obsolete[i].num) {
+            return 1;
+        }
+    }
+
+    return 0;
+}
+
+int seccomp_start(uint8_t seccomp_opts)
 {
     int rc = 0;
     unsigned int i = 0;
@@ -69,6 +96,9 @@  int seccomp_start(void)
     }
 
     for (i = 0; i < ARRAY_SIZE(blacklist); i++) {
+        if ((seccomp_opts & OBSOLETE) && is_obsolete(blacklist[i].num)) {
+            continue;
+        }
         rc = seccomp_rule_add(ctx, SCMP_ACT_KILL, blacklist[i].num, 0);
         if (rc < 0) {
             goto seccomp_return;
diff --git a/vl.c b/vl.c
index 15b98800e9..cbe09c94af 100644
--- a/vl.c
+++ b/vl.c
@@ -271,6 +271,10 @@  static QemuOptsList qemu_sandbox_opts = {
             .name = "enable",
             .type = QEMU_OPT_BOOL,
         },
+        {
+            .name = "obsolete",
+            .type = QEMU_OPT_STRING,
+        },
         { /* end of list */ }
     },
 };
@@ -1032,7 +1036,17 @@  static int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp)
 {
     if (qemu_opt_get_bool(opts, "enable", false)) {
 #ifdef CONFIG_SECCOMP
-        if (seccomp_start() < 0) {
+        uint8_t seccomp_opts = 0x0000;
+        const char *value = NULL;
+
+        value = qemu_opt_get(opts, "obsolete");
+        if (value) {
+            if (strcmp(value, "allow") == 0) {
+                seccomp_opts |= OBSOLETE;
+            }
+        }
+
+        if (seccomp_start(seccomp_opts) < 0) {
             error_report("failed to install seccomp syscall filter "
                          "in the kernel");
             return -1;