[PATCHv5,4/5] seccomp: add spawn argument to command line
diff mbox

Message ID 20170908091027.9104-5-otubo@redhat.com
State New
Headers show

Commit Message

Eduardo Otubo Sept. 8, 2017, 9:10 a.m. UTC
This patch adds [,spawn=deny] argument to `-sandbox on' option. It
blacklists fork and execve system calls, avoiding Qemu to spawn new
threads or processes.

Signed-off-by: Eduardo Otubo <otubo@redhat.com>
---
 include/sysemu/seccomp.h |  1 +
 qemu-options.hx          |  9 +++++++--
 qemu-seccomp.c           | 12 ++++++++++++
 vl.c                     | 16 ++++++++++++++++
 4 files changed, 36 insertions(+), 2 deletions(-)

Comments

Daniel P. Berrangé Sept. 8, 2017, 9:33 a.m. UTC | #1
On Fri, Sep 08, 2017 at 11:10:26AM +0200, Eduardo Otubo wrote:
> This patch adds [,spawn=deny] argument to `-sandbox on' option. It
> blacklists fork and execve system calls, avoiding Qemu to spawn new
> threads or processes.
> 
> Signed-off-by: Eduardo Otubo <otubo@redhat.com>
> ---
>  include/sysemu/seccomp.h |  1 +
>  qemu-options.hx          |  9 +++++++--
>  qemu-seccomp.c           | 12 ++++++++++++
>  vl.c                     | 16 ++++++++++++++++
>  4 files changed, 36 insertions(+), 2 deletions(-)

Reviewed-by: Daniel P. Berrange <berrange@redhat.com>

Regards,
Daniel
Thomas Huth Sept. 8, 2017, 9:50 a.m. UTC | #2
On 08.09.2017 11:10, Eduardo Otubo wrote:
> This patch adds [,spawn=deny] argument to `-sandbox on' option. It
> blacklists fork and execve system calls, avoiding Qemu to spawn new
> threads or processes.
> 
> Signed-off-by: Eduardo Otubo <otubo@redhat.com>
> ---
>  include/sysemu/seccomp.h |  1 +
>  qemu-options.hx          |  9 +++++++--
>  qemu-seccomp.c           | 12 ++++++++++++
>  vl.c                     | 16 ++++++++++++++++
>  4 files changed, 36 insertions(+), 2 deletions(-)
> 
> diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h
> index 4a9e63c7cd..3ab5fc4f61 100644
> --- a/include/sysemu/seccomp.h
> +++ b/include/sysemu/seccomp.h
> @@ -18,6 +18,7 @@
>  #define QEMU_SECCOMP_SET_DEFAULT     (1 << 0)
>  #define QEMU_SECCOMP_SET_OBSOLETE    (1 << 1)
>  #define QEMU_SECCOMP_SET_PRIVILEGED  (1 << 2)
> +#define QEMU_SECCOMP_SET_SPAWN       (1 << 3)
>  
>  #include <seccomp.h>
>  
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 5c1b163fb5..2b04b9f170 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -4018,6 +4018,7 @@ ETEXI
>  
>  DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
>      "-sandbox on[,obsolete=allow|deny][,elevateprivileges=allow|deny|children]\n" \
> +    "          [,spawn=allow|deny]\n" \
>      "                Enable seccomp mode 2 system call filter (default 'off').\n" \
>      "                use 'obsolete' to allow obsolete system calls that are provided\n" \
>      "                    by the kernel, but typically no longer used by modern\n" \
> @@ -4025,10 +4026,12 @@ DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
>      "                use 'elevateprivileges' to allow or deny QEMU process to elevate\n" \
>      "                    its privileges by blacklisting all set*uid|gid system calls.\n" \
>      "                    The value 'children' will deny set*uid|gid system calls for\n" \
> -    "                    main QEMU process but will allow forks and execves to run unprivileged\n",
> +    "                    main QEMU process but will allow forks and execves to run unprivileged\n" \
> +    "                use 'spawn' to avoid QEMU to spawn new threads or processes by\n" \
> +    "                     blacklisting *fork and execve\n",
>      QEMU_ARCH_ALL)
>  STEXI
> -@item -sandbox @var{arg}[,obsolete=@var{string}][,elevateprivileges=@var{string}]
> +@item -sandbox @var{arg}[,obsolete=@var{string}][,elevateprivileges=@var{string}][,spawn=@var{string}]
>  @findex -sandbox
>  Enable Seccomp mode 2 system call filter. 'on' will enable syscall filtering and 'off' will
>  disable it.  The default is 'off'.
> @@ -4037,6 +4040,8 @@ disable it.  The default is 'off'.
>  Enable Obsolete system calls
>  @item elevateprivileges=@var{string}
>  Disable set*uid|gid system calls
> +@item spawn=@var{string}
> +Disable *fork and execve
>  @end table
>  ETEXI
>  
> diff --git a/qemu-seccomp.c b/qemu-seccomp.c
> index 2bad16cafb..4c169febf8 100644
> --- a/qemu-seccomp.c
> +++ b/qemu-seccomp.c
> @@ -79,6 +79,10 @@ static const struct QemuSeccompSyscall blacklist[] = {
>      { SCMP_SYS(setresgid),              4, QEMU_SECCOMP_SET_PRIVILEGED },
>      { SCMP_SYS(setfsuid),               4, QEMU_SECCOMP_SET_PRIVILEGED },
>      { SCMP_SYS(setfsgid),               4, QEMU_SECCOMP_SET_PRIVILEGED },
> +    /* spawn */
> +    { SCMP_SYS(fork),                   8, QEMU_SECCOMP_SET_SPAWN },
> +    { SCMP_SYS(vfork),                  8, QEMU_SECCOMP_SET_SPAWN },
> +    { SCMP_SYS(execve),                 8, QEMU_SECCOMP_SET_SPAWN },
>  };
>  
>  
> @@ -109,6 +113,14 @@ int seccomp_start(uint32_t seccomp_opts)
>              }
>  
>              break;
> +        case QEMU_SECCOMP_SET_SPAWN:
> +            if (seccomp_opts & QEMU_SECCOMP_SET_SPAWN) {
> +                break;
> +            } else {
> +                continue;
> +            }
> +

Remove the above empty line?

Anyway, it's somewhat ugly that you need a switch-case statement here at
all. Couldn't you simply check it like this:

	if (!(seccomp_opts & blacklist[i].set)) {
		continue;
	}
?

You then just have to invert the meaning of the
QEMU_SECCOMP_SET_OBSOLETE bit in the second patch, so that this bit is
treated in the same way as the others (i.e. use
 uint32_t seccomp_opts = QEMU_SECCOMP_SET_OBSOLETE;
instead of
 uint32_t seccomp_opts = 0x00000;
in vl.c in the second patch).

 Thomas
Eduardo Otubo Sept. 8, 2017, 11:15 a.m. UTC | #3
On Fri, Sep 08, 2017 at 11:50:12AM +0200, Thomas Huth wrote:
> On 08.09.2017 11:10, Eduardo Otubo wrote:
> > This patch adds [,spawn=deny] argument to `-sandbox on' option. It
> > blacklists fork and execve system calls, avoiding Qemu to spawn new
> > threads or processes.
> > 
> > Signed-off-by: Eduardo Otubo <otubo@redhat.com>
> > ---
> >  include/sysemu/seccomp.h |  1 +
> >  qemu-options.hx          |  9 +++++++--
> >  qemu-seccomp.c           | 12 ++++++++++++
> >  vl.c                     | 16 ++++++++++++++++
> >  4 files changed, 36 insertions(+), 2 deletions(-)
> > 
> > diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h
> > index 4a9e63c7cd..3ab5fc4f61 100644
> > --- a/include/sysemu/seccomp.h
> > +++ b/include/sysemu/seccomp.h
> > @@ -18,6 +18,7 @@
> >  #define QEMU_SECCOMP_SET_DEFAULT     (1 << 0)
> >  #define QEMU_SECCOMP_SET_OBSOLETE    (1 << 1)
> >  #define QEMU_SECCOMP_SET_PRIVILEGED  (1 << 2)
> > +#define QEMU_SECCOMP_SET_SPAWN       (1 << 3)
> >  
> >  #include <seccomp.h>
> >  
> > diff --git a/qemu-options.hx b/qemu-options.hx
> > index 5c1b163fb5..2b04b9f170 100644
> > --- a/qemu-options.hx
> > +++ b/qemu-options.hx
> > @@ -4018,6 +4018,7 @@ ETEXI
> >  
> >  DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
> >      "-sandbox on[,obsolete=allow|deny][,elevateprivileges=allow|deny|children]\n" \
> > +    "          [,spawn=allow|deny]\n" \
> >      "                Enable seccomp mode 2 system call filter (default 'off').\n" \
> >      "                use 'obsolete' to allow obsolete system calls that are provided\n" \
> >      "                    by the kernel, but typically no longer used by modern\n" \
> > @@ -4025,10 +4026,12 @@ DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
> >      "                use 'elevateprivileges' to allow or deny QEMU process to elevate\n" \
> >      "                    its privileges by blacklisting all set*uid|gid system calls.\n" \
> >      "                    The value 'children' will deny set*uid|gid system calls for\n" \
> > -    "                    main QEMU process but will allow forks and execves to run unprivileged\n",
> > +    "                    main QEMU process but will allow forks and execves to run unprivileged\n" \
> > +    "                use 'spawn' to avoid QEMU to spawn new threads or processes by\n" \
> > +    "                     blacklisting *fork and execve\n",
> >      QEMU_ARCH_ALL)
> >  STEXI
> > -@item -sandbox @var{arg}[,obsolete=@var{string}][,elevateprivileges=@var{string}]
> > +@item -sandbox @var{arg}[,obsolete=@var{string}][,elevateprivileges=@var{string}][,spawn=@var{string}]
> >  @findex -sandbox
> >  Enable Seccomp mode 2 system call filter. 'on' will enable syscall filtering and 'off' will
> >  disable it.  The default is 'off'.
> > @@ -4037,6 +4040,8 @@ disable it.  The default is 'off'.
> >  Enable Obsolete system calls
> >  @item elevateprivileges=@var{string}
> >  Disable set*uid|gid system calls
> > +@item spawn=@var{string}
> > +Disable *fork and execve
> >  @end table
> >  ETEXI
> >  
> > diff --git a/qemu-seccomp.c b/qemu-seccomp.c
> > index 2bad16cafb..4c169febf8 100644
> > --- a/qemu-seccomp.c
> > +++ b/qemu-seccomp.c
> > @@ -79,6 +79,10 @@ static const struct QemuSeccompSyscall blacklist[] = {
> >      { SCMP_SYS(setresgid),              4, QEMU_SECCOMP_SET_PRIVILEGED },
> >      { SCMP_SYS(setfsuid),               4, QEMU_SECCOMP_SET_PRIVILEGED },
> >      { SCMP_SYS(setfsgid),               4, QEMU_SECCOMP_SET_PRIVILEGED },
> > +    /* spawn */
> > +    { SCMP_SYS(fork),                   8, QEMU_SECCOMP_SET_SPAWN },
> > +    { SCMP_SYS(vfork),                  8, QEMU_SECCOMP_SET_SPAWN },
> > +    { SCMP_SYS(execve),                 8, QEMU_SECCOMP_SET_SPAWN },
> >  };
> >  
> >  
> > @@ -109,6 +113,14 @@ int seccomp_start(uint32_t seccomp_opts)
> >              }
> >  
> >              break;
> > +        case QEMU_SECCOMP_SET_SPAWN:
> > +            if (seccomp_opts & QEMU_SECCOMP_SET_SPAWN) {
> > +                break;
> > +            } else {
> > +                continue;
> > +            }
> > +
> 
> Remove the above empty line?
> 
> Anyway, it's somewhat ugly that you need a switch-case statement here at
> all. Couldn't you simply check it like this:
> 
> 	if (!(seccomp_opts & blacklist[i].set)) {
> 		continue;
> 	}
> ?
> 
> You then just have to invert the meaning of the
> QEMU_SECCOMP_SET_OBSOLETE bit in the second patch, so that this bit is
> treated in the same way as the others (i.e. use
>  uint32_t seccomp_opts = QEMU_SECCOMP_SET_OBSOLETE;
> instead of
>  uint32_t seccomp_opts = 0x00000;
> in vl.c in the second patch).

That's indeed much better, but perhaps:
 uint32_t seccomp_opts = QEMU_SECCOMP_SET_DEFAULT | QEMU_SECCOMP_SET_OBSOLETE;
?
Thomas Huth Sept. 8, 2017, 11:31 a.m. UTC | #4
On 08.09.2017 13:15, Eduardo Otubo wrote:
> On Fri, Sep 08, 2017 at 11:50:12AM +0200, Thomas Huth wrote:
>> On 08.09.2017 11:10, Eduardo Otubo wrote:
>>> This patch adds [,spawn=deny] argument to `-sandbox on' option. It
>>> blacklists fork and execve system calls, avoiding Qemu to spawn new
>>> threads or processes.
>>>
>>> Signed-off-by: Eduardo Otubo <otubo@redhat.com>
>>> ---
>>>  include/sysemu/seccomp.h |  1 +
>>>  qemu-options.hx          |  9 +++++++--
>>>  qemu-seccomp.c           | 12 ++++++++++++
>>>  vl.c                     | 16 ++++++++++++++++
>>>  4 files changed, 36 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h
>>> index 4a9e63c7cd..3ab5fc4f61 100644
>>> --- a/include/sysemu/seccomp.h
>>> +++ b/include/sysemu/seccomp.h
>>> @@ -18,6 +18,7 @@
>>>  #define QEMU_SECCOMP_SET_DEFAULT     (1 << 0)
>>>  #define QEMU_SECCOMP_SET_OBSOLETE    (1 << 1)
>>>  #define QEMU_SECCOMP_SET_PRIVILEGED  (1 << 2)
>>> +#define QEMU_SECCOMP_SET_SPAWN       (1 << 3)
>>>  
>>>  #include <seccomp.h>
>>>  
>>> diff --git a/qemu-options.hx b/qemu-options.hx
>>> index 5c1b163fb5..2b04b9f170 100644
>>> --- a/qemu-options.hx
>>> +++ b/qemu-options.hx
>>> @@ -4018,6 +4018,7 @@ ETEXI
>>>  
>>>  DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
>>>      "-sandbox on[,obsolete=allow|deny][,elevateprivileges=allow|deny|children]\n" \
>>> +    "          [,spawn=allow|deny]\n" \
>>>      "                Enable seccomp mode 2 system call filter (default 'off').\n" \
>>>      "                use 'obsolete' to allow obsolete system calls that are provided\n" \
>>>      "                    by the kernel, but typically no longer used by modern\n" \
>>> @@ -4025,10 +4026,12 @@ DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
>>>      "                use 'elevateprivileges' to allow or deny QEMU process to elevate\n" \
>>>      "                    its privileges by blacklisting all set*uid|gid system calls.\n" \
>>>      "                    The value 'children' will deny set*uid|gid system calls for\n" \
>>> -    "                    main QEMU process but will allow forks and execves to run unprivileged\n",
>>> +    "                    main QEMU process but will allow forks and execves to run unprivileged\n" \
>>> +    "                use 'spawn' to avoid QEMU to spawn new threads or processes by\n" \
>>> +    "                     blacklisting *fork and execve\n",
>>>      QEMU_ARCH_ALL)
>>>  STEXI
>>> -@item -sandbox @var{arg}[,obsolete=@var{string}][,elevateprivileges=@var{string}]
>>> +@item -sandbox @var{arg}[,obsolete=@var{string}][,elevateprivileges=@var{string}][,spawn=@var{string}]
>>>  @findex -sandbox
>>>  Enable Seccomp mode 2 system call filter. 'on' will enable syscall filtering and 'off' will
>>>  disable it.  The default is 'off'.
>>> @@ -4037,6 +4040,8 @@ disable it.  The default is 'off'.
>>>  Enable Obsolete system calls
>>>  @item elevateprivileges=@var{string}
>>>  Disable set*uid|gid system calls
>>> +@item spawn=@var{string}
>>> +Disable *fork and execve
>>>  @end table
>>>  ETEXI
>>>  
>>> diff --git a/qemu-seccomp.c b/qemu-seccomp.c
>>> index 2bad16cafb..4c169febf8 100644
>>> --- a/qemu-seccomp.c
>>> +++ b/qemu-seccomp.c
>>> @@ -79,6 +79,10 @@ static const struct QemuSeccompSyscall blacklist[] = {
>>>      { SCMP_SYS(setresgid),              4, QEMU_SECCOMP_SET_PRIVILEGED },
>>>      { SCMP_SYS(setfsuid),               4, QEMU_SECCOMP_SET_PRIVILEGED },
>>>      { SCMP_SYS(setfsgid),               4, QEMU_SECCOMP_SET_PRIVILEGED },
>>> +    /* spawn */
>>> +    { SCMP_SYS(fork),                   8, QEMU_SECCOMP_SET_SPAWN },
>>> +    { SCMP_SYS(vfork),                  8, QEMU_SECCOMP_SET_SPAWN },
>>> +    { SCMP_SYS(execve),                 8, QEMU_SECCOMP_SET_SPAWN },
>>>  };
>>>  
>>>  
>>> @@ -109,6 +113,14 @@ int seccomp_start(uint32_t seccomp_opts)
>>>              }
>>>  
>>>              break;
>>> +        case QEMU_SECCOMP_SET_SPAWN:
>>> +            if (seccomp_opts & QEMU_SECCOMP_SET_SPAWN) {
>>> +                break;
>>> +            } else {
>>> +                continue;
>>> +            }
>>> +
>>
>> Remove the above empty line?
>>
>> Anyway, it's somewhat ugly that you need a switch-case statement here at
>> all. Couldn't you simply check it like this:
>>
>> 	if (!(seccomp_opts & blacklist[i].set)) {
>> 		continue;
>> 	}
>> ?
>>
>> You then just have to invert the meaning of the
>> QEMU_SECCOMP_SET_OBSOLETE bit in the second patch, so that this bit is
>> treated in the same way as the others (i.e. use
>>  uint32_t seccomp_opts = QEMU_SECCOMP_SET_OBSOLETE;
>> instead of
>>  uint32_t seccomp_opts = 0x00000;
>> in vl.c in the second patch).
> 
> That's indeed much better, but perhaps:
>  uint32_t seccomp_opts = QEMU_SECCOMP_SET_DEFAULT | QEMU_SECCOMP_SET_OBSOLETE;

Right, the default set should be excluded by default of course, too! :-)

 Thomas

Patch
diff mbox

diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h
index 4a9e63c7cd..3ab5fc4f61 100644
--- a/include/sysemu/seccomp.h
+++ b/include/sysemu/seccomp.h
@@ -18,6 +18,7 @@ 
 #define QEMU_SECCOMP_SET_DEFAULT     (1 << 0)
 #define QEMU_SECCOMP_SET_OBSOLETE    (1 << 1)
 #define QEMU_SECCOMP_SET_PRIVILEGED  (1 << 2)
+#define QEMU_SECCOMP_SET_SPAWN       (1 << 3)
 
 #include <seccomp.h>
 
diff --git a/qemu-options.hx b/qemu-options.hx
index 5c1b163fb5..2b04b9f170 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -4018,6 +4018,7 @@  ETEXI
 
 DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
     "-sandbox on[,obsolete=allow|deny][,elevateprivileges=allow|deny|children]\n" \
+    "          [,spawn=allow|deny]\n" \
     "                Enable seccomp mode 2 system call filter (default 'off').\n" \
     "                use 'obsolete' to allow obsolete system calls that are provided\n" \
     "                    by the kernel, but typically no longer used by modern\n" \
@@ -4025,10 +4026,12 @@  DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
     "                use 'elevateprivileges' to allow or deny QEMU process to elevate\n" \
     "                    its privileges by blacklisting all set*uid|gid system calls.\n" \
     "                    The value 'children' will deny set*uid|gid system calls for\n" \
-    "                    main QEMU process but will allow forks and execves to run unprivileged\n",
+    "                    main QEMU process but will allow forks and execves to run unprivileged\n" \
+    "                use 'spawn' to avoid QEMU to spawn new threads or processes by\n" \
+    "                     blacklisting *fork and execve\n",
     QEMU_ARCH_ALL)
 STEXI
-@item -sandbox @var{arg}[,obsolete=@var{string}][,elevateprivileges=@var{string}]
+@item -sandbox @var{arg}[,obsolete=@var{string}][,elevateprivileges=@var{string}][,spawn=@var{string}]
 @findex -sandbox
 Enable Seccomp mode 2 system call filter. 'on' will enable syscall filtering and 'off' will
 disable it.  The default is 'off'.
@@ -4037,6 +4040,8 @@  disable it.  The default is 'off'.
 Enable Obsolete system calls
 @item elevateprivileges=@var{string}
 Disable set*uid|gid system calls
+@item spawn=@var{string}
+Disable *fork and execve
 @end table
 ETEXI
 
diff --git a/qemu-seccomp.c b/qemu-seccomp.c
index 2bad16cafb..4c169febf8 100644
--- a/qemu-seccomp.c
+++ b/qemu-seccomp.c
@@ -79,6 +79,10 @@  static const struct QemuSeccompSyscall blacklist[] = {
     { SCMP_SYS(setresgid),              4, QEMU_SECCOMP_SET_PRIVILEGED },
     { SCMP_SYS(setfsuid),               4, QEMU_SECCOMP_SET_PRIVILEGED },
     { SCMP_SYS(setfsgid),               4, QEMU_SECCOMP_SET_PRIVILEGED },
+    /* spawn */
+    { SCMP_SYS(fork),                   8, QEMU_SECCOMP_SET_SPAWN },
+    { SCMP_SYS(vfork),                  8, QEMU_SECCOMP_SET_SPAWN },
+    { SCMP_SYS(execve),                 8, QEMU_SECCOMP_SET_SPAWN },
 };
 
 
@@ -109,6 +113,14 @@  int seccomp_start(uint32_t seccomp_opts)
             }
 
             break;
+        case QEMU_SECCOMP_SET_SPAWN:
+            if (seccomp_opts & QEMU_SECCOMP_SET_SPAWN) {
+                break;
+            } else {
+                continue;
+            }
+
+            break;
         default:
             break;
         }
diff --git a/vl.c b/vl.c
index 413cfe8504..0af137da17 100644
--- a/vl.c
+++ b/vl.c
@@ -280,6 +280,10 @@  static QemuOptsList qemu_sandbox_opts = {
             .name = "elevateprivileges",
             .type = QEMU_OPT_STRING,
         },
+        {
+            .name = "spawn",
+            .type = QEMU_OPT_STRING,
+        },
         { /* end of list */ }
     },
 };
@@ -1081,6 +1085,18 @@  static int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp)
             }
         }
 
+        value = qemu_opt_get(opts, "spawn");
+        if (value) {
+            if (g_str_equal(value, "deny")) {
+                seccomp_opts |= QEMU_SECCOMP_SET_SPAWN;
+            } else if (g_str_equal(value, "allow")) {
+                /* default value */
+            } else {
+                error_report("invalid argument for spawn");
+                return -1;
+            }
+        }
+
         if (seccomp_start(seccomp_opts) < 0) {
             error_report("failed to install seccomp syscall filter "
                          "in the kernel");