diff mbox series

[PULL,3/3] seccomp: set the seccomp filter to all threads

Message ID 20180822154030.14911-4-otubo@redhat.com (mailing list archive)
State New, archived
Headers show
Series [PULL,1/3] seccomp: use SIGSYS signal instead of killing the thread | expand

Commit Message

Eduardo Otubo Aug. 22, 2018, 3:40 p.m. UTC
From: Marc-André Lureau <marcandre.lureau@redhat.com>

When using "-seccomp on", the seccomp policy is only applied to the
main thread, the vcpu worker thread and other worker threads created
after seccomp policy is applied; the seccomp policy is not applied to
e.g. the RCU thread because it is created before the seccomp policy is
applied and SECCOMP_FILTER_FLAG_TSYNC isn't used.

This can be verified with
for task in /proc/`pidof qemu`/task/*; do cat $task/status | grep Secc ; done
Seccomp:	2
Seccomp:	0
Seccomp:	0
Seccomp:	2
Seccomp:	2
Seccomp:	2

Starting with libseccomp 2.2.0 and kernel >= 3.17, we can use
seccomp_attr_set(ctx, SCMP_FLTATR_CTL_TSYNC, 1) to update the policy
on all threads.

Do it by default if possible, warn if not possible. Add an option to
set the tsync behaviour explicitly.

Note: we can't bump libseccomp to 2.2.0 since it's not available in
Debian oldstable (2.1.0).

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Acked-by: Eduardo Otubo <otubo@redhat.com>
---
 qemu-options.hx |  2 ++
 qemu-seccomp.c  | 65 +++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 65 insertions(+), 2 deletions(-)
diff mbox series

Patch

diff --git a/qemu-options.hx b/qemu-options.hx
index 5515dfaba5..dafacb60c6 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -3864,6 +3864,8 @@  Disable set*uid|gid system calls
 Disable *fork and execve
 @item resourcecontrol=@var{string}
 Disable process affinity and schedular priority
+@item tsync=@var{bool}
+Apply seccomp filter to all threads (default is auto, which warns on failure)
 @end table
 ETEXI
 
diff --git a/qemu-seccomp.c b/qemu-seccomp.c
index f0c833f3ca..aa23eae970 100644
--- a/qemu-seccomp.c
+++ b/qemu-seccomp.c
@@ -119,6 +119,45 @@  qemu_seccomp(unsigned int operation, unsigned int flags, void *args)
 #endif
 }
 
+static bool qemu_seccomp_syscall_check(void)
+{
+    int rc;
+
+    /*
+     * Probe for the seccomp() syscall with a call that is invalid on purpose
+     * (its second argument is non-zero): EINVAL means the syscall exists,
+     * any other result (e.g. ENOSYS) is treated as "not supported".
+     */
+    rc = qemu_seccomp(SECCOMP_SET_MODE_STRICT, 1, NULL);
+    if (rc < 0 && errno == EINVAL) {
+        return true;
+    }
+
+    return false;
+}
+
+static bool qemu_seccomp_get_default_tsync(void)
+{
+    bool tsync = true;
+    /* Pick the tsync default: on only if kernel and libseccomp allow it. */
+    /* TSYNC support was added with the seccomp() syscall, so probe for it */
+    if (!qemu_seccomp_syscall_check()) {
+        error_report("The host kernel doesn't support seccomp TSYNC!");
+        tsync = false;
+    }
+    /* NOTE(review): also trips for MAJOR > 2 with MINOR < 2 (e.g. 3.0). */
+#if !(SCMP_VER_MAJOR >= 2 && SCMP_VER_MINOR >= 2)
+    error_report("libseccomp is too old to support TSYNC!");
+    tsync = false;
+#endif
+
+    if (!tsync) {
+        error_report("Only the main thread will be filtered by seccomp!");
+    }
+
+    return tsync;
+}
+
 static uint32_t qemu_seccomp_get_kill_action(void)
 {
 #if defined(SECCOMP_GET_ACTION_AVAIL) && defined(SCMP_ACT_KILL_PROCESS) && \
@@ -136,7 +175,7 @@  static uint32_t qemu_seccomp_get_kill_action(void)
 }
 
 
-static int seccomp_start(uint32_t seccomp_opts)
+static int seccomp_start(uint32_t seccomp_opts, bool tsync)
 {
     int rc = 0;
     unsigned int i = 0;
@@ -149,6 +188,17 @@  static int seccomp_start(uint32_t seccomp_opts)
         goto seccomp_return;
     }
 
+    if (tsync) {
+#if SCMP_VER_MAJOR >= 2 && SCMP_VER_MINOR >= 2
+        rc = seccomp_attr_set(ctx, SCMP_FLTATR_CTL_TSYNC, 1);
+#else
+        rc = -1;
+#endif
+        if (rc != 0) {
+            goto seccomp_return;
+        }
+    }
+
     for (i = 0; i < ARRAY_SIZE(blacklist); i++) {
         if (!(seccomp_opts & blacklist[i].set)) {
             continue;
@@ -175,6 +225,13 @@  int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp)
         uint32_t seccomp_opts = QEMU_SECCOMP_SET_DEFAULT
                 | QEMU_SECCOMP_SET_OBSOLETE;
         const char *value = NULL;
+        bool tsync;
+
+        if (qemu_opt_get(opts, "tsync")) {
+            tsync = qemu_opt_get_bool(opts, "tsync", true);
+        } else {
+            tsync = qemu_seccomp_get_default_tsync();
+        }
 
         value = qemu_opt_get(opts, "obsolete");
         if (value) {
@@ -236,7 +293,7 @@  int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp)
             }
         }
 
-        if (seccomp_start(seccomp_opts) < 0) {
+        if (seccomp_start(seccomp_opts, tsync) < 0) {
             error_report("failed to install seccomp syscall filter "
                          "in the kernel");
             return -1;
@@ -271,6 +328,10 @@  static QemuOptsList qemu_sandbox_opts = {
             .name = "resourcecontrol",
             .type = QEMU_OPT_STRING,
         },
+        {
+            .name = "tsync",
+            .type = QEMU_OPT_BOOL,
+        },
         { /* end of list */ }
     },
 };