diff mbox

[3/6] hypertrace: [*-user] Add QEMU-side proxy to "guest_hypertrace" event

Message ID 147041637969.2523.4570342042982870131.stgit@fimbulvetr.bsc.es (mailing list archive)
State New, archived
Headers show

Commit Message

Lluís Vilanova Aug. 5, 2016, 4:59 p.m. UTC
QEMU detects when the guest uses 'mmap' on hypertrace's control channel
file, and then uses 'mprotect' to detect accesses to it, which are used
to trigger traceing event "guest_hypertrace".

Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
---
 Makefile.objs            |    4 +
 bsd-user/main.c          |   16 +++
 bsd-user/mmap.c          |    2 
 bsd-user/syscall.c       |    4 +
 hypertrace/Makefile.objs |   17 +++
 hypertrace/user.c        |  292 ++++++++++++++++++++++++++++++++++++++++++++++
 hypertrace/user.h        |   52 ++++++++
 linux-user/main.c        |   19 +++
 linux-user/mmap.c        |    2 
 linux-user/syscall.c     |    3 
 10 files changed, 411 insertions(+)
 create mode 100644 hypertrace/Makefile.objs
 create mode 100644 hypertrace/user.c
 create mode 100644 hypertrace/user.h

Comments

Eric Blake Aug. 5, 2016, 5:23 p.m. UTC | #1
On 08/05/2016 10:59 AM, Lluís Vilanova wrote:
> QEMU detects when the guest uses 'mmap' on hypertrace's control channel
> file, and then uses 'mprotect' to detect accesses to it, which are used
> to trigger traceing event "guest_hypertrace".

s/traceing/tracing/

I'll probably leave the technical review to others, though



> +++ b/bsd-user/mmap.c
> @@ -21,6 +21,7 @@
>  #include "qemu.h"
>  #include "qemu-common.h"
>  #include "bsd-mman.h"
> +#include "hypertrace/user.h"
>  
>  //#define DEBUG_MMAP
>  
> @@ -407,6 +408,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
>          }
>      }
>   the_end1:
> +    hypertrace_guest_mmap(fd, (void *)g2h(start));

Why is the cast to void* needed?
Lluís Vilanova Aug. 8, 2016, 1:08 p.m. UTC | #2
Eric Blake writes:

> On 08/05/2016 10:59 AM, Lluís Vilanova wrote:
>> QEMU detects when the guest uses 'mmap' on hypertrace's control channel
>> file, and then uses 'mprotect' to detect accesses to it, which are used
>> to trigger traceing event "guest_hypertrace".

> s/traceing/tracing/

> I'll probably leave the technical review to others, though

Thanks.


>> +++ b/bsd-user/mmap.c
>> @@ -21,6 +21,7 @@
>> #include "qemu.h"
>> #include "qemu-common.h"
>> #include "bsd-mman.h"
>> +#include "hypertrace/user.h"
>> 
>> //#define DEBUG_MMAP
>> 
>> @@ -407,6 +408,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
>> }
>> }
>> the_end1:
>> +    hypertrace_guest_mmap(fd, (void *)g2h(start));

> Why is the cast to void* needed?

That's unnecessary, my bad.


Thanks,
  Lluis
Stefan Hajnoczi Aug. 18, 2016, 10:17 a.m. UTC | #3
On Fri, Aug 05, 2016 at 06:59:39PM +0200, Lluís Vilanova wrote:
> +static void init_channel(const char *base, const char *suffix, size_t size,
> +                         char ** path, int *fd, uint64_t **addr)
> +{
> +    *path = g_malloc(strlen(base) + strlen(suffix) + 1);
> +    sprintf(*path, "%s%s", base, suffix);

Use g_strdup_printf() instead.

> +static void swap_control(void *from, void *to)
> +{
> +    if (mprotect(from, getpagesize(), PROT_READ | PROT_WRITE) == -1) {
> +        error_report("error: mprotect(from): %s", strerror(errno));
> +        abort();
> +    }
> +    if (mprotect(to, getpagesize(), PROT_READ) == -1) {
> +        error_report("error: mprotect(to): %s", strerror(errno));
> +        abort();
> +    }
> +}
> +
> +#include "hypertrace/emit.c"
> +
> +static void segv_handler(int signum, siginfo_t *siginfo, void *sigctxt)
> +{
> +    if (qemu_control_0 <= siginfo->si_addr &&
> +        siginfo->si_addr < qemu_control_1) {
> +
> +        /* 1st fault (guest will write cmd) */
> +        assert(((unsigned long)siginfo->si_addr % getpagesize()) == sizeof(uint64_t));
> +        swap_control(qemu_control_0, qemu_control_1);
> +
> +    } else if (qemu_control_1 <= siginfo->si_addr &&
> +               siginfo->si_addr < qemu_control_1 + getpagesize()) {
> +        uint64_t vcontrol = ((uint64_t*)qemu_control_0)[2];
> +        uint64_t *data_ptr = &qemu_data[vcontrol * CONFIG_HYPERTRACE_ARGS * sizeof(uint64_t)];
> +
> +        /* 2nd fault (invoke) */
> +        assert(((unsigned long)siginfo->si_addr % getpagesize()) == sizeof(uint64_t));
> +        hypertrace_emit(current_cpu, data_ptr);
> +        swap_control(qemu_control_1, qemu_control_0);
> +
> +    } else {
> +        /* proxy to next handler */
> +        if (segv_next.sa_sigaction != NULL) {
> +            segv_next.sa_sigaction(signum, siginfo, sigctxt);
> +        } else if (segv_next.sa_handler != NULL) {
> +            segv_next.sa_handler(signum);
> +        }
> +    }
> +}

Can this approach be made thread-safe?

If not then it would be good to consider the problem right away and
switch to something that is thread-safe, even if it depends on the
target architecture.
Lluís Vilanova Aug. 21, 2016, 12:15 p.m. UTC | #4
Stefan Hajnoczi writes:

> On Fri, Aug 05, 2016 at 06:59:39PM +0200, Lluís Vilanova wrote:
>> +static void init_channel(const char *base, const char *suffix, size_t size,
>> +                         char ** path, int *fd, uint64_t **addr)
>> +{
>> +    *path = g_malloc(strlen(base) + strlen(suffix) + 1);
>> +    sprintf(*path, "%s%s", base, suffix);

> Use g_strdup_printf() instead.

>> +static void swap_control(void *from, void *to)
>> +{
>> +    if (mprotect(from, getpagesize(), PROT_READ | PROT_WRITE) == -1) {
>> +        error_report("error: mprotect(from): %s", strerror(errno));
>> +        abort();
>> +    }
>> +    if (mprotect(to, getpagesize(), PROT_READ) == -1) {
>> +        error_report("error: mprotect(to): %s", strerror(errno));
>> +        abort();
>> +    }
>> +}
>> +
>> +#include "hypertrace/emit.c"
>> +
>> +static void segv_handler(int signum, siginfo_t *siginfo, void *sigctxt)
>> +{
>> +    if (qemu_control_0 <= siginfo->si_addr &&
>> +        siginfo->si_addr < qemu_control_1) {
>> +
>> +        /* 1st fault (guest will write cmd) */
>> +        assert(((unsigned long)siginfo->si_addr % getpagesize()) == sizeof(uint64_t));
>> +        swap_control(qemu_control_0, qemu_control_1);
>> +
>> +    } else if (qemu_control_1 <= siginfo->si_addr &&
>> +               siginfo->si_addr < qemu_control_1 + getpagesize()) {
>> +        uint64_t vcontrol = ((uint64_t*)qemu_control_0)[2];
>> +        uint64_t *data_ptr = &qemu_data[vcontrol * CONFIG_HYPERTRACE_ARGS * sizeof(uint64_t)];
>> +
>> +        /* 2nd fault (invoke) */
>> +        assert(((unsigned long)siginfo->si_addr % getpagesize()) == sizeof(uint64_t));
>> +        hypertrace_emit(current_cpu, data_ptr);
>> +        swap_control(qemu_control_1, qemu_control_0);
>> +
>> +    } else {
>> +        /* proxy to next handler */
>> +        if (segv_next.sa_sigaction != NULL) {
>> +            segv_next.sa_sigaction(signum, siginfo, sigctxt);
>> +        } else if (segv_next.sa_handler != NULL) {
>> +            segv_next.sa_handler(signum);
>> +        }
>> +    }
>> +}

> Can this approach be made thread-safe?

> If not then it would be good to consider the problem right away and
> switch to something that is thread-safe, even if it depends on the
> target architecture.

Kind of. The easiest solution is to have each thread have an mmap of its own of
the device (and moving qemu_control_{0,1} into CPUState). This would be
completely explicit and easy to understand.

Cheers,
  Lluis
Stefan Hajnoczi Aug. 23, 2016, 3:52 p.m. UTC | #5
On Sun, Aug 21, 2016 at 02:15:31PM +0200, Lluís Vilanova wrote:
> > Can this approach be made thread-safe?
> 
> > If not then it would be good to consider the problem right away and
> > switch to something that is thread-safe, even if it depends on the
> > target architecture.
> 
> Kind of. The easiest solution is to have each thread have an mmap of its own of
> the device (and moving qemu_control_{0,1} into CPUState). This would be
> completely explicit and easy to understand.

Yes.  I don't know if the TCG and -user folks will accept the SIGSEGV
approach, but I'd be okay with it if the multi-threading issue is solved
by 1 device per thread.

Stefan
diff mbox

Patch

diff --git a/Makefile.objs b/Makefile.objs
index 1c1b03c..f598e0e 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -104,6 +104,10 @@  util-obj-y +=  trace/
 target-obj-y += trace/
 
 ######################################################################
+# hypertrace
+target-obj-y += hypertrace/
+
+######################################################################
 # guest agent
 
 # FIXME: a few definitions from qapi-types.o/qapi-visit.o are needed
diff --git a/bsd-user/main.c b/bsd-user/main.c
index 315ba1d..9721240 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -30,9 +30,12 @@ 
 #include "tcg.h"
 #include "qemu/timer.h"
 #include "qemu/envlist.h"
+#include "qemu/error-report.h"
 #include "exec/log.h"
 #include "trace/control.h"
 #include "glib-compat.h"
+#include "hypertrace/user.h"
+
 
 int singlestep;
 unsigned long mmap_min_addr;
@@ -692,6 +695,8 @@  static void usage(void)
            "-strace           log system calls\n"
            "-trace            [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
            "                  specify tracing options\n"
+           "-hypertrace       [[base=]<path>][,pages=<int>]\n"
+           "                  specify hypertrace options\n"
            "\n"
            "Environment variables:\n"
            "QEMU_STRACE       Print system calls and arguments similar to the\n"
@@ -742,6 +747,8 @@  int main(int argc, char **argv)
     envlist_t *envlist = NULL;
     char *trace_file = NULL;
     bsd_type = target_openbsd;
+    char *hypertrace_base = NULL;
+    uint64_t hypertrace_size = 0;
 
     if (argc <= 1)
         usage();
@@ -761,6 +768,7 @@  int main(int argc, char **argv)
     cpu_model = NULL;
 
     qemu_add_opts(&qemu_trace_opts);
+    qemu_add_opts(&qemu_hypertrace_opts);
 
     optind = 1;
     for (;;) {
@@ -851,6 +859,9 @@  int main(int argc, char **argv)
         } else if (!strcmp(r, "trace")) {
             g_free(trace_file);
             trace_file = trace_opt_parse(optarg);
+        } else if (!strcmp(r, "hypertrace")) {
+            g_free(hypertrace_file);
+            hypertrace_opt_parse(optarg, &hypertrace_base, &hypertrace_size);
         } else {
             usage();
         }
@@ -985,6 +996,11 @@  int main(int argc, char **argv)
     target_set_brk(info->brk);
     syscall_init();
     signal_init();
+    if (atexit(hypertrace_fini) != 0) {
+        fprintf(stderr, "error: atexit: %s\n", strerror(errno));
+        abort();
+    }
+    hypertrace_init(hypertrace_base, hypertrace_size);
 
     /* Now that we've loaded the binary, GUEST_BASE is fixed.  Delay
        generating the prologue until now so that the prologue can take
diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c
index 610f91b..faf255f 100644
--- a/bsd-user/mmap.c
+++ b/bsd-user/mmap.c
@@ -21,6 +21,7 @@ 
 #include "qemu.h"
 #include "qemu-common.h"
 #include "bsd-mman.h"
+#include "hypertrace/user.h"
 
 //#define DEBUG_MMAP
 
@@ -407,6 +408,7 @@  abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
         }
     }
  the_end1:
+    hypertrace_guest_mmap(fd, (void *)g2h(start));
     page_set_flags(start, start + len, prot | PAGE_VALID);
  the_end:
 #ifdef DEBUG_MMAP
diff --git a/bsd-user/syscall.c b/bsd-user/syscall.c
index 66492aa..275ef61 100644
--- a/bsd-user/syscall.c
+++ b/bsd-user/syscall.c
@@ -26,6 +26,7 @@ 
 
 #include "qemu.h"
 #include "qemu-common.h"
+#include "hypertrace/user.h"
 
 //#define DEBUG
 
@@ -332,6 +333,7 @@  abi_long do_freebsd_syscall(void *cpu_env, int num, abi_long arg1,
         _mcleanup();
 #endif
         gdb_exit(cpu_env, arg1);
+        hypertrace_fini();
         /* XXX: should free thread stack and CPU env */
         _exit(arg1);
         ret = 0; /* avoid warning */
@@ -430,6 +432,7 @@  abi_long do_netbsd_syscall(void *cpu_env, int num, abi_long arg1,
         _mcleanup();
 #endif
         gdb_exit(cpu_env, arg1);
+        hypertrace_fini();
         /* XXX: should free thread stack and CPU env */
         _exit(arg1);
         ret = 0; /* avoid warning */
@@ -505,6 +508,7 @@  abi_long do_openbsd_syscall(void *cpu_env, int num, abi_long arg1,
         _mcleanup();
 #endif
         gdb_exit(cpu_env, arg1);
+        hypertrace_fini();
         /* XXX: should free thread stack and CPU env */
         _exit(arg1);
         ret = 0; /* avoid warning */
diff --git a/hypertrace/Makefile.objs b/hypertrace/Makefile.objs
new file mode 100644
index 0000000..6eb5acf
--- /dev/null
+++ b/hypertrace/Makefile.objs
@@ -0,0 +1,17 @@ 
+# -*- mode: makefile -*-
+
+target-obj-$(CONFIG_USER_ONLY) += user.o
+
+# user.c #include's the generated emit.c, so it must exist before compiling
+$(obj)/user.o: $(obj)/emit.c
+
+# emit.c is only overwritten when the freshly generated text differs from it
+$(obj)/emit.c: $(obj)/emit.c-timestamp $(BUILD_DIR)/config-host.mak
+	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
+# Generate hypertrace_emit(), which passes data[0] .. data[N-1] to
+# trace_guest_hypertrace() for N = CONFIG_HYPERTRACE_ARGS
+$(obj)/emit.c-timestamp: $(BUILD_DIR)/config-host.mak
+	@echo "static void hypertrace_emit(CPUState *cpu, uint64_t *data)" >$@
+	@echo "{" >>$@
+	@echo -n "    trace_guest_hypertrace(cpu" >>$@
+	@for i in `seq $(CONFIG_HYPERTRACE_ARGS)`; do \
+	    echo -n ", data[$$i-1]" >>$@; \
+	done
+	@echo ");" >>$@
+	@echo "}" >>$@
diff --git a/hypertrace/user.c b/hypertrace/user.c
new file mode 100644
index 0000000..55df79d
--- /dev/null
+++ b/hypertrace/user.c
@@ -0,0 +1,292 @@ 
+/*
+ * QEMU-side management of hypertrace in user-level emulation.
+ *
+ * Copyright (C) 2016 Lluís Vilanova <vilanova@ac.upc.edu>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+/*
+ * Implementation details
+ * ======================
+ *
+ * Both channels are provided as regular files in the host system, which must be
+ * mmap'ed by the guest application.
+ *
+ * Data channel
+ * ------------
+ *
+ * The guest must mmap a file named <base>-data, where base is the argument
+ * given to hypertrace_init.
+ *
+ * Regular memory accesses are used on the data channel.
+ *
+ * Control channel
+ * ---------------
+ *
+ * The guest must mmap a file named <base>-control, where base is the argument
+ * given to hypertrace_init.
+ *
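+ * The first 64 bits of that memory contain the number of records that fit in
+ * the data channel (its size in bytes divided by the size of one record of
+ * CONFIG_HYPERTRACE_ARGS 64-bit values), converted with tswap64().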
+ *
+ * The control channel is mprotect'ed by QEMU so that guest writes can be
+ * intercepted by QEMU in order to raise the "guest_hypertrace" tracing
+ * event. The guest must perform writes twice, one on each of two consecutive
+ * pages, so that the written data can be both read by QEMU and the access
+ * intercepted.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/mman.h>
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+
+#include "hypertrace/user.h"
+#include "qemu/config-file.h"
+#include "qemu/error-report.h"
+#include "trace.h"
+
+
+/* names and descriptors of the backing files (set by init_channel()) */
+static char *data_path = NULL;
+static char *control_path = NULL;
+static int data_fd = -1;
+static int control_fd = -1;
+
+/* QEMU's own mapping of the data channel file */
+static uint64_t *qemu_data = NULL;
+/* the two consecutive pages of the guest's control channel mapping
+ * (set by hypertrace_guest_mmap())
+ */
+static void *qemu_control_0 = NULL;
+static void *qemu_control_1 = NULL;
+
+/* identity of the control file, used to recognize guest mmap's of it */
+static struct stat control_fd_stat;
+
+/* SIGSEGV disposition that was in place before segv_handler was installed */
+struct sigaction segv_next;
+static void segv_handler(int signum, siginfo_t *siginfo, void *sigctxt);
+
+
+/*
+ * Options of the "hypertrace" group:
+ *   path  - string; also the implied name of a leading value without "key="
+ *   pages - number, "1" when not given
+ */
+QemuOptsList qemu_hypertrace_opts = {
+    .name = "hypertrace",
+    .implied_opt_name = "path",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_hypertrace_opts.head),
+    .desc = {
+        {
+            .name = "path",
+            .type = QEMU_OPT_STRING,
+        },
+        {
+            .name = "pages",
+            .type = QEMU_OPT_NUMBER,
+            .def_value_str = "1",
+        },
+        { /* end of list */ }
+    },
+};
+
+/*
+ * Parse the argument of the "hypertrace" command line option.
+ *
+ * @optarg: Option string, parsed against the "hypertrace" option group.
+ * @base:   Set to a newly allocated copy of the "path" value (the caller
+ *          releases it with g_free()), or to NULL when no path was given.
+ * @size:   Set to the data channel length in bytes
+ *          ("pages" * TARGET_PAGE_SIZE).
+ *
+ * Exits the process when the option string cannot be parsed or when "pages"
+ * is zero or too large to be expressed in bytes.
+ */
+void hypertrace_opt_parse(const char *optarg, char **base, size_t *size)
+{
+    uint64_t pages;
+    const char *path;
+    QemuOpts *opts = qemu_opts_parse_noisily(qemu_find_opts("hypertrace"),
+                                             optarg, true);
+    if (!opts) {
+        exit(EXIT_FAILURE);
+    }
+
+    path = qemu_opt_get(opts, "path");
+    *base = path ? g_strdup(path) : NULL;
+
+    /* keep the full 64-bit value: narrowing it to int would let very large
+     * inputs wrap around and slip past the range check
+     */
+    pages = qemu_opt_get_number(opts, "pages", 1);
+    if (pages == 0 || pages > SIZE_MAX / TARGET_PAGE_SIZE) {
+        error_report("Parameter 'pages' expects a positive number");
+        exit(EXIT_FAILURE);
+    }
+    *size = (size_t)pages * TARGET_PAGE_SIZE;
+
+    /* the path was duplicated above, so the parsed options can be released */
+    qemu_opts_del(opts);
+}
+
+/*
+ * Create the backing file "<base><suffix>" of one channel and size it.
+ *
+ * @base:   Common prefix of the channel file names.
+ * @suffix: Channel-specific suffix appended to @base.
+ * @size:   Length in bytes the file is extended to.
+ * @path:   Set to the newly allocated file name.
+ * @fd:     Set to the descriptor of the newly created file.
+ * @addr:   If not NULL, set to a shared read/write mapping of the whole file.
+ *
+ * The file must not exist beforehand (O_EXCL). Any failure is reported and
+ * aborts the process.
+ */
+static void init_channel(const char *base, const char *suffix, size_t size,
+                         char **path, int *fd, uint64_t **addr)
+{
+    /* byte written at the end of the file; must have a defined value */
+    const char zero = 0;
+    off_t lres;
+    ssize_t wres;
+
+    *path = g_strdup_printf("%s%s", base, suffix);
+
+    *fd = open(*path, O_CREAT | O_EXCL | O_RDWR, S_IRUSR | S_IWUSR);
+    if (*fd == -1) {
+        error_report("error: open(%s): %s", *path, strerror(errno));
+        abort();
+    }
+
+    /* extend the file to @size bytes by writing its last byte */
+    lres = lseek(*fd, size - 1, SEEK_SET);
+    if (lres == (off_t)-1) {
+        error_report("error: lseek(%s): %s", *path, strerror(errno));
+        abort();
+    }
+
+    wres = write(*fd, &zero, 1);
+    if (wres == -1) {
+        error_report("error: write(%s): %s", *path, strerror(errno));
+        abort();
+    }
+
+    if (addr) {
+        *addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, *fd, 0);
+        if (*addr == MAP_FAILED) {
+            error_report("error: mmap(%s): %s", *path, strerror(errno));
+            abort();
+        }
+    }
+}
+
+/*
+ * Signal handler that removes the hypertrace channel files and then lets the
+ * signal take its default action.
+ */
+static void fini_handler(int signum, siginfo_t *siginfo, void *sigctxt)
+{
+    hypertrace_fini();
+
+    /* simply returning would resume execution and swallow the signal;
+     * restore the default disposition and deliver the signal again instead
+     */
+    signal(signum, SIG_DFL);
+    raise(signum);
+}
+
+/*
+ * Set up hypertrace for user-mode emulation: create the "<base>-data" and
+ * "<base>-control" files, publish the number of data records in the first
+ * control word and install the signal handlers.
+ *
+ * Does nothing when @base is NULL. Any failure is reported and aborts the
+ * process.
+ */
+void hypertrace_init(const char *base, uint64_t data_size)
+{
+    if (base == NULL) {
+        return;
+    }
+
+    /* run fini_handler() on SIGINT and SIGABRT */
+    struct sigaction sigint;
+    memset(&sigint, 0, sizeof(sigint));
+    sigint.sa_sigaction = fini_handler;
+    sigint.sa_flags = SA_SIGINFO | SA_RESTART;
+    if (sigaction(SIGINT, &sigint, NULL) != 0) {
+        error_report("error: sigaction(SIGINT): %s", strerror(errno));
+        abort();
+    }
+    if (sigaction(SIGABRT, &sigint, NULL) != 0) {
+        error_report("error: sigaction(SIGABRT): %s", strerror(errno));
+        abort();
+    }
+
+    init_channel(base, "-data", data_size, &data_path, &data_fd, &qemu_data);
+    /* the control file is two host pages long */
+    /* NOTE(review): this mapping is never unmapped in this file -- confirm
+     * whether that is intended
+     */
+    uint64_t *control;
+    init_channel(base, "-control", getpagesize() * 2, &control_path, &control_fd, &control);
+
+    /* number of CONFIG_HYPERTRACE_ARGS-sized records in the data channel */
+    control[0] = tswap64(data_size / (CONFIG_HYPERTRACE_ARGS * sizeof(uint64_t)));
+
+    /* remembered so that hypertrace_guest_mmap() can recognize this file */
+    if (fstat(control_fd, &control_fd_stat) == -1) {
+        error_report("error: fstat(hypertrace_control): %s", strerror(errno));
+        abort();
+    }
+
+    struct sigaction segv;
+    memset(&segv, 0, sizeof(segv));
+    segv.sa_sigaction = segv_handler;
+    segv.sa_flags = SA_SIGINFO | SA_RESTART;
+    sigemptyset(&segv.sa_mask);
+
+    /* the previous disposition is saved in segv_next */
+    if (sigaction(SIGSEGV, &segv, &segv_next) != 0) {
+        error_report("error: sigaction(SIGSEGV): %s", strerror(errno));
+        abort();
+    }
+}
+
+
+/*
+ * Release one channel: close and unlink its backing file if it is open, then
+ * free its file name. Both @fd and @path are reset (-1 and NULL). A failing
+ * close() or unlink() is reported and aborts the process.
+ */
+static void fini_channel(int *fd, char **path)
+{
+    const bool opened = (*fd != -1);
+
+    if (opened && close(*fd) == -1) {
+        error_report("error: close: %s", strerror(errno));
+        abort();
+    }
+    if (opened && unlink(*path) == -1) {
+        error_report("error: unlink(%s): %s", *path, strerror(errno));
+        abort();
+    }
+    if (opened) {
+        *fd = -1;
+    }
+
+    /* g_free() ignores NULL, so no check is needed */
+    g_free(*path);
+    *path = NULL;
+}
+
+/*
+ * Tear down hypertrace: uninstall segv_handler() if it is installed and
+ * remove both channel files. Only the first call has any effect.
+ */
+void hypertrace_fini(void)
+{
+    static bool atexit_in = false;
+    struct sigaction segv_cur;
+
+    if (atexit_in) {
+        return;
+    }
+    atexit_in = true;
+
+    /* segv_next only holds a saved disposition once segv_handler() has been
+     * installed; restoring it unconditionally (e.g. when hypertrace was never
+     * enabled) would reset SIGSEGV to a zeroed sigaction
+     */
+    if (sigaction(SIGSEGV, NULL, &segv_cur) != 0) {
+        error_report("error: sigaction(SIGSEGV): %s", strerror(errno));
+        abort();
+    }
+    if ((segv_cur.sa_flags & SA_SIGINFO) &&
+        segv_cur.sa_sigaction == segv_handler) {
+        if (sigaction(SIGSEGV, &segv_next, NULL) != 0) {
+            error_report("error: sigaction(SIGSEGV): %s", strerror(errno));
+            abort();
+        }
+    }
+    fini_channel(&data_fd, &data_path);
+    fini_channel(&control_fd, &control_path);
+}
+
+
+/*
+ * Check whether a guest mmap maps the control channel file and, if so,
+ * remember where it was mapped and start intercepting writes to it.
+ *
+ * @fd:        File descriptor the guest mapped.
+ * @qemu_addr: Host address of the start of the mapping.
+ *
+ * Mappings of any other file, or of no file at all, are ignored.
+ */
+void hypertrace_guest_mmap(int fd, void *qemu_addr)
+{
+    struct stat s;
+
+    /* control_fd_stat is only filled in while the control channel exists */
+    if (control_fd == -1) {
+        return;
+    }
+
+    if (fstat(fd, &s) != 0) {
+        return;
+    }
+
+    if (s.st_dev != control_fd_stat.st_dev ||
+        s.st_ino != control_fd_stat.st_ino) {
+        return;
+    }
+
+    /* it's an mmap of the control channel; split it in two and mprotect it to
+     * detect writes (cmd is written once on each part)
+     */
+    qemu_control_0 = qemu_addr;
+    qemu_control_1 = qemu_control_0 + getpagesize();
+    if (mprotect(qemu_control_0, getpagesize(), PROT_READ) == -1) {
+        error_report("error: mprotect(hypertrace_control): %s", strerror(errno));
+        abort();
+    }
+}
+
+/*
+ * Exchange the protections of two control pages: the page at @from becomes
+ * readable and writable, the page at @to becomes read-only. A failing
+ * mprotect() is reported and aborts the process.
+ */
+static void swap_control(void *from, void *to)
+{
+    const int prot_open = PROT_READ | PROT_WRITE;
+    const int prot_trap = PROT_READ;
+
+    if (mprotect(from, getpagesize(), prot_open) == -1) {
+        error_report("error: mprotect(from): %s", strerror(errno));
+        abort();
+    }
+
+    if (mprotect(to, getpagesize(), prot_trap) == -1) {
+        error_report("error: mprotect(to): %s", strerror(errno));
+        abort();
+    }
+}
+
+#include "hypertrace/emit.c"
+
+/*
+ * SIGSEGV handler implementing the control channel protocol.
+ *
+ *  - A fault on the first control page means the guest is about to write a
+ *    command: that page is made writable and the second one read-only.
+ *  - A fault on the second control page raises the "guest_hypertrace" event
+ *    with the arguments selected by the command, and the protections are
+ *    swapped back.
+ *
+ * A fault on any other address is passed to the SIGSEGV disposition that was
+ * in place before this handler was installed (segv_next).
+ */
+static void segv_handler(int signum, siginfo_t *siginfo, void *sigctxt)
+{
+    /* the control pages are unknown until the guest has mmap'ed the control
+     * file; without this check a fault in the first host page would be taken
+     * for a control channel access
+     */
+    const bool mapped = qemu_control_0 != NULL;
+
+    if (mapped &&
+        qemu_control_0 <= siginfo->si_addr &&
+        siginfo->si_addr < qemu_control_1) {
+
+        /* 1st fault (guest will write cmd) */
+        assert(((unsigned long)siginfo->si_addr % getpagesize()) == sizeof(uint64_t));
+        swap_control(qemu_control_0, qemu_control_1);
+
+    } else if (mapped &&
+               qemu_control_1 <= siginfo->si_addr &&
+               siginfo->si_addr < qemu_control_1 + getpagesize()) {
+        /* NOTE(review): the asserts expect the guest to write at byte offset
+         * sizeof(uint64_t), i.e. word 1, but word 2 is read here, without
+         * tswap64() and without a range check -- confirm against the guest
+         * side of the protocol
+         */
+        uint64_t vcontrol = ((uint64_t*)qemu_control_0)[2];
+        /* qemu_data points to uint64_t, so the index counts 64-bit words and
+         * must not be scaled by sizeof(uint64_t) again
+         */
+        uint64_t *data_ptr = &qemu_data[vcontrol * CONFIG_HYPERTRACE_ARGS];
+
+        /* 2nd fault (invoke) */
+        assert(((unsigned long)siginfo->si_addr % getpagesize()) == sizeof(uint64_t));
+        hypertrace_emit(current_cpu, data_ptr);
+        swap_control(qemu_control_1, qemu_control_0);
+
+    } else if (segv_next.sa_flags & SA_SIGINFO) {
+        /* proxy to next handler; sa_flags tells which member is valid */
+        segv_next.sa_sigaction(signum, siginfo, sigctxt);
+    } else if (segv_next.sa_handler == SIG_DFL) {
+        /* no next handler: let the retried access take the default action
+         * instead of faulting into this handler forever
+         */
+        signal(signum, SIG_DFL);
+    } else if (segv_next.sa_handler != SIG_IGN) {
+        segv_next.sa_handler(signum);
+    }
+}
diff --git a/hypertrace/user.h b/hypertrace/user.h
new file mode 100644
index 0000000..a13bae4
--- /dev/null
+++ b/hypertrace/user.h
@@ -0,0 +1,52 @@ 
+/*
+ * QEMU-side management of hypertrace in user-level emulation.
+ *
+ * Copyright (C) 2016 Lluís Vilanova <vilanova@ac.upc.edu>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HYPERTRACE__USER_H
+#define HYPERTRACE__USER_H
+
+#include <stdint.h>
+#include <sys/types.h>
+
+
+/**
+ * Definition of QEMU options describing hypertrace subsystem configuration
+ */
+extern QemuOptsList qemu_hypertrace_opts;
+
+/**
+ * hypertrace_opt_parse:
+ * @optarg: Input arguments.
+ * @base: Output base path for the hypertrace channel files.
+ * @size: Output length in bytes for the data channel.
+ *
+ * Parse the commandline arguments for hypertrace.
+ */
+void hypertrace_opt_parse(const char *optarg, char **base, size_t *size);
+
+/**
+ * hypertrace_init:
+ * @base: Base path for the hypertrace channel files.
+ * @data_size: Length in bytes for the data channel.
+ *
+ * Initialize the backing files for the hypertrace channel.
+ */
+void hypertrace_init(const char *base, uint64_t data_size);
+
+/**
+ * hypertrace_guest_mmap:
+ * @fd: File descriptor of the guest mmap.
+ * @qemu_addr: Host address of the guest mmap.
+ *
+ * Check if this mmap is for the control channel and act accordingly.
+ *
+ * Precondition: defined(CONFIG_USER_ONLY)
+ */
+void hypertrace_guest_mmap(int fd, void *qemu_addr);
+
+/**
+ * hypertrace_fini:
+ *
+ * Remove the backing files for the hypertrace channel.
+ */
+void hypertrace_fini(void);
+
+#endif /* HYPERTRACE__USER_H */
diff --git a/linux-user/main.c b/linux-user/main.c
index 462e820..8f3d9cf 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -32,10 +32,12 @@ 
 #include "tcg.h"
 #include "qemu/timer.h"
 #include "qemu/envlist.h"
+#include "qemu/error-report.h"
 #include "elf.h"
 #include "exec/log.h"
 #include "trace/control.h"
 #include "glib-compat.h"
+#include "hypertrace/user.h"
 
 char *exec_path;
 
@@ -4011,6 +4013,14 @@  static void handle_arg_trace(const char *arg)
     trace_file = trace_opt_parse(arg);
 }
 
+static char *hypertrace_base;
+static size_t hypertrace_size;
+static void handle_arg_hypertrace(const char *arg)
+{
+    g_free(hypertrace_base);
+    hypertrace_opt_parse(arg, &hypertrace_base, &hypertrace_size);
+}
+
 struct qemu_argument {
     const char *argv;
     const char *env;
@@ -4060,6 +4070,8 @@  static const struct qemu_argument arg_table[] = {
      "",           "Seed for pseudo-random number generator"},
     {"trace",      "QEMU_TRACE",       true,  handle_arg_trace,
      "",           "[[enable=]<pattern>][,events=<file>][,file=<file>]"},
+    {"hypertrace", "QEMU_HYPERTRACE",  true,  handle_arg_hypertrace,
+     "",           "[[base=]<path>][,pages=<int>]"},
     {"version",    "QEMU_VERSION",     false, handle_arg_version,
      "",           "display version information and exit"},
     {NULL, NULL, false, NULL, NULL, NULL}
@@ -4250,6 +4262,7 @@  int main(int argc, char **argv, char **envp)
     srand(time(NULL));
 
     qemu_add_opts(&qemu_trace_opts);
+    qemu_add_opts(&qemu_hypertrace_opts);
 
     optind = parse_args(argc, argv);
 
@@ -4448,6 +4461,12 @@  int main(int argc, char **argv, char **envp)
     syscall_init();
     signal_init();
 
+    if (atexit(hypertrace_fini)) {
+        fprintf(stderr, "error: atexit: %s\n", strerror(errno));
+        abort();
+    }
+    hypertrace_init(hypertrace_base, hypertrace_size);
+
     /* Now that we've loaded the binary, GUEST_BASE is fixed.  Delay
        generating the prologue until now so that the prologue can take
        the real value of GUEST_BASE into account.  */
diff --git a/linux-user/mmap.c b/linux-user/mmap.c
index c4371d9..3207d98 100644
--- a/linux-user/mmap.c
+++ b/linux-user/mmap.c
@@ -23,6 +23,7 @@ 
 #include "qemu.h"
 #include "qemu-common.h"
 #include "translate-all.h"
+#include "hypertrace/user.h"
 
 //#define DEBUG_MMAP
 
@@ -553,6 +554,7 @@  abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
         }
     }
  the_end1:
+    hypertrace_guest_mmap(fd, (void *)g2h(start));
     page_set_flags(start, start + len, prot | PAGE_VALID);
  the_end:
 #ifdef DEBUG_MMAP
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index ca6a2b4..e73ec5d 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -111,6 +111,7 @@  int __clone2(int (*fn)(void *), void *child_stack_base,
 #include "uname.h"
 
 #include "qemu.h"
+#include "hypertrace/user.h"
 
 #define CLONE_NPTL_FLAGS2 (CLONE_SETTLS | \
     CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID)
@@ -7214,6 +7215,7 @@  abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         _mcleanup();
 #endif
         gdb_exit(cpu_env, arg1);
+        hypertrace_fini();
         _exit(arg1);
         ret = 0; /* avoid warning */
         break;
@@ -9219,6 +9221,7 @@  abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         _mcleanup();
 #endif
         gdb_exit(cpu_env, arg1);
+        hypertrace_fini();
         ret = get_errno(exit_group(arg1));
         break;
 #endif