diff mbox

[15/16] x86/monitor: fix: don't compromise a monitor_write_data with pending CR writes

Message ID 1468038185-7126-1-git-send-email-czuzu@bitdefender.com (mailing list archive)
State New, archived
Headers show

Commit Message

Corneliu ZUZU July 9, 2016, 4:23 a.m. UTC
The arch_vm_event.monitor structure is dynamically allocated and freed e.g. when
the toolstack user calls xc_monitor_disable(), which in turn effectively
discards any information that was in arch_vm_event.monitor->write_data.

But this can yield unexpected behavior since if a CR-write was awaiting to be
committed (non-zero write_data.writes_pending) on the scheduling tail
(hvm_do_resume()->monitor_ctrlreg_write_data()) before xc_monitor_disable() is
called, then the domain CR-write is wrongfully ignored, which of course, in
these cases, can easily render a domain crash.

To fix the issue, this patch:

* makes arch_vm_event.monitor->emul_read_data dynamically allocated and only
  frees the entire arch_vm_event.monitor in monitor_cleanup_domain() if there
  are no pending CR-writes, otherwise it only frees emul_read_data

* if there were pending CR-writes when monitor_cleanup_domain() was called,
  arch_vm_event.monitor will eventually be freed either after the CR-writes are
  committed (monitor_ctrlreg_write_data()) or when the domain/vcpu is destroyed

* calls monitor_ctrlreg_write_data() even if the monitor subsystem is not
  initialized

Signed-off-by: Corneliu ZUZU <czuzu@bitdefender.com>
---
 xen/arch/x86/hvm/emulate.c   |  4 ++--
 xen/arch/x86/hvm/hvm.c       |  7 ++++++-
 xen/arch/x86/mm/p2m.c        |  2 +-
 xen/arch/x86/monitor.c       | 49 +++++++++++++++++++++++++++++++++++++++-----
 xen/include/asm-x86/domain.h |  3 +--
 5 files changed, 54 insertions(+), 11 deletions(-)
diff mbox

Patch

diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index 7a9c6af..e08d9c6 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -77,9 +77,9 @@  static int set_context_data(void *buffer, unsigned int size)
     if ( unlikely(monitor_domain_initialised(curr->domain)) )
     {
         struct arch_vm_event_monitor *vem = curr->arch.vm_event->monitor;
-        unsigned int safe_size = min(size, vem->emul_read_data.size);
+        unsigned int safe_size = min(size, vem->emul_read_data->size);
 
-        memcpy(buffer, vem->emul_read_data.data, safe_size);
+        memcpy(buffer, vem->emul_read_data->data, safe_size);
         memset(buffer + safe_size, 0, size - safe_size);
         return X86EMUL_OKAY;
     }
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 1bfd9d4..a2568fd 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -488,9 +488,14 @@  void hvm_do_resume(struct vcpu *v)
 
             vem->emulate_flags = 0;
         }
+    }
 
+    /* 
+     * Handle pending monitor CR-writes. Note that the monitor subsystem
+     * might be uninitialized.
+     */
+    if ( unlikely(v->arch.vm_event && v->arch.vm_event->monitor) )
         monitor_ctrlreg_write_data(v);
-    }
 
     /* Inject pending hw/sw trap */
     if ( v->arch.hvm_vcpu.inject_trap.vector != -1 )
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index 92c0576..b280dc0 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -1645,7 +1645,7 @@  void p2m_mem_access_emulate_check(struct vcpu *v,
         vem->emulate_flags = violation ? rsp->flags : 0;
 
         if ( (rsp->flags & VM_EVENT_FLAG_SET_EMUL_READ_DATA) )
-            vem->emul_read_data = rsp->data.emul_read_data;
+            *vem->emul_read_data = rsp->data.emul_read_data;
     }
 }
 
diff --git a/xen/arch/x86/monitor.c b/xen/arch/x86/monitor.c
index c3123bc..05a2f0d 100644
--- a/xen/arch/x86/monitor.c
+++ b/xen/arch/x86/monitor.c
@@ -56,6 +56,7 @@  int monitor_init_domain(struct domain *d)
 {
     int rc = 0;
     struct vcpu *v;
+    struct arch_vm_event_monitor *vem;
 
     /* Already initialised? */
     if ( unlikely(monitor_domain_initialised(d)) )
@@ -65,10 +66,22 @@  int monitor_init_domain(struct domain *d)
     for_each_vcpu(d, v)
     {
         ASSERT(v->arch.vm_event);
-        ASSERT(!v->arch.vm_event->monitor);
 
-        v->arch.vm_event->monitor = xzalloc(struct arch_vm_event_monitor);
-        if ( unlikely(!v->arch.vm_event->monitor) )
+        if ( likely(!v->arch.vm_event->monitor) )
+        {
+            v->arch.vm_event->monitor = xzalloc(struct arch_vm_event_monitor);
+            if ( unlikely(!v->arch.vm_event->monitor) )
+            {
+                rc = -ENOMEM;
+                goto err;
+            }
+        }
+
+        vem = v->arch.vm_event->monitor;
+
+        ASSERT(!vem->emul_read_data);
+        vem->emul_read_data = xzalloc(struct vm_event_emul_read_data);
+        if ( unlikely(!vem->emul_read_data) )
         {
             rc = -ENOMEM;
             goto err;
@@ -100,6 +113,7 @@  err:
 void monitor_cleanup_domain(struct domain *d)
 {
     struct vcpu *v;
+    struct arch_vm_event_monitor *vem;
 
     xfree(d->arch.monitor.msr_bitmap);
     d->arch.monitor.msr_bitmap = NULL;
@@ -109,8 +123,22 @@  void monitor_cleanup_domain(struct domain *d)
         if ( unlikely(!v->arch.vm_event || !v->arch.vm_event->monitor) )
             continue;
 
-        xfree(v->arch.vm_event->monitor);
-        v->arch.vm_event->monitor = NULL;
+        vem = v->arch.vm_event->monitor;
+        xfree(vem->emul_read_data);
+        vem->emul_read_data = NULL;
+
+        /*
+         * Only invalidate write_data if no CR-writes are pending or domain is
+         * dead (domain_kill(d) code-path). If CR-writes are pending, write_data
+         * will eventually be freed when those writes are committed (see
+         * monitor_ctrlreg_write_data() function).
+         */
+        if ( likely(!vem->write_data.writes_pending) ||
+             unlikely(DOMDYING_dead == d->is_dying) )
+        {
+           xfree(vem);
+           v->arch.vm_event->monitor = NULL;
+        }
     }
 
     monitor_ctrlreg_disable_traps(d);
@@ -192,6 +220,17 @@  void monitor_ctrlreg_write_data(struct vcpu *v)
         hvm_set_cr3(w->cr3, 0);
         w->do_write.cr3 = 0;
     }
+
+    /*
+     * Was the monitor subsystem actually uninitialized
+     * and only waiting for pending CR-writes?
+     */
+    if ( unlikely(!monitor_domain_initialised(v->domain)) )
+    {
+        xfree(v->arch.vm_event->monitor);
+        xfree(v->arch.vm_event);
+        v->arch.vm_event = NULL;
+    }
 }
 
 static unsigned long *monitor_bitmap_for_msr(const struct domain *d, u32 *msr)
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 7663da2..e337cb3 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -9,7 +9,6 @@ 
 #include <asm/e820.h>
 #include <asm/mce.h>
 #include <public/vcpu.h>
-#include <public/vm_event.h>
 #include <public/hvm/hvm_info_table.h>
 
 #define has_32bit_shinfo(d)    ((d)->arch.has_32bit_shinfo)
@@ -510,7 +509,7 @@  typedef enum __packed {
  */
 struct arch_vm_event_monitor {
     uint32_t emulate_flags;
-    struct vm_event_emul_read_data emul_read_data;
+    struct vm_event_emul_read_data *emul_read_data;
     struct monitor_write_data write_data;
 };