diff mbox

[v3,3/8] x86/vm-event/monitor: don't compromise monitor_write_data on domain cleanup

Message ID 1467820299-13403-1-git-send-email-czuzu@bitdefender.com (mailing list archive)
State New, archived
Headers show

Commit Message

Corneliu ZUZU July 6, 2016, 3:51 p.m. UTC
The arch_vm_event structure is dynamically allocated and freed @
vm_event_cleanup_domain. This cleanup is triggered e.g. when the toolstack user
disables domain monitoring (xc_monitor_disable), which in turn effectively
discards any information that was in arch_vm_event.write_data.

But this can yield unexpected behavior: if a CR write was still waiting to be
committed on the scheduling tail (hvm_do_resume->arch_monitor_write_data)
when xc_monitor_disable is called, then that domain CR write is wrongfully
ignored, which can easily lead to a domain crash.

To fix the issue, this patch:
    - makes arch_vm_event.emul_read_data dynamically allocated
    - in vm_event_cleanup_domain, if there are still uncommitted writes in
      arch_vm_event.write_data:
        - only frees emul_read_data
        - defers xfree of the entire arch_vm_event until vcpu/domain destruction
    - otherwise arch_vm_event is freed in vm_event_cleanup_domain, as before

For clarity, also introduce inline functions that check initialisation of the
vm_event subsystem for a vcpu/domain (vm_event_{vcpu,domain}_initialised), since
that is now true only when both arch_vm_event and arch_vm_event.emul_read_data
are non-NULL.

Signed-off-by: Corneliu ZUZU <czuzu@bitdefender.com>
---
Changed since v2:
  * introduce vm_event_{vcpu,domain}_initialised inline functions for clarity
  * xfree arch_vm_event in vm_event_cleanup_domain as before if there are no
    uncommitted writes in arch_vm_event.write_data
---
 xen/arch/x86/domain.c          |  9 +++++++--
 xen/arch/x86/hvm/emulate.c     |  6 +++---
 xen/arch/x86/hvm/hvm.c         |  2 ++
 xen/arch/x86/mm/p2m.c          |  2 +-
 xen/arch/x86/vm_event.c        | 35 +++++++++++++++++++++++++++++------
 xen/common/vm_event.c          | 12 ++++++++++++
 xen/include/asm-x86/domain.h   | 17 +++++++++++------
 xen/include/asm-x86/monitor.h  |  3 ++-
 xen/include/asm-x86/vm_event.h | 13 ++++++++++++-
 9 files changed, 79 insertions(+), 20 deletions(-)

Comments

Jan Beulich July 8, 2016, 7:35 a.m. UTC | #1
>>> On 06.07.16 at 17:51, <czuzu@bitdefender.com> wrote:
> @@ -492,8 +493,12 @@ int vcpu_initialise(struct vcpu *v)
>  
>  void vcpu_destroy(struct vcpu *v)
>  {
> -    xfree(v->arch.vm_event);
> -    v->arch.vm_event = NULL;
> +    if ( unlikely(v->arch.vm_event) )
> +    {
> +        xfree(v->arch.vm_event->emul_read_data);
> +        xfree(v->arch.vm_event);
> +        v->arch.vm_event = NULL;
> +    }

Considering the repeat of this pattern ...

> @@ -52,8 +58,25 @@ void vm_event_cleanup_domain(struct domain *d)
>  
>      for_each_vcpu ( d, v )
>      {
> -        xfree(v->arch.vm_event);
> -        v->arch.vm_event = NULL;
> +        if ( likely(!v->arch.vm_event) )
> +            continue;
> +
> +        /*
> +         * Only xfree the entire arch_vm_event if write_data was fully handled.
> +         * Otherwise defer entire xfree until domain/vcpu destroyal.
> +         */
> +        if ( likely(!v->arch.vm_event->write_data.uncommitted_writes) )
> +        {
> +            xfree(v->arch.vm_event->emul_read_data);
> +            xfree(v->arch.vm_event);
> +            v->arch.vm_event = NULL;
> +            continue;
> +        }

... here, please consider making this another helper (inline?) function.

> +        /* write_data not fully handled, only xfree emul_read_data */

Comment style again (and more below).

> --- a/xen/common/vm_event.c
> +++ b/xen/common/vm_event.c
> @@ -534,6 +534,8 @@ static void mem_sharing_notification(struct vcpu *v, unsigned int port)
>  /* Clean up on domain destruction */
>  void vm_event_cleanup(struct domain *d)
>  {
> +    struct vcpu *v;
> +
>  #ifdef CONFIG_HAS_MEM_PAGING
>      if ( d->vm_event->paging.ring_page )
>      {
> @@ -560,6 +562,16 @@ void vm_event_cleanup(struct domain *d)
>          (void)vm_event_disable(d, &d->vm_event->share);
>      }
>  #endif
> +
> +    for_each_vcpu ( d, v )
> +    {
> +        if ( unlikely(v->arch.vm_event) )
> +        {
> +            /* vm_event->emul_read_data freed in vm_event_cleanup_domain */

Perhaps worthwhile adding a respective ASSERT()?

> +static inline bool_t vm_event_vcpu_initialised(struct vcpu *v)
> +{
> +    return (v->arch.vm_event && v->arch.vm_event->emul_read_data);
> +}
> +
> +static inline bool_t vm_event_domain_initialised(struct domain *d)
> +{
> +    return (d->max_vcpus && d->vcpu[0] &&
> +            vm_event_vcpu_initialised(d->vcpu[0]));
> +}

Both functions' parameters should be const. Pointless parentheses
in both return statements.

Jan
Corneliu ZUZU July 8, 2016, 10:28 a.m. UTC | #2
On 7/8/2016 10:35 AM, Jan Beulich wrote:
>>>> On 06.07.16 at 17:51, <czuzu@bitdefender.com> wrote:
>> @@ -492,8 +493,12 @@ int vcpu_initialise(struct vcpu *v)
>>   
>>   void vcpu_destroy(struct vcpu *v)
>>   {
>> -    xfree(v->arch.vm_event);
>> -    v->arch.vm_event = NULL;
>> +    if ( unlikely(v->arch.vm_event) )
>> +    {
>> +        xfree(v->arch.vm_event->emul_read_data);
>> +        xfree(v->arch.vm_event);
>> +        v->arch.vm_event = NULL;
>> +    }
> Considering the repeat of this pattern ...
>
>> @@ -52,8 +58,25 @@ void vm_event_cleanup_domain(struct domain *d)
>>   
>>       for_each_vcpu ( d, v )
>>       {
>> -        xfree(v->arch.vm_event);
>> -        v->arch.vm_event = NULL;
>> +        if ( likely(!v->arch.vm_event) )
>> +            continue;
>> +
>> +        /*
>> +         * Only xfree the entire arch_vm_event if write_data was fully handled.
>> +         * Otherwise defer entire xfree until domain/vcpu destroyal.
>> +         */
>> +        if ( likely(!v->arch.vm_event->write_data.uncommitted_writes) )
>> +        {
>> +            xfree(v->arch.vm_event->emul_read_data);
>> +            xfree(v->arch.vm_event);
>> +            v->arch.vm_event = NULL;
>> +            continue;
>> +        }
> ... here, please consider making this another helper (inline?) function.

Yeah, I'm sending a separate patch today which will invalidate some of 
these changes (then a v4 above that one).

>
>> +        /* write_data not fully handled, only xfree emul_read_data */
> Comment style again (and more below).

Ack, assuming you mean 'capital letter, end with dot'.

>
>> --- a/xen/common/vm_event.c
>> +++ b/xen/common/vm_event.c
>> @@ -534,6 +534,8 @@ static void mem_sharing_notification(struct vcpu *v, unsigned int port)
>>   /* Clean up on domain destruction */
>>   void vm_event_cleanup(struct domain *d)
>>   {
>> +    struct vcpu *v;
>> +
>>   #ifdef CONFIG_HAS_MEM_PAGING
>>       if ( d->vm_event->paging.ring_page )
>>       {
>> @@ -560,6 +562,16 @@ void vm_event_cleanup(struct domain *d)
>>           (void)vm_event_disable(d, &d->vm_event->share);
>>       }
>>   #endif
>> +
>> +    for_each_vcpu ( d, v )
>> +    {
>> +        if ( unlikely(v->arch.vm_event) )
>> +        {
>> +            /* vm_event->emul_read_data freed in vm_event_cleanup_domain */
> Perhaps worthwhile adding a respective ASSERT()?

Good point, ack.

>
>> +static inline bool_t vm_event_vcpu_initialised(struct vcpu *v)
>> +{
>> +    return (v->arch.vm_event && v->arch.vm_event->emul_read_data);
>> +}
>> +
>> +static inline bool_t vm_event_domain_initialised(struct domain *d)
>> +{
>> +    return (d->max_vcpus && d->vcpu[0] &&
>> +            vm_event_vcpu_initialised(d->vcpu[0]));
>> +}
> Both functions' parameters should be const. Pointless parentheses
> in both return statements.
>
> Jan

Ack (although the parentheses were there strictly for aesthetics, but 
that's subjective).

Thanks,
Corneliu.
Jan Beulich July 8, 2016, 10:38 a.m. UTC | #3
>>> On 08.07.16 at 12:28, <czuzu@bitdefender.com> wrote:
> On 7/8/2016 10:35 AM, Jan Beulich wrote:
>>>>> On 06.07.16 at 17:51, <czuzu@bitdefender.com> wrote:
>>> +        /* write_data not fully handled, only xfree emul_read_data */
>> Comment style again (and more below).
> 
> Ack, assuming you mean 'capital letter, end with dot'.

Actually in this particular case I meant only the missing full stop,
as the first word - afaict - refers to a structure field name (and
hence should remain lower case so e.g. a grep would catch it).

Jan
diff mbox

Patch

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index bb59247..0313208 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -56,6 +56,7 @@ 
 #include <asm/debugreg.h>
 #include <asm/msr.h>
 #include <asm/traps.h>
+#include <asm/vm_event.h>
 #include <asm/nmi.h>
 #include <asm/mce.h>
 #include <asm/amd.h>
@@ -492,8 +493,12 @@  int vcpu_initialise(struct vcpu *v)
 
 void vcpu_destroy(struct vcpu *v)
 {
-    xfree(v->arch.vm_event);
-    v->arch.vm_event = NULL;
+    if ( unlikely(v->arch.vm_event) )
+    {
+        xfree(v->arch.vm_event->emul_read_data);
+        xfree(v->arch.vm_event);
+        v->arch.vm_event = NULL;
+    }
 
     if ( is_pv_32bit_vcpu(v) )
     {
diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index 855af4d..59e2344 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -73,12 +73,12 @@  static int set_context_data(void *buffer, unsigned int size)
 {
     struct vcpu *curr = current;
 
-    if ( curr->arch.vm_event )
+    if ( vm_event_vcpu_initialised(curr) )
     {
         unsigned int safe_size =
-            min(size, curr->arch.vm_event->emul_read_data.size);
+            min(size, curr->arch.vm_event->emul_read_data->size);
 
-        memcpy(buffer, curr->arch.vm_event->emul_read_data.data, safe_size);
+        memcpy(buffer, curr->arch.vm_event->emul_read_data->data, safe_size);
         memset(buffer + safe_size, 0, size - safe_size);
         return X86EMUL_OKAY;
     }
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index e3829d2..ac6d9eb 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -479,6 +479,8 @@  void hvm_do_resume(struct vcpu *v)
         {
             enum emul_kind kind = EMUL_KIND_NORMAL;
 
+            ASSERT(v->arch.vm_event->emul_read_data);
+
             if ( v->arch.vm_event->emulate_flags &
                  VM_EVENT_FLAG_SET_EMUL_READ_DATA )
                 kind = EMUL_KIND_SET_CONTEXT;
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index 16733a4..6616626 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -1642,7 +1642,7 @@  void p2m_mem_access_emulate_check(struct vcpu *v,
         v->arch.vm_event->emulate_flags = violation ? rsp->flags : 0;
 
         if ( (rsp->flags & VM_EVENT_FLAG_SET_EMUL_READ_DATA) )
-            v->arch.vm_event->emul_read_data = rsp->data.emul_read_data;
+            *v->arch.vm_event->emul_read_data = rsp->data.emul_read_data;
     }
 }
 
diff --git a/xen/arch/x86/vm_event.c b/xen/arch/x86/vm_event.c
index 80f84d6..ff2ba92 100644
--- a/xen/arch/x86/vm_event.c
+++ b/xen/arch/x86/vm_event.c
@@ -30,12 +30,18 @@  int vm_event_init_domain(struct domain *d)
 
     for_each_vcpu ( d, v )
     {
-        if ( v->arch.vm_event )
+        if ( likely(!v->arch.vm_event) )
+        {
+            v->arch.vm_event = xzalloc(struct arch_vm_event);
+            if ( !v->arch.vm_event )
+                return -ENOMEM;
+        }
+        else if ( unlikely(v->arch.vm_event->emul_read_data) )
             continue;
 
-        v->arch.vm_event = xzalloc(struct arch_vm_event);
-
-        if ( !v->arch.vm_event )
+        v->arch.vm_event->emul_read_data =
+                xzalloc(struct vm_event_emul_read_data);
+        if ( !v->arch.vm_event->emul_read_data )
             return -ENOMEM;
     }
 
@@ -52,8 +58,25 @@  void vm_event_cleanup_domain(struct domain *d)
 
     for_each_vcpu ( d, v )
     {
-        xfree(v->arch.vm_event);
-        v->arch.vm_event = NULL;
+        if ( likely(!v->arch.vm_event) )
+            continue;
+
+        /*
+         * Only xfree the entire arch_vm_event if write_data was fully handled.
+         * Otherwise defer entire xfree until domain/vcpu destroyal.
+         */
+        if ( likely(!v->arch.vm_event->write_data.uncommitted_writes) )
+        {
+            xfree(v->arch.vm_event->emul_read_data);
+            xfree(v->arch.vm_event);
+            v->arch.vm_event = NULL;
+            continue;
+        }
+
+        /* write_data not fully handled, only xfree emul_read_data */
+        v->arch.vm_event->emulate_flags = 0;
+        xfree(v->arch.vm_event->emul_read_data);
+        v->arch.vm_event->emul_read_data = NULL;
     }
 
     d->arch.mem_access_emulate_each_rep = 0;
diff --git a/xen/common/vm_event.c b/xen/common/vm_event.c
index 17d2716..47ae96c 100644
--- a/xen/common/vm_event.c
+++ b/xen/common/vm_event.c
@@ -534,6 +534,8 @@  static void mem_sharing_notification(struct vcpu *v, unsigned int port)
 /* Clean up on domain destruction */
 void vm_event_cleanup(struct domain *d)
 {
+    struct vcpu *v;
+
 #ifdef CONFIG_HAS_MEM_PAGING
     if ( d->vm_event->paging.ring_page )
     {
@@ -560,6 +562,16 @@  void vm_event_cleanup(struct domain *d)
         (void)vm_event_disable(d, &d->vm_event->share);
     }
 #endif
+
+    for_each_vcpu ( d, v )
+    {
+        if ( unlikely(v->arch.vm_event) )
+        {
+            /* vm_event->emul_read_data freed in vm_event_cleanup_domain */
+            xfree(v->arch.vm_event);
+            v->arch.vm_event = NULL;
+        }
+    }
 }
 
 int vm_event_domctl(struct domain *d, xen_domctl_vm_event_op_t *vec,
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 8f64ae9..0e3e139 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -260,12 +260,17 @@  struct pv_domain
 };
 
 struct monitor_write_data {
-    struct {
-        unsigned int msr : 1;
-        unsigned int cr0 : 1;
-        unsigned int cr3 : 1;
-        unsigned int cr4 : 1;
-    } do_write;
+    union {
+        struct {
+            unsigned int msr : 1;
+            unsigned int cr0 : 1;
+            unsigned int cr3 : 1;
+            unsigned int cr4 : 1;
+        } do_write;
+
+        /* non-zero when at least one of do_write fields is non-zero */
+        unsigned int uncommitted_writes;
+    };
 
     uint32_t msr;
     uint64_t value;
diff --git a/xen/include/asm-x86/monitor.h b/xen/include/asm-x86/monitor.h
index 0611681..9238ec8 100644
--- a/xen/include/asm-x86/monitor.h
+++ b/xen/include/asm-x86/monitor.h
@@ -26,6 +26,7 @@ 
 #include <public/domctl.h>
 #include <asm/cpufeature.h>
 #include <asm/hvm/hvm.h>
+#include <asm/vm_event.h>
 
 #define monitor_ctrlreg_bitmask(ctrlreg_index) (1U << (ctrlreg_index))
 
@@ -48,7 +49,7 @@  int arch_monitor_domctl_op(struct domain *d, struct xen_domctl_monitor_op *mop)
          * Enabling mem_access_emulate_each_rep without a vm_event subscriber
          * is meaningless.
          */
-        if ( d->max_vcpus && d->vcpu[0] && d->vcpu[0]->arch.vm_event )
+        if ( vm_event_domain_initialised(d) )
             d->arch.mem_access_emulate_each_rep = !!mop->event;
         else
             rc = -EINVAL;
diff --git a/xen/include/asm-x86/vm_event.h b/xen/include/asm-x86/vm_event.h
index 026f42e..9bdeccc 100644
--- a/xen/include/asm-x86/vm_event.h
+++ b/xen/include/asm-x86/vm_event.h
@@ -28,12 +28,23 @@ 
  */
 struct arch_vm_event {
     uint32_t emulate_flags;
-    struct vm_event_emul_read_data emul_read_data;
+    struct vm_event_emul_read_data *emul_read_data;
     struct monitor_write_data write_data;
 };
 
 int vm_event_init_domain(struct domain *d);
 
+static inline bool_t vm_event_vcpu_initialised(struct vcpu *v)
+{
+    return (v->arch.vm_event && v->arch.vm_event->emul_read_data);
+}
+
+static inline bool_t vm_event_domain_initialised(struct domain *d)
+{
+    return (d->max_vcpus && d->vcpu[0] &&
+            vm_event_vcpu_initialised(d->vcpu[0]));
+}
+
 void vm_event_cleanup_domain(struct domain *d);
 
 void vm_event_toggle_singlestep(struct domain *d, struct vcpu *v);