diff mbox

[05/16] x86/monitor: relocate code more appropriately

Message ID 1468037707-6663-1-git-send-email-czuzu@bitdefender.com (mailing list archive)
State New, archived
Headers show

Commit Message

Corneliu ZUZU July 9, 2016, 4:15 a.m. UTC
For readability:

* Add function monitor_ctrlreg_write_data() (in x86/monitor.c) and separate
handling of monitor_write_data there (previously done directly in
hvm_do_resume()).

* Separate enabling/disabling of CPU_BASED_CR3_LOAD_EXITING for CR3 monitor
vm-events from CR0 node @ vmx_update_guest_cr(v, 0) into newly added function
vmx_vm_event_update_cr3_traps(d); also, for a better interface, create generic
functions monitor_ctrlreg_adjust_traps() and monitor_ctrlreg_disable_traps() (in
x86/monitor.c) which deal with setting/unsetting any traps for cr-write monitor
vm-events.

Signed-off-by: Corneliu ZUZU <czuzu@bitdefender.com>
---
 xen/arch/x86/hvm/hvm.c            | 46 ++++++++++----------------
 xen/arch/x86/hvm/vmx/vmx.c        | 59 ++++++++++++++++++++++++++++++---
 xen/arch/x86/monitor.c            | 68 +++++++++++++++++++++++++++++++++++----
 xen/include/asm-x86/hvm/vmx/vmx.h |  1 +
 xen/include/asm-x86/monitor.h     |  2 ++
 5 files changed, 135 insertions(+), 41 deletions(-)

Comments

Corneliu ZUZU July 11, 2016, 6:19 a.m. UTC | #1
This was 2/8 in the old v3-series "x86/vm-event: Adjustments & fixes".
Transferring code-review from "Tian, Kevin <kevin.tian@intel.com>" (July 
11, 2016) and responding...


On 7/9/2016 7:15 AM, Corneliu ZUZU wrote:
> For readability:
>
> * Add function monitor_ctrlreg_write_data() (in x86/monitor.c) and separate
> handling of monitor_write_data there (previously done directly in
> hvm_do_resume()).
>
> * Separate enabling/disabling of CPU_BASED_CR3_LOAD_EXITING for CR3 monitor
> vm-events from CR0 node @ vmx_update_guest_cr(v, 0) into newly added function
> vmx_vm_event_update_cr3_traps(d); also, for a better interface, create generic
> functions monitor_ctrlreg_adjust_traps() and monitor_ctrlreg_disable_traps() (in
> x86/monitor.c) which deal with setting/unsetting any traps for cr-write monitor
> vm-events.
>
> Signed-off-by: Corneliu ZUZU <czuzu@bitdefender.com>
> ---
>   xen/arch/x86/hvm/hvm.c            | 46 ++++++++++----------------
>   xen/arch/x86/hvm/vmx/vmx.c        | 59 ++++++++++++++++++++++++++++++---
>   xen/arch/x86/monitor.c            | 68 +++++++++++++++++++++++++++++++++++----
>   xen/include/asm-x86/hvm/vmx/vmx.h |  1 +
>   xen/include/asm-x86/monitor.h     |  2 ++
>   5 files changed, 135 insertions(+), 41 deletions(-)
>
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index 7f99087..79ba185 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -473,8 +473,6 @@ void hvm_do_resume(struct vcpu *v)
>   
>       if ( unlikely(v->arch.vm_event) )
>       {
> -        struct monitor_write_data *w = &v->arch.vm_event->write_data;
> -
>           if ( unlikely(v->arch.vm_event->emulate_flags) )
>           {
>               enum emul_kind kind = EMUL_KIND_NORMAL;
> @@ -492,29 +490,7 @@ void hvm_do_resume(struct vcpu *v)
>               v->arch.vm_event->emulate_flags = 0;
>           }
>   
> -        if ( w->do_write.msr )
> -        {
> -            hvm_msr_write_intercept(w->msr, w->value, 0);
> -            w->do_write.msr = 0;
> -        }
> -
> -        if ( w->do_write.cr0 )
> -        {
> -            hvm_set_cr0(w->cr0, 0);
> -            w->do_write.cr0 = 0;
> -        }
> -
> -        if ( w->do_write.cr4 )
> -        {
> -            hvm_set_cr4(w->cr4, 0);
> -            w->do_write.cr4 = 0;
> -        }
> -
> -        if ( w->do_write.cr3 )
> -        {
> -            hvm_set_cr3(w->cr3, 0);
> -            w->do_write.cr3 = 0;
> -        }
> +        monitor_ctrlreg_write_data(v);
>       }
>   
>       /* Inject pending hw/sw trap */
> @@ -2204,7 +2180,10 @@ int hvm_set_cr0(unsigned long value, bool_t may_defer)
>   
>           if ( hvm_monitor_crX(CR0, value, old_value) )
>           {
> -            /* The actual write will occur in hvm_do_resume(), if permitted. */
> +            /*
> +             * The actual write will occur in monitor_ctrlreg_write_data(), if
> +             * permitted.
> +             */
>               v->arch.vm_event->write_data.do_write.cr0 = 1;
>               v->arch.vm_event->write_data.cr0 = value;
>   
> @@ -2306,7 +2285,10 @@ int hvm_set_cr3(unsigned long value, bool_t may_defer)
>   
>           if ( hvm_monitor_crX(CR3, value, old) )
>           {
> -            /* The actual write will occur in hvm_do_resume(), if permitted. */
> +            /*
> +             * The actual write will occur in monitor_ctrlreg_write_data(), if
> +             * permitted.
> +             */
>               v->arch.vm_event->write_data.do_write.cr3 = 1;
>               v->arch.vm_event->write_data.cr3 = value;
>   
> @@ -2386,7 +2368,10 @@ int hvm_set_cr4(unsigned long value, bool_t may_defer)
>   
>           if ( hvm_monitor_crX(CR4, value, old_cr) )
>           {
> -            /* The actual write will occur in hvm_do_resume(), if permitted. */
> +            /*
> +             * The actual write will occur in monitor_ctrlreg_write_data(), if
> +             * permitted.
> +             */
>               v->arch.vm_event->write_data.do_write.cr4 = 1;
>               v->arch.vm_event->write_data.cr4 = value;
>   
> @@ -3765,7 +3750,10 @@ int hvm_msr_write_intercept(unsigned int msr, uint64_t msr_content,
>       {
>           ASSERT(v->arch.vm_event);
>   
> -        /* The actual write will occur in hvm_do_resume() (if permitted). */
> +        /*
> +         * The actual write will occur in monitor_ctrlreg_write_data(), if
> +         * permitted.
> +         */
>           v->arch.vm_event->write_data.do_write.msr = 1;
>           v->arch.vm_event->write_data.msr = msr;
>           v->arch.vm_event->write_data.value = msr_content;
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index 0798245..7a31582 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -1441,11 +1441,6 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr)
>               if ( !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) )
>                   v->arch.hvm_vmx.exec_control |= cr3_ctls;
>   
> -            /* Trap CR3 updates if CR3 memory events are enabled. */
> -            if ( v->domain->arch.monitor.write_ctrlreg_enabled &
> -                 monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3) )
> -                v->arch.hvm_vmx.exec_control |= CPU_BASED_CR3_LOAD_EXITING;
> -
>               if ( old_ctls != v->arch.hvm_vmx.exec_control )
>                   vmx_update_cpu_exec_control(v);
>           }
> @@ -3898,6 +3893,60 @@ void vmx_vmenter_helper(const struct cpu_user_regs *regs)
>   }
>   
>   /*
> + * Adjusts domain CR3 load-exiting execution control for CR3-write monitor
> + * vm-event.
> + */
> +void vmx_vm_event_update_cr3_traps(const struct domain *d)
> +{
> +    struct vcpu *v;
> +    struct arch_vmx_struct *avmx;
     [Kevin wrote]:

	better move to inner later loop.

---


Ack.


> +    unsigned int cr3_bitmask;
> +    bool_t cr3_vmevent, cr3_ldexit;
> +
> +    /* Domain must be paused. */
> +    ASSERT(atomic_read(&d->pause_count));

     [Kevin wrote]:

	The comment doesn't add more info than the code itself, unless you want to
	explain the reason why domain must be paused here.
---


Ack, will remove the comment (or explain why the domain is paused: probably because we're writing exec_control of all its vcpus).


> +
> +    /* Non-hap domains trap CR3 writes unconditionally. */
> +    if ( !paging_mode_hap(d) )
> +    {
> +#ifndef NDEBUG
> +        for_each_vcpu ( d, v )
> +            ASSERT(v->arch.hvm_vmx.exec_control & CPU_BASED_CR3_LOAD_EXITING);
> +#endif
> +        return;
> +    }
> +
> +    cr3_bitmask = monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3);
> +    cr3_vmevent = !!(d->arch.monitor.write_ctrlreg_enabled & cr3_bitmask);
> +
> +    for_each_vcpu ( d, v )
> +    {
> +        avmx = &v->arch.hvm_vmx;
> +        cr3_ldexit = !!(avmx->exec_control & CPU_BASED_CR3_LOAD_EXITING);
> +
> +        if ( cr3_vmevent == cr3_ldexit )
> +            continue;
> +
> +        /*
> +         * If domain paging is disabled (CR0.PG=0) and
> +         * the domain is not in real-mode, then CR3 load-exiting
> +         * must remain enabled (see vmx_update_guest_cr(v, cr) when cr = 0).
> +         */

     [Kevin wrote]:

	Please give the real informative message here instead of asking people to
	look at other place
---


I only now realized I might have mistakenly assumed something when writing up this function.
I'll amend that in a v2 and consider making this comment clearer (will ask for feedback from you again here before sending v2).


> +        if ( cr3_ldexit && !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) )
> +            continue;
     [Kevin wrote]:

	if !cr3_ldexit is not expected to occur in such scenario, is it more strict as below?

	if ( !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) )
	{
		ASSERT(cr3_ldexit);
		continue;
	}
---

That actually seems to be correct, I thought it was possible to have CR3 
load-exiting disabled with the above condition true (by looking at the 
old code). I must attentively re-review this entire function so that it 
preserves the old behavior.

> +
> +        if ( cr3_vmevent )
> +            avmx->exec_control |= CPU_BASED_CR3_LOAD_EXITING;
> +        else
> +            avmx->exec_control &= ~CPU_BASED_CR3_LOAD_EXITING;
> +
> +        vmx_vmcs_enter(v);
> +        vmx_update_cpu_exec_control(v);
> +        vmx_vmcs_exit(v);
> +    }
> +}
> +
> +/*
>    * Local variables:
>    * mode: C
>    * c-file-style: "BSD"
> diff --git a/xen/arch/x86/monitor.c b/xen/arch/x86/monitor.c
> index 06d21b1..29188e5 100644
> --- a/xen/arch/x86/monitor.c
> +++ b/xen/arch/x86/monitor.c
> @@ -19,10 +19,39 @@
>    * License along with this program; If not, see <http://www.gnu.org/licenses/>.
>    */
>   
> +#include <asm/hvm/vmx/vmx.h>
>   #include <asm/monitor.h>
>   #include <asm/vm_event.h>
>   #include <public/vm_event.h>
>   
> +static inline
> +void monitor_ctrlreg_adjust_traps(struct domain *d, unsigned int index)
> +{
> +    /* For now, VMX only. */
> +    ASSERT(cpu_has_vmx);
> +
> +    /* Other CRs than CR3 are always trapped. */
> +    if ( VM_EVENT_X86_CR3 == index )
> +        vmx_vm_event_update_cr3_traps(d);
     [Kevin wrote]:

	Please add above into a hvm function instead of directly calling
	vmx in common file. (other arch can have it unimplemented).
	Then you don't need change this common code again later when
	other archs are added

---


This is not common code, it's in arch/x86/monitor.c (?) and currently, 
as the above ASSERT indicates, only VMX is supported. If in the future 
support for SVM for example will be added, then the hvm move you suggest 
must be done (Jan also suggested this).
Or, I only now realize, if you guys prefer doing this now I could add a 
vm_event_update_cr3_traps field in hvm_function_table, but BUG() in the 
SVM one..

> +}
> +
> +static inline void monitor_ctrlreg_disable_traps(struct domain *d)
> +{
> +    unsigned int old = d->arch.monitor.write_ctrlreg_enabled;
> +
> +    d->arch.monitor.write_ctrlreg_enabled = 0;
> +
> +    if ( old )
> +    {
> +        /* For now, VMX only. */
> +        ASSERT(cpu_has_vmx);
> +
> +        /* Was CR3 load-exiting enabled due to monitoring? */
> +        if ( old & monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3) )
> +            vmx_vm_event_update_cr3_traps(d);
> +    }
> +}
> +
>   int monitor_init_domain(struct domain *d)
>   {
>       if ( !d->arch.monitor.msr_bitmap )
> @@ -38,6 +67,8 @@ void monitor_cleanup_domain(struct domain *d)
>   {
>       xfree(d->arch.monitor.msr_bitmap);
>   
> +    monitor_ctrlreg_disable_traps(d);
> +
>       memset(&d->arch.monitor, 0, sizeof(d->arch.monitor));
>       memset(&d->monitor, 0, sizeof(d->monitor));
>   }
> @@ -79,6 +110,35 @@ void monitor_ctrlreg_write_resume(struct vcpu *v, vm_event_response_t *rsp)
>       }
>   }
>   
> +void monitor_ctrlreg_write_data(struct vcpu *v)
> +{
> +    struct monitor_write_data *w = &v->arch.vm_event->write_data;
> +
> +    if ( w->do_write.msr )
> +    {
> +        hvm_msr_write_intercept(w->msr, w->value, 0);
> +        w->do_write.msr = 0;
> +    }
> +
> +    if ( w->do_write.cr0 )
> +    {
> +        hvm_set_cr0(w->cr0, 0);
> +        w->do_write.cr0 = 0;
> +    }
> +
> +    if ( w->do_write.cr4 )
> +    {
> +        hvm_set_cr4(w->cr4, 0);
> +        w->do_write.cr4 = 0;
> +    }
> +
> +    if ( w->do_write.cr3 )
> +    {
> +        hvm_set_cr3(w->cr3, 0);
> +        w->do_write.cr3 = 0;
> +    }
> +}
> +
>   static unsigned long *monitor_bitmap_for_msr(const struct domain *d, u32 *msr)
>   {
>       ASSERT(d->arch.monitor.msr_bitmap && msr);
> @@ -197,13 +257,7 @@ int arch_monitor_domctl_event(struct domain *d,
>           else
>               ad->monitor.write_ctrlreg_enabled &= ~ctrlreg_bitmask;
>   
> -        if ( VM_EVENT_X86_CR3 == mop->u.mov_to_cr.index )
> -        {
> -            struct vcpu *v;
> -            /* Latches new CR3 mask through CR0 code. */
> -            for_each_vcpu ( d, v )
> -                hvm_update_guest_cr(v, 0);
> -        }
> +        monitor_ctrlreg_adjust_traps(d, mop->u.mov_to_cr.index);
>   
>           domain_unpause(d);
>   
> diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h b/xen/include/asm-x86/hvm/vmx/vmx.h
> index 359b2a9..301df56 100644
> --- a/xen/include/asm-x86/hvm/vmx/vmx.h
> +++ b/xen/include/asm-x86/hvm/vmx/vmx.h
> @@ -101,6 +101,7 @@ void vmx_update_debug_state(struct vcpu *v);
>   void vmx_update_exception_bitmap(struct vcpu *v);
>   void vmx_update_cpu_exec_control(struct vcpu *v);
>   void vmx_update_secondary_exec_control(struct vcpu *v);
> +void vmx_vm_event_update_cr3_traps(const struct domain *d);
>   
>   #define POSTED_INTR_ON  0
>   #define POSTED_INTR_SN  1
> diff --git a/xen/include/asm-x86/monitor.h b/xen/include/asm-x86/monitor.h
> index adca378..3ae24dd 100644
> --- a/xen/include/asm-x86/monitor.h
> +++ b/xen/include/asm-x86/monitor.h
> @@ -93,6 +93,8 @@ void monitor_cleanup_domain(struct domain *d);
>   
>   void monitor_ctrlreg_write_resume(struct vcpu *v, vm_event_response_t *rsp);
>   
> +void monitor_ctrlreg_write_data(struct vcpu *v);
> +
>   bool_t monitored_msr(const struct domain *d, u32 msr);
>   
>   #endif /* __ASM_X86_MONITOR_H__ */

Thanks,
Zuzu C.
Tian, Kevin July 12, 2016, 7:45 a.m. UTC | #2
> From: Corneliu ZUZU [mailto:czuzu@bitdefender.com]

> Sent: Monday, July 11, 2016 2:19 PM

> >

> > +static inline

> > +void monitor_ctrlreg_adjust_traps(struct domain *d, unsigned int index)

> > +{

> > +    /* For now, VMX only. */

> > +    ASSERT(cpu_has_vmx);

> > +

> > +    /* Other CRs than CR3 are always trapped. */

> > +    if ( VM_EVENT_X86_CR3 == index )

> > +        vmx_vm_event_update_cr3_traps(d);

>      [Kevin wrote]:

> 

> 	Please add above into a hvm function instead of directly calling

> 	vmx in common file. (other arch can have it unimplemented).

> 	Then you don't need change this common code again later when

> 	other archs are added

> 

> ---

> 

> 

> This is not common code, it's in arch/x86/monitor.c (?) and currently,

> as the above ASSERT indicates, only VMX is supported. If in the future

> support for SVM for example will be added, then the hvm move you suggest

> must be done (Jan also suggested this).

> Or, I only now realize, if you guys prefer doing this now I could add a

> vm_event_update_cr3_traps field in hvm_function_table, but BUG() in the

> SVM one..

> 


The latter is desired. Instead of BUG, it makes more sense to return
error on an arch which doesn't register the callback.

Thanks
Kevin
Corneliu ZUZU July 12, 2016, 8:07 a.m. UTC | #3
On 7/12/2016 10:45 AM, Tian, Kevin wrote:
>> From: Corneliu ZUZU [mailto:czuzu@bitdefender.com]
>> Sent: Monday, July 11, 2016 2:19 PM
>>> +static inline
>>> +void monitor_ctrlreg_adjust_traps(struct domain *d, unsigned int index)
>>> +{
>>> +    /* For now, VMX only. */
>>> +    ASSERT(cpu_has_vmx);
>>> +
>>> +    /* Other CRs than CR3 are always trapped. */
>>> +    if ( VM_EVENT_X86_CR3 == index )
>>> +        vmx_vm_event_update_cr3_traps(d);
>>       [Kevin wrote]:
>>
>> 	Please add above into a hvm function instead of directly calling
>> 	vmx in common file. (other arch can have it unimplemented).
>> 	Then you don't need change this common code again later when
>> 	other archs are added
>>
>> ---
>>
>>
>> This is not common code, it's in arch/x86/monitor.c (?) and currently,
>> as the above ASSERT indicates, only VMX is supported. If in the future
>> support for SVM for example will be added, then the hvm move you suggest
>> must be done (Jan also suggested this).
>> Or, I only now realize, if you guys prefer doing this now I could add a
>> vm_event_update_cr3_traps field in hvm_function_table, but BUG() in the
>> SVM one..
>>
> The latter is desired. Instead of BUG, it makes more sense to return
> error on an arch which doesn't register the callback.
>
> Thanks
> Kevin

Will do.

Thanks,
Zuzu C.
Corneliu ZUZU July 15, 2016, 11:41 a.m. UTC | #4
On 7/12/2016 11:07 AM, Corneliu ZUZU wrote:
> On 7/12/2016 10:45 AM, Tian, Kevin wrote:
>>> From: Corneliu ZUZU [mailto:czuzu@bitdefender.com]
>>> Sent: Monday, July 11, 2016 2:19 PM
>>>> +static inline
>>>> +void monitor_ctrlreg_adjust_traps(struct domain *d, unsigned int 
>>>> index)
>>>> +{
>>>> +    /* For now, VMX only. */
>>>> +    ASSERT(cpu_has_vmx);
>>>> +
>>>> +    /* Other CRs than CR3 are always trapped. */
>>>> +    if ( VM_EVENT_X86_CR3 == index )
>>>> +        vmx_vm_event_update_cr3_traps(d);
>>>       [Kevin wrote]:
>>>
>>>     Please add above into a hvm function instead of directly calling
>>>     vmx in common file. (other arch can have it unimplemented).
>>>     Then you don't need change this common code again later when
>>>     other archs are added
>>>
>>> ---
>>>
>>>
>>> This is not common code, it's in arch/x86/monitor.c (?) and currently,
>>> as the above ASSERT indicates, only VMX is supported. If in the future
>>> support for SVM for example will be added, then the hvm move you 
>>> suggest
>>> must be done (Jan also suggested this).
>>> Or, I only now realize, if you guys prefer doing this now I could add a
>>> vm_event_update_cr3_traps field in hvm_function_table, but BUG() in the
>>> SVM one..
>>>
>> The latter is desired. Instead of BUG, it makes more sense to return
>> error on an arch which doesn't register the callback.
>>
>> Thanks
>> Kevin
>
> Will do.
>
> Thanks,
> Zuzu C.

I've decided to discard separating CR3 load-exiting handling (i.e. 
discard vmx_vm_event_update_cr3_traps) entirely since I find that it's 
complicated to have to handle the bit from 2 different places 
(vmx_update_guest_cr and arch_monitor_domctl_event).

Normally such a situation is resolved by counting the number of 
subscribers to the resource (in this case counting the number of 
'entities' that want CR3 load-exiting enabled - i.e. just as we have 
a vCPU pause_count to count entities that want the vCPU to be paused), 
but it's just a single bit out of many and I don't think the overhead 
is worth it.

Let me know if you disagree and I'm open to suggestions, if you guys 
have any.

Thanks,
Corneliu.
diff mbox

Patch

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 7f99087..79ba185 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -473,8 +473,6 @@  void hvm_do_resume(struct vcpu *v)
 
     if ( unlikely(v->arch.vm_event) )
     {
-        struct monitor_write_data *w = &v->arch.vm_event->write_data;
-
         if ( unlikely(v->arch.vm_event->emulate_flags) )
         {
             enum emul_kind kind = EMUL_KIND_NORMAL;
@@ -492,29 +490,7 @@  void hvm_do_resume(struct vcpu *v)
             v->arch.vm_event->emulate_flags = 0;
         }
 
-        if ( w->do_write.msr )
-        {
-            hvm_msr_write_intercept(w->msr, w->value, 0);
-            w->do_write.msr = 0;
-        }
-
-        if ( w->do_write.cr0 )
-        {
-            hvm_set_cr0(w->cr0, 0);
-            w->do_write.cr0 = 0;
-        }
-
-        if ( w->do_write.cr4 )
-        {
-            hvm_set_cr4(w->cr4, 0);
-            w->do_write.cr4 = 0;
-        }
-
-        if ( w->do_write.cr3 )
-        {
-            hvm_set_cr3(w->cr3, 0);
-            w->do_write.cr3 = 0;
-        }
+        monitor_ctrlreg_write_data(v);
     }
 
     /* Inject pending hw/sw trap */
@@ -2204,7 +2180,10 @@  int hvm_set_cr0(unsigned long value, bool_t may_defer)
 
         if ( hvm_monitor_crX(CR0, value, old_value) )
         {
-            /* The actual write will occur in hvm_do_resume(), if permitted. */
+            /*
+             * The actual write will occur in monitor_ctrlreg_write_data(), if
+             * permitted.
+             */
             v->arch.vm_event->write_data.do_write.cr0 = 1;
             v->arch.vm_event->write_data.cr0 = value;
 
@@ -2306,7 +2285,10 @@  int hvm_set_cr3(unsigned long value, bool_t may_defer)
 
         if ( hvm_monitor_crX(CR3, value, old) )
         {
-            /* The actual write will occur in hvm_do_resume(), if permitted. */
+            /*
+             * The actual write will occur in monitor_ctrlreg_write_data(), if
+             * permitted.
+             */
             v->arch.vm_event->write_data.do_write.cr3 = 1;
             v->arch.vm_event->write_data.cr3 = value;
 
@@ -2386,7 +2368,10 @@  int hvm_set_cr4(unsigned long value, bool_t may_defer)
 
         if ( hvm_monitor_crX(CR4, value, old_cr) )
         {
-            /* The actual write will occur in hvm_do_resume(), if permitted. */
+            /*
+             * The actual write will occur in monitor_ctrlreg_write_data(), if
+             * permitted.
+             */
             v->arch.vm_event->write_data.do_write.cr4 = 1;
             v->arch.vm_event->write_data.cr4 = value;
 
@@ -3765,7 +3750,10 @@  int hvm_msr_write_intercept(unsigned int msr, uint64_t msr_content,
     {
         ASSERT(v->arch.vm_event);
 
-        /* The actual write will occur in hvm_do_resume() (if permitted). */
+        /*
+         * The actual write will occur in monitor_ctrlreg_write_data(), if
+         * permitted.
+         */
         v->arch.vm_event->write_data.do_write.msr = 1;
         v->arch.vm_event->write_data.msr = msr;
         v->arch.vm_event->write_data.value = msr_content;
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 0798245..7a31582 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1441,11 +1441,6 @@  static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr)
             if ( !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) )
                 v->arch.hvm_vmx.exec_control |= cr3_ctls;
 
-            /* Trap CR3 updates if CR3 memory events are enabled. */
-            if ( v->domain->arch.monitor.write_ctrlreg_enabled &
-                 monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3) )
-                v->arch.hvm_vmx.exec_control |= CPU_BASED_CR3_LOAD_EXITING;
-
             if ( old_ctls != v->arch.hvm_vmx.exec_control )
                 vmx_update_cpu_exec_control(v);
         }
@@ -3898,6 +3893,60 @@  void vmx_vmenter_helper(const struct cpu_user_regs *regs)
 }
 
 /*
+ * Adjusts domain CR3 load-exiting execution control for CR3-write monitor
+ * vm-event.
+ */
+void vmx_vm_event_update_cr3_traps(const struct domain *d)
+{
+    struct vcpu *v;
+    struct arch_vmx_struct *avmx;
+    unsigned int cr3_bitmask;
+    bool_t cr3_vmevent, cr3_ldexit;
+
+    /* Domain must be paused. */
+    ASSERT(atomic_read(&d->pause_count));
+
+    /* Non-hap domains trap CR3 writes unconditionally. */
+    if ( !paging_mode_hap(d) )
+    {
+#ifndef NDEBUG
+        for_each_vcpu ( d, v )
+            ASSERT(v->arch.hvm_vmx.exec_control & CPU_BASED_CR3_LOAD_EXITING);
+#endif
+        return;
+    }
+
+    cr3_bitmask = monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3);
+    cr3_vmevent = !!(d->arch.monitor.write_ctrlreg_enabled & cr3_bitmask);
+
+    for_each_vcpu ( d, v )
+    {
+        avmx = &v->arch.hvm_vmx;
+        cr3_ldexit = !!(avmx->exec_control & CPU_BASED_CR3_LOAD_EXITING);
+
+        if ( cr3_vmevent == cr3_ldexit )
+            continue;
+
+        /*
+         * If domain paging is disabled (CR0.PG=0) and
+         * the domain is not in real-mode, then CR3 load-exiting
+         * must remain enabled (see vmx_update_guest_cr(v, cr) when cr = 0).
+         */
+        if ( cr3_ldexit && !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) )
+            continue;
+
+        if ( cr3_vmevent )
+            avmx->exec_control |= CPU_BASED_CR3_LOAD_EXITING;
+        else
+            avmx->exec_control &= ~CPU_BASED_CR3_LOAD_EXITING;
+
+        vmx_vmcs_enter(v);
+        vmx_update_cpu_exec_control(v);
+        vmx_vmcs_exit(v);
+    }
+}
+
+/*
  * Local variables:
  * mode: C
  * c-file-style: "BSD"
diff --git a/xen/arch/x86/monitor.c b/xen/arch/x86/monitor.c
index 06d21b1..29188e5 100644
--- a/xen/arch/x86/monitor.c
+++ b/xen/arch/x86/monitor.c
@@ -19,10 +19,39 @@ 
  * License along with this program; If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <asm/hvm/vmx/vmx.h>
 #include <asm/monitor.h>
 #include <asm/vm_event.h>
 #include <public/vm_event.h>
 
+static inline
+void monitor_ctrlreg_adjust_traps(struct domain *d, unsigned int index)
+{
+    /* For now, VMX only. */
+    ASSERT(cpu_has_vmx);
+
+    /* Other CRs than CR3 are always trapped. */
+    if ( VM_EVENT_X86_CR3 == index )
+        vmx_vm_event_update_cr3_traps(d);
+}
+
+static inline void monitor_ctrlreg_disable_traps(struct domain *d)
+{
+    unsigned int old = d->arch.monitor.write_ctrlreg_enabled;
+
+    d->arch.monitor.write_ctrlreg_enabled = 0;
+
+    if ( old )
+    {
+        /* For now, VMX only. */
+        ASSERT(cpu_has_vmx);
+
+        /* Was CR3 load-exiting enabled due to monitoring? */
+        if ( old & monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3) )
+            vmx_vm_event_update_cr3_traps(d);
+    }
+}
+
 int monitor_init_domain(struct domain *d)
 {
     if ( !d->arch.monitor.msr_bitmap )
@@ -38,6 +67,8 @@  void monitor_cleanup_domain(struct domain *d)
 {
     xfree(d->arch.monitor.msr_bitmap);
 
+    monitor_ctrlreg_disable_traps(d);
+
     memset(&d->arch.monitor, 0, sizeof(d->arch.monitor));
     memset(&d->monitor, 0, sizeof(d->monitor));
 }
@@ -79,6 +110,35 @@  void monitor_ctrlreg_write_resume(struct vcpu *v, vm_event_response_t *rsp)
     }
 }
 
+void monitor_ctrlreg_write_data(struct vcpu *v)
+{
+    struct monitor_write_data *w = &v->arch.vm_event->write_data;
+
+    if ( w->do_write.msr )
+    {
+        hvm_msr_write_intercept(w->msr, w->value, 0);
+        w->do_write.msr = 0;
+    }
+
+    if ( w->do_write.cr0 )
+    {
+        hvm_set_cr0(w->cr0, 0);
+        w->do_write.cr0 = 0;
+    }
+
+    if ( w->do_write.cr4 )
+    {
+        hvm_set_cr4(w->cr4, 0);
+        w->do_write.cr4 = 0;
+    }
+
+    if ( w->do_write.cr3 )
+    {
+        hvm_set_cr3(w->cr3, 0);
+        w->do_write.cr3 = 0;
+    }
+}
+
 static unsigned long *monitor_bitmap_for_msr(const struct domain *d, u32 *msr)
 {
     ASSERT(d->arch.monitor.msr_bitmap && msr);
@@ -197,13 +257,7 @@  int arch_monitor_domctl_event(struct domain *d,
         else
             ad->monitor.write_ctrlreg_enabled &= ~ctrlreg_bitmask;
 
-        if ( VM_EVENT_X86_CR3 == mop->u.mov_to_cr.index )
-        {
-            struct vcpu *v;
-            /* Latches new CR3 mask through CR0 code. */
-            for_each_vcpu ( d, v )
-                hvm_update_guest_cr(v, 0);
-        }
+        monitor_ctrlreg_adjust_traps(d, mop->u.mov_to_cr.index);
 
         domain_unpause(d);
 
diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h b/xen/include/asm-x86/hvm/vmx/vmx.h
index 359b2a9..301df56 100644
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -101,6 +101,7 @@  void vmx_update_debug_state(struct vcpu *v);
 void vmx_update_exception_bitmap(struct vcpu *v);
 void vmx_update_cpu_exec_control(struct vcpu *v);
 void vmx_update_secondary_exec_control(struct vcpu *v);
+void vmx_vm_event_update_cr3_traps(const struct domain *d);
 
 #define POSTED_INTR_ON  0
 #define POSTED_INTR_SN  1
diff --git a/xen/include/asm-x86/monitor.h b/xen/include/asm-x86/monitor.h
index adca378..3ae24dd 100644
--- a/xen/include/asm-x86/monitor.h
+++ b/xen/include/asm-x86/monitor.h
@@ -93,6 +93,8 @@  void monitor_cleanup_domain(struct domain *d);
 
 void monitor_ctrlreg_write_resume(struct vcpu *v, vm_event_response_t *rsp);
 
+void monitor_ctrlreg_write_data(struct vcpu *v);
+
 bool_t monitored_msr(const struct domain *d, u32 msr);
 
 #endif /* __ASM_X86_MONITOR_H__ */