diff mbox

[v3,2/8] x86/vm-event/monitor: relocate code-motion more appropriately

Message ID 1467820242-13358-1-git-send-email-czuzu@bitdefender.com (mailing list archive)
State New, archived
Headers show

Commit Message

Corneliu ZUZU July 6, 2016, 3:50 p.m. UTC
For readability:

* Add function arch_monitor_write_data (in x86/monitor.c) and separate handling
of monitor_write_data there (previously done directly in hvm_do_resume).

* Separate enabling/disabling of CPU_BASED_CR3_LOAD_EXITING for CR3 monitor
vm-events from CR0 node @ vmx_update_guest_cr(v, 0) into newly added function
vmx_vm_event_update_cr3_traps(d); also, for a better interface, create generic
functions write_ctrlreg_adjust_traps and write_ctrlreg_disable_traps (in
x86/monitor.c) which deal with setting/unsetting any traps for cr-write monitor
vm-events.

Signed-off-by: Corneliu ZUZU <czuzu@bitdefender.com>
---
Changed since v2:
  * add write_ctrlreg_disable_traps, call from arch_monitor_cleanup_domain
---
 xen/arch/x86/hvm/hvm.c            | 46 ++++++++++-----------------
 xen/arch/x86/hvm/vmx/vmx.c        | 58 +++++++++++++++++++++++++++++++---
 xen/arch/x86/monitor.c            | 66 ++++++++++++++++++++++++++++++++++-----
 xen/include/asm-x86/hvm/vmx/vmx.h |  1 +
 xen/include/asm-x86/monitor.h     |  2 ++
 5 files changed, 131 insertions(+), 42 deletions(-)

Comments

Jan Beulich July 8, 2016, 7:21 a.m. UTC | #1
>>> On 06.07.16 at 17:50, <czuzu@bitdefender.com> wrote:

The title of this patch keeps confusing me - which code motion is
being relocated here?

> +void vmx_vm_event_update_cr3_traps(struct domain *d)

Is there anything in this function preventing the parameter to be
const?

> +{
> +    struct vcpu *v;
> +    struct arch_vmx_struct *avmx;
> +    unsigned int cr3_bitmask;
> +    bool_t cr3_vmevent, cr3_ldexit;
> +
> +    /* domain must be paused */
> +    ASSERT(atomic_read(&d->pause_count));

Comment style.

> +    /* non-hap domains trap CR3 writes unconditionally */
> +    if ( !paging_mode_hap(d) )
> +    {
> +#ifndef NDEBUG
> +        for_each_vcpu ( d, v )
> +            ASSERT(v->arch.hvm_vmx.exec_control & CPU_BASED_CR3_LOAD_EXITING);
> +#endif
> +        return;
> +    }
> +
> +    cr3_bitmask = monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3);
> +    cr3_vmevent = !!(d->arch.monitor.write_ctrlreg_enabled & cr3_bitmask);
> +
> +    for_each_vcpu ( d, v )
> +    {
> +        avmx = &v->arch.hvm_vmx;
> +        cr3_ldexit = !!(avmx->exec_control & CPU_BASED_CR3_LOAD_EXITING);
> +
> +        if ( cr3_vmevent == cr3_ldexit )
> +            continue;
> +
> +        /*
> +         * If CR0.PE=0, CR3 load exiting must remain enabled.
> +         * See vmx_update_guest_cr code motion for cr = 0.
> +         */

Same as for the title - what code motion is this referring to? In a
code comment you clearly shouldn't be referring to anything the
patch effects, only to its result.

> +        if ( cr3_ldexit && !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) )
> +            continue;

The first sentence of the comment should be brought in line with
this condition.

> +static inline void write_ctrlreg_adjust_traps(struct domain *d, uint8_t index)

Unless there is a particular reason for this uint8_t, please convert to
unsigned int.

> +{
> +    /* vmx only */
> +    ASSERT(cpu_has_vmx);

Comment style (more below). Should perhaps also get "for now" or
some such added.

> +static inline void write_ctrlreg_disable_traps(struct domain *d)
> +{
> +    unsigned int old = d->arch.monitor.write_ctrlreg_enabled;
> +    d->arch.monitor.write_ctrlreg_enabled = 0;
> +
> +    if ( old )
> +    {
> +        /* vmx only */
> +        ASSERT(cpu_has_vmx);

Wouldn't this better move ahead of the if()?

> +        /* was CR3 load-exiting enabled due to monitoring? */
> +        if ( old & monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3) )

And then this if() alone would suffice.

> @@ -36,11 +63,40 @@ int arch_monitor_init_domain(struct domain *d)
>  void arch_monitor_cleanup_domain(struct domain *d)
>  {
>      xfree(d->arch.monitor.msr_bitmap);
> -
> +    write_ctrlreg_disable_traps(d);
>      memset(&d->arch.monitor, 0, sizeof(d->arch.monitor));
>      memset(&d->monitor, 0, sizeof(d->monitor));
>  }

Rather than deleting the blank line, perhaps better to insert another
one after your addition?

Jan
Corneliu ZUZU July 8, 2016, 10:22 a.m. UTC | #2
On 7/8/2016 10:21 AM, Jan Beulich wrote:
>>>> On 06.07.16 at 17:50, <czuzu@bitdefender.com> wrote:
> The title of this patch keeps confusing me - which code motion is
> being relocated here?

As the commit message clearly states, the code motions that are being 
relocated are:
1) handling of monitor_write_data @ hvm_do_resume
2) the code in vmx_update_guest_cr (when cr = 0) that deals with setting 
CR3 load-exiting for cr-write monitor vm-events, i.e. the comment:
     /* Trap CR3 updates if CR3 memory events are enabled. */
and what's removed from under it.

By 'relocation' I meant making that code vm-event specific (moving it to 
vm-event specific files).

>
>> +void vmx_vm_event_update_cr3_traps(struct domain *d)
> Is there anything in this function preventing the parameter to be
> const?

Nope, will do.

>> +{
>> +    struct vcpu *v;
>> +    struct arch_vmx_struct *avmx;
>> +    unsigned int cr3_bitmask;
>> +    bool_t cr3_vmevent, cr3_ldexit;
>> +
>> +    /* domain must be paused */
>> +    ASSERT(atomic_read(&d->pause_count));
> Comment style.

As in change to "/* Domain must be paused. */"?

>
>> +    /* non-hap domains trap CR3 writes unconditionally */
>> +    if ( !paging_mode_hap(d) )
>> +    {
>> +#ifndef NDEBUG
>> +        for_each_vcpu ( d, v )
>> +            ASSERT(v->arch.hvm_vmx.exec_control & CPU_BASED_CR3_LOAD_EXITING);
>> +#endif
>> +        return;
>> +    }
>> +
>> +    cr3_bitmask = monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3);
>> +    cr3_vmevent = !!(d->arch.monitor.write_ctrlreg_enabled & cr3_bitmask);
>> +
>> +    for_each_vcpu ( d, v )
>> +    {
>> +        avmx = &v->arch.hvm_vmx;
>> +        cr3_ldexit = !!(avmx->exec_control & CPU_BASED_CR3_LOAD_EXITING);
>> +
>> +        if ( cr3_vmevent == cr3_ldexit )
>> +            continue;
>> +
>> +        /*
>> +         * If CR0.PE=0, CR3 load exiting must remain enabled.
>> +         * See vmx_update_guest_cr code motion for cr = 0.
>> +         */
> Same as for the title - what code motion is this referring to? In a
> code comment you clearly shouldn't be referring to anything the
> patch effects, only to its result.

The "vmx_update_guest_cr code motion for cr = 0", that's what it's 
referring to.
'vmx_update_guest_cr()' is a function, 'cr' is one of its parameters.
In other words, see what's happening in the function 
'vmx_update_guest_cr() when you pass it cr = 0' and you'll understand 
why CR3 load-exiting must remain enabled when CR0.PE=0.

>
>> +        if ( cr3_ldexit && !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) )
>> +            continue;
> The first sentence of the comment should be brought in line with
> this condition.

Would this do (aligned with the above observation):

"

         /*
          * If CR3 load-exiting was enabled and CR0.PE=0, then it must remain
          * enabled (see vmx_update_guest_cr(v, cr) function when cr = 0).
          */

"
?

>> +static inline void write_ctrlreg_adjust_traps(struct domain *d, uint8_t index)
> Unless there is a particular reason for this uint8_t, please convert to
> unsigned int.

The particular reason is cr-indexes being uint8_t typed (see 
typeof(xen_domctl_monitor_op.mov_to_cr.index)).
But I will change it to unsigned int if you prefer (maybe you could 
explain the preference though).

>> +{
>> +    /* vmx only */
>> +    ASSERT(cpu_has_vmx);
> Comment style (more below). Should perhaps also get "for now" or
> some such added.

As in "/* For now, VMX only. */"?

>
>> +static inline void write_ctrlreg_disable_traps(struct domain *d)
>> +{
>> +    unsigned int old = d->arch.monitor.write_ctrlreg_enabled;
>> +    d->arch.monitor.write_ctrlreg_enabled = 0;
>> +
>> +    if ( old )
>> +    {
>> +        /* vmx only */
>> +        ASSERT(cpu_has_vmx);
> Wouldn't this better move ahead of the if()?
>
>> +        /* was CR3 load-exiting enabled due to monitoring? */
>> +        if ( old & monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3) )
> And then this if() alone would suffice.

No, it would be wrong because that ASSERT may not hold if "old == 0", 
i.e. we only ASSERT the implication "CR-write vm-events can be enabled 
-> vmx domain", but since the function is called by 
arch_monitor_cleanup_domain, putting the ASSERT before the if() would 
change that implication to "(any) monitor vm-events available -> vmx 
domain", assertion which wouldn't be proper TBD here.

>
>> @@ -36,11 +63,40 @@ int arch_monitor_init_domain(struct domain *d)
>>   void arch_monitor_cleanup_domain(struct domain *d)
>>   {
>>       xfree(d->arch.monitor.msr_bitmap);
>> -
>> +    write_ctrlreg_disable_traps(d);
>>       memset(&d->arch.monitor, 0, sizeof(d->arch.monitor));
>>       memset(&d->monitor, 0, sizeof(d->monitor));
>>   }
> Rather than deleting the blank line, perhaps better to insert another
> one after your addition?
>
> Jan

Ack.

Thanks,
Corneliu.
Jan Beulich July 8, 2016, 10:37 a.m. UTC | #3
>>> On 08.07.16 at 12:22, <czuzu@bitdefender.com> wrote:
> On 7/8/2016 10:21 AM, Jan Beulich wrote:
>>>>> On 06.07.16 at 17:50, <czuzu@bitdefender.com> wrote:
>> The title of this patch keeps confusing me - which code motion is
>> being relocated here?
> 
> As the commit message clearly states, the code motions that are being 
> relocated are:

Again this sentence makes no sense to me: I can't see how
"code motions" can be "relocated", just like I don't see how you
could move a move. But maybe it's just me...

> 1) handling of monitor_write_data @ hvm_do_resume
> 2) the code in vmx_update_guest_cr (when cr = 0) that deals with setting 
> CR3 load-exiting for cr-write monitor vm-events, i.e. the comment:
>      /* Trap CR3 updates if CR3 memory events are enabled. */
> and what's removed from under it.
> 
> By 'relocation' I meant making that code vm-event specific (moving it to 
> vm-event specific files).

Yes, that's what I've guessed.

>>> +{
>>> +    struct vcpu *v;
>>> +    struct arch_vmx_struct *avmx;
>>> +    unsigned int cr3_bitmask;
>>> +    bool_t cr3_vmevent, cr3_ldexit;
>>> +
>>> +    /* domain must be paused */
>>> +    ASSERT(atomic_read(&d->pause_count));
>> Comment style.
> 
> As in change to "/* Domain must be paused. */"?

Yes, as mandated by ./CODING_STYLE.

>>> +    /* non-hap domains trap CR3 writes unconditionally */
>>> +    if ( !paging_mode_hap(d) )
>>> +    {
>>> +#ifndef NDEBUG
>>> +        for_each_vcpu ( d, v )
>>> +            ASSERT(v->arch.hvm_vmx.exec_control & CPU_BASED_CR3_LOAD_EXITING);
>>> +#endif
>>> +        return;
>>> +    }
>>> +
>>> +    cr3_bitmask = monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3);
>>> +    cr3_vmevent = !!(d->arch.monitor.write_ctrlreg_enabled & cr3_bitmask);
>>> +
>>> +    for_each_vcpu ( d, v )
>>> +    {
>>> +        avmx = &v->arch.hvm_vmx;
>>> +        cr3_ldexit = !!(avmx->exec_control & CPU_BASED_CR3_LOAD_EXITING);
>>> +
>>> +        if ( cr3_vmevent == cr3_ldexit )
>>> +            continue;
>>> +
>>> +        /*
>>> +         * If CR0.PE=0, CR3 load exiting must remain enabled.
>>> +         * See vmx_update_guest_cr code motion for cr = 0.
>>> +         */
>> Same as for the title - what code motion is this referring to? In a
>> code comment you clearly shouldn't be referring to anything the
>> patch effects, only to its result.
> 
> The "vmx_update_guest_cr code motion for cr = 0", that's what's 
> referring to.

So I guess my problem really is that I don't understand what a
"code motion" is (other than the act of moving code from one
place to another).

> 'vmx_update_guest_cr()' is a function, 'cr' is one of its parameters.
> In other words, see what's happening in the function 
> 'vmx_update_guest_cr() when you pass it cr = 0' and you'll understand 
> why CR3 load-exiting must remain enabled when CR0.PE=0.
> 
>>
>>> +        if ( cr3_ldexit && !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) )
>>> +            continue;
>> The first sentence of the comment should be brought in line with
>> this condition.
> 
> Would this do (aligned with the above observation):
> 
> "
> 
>          /*
>           * If CR3 load-exiting was enabled and CR0.PE=0, then it must remain
>           * enabled (see vmx_update_guest_cr(v, cr) function when cr = 0).
>           */
> 
> "
> ?

Not really: The condition checks whether paging is enabled and
whether it's an unrestricted guest. The comment talks about
protected mode being enabled.

>>> +static inline void write_ctrlreg_adjust_traps(struct domain *d, uint8_t 
> index)
>> Unless there is a particular reason for this uint8_t, please convert to
>> unsigned int.
> 
> The particular reason is cr-indexes being uint8_t typed (see 
> typeof(xen_domctl_monitor_op.mov_to_cr.index)).
> But I will change it to unsigned int if you prefer (maybe you could 
> explain the preference though).

No use of fixed width types when fixed width types aren't really
required. Generally generated code is less efficient when having
to deal with fixed width types.

>>> +{
>>> +    /* vmx only */
>>> +    ASSERT(cpu_has_vmx);
>> Comment style (more below). Should perhaps also get "for now" or
>> some such added.
> 
> As in "/* For now, VMX only. */"?

For example, yes.

>>> +static inline void write_ctrlreg_disable_traps(struct domain *d)
>>> +{
>>> +    unsigned int old = d->arch.monitor.write_ctrlreg_enabled;
>>> +    d->arch.monitor.write_ctrlreg_enabled = 0;
>>> +
>>> +    if ( old )
>>> +    {
>>> +        /* vmx only */
>>> +        ASSERT(cpu_has_vmx);
>> Wouldn't this better move ahead of the if()?
>>
>>> +        /* was CR3 load-exiting enabled due to monitoring? */
>>> +        if ( old & monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3) )
>> And then this if() alone would suffice.
> 
> No, it would be wrong because that ASSERT may not hold if "old == 0", 
> i.e. we only ASSERT the implication "CR-write vm-events can be enabled 
> -> vmx domain", but since the function is called by 
> arch_monitor_cleanup_domain, putting the ASSERT before the if() would 
> change that implication to "(any) monitor vm-events available -> vmx 
> domain", assertion which wouldn't be proper TBD here.

Ah, okay - I was under the impression that no VM events were
allowed under SVM.

Jan
Corneliu ZUZU July 8, 2016, 11:33 a.m. UTC | #4
On 7/8/2016 1:37 PM, Jan Beulich wrote:
>>>> On 08.07.16 at 12:22, <czuzu@bitdefender.com> wrote:
>> On 7/8/2016 10:21 AM, Jan Beulich wrote:
>>>>>> On 06.07.16 at 17:50, <czuzu@bitdefender.com> wrote:
>>> The title of this patch keeps confusing me - which code motion is
>>> being relocated here?
>> As the commit message clearly states, the code motions that are being
>> relocated are:
> Again this sentence makes no sense to me: I can't see how
> "code motions" can be "relocated", just like I don't see how you
> could move a move. But maybe it's just me...

Hah, sorry, I'm not very good expressivity-wise, a weakness I'm aware of 
and which makes me pick up expressions I notice other people use (in 
this case those of maintainers).
I think you were the one I noticed using the expression back in an older 
patch-series I've sent and I thought by "code-motion" you meant simply 
"that which some code does, tries to accomplish and the code itself".

>> 1) handling of monitor_write_data @ hvm_do_resume
>> 2) the code in vmx_update_guest_cr (when cr = 0) that deals with setting
>> CR3 load-exiting for cr-write monitor vm-events, i.e. the comment:
>>       /* Trap CR3 updates if CR3 memory events are enabled. */
>> and what's removed from under it.
>>
>> By 'relocation' I meant making that code vm-event specific (moving it to
>> vm-event specific files).
> Yes, that what I've guessed.
>
>>>> +{
>>>> +    struct vcpu *v;
>>>> +    struct arch_vmx_struct *avmx;
>>>> +    unsigned int cr3_bitmask;
>>>> +    bool_t cr3_vmevent, cr3_ldexit;
>>>> +
>>>> +    /* domain must be paused */
>>>> +    ASSERT(atomic_read(&d->pause_count));
>>> Comment style.
>> As in change to "/* Domain must be paused. */"?
> Yes, as mandated by ./CODING_STYLE.
>
>>>> +    /* non-hap domains trap CR3 writes unconditionally */
>>>> +    if ( !paging_mode_hap(d) )
>>>> +    {
>>>> +#ifndef NDEBUG
>>>> +        for_each_vcpu ( d, v )
>>>> +            ASSERT(v->arch.hvm_vmx.exec_control & CPU_BASED_CR3_LOAD_EXITING);
>>>> +#endif
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    cr3_bitmask = monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3);
>>>> +    cr3_vmevent = !!(d->arch.monitor.write_ctrlreg_enabled & cr3_bitmask);
>>>> +
>>>> +    for_each_vcpu ( d, v )
>>>> +    {
>>>> +        avmx = &v->arch.hvm_vmx;
>>>> +        cr3_ldexit = !!(avmx->exec_control & CPU_BASED_CR3_LOAD_EXITING);
>>>> +
>>>> +        if ( cr3_vmevent == cr3_ldexit )
>>>> +            continue;
>>>> +
>>>> +        /*
>>>> +         * If CR0.PE=0, CR3 load exiting must remain enabled.
>>>> +         * See vmx_update_guest_cr code motion for cr = 0.
>>>> +         */
>>> Same as for the title - what code motion is this referring to? In a
>>> code comment you clearly shouldn't be referring to anything the
>>> patch effects, only to its result.
>> The "vmx_update_guest_cr code motion for cr = 0", that's what's
>> referring to.
> So I guess my problem really is that I don't understand what a
> "code motion" is (other than the act of moving code from one
> place to another).

Again, sorry, will try to rephrase all of this properly :-).

>
>> 'vmx_update_guest_cr()' is a function, 'cr' is one of its parameters.
>> In other words, see what's happening in the function
>> 'vmx_update_guest_cr() when you pass it cr = 0' and you'll understand
>> why CR3 load-exiting must remain enabled when CR0.PE=0.
>>
>>>> +        if ( cr3_ldexit && !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) )
>>>> +            continue;
>>> The first sentence of the comment should be brought in line with
>>> this condition.
>> Would this do (aligned with the above observation):
>>
>> "
>>
>>           /*
>>            * If CR3 load-exiting was enabled and CR0.PE=0, then it must remain
>>            * enabled (see vmx_update_guest_cr(v, cr) function when cr = 0).
>>            */
>>
>> "
>> ?
> Not really: The condition checks whether paging is enabled and
> whether it's an unrestricted guest. The comment talks about
> protected mode being enabled.

Hah you're right, I only now notice, that comment has actually been 
adopted (although I don't remember from where, I wonder if it was 
meantime removed and I only now see), I always thought it said "CR0.PG = 
0"...
So...
"

         /*
          * If domain paging is disabled (CR0.PG=0) and
          * the domain is not in real-mode, then CR3 load-exiting
          * must remain enabled (see vmx_update_guest_cr(v, cr) when cr = 0).
          */
"
?

>>>> +static inline void write_ctrlreg_adjust_traps(struct domain *d, uint8_t
>> index)
>>> Unless there is a particular reason for this uint8_t, please convert to
>>> unsigned int.
>> The particular reason is cr-indexes being uint8_t typed (see
>> typeof(xen_domctl_monitor_op.mov_to_cr.index)).
>> But I will change it to unsigned int if you prefer (maybe you could
>> explain the preference though).
> No use of fixed width types when fixed width types aren't really
> required. Generally generated code is less efficient when having
> to deal with fixed width types.

Strange, I would have thought the compiler would properly (and easily) 
deal with such efficiency issues.

>>>> +{
>>>> +    /* vmx only */
>>>> +    ASSERT(cpu_has_vmx);
>>> Comment style (more below). Should perhaps also get "for now" or
>>> some such added.
>> As in "/* For now, VMX only. */"?
> For example, yes.
>
>>>> +static inline void write_ctrlreg_disable_traps(struct domain *d)
>>>> +{
>>>> +    unsigned int old = d->arch.monitor.write_ctrlreg_enabled;
>>>> +    d->arch.monitor.write_ctrlreg_enabled = 0;
>>>> +
>>>> +    if ( old )
>>>> +    {
>>>> +        /* vmx only */
>>>> +        ASSERT(cpu_has_vmx);
>>> Wouldn't this better move ahead of the if()?
>>>
>>>> +        /* was CR3 load-exiting enabled due to monitoring? */
>>>> +        if ( old & monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3) )
>>> And then this if() alone would suffice.
>> No, it would be wrong because that ASSERT may not hold if "old == 0",
>> i.e. we only ASSERT the implication "CR-write vm-events can be enabled
>> -> vmx domain", but since the function is called by
>> arch_monitor_cleanup_domain, putting the ASSERT before the if() would
>> change that implication to "(any) monitor vm-events available -> vmx
>> domain", assertion which wouldn't be proper TBD here.
> Ah, okay - I was under the impression that no VM events were
> allowed under SVM.
>
> Jan

Thanks,
Corneliu.
Jan Beulich July 8, 2016, 11:53 a.m. UTC | #5
>>> On 08.07.16 at 13:33, <czuzu@bitdefender.com> wrote:
> On 7/8/2016 1:37 PM, Jan Beulich wrote:
>>>>> On 08.07.16 at 12:22, <czuzu@bitdefender.com> wrote:
>>> On 7/8/2016 10:21 AM, Jan Beulich wrote:
>>>>>>> On 06.07.16 at 17:50, <czuzu@bitdefender.com> wrote:
>>>>> +        /*
>>>>> +         * If CR0.PE=0, CR3 load exiting must remain enabled.
>>>>> +         * See vmx_update_guest_cr code motion for cr = 0.
>>>>> +         */
>>>>> +        if ( cr3_ldexit && !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) )
>>>>> +            continue;
>>>> The first sentence of the comment should be brought in line with
>>>> this condition.
>>> Would this do (aligned with the above observation):
>>>
>>> "
>>>
>>>           /*
>>>            * If CR3 load-exiting was enabled and CR0.PE=0, then it must remain
>>>            * enabled (see vmx_update_guest_cr(v, cr) function when cr = 0).
>>>            */
>>>
>>> "
>>> ?
>> Not really: The condition checks whether paging is enabled and
>> whether it's an unrestricted guest. The comment talks about
>> protected mode being enabled.
> 
> Hah you're right, I only now notice, that comment has actually been 
> adopted (although I don't remember from where, I wonder if it was 
> meantime removed and I only now see), I always thought it said "CR0.PG = 
> 0"...
> So...
> "
> 
>          /*
>           * If domain paging is disabled (CR0.PG=0) and
>           * the domain is not in real-mode, then CR3 load-exiting
>           * must remain enabled (see vmx_update_guest_cr(v, cr) when cr = 0).
>           */
> "
> ?

Looks reasonable.

>>>>> +static inline void write_ctrlreg_adjust_traps(struct domain *d, uint8_t
>>> index)
>>>> Unless there is a particular reason for this uint8_t, please convert to
>>>> unsigned int.
>>> The particular reason is cr-indexes being uint8_t typed (see
>>> typeof(xen_domctl_monitor_op.mov_to_cr.index)).
>>> But I will change it to unsigned int if you prefer (maybe you could
>>> explain the preference though).
>> No use of fixed width types when fixed width types aren't really
>> required. Generally generated code is less efficient when having
>> to deal with fixed width types.
> 
> Strange, I would have thought the compiler would properly (and easily) 
> deal with such efficiency issues.

In this case the compiler may well do (as the function is static
inline), but in other cases it's simply not allowed to. In order to
not misguide people cloning existing code we should thus try to
limit the number of bad examples (which in particular mean not
introducing any new ones).

Jan
Corneliu ZUZU July 8, 2016, 11:57 a.m. UTC | #6
On 7/8/2016 2:53 PM, Jan Beulich wrote:
>>>> On 08.07.16 at 13:33, <czuzu@bitdefender.com> wrote:
>> On 7/8/2016 1:37 PM, Jan Beulich wrote:
>>>>>> On 08.07.16 at 12:22, <czuzu@bitdefender.com> wrote:
>>>> On 7/8/2016 10:21 AM, Jan Beulich wrote:
>>>>>>>> On 06.07.16 at 17:50, <czuzu@bitdefender.com> wrote:
>>>>>> +        /*
>>>>>> +         * If CR0.PE=0, CR3 load exiting must remain enabled.
>>>>>> +         * See vmx_update_guest_cr code motion for cr = 0.
>>>>>> +         */
>>>>>> +        if ( cr3_ldexit && !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) )
>>>>>> +            continue;
>>>>> The first sentence of the comment should be brought in line with
>>>>> this condition.
>>>> Would this do (aligned with the above observation):
>>>>
>>>> "
>>>>
>>>>            /*
>>>>             * If CR3 load-exiting was enabled and CR0.PE=0, then it must remain
>>>>             * enabled (see vmx_update_guest_cr(v, cr) function when cr = 0).
>>>>             */
>>>>
>>>> "
>>>> ?
>>> Not really: The condition checks whether paging is enabled and
>>> whether it's an unrestricted guest. The comment talks about
>>> protected mode being enabled.
>> Hah you're right, I only now notice, that comment has actually been
>> adopted (although I don't remember from where, I wonder if it was
>> meantime removed and I only now see), I always thought it said "CR0.PG =
>> 0"...
>> So...
>> "
>>
>>           /*
>>            * If domain paging is disabled (CR0.PG=0) and
>>            * the domain is not in real-mode, then CR3 load-exiting
>>            * must remain enabled (see vmx_update_guest_cr(v, cr) when cr = 0).
>>            */
>> "
>> ?
> Looks reasonable.
>
>>>>>> +static inline void write_ctrlreg_adjust_traps(struct domain *d, uint8_t
>>>> index)
>>>>> Unless there is a particular reason for this uint8_t, please convert to
>>>>> unsigned int.
>>>> The particular reason is cr-indexes being uint8_t typed (see
>>>> typeof(xen_domctl_monitor_op.mov_to_cr.index)).
>>>> But I will change it to unsigned int if you prefer (maybe you could
>>>> explain the preference though).
>>> No use of fixed width types when fixed width types aren't really
>>> required. Generally generated code is less efficient when having
>>> to deal with fixed width types.
>> Strange, I would have thought the compiler would properly (and easily)
>> deal with such efficiency issues.
> In this case the compiler may well do (as the function is static
> inline), but in other cases it's simply not allowed to. In order to
> not misguide people cloning existing code we should thus try to
> limit the number of bad examples (which in particular mean not
> introducing any new ones).
>
> Jan

Ack.

Corneliu.
Tamas K Lengyel July 8, 2016, 3:50 p.m. UTC | #7
> diff --git a/xen/arch/x86/monitor.c b/xen/arch/x86/monitor.c
> index 205df41..42f31bf 100644
> --- a/xen/arch/x86/monitor.c
> +++ b/xen/arch/x86/monitor.c

[snip]

So with this code-portion appropriately being relocated here I think
we should also rename..

> +void arch_monitor_write_data(struct vcpu *v)
> +{
> +    struct monitor_write_data *w = &v->arch.vm_event->write_data;

.. arch.vm_event to arch.monitor to match the subsystem it is used by.

Thanks,
Tamas
Corneliu ZUZU July 8, 2016, 5:58 p.m. UTC | #8
On 7/8/2016 6:50 PM, Tamas K Lengyel wrote:
>> diff --git a/xen/arch/x86/monitor.c b/xen/arch/x86/monitor.c
>> index 205df41..42f31bf 100644
>> --- a/xen/arch/x86/monitor.c
>> +++ b/xen/arch/x86/monitor.c
> [snip]
>
> So with this code-portion appropriately being relocated here I think
> we should also rename..
>
>> +void arch_monitor_write_data(struct vcpu *v)
>> +{
>> +    struct monitor_write_data *w = &v->arch.vm_event->write_data;
> .. arch.vm_event to arch.monitor to match the subsystem it is used by.
>
> Thanks,
> Tamas

Which is (almost) exactly what I've done in (another..) patch-series 
I'll be sending today I hope (and a few more things). ;-)

Thanks,
Corneliu.
Tian, Kevin July 11, 2016, 2:52 a.m. UTC | #9
> From: Corneliu ZUZU [mailto:czuzu@bitdefender.com]
> Sent: Wednesday, July 06, 2016 11:51 PM
> 
> For readability:
> 
> * Add function arch_monitor_write_data (in x86/monitor.c) and separate handling
> of monitor_write_data there (previously done directly in hvm_do_resume).
> 
> * Separate enabling/disabling of CPU_BASED_CR3_LOAD_EXITING for CR3 monitor
> vm-events from CR0 node @ vmx_update_guest_cr(v, 0) into newly added function
> vmx_vm_event_update_cr3_traps(d); also, for a better interface, create generic
> functions write_ctrlreg_adjust_traps and write_ctrlreg_disable_traps (in
> x86/monitor.c) which deal with setting/unsetting any traps for cr-write monitor
> vm-events.
> 
> Signed-off-by: Corneliu ZUZU <czuzu@bitdefender.com>
> ---
> Changed since v2:
>   * add write_ctrlreg_disable_traps, call from arch_monitor_cleanup_domain
> ---
>  xen/arch/x86/hvm/hvm.c            | 46 ++++++++++-----------------
>  xen/arch/x86/hvm/vmx/vmx.c        | 58
> +++++++++++++++++++++++++++++++---
>  xen/arch/x86/monitor.c            | 66
> ++++++++++++++++++++++++++++++++++-----
>  xen/include/asm-x86/hvm/vmx/vmx.h |  1 +
>  xen/include/asm-x86/monitor.h     |  2 ++
>  5 files changed, 131 insertions(+), 42 deletions(-)
> 
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index c89ab6e..e3829d2 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -475,8 +475,6 @@ void hvm_do_resume(struct vcpu *v)
> 
>      if ( unlikely(v->arch.vm_event) )
>      {
> -        struct monitor_write_data *w = &v->arch.vm_event->write_data;
> -
>          if ( v->arch.vm_event->emulate_flags )
>          {
>              enum emul_kind kind = EMUL_KIND_NORMAL;
> @@ -494,29 +492,7 @@ void hvm_do_resume(struct vcpu *v)
>              v->arch.vm_event->emulate_flags = 0;
>          }
> 
> -        if ( w->do_write.msr )
> -        {
> -            hvm_msr_write_intercept(w->msr, w->value, 0);
> -            w->do_write.msr = 0;
> -        }
> -
> -        if ( w->do_write.cr0 )
> -        {
> -            hvm_set_cr0(w->cr0, 0);
> -            w->do_write.cr0 = 0;
> -        }
> -
> -        if ( w->do_write.cr4 )
> -        {
> -            hvm_set_cr4(w->cr4, 0);
> -            w->do_write.cr4 = 0;
> -        }
> -
> -        if ( w->do_write.cr3 )
> -        {
> -            hvm_set_cr3(w->cr3, 0);
> -            w->do_write.cr3 = 0;
> -        }
> +        arch_monitor_write_data(v);
>      }
> 
>      /* Inject pending hw/sw trap */
> @@ -2206,7 +2182,10 @@ int hvm_set_cr0(unsigned long value, bool_t may_defer)
> 
>          if ( hvm_monitor_crX(CR0, value, old_value) )
>          {
> -            /* The actual write will occur in hvm_do_resume(), if permitted. */
> +            /*
> +             * The actual write will occur in arch_monitor_write_data(), if
> +             * permitted.
> +             */
>              v->arch.vm_event->write_data.do_write.cr0 = 1;
>              v->arch.vm_event->write_data.cr0 = value;
> 
> @@ -2308,7 +2287,10 @@ int hvm_set_cr3(unsigned long value, bool_t may_defer)
> 
>          if ( hvm_monitor_crX(CR3, value, old) )
>          {
> -            /* The actual write will occur in hvm_do_resume(), if permitted. */
> +            /*
> +             * The actual write will occur in arch_monitor_write_data(), if
> +             * permitted.
> +             */
>              v->arch.vm_event->write_data.do_write.cr3 = 1;
>              v->arch.vm_event->write_data.cr3 = value;
> 
> @@ -2388,7 +2370,10 @@ int hvm_set_cr4(unsigned long value, bool_t may_defer)
> 
>          if ( hvm_monitor_crX(CR4, value, old_cr) )
>          {
> -            /* The actual write will occur in hvm_do_resume(), if permitted. */
> +            /*
> +             * The actual write will occur in arch_monitor_write_data(), if
> +             * permitted.
> +             */
>              v->arch.vm_event->write_data.do_write.cr4 = 1;
>              v->arch.vm_event->write_data.cr4 = value;
> 
> @@ -3767,7 +3752,10 @@ int hvm_msr_write_intercept(unsigned int msr, uint64_t
> msr_content,
>      {
>          ASSERT(v->arch.vm_event);
> 
> -        /* The actual write will occur in hvm_do_resume() (if permitted). */
> +        /*
> +         * The actual write will occur in arch_monitor_write_data(), if
> +         * permitted.
> +         */
>          v->arch.vm_event->write_data.do_write.msr = 1;
>          v->arch.vm_event->write_data.msr = msr;
>          v->arch.vm_event->write_data.value = msr_content;
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index 8ab074f..0fa3fea 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -1442,11 +1442,6 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int
> cr)
>              if ( !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) )
>                  v->arch.hvm_vmx.exec_control |= cr3_ctls;
> 
> -            /* Trap CR3 updates if CR3 memory events are enabled. */
> -            if ( v->domain->arch.monitor.write_ctrlreg_enabled &
> -                 monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3) )
> -                v->arch.hvm_vmx.exec_control |= CPU_BASED_CR3_LOAD_EXITING;
> -
>              if ( old_ctls != v->arch.hvm_vmx.exec_control )
>                  vmx_update_cpu_exec_control(v);
>          }
> @@ -3899,6 +3894,59 @@ void vmx_vmenter_helper(const struct cpu_user_regs *regs)
>  }
> 
>  /*
> + * Adjusts domain CR3 load-exiting execution control for CR3-write monitor
> + * vm-event.
> + */
> +void vmx_vm_event_update_cr3_traps(struct domain *d)
> +{
> +    struct vcpu *v;
> +    struct arch_vmx_struct *avmx;

Better to move this declaration into the inner loop below.

> +    unsigned int cr3_bitmask;
> +    bool_t cr3_vmevent, cr3_ldexit;
> +
> +    /* domain must be paused */
> +    ASSERT(atomic_read(&d->pause_count));

The comment adds no information beyond the code itself; either drop it, or expand it to
explain *why* the domain must be paused here.

> +
> +    /* non-hap domains trap CR3 writes unconditionally */
> +    if ( !paging_mode_hap(d) )
> +    {
> +#ifndef NDEBUG
> +        for_each_vcpu ( d, v )
> +            ASSERT(v->arch.hvm_vmx.exec_control &
> CPU_BASED_CR3_LOAD_EXITING);
> +#endif
> +        return;
> +    }
> +
> +    cr3_bitmask = monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3);
> +    cr3_vmevent = !!(d->arch.monitor.write_ctrlreg_enabled & cr3_bitmask);
> +
> +    for_each_vcpu ( d, v )
> +    {
> +        avmx = &v->arch.hvm_vmx;
> +        cr3_ldexit = !!(avmx->exec_control & CPU_BASED_CR3_LOAD_EXITING);
> +
> +        if ( cr3_vmevent == cr3_ldexit )
> +            continue;
> +
> +        /*
> +         * If CR0.PE=0, CR3 load exiting must remain enabled.
> +         * See vmx_update_guest_cr code motion for cr = 0.
> +         */

Please put the actual informative explanation here, instead of asking readers
to go look at another place.

> +        if ( cr3_ldexit && !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) )
> +            continue;

If !cr3_ldexit is not expected to occur in such a scenario, wouldn't the stricter form below be better?

if ( !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) )
{
	ASSERT(cr3_ldexit);
	continue;
}

> +
> +        if ( cr3_vmevent )
> +            avmx->exec_control |= CPU_BASED_CR3_LOAD_EXITING;
> +        else
> +            avmx->exec_control &= ~CPU_BASED_CR3_LOAD_EXITING;
> +
> +        vmx_vmcs_enter(v);
> +        vmx_update_cpu_exec_control(v);
> +        vmx_vmcs_exit(v);
> +    }
> +}
> +
> +/*
>   * Local variables:
>   * mode: C
>   * c-file-style: "BSD"
> diff --git a/xen/arch/x86/monitor.c b/xen/arch/x86/monitor.c
> index 205df41..42f31bf 100644
> --- a/xen/arch/x86/monitor.c
> +++ b/xen/arch/x86/monitor.c
> @@ -19,9 +19,36 @@
>   * License along with this program; If not, see <http://www.gnu.org/licenses/>.
>   */
> 
> +#include <asm/hvm/vmx/vmx.h>
>  #include <asm/monitor.h>
> +#include <asm/vm_event.h>
>  #include <public/vm_event.h>
> 
> +static inline void write_ctrlreg_adjust_traps(struct domain *d, uint8_t index)
> +{
> +    /* vmx only */
> +    ASSERT(cpu_has_vmx);
> +
> +    if ( VM_EVENT_X86_CR3 == index ) /* other CRs are always trapped */
> +        vmx_vm_event_update_cr3_traps(d);

Please wrap the above in an hvm_* function instead of calling vmx directly
from this common file (other architectures can leave it unimplemented).
That way this common code won't need to change again later when
other architectures are added.

> +}
> +
> +static inline void write_ctrlreg_disable_traps(struct domain *d)
> +{
> +    unsigned int old = d->arch.monitor.write_ctrlreg_enabled;
> +    d->arch.monitor.write_ctrlreg_enabled = 0;
> +
> +    if ( old )
> +    {
> +        /* vmx only */
> +        ASSERT(cpu_has_vmx);
> +
> +        /* was CR3 load-exiting enabled due to monitoring? */
> +        if ( old & monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3) )
> +            vmx_vm_event_update_cr3_traps(d);
> +    }
> +}
> +
>  int arch_monitor_init_domain(struct domain *d)
>  {
>      if ( !d->arch.monitor.msr_bitmap )
> @@ -36,11 +63,40 @@ int arch_monitor_init_domain(struct domain *d)
>  void arch_monitor_cleanup_domain(struct domain *d)
>  {
>      xfree(d->arch.monitor.msr_bitmap);
> -
> +    write_ctrlreg_disable_traps(d);
>      memset(&d->arch.monitor, 0, sizeof(d->arch.monitor));
>      memset(&d->monitor, 0, sizeof(d->monitor));
>  }
> 
> +void arch_monitor_write_data(struct vcpu *v)
> +{
> +    struct monitor_write_data *w = &v->arch.vm_event->write_data;
> +
> +    if ( w->do_write.msr )
> +    {
> +        hvm_msr_write_intercept(w->msr, w->value, 0);
> +        w->do_write.msr = 0;
> +    }
> +
> +    if ( w->do_write.cr0 )
> +    {
> +        hvm_set_cr0(w->cr0, 0);
> +        w->do_write.cr0 = 0;
> +    }
> +
> +    if ( w->do_write.cr4 )
> +    {
> +        hvm_set_cr4(w->cr4, 0);
> +        w->do_write.cr4 = 0;
> +    }
> +
> +    if ( w->do_write.cr3 )
> +    {
> +        hvm_set_cr3(w->cr3, 0);
> +        w->do_write.cr3 = 0;
> +    }
> +}
> +
>  static unsigned long *monitor_bitmap_for_msr(const struct domain *d, u32 *msr)
>  {
>      ASSERT(d->arch.monitor.msr_bitmap && msr);
> @@ -159,13 +215,7 @@ int arch_monitor_domctl_event(struct domain *d,
>          else
>              ad->monitor.write_ctrlreg_enabled &= ~ctrlreg_bitmask;
> 
> -        if ( VM_EVENT_X86_CR3 == mop->u.mov_to_cr.index )
> -        {
> -            struct vcpu *v;
> -            /* Latches new CR3 mask through CR0 code. */
> -            for_each_vcpu ( d, v )
> -                hvm_update_guest_cr(v, 0);
> -        }
> +        write_ctrlreg_adjust_traps(d, mop->u.mov_to_cr.index);
> 
>          domain_unpause(d);
> 
> diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h
> b/xen/include/asm-x86/hvm/vmx/vmx.h
> index 359b2a9..15bb592 100644
> --- a/xen/include/asm-x86/hvm/vmx/vmx.h
> +++ b/xen/include/asm-x86/hvm/vmx/vmx.h
> @@ -101,6 +101,7 @@ void vmx_update_debug_state(struct vcpu *v);
>  void vmx_update_exception_bitmap(struct vcpu *v);
>  void vmx_update_cpu_exec_control(struct vcpu *v);
>  void vmx_update_secondary_exec_control(struct vcpu *v);
> +void vmx_vm_event_update_cr3_traps(struct domain *d);
> 
>  #define POSTED_INTR_ON  0
>  #define POSTED_INTR_SN  1
> diff --git a/xen/include/asm-x86/monitor.h b/xen/include/asm-x86/monitor.h
> index a9db3c0..0611681 100644
> --- a/xen/include/asm-x86/monitor.h
> +++ b/xen/include/asm-x86/monitor.h
> @@ -94,6 +94,8 @@ int arch_monitor_init_domain(struct domain *d);
> 
>  void arch_monitor_cleanup_domain(struct domain *d);
> 
> +void arch_monitor_write_data(struct vcpu *v);
> +
>  bool_t monitored_msr(const struct domain *d, u32 msr);
> 
>  #endif /* __ASM_X86_MONITOR_H__ */
> --
> 2.5.0
diff mbox

Patch

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index c89ab6e..e3829d2 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -475,8 +475,6 @@  void hvm_do_resume(struct vcpu *v)
 
     if ( unlikely(v->arch.vm_event) )
     {
-        struct monitor_write_data *w = &v->arch.vm_event->write_data;
-
         if ( v->arch.vm_event->emulate_flags )
         {
             enum emul_kind kind = EMUL_KIND_NORMAL;
@@ -494,29 +492,7 @@  void hvm_do_resume(struct vcpu *v)
             v->arch.vm_event->emulate_flags = 0;
         }
 
-        if ( w->do_write.msr )
-        {
-            hvm_msr_write_intercept(w->msr, w->value, 0);
-            w->do_write.msr = 0;
-        }
-
-        if ( w->do_write.cr0 )
-        {
-            hvm_set_cr0(w->cr0, 0);
-            w->do_write.cr0 = 0;
-        }
-
-        if ( w->do_write.cr4 )
-        {
-            hvm_set_cr4(w->cr4, 0);
-            w->do_write.cr4 = 0;
-        }
-
-        if ( w->do_write.cr3 )
-        {
-            hvm_set_cr3(w->cr3, 0);
-            w->do_write.cr3 = 0;
-        }
+        arch_monitor_write_data(v);
     }
 
     /* Inject pending hw/sw trap */
@@ -2206,7 +2182,10 @@  int hvm_set_cr0(unsigned long value, bool_t may_defer)
 
         if ( hvm_monitor_crX(CR0, value, old_value) )
         {
-            /* The actual write will occur in hvm_do_resume(), if permitted. */
+            /*
+             * The actual write will occur in arch_monitor_write_data(), if
+             * permitted.
+             */
             v->arch.vm_event->write_data.do_write.cr0 = 1;
             v->arch.vm_event->write_data.cr0 = value;
 
@@ -2308,7 +2287,10 @@  int hvm_set_cr3(unsigned long value, bool_t may_defer)
 
         if ( hvm_monitor_crX(CR3, value, old) )
         {
-            /* The actual write will occur in hvm_do_resume(), if permitted. */
+            /*
+             * The actual write will occur in arch_monitor_write_data(), if
+             * permitted.
+             */
             v->arch.vm_event->write_data.do_write.cr3 = 1;
             v->arch.vm_event->write_data.cr3 = value;
 
@@ -2388,7 +2370,10 @@  int hvm_set_cr4(unsigned long value, bool_t may_defer)
 
         if ( hvm_monitor_crX(CR4, value, old_cr) )
         {
-            /* The actual write will occur in hvm_do_resume(), if permitted. */
+            /*
+             * The actual write will occur in arch_monitor_write_data(), if
+             * permitted.
+             */
             v->arch.vm_event->write_data.do_write.cr4 = 1;
             v->arch.vm_event->write_data.cr4 = value;
 
@@ -3767,7 +3752,10 @@  int hvm_msr_write_intercept(unsigned int msr, uint64_t msr_content,
     {
         ASSERT(v->arch.vm_event);
 
-        /* The actual write will occur in hvm_do_resume() (if permitted). */
+        /*
+         * The actual write will occur in arch_monitor_write_data(), if
+         * permitted.
+         */
         v->arch.vm_event->write_data.do_write.msr = 1;
         v->arch.vm_event->write_data.msr = msr;
         v->arch.vm_event->write_data.value = msr_content;
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 8ab074f..0fa3fea 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1442,11 +1442,6 @@  static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr)
             if ( !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) )
                 v->arch.hvm_vmx.exec_control |= cr3_ctls;
 
-            /* Trap CR3 updates if CR3 memory events are enabled. */
-            if ( v->domain->arch.monitor.write_ctrlreg_enabled &
-                 monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3) )
-                v->arch.hvm_vmx.exec_control |= CPU_BASED_CR3_LOAD_EXITING;
-
             if ( old_ctls != v->arch.hvm_vmx.exec_control )
                 vmx_update_cpu_exec_control(v);
         }
@@ -3899,6 +3894,59 @@  void vmx_vmenter_helper(const struct cpu_user_regs *regs)
 }
 
 /*
+ * Adjusts domain CR3 load-exiting execution control for CR3-write monitor
+ * vm-event.
+ */
+void vmx_vm_event_update_cr3_traps(struct domain *d)
+{
+    struct vcpu *v;
+    struct arch_vmx_struct *avmx;
+    unsigned int cr3_bitmask;
+    bool_t cr3_vmevent, cr3_ldexit;
+
+    /* domain must be paused */
+    ASSERT(atomic_read(&d->pause_count));
+
+    /* non-hap domains trap CR3 writes unconditionally */
+    if ( !paging_mode_hap(d) )
+    {
+#ifndef NDEBUG
+        for_each_vcpu ( d, v )
+            ASSERT(v->arch.hvm_vmx.exec_control & CPU_BASED_CR3_LOAD_EXITING);
+#endif
+        return;
+    }
+
+    cr3_bitmask = monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3);
+    cr3_vmevent = !!(d->arch.monitor.write_ctrlreg_enabled & cr3_bitmask);
+
+    for_each_vcpu ( d, v )
+    {
+        avmx = &v->arch.hvm_vmx;
+        cr3_ldexit = !!(avmx->exec_control & CPU_BASED_CR3_LOAD_EXITING);
+
+        if ( cr3_vmevent == cr3_ldexit )
+            continue;
+
+        /*
+         * If CR0.PE=0, CR3 load exiting must remain enabled.
+         * See vmx_update_guest_cr code motion for cr = 0.
+         */
+        if ( cr3_ldexit && !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) )
+            continue;
+
+        if ( cr3_vmevent )
+            avmx->exec_control |= CPU_BASED_CR3_LOAD_EXITING;
+        else
+            avmx->exec_control &= ~CPU_BASED_CR3_LOAD_EXITING;
+
+        vmx_vmcs_enter(v);
+        vmx_update_cpu_exec_control(v);
+        vmx_vmcs_exit(v);
+    }
+}
+
+/*
  * Local variables:
  * mode: C
  * c-file-style: "BSD"
diff --git a/xen/arch/x86/monitor.c b/xen/arch/x86/monitor.c
index 205df41..42f31bf 100644
--- a/xen/arch/x86/monitor.c
+++ b/xen/arch/x86/monitor.c
@@ -19,9 +19,36 @@ 
  * License along with this program; If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <asm/hvm/vmx/vmx.h>
 #include <asm/monitor.h>
+#include <asm/vm_event.h>
 #include <public/vm_event.h>
 
+static inline void write_ctrlreg_adjust_traps(struct domain *d, uint8_t index)
+{
+    /* vmx only */
+    ASSERT(cpu_has_vmx);
+
+    if ( VM_EVENT_X86_CR3 == index ) /* other CRs are always trapped */
+        vmx_vm_event_update_cr3_traps(d);
+}
+
+static inline void write_ctrlreg_disable_traps(struct domain *d)
+{
+    unsigned int old = d->arch.monitor.write_ctrlreg_enabled;
+    d->arch.monitor.write_ctrlreg_enabled = 0;
+
+    if ( old )
+    {
+        /* vmx only */
+        ASSERT(cpu_has_vmx);
+
+        /* was CR3 load-exiting enabled due to monitoring? */
+        if ( old & monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3) )
+            vmx_vm_event_update_cr3_traps(d);
+    }
+}
+
 int arch_monitor_init_domain(struct domain *d)
 {
     if ( !d->arch.monitor.msr_bitmap )
@@ -36,11 +63,40 @@  int arch_monitor_init_domain(struct domain *d)
 void arch_monitor_cleanup_domain(struct domain *d)
 {
     xfree(d->arch.monitor.msr_bitmap);
-
+    write_ctrlreg_disable_traps(d);
     memset(&d->arch.monitor, 0, sizeof(d->arch.monitor));
     memset(&d->monitor, 0, sizeof(d->monitor));
 }
 
+void arch_monitor_write_data(struct vcpu *v)
+{
+    struct monitor_write_data *w = &v->arch.vm_event->write_data;
+
+    if ( w->do_write.msr )
+    {
+        hvm_msr_write_intercept(w->msr, w->value, 0);
+        w->do_write.msr = 0;
+    }
+
+    if ( w->do_write.cr0 )
+    {
+        hvm_set_cr0(w->cr0, 0);
+        w->do_write.cr0 = 0;
+    }
+
+    if ( w->do_write.cr4 )
+    {
+        hvm_set_cr4(w->cr4, 0);
+        w->do_write.cr4 = 0;
+    }
+
+    if ( w->do_write.cr3 )
+    {
+        hvm_set_cr3(w->cr3, 0);
+        w->do_write.cr3 = 0;
+    }
+}
+
 static unsigned long *monitor_bitmap_for_msr(const struct domain *d, u32 *msr)
 {
     ASSERT(d->arch.monitor.msr_bitmap && msr);
@@ -159,13 +215,7 @@  int arch_monitor_domctl_event(struct domain *d,
         else
             ad->monitor.write_ctrlreg_enabled &= ~ctrlreg_bitmask;
 
-        if ( VM_EVENT_X86_CR3 == mop->u.mov_to_cr.index )
-        {
-            struct vcpu *v;
-            /* Latches new CR3 mask through CR0 code. */
-            for_each_vcpu ( d, v )
-                hvm_update_guest_cr(v, 0);
-        }
+        write_ctrlreg_adjust_traps(d, mop->u.mov_to_cr.index);
 
         domain_unpause(d);
 
diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h b/xen/include/asm-x86/hvm/vmx/vmx.h
index 359b2a9..15bb592 100644
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -101,6 +101,7 @@  void vmx_update_debug_state(struct vcpu *v);
 void vmx_update_exception_bitmap(struct vcpu *v);
 void vmx_update_cpu_exec_control(struct vcpu *v);
 void vmx_update_secondary_exec_control(struct vcpu *v);
+void vmx_vm_event_update_cr3_traps(struct domain *d);
 
 #define POSTED_INTR_ON  0
 #define POSTED_INTR_SN  1
diff --git a/xen/include/asm-x86/monitor.h b/xen/include/asm-x86/monitor.h
index a9db3c0..0611681 100644
--- a/xen/include/asm-x86/monitor.h
+++ b/xen/include/asm-x86/monitor.h
@@ -94,6 +94,8 @@  int arch_monitor_init_domain(struct domain *d);
 
 void arch_monitor_cleanup_domain(struct domain *d);
 
+void arch_monitor_write_data(struct vcpu *v);
+
 bool_t monitored_msr(const struct domain *d, u32 msr);
 
 #endif /* __ASM_X86_MONITOR_H__ */