diff mbox

[3/8] x86/vm-event/monitor: relocate code-motion more appropriately

Message ID 1467312212-3010-1-git-send-email-czuzu@bitdefender.com (mailing list archive)
State New, archived
Headers show

Commit Message

Corneliu ZUZU June 30, 2016, 6:43 p.m. UTC
For readability:

* Add function arch_monitor_write_data (in x86/monitor.c) and separate handling
of monitor_write_data there (previously done directly in hvm_do_resume).

* Add function write_ctrlreg_adjust_traps (in x86/monitor.c) and relocate
enabling/disabling of CPU_BASED_CR3_LOAD_EXITING for CR3 monitor vm-events there
(previously done through CR0 node @ vmx_update_guest_cr(v, 0)).

Signed-off-by: Corneliu ZUZU <czuzu@bitdefender.com>
---
 xen/arch/x86/hvm/hvm.c        | 48 +++++++++-------------
 xen/arch/x86/hvm/vmx/vmx.c    |  5 ---
 xen/arch/x86/monitor.c        | 94 +++++++++++++++++++++++++++++++++++++++----
 xen/include/asm-x86/monitor.h |  2 +
 4 files changed, 107 insertions(+), 42 deletions(-)

Comments

Razvan Cojocaru July 1, 2016, 7:54 a.m. UTC | #1
On 06/30/16 21:43, Corneliu ZUZU wrote:
> For readability:
> 
> * Add function arch_monitor_write_data (in x86/monitor.c) and separate handling
> of monitor_write_data there (previously done directly in hvm_do_resume).
> 
> * Add function write_ctrlreg_adjust_traps (in x86/monitor.c) and relocate
> enabling/disabling of CPU_BASED_CR3_LOAD_EXITING for CR3 monitor vm-events there
> (previously done through CR0 node @ vmx_update_guest_cr(v, 0)).
> 
> Signed-off-by: Corneliu ZUZU <czuzu@bitdefender.com>
> ---
>  xen/arch/x86/hvm/hvm.c        | 48 +++++++++-------------
>  xen/arch/x86/hvm/vmx/vmx.c    |  5 ---
>  xen/arch/x86/monitor.c        | 94 +++++++++++++++++++++++++++++++++++++++----
>  xen/include/asm-x86/monitor.h |  2 +
>  4 files changed, 107 insertions(+), 42 deletions(-)
> 
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index c89ab6e..5481a6e 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -475,8 +475,6 @@ void hvm_do_resume(struct vcpu *v)
>  
>      if ( unlikely(v->arch.vm_event) )
>      {
> -        struct monitor_write_data *w = &v->arch.vm_event->write_data;
> -
>          if ( v->arch.vm_event->emulate_flags )
>          {
>              enum emul_kind kind = EMUL_KIND_NORMAL;
> @@ -493,32 +491,10 @@ void hvm_do_resume(struct vcpu *v)
>  
>              v->arch.vm_event->emulate_flags = 0;
>          }
> -
> -        if ( w->do_write.msr )
> -        {
> -            hvm_msr_write_intercept(w->msr, w->value, 0);
> -            w->do_write.msr = 0;
> -        }
> -
> -        if ( w->do_write.cr0 )
> -        {
> -            hvm_set_cr0(w->cr0, 0);
> -            w->do_write.cr0 = 0;
> -        }
> -
> -        if ( w->do_write.cr4 )
> -        {
> -            hvm_set_cr4(w->cr4, 0);
> -            w->do_write.cr4 = 0;
> -        }
> -
> -        if ( w->do_write.cr3 )
> -        {
> -            hvm_set_cr3(w->cr3, 0);
> -            w->do_write.cr3 = 0;
> -        }
>      }
>  
> +    arch_monitor_write_data(v);
> +
>      /* Inject pending hw/sw trap */
>      if ( v->arch.hvm_vcpu.inject_trap.vector != -1 )
>      {
> @@ -2206,7 +2182,10 @@ int hvm_set_cr0(unsigned long value, bool_t may_defer)
>  
>          if ( hvm_monitor_crX(CR0, value, old_value) )
>          {
> -            /* The actual write will occur in hvm_do_resume(), if permitted. */
> +            /*
> +             * The actual write will occur in arch_monitor_write_data(), if
> +             * permitted.
> +             */
>              v->arch.vm_event->write_data.do_write.cr0 = 1;
>              v->arch.vm_event->write_data.cr0 = value;
>  
> @@ -2308,7 +2287,10 @@ int hvm_set_cr3(unsigned long value, bool_t may_defer)
>  
>          if ( hvm_monitor_crX(CR3, value, old) )
>          {
> -            /* The actual write will occur in hvm_do_resume(), if permitted. */
> +            /*
> +             * The actual write will occur in arch_monitor_write_data(), if
> +             * permitted.
> +             */
>              v->arch.vm_event->write_data.do_write.cr3 = 1;
>              v->arch.vm_event->write_data.cr3 = value;
>  
> @@ -2388,7 +2370,10 @@ int hvm_set_cr4(unsigned long value, bool_t may_defer)
>  
>          if ( hvm_monitor_crX(CR4, value, old_cr) )
>          {
> -            /* The actual write will occur in hvm_do_resume(), if permitted. */
> +            /*
> +             * The actual write will occur in arch_monitor_write_data(), if
> +             * permitted.
> +             */
>              v->arch.vm_event->write_data.do_write.cr4 = 1;
>              v->arch.vm_event->write_data.cr4 = value;
>  
> @@ -3767,7 +3752,10 @@ int hvm_msr_write_intercept(unsigned int msr, uint64_t msr_content,
>      {
>          ASSERT(v->arch.vm_event);
>  
> -        /* The actual write will occur in hvm_do_resume() (if permitted). */
> +        /*
> +         * The actual write will occur in arch_monitor_write_data(), if
> +         * permitted.
> +         */
>          v->arch.vm_event->write_data.do_write.msr = 1;
>          v->arch.vm_event->write_data.msr = msr;
>          v->arch.vm_event->write_data.value = msr_content;
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index 15c84c2..de04e6c 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -1442,11 +1442,6 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr)
>              if ( !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) )
>                  v->arch.hvm_vmx.exec_control |= cr3_ctls;
>  
> -            /* Trap CR3 updates if CR3 memory events are enabled. */
> -            if ( v->domain->arch.monitor.write_ctrlreg_enabled &
> -                 monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3) )
> -                v->arch.hvm_vmx.exec_control |= CPU_BASED_CR3_LOAD_EXITING;
> -
>              if ( old_ctls != v->arch.hvm_vmx.exec_control )
>                  vmx_update_cpu_exec_control(v);
>          }
> diff --git a/xen/arch/x86/monitor.c b/xen/arch/x86/monitor.c
> index a271161..90e4856 100644
> --- a/xen/arch/x86/monitor.c
> +++ b/xen/arch/x86/monitor.c
> @@ -20,6 +20,9 @@
>   */
>  
>  #include <asm/monitor.h>
> +#include <asm/paging.h>
> +#include <asm/hvm/vmx/vmx.h>
> +#include <asm/vm_event.h>
>  #include <public/vm_event.h>

As previously stated, unless there's a compelling reason to do
otherwise, AFAIK asm/hvm/... goes above asm/monitor.h.

Otherwise, for the monitor parts:

Acked-by: Razvan Cojocaru <rcojocaru@bitdefender.com>


Thanks,
Razvan
Jan Beulich July 4, 2016, 10:22 a.m. UTC | #2
>>> On 30.06.16 at 20:43, <czuzu@bitdefender.com> wrote:
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -475,8 +475,6 @@ void hvm_do_resume(struct vcpu *v)
>  
>      if ( unlikely(v->arch.vm_event) )
>      {
> -        struct monitor_write_data *w = &v->arch.vm_event->write_data;
> -
>          if ( v->arch.vm_event->emulate_flags )
>          {
>              enum emul_kind kind = EMUL_KIND_NORMAL;
> @@ -493,32 +491,10 @@ void hvm_do_resume(struct vcpu *v)
>  
>              v->arch.vm_event->emulate_flags = 0;
>          }
> -
> -        if ( w->do_write.msr )
> -        {
> -            hvm_msr_write_intercept(w->msr, w->value, 0);
> -            w->do_write.msr = 0;
> -        }
> -
> -        if ( w->do_write.cr0 )
> -        {
> -            hvm_set_cr0(w->cr0, 0);
> -            w->do_write.cr0 = 0;
> -        }
> -
> -        if ( w->do_write.cr4 )
> -        {
> -            hvm_set_cr4(w->cr4, 0);
> -            w->do_write.cr4 = 0;
> -        }
> -
> -        if ( w->do_write.cr3 )
> -        {
> -            hvm_set_cr3(w->cr3, 0);
> -            w->do_write.cr3 = 0;
> -        }
>      }
>  
> +    arch_monitor_write_data(v);

Why does this get moved outside the if(), with the same condition
getting added inside the function (inverted for bailing early)?

> @@ -119,6 +156,55 @@ bool_t monitored_msr(const struct domain *d, u32 msr)
>      return test_bit(msr, bitmap);
>  }
>  
> +static void write_ctrlreg_adjust_traps(struct domain *d)
> +{
> +    struct vcpu *v;
> +    struct arch_vmx_struct *avmx;
> +    unsigned int cr3_bitmask;
> +    bool_t cr3_vmevent, cr3_ldexit;
> +
> +    /* Adjust CR3 load-exiting. */
> +
> +    /* vmx only */
> +    ASSERT(cpu_has_vmx);
> +
> +    /* non-hap domains trap CR3 writes unconditionally */
> +    if ( !paging_mode_hap(d) )
> +    {
> +        for_each_vcpu ( d, v )
> +            ASSERT(v->arch.hvm_vmx.exec_control & CPU_BASED_CR3_LOAD_EXITING);
> +        return;
> +    }
> +
> +    cr3_bitmask = monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3);
> +    cr3_vmevent = !!(d->arch.monitor.write_ctrlreg_enabled & cr3_bitmask);
> +
> +    for_each_vcpu ( d, v )
> +    {
> +        avmx = &v->arch.hvm_vmx;
> +        cr3_ldexit = !!(avmx->exec_control & CPU_BASED_CR3_LOAD_EXITING);
> +
> +        if ( cr3_vmevent == cr3_ldexit )
> +            continue;
> +
> +        /*
> +         * If CR0.PE=0, CR3 load exiting must remain enabled.
> +         * See vmx_update_guest_cr code motion for cr = 0.
> +         */
> +        if ( cr3_ldexit && !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) )
> +            continue;
> +
> +        if ( cr3_vmevent )
> +            avmx->exec_control |= CPU_BASED_CR3_LOAD_EXITING;
> +        else
> +            avmx->exec_control &= ~CPU_BASED_CR3_LOAD_EXITING;
> +
> +        vmx_vmcs_enter(v);
> +        vmx_update_cpu_exec_control(v);
> +        vmx_vmcs_exit(v);
> +    }
> +}

While Razvan gave his ack already, I wonder whether it's really a
good idea to put deeply VMX-specific code outside of a VMX-specific
file.

Jan
Corneliu ZUZU July 4, 2016, 11:02 a.m. UTC | #3
Hi Jan,

On 7/4/2016 1:22 PM, Jan Beulich wrote:
>>>> On 30.06.16 at 20:43, <czuzu@bitdefender.com> wrote:
>> --- a/xen/arch/x86/hvm/hvm.c
>> +++ b/xen/arch/x86/hvm/hvm.c
>> @@ -475,8 +475,6 @@ void hvm_do_resume(struct vcpu *v)
>>   
>>       if ( unlikely(v->arch.vm_event) )
>>       {
>> -        struct monitor_write_data *w = &v->arch.vm_event->write_data;
>> -
>>           if ( v->arch.vm_event->emulate_flags )
>>           {
>>               enum emul_kind kind = EMUL_KIND_NORMAL;
>> @@ -493,32 +491,10 @@ void hvm_do_resume(struct vcpu *v)
>>   
>>               v->arch.vm_event->emulate_flags = 0;
>>           }
>> -
>> -        if ( w->do_write.msr )
>> -        {
>> -            hvm_msr_write_intercept(w->msr, w->value, 0);
>> -            w->do_write.msr = 0;
>> -        }
>> -
>> -        if ( w->do_write.cr0 )
>> -        {
>> -            hvm_set_cr0(w->cr0, 0);
>> -            w->do_write.cr0 = 0;
>> -        }
>> -
>> -        if ( w->do_write.cr4 )
>> -        {
>> -            hvm_set_cr4(w->cr4, 0);
>> -            w->do_write.cr4 = 0;
>> -        }
>> -
>> -        if ( w->do_write.cr3 )
>> -        {
>> -            hvm_set_cr3(w->cr3, 0);
>> -            w->do_write.cr3 = 0;
>> -        }
>>       }
>>   
>> +    arch_monitor_write_data(v);
> Why does this get moved outside the if(), with the same condition
> getting added inside the function (inverted for bailing early)?

I left that so because of patch 5/8 - specifically, monitor_write_data 
handling shouldn't depend on the vm_event subsystem being initialized.
But you're right, it still does depend on that initialization in this 
patch, so I should leave the call inside the if (and remove the check 
inside the function) as you suggest and only get it out in 5/8.
Will do that in v2.

>
>> @@ -119,6 +156,55 @@ bool_t monitored_msr(const struct domain *d, u32 msr)
>>       return test_bit(msr, bitmap);
>>   }
>>   
>> +static void write_ctrlreg_adjust_traps(struct domain *d)
>> +{
>> +    struct vcpu *v;
>> +    struct arch_vmx_struct *avmx;
>> +    unsigned int cr3_bitmask;
>> +    bool_t cr3_vmevent, cr3_ldexit;
>> +
>> +    /* Adjust CR3 load-exiting. */
>> +
>> +    /* vmx only */
>> +    ASSERT(cpu_has_vmx);
>> +
>> +    /* non-hap domains trap CR3 writes unconditionally */
>> +    if ( !paging_mode_hap(d) )
>> +    {
>> +        for_each_vcpu ( d, v )
>> +            ASSERT(v->arch.hvm_vmx.exec_control & CPU_BASED_CR3_LOAD_EXITING);
>> +        return;
>> +    }
>> +
>> +    cr3_bitmask = monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3);
>> +    cr3_vmevent = !!(d->arch.monitor.write_ctrlreg_enabled & cr3_bitmask);
>> +
>> +    for_each_vcpu ( d, v )
>> +    {
>> +        avmx = &v->arch.hvm_vmx;
>> +        cr3_ldexit = !!(avmx->exec_control & CPU_BASED_CR3_LOAD_EXITING);
>> +
>> +        if ( cr3_vmevent == cr3_ldexit )
>> +            continue;
>> +
>> +        /*
>> +         * If CR0.PE=0, CR3 load exiting must remain enabled.
>> +         * See vmx_update_guest_cr code motion for cr = 0.
>> +         */
>> +        if ( cr3_ldexit && !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) )
>> +            continue;
>> +
>> +        if ( cr3_vmevent )
>> +            avmx->exec_control |= CPU_BASED_CR3_LOAD_EXITING;
>> +        else
>> +            avmx->exec_control &= ~CPU_BASED_CR3_LOAD_EXITING;
>> +
>> +        vmx_vmcs_enter(v);
>> +        vmx_update_cpu_exec_control(v);
>> +        vmx_vmcs_exit(v);
>> +    }
>> +}
> While Razvan gave his ack already, I wonder whether it's really a
> good idea to put deeply VMX-specific code outside of a VMX-specific
> file.
>
> Jan

Well, a summary of what this function does would sound like: "adjusts 
CR3 load-exiting for cr-write monitor vm-events". IMHO that's (monitor) 
vm-event specific enough to be placed within the vm-event subsystem.
Could you suggest concretely how this separation would look like? (where 
to put this function/parts of it (and what parts), what name should it 
have once moved). Another reason this was done (besides avoiding 
hackishly doing a CR0 update when we actually need a CR3 update 
specifically for a vm-event to happen) is keeping symmetry between 
ARM<->X86 in a future patch that would implement monitor CR vm-events 
for ARM. In that patch write_ctrlreg_adjust_traps is renamed and 
implemented per-architecture, on ARM it would have the same job, i.e. 
updating some hypervisor traps (~ vmx execution controls) for CR 
vm-events to happen.

On a different note, one thing I forgot to do though is to also move the 
following check (instead of completely removing it from 
arch_monitor_domctl_event):

	if ( VM_EVENT_X86_CR3 == mop->u.mov_to_cr.index )

inside write_ctrlreg_adjust_traps. Will remedy that in v2.

Thanks,
Corneliu.
Razvan Cojocaru July 4, 2016, 1:22 p.m. UTC | #4
On 07/04/16 13:22, Jan Beulich wrote:
>>>> On 30.06.16 at 20:43, <czuzu@bitdefender.com> wrote:
>> --- a/xen/arch/x86/hvm/hvm.c
>> +++ b/xen/arch/x86/hvm/hvm.c
>> @@ -475,8 +475,6 @@ void hvm_do_resume(struct vcpu *v)
>>  
>>      if ( unlikely(v->arch.vm_event) )
>>      {
>> -        struct monitor_write_data *w = &v->arch.vm_event->write_data;
>> -
>>          if ( v->arch.vm_event->emulate_flags )
>>          {
>>              enum emul_kind kind = EMUL_KIND_NORMAL;
>> @@ -493,32 +491,10 @@ void hvm_do_resume(struct vcpu *v)
>>  
>>              v->arch.vm_event->emulate_flags = 0;
>>          }
>> -
>> -        if ( w->do_write.msr )
>> -        {
>> -            hvm_msr_write_intercept(w->msr, w->value, 0);
>> -            w->do_write.msr = 0;
>> -        }
>> -
>> -        if ( w->do_write.cr0 )
>> -        {
>> -            hvm_set_cr0(w->cr0, 0);
>> -            w->do_write.cr0 = 0;
>> -        }
>> -
>> -        if ( w->do_write.cr4 )
>> -        {
>> -            hvm_set_cr4(w->cr4, 0);
>> -            w->do_write.cr4 = 0;
>> -        }
>> -
>> -        if ( w->do_write.cr3 )
>> -        {
>> -            hvm_set_cr3(w->cr3, 0);
>> -            w->do_write.cr3 = 0;
>> -        }
>>      }
>>  
>> +    arch_monitor_write_data(v);
> 
> Why does this get moved outside the if(), with the same condition
> getting added inside the function (inverted for bailing early)?
> 
>> @@ -119,6 +156,55 @@ bool_t monitored_msr(const struct domain *d, u32 msr)
>>      return test_bit(msr, bitmap);
>>  }
>>  
>> +static void write_ctrlreg_adjust_traps(struct domain *d)
>> +{
>> +    struct vcpu *v;
>> +    struct arch_vmx_struct *avmx;
>> +    unsigned int cr3_bitmask;
>> +    bool_t cr3_vmevent, cr3_ldexit;
>> +
>> +    /* Adjust CR3 load-exiting. */
>> +
>> +    /* vmx only */
>> +    ASSERT(cpu_has_vmx);
>> +
>> +    /* non-hap domains trap CR3 writes unconditionally */
>> +    if ( !paging_mode_hap(d) )
>> +    {
>> +        for_each_vcpu ( d, v )
>> +            ASSERT(v->arch.hvm_vmx.exec_control & CPU_BASED_CR3_LOAD_EXITING);
>> +        return;
>> +    }
>> +
>> +    cr3_bitmask = monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3);
>> +    cr3_vmevent = !!(d->arch.monitor.write_ctrlreg_enabled & cr3_bitmask);
>> +
>> +    for_each_vcpu ( d, v )
>> +    {
>> +        avmx = &v->arch.hvm_vmx;
>> +        cr3_ldexit = !!(avmx->exec_control & CPU_BASED_CR3_LOAD_EXITING);
>> +
>> +        if ( cr3_vmevent == cr3_ldexit )
>> +            continue;
>> +
>> +        /*
>> +         * If CR0.PE=0, CR3 load exiting must remain enabled.
>> +         * See vmx_update_guest_cr code motion for cr = 0.
>> +         */
>> +        if ( cr3_ldexit && !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) )
>> +            continue;
>> +
>> +        if ( cr3_vmevent )
>> +            avmx->exec_control |= CPU_BASED_CR3_LOAD_EXITING;
>> +        else
>> +            avmx->exec_control &= ~CPU_BASED_CR3_LOAD_EXITING;
>> +
>> +        vmx_vmcs_enter(v);
>> +        vmx_update_cpu_exec_control(v);
>> +        vmx_vmcs_exit(v);
>> +    }
>> +}
> 
> While Razvan gave his ack already, I wonder whether it's really a
> good idea to put deeply VMX-specific code outside of a VMX-specific
> file.

Didn't I add "for monitor / vm_event parts Acked-by: ..."? If I didn't,
I meant to. Obviously VMX code maintainers outrank me on these issues.


Thanks,
Razvan
Jan Beulich July 4, 2016, 2:05 p.m. UTC | #5
>>> On 04.07.16 at 15:22, <rcojocaru@bitdefender.com> wrote:
> On 07/04/16 13:22, Jan Beulich wrote:
>>>>> On 30.06.16 at 20:43, <czuzu@bitdefender.com> wrote:
>>> @@ -119,6 +156,55 @@ bool_t monitored_msr(const struct domain *d, u32 msr)
>>>      return test_bit(msr, bitmap);
>>>  }
>>>  
>>> +static void write_ctrlreg_adjust_traps(struct domain *d)
>>> +{
>>> +    struct vcpu *v;
>>> +    struct arch_vmx_struct *avmx;
>>> +    unsigned int cr3_bitmask;
>>> +    bool_t cr3_vmevent, cr3_ldexit;
>>> +
>>> +    /* Adjust CR3 load-exiting. */
>>> +
>>> +    /* vmx only */
>>> +    ASSERT(cpu_has_vmx);
>>> +
>>> +    /* non-hap domains trap CR3 writes unconditionally */
>>> +    if ( !paging_mode_hap(d) )
>>> +    {
>>> +        for_each_vcpu ( d, v )
>>> +            ASSERT(v->arch.hvm_vmx.exec_control & CPU_BASED_CR3_LOAD_EXITING);
>>> +        return;
>>> +    }
>>> +
>>> +    cr3_bitmask = monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3);
>>> +    cr3_vmevent = !!(d->arch.monitor.write_ctrlreg_enabled & cr3_bitmask);
>>> +
>>> +    for_each_vcpu ( d, v )
>>> +    {
>>> +        avmx = &v->arch.hvm_vmx;
>>> +        cr3_ldexit = !!(avmx->exec_control & CPU_BASED_CR3_LOAD_EXITING);
>>> +
>>> +        if ( cr3_vmevent == cr3_ldexit )
>>> +            continue;
>>> +
>>> +        /*
>>> +         * If CR0.PE=0, CR3 load exiting must remain enabled.
>>> +         * See vmx_update_guest_cr code motion for cr = 0.
>>> +         */
>>> +        if ( cr3_ldexit && !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) )
>>> +            continue;
>>> +
>>> +        if ( cr3_vmevent )
>>> +            avmx->exec_control |= CPU_BASED_CR3_LOAD_EXITING;
>>> +        else
>>> +            avmx->exec_control &= ~CPU_BASED_CR3_LOAD_EXITING;
>>> +
>>> +        vmx_vmcs_enter(v);
>>> +        vmx_update_cpu_exec_control(v);
>>> +        vmx_vmcs_exit(v);
>>> +    }
>>> +}
>> 
>> While Razvan gave his ack already, I wonder whether it's really a
>> good idea to put deeply VMX-specific code outside of a VMX-specific
>> file.
> 
> Didn't I add "for monitor / vm_event parts Acked-by: ..."? If I didn't,
> I meant to.

Well - this is a monitor file (monitor.c).

Jan
Razvan Cojocaru July 4, 2016, 2:16 p.m. UTC | #6
On 07/04/16 17:05, Jan Beulich wrote:
>>>> On 04.07.16 at 15:22, <rcojocaru@bitdefender.com> wrote:
>> On 07/04/16 13:22, Jan Beulich wrote:
>>>>>> On 30.06.16 at 20:43, <czuzu@bitdefender.com> wrote:
>>>> @@ -119,6 +156,55 @@ bool_t monitored_msr(const struct domain *d, u32 msr)
>>>>      return test_bit(msr, bitmap);
>>>>  }
>>>>  
>>>> +static void write_ctrlreg_adjust_traps(struct domain *d)
>>>> +{
>>>> +    struct vcpu *v;
>>>> +    struct arch_vmx_struct *avmx;
>>>> +    unsigned int cr3_bitmask;
>>>> +    bool_t cr3_vmevent, cr3_ldexit;
>>>> +
>>>> +    /* Adjust CR3 load-exiting. */
>>>> +
>>>> +    /* vmx only */
>>>> +    ASSERT(cpu_has_vmx);
>>>> +
>>>> +    /* non-hap domains trap CR3 writes unconditionally */
>>>> +    if ( !paging_mode_hap(d) )
>>>> +    {
>>>> +        for_each_vcpu ( d, v )
>>>> +            ASSERT(v->arch.hvm_vmx.exec_control & CPU_BASED_CR3_LOAD_EXITING);
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    cr3_bitmask = monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3);
>>>> +    cr3_vmevent = !!(d->arch.monitor.write_ctrlreg_enabled & cr3_bitmask);
>>>> +
>>>> +    for_each_vcpu ( d, v )
>>>> +    {
>>>> +        avmx = &v->arch.hvm_vmx;
>>>> +        cr3_ldexit = !!(avmx->exec_control & CPU_BASED_CR3_LOAD_EXITING);
>>>> +
>>>> +        if ( cr3_vmevent == cr3_ldexit )
>>>> +            continue;
>>>> +
>>>> +        /*
>>>> +         * If CR0.PE=0, CR3 load exiting must remain enabled.
>>>> +         * See vmx_update_guest_cr code motion for cr = 0.
>>>> +         */
>>>> +        if ( cr3_ldexit && !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) )
>>>> +            continue;
>>>> +
>>>> +        if ( cr3_vmevent )
>>>> +            avmx->exec_control |= CPU_BASED_CR3_LOAD_EXITING;
>>>> +        else
>>>> +            avmx->exec_control &= ~CPU_BASED_CR3_LOAD_EXITING;
>>>> +
>>>> +        vmx_vmcs_enter(v);
>>>> +        vmx_update_cpu_exec_control(v);
>>>> +        vmx_vmcs_exit(v);
>>>> +    }
>>>> +}
>>>
>>> While Razvan gave his ack already, I wonder whether it's really a
>>> good idea to put deeply VMX-specific code outside of a VMX-specific
>>> file.
>>
>> Didn't I add "for monitor / vm_event parts Acked-by: ..."? If I didn't,
>> I meant to.
> 
> Well - this is a monitor file (monitor.c).

Fair enough, I should have been more detailed here. I do see the merit
of your suggestion, and so FWIW I second your suggestion to move the
code to some VMX-specific part of the tree if possible.


Thanks,
Razvan
Corneliu ZUZU July 4, 2016, 2:35 p.m. UTC | #7
On 7/4/2016 5:16 PM, Razvan Cojocaru wrote:
> On 07/04/16 17:05, Jan Beulich wrote:
>>>>> On 04.07.16 at 15:22, <rcojocaru@bitdefender.com> wrote:
>>> On 07/04/16 13:22, Jan Beulich wrote:
>>>>>>> On 30.06.16 at 20:43, <czuzu@bitdefender.com> wrote:
>>>>> @@ -119,6 +156,55 @@ bool_t monitored_msr(const struct domain *d, u32 msr)
>>>>>       return test_bit(msr, bitmap);
>>>>>   }
>>>>>   
>>>>> +static void write_ctrlreg_adjust_traps(struct domain *d)
>>>>> +{
>>>>> +    struct vcpu *v;
>>>>> +    struct arch_vmx_struct *avmx;
>>>>> +    unsigned int cr3_bitmask;
>>>>> +    bool_t cr3_vmevent, cr3_ldexit;
>>>>> +
>>>>> +    /* Adjust CR3 load-exiting. */
>>>>> +
>>>>> +    /* vmx only */
>>>>> +    ASSERT(cpu_has_vmx);
>>>>> +
>>>>> +    /* non-hap domains trap CR3 writes unconditionally */
>>>>> +    if ( !paging_mode_hap(d) )
>>>>> +    {
>>>>> +        for_each_vcpu ( d, v )
>>>>> +            ASSERT(v->arch.hvm_vmx.exec_control & CPU_BASED_CR3_LOAD_EXITING);
>>>>> +        return;
>>>>> +    }
>>>>> +
>>>>> +    cr3_bitmask = monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3);
>>>>> +    cr3_vmevent = !!(d->arch.monitor.write_ctrlreg_enabled & cr3_bitmask);
>>>>> +
>>>>> +    for_each_vcpu ( d, v )
>>>>> +    {
>>>>> +        avmx = &v->arch.hvm_vmx;
>>>>> +        cr3_ldexit = !!(avmx->exec_control & CPU_BASED_CR3_LOAD_EXITING);
>>>>> +
>>>>> +        if ( cr3_vmevent == cr3_ldexit )
>>>>> +            continue;
>>>>> +
>>>>> +        /*
>>>>> +         * If CR0.PE=0, CR3 load exiting must remain enabled.
>>>>> +         * See vmx_update_guest_cr code motion for cr = 0.
>>>>> +         */
>>>>> +        if ( cr3_ldexit && !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) )
>>>>> +            continue;
>>>>> +
>>>>> +        if ( cr3_vmevent )
>>>>> +            avmx->exec_control |= CPU_BASED_CR3_LOAD_EXITING;
>>>>> +        else
>>>>> +            avmx->exec_control &= ~CPU_BASED_CR3_LOAD_EXITING;
>>>>> +
>>>>> +        vmx_vmcs_enter(v);
>>>>> +        vmx_update_cpu_exec_control(v);
>>>>> +        vmx_vmcs_exit(v);
>>>>> +    }
>>>>> +}
>>>> While Razvan gave his ack already, I wonder whether it's really a
>>>> good idea to put deeply VMX-specific code outside of a VMX-specific
>>>> file.
>>> Didn't I add "for monitor / vm_event parts Acked-by: ..."? If I didn't,
>>> I meant to.
>> Well - this is a monitor file (monitor.c).
> Fair enough, I should have been more detailed here. I do see the merit
> of your suggestion, and so FWIW I second your suggestion to move the
> code to some VMX-specific part of the tree if possible.

But did you also see my reply concerning this suggestion? :-)

>
> Thanks,
> Razvan
>

Thanks,
Corneliu.
Jan Beulich July 4, 2016, 2:58 p.m. UTC | #8
>>> On 04.07.16 at 13:02, <czuzu@bitdefender.com> wrote:
> On 7/4/2016 1:22 PM, Jan Beulich wrote:
>>>>> On 30.06.16 at 20:43, <czuzu@bitdefender.com> wrote:
>>> @@ -119,6 +156,55 @@ bool_t monitored_msr(const struct domain *d, u32 msr)
>>>       return test_bit(msr, bitmap);
>>>   }
>>>   
>>> +static void write_ctrlreg_adjust_traps(struct domain *d)
>>> +{
>>> +    struct vcpu *v;
>>> +    struct arch_vmx_struct *avmx;
>>> +    unsigned int cr3_bitmask;
>>> +    bool_t cr3_vmevent, cr3_ldexit;
>>> +
>>> +    /* Adjust CR3 load-exiting. */
>>> +
>>> +    /* vmx only */
>>> +    ASSERT(cpu_has_vmx);
>>> +
>>> +    /* non-hap domains trap CR3 writes unconditionally */
>>> +    if ( !paging_mode_hap(d) )
>>> +    {
>>> +        for_each_vcpu ( d, v )
>>> +            ASSERT(v->arch.hvm_vmx.exec_control & CPU_BASED_CR3_LOAD_EXITING);
>>> +        return;
>>> +    }
>>> +
>>> +    cr3_bitmask = monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3);
>>> +    cr3_vmevent = !!(d->arch.monitor.write_ctrlreg_enabled & cr3_bitmask);
>>> +
>>> +    for_each_vcpu ( d, v )
>>> +    {
>>> +        avmx = &v->arch.hvm_vmx;
>>> +        cr3_ldexit = !!(avmx->exec_control & CPU_BASED_CR3_LOAD_EXITING);
>>> +
>>> +        if ( cr3_vmevent == cr3_ldexit )
>>> +            continue;
>>> +
>>> +        /*
>>> +         * If CR0.PE=0, CR3 load exiting must remain enabled.
>>> +         * See vmx_update_guest_cr code motion for cr = 0.
>>> +         */
>>> +        if ( cr3_ldexit && !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) )
>>> +            continue;
>>> +
>>> +        if ( cr3_vmevent )
>>> +            avmx->exec_control |= CPU_BASED_CR3_LOAD_EXITING;
>>> +        else
>>> +            avmx->exec_control &= ~CPU_BASED_CR3_LOAD_EXITING;
>>> +
>>> +        vmx_vmcs_enter(v);
>>> +        vmx_update_cpu_exec_control(v);
>>> +        vmx_vmcs_exit(v);
>>> +    }
>>> +}
>> While Razvan gave his ack already, I wonder whether it's really a
>> good idea to put deeply VMX-specific code outside of a VMX-specific
>> file.
> 
> Well, a summary of what this function does would sound like: "adjusts 
> CR3 load-exiting for cr-write monitor vm-events". IMHO that's (monitor) 
> vm-event specific enough to be placed within the vm-event subsystem.
> Could you suggest concretely how this separation would look like? (where 
> to put this function/parts of it (and what parts), what name should it 
> have once moved).

I won't go into that level of detail. Fact is that VMX-specific code
should be kept out of here. Whether you move the entire function
behind a hvm_funcs hook or just part of it is of no interest to me.
In no case should, if and when SVM eventually gets supported for
vm-event/monitor too, this function end up doing both VMX and SVM
specific things.

Jan
Corneliu ZUZU July 4, 2016, 4 p.m. UTC | #9
On 7/4/2016 5:58 PM, Jan Beulich wrote:
>>>> On 04.07.16 at 13:02, <czuzu@bitdefender.com> wrote:
>> On 7/4/2016 1:22 PM, Jan Beulich wrote:
>>>>>> On 30.06.16 at 20:43, <czuzu@bitdefender.com> wrote:
>>>> @@ -119,6 +156,55 @@ bool_t monitored_msr(const struct domain *d, u32 msr)
>>>>        return test_bit(msr, bitmap);
>>>>    }
>>>>    
>>>> +static void write_ctrlreg_adjust_traps(struct domain *d)
>>>> +{
>>>> +    struct vcpu *v;
>>>> +    struct arch_vmx_struct *avmx;
>>>> +    unsigned int cr3_bitmask;
>>>> +    bool_t cr3_vmevent, cr3_ldexit;
>>>> +
>>>> +    /* Adjust CR3 load-exiting. */
>>>> +
>>>> +    /* vmx only */
>>>> +    ASSERT(cpu_has_vmx);
>>>> +
>>>> +    /* non-hap domains trap CR3 writes unconditionally */
>>>> +    if ( !paging_mode_hap(d) )
>>>> +    {
>>>> +        for_each_vcpu ( d, v )
>>>> +            ASSERT(v->arch.hvm_vmx.exec_control & CPU_BASED_CR3_LOAD_EXITING);
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    cr3_bitmask = monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3);
>>>> +    cr3_vmevent = !!(d->arch.monitor.write_ctrlreg_enabled & cr3_bitmask);
>>>> +
>>>> +    for_each_vcpu ( d, v )
>>>> +    {
>>>> +        avmx = &v->arch.hvm_vmx;
>>>> +        cr3_ldexit = !!(avmx->exec_control & CPU_BASED_CR3_LOAD_EXITING);
>>>> +
>>>> +        if ( cr3_vmevent == cr3_ldexit )
>>>> +            continue;
>>>> +
>>>> +        /*
>>>> +         * If CR0.PE=0, CR3 load exiting must remain enabled.
>>>> +         * See vmx_update_guest_cr code motion for cr = 0.
>>>> +         */
>>>> +        if ( cr3_ldexit && !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) )
>>>> +            continue;
>>>> +
>>>> +        if ( cr3_vmevent )
>>>> +            avmx->exec_control |= CPU_BASED_CR3_LOAD_EXITING;
>>>> +        else
>>>> +            avmx->exec_control &= ~CPU_BASED_CR3_LOAD_EXITING;
>>>> +
>>>> +        vmx_vmcs_enter(v);
>>>> +        vmx_update_cpu_exec_control(v);
>>>> +        vmx_vmcs_exit(v);
>>>> +    }
>>>> +}
>>> While Razvan gave his ack already, I wonder whether it's really a
>>> good idea to put deeply VMX-specific code outside of a VMX-specific
>>> file.
>> Well, a summary of what this function does would sound like: "adjusts
>> CR3 load-exiting for cr-write monitor vm-events". IMHO that's (monitor)
>> vm-event specific enough to be placed within the vm-event subsystem.
>> Could you suggest concretely how this separation would look like? (where
>> to put this function/parts of it (and what parts), what name should it
>> have once moved).
> I won't go into that level of detail. Fact is that VMX-specific code
> should be kept out of here. Whether you move the entire function
> behind a hvm_funcs hook or just part of it is of no interest to me.
> In no case should, if and when SVM eventually gets supported for
> vm-event/monitor too, this function end up doing both VMX and SVM
> specific things.
>
> Jan

Why move it behind a hvm_funcs hook if it's only valid for VMX? SVM 
support is not currently implemented, hence the ASSERT(cpu_has_vmx) at 
the beginning of the function.
And of course if @ some point SVM support will be implemented then the 
right thing to do is what you say, i.e. make this function part of 
hvm_function_table, but until then I don't see why we should do that.
Note that arch_monitor_get_capabilities also returns no capability at 
the moment if !cpu_has_vmx.

What if I move the vmx-specific parts to vmx.c in a function called 
something like vmx_vm_event_update_cr3_traps() and call it from 
write_ctrlreg_adjust_traps instead?...

Corneliu.
Jan Beulich July 4, 2016, 4:12 p.m. UTC | #10
>>> On 04.07.16 at 18:00, <czuzu@bitdefender.com> wrote:
> On 7/4/2016 5:58 PM, Jan Beulich wrote:
>>>>> On 04.07.16 at 13:02, <czuzu@bitdefender.com> wrote:
>>> On 7/4/2016 1:22 PM, Jan Beulich wrote:
>>>>>>> On 30.06.16 at 20:43, <czuzu@bitdefender.com> wrote:
>>>>> @@ -119,6 +156,55 @@ bool_t monitored_msr(const struct domain *d, u32 msr)
>>>>>        return test_bit(msr, bitmap);
>>>>>    }
>>>>>    
>>>>> +static void write_ctrlreg_adjust_traps(struct domain *d)
>>>>> +{
>>>>> +    struct vcpu *v;
>>>>> +    struct arch_vmx_struct *avmx;
>>>>> +    unsigned int cr3_bitmask;
>>>>> +    bool_t cr3_vmevent, cr3_ldexit;
>>>>> +
>>>>> +    /* Adjust CR3 load-exiting. */
>>>>> +
>>>>> +    /* vmx only */
>>>>> +    ASSERT(cpu_has_vmx);
>>>>> +
>>>>> +    /* non-hap domains trap CR3 writes unconditionally */
>>>>> +    if ( !paging_mode_hap(d) )
>>>>> +    {
>>>>> +        for_each_vcpu ( d, v )
>>>>> +            ASSERT(v->arch.hvm_vmx.exec_control & CPU_BASED_CR3_LOAD_EXITING);
>>>>> +        return;
>>>>> +    }
>>>>> +
>>>>> +    cr3_bitmask = monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3);
>>>>> +    cr3_vmevent = !!(d->arch.monitor.write_ctrlreg_enabled & cr3_bitmask);
>>>>> +
>>>>> +    for_each_vcpu ( d, v )
>>>>> +    {
>>>>> +        avmx = &v->arch.hvm_vmx;
>>>>> +        cr3_ldexit = !!(avmx->exec_control & CPU_BASED_CR3_LOAD_EXITING);
>>>>> +
>>>>> +        if ( cr3_vmevent == cr3_ldexit )
>>>>> +            continue;
>>>>> +
>>>>> +        /*
>>>>> +         * If CR0.PE=0, CR3 load exiting must remain enabled.
>>>>> +         * See vmx_update_guest_cr code motion for cr = 0.
>>>>> +         */
>>>>> +        if ( cr3_ldexit && !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) )
>>>>> +            continue;
>>>>> +
>>>>> +        if ( cr3_vmevent )
>>>>> +            avmx->exec_control |= CPU_BASED_CR3_LOAD_EXITING;
>>>>> +        else
>>>>> +            avmx->exec_control &= ~CPU_BASED_CR3_LOAD_EXITING;
>>>>> +
>>>>> +        vmx_vmcs_enter(v);
>>>>> +        vmx_update_cpu_exec_control(v);
>>>>> +        vmx_vmcs_exit(v);
>>>>> +    }
>>>>> +}
>>>> While Razvan gave his ack already, I wonder whether it's really a
>>>> good idea to put deeply VMX-specific code outside of a VMX-specific
>>>> file.
>>> Well, a summary of what this function does would sound like: "adjusts
>>> CR3 load-exiting for cr-write monitor vm-events". IMHO that's (monitor)
>>> vm-event specific enough to be placed within the vm-event subsystem.
>>> Could you suggest concretely how this separation would look like? (where
>>> to put this function/parts of it (and what parts), what name should it
>>> have once moved).
>> I won't go into that level of detail. Fact is that VMX-specific code
>> should be kept out of here. Whether you move the entire function
>> behind a hvm_funcs hook or just part of it is of no interest to me.
>> In no case should, if and when SVM eventually gets supported for
>> vm-event/monitor too, this function end up doing both VMX and SVM
>> specific things.
> 
> Why move it behind a hvm_funcs hook if it's only valid for VMX? SVM 
> support is not currently implemented, hence the ASSERT(cpu_has_vmx) at 
> the beginning of the function.
> And of course if @ some point SVM support will be implemented then the 
> right thing to do is what you say, i.e. make this function part of 
> hvm_function_table, but until then I don't see why we should do that.
> Note that arch_monitor_get_capabilities also returns no capability at 
> the moment if !cpu_has_vmx.
> 
> What if I move the vmx-specific parts to vmx.c in a function called 
> something like vmx_vm_event_update_cr3_traps() and call it from 
> write_ctrlreg_adjust_traps instead?...

That would be fine with me.

Jan
diff mbox

Patch

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index c89ab6e..5481a6e 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -475,8 +475,6 @@  void hvm_do_resume(struct vcpu *v)
 
     if ( unlikely(v->arch.vm_event) )
     {
-        struct monitor_write_data *w = &v->arch.vm_event->write_data;
-
         if ( v->arch.vm_event->emulate_flags )
         {
             enum emul_kind kind = EMUL_KIND_NORMAL;
@@ -493,32 +491,10 @@  void hvm_do_resume(struct vcpu *v)
 
             v->arch.vm_event->emulate_flags = 0;
         }
-
-        if ( w->do_write.msr )
-        {
-            hvm_msr_write_intercept(w->msr, w->value, 0);
-            w->do_write.msr = 0;
-        }
-
-        if ( w->do_write.cr0 )
-        {
-            hvm_set_cr0(w->cr0, 0);
-            w->do_write.cr0 = 0;
-        }
-
-        if ( w->do_write.cr4 )
-        {
-            hvm_set_cr4(w->cr4, 0);
-            w->do_write.cr4 = 0;
-        }
-
-        if ( w->do_write.cr3 )
-        {
-            hvm_set_cr3(w->cr3, 0);
-            w->do_write.cr3 = 0;
-        }
     }
 
+    arch_monitor_write_data(v);
+
     /* Inject pending hw/sw trap */
     if ( v->arch.hvm_vcpu.inject_trap.vector != -1 )
     {
@@ -2206,7 +2182,10 @@  int hvm_set_cr0(unsigned long value, bool_t may_defer)
 
         if ( hvm_monitor_crX(CR0, value, old_value) )
         {
-            /* The actual write will occur in hvm_do_resume(), if permitted. */
+            /*
+             * The actual write will occur in arch_monitor_write_data(), if
+             * permitted.
+             */
             v->arch.vm_event->write_data.do_write.cr0 = 1;
             v->arch.vm_event->write_data.cr0 = value;
 
@@ -2308,7 +2287,10 @@  int hvm_set_cr3(unsigned long value, bool_t may_defer)
 
         if ( hvm_monitor_crX(CR3, value, old) )
         {
-            /* The actual write will occur in hvm_do_resume(), if permitted. */
+            /*
+             * The actual write will occur in arch_monitor_write_data(), if
+             * permitted.
+             */
             v->arch.vm_event->write_data.do_write.cr3 = 1;
             v->arch.vm_event->write_data.cr3 = value;
 
@@ -2388,7 +2370,10 @@  int hvm_set_cr4(unsigned long value, bool_t may_defer)
 
         if ( hvm_monitor_crX(CR4, value, old_cr) )
         {
-            /* The actual write will occur in hvm_do_resume(), if permitted. */
+            /*
+             * The actual write will occur in arch_monitor_write_data(), if
+             * permitted.
+             */
             v->arch.vm_event->write_data.do_write.cr4 = 1;
             v->arch.vm_event->write_data.cr4 = value;
 
@@ -3767,7 +3752,10 @@  int hvm_msr_write_intercept(unsigned int msr, uint64_t msr_content,
     {
         ASSERT(v->arch.vm_event);
 
-        /* The actual write will occur in hvm_do_resume() (if permitted). */
+        /*
+         * The actual write will occur in arch_monitor_write_data(), if
+         * permitted.
+         */
         v->arch.vm_event->write_data.do_write.msr = 1;
         v->arch.vm_event->write_data.msr = msr;
         v->arch.vm_event->write_data.value = msr_content;
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 15c84c2..de04e6c 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1442,11 +1442,6 @@  static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr)
             if ( !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) )
                 v->arch.hvm_vmx.exec_control |= cr3_ctls;
 
-            /* Trap CR3 updates if CR3 memory events are enabled. */
-            if ( v->domain->arch.monitor.write_ctrlreg_enabled &
-                 monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3) )
-                v->arch.hvm_vmx.exec_control |= CPU_BASED_CR3_LOAD_EXITING;
-
             if ( old_ctls != v->arch.hvm_vmx.exec_control )
                 vmx_update_cpu_exec_control(v);
         }
diff --git a/xen/arch/x86/monitor.c b/xen/arch/x86/monitor.c
index a271161..90e4856 100644
--- a/xen/arch/x86/monitor.c
+++ b/xen/arch/x86/monitor.c
@@ -20,6 +20,9 @@ 
  */
 
 #include <asm/monitor.h>
+#include <asm/paging.h>
+#include <asm/hvm/vmx/vmx.h>
+#include <asm/vm_event.h>
 #include <public/vm_event.h>
 
 int arch_monitor_init_domain(struct domain *d)
@@ -41,6 +44,40 @@  void arch_monitor_cleanup_domain(struct domain *d)
     memset(&d->monitor, 0, sizeof(d->monitor));
 }
 
+void arch_monitor_write_data(struct vcpu *v)
+{
+    struct monitor_write_data *w;
+
+    if ( likely(!v->arch.vm_event) )
+        return;
+
+    w = &v->arch.vm_event->write_data;
+
+    if ( w->do_write.msr )
+    {
+        hvm_msr_write_intercept(w->msr, w->value, 0);
+        w->do_write.msr = 0;
+    }
+
+    if ( w->do_write.cr0 )
+    {
+        hvm_set_cr0(w->cr0, 0);
+        w->do_write.cr0 = 0;
+    }
+
+    if ( w->do_write.cr4 )
+    {
+        hvm_set_cr4(w->cr4, 0);
+        w->do_write.cr4 = 0;
+    }
+
+    if ( w->do_write.cr3 )
+    {
+        hvm_set_cr3(w->cr3, 0);
+        w->do_write.cr3 = 0;
+    }
+}
+
 static unsigned long *monitor_bitmap_for_msr(const struct domain *d, u32 *msr)
 {
     ASSERT(d->arch.monitor.msr_bitmap && msr);
@@ -119,6 +156,55 @@  bool_t monitored_msr(const struct domain *d, u32 msr)
     return test_bit(msr, bitmap);
 }
 
+static void write_ctrlreg_adjust_traps(struct domain *d)
+{
+    struct vcpu *v;
+    struct arch_vmx_struct *avmx;
+    unsigned int cr3_bitmask;
+    bool_t cr3_vmevent, cr3_ldexit;
+
+    /* Adjust CR3 load-exiting. */
+
+    /* vmx only */
+    ASSERT(cpu_has_vmx);
+
+    /* non-hap domains trap CR3 writes unconditionally */
+    if ( !paging_mode_hap(d) )
+    {
+        for_each_vcpu ( d, v )
+            ASSERT(v->arch.hvm_vmx.exec_control & CPU_BASED_CR3_LOAD_EXITING);
+        return;
+    }
+
+    cr3_bitmask = monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3);
+    cr3_vmevent = !!(d->arch.monitor.write_ctrlreg_enabled & cr3_bitmask);
+
+    for_each_vcpu ( d, v )
+    {
+        avmx = &v->arch.hvm_vmx;
+        cr3_ldexit = !!(avmx->exec_control & CPU_BASED_CR3_LOAD_EXITING);
+
+        if ( cr3_vmevent == cr3_ldexit )
+            continue;
+
+        /*
+         * If CR0.PE=0, CR3 load exiting must remain enabled.
+         * See vmx_update_guest_cr code motion for cr = 0.
+         */
+        if ( cr3_ldexit && !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) )
+            continue;
+
+        if ( cr3_vmevent )
+            avmx->exec_control |= CPU_BASED_CR3_LOAD_EXITING;
+        else
+            avmx->exec_control &= ~CPU_BASED_CR3_LOAD_EXITING;
+
+        vmx_vmcs_enter(v);
+        vmx_update_cpu_exec_control(v);
+        vmx_vmcs_exit(v);
+    }
+}
+
 int arch_monitor_domctl_event(struct domain *d,
                               struct xen_domctl_monitor_op *mop)
 {
@@ -159,13 +245,7 @@  int arch_monitor_domctl_event(struct domain *d,
         else
             ad->monitor.write_ctrlreg_enabled &= ~ctrlreg_bitmask;
 
-        if ( VM_EVENT_X86_CR3 == mop->u.mov_to_cr.index )
-        {
-            struct vcpu *v;
-            /* Latches new CR3 mask through CR0 code. */
-            for_each_vcpu ( d, v )
-                hvm_update_guest_cr(v, 0);
-        }
+        write_ctrlreg_adjust_traps(d);
 
         domain_unpause(d);
 
diff --git a/xen/include/asm-x86/monitor.h b/xen/include/asm-x86/monitor.h
index 94b6768..1068376 100644
--- a/xen/include/asm-x86/monitor.h
+++ b/xen/include/asm-x86/monitor.h
@@ -93,6 +93,8 @@  int arch_monitor_init_domain(struct domain *d);
 
 void arch_monitor_cleanup_domain(struct domain *d);
 
+void arch_monitor_write_data(struct vcpu *v);
+
 bool_t monitored_msr(const struct domain *d, u32 msr);
 
 #endif /* __ASM_X86_MONITOR_H__ */