
[V2] x86/vm_event: Added support for VM_EVENT_REASON_INTERRUPT

Message ID 1478766941-14003-1-git-send-email-rcojocaru@bitdefender.com (mailing list archive)
State New, archived

Commit Message

Razvan Cojocaru Nov. 10, 2016, 8:35 a.m. UTC
Added support for a new event type, VM_EVENT_REASON_INTERRUPT,
which is now fired in a one-shot manner when enabled via the new
VM_EVENT_FLAG_GET_NEXT_INTERRUPT vm_event response flag.
The patch also fixes the behaviour of the xc_hvm_inject_trap()
hypercall, which would lead to non-architectural interrupts
overwriting pending (specifically reinjected) architectural ones.

Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>

---
Changes since V1:
 - Modified the if() in hvm_do_resume() for readability.
 - Replaced hard tab with spaces.
 - Removed a local variable used only once.
 - Moved cr2 assignment to the common part of the code.
 - Now listing the new event in the x86 vm_event capability list.
 - Moved struct variables for readability.
 - Padding for struct arch_vm_event and struct vm_event_interrupt.
 - Renamed vm_event_interrupt to vm_event_interrupt_x86 and
   added the interrupt union.
---
 xen/arch/x86/hvm/hvm.c            | 23 ++++++++++++++++++++++-
 xen/arch/x86/hvm/monitor.c        | 14 ++++++++++++++
 xen/arch/x86/hvm/svm/svm.c        | 15 +++++++++++++++
 xen/arch/x86/hvm/vmx/vmx.c        | 20 ++++++++++++++++++++
 xen/arch/x86/vm_event.c           |  6 ++++++
 xen/common/vm_event.c             |  3 +++
 xen/include/asm-arm/vm_event.h    |  6 ++++++
 xen/include/asm-x86/hvm/hvm.h     |  3 +++
 xen/include/asm-x86/hvm/monitor.h |  2 ++
 xen/include/asm-x86/monitor.h     |  3 ++-
 xen/include/asm-x86/vm_event.h    |  1 +
 xen/include/public/domctl.h       |  1 +
 xen/include/public/vm_event.h     | 18 ++++++++++++++++++
 xen/include/xen/vm_event.h        |  2 ++
 14 files changed, 115 insertions(+), 2 deletions(-)
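
For context, here is a minimal sketch of how a monitor application might use the new response flag and consume the resulting event. The surrounding event-loop plumbing (ring handling, event channel notification) is assumed to exist, roughly along the lines of the xen-access test tool; none of the helper functions below are part of this patch.

#include <stdio.h>
#include <string.h>
#include <inttypes.h>
#include <xen/vm_event.h>    /* public vm_event header; include path may differ */

/* Sketch: fill in the response for a received event, asking for a one-shot
 * VM_EVENT_REASON_INTERRUPT on the next pending interrupt of this vCPU.
 * The caller is assumed to place the response on the ring and notify the
 * event channel, as a typical monitor loop would. */
static void prepare_response(const vm_event_request_t *req,
                             vm_event_response_t *rsp)
{
    memset(rsp, 0, sizeof(*rsp));
    rsp->version = VM_EVENT_INTERFACE_VERSION;
    rsp->vcpu_id = req->vcpu_id;
    rsp->reason  = req->reason;
    /* Preserve the vCPU-paused handshake bit if the request carried it. */
    rsp->flags   = req->flags & VM_EVENT_FLAG_VCPU_PAUSED;
    /* New in this patch: request the one-shot interrupt notification. */
    rsp->flags  |= VM_EVENT_FLAG_GET_NEXT_INTERRUPT;
}

/* Sketch: what the resulting x86 event carries. */
static void handle_interrupt_event(const vm_event_request_t *req)
{
    if ( req->reason == VM_EVENT_REASON_INTERRUPT )
        printf("vcpu%u: vector %u type %u error_code %u cr2 0x%" PRIx64 "\n",
               req->vcpu_id, req->u.interrupt.x86.vector,
               req->u.interrupt.x86.type, req->u.interrupt.x86.error_code,
               req->u.interrupt.x86.cr2);
}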

Comments

Tamas K Lengyel Nov. 10, 2016, 1:11 p.m. UTC | #1
> diff --git a/xen/include/asm-x86/vm_event.h b/xen/include/asm-x86/vm_event.h
> index ca73f99..38c624c 100644
> --- a/xen/include/asm-x86/vm_event.h
> +++ b/xen/include/asm-x86/vm_event.h
> @@ -27,6 +27,7 @@
>   */
>  struct arch_vm_event {
>      uint32_t emulate_flags;
> +    bool monitor_next_interrupt;
>

This should be in domain.h with the rest of the monitor control bits (as
the name of the variable suggests as well).


>      union {
>          struct vm_event_emul_read_data read;
>          struct vm_event_emul_insn_data insn;
> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
> index 177319d..85cbb7c 100644
> --- a/xen/include/public/domctl.h
> +++ b/xen/include/public/domctl.h
> @@ -1086,6 +1086,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_psr_cmt_op_t);
>  #define XEN_DOMCTL_MONITOR_EVENT_DEBUG_EXCEPTION       5
>  #define XEN_DOMCTL_MONITOR_EVENT_CPUID                 6
>  #define XEN_DOMCTL_MONITOR_EVENT_PRIVILEGED_CALL       7
> +#define XEN_DOMCTL_MONITOR_EVENT_INTERRUPT             8
>
>  struct xen_domctl_monitor_op {
>      uint32_t op; /* XEN_DOMCTL_MONITOR_OP_* */
> diff --git a/xen/include/public/vm_event.h b/xen/include/public/vm_event.h
> index c28be5a..ba9accf 100644
> --- a/xen/include/public/vm_event.h
> +++ b/xen/include/public/vm_event.h
> @@ -105,6 +105,11 @@
>   * if any of those flags are set, only those will be honored).
>   */
>  #define VM_EVENT_FLAG_SET_EMUL_INSN_DATA (1 << 9)
>

Just reading this comment, it is not entirely clear whether the event will
be sent before or after the interrupt. Also, there is a typo in the spelling
of "occurring".


> +/*
> + * Have a one-shot VM_EVENT_REASON_INTERRUPT event sent for the first
> + * interrupt occuring after resuming the VCPU.
> + */
>
> +#define VM_EVENT_FLAG_GET_NEXT_INTERRUPT (1 << 10)
>
>
Tamas
Razvan Cojocaru Nov. 10, 2016, 1:33 p.m. UTC | #2
Hello Tamas, thanks for the review!

On 11/10/2016 03:11 PM, Tamas K Lengyel wrote:
>     diff --git a/xen/include/asm-x86/vm_event.h
>     b/xen/include/asm-x86/vm_event.h
>     index ca73f99..38c624c 100644
>     --- a/xen/include/asm-x86/vm_event.h
>     +++ b/xen/include/asm-x86/vm_event.h
>     @@ -27,6 +27,7 @@
>       */
>      struct arch_vm_event {
>          uint32_t emulate_flags;
>     +    bool monitor_next_interrupt;
> 
> 
> This should be in domain.h with the rest of the monitor control bits (as
> the name of the variable suggests as well).

Unfortunately that would alter the semantics of the patch, as this
variable needs to be per-VCPU. Putting it in domain.h as you suggest
would make it per-domain. Looking at places to put it so that it would
serve this purpose, struct arch_vm_event felt like the most natural place.

>          union {
>              struct vm_event_emul_read_data read;
>              struct vm_event_emul_insn_data insn;
>     diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
>     index 177319d..85cbb7c 100644
>     --- a/xen/include/public/domctl.h
>     +++ b/xen/include/public/domctl.h
>     @@ -1086,6 +1086,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_psr_cmt_op_t);
>      #define XEN_DOMCTL_MONITOR_EVENT_DEBUG_EXCEPTION       5
>      #define XEN_DOMCTL_MONITOR_EVENT_CPUID                 6
>      #define XEN_DOMCTL_MONITOR_EVENT_PRIVILEGED_CALL       7
>     +#define XEN_DOMCTL_MONITOR_EVENT_INTERRUPT             8
> 
>      struct xen_domctl_monitor_op {
>          uint32_t op; /* XEN_DOMCTL_MONITOR_OP_* */
>     diff --git a/xen/include/public/vm_event.h
>     b/xen/include/public/vm_event.h
>     index c28be5a..ba9accf 100644
>     --- a/xen/include/public/vm_event.h
>     +++ b/xen/include/public/vm_event.h
>     @@ -105,6 +105,11 @@
>       * if any of those flags are set, only those will be honored).
>       */
>      #define VM_EVENT_FLAG_SET_EMUL_INSN_DATA (1 << 9)
> 
> 
> Just reading this comment, it is not entirely clear whether the event
> will be sent before or after the interrupt. Also, there is a typo in
> the spelling of "occurring".
>  
> 
>     +/*
>     + * Have a one-shot VM_EVENT_REASON_INTERRUPT event sent for the first
>     + * interrupt occuring after resuming the VCPU.
>     + */
> 
>     +#define VM_EVENT_FLAG_GET_NEXT_INTERRUPT (1 << 10)

The event is sent before the interrupt takes effect, as is our
convention for vm_events (the check is for pending interrupts, so they
have not had any effect yet).

I'm not sure about "occuring", the sources I've found online suggest my
spelling is correct, but perhaps this is a case of a word that can be
spelled in more ways, such as "color" and "colour":
https://en.wiktionary.org/wiki/occuring

I can send a V3 if you'd like me to clarify when the event is being sent
with regards to interrupt delivery (in fact I think replacing the word
"occuring" with "pending" would nicely solve both your objections here).


Thanks,
Razvan
Julien Grall Nov. 10, 2016, 1:41 p.m. UTC | #3
On 10/11/16 13:33, Razvan Cojocaru wrote:
> I'm not sure about "occuring", the sources I've found online suggest my
> spelling is correct, but perhaps this is a case of a word that can be
> spelled in more ways, such as "color" and "colour":
> https://en.wiktionary.org/wiki/occuring

From the link:

"Misspelling of occurring, the present participle of occur."

So it is a common misspelling and should be avoided ;). If you look at
color/colour, Wiktionary will mention the alternative form (e.g. US/UK).
Razvan Cojocaru Nov. 10, 2016, 1:43 p.m. UTC | #4
On 11/10/2016 03:41 PM, Julien Grall wrote:
> On 10/11/16 13:33, Razvan Cojocaru wrote:
>> I'm not sure about "occuring", the sources I've found online suggest my
>> spelling is correct, but perhaps this is a case of a word that can be
>> spelled in more ways, such as "color" and "colour":
>> https://en.wiktionary.org/wiki/occuring
> 
> From the link:
> 
> "Misspelling of occurring, the present participle of occur."
> 
> So it is a common misspelling and should be avoided ;). If you look at
> color/colour, Wiktionary will mention the alternative form (e.g. US/UK).

Indeed, thanks for the clarification. Will change the comment.


Thanks,
Razvan
Boris Ostrovsky Nov. 10, 2016, 2:25 p.m. UTC | #5
On 11/10/2016 03:35 AM, Razvan Cojocaru wrote:
>  
> +bool hvm_get_pending_event(struct vcpu *v, struct hvm_trap *info)
> +{
> +    info->cr2 = v->arch.hvm_vcpu.guest_cr[2];
> +    return hvm_funcs.get_pending_event(v, info);
> +}

I believe it was Jan who requested that cr2 update be factored out but I
feel it's better to keep it in the hvm op and not break up
initialization of the info structure across multiple routines. The code
size may increase (by a few bytes) but I think it will be more readable.

-boris
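
For reference, the shape Boris is describing keeps the cr2 read inside the vendor hook, so the whole hvm_trap structure is filled in one place. A sketch based on the VMX hook from this patch (illustrative only, not the posted code):

static bool vmx_get_pending_event(struct vcpu *v, struct hvm_trap *info)
{
    unsigned long intr_info, error_code;

    vmx_vmcs_enter(v);
    __vmread(VM_ENTRY_INTR_INFO, &intr_info);
    __vmread(VM_ENTRY_EXCEPTION_ERROR_CODE, &error_code);
    vmx_vmcs_exit(v);

    if ( !(intr_info & INTR_INFO_VALID_MASK) )
        return false;

    info->vector = MASK_EXTR(intr_info, INTR_INFO_VECTOR_MASK);
    info->type = MASK_EXTR(intr_info, INTR_INFO_INTR_TYPE_MASK);
    info->error_code = error_code;
    /* Filled here rather than in the common hvm_get_pending_event() wrapper. */
    info->cr2 = v->arch.hvm_vcpu.guest_cr[2];

    return true;
}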
Razvan Cojocaru Nov. 10, 2016, 2:33 p.m. UTC | #6
On 11/10/2016 04:25 PM, Boris Ostrovsky wrote:
> On 11/10/2016 03:35 AM, Razvan Cojocaru wrote:
>>  
>> +bool hvm_get_pending_event(struct vcpu *v, struct hvm_trap *info)
>> +{
>> +    info->cr2 = v->arch.hvm_vcpu.guest_cr[2];
>> +    return hvm_funcs.get_pending_event(v, info);
>> +}
> 
> I believe it was Jan who requested that cr2 update be factored out but I
> feel it's better to keep it in the hvm op and not break up
> initialization of the info structure across multiple routines. The code
> size may increase (by a few bytes) but I think it will be more readable.

I am fine with either approach, though obviously I had originally
favoured your preference for the same reasons.

Jan, would it be OK to switch back to the original code here?


Thanks,
Razvan
Jan Beulich Nov. 10, 2016, 3:41 p.m. UTC | #7
>>> On 10.11.16 at 15:25, <boris.ostrovsky@oracle.com> wrote:
> On 11/10/2016 03:35 AM, Razvan Cojocaru wrote:
>>  
>> +bool hvm_get_pending_event(struct vcpu *v, struct hvm_trap *info)
>> +{
>> +    info->cr2 = v->arch.hvm_vcpu.guest_cr[2];
>> +    return hvm_funcs.get_pending_event(v, info);
>> +}
> 
> I believe it was Jan who requested that cr2 update be factored out but I
> feel it's better to keep it in the hvm op and not break up
> initialization of the info structure across multiple routines. The code
> size may increase (by a few bytes) but I think it will be more readable.

This is not about code size, but about avoiding doing the same
thing in multiple places.

Jan
Jan Beulich Nov. 10, 2016, 3:47 p.m. UTC | #8
>>> On 10.11.16 at 09:35, <rcojocaru@bitdefender.com> wrote:
> Changes since V1:
>  - Modified the if() in hvm_do_resume() for readability.
>  - Replaced hard tab with spaces.
>  - Removed a local variable used only once.
>  - Moved cr2 assignment to the common part of the code.
>  - Now listing the new event in the x86 vm_event capability list.
>  - Moved struct variables for readability.

Hmm, looks like you've moved the field in the structure declaration,
but not the two initializers (in SVM and VMX code).

> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -535,9 +535,24 @@ void hvm_do_resume(struct vcpu *v)
>      /* Inject pending hw/sw trap */
>      if ( v->arch.hvm_vcpu.inject_trap.vector != -1 )
>      {
> -        hvm_inject_trap(&v->arch.hvm_vcpu.inject_trap);
> +        if ( !hvm_event_pending(v) )
> +            hvm_inject_trap(&v->arch.hvm_vcpu.inject_trap);
> +
>          v->arch.hvm_vcpu.inject_trap.vector = -1;
>      }
> +
> +    if ( unlikely(v->arch.vm_event) &&
> +         v->arch.vm_event->monitor_next_interrupt )
> +    {
> +        struct hvm_trap info;
> +
> +        if ( hvm_get_pending_event(v, &info) )
> +        {
> +            hvm_monitor_interrupt(info.vector, info.type, info.error_code,
> +                                  info.cr2);
> +            v->arch.vm_event->monitor_next_interrupt = false;
> +        }
> +    }
>  }
>  
>  static int hvm_print_line(
> @@ -6047,6 +6062,12 @@ void hvm_domain_soft_reset(struct domain *d)
>      hvm_destroy_all_ioreq_servers(d);
>  }
>  
> +bool hvm_get_pending_event(struct vcpu *v, struct hvm_trap *info)
> +{
> +    info->cr2 = v->arch.hvm_vcpu.guest_cr[2];
> +    return hvm_funcs.get_pending_event(v, info);
> +}

Unless you expect more callers, I'm tempted to suggest to make this
static for now (and move it up ahead of its only caller).

> --- a/xen/arch/x86/vm_event.c
> +++ b/xen/arch/x86/vm_event.c
> @@ -134,6 +134,12 @@ void vm_event_set_registers(struct vcpu *v, vm_event_response_t *rsp)
>      v->arch.user_regs.eip = rsp->data.regs.x86.rip;
>  }
>  
> +void vm_event_monitor_next_interrupt(struct vcpu *v)
> +{
> +    ASSERT(v->arch.vm_event);
> +    v->arch.vm_event->monitor_next_interrupt = true;
> +}

I think at this point we're determined to no longer permit ASSERT()s
like this: Either use a simple if() or a BUG_ON(). Andrew, please
correct me if I've misunderstood earlier discussions.
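
The rework being asked for would be roughly the following (a sketch of the plain-if() variant, not the posted code):

void vm_event_monitor_next_interrupt(struct vcpu *v)
{
    if ( v->arch.vm_event )
        v->arch.vm_event->monitor_next_interrupt = true;
}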

> @@ -259,6 +266,14 @@ struct vm_event_cpuid {
>      uint32_t _pad;
>  };
>  
> +struct vm_event_interrupt_x86 {
> +    uint32_t vector;
> +    uint32_t type;
> +    uint32_t error_code;
> +    uint64_t cr2;
> +    uint32_t _pad;
> +};

> I'm pretty certain this is not what you want, as this makes the layout
vary between 32-bit (compat) and 64-bit (native) callers.

Jan
Razvan Cojocaru Nov. 10, 2016, 3:53 p.m. UTC | #9
On 11/10/2016 05:47 PM, Jan Beulich wrote:
>>>> On 10.11.16 at 09:35, <rcojocaru@bitdefender.com> wrote:
>> Changes since V1:
>>  - Modified the if() in hvm_do_resume() for readability.
>>  - Replaced hard tab with spaces.
>>  - Removed a local variable used only once.
>>  - Moved cr2 assignment to the common part of the code.
>>  - Now listing the new event in the x86 vm_event capability list.
>>  - Moved struct variables for readability.
> 
> Hmm, looks like you've moved the field in the structure declaration,
> but not the two initializers (in SVM and VMX code).

I'll modify those as well.

>> --- a/xen/arch/x86/hvm/hvm.c
>> +++ b/xen/arch/x86/hvm/hvm.c
>> @@ -535,9 +535,24 @@ void hvm_do_resume(struct vcpu *v)
>>      /* Inject pending hw/sw trap */
>>      if ( v->arch.hvm_vcpu.inject_trap.vector != -1 )
>>      {
>> -        hvm_inject_trap(&v->arch.hvm_vcpu.inject_trap);
>> +        if ( !hvm_event_pending(v) )
>> +            hvm_inject_trap(&v->arch.hvm_vcpu.inject_trap);
>> +
>>          v->arch.hvm_vcpu.inject_trap.vector = -1;
>>      }
>> +
>> +    if ( unlikely(v->arch.vm_event) &&
>> +         v->arch.vm_event->monitor_next_interrupt )
>> +    {
>> +        struct hvm_trap info;
>> +
>> +        if ( hvm_get_pending_event(v, &info) )
>> +        {
>> +            hvm_monitor_interrupt(info.vector, info.type, info.error_code,
>> +                                  info.cr2);
>> +            v->arch.vm_event->monitor_next_interrupt = false;
>> +        }
>> +    }
>>  }
>>  
>>  static int hvm_print_line(
>> @@ -6047,6 +6062,12 @@ void hvm_domain_soft_reset(struct domain *d)
>>      hvm_destroy_all_ioreq_servers(d);
>>  }
>>  
>> +bool hvm_get_pending_event(struct vcpu *v, struct hvm_trap *info)
>> +{
>> +    info->cr2 = v->arch.hvm_vcpu.guest_cr[2];
>> +    return hvm_funcs.get_pending_event(v, info);
>> +}
> 
> Unless you expect more callers, I'm tempted to suggest to make this
> static for now (and move it up ahead of its only caller).

Will do.

>> --- a/xen/arch/x86/vm_event.c
>> +++ b/xen/arch/x86/vm_event.c
>> @@ -134,6 +134,12 @@ void vm_event_set_registers(struct vcpu *v, vm_event_response_t *rsp)
>>      v->arch.user_regs.eip = rsp->data.regs.x86.rip;
>>  }
>>  
>> +void vm_event_monitor_next_interrupt(struct vcpu *v)
>> +{
>> +    ASSERT(v->arch.vm_event);
>> +    v->arch.vm_event->monitor_next_interrupt = true;
>> +}
> 
> I think at this point we're determined to no longer permit ASSERT()s
> like this: Either use a simple if() or a BUG_ON(). Andrew, please
> correct me if I've misunderstood earlier discussions.

I'll change it to a simple if().

>> @@ -259,6 +266,14 @@ struct vm_event_cpuid {
>>      uint32_t _pad;
>>  };
>>  
>> +struct vm_event_interrupt_x86 {
>> +    uint32_t vector;
>> +    uint32_t type;
>> +    uint32_t error_code;
>> +    uint64_t cr2;
>> +    uint32_t _pad;
>> +};
> 
> I'm pretty certain this is not what you want, as this makes the layout
> vary between 32-bit (compat) and 64-bit (native) callers.

I'll remove the _pad.


Thanks,
Razvan
Andrew Cooper Nov. 10, 2016, 3:59 p.m. UTC | #10
On 10/11/16 15:53, Razvan Cojocaru wrote:
> On 11/10/2016 05:47 PM, Jan Beulich wrote:
>>>>> On 10.11.16 at 09:35, <rcojocaru@bitdefender.com> wrote:
>>> Changes since V1:
>>>  - Modified the if() in hvm_do_resume() for readability.
>>>  - Replaced hard tab with spaces.
>>>  - Removed a local variable used only once.
>>>  - Moved cr2 assignment to the common part of the code.
>>>  - Now listing the new event in the x86 vm_event capability list.
>>>  - Moved struct variables for readability.
>> Hmm, looks like you've moved the field in the structure declaration,
>> but not the two initializers (in SVM and VMX code).
> I'll modify those as well.
>
>>> --- a/xen/arch/x86/hvm/hvm.c
>>> +++ b/xen/arch/x86/hvm/hvm.c
>>> @@ -535,9 +535,24 @@ void hvm_do_resume(struct vcpu *v)
>>>      /* Inject pending hw/sw trap */
>>>      if ( v->arch.hvm_vcpu.inject_trap.vector != -1 )
>>>      {
>>> -        hvm_inject_trap(&v->arch.hvm_vcpu.inject_trap);
>>> +        if ( !hvm_event_pending(v) )
>>> +            hvm_inject_trap(&v->arch.hvm_vcpu.inject_trap);
>>> +
>>>          v->arch.hvm_vcpu.inject_trap.vector = -1;
>>>      }
>>> +
>>> +    if ( unlikely(v->arch.vm_event) &&
>>> +         v->arch.vm_event->monitor_next_interrupt )
>>> +    {
>>> +        struct hvm_trap info;
>>> +
>>> +        if ( hvm_get_pending_event(v, &info) )
>>> +        {
>>> +            hvm_monitor_interrupt(info.vector, info.type, info.error_code,
>>> +                                  info.cr2);
>>> +            v->arch.vm_event->monitor_next_interrupt = false;
>>> +        }
>>> +    }
>>>  }
>>>  
>>>  static int hvm_print_line(
>>> @@ -6047,6 +6062,12 @@ void hvm_domain_soft_reset(struct domain *d)
>>>      hvm_destroy_all_ioreq_servers(d);
>>>  }
>>>  
>>> +bool hvm_get_pending_event(struct vcpu *v, struct hvm_trap *info)
>>> +{
>>> +    info->cr2 = v->arch.hvm_vcpu.guest_cr[2];
>>> +    return hvm_funcs.get_pending_event(v, info);
>>> +}
>> Unless you expect more callers, I'm tempted to suggest to make this
>> static for now (and move it up ahead of its only caller).
> Will do.
>
>>> --- a/xen/arch/x86/vm_event.c
>>> +++ b/xen/arch/x86/vm_event.c
>>> @@ -134,6 +134,12 @@ void vm_event_set_registers(struct vcpu *v, vm_event_response_t *rsp)
>>>      v->arch.user_regs.eip = rsp->data.regs.x86.rip;
>>>  }
>>>  
>>> +void vm_event_monitor_next_interrupt(struct vcpu *v)
>>> +{
>>> +    ASSERT(v->arch.vm_event);
>>> +    v->arch.vm_event->monitor_next_interrupt = true;
>>> +}
>> I think at this point we're determined to no longer permit ASSERT()s
>> like this: Either use a simple if() or a BUG_ON(). Andrew, please
>> correct me if I've misunderstood earlier discussions.
> I'll change it to a simple if().
>
>>> @@ -259,6 +266,14 @@ struct vm_event_cpuid {
>>>      uint32_t _pad;
>>>  };
>>>  
>>> +struct vm_event_interrupt_x86 {
>>> +    uint32_t vector;
>>> +    uint32_t type;
>>> +    uint32_t error_code;
>>> +    uint64_t cr2;
>>> +    uint32_t _pad;
>>> +};
>> I'm pretty certain this is not what you want, as this makes the layout
>> vary between 32-bit (compat) and 64-bit (native) callers.
> I'll remove the _pad.

You need to invert the pad and cr2, so cr2 starts at 16 bytes into the
structure.

Remember that uint64_t has 8 byte alignment in 64bit, but only 4 byte
alignment in 32bit.

~Andrew
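
Concretely, the layout Andrew is suggesting would look like this (a sketch, not the posted hunk): with the padding ahead of cr2, cr2 lands at offset 16 and the structure is 24 bytes for both 32-bit (compat) and 64-bit (native) callers.

struct vm_event_interrupt_x86 {
    uint32_t vector;      /* offset  0 */
    uint32_t type;        /* offset  4 */
    uint32_t error_code;  /* offset  8 */
    uint32_t _pad;        /* offset 12 */
    uint64_t cr2;         /* offset 16 */
};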
Tamas K Lengyel Nov. 10, 2016, 4:01 p.m. UTC | #11
On Thu, Nov 10, 2016 at 6:33 AM, Razvan Cojocaru <rcojocaru@bitdefender.com>
wrote:

> Hello Tamas, thanks for the review!
>
> On 11/10/2016 03:11 PM, Tamas K Lengyel wrote:
> >     diff --git a/xen/include/asm-x86/vm_event.h
> >     b/xen/include/asm-x86/vm_event.h
> >     index ca73f99..38c624c 100644
> >     --- a/xen/include/asm-x86/vm_event.h
> >     +++ b/xen/include/asm-x86/vm_event.h
> >     @@ -27,6 +27,7 @@
> >       */
> >      struct arch_vm_event {
> >          uint32_t emulate_flags;
> >     +    bool monitor_next_interrupt;
> >
> >
> > This should be in domain.h with the rest of the monitor control bits (as
> > the name of the variable suggests as well).
>
> Unfortunately that would alter the semantics of the patch, as this
> variable needs to be per-VCPU. Putting it in domain.h as you suggest
> would make it per-domain. Looking at places to put it so that it would
> serve this purpose, struct arch_vm_event felt like the most natural place.
>

I see. I generally like to keep vm_event/monitor bits separate, so as not to
end up in the spaghetti we were in a couple of years ago. Maybe introducing a
struct arch_monitor would be appropriate?
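
A possible shape for that suggestion, kept deliberately minimal (the structure and field below are illustrative, not taken from the posted patch):

/* Per-vCPU monitor-control state, separate from struct arch_vm_event. */
struct arch_monitor {
    /* Send a one-shot VM_EVENT_REASON_INTERRUPT for the next pending
     * interrupt on this vCPU. */
    bool next_interrupt;
};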


>
> >          union {
> >              struct vm_event_emul_read_data read;
> >              struct vm_event_emul_insn_data insn;
> >     diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
> >     index 177319d..85cbb7c 100644
> >     --- a/xen/include/public/domctl.h
> >     +++ b/xen/include/public/domctl.h
> >     @@ -1086,6 +1086,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_psr_cmt_op_t);
> >      #define XEN_DOMCTL_MONITOR_EVENT_DEBUG_EXCEPTION       5
> >      #define XEN_DOMCTL_MONITOR_EVENT_CPUID                 6
> >      #define XEN_DOMCTL_MONITOR_EVENT_PRIVILEGED_CALL       7
> >     +#define XEN_DOMCTL_MONITOR_EVENT_INTERRUPT             8
> >
> >      struct xen_domctl_monitor_op {
> >          uint32_t op; /* XEN_DOMCTL_MONITOR_OP_* */
> >     diff --git a/xen/include/public/vm_event.h
> >     b/xen/include/public/vm_event.h
> >     index c28be5a..ba9accf 100644
> >     --- a/xen/include/public/vm_event.h
> >     +++ b/xen/include/public/vm_event.h
> >     @@ -105,6 +105,11 @@
> >       * if any of those flags are set, only those will be honored).
> >       */
> >      #define VM_EVENT_FLAG_SET_EMUL_INSN_DATA (1 << 9)
> >
> >
> > Just reading this comment, it is not entirely clear whether the event
> > will be sent before or after the interrupt. Also, there is a typo in
> > the spelling of "occurring".
> >
> >
> >     +/*
> >     + * Have a one-shot VM_EVENT_REASON_INTERRUPT event sent for the first
> >     + * interrupt occuring after resuming the VCPU.
> >     + */
> >
> >     +#define VM_EVENT_FLAG_GET_NEXT_INTERRUPT (1 << 10)
>
> The event is sent before the interrupt takes effect, as is our
> convention for vm_events (the check is for pending interrupts, so they
> have not had any effect yet).
>
> I'm not sure about "occuring", the sources I've found online suggest my
> spelling is correct, but perhaps this is a case of a word that can be
> spelled in more ways, such as "color" and "colour":
> https://en.wiktionary.org/wiki/occuring
>
> I can send a V3 if you'd like me to clarify when the event is being sent
> with regards to interrupt delivery (in fact I think replacing the word
> "occuring" with "pending" would nicely solve both your objections here).
>

That would work, thanks.

Tamas
Razvan Cojocaru Nov. 10, 2016, 4:03 p.m. UTC | #12
On 11/10/2016 05:59 PM, Andrew Cooper wrote:
>>>> @ -259,6 +266,14 @@ struct vm_event_cpuid {
>>>> >>>      uint32_t _pad;
>>>> >>>  };
>>>> >>>  
>>>> >>> +struct vm_event_interrupt_x86 {
>>>> >>> +    uint32_t vector;
>>>> >>> +    uint32_t type;
>>>> >>> +    uint32_t error_code;
>>>> >>> +    uint64_t cr2;
>>>> >>> +    uint32_t _pad;
>>>> >>> +};
>>> >> I'm pretty certain this is not what you want, as this make the layout
>>> >> vary between 32-bit (compat) and 64-bit (native) callers.
>> > I'll remove the _pad.
> You need to invert the pad and cr2, so cr2 starts at 16 bytes into the
> structure.
> 
> Remember that uint64_t has 8 byte alignment in 64bit, but only 4 byte
> alignment in 32bit.

Right, it's been a long day. :) Thanks, I'll do that.


Thanks,
Razvan
Razvan Cojocaru Nov. 10, 2016, 4:08 p.m. UTC | #13
On 11/10/2016 06:01 PM, Tamas K Lengyel wrote:
> On Thu, Nov 10, 2016 at 6:33 AM, Razvan Cojocaru
> <rcojocaru@bitdefender.com <mailto:rcojocaru@bitdefender.com>> wrote:
> 
>     Hello Tamas, thanks for the review!
> 
>     On 11/10/2016 03:11 PM, Tamas K Lengyel wrote:
>     >     diff --git a/xen/include/asm-x86/vm_event.h
>     >     b/xen/include/asm-x86/vm_event.h
>     >     index ca73f99..38c624c 100644
>     >     --- a/xen/include/asm-x86/vm_event.h
>     >     +++ b/xen/include/asm-x86/vm_event.h
>     >     @@ -27,6 +27,7 @@
>     >       */
>     >      struct arch_vm_event {
>     >          uint32_t emulate_flags;
>     >     +    bool monitor_next_interrupt;
>     >
>     >
>     > This should be in domain.h with the rest of the monitor control bits (as
>     > the name of the variable suggests as well).
> 
>     Unfortunately that would alter the semantics of the patch, as this
>     variable needs to be per-VCPU. Putting it in domain.h as you suggest
>     would make it per-domain. Looking at places to put it so that it would
>     serve this purpose, struct arch_vm_event felt like the most natural
>     place.
> 
> 
> I see. I generally like to keep vm_event/monitor bits separate, so as not
> to end up in the spaghetti we were in a couple of years ago. Maybe
> introducing a struct arch_monitor would be appropriate?

Yes, that's a reasonable request. I'll try that direction.


Thanks,
Razvan

Patch

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 704fd64..4adb894 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -535,9 +535,24 @@  void hvm_do_resume(struct vcpu *v)
     /* Inject pending hw/sw trap */
     if ( v->arch.hvm_vcpu.inject_trap.vector != -1 )
     {
-        hvm_inject_trap(&v->arch.hvm_vcpu.inject_trap);
+        if ( !hvm_event_pending(v) )
+            hvm_inject_trap(&v->arch.hvm_vcpu.inject_trap);
+
         v->arch.hvm_vcpu.inject_trap.vector = -1;
     }
+
+    if ( unlikely(v->arch.vm_event) &&
+         v->arch.vm_event->monitor_next_interrupt )
+    {
+        struct hvm_trap info;
+
+        if ( hvm_get_pending_event(v, &info) )
+        {
+            hvm_monitor_interrupt(info.vector, info.type, info.error_code,
+                                  info.cr2);
+            v->arch.vm_event->monitor_next_interrupt = false;
+        }
+    }
 }
 
 static int hvm_print_line(
@@ -6047,6 +6062,12 @@  void hvm_domain_soft_reset(struct domain *d)
     hvm_destroy_all_ioreq_servers(d);
 }
 
+bool hvm_get_pending_event(struct vcpu *v, struct hvm_trap *info)
+{
+    info->cr2 = v->arch.hvm_vcpu.guest_cr[2];
+    return hvm_funcs.get_pending_event(v, info);
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/x86/hvm/monitor.c b/xen/arch/x86/hvm/monitor.c
index 401a8c6..69a88ad 100644
--- a/xen/arch/x86/hvm/monitor.c
+++ b/xen/arch/x86/hvm/monitor.c
@@ -150,6 +150,20 @@  int hvm_monitor_cpuid(unsigned long insn_length, unsigned int leaf,
     return monitor_traps(curr, 1, &req);
 }
 
+void hvm_monitor_interrupt(unsigned int vector, unsigned int type,
+                           unsigned int err, uint64_t cr2)
+{
+    vm_event_request_t req = {
+        .reason = VM_EVENT_REASON_INTERRUPT,
+        .u.interrupt.x86.vector = vector,
+        .u.interrupt.x86.type = type,
+        .u.interrupt.x86.error_code = err,
+        .u.interrupt.x86.cr2 = cr2,
+    };
+
+    monitor_traps(current, 1, &req);
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 16427f6..af5d458 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -2220,6 +2220,20 @@  static void svm_invlpg(struct vcpu *v, unsigned long vaddr)
     svm_asid_g_invlpg(v, vaddr);
 }
 
+static bool svm_get_pending_event(struct vcpu *v, struct hvm_trap *info)
+{
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+    if ( vmcb->eventinj.fields.v )
+        return false;
+
+    info->vector = vmcb->eventinj.fields.vector;
+    info->type = vmcb->eventinj.fields.type;
+    info->error_code = vmcb->eventinj.fields.errorcode;
+
+    return true;
+}
+
 static struct hvm_function_table __initdata svm_function_table = {
     .name                 = "SVM",
     .cpu_up_prepare       = svm_cpu_up_prepare,
@@ -2272,6 +2286,7 @@  static struct hvm_function_table __initdata svm_function_table = {
     .tsc_scaling = {
         .max_ratio = ~TSC_RATIO_RSVD_BITS,
     },
+    .get_pending_event = svm_get_pending_event,
 };
 
 void svm_vmexit_handler(struct cpu_user_regs *regs)
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 9a8f694..3634289 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2134,6 +2134,25 @@  static int vmx_set_mode(struct vcpu *v, int mode)
     return 0;
 }
 
+static bool vmx_get_pending_event(struct vcpu *v, struct hvm_trap *info)
+{
+    unsigned long intr_info, error_code;
+
+    vmx_vmcs_enter(v);
+    __vmread(VM_ENTRY_INTR_INFO, &intr_info);
+    __vmread(VM_ENTRY_EXCEPTION_ERROR_CODE, &error_code);
+    vmx_vmcs_exit(v);
+
+    if ( !(intr_info & INTR_INFO_VALID_MASK) )
+        return false;
+
+    info->vector = MASK_EXTR(intr_info, INTR_INFO_VECTOR_MASK);
+    info->type = MASK_EXTR(intr_info, INTR_INFO_INTR_TYPE_MASK);
+    info->error_code = error_code;
+
+    return true;
+}
+
 static struct hvm_function_table __initdata vmx_function_table = {
     .name                 = "VMX",
     .cpu_up_prepare       = vmx_cpu_up_prepare,
@@ -2203,6 +2222,7 @@  static struct hvm_function_table __initdata vmx_function_table = {
         .max_ratio = VMX_TSC_MULTIPLIER_MAX,
         .setup     = vmx_setup_tsc_scaling,
     },
+    .get_pending_event = vmx_get_pending_event,
 };
 
 /* Handle VT-d posted-interrupt when VCPU is blocked. */
diff --git a/xen/arch/x86/vm_event.c b/xen/arch/x86/vm_event.c
index 1e88d67..a153ec5 100644
--- a/xen/arch/x86/vm_event.c
+++ b/xen/arch/x86/vm_event.c
@@ -134,6 +134,12 @@  void vm_event_set_registers(struct vcpu *v, vm_event_response_t *rsp)
     v->arch.user_regs.eip = rsp->data.regs.x86.rip;
 }
 
+void vm_event_monitor_next_interrupt(struct vcpu *v)
+{
+    ASSERT(v->arch.vm_event);
+    v->arch.vm_event->monitor_next_interrupt = true;
+}
+
 void vm_event_fill_regs(vm_event_request_t *req)
 {
     const struct cpu_user_regs *regs = guest_cpu_user_regs();
diff --git a/xen/common/vm_event.c b/xen/common/vm_event.c
index 907ab40..c947706 100644
--- a/xen/common/vm_event.c
+++ b/xen/common/vm_event.c
@@ -433,6 +433,9 @@  void vm_event_resume(struct domain *d, struct vm_event_domain *ved)
             if ( rsp.flags & VM_EVENT_FLAG_SET_REGISTERS )
                 vm_event_set_registers(v, &rsp);
 
+            if ( rsp.flags & VM_EVENT_FLAG_GET_NEXT_INTERRUPT )
+                vm_event_monitor_next_interrupt(v);
+
             if ( rsp.flags & VM_EVENT_FLAG_VCPU_PAUSED )
                 vm_event_vcpu_unpause(v);
         }
diff --git a/xen/include/asm-arm/vm_event.h b/xen/include/asm-arm/vm_event.h
index 66f2474..ab9c8cb 100644
--- a/xen/include/asm-arm/vm_event.h
+++ b/xen/include/asm-arm/vm_event.h
@@ -52,4 +52,10 @@  void vm_event_emulate_check(struct vcpu *v, vm_event_response_t *rsp)
     /* Not supported on ARM. */
 }
 
+static inline
+void vm_event_monitor_next_interrupt(struct vcpu *v)
+{
+    /* Not supported on ARM. */
+}
+
 #endif /* __ASM_ARM_VM_EVENT_H__ */
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 7e7462e..3370472 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -157,6 +157,7 @@  struct hvm_function_table {
     void (*init_hypercall_page)(struct domain *d, void *hypercall_page);
 
     int  (*event_pending)(struct vcpu *v);
+    bool (*get_pending_event)(struct vcpu *v, struct hvm_trap *info);
     void (*invlpg)(struct vcpu *v, unsigned long vaddr);
 
     int  (*cpu_up_prepare)(unsigned int cpu);
@@ -428,6 +429,8 @@  static inline int hvm_event_pending(struct vcpu *v)
     return hvm_funcs.event_pending(v);
 }
 
+bool hvm_get_pending_event(struct vcpu *v, struct hvm_trap *info);
+
 /* These bits in CR4 are owned by the host. */
 #define HVM_CR4_HOST_MASK (mmu_cr4_features & \
     (X86_CR4_VMXE | X86_CR4_PAE | X86_CR4_MCE))
diff --git a/xen/include/asm-x86/hvm/monitor.h b/xen/include/asm-x86/hvm/monitor.h
index 82b85ec..85ca678 100644
--- a/xen/include/asm-x86/hvm/monitor.h
+++ b/xen/include/asm-x86/hvm/monitor.h
@@ -42,6 +42,8 @@  int hvm_monitor_debug(unsigned long rip, enum hvm_monitor_debug_type type,
                       unsigned long trap_type, unsigned long insn_length);
 int hvm_monitor_cpuid(unsigned long insn_length, unsigned int leaf,
                       unsigned int subleaf);
+void hvm_monitor_interrupt(unsigned int vector, unsigned int type,
+                           unsigned int err, uint64_t cr2);
 
 #endif /* __ASM_X86_HVM_MONITOR_H__ */
 
diff --git a/xen/include/asm-x86/monitor.h b/xen/include/asm-x86/monitor.h
index 63a994b..e409373 100644
--- a/xen/include/asm-x86/monitor.h
+++ b/xen/include/asm-x86/monitor.h
@@ -76,7 +76,8 @@  static inline uint32_t arch_monitor_get_capabilities(struct domain *d)
                    (1U << XEN_DOMCTL_MONITOR_EVENT_SOFTWARE_BREAKPOINT) |
                    (1U << XEN_DOMCTL_MONITOR_EVENT_GUEST_REQUEST) |
                    (1U << XEN_DOMCTL_MONITOR_EVENT_DEBUG_EXCEPTION) |
-                   (1U << XEN_DOMCTL_MONITOR_EVENT_CPUID);
+                   (1U << XEN_DOMCTL_MONITOR_EVENT_CPUID) |
+                   (1U << XEN_DOMCTL_MONITOR_EVENT_INTERRUPT);
 
     /* Since we know this is on VMX, we can just call the hvm func */
     if ( hvm_is_singlestep_supported() )
diff --git a/xen/include/asm-x86/vm_event.h b/xen/include/asm-x86/vm_event.h
index ca73f99..38c624c 100644
--- a/xen/include/asm-x86/vm_event.h
+++ b/xen/include/asm-x86/vm_event.h
@@ -27,6 +27,7 @@ 
  */
 struct arch_vm_event {
     uint32_t emulate_flags;
+    bool monitor_next_interrupt;
     union {
         struct vm_event_emul_read_data read;
         struct vm_event_emul_insn_data insn;
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 177319d..85cbb7c 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -1086,6 +1086,7 @@  DEFINE_XEN_GUEST_HANDLE(xen_domctl_psr_cmt_op_t);
 #define XEN_DOMCTL_MONITOR_EVENT_DEBUG_EXCEPTION       5
 #define XEN_DOMCTL_MONITOR_EVENT_CPUID                 6
 #define XEN_DOMCTL_MONITOR_EVENT_PRIVILEGED_CALL       7
+#define XEN_DOMCTL_MONITOR_EVENT_INTERRUPT             8
 
 struct xen_domctl_monitor_op {
     uint32_t op; /* XEN_DOMCTL_MONITOR_OP_* */
diff --git a/xen/include/public/vm_event.h b/xen/include/public/vm_event.h
index c28be5a..ba9accf 100644
--- a/xen/include/public/vm_event.h
+++ b/xen/include/public/vm_event.h
@@ -105,6 +105,11 @@ 
  * if any of those flags are set, only those will be honored).
  */
 #define VM_EVENT_FLAG_SET_EMUL_INSN_DATA (1 << 9)
+/*
+ * Have a one-shot VM_EVENT_REASON_INTERRUPT event sent for the first
+ * interrupt occuring after resuming the VCPU.
+ */
+#define VM_EVENT_FLAG_GET_NEXT_INTERRUPT (1 << 10)
 
 /*
  * Reasons for the vm event request
@@ -139,6 +144,8 @@ 
  *       These kinds of events will be filtered out in future versions.
  */
 #define VM_EVENT_REASON_PRIVILEGED_CALL         11
+/* An interrupt has been delivered. */
+#define VM_EVENT_REASON_INTERRUPT               12
 
 /* Supported values for the vm_event_write_ctrlreg index. */
 #define VM_EVENT_X86_CR0    0
@@ -259,6 +266,14 @@  struct vm_event_cpuid {
     uint32_t _pad;
 };
 
+struct vm_event_interrupt_x86 {
+    uint32_t vector;
+    uint32_t type;
+    uint32_t error_code;
+    uint64_t cr2;
+    uint32_t _pad;
+};
+
 #define MEM_PAGING_DROP_PAGE       (1 << 0)
 #define MEM_PAGING_EVICT_FAIL      (1 << 1)
 
@@ -302,6 +317,9 @@  typedef struct vm_event_st {
         struct vm_event_debug                 software_breakpoint;
         struct vm_event_debug                 debug_exception;
         struct vm_event_cpuid                 cpuid;
+        union {
+            struct vm_event_interrupt_x86     x86;
+        } interrupt;
     } u;
 
     union {
diff --git a/xen/include/xen/vm_event.h b/xen/include/xen/vm_event.h
index 4f088c8..2fb3951 100644
--- a/xen/include/xen/vm_event.h
+++ b/xen/include/xen/vm_event.h
@@ -78,6 +78,8 @@  void vm_event_vcpu_unpause(struct vcpu *v);
 void vm_event_fill_regs(vm_event_request_t *req);
 void vm_event_set_registers(struct vcpu *v, vm_event_response_t *rsp);
 
+void vm_event_monitor_next_interrupt(struct vcpu *v);
+
 #endif /* __VM_EVENT_H__ */
 
 /*