
x86/hvm: Include interruptibility state in hvm_hw_cpu

Message ID 05d0a5b5c18d667a5527e6f834347f54a10309da.1646937728.git.tamas.lengyel@intel.com (mailing list archive)
State New, archived
Series x86/hvm: Include interruptibility state in hvm_hw_cpu

Commit Message

Tamas K Lengyel March 10, 2022, 6:44 p.m. UTC
A failed vmentry has been observed during VM fork reset when the reset
is performed immediately after a STI instruction was executed. This is
due to the guest interruptibility state in the VMCS being modified by
STI while the subsequent reset removes the IF bit from FLAGS, causing
the failed vmentry.

Include the interruptibility state information in the public hvm_hw_cpu struct
so that the CPU can be safely saved/restored.

Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
---
 xen/arch/x86/hvm/hvm.c                 |  9 +++++----
 xen/arch/x86/hvm/vmx/vmx.c             |  4 ++++
 xen/arch/x86/include/asm/hvm/hvm.h     | 26 ++++++++++++++++++++++++++
 xen/include/public/arch-x86/hvm/save.h |  3 ++-
 4 files changed, 37 insertions(+), 5 deletions(-)
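
For illustration, the VM-entry consistency rule at play can be sketched as
below. This is a simplified standalone check, not part of the patch, reusing
Xen's existing HVM_INTR_SHADOW_STI and X86_EFLAGS_IF definitions:

/* Sketch only: the SDM requires the STI-blocking bit of the guest
 * interruptibility state to be 0 whenever RFLAGS.IF is 0.  A fork reset
 * that restores FLAGS but leaves the interruptibility state untouched can
 * violate this, producing the failed vmentry described above. */
static bool interruptibility_consistent(unsigned int intr_shadow,
                                        unsigned long rflags)
{
    if ( (intr_shadow & HVM_INTR_SHADOW_STI) && !(rflags & X86_EFLAGS_IF) )
        return false; /* STI blocking with IF=0 is invalid guest state */

    return true;
}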

Comments

Tian, Kevin March 14, 2022, 7:21 a.m. UTC | #1
> From: Lengyel, Tamas <tamas.lengyel@intel.com>
> Sent: Friday, March 11, 2022 2:45 AM
> 
> During VM fork resetting a failed vmentry has been observed when the reset
> is performed immediately after a STI instruction executed. This is due to
> the guest interruptibility state in the VMCS being modified by STI but the
> subsequent reset removes the IF bit from FLAGS, causing the failed vmentry.

I didn't get the rationale here. Before this patch the interruptibility state is 
not saved/restored thus I suppose after reset it will be cleared thus aligned
with RFLAGS.IF=0. Can you elaborate a bit how exactly above problem is
caused?

> 
> Include the interruptibility state information in the public hvm_hw_cpu struct
> so that the CPU can be safely saved/restored.
> 
> Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
> ---
>  xen/arch/x86/hvm/hvm.c                 |  9 +++++----
>  xen/arch/x86/hvm/vmx/vmx.c             |  4 ++++
>  xen/arch/x86/include/asm/hvm/hvm.h     | 26 ++++++++++++++++++++++++++

Why is this change only applied to vmx instead of svm?

>  xen/include/public/arch-x86/hvm/save.h |  3 ++-
>  4 files changed, 37 insertions(+), 5 deletions(-)
> 
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index 709a4191ef..b239c72215 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -897,6 +897,8 @@ static int cf_check hvm_save_cpu_ctxt(struct vcpu *v, hvm_domain_context_t *h)
>          ctxt.flags = XEN_X86_FPU_INITIALISED;
>      }
> 
> +    ctxt.interruptibility_info = hvm_get_interrupt_shadow(v);
> +
>      return hvm_save_entry(CPU, v->vcpu_id, h, &ctxt);
>  }
> 
> @@ -990,9 +992,6 @@ static int cf_check hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
>      if ( hvm_load_entry_zeroextend(CPU, h, &ctxt) != 0 )
>          return -EINVAL;
> 
> -    if ( ctxt.pad0 != 0 )
> -        return -EINVAL;
> -
>      /* Sanity check some control registers. */
>      if ( (ctxt.cr0 & HVM_CR0_GUEST_RESERVED_BITS) ||
>           !(ctxt.cr0 & X86_CR0_ET) ||
> @@ -1155,6 +1154,8 @@ static int cf_check hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
>      v->arch.dr6   = ctxt.dr6;
>      v->arch.dr7   = ctxt.dr7;
> 
> +    hvm_set_interrupt_shadow(v, ctxt.interruptibility_info);
> +
>      hvmemul_cancel(v);
> 
>      /* Auxiliary processors should be woken immediately. */
> @@ -3888,7 +3889,7 @@ enum hvm_intblk hvm_interrupt_blocked(struct vcpu *v, struct hvm_intack intack)
>           !(guest_cpu_user_regs()->eflags & X86_EFLAGS_IF) )
>          return hvm_intblk_rflags_ie;
> 
> -    intr_shadow = alternative_call(hvm_funcs.get_interrupt_shadow, v);
> +    intr_shadow = hvm_get_interrupt_shadow(v);
> 
>      if ( intr_shadow & (HVM_INTR_SHADOW_STI|HVM_INTR_SHADOW_MOV_SS) )
>          return hvm_intblk_shadow;
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index c075370f64..e13817431a 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -1323,7 +1323,9 @@ static unsigned int cf_check vmx_get_interrupt_shadow(struct vcpu *v)
>  {
>      unsigned long intr_shadow;
> 
> +    vmx_vmcs_enter(v);
>      __vmread(GUEST_INTERRUPTIBILITY_INFO, &intr_shadow);
> +    vmx_vmcs_exit(v);
> 
>      return intr_shadow;
>  }
> @@ -1331,7 +1333,9 @@ static unsigned int cf_check vmx_get_interrupt_shadow(struct vcpu *v)
>  static void cf_check vmx_set_interrupt_shadow(
>      struct vcpu *v, unsigned int intr_shadow)
>  {
> +    vmx_vmcs_enter(v);
>      __vmwrite(GUEST_INTERRUPTIBILITY_INFO, intr_shadow);
> +    vmx_vmcs_exit(v);
>  }
> 
>  static void vmx_load_pdptrs(struct vcpu *v)
> diff --git a/xen/arch/x86/include/asm/hvm/hvm.h b/xen/arch/x86/include/asm/hvm/hvm.h
> index 5b7ec0cf69..2fb7865a05 100644
> --- a/xen/arch/x86/include/asm/hvm/hvm.h
> +++ b/xen/arch/x86/include/asm/hvm/hvm.h
> @@ -720,6 +720,22 @@ static inline int hvm_vmtrace_reset(struct vcpu *v)
>      return -EOPNOTSUPP;
>  }
> 
> +static inline unsigned long hvm_get_interrupt_shadow(struct vcpu *v)
> +{
> +    if ( hvm_funcs.get_interrupt_shadow )
> +        return alternative_call(hvm_funcs.get_interrupt_shadow, v);
> +
> +    return -EOPNOTSUPP;
> +}
> +
> +static inline void
> +hvm_set_interrupt_shadow(struct vcpu *v, unsigned long val)
> +{
> +    if ( hvm_funcs.set_interrupt_shadow )
> +        alternative_vcall(hvm_funcs.set_interrupt_shadow, v, val);
> +}
> +
> +
>  /*
>   * Accessors for registers which have per-guest-type or per-vendor locations
>   * (e.g. VMCS, msr load/save lists, VMCB, VMLOAD lazy, etc).
> @@ -863,6 +879,16 @@ static inline void hvm_set_reg(struct vcpu *v, unsigned int reg, uint64_t val)
>      ASSERT_UNREACHABLE();
>  }
> 
> +static inline unsigned long hvm_get_interrupt_shadow(struct vcpu *v)
> +{
> +    ASSERT_UNREACHABLE();
> +    return 0;
> +}
> +static inline void hvm_set_interrupt_shadow(struct vcpu *v, unsigned long val)
> +{
> +    ASSERT_UNREACHABLE();
> +}
> +
>  #define is_viridian_domain(d) ((void)(d), false)
>  #define is_viridian_vcpu(v) ((void)(v), false)
>  #define has_viridian_time_ref_count(d) ((void)(d), false)
> diff --git a/xen/include/public/arch-x86/hvm/save.h b/xen/include/public/arch-x86/hvm/save.h
> index 773a380bc2..e944b1756a 100644
> --- a/xen/include/public/arch-x86/hvm/save.h
> +++ b/xen/include/public/arch-x86/hvm/save.h
> @@ -165,7 +165,8 @@ struct hvm_hw_cpu {
>  #define _XEN_X86_FPU_INITIALISED        0
>  #define XEN_X86_FPU_INITIALISED         (1U<<_XEN_X86_FPU_INITIALISED)
>      uint32_t flags;
> -    uint32_t pad0;
> +
> +    uint32_t interruptibility_info;
>  };
> 
>  struct hvm_hw_cpu_compat {
> --
> 2.25.1
Tamas K Lengyel March 14, 2022, 12:13 p.m. UTC | #2
On Mon, Mar 14, 2022 at 3:22 AM Tian, Kevin <kevin.tian@intel.com> wrote:
>
> > From: Lengyel, Tamas <tamas.lengyel@intel.com>
> > Sent: Friday, March 11, 2022 2:45 AM
> >
> > During VM fork resetting a failed vmentry has been observed when the reset
> > is performed immediately after a STI instruction executed. This is due to
> > the guest interruptibility state in the VMCS being modified by STI but the
> > subsequent reset removes the IF bit from FLAGS, causing the failed vmentry.
>
> I didn't get the rationale here. Before this patch the interruptibility state is
> not saved/restored thus I suppose after reset it will be cleared thus aligned
> with RFLAGS.IF=0. Can you elaborate a bit how exactly above problem is
> caused?

The problem is that the interruptibility state is not cleared and thus
isn't aligned with RFLAGS.IF=0 after RFLAGS is reset. They go out of
sync leading to the failed vmentry. The interruptibility state needs
to be included in the hvm hw cpu struct for it to get re-aligned
during reset to avoid the failed vmentry.

>
> >
> > Include the interruptibility state information in the public hvm_hw_cpu struct
> > so that the CPU can be safely saved/restored.
> >
> > Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
> > ---
> >  xen/arch/x86/hvm/hvm.c                 |  9 +++++----
> >  xen/arch/x86/hvm/vmx/vmx.c             |  4 ++++
> >  xen/arch/x86/include/asm/hvm/hvm.h     | 26 ++++++++++++++++++++++++++
>
> Why is this change only applied to vmx instead of svm?

VM forking is implemented only for vmx, thus this change is only
relevant where a VM would be immediately reset after a STI
instruction. Normal VM save/restore/migration doesn't attempt to
capture a VM state immediately after STI thus it's not relevant for
SVM.

Tamas
Tian, Kevin March 17, 2022, 6:09 a.m. UTC | #3
> From: Tamas K Lengyel <tamas.k.lengyel@gmail.com>
> Sent: Monday, March 14, 2022 8:14 PM
> 
> On Mon, Mar 14, 2022 at 3:22 AM Tian, Kevin <kevin.tian@intel.com> wrote:
> >
> > > From: Lengyel, Tamas <tamas.lengyel@intel.com>
> > > Sent: Friday, March 11, 2022 2:45 AM
> > >
> > > During VM fork resetting a failed vmentry has been observed when the reset
> > > is performed immediately after a STI instruction executed. This is due to
> > > the guest interruptibility state in the VMCS being modified by STI but the
> > > subsequent reset removes the IF bit from FLAGS, causing the failed vmentry.
> >
> > I didn't get the rationale here. Before this patch the interruptibility state is
> > not saved/restored thus I suppose after reset it will be cleared thus aligned
> > with RFLAGS.IF=0. Can you elaborate a bit how exactly above problem is
> > caused?
> 
> The problem is that the interruptibility state is not cleared and thus
> isn't aligned with RFLAGS.IF=0 after RFLAGS is reset. They go out of
> sync leading to the failed vmentry. The interruptibility state needs
> to be included in the hvm hw cpu struct for it to get re-aligned
> during reset to avoid the failed vmentry.

I'm still confused here. The interruptibility state should have bit 0 as 1
after a STI instruction is executed (RFLAGS.IF=1). Saving/restoring it
still doesn't match RFLAGS.IF=0 after vm fork reset. So I didn't understand
how this patch actually fixes the problem.

Also if there is a real problem shouldn't we just reset the interruptibility
state to match RFLAGS.IF=0?

> 
> >
> > >
> > > Include the interruptibility state information in the public hvm_hw_cpu
> struct
> > > so that the CPU can be safely saved/restored.
> > >
> > > Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
> > > ---
> > >  xen/arch/x86/hvm/hvm.c                 |  9 +++++----
> > >  xen/arch/x86/hvm/vmx/vmx.c             |  4 ++++
> > >  xen/arch/x86/include/asm/hvm/hvm.h     | 26 ++++++++++++++++++++++++++
> >
> > Why is this change only applied to vmx instead of svm?
> 
> VM forking is implemented only for vmx, thus this change is only
> relevant where a VM would be immediately reset after a STI

but the ops is generic and SVM already has the related callbacks.

> instruction. Normal VM save/restore/migration doesn't attempt to
> capture a VM state immediately after STI thus it's not relevant for
> SVM.
> 

Can you elaborate why save/restore/migration won't happen
right after STI while it does for vm fork?

Thanks
Kevin
Tamas K Lengyel March 17, 2022, 11:06 a.m. UTC | #4
On Thu, Mar 17, 2022, 2:09 AM Tian, Kevin <kevin.tian@intel.com> wrote:

> > From: Tamas K Lengyel <tamas.k.lengyel@gmail.com>
> > Sent: Monday, March 14, 2022 8:14 PM
> >
> > On Mon, Mar 14, 2022 at 3:22 AM Tian, Kevin <kevin.tian@intel.com> wrote:
> > >
> > > > From: Lengyel, Tamas <tamas.lengyel@intel.com>
> > > > Sent: Friday, March 11, 2022 2:45 AM
> > > >
> > > > During VM fork resetting a failed vmentry has been observed when the reset
> > > > is performed immediately after a STI instruction executed. This is due to
> > > > the guest interruptibility state in the VMCS being modified by STI but the
> > > > subsequent reset removes the IF bit from FLAGS, causing the failed vmentry.
> > >
> > > I didn't get the rationale here. Before this patch the interruptibility state is
> > > not saved/restored thus I suppose after reset it will be cleared thus aligned
> > > with RFLAGS.IF=0. Can you elaborate a bit how exactly above problem is
> > > caused?
> >
> > The problem is that the interruptibility state is not cleared and thus
> > isn't aligned with RFLAGS.IF=0 after RFLAGS is reset. They go out of
> > sync leading to the failed vmentry. The interruptibility state needs
> > to be included in the hvm hw cpu struct for it to get re-aligned
> > during reset to avoid the failed vmentry.
>
> I'm still confused here. The interruptibility state should have bit 0 as 1
> after a STI instruction is executed (RFLAGS.IF=1). Saving/restoring it
> still doesn't match RFLAGS.IF=0 after vm fork reset. So I didn't understand
> how this patch actually fixes the problem.
>

I think I see where the confusion is. We are saving the context of the
parent vm and restoring it in the fork during a reset. That's what a reset
is. So including the field in the struct means it will be reset to be in
sync with the RFLAGS of the parent vm. Right now only RFLAGS is copied
from the parent and the interruptibility state isn't touched at all.
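
A minimal sketch of that flow, with hypothetical helper names standing in
for the fork-reset plumbing:

/* Sketch only: capture_cpu_ctxt() and restore_cpu_ctxt() are hypothetical
 * stand-ins for the fork reset plumbing. */
static void fork_reset_cpu_sketch(struct vcpu *parent, struct vcpu *fork)
{
    struct hvm_hw_cpu ctxt = {};

    /* Fill ctxt from the parent; with this patch it also carries
     * ctxt.interruptibility_info. */
    capture_cpu_ctxt(parent, &ctxt);

    /* Replay into the fork: FLAGS and the interrupt shadow now arrive
     * as a consistent pair. */
    restore_cpu_ctxt(fork, &ctxt);
}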


> Also if there is a real problem shouldn't we just reset the interruptibility
> state to match RFLAGS.IF=0?
>

Yes, exactly what this patch achieves. Looking at it more I think the rest
of the non-register cpu state should similarly be included so those would
get reset too (activity & pending dbg).
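
For concreteness, a placeholder sketch of such a follow-up layout; the field
names are hypothetical and appended at the tail per the compat rules
discussed below:

/* Hypothetical follow-up layout; not part of this series. */
struct hvm_hw_cpu_more_state_sketch {
    uint32_t interruptibility_info; /* added by this patch */
    uint32_t activity_state;        /* cf. VMCS GUEST_ACTIVITY_STATE */
    uint64_t pending_dbg;           /* cf. VMCS GUEST_PENDING_DBG_EXCEPTIONS */
};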


> >
> > >
> > > >
> > > > Include the interruptibility state information in the public hvm_hw_cpu struct
> > > > so that the CPU can be safely saved/restored.
> > > >
> > > > Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
> > > > ---
> > > >  xen/arch/x86/hvm/hvm.c                 |  9 +++++----
> > > >  xen/arch/x86/hvm/vmx/vmx.c             |  4 ++++
> > > >  xen/arch/x86/include/asm/hvm/hvm.h     | 26 ++++++++++++++++++++++++++
> > >
> > > Why is this change only applied to vmx instead of svm?
> >
> > VM forking is implemented only for vmx, thus this change is only
> > relevant where a VM would be immediately reset after a STI
>
> but the ops is generic and SVM already has the related callbacks.
>
> > instruction. Normal VM save/restore/migration doesn't attempt to
> > capture a VM state immediately after STI thus it's not relevant for
> > SVM.
> >
>
> Can you elaborate why save/restore/migration won't happen
> right after STI while it does for vm fork?
>

This is just based on my observation that no one has encountered this issue
in the long life of Xen. If I'm wrong and this corner case could be
encountered via the normal routes I can wire in SVM too. I ran into this
with vm forks because we are resetting the forks very frequently (thousands
of times per second) under various execution paths with the fuzzer, and one
happened to hit reset just after STI.

Another question I would be interested to hear the maintainers' take on
concerns the hvm context compat macros. Right now they differentiate
between hvm_hw_cpu struct versions based on size. So since this patch
doesn't change the size, how is that supposed to work? And what if there
are more than two versions of the struct? Does the compat version never
change?

Tamas

Jan Beulich March 17, 2022, 1:56 p.m. UTC | #5
On 10.03.2022 19:44, Tamas K Lengyel wrote:
> During VM fork resetting a failed vmentry has been observed when the reset
> is performed immediately after a STI instruction executed. This is due to
> the guest interruptibility state in the VMCS being modified by STI but the
> subsequent reset removes the IF bit from FLAGS, causing the failed vmentry.

I first thought this was backwards, but after re-reading a couple of
times I think the issue is merely with you stating this as if this
was what always happens, while it really depends on the state that
the VM is being reset to. I think it would further be helpful if you
made clear that other interruptibility state could also cause issues
when not properly restored. One way to express this would be to
simply insert "e.g." ahead of "a STI instruction".

> @@ -1155,6 +1154,8 @@ static int cf_check hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
>      v->arch.dr6   = ctxt.dr6;
>      v->arch.dr7   = ctxt.dr7;
>  
> +    hvm_set_interrupt_shadow(v, ctxt.interruptibility_info);

Setting reserved bits as well as certain combinations of bits will
cause VM entry to fail. I think it would be nice to report this as
an error here rather than waiting for the VM entry failure.
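
A sketch of what such a check might look like on the VMX side; the function
name is hypothetical, VMX_INTR_SHADOW_* are Xen's existing vmcs.h constants,
and the SDM's full list of entry checks is longer than shown:

static int vmx_check_interrupt_shadow(unsigned int intr_shadow,
                                      unsigned long rflags)
{
    /* Accept only the interruptibility bits Xen currently knows about;
     * other bits are reserved and guarantee a VM entry failure. */
    if ( intr_shadow & ~(VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS |
                         VMX_INTR_SHADOW_SMI | VMX_INTR_SHADOW_NMI) )
        return -EINVAL;

    /* STI blocking is only consistent with RFLAGS.IF = 1. */
    if ( (intr_shadow & VMX_INTR_SHADOW_STI) && !(rflags & X86_EFLAGS_IF) )
        return -EINVAL;

    return 0;
}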

> --- a/xen/arch/x86/include/asm/hvm/hvm.h
> +++ b/xen/arch/x86/include/asm/hvm/hvm.h
> @@ -720,6 +720,22 @@ static inline int hvm_vmtrace_reset(struct vcpu *v)
>      return -EOPNOTSUPP;
>  }
>  
> +static inline unsigned long hvm_get_interrupt_shadow(struct vcpu *v)

unsigned long here and ...

> +{
> +    if ( hvm_funcs.get_interrupt_shadow )
> +        return alternative_call(hvm_funcs.get_interrupt_shadow, v);
> +
> +    return -EOPNOTSUPP;
> +}
> +
> +static inline void
> +hvm_set_interrupt_shadow(struct vcpu *v, unsigned long val)

... here are not in line with the hooks' types. Same for the stubs
further down then.
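
One way to line the wrappers up with the hooks' types (a sketch; how the
missing-hook case should signal an error is left open, and 0 below merely
keeps the return type honest):

static inline unsigned int hvm_get_interrupt_shadow(struct vcpu *v)
{
    if ( hvm_funcs.get_interrupt_shadow )
        return alternative_call(hvm_funcs.get_interrupt_shadow, v);

    return 0; /* placeholder; error signalling still to be decided */
}

static inline void
hvm_set_interrupt_shadow(struct vcpu *v, unsigned int val)
{
    if ( hvm_funcs.set_interrupt_shadow )
        alternative_vcall(hvm_funcs.set_interrupt_shadow, v, val);
}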

> +{
> +    if ( hvm_funcs.set_interrupt_shadow )
> +        alternative_vcall(hvm_funcs.set_interrupt_shadow, v, val);
> +}
> +
> +
>  /*

Please don't insert multiple successive blank lines.

Jan
Jan Beulich March 17, 2022, 2:03 p.m. UTC | #6
On 17.03.2022 12:06, Tamas K Lengyel wrote:
> Another question I would be interested to hear from the maintainers is in
> regards to the hvm context compat macros. Right now they differentiate
> between hvm hw cpu struct versions based on size. So since this patch
> doesn't change the size how is that supposed to work? Or if there are more
> then two versions of the struct? The compat version never changes?

struct hvm_hw_cpu_compat is indeed not meant to ever change. It needed
introducing because the msr_tsc_aux field was added in the middle of
the struct, instead of at the end. Going forward, as was done with the
"flags" field already, additions should only be at the end. Exceptions
are when padding fields are assigned a purpose (like you do), which is
why they need checking that they're zero when they're introduced.
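
To illustrate the scheme, a sketch (not the actual loader) built around the
real hvm_load_entry_zeroextend() that hvm_load_cpu_ctxt() uses:

static int load_cpu_ctxt_sketch(hvm_domain_context_t *h)
{
    struct hvm_hw_cpu ctxt;

    /* A record saved by an older Xen is shorter than the current struct;
     * the _zeroextend variant fills the missing tail with zeroes, so a
     * field appended at the end must treat 0 as a safe default. */
    if ( hvm_load_entry_zeroextend(CPU, h, &ctxt) != 0 )
        return -EINVAL;

    /* A repurposed padding field instead relies on the zero check it had
     * while still padding (the pad0 check this patch removes): old
     * records are already known to carry 0 there. */
    return 0;
}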

Jan
Tamas K Lengyel March 17, 2022, 2:43 p.m. UTC | #7
On Thu, Mar 17, 2022 at 9:56 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 10.03.2022 19:44, Tamas K Lengyel wrote:
> > During VM fork resetting a failed vmentry has been observed when the reset
> > is performed immediately after a STI instruction executed. This is due to
> > the guest interruptibility state in the VMCS being modified by STI but the
> > subsequent reset removes the IF bit from FLAGS, causing the failed vmentry.
>
> I first thought this was backwards, but after re-reading a couple of
> times I think the issue is merely with you stating this as if this
> was what always happens, while it really depends on the state that
> the VM is being reset to. I think it would further be helpful if you
> made clear that other interruptibility state could also cause issues
> when not properly restored. One way to express this would be to
> simply insert "e.g." ahead of "a STI instruction".

Correct, there could be other instances where the interruptibility
state could go out of sync with RFLAGS; executing STI and then
resetting only the register state to the pre-STI parent is just one I
stumbled into.

>
> > @@ -1155,6 +1154,8 @@ static int cf_check hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
> >      v->arch.dr6   = ctxt.dr6;
> >      v->arch.dr7   = ctxt.dr7;
> >
> > +    hvm_set_interrupt_shadow(v, ctxt.interruptibility_info);
>
> Setting reserved bits as well as certain combinations of bits will
> cause VM entry to fail. I think it would be nice to report this as
> an error here rather than waiting for the VM entry failure.

Not sure if this would be the right spot to do that since that's all
VMX specific and this is the common hvm code.

>
> > --- a/xen/arch/x86/include/asm/hvm/hvm.h
> > +++ b/xen/arch/x86/include/asm/hvm/hvm.h
> > @@ -720,6 +720,22 @@ static inline int hvm_vmtrace_reset(struct vcpu *v)
> >      return -EOPNOTSUPP;
> >  }
> >
> > +static inline unsigned long hvm_get_interrupt_shadow(struct vcpu *v)
>
> unsigned long here and ...
>
> > +{
> > +    if ( hvm_funcs.get_interrupt_shadow )
> > +        return alternative_call(hvm_funcs.get_interrupt_shadow, v);
> > +
> > +    return -EOPNOTSUPP;
> > +}
> > +
> > +static inline void
> > +hvm_set_interrupt_shadow(struct vcpu *v, unsigned long val)
>
> ... here are not in line with the hooks' types. Same for the stubs
> further down then.
>
> > +{
> > +    if ( hvm_funcs.set_interrupt_shadow )
> > +        alternative_vcall(hvm_funcs.set_interrupt_shadow, v, val);
> > +}
> > +
> > +
> >  /*
>
> Please don't insert multiple successive blank lines.

Ack.

Tamas
Jan Beulich March 17, 2022, 3:06 p.m. UTC | #8
On 17.03.2022 15:43, Tamas K Lengyel wrote:
> On Thu, Mar 17, 2022 at 9:56 AM Jan Beulich <jbeulich@suse.com> wrote:
>> On 10.03.2022 19:44, Tamas K Lengyel wrote:
>>> @@ -1155,6 +1154,8 @@ static int cf_check hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
>>>      v->arch.dr6   = ctxt.dr6;
>>>      v->arch.dr7   = ctxt.dr7;
>>>
>>> +    hvm_set_interrupt_shadow(v, ctxt.interruptibility_info);
>>
>> Setting reserved bits as well as certain combinations of bits will
>> cause VM entry to fail. I think it would be nice to report this as
>> an error here rather than waiting for the VM entry failure.
> 
> Not sure if this would be the right spot to do that since that's all
> VMX specific and this is the common hvm code.

Well, it would be the VMX hook to do the checking, with an error
propagated back here.

Jan
Tamas K Lengyel March 17, 2022, 3:59 p.m. UTC | #9
On Thu, Mar 17, 2022 at 11:06 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 17.03.2022 15:43, Tamas K Lengyel wrote:
> > On Thu, Mar 17, 2022 at 9:56 AM Jan Beulich <jbeulich@suse.com> wrote:
> >> On 10.03.2022 19:44, Tamas K Lengyel wrote:
> >>> @@ -1155,6 +1154,8 @@ static int cf_check hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
> >>>      v->arch.dr6   = ctxt.dr6;
> >>>      v->arch.dr7   = ctxt.dr7;
> >>>
> >>> +    hvm_set_interrupt_shadow(v, ctxt.interruptibility_info);
> >>
> >> Setting reserved bits as well as certain combinations of bits will
> >> cause VM entry to fail. I think it would be nice to report this as
> >> an error here rather than waiting for the VM entry failure.
> >
> > Not sure if this would be the right spot to do that since that's all
> > VMX specific and this is the common hvm code.
>
> Well, it would be the VMX hook to do the checking, with an error
> propagated back here.

I'm actually against it because the overhead of that error checking
during vm forking would be significant with really no benefit. We are
copying the state from the parent where it was running fine before, so
doing that sanity checking thousands of times per second when we
already know it's fine is bad.

Tamas
Jan Beulich March 17, 2022, 4:07 p.m. UTC | #10
On 17.03.2022 16:59, Tamas K Lengyel wrote:
> On Thu, Mar 17, 2022 at 11:06 AM Jan Beulich <jbeulich@suse.com> wrote:
>>
>> On 17.03.2022 15:43, Tamas K Lengyel wrote:
>>> On Thu, Mar 17, 2022 at 9:56 AM Jan Beulich <jbeulich@suse.com> wrote:
>>>> On 10.03.2022 19:44, Tamas K Lengyel wrote:
>>>>> @@ -1155,6 +1154,8 @@ static int cf_check hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
>>>>>      v->arch.dr6   = ctxt.dr6;
>>>>>      v->arch.dr7   = ctxt.dr7;
>>>>>
>>>>> +    hvm_set_interrupt_shadow(v, ctxt.interruptibility_info);
>>>>
>>>> Setting reserved bits as well as certain combinations of bits will
>>>> cause VM entry to fail. I think it would be nice to report this as
>>>> an error here rather than waiting for the VM entry failure.
>>>
>>> Not sure if this would be the right spot to do that since that's all
>>> VMX specific and this is the common hvm code.
>>
>> Well, it would be the VMX hook to do the checking, with an error
>> propagated back here.
> 
> I'm actually against it because the overhead of that error-checking
> during vm forking would be significant with really no benefit. We are
> copying the state from the parent where it was running fine before, so
> doing that sanity checking thousands of times per second when we
> already know its fine is bad.

I can see your point, but my concern is not forking but normal migration
or restoring of guests, where the incoming data is of effectively
unknown origin.

Jan
Tamas K Lengyel March 17, 2022, 4:17 p.m. UTC | #11
On Thu, Mar 17, 2022 at 12:07 PM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 17.03.2022 16:59, Tamas K Lengyel wrote:
> > On Thu, Mar 17, 2022 at 11:06 AM Jan Beulich <jbeulich@suse.com> wrote:
> >>
> >> On 17.03.2022 15:43, Tamas K Lengyel wrote:
> >>> On Thu, Mar 17, 2022 at 9:56 AM Jan Beulich <jbeulich@suse.com> wrote:
> >>>> On 10.03.2022 19:44, Tamas K Lengyel wrote:
> >>>>> @@ -1155,6 +1154,8 @@ static int cf_check hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
> >>>>>      v->arch.dr6   = ctxt.dr6;
> >>>>>      v->arch.dr7   = ctxt.dr7;
> >>>>>
> >>>>> +    hvm_set_interrupt_shadow(v, ctxt.interruptibility_info);
> >>>>
> >>>> Setting reserved bits as well as certain combinations of bits will
> >>>> cause VM entry to fail. I think it would be nice to report this as
> >>>> an error here rather than waiting for the VM entry failure.
> >>>
> >>> Not sure if this would be the right spot to do that since that's all
> >>> VMX specific and this is the common hvm code.
> >>
> >> Well, it would be the VMX hook to do the checking, with an error
> >> propagated back here.
> >
> > I'm actually against it because the overhead of that error-checking
> > during vm forking would be significant with really no benefit. We are
> > copying the state from the parent where it was running fine before, so
> > doing that sanity checking thousands of times per second when we
> > already know its fine is bad.
>
> I can see your point, but my concern is not forking but normal migration
> or restoring of guests, where the incoming data is of effectively
> unknown origin.

IMHO for that route the error checking is better performed at the
toolstack level that sends the data to Xen.

Tamas

Patch

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 709a4191ef..b239c72215 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -897,6 +897,8 @@  static int cf_check hvm_save_cpu_ctxt(struct vcpu *v, hvm_domain_context_t *h)
         ctxt.flags = XEN_X86_FPU_INITIALISED;
     }
 
+    ctxt.interruptibility_info = hvm_get_interrupt_shadow(v);
+
     return hvm_save_entry(CPU, v->vcpu_id, h, &ctxt);
 }
 
@@ -990,9 +992,6 @@  static int cf_check hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
     if ( hvm_load_entry_zeroextend(CPU, h, &ctxt) != 0 )
         return -EINVAL;
 
-    if ( ctxt.pad0 != 0 )
-        return -EINVAL;
-
     /* Sanity check some control registers. */
     if ( (ctxt.cr0 & HVM_CR0_GUEST_RESERVED_BITS) ||
          !(ctxt.cr0 & X86_CR0_ET) ||
@@ -1155,6 +1154,8 @@  static int cf_check hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
     v->arch.dr6   = ctxt.dr6;
     v->arch.dr7   = ctxt.dr7;
 
+    hvm_set_interrupt_shadow(v, ctxt.interruptibility_info);
+
     hvmemul_cancel(v);
 
     /* Auxiliary processors should be woken immediately. */
@@ -3888,7 +3889,7 @@  enum hvm_intblk hvm_interrupt_blocked(struct vcpu *v, struct hvm_intack intack)
          !(guest_cpu_user_regs()->eflags & X86_EFLAGS_IF) )
         return hvm_intblk_rflags_ie;
 
-    intr_shadow = alternative_call(hvm_funcs.get_interrupt_shadow, v);
+    intr_shadow = hvm_get_interrupt_shadow(v);
 
     if ( intr_shadow & (HVM_INTR_SHADOW_STI|HVM_INTR_SHADOW_MOV_SS) )
         return hvm_intblk_shadow;
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index c075370f64..e13817431a 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1323,7 +1323,9 @@  static unsigned int cf_check vmx_get_interrupt_shadow(struct vcpu *v)
 {
     unsigned long intr_shadow;
 
+    vmx_vmcs_enter(v);
     __vmread(GUEST_INTERRUPTIBILITY_INFO, &intr_shadow);
+    vmx_vmcs_exit(v);
 
     return intr_shadow;
 }
@@ -1331,7 +1333,9 @@  static unsigned int cf_check vmx_get_interrupt_shadow(struct vcpu *v)
 static void cf_check vmx_set_interrupt_shadow(
     struct vcpu *v, unsigned int intr_shadow)
 {
+    vmx_vmcs_enter(v);
     __vmwrite(GUEST_INTERRUPTIBILITY_INFO, intr_shadow);
+    vmx_vmcs_exit(v);
 }
 
 static void vmx_load_pdptrs(struct vcpu *v)
diff --git a/xen/arch/x86/include/asm/hvm/hvm.h b/xen/arch/x86/include/asm/hvm/hvm.h
index 5b7ec0cf69..2fb7865a05 100644
--- a/xen/arch/x86/include/asm/hvm/hvm.h
+++ b/xen/arch/x86/include/asm/hvm/hvm.h
@@ -720,6 +720,22 @@  static inline int hvm_vmtrace_reset(struct vcpu *v)
     return -EOPNOTSUPP;
 }
 
+static inline unsigned long hvm_get_interrupt_shadow(struct vcpu *v)
+{
+    if ( hvm_funcs.get_interrupt_shadow )
+        return alternative_call(hvm_funcs.get_interrupt_shadow, v);
+
+    return -EOPNOTSUPP;
+}
+
+static inline void
+hvm_set_interrupt_shadow(struct vcpu *v, unsigned long val)
+{
+    if ( hvm_funcs.set_interrupt_shadow )
+        alternative_vcall(hvm_funcs.set_interrupt_shadow, v, val);
+}
+
+
 /*
  * Accessors for registers which have per-guest-type or per-vendor locations
  * (e.g. VMCS, msr load/save lists, VMCB, VMLOAD lazy, etc).
@@ -863,6 +879,16 @@  static inline void hvm_set_reg(struct vcpu *v, unsigned int reg, uint64_t val)
     ASSERT_UNREACHABLE();
 }
 
+static inline unsigned long hvm_get_interrupt_shadow(struct vcpu *v)
+{
+    ASSERT_UNREACHABLE();
+    return 0;
+}
+static inline void hvm_set_interrupt_shadow(struct vcpu *v, unsigned long val)
+{
+    ASSERT_UNREACHABLE();
+}
+
 #define is_viridian_domain(d) ((void)(d), false)
 #define is_viridian_vcpu(v) ((void)(v), false)
 #define has_viridian_time_ref_count(d) ((void)(d), false)
diff --git a/xen/include/public/arch-x86/hvm/save.h b/xen/include/public/arch-x86/hvm/save.h
index 773a380bc2..e944b1756a 100644
--- a/xen/include/public/arch-x86/hvm/save.h
+++ b/xen/include/public/arch-x86/hvm/save.h
@@ -165,7 +165,8 @@  struct hvm_hw_cpu {
 #define _XEN_X86_FPU_INITIALISED        0
 #define XEN_X86_FPU_INITIALISED         (1U<<_XEN_X86_FPU_INITIALISED)
     uint32_t flags;
-    uint32_t pad0;
+
+    uint32_t interruptibility_info;
 };
 
 struct hvm_hw_cpu_compat {