diff mbox series

[v5] x86/vmx: add hvm functions to get/set non-register state

Message ID e57797cbc2c0706c03363cc7fa1b3e234921fee5.1648214955.git.tamas.lengyel@intel.com (mailing list archive)
State New, archived
Headers show
Series [v5] x86/vmx: add hvm functions to get/set non-register state | expand

Commit Message

Tamas K Lengyel March 25, 2022, 1:33 p.m. UTC
During VM forking and resetting, a failed vmentry has been observed due
to the guest non-register state going out-of-sync with the guest register
state. For example, a VM fork reset right after a STI instruction can trigger
the failed entry. This is due to the guest non-register state not being saved
from the parent VM, thus the reset operation only copies the register state.

Fix this by adding a new pair of hvm functions to get/set the guest
non-register state so that the overall vCPU state remains in sync.

Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
---
v5: Switch to internal-only hvm funcs instead of adding to hvm_hw_cpu
---
 xen/arch/x86/hvm/vmx/vmx.c         | 32 ++++++++++++++++++++++++
 xen/arch/x86/include/asm/hvm/hvm.h | 40 ++++++++++++++++++++++++++++++
 xen/arch/x86/mm/mem_sharing.c      | 11 +++++++-
 3 files changed, 82 insertions(+), 1 deletion(-)

Comments

Tamas K Lengyel April 4, 2022, 1:25 p.m. UTC | #1
On Fri, Mar 25, 2022 at 9:34 AM Tamas K Lengyel <tamas.lengyel@intel.com> wrote:
>
> During VM forking and resetting a failed vmentry has been observed due
> to the guest non-register state going out-of-sync with the guest register
> state. For example, a VM fork reset right after a STI instruction can trigger
> the failed entry. This is due to the guest non-register state not being saved
> from the parent VM, thus the reset operation only copies the register state.
>
> Fix this by adding a new pair of hvm functions to get/set the guest
> non-register state so that the overall vCPU state remains in sync.
>
> Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
> ---
> v5: Switch to internal-only hvm funcs instead of adding to hvm_hw_cpu

Patch ping.
Tian, Kevin April 8, 2022, 3:49 a.m. UTC | #2
> From: Lengyel, Tamas <tamas.lengyel@intel.com>
> Sent: Friday, March 25, 2022 9:33 PM
> 
> During VM forking and resetting a failed vmentry has been observed due
> to the guest non-register state going out-of-sync with the guest register
> state. For example, a VM fork reset right after a STI instruction can trigger
> the failed entry. This is due to the guest non-register state not being saved
> from the parent VM, thus the reset operation only copies the register state.
> 
> Fix this by adding a new pair of hvm functions to get/set the guest
> non-register state so that the overall vCPU state remains in sync.
> 
> Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
> ---
> v5: Switch to internal-only hvm funcs instead of adding to hvm_hw_cpu
> ---
>  xen/arch/x86/hvm/vmx/vmx.c         | 32 ++++++++++++++++++++++++
>  xen/arch/x86/include/asm/hvm/hvm.h | 40
> ++++++++++++++++++++++++++++++
>  xen/arch/x86/mm/mem_sharing.c      | 11 +++++++-
>  3 files changed, 82 insertions(+), 1 deletion(-)
> 
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index c075370f64..2685da16c8 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -1334,6 +1334,36 @@ static void cf_check vmx_set_interrupt_shadow(
>      __vmwrite(GUEST_INTERRUPTIBILITY_INFO, intr_shadow);
>  }
> 
> +static void cf_check vmx_get_nonreg_state(struct vcpu *v,
> +    struct hvm_vcpu_nonreg_state *nrs)
> +{
> +    vmx_vmcs_enter(v);
> +
> +    __vmread(GUEST_ACTIVITY_STATE, &nrs->vmx.activity_state);
> +    __vmread(GUEST_INTERRUPTIBILITY_INFO, &nrs-
> >vmx.interruptibility_info);
> +    __vmread(GUEST_PENDING_DBG_EXCEPTIONS, &nrs->vmx.pending_dbg);
> +
> +    if ( cpu_has_vmx_virtual_intr_delivery )
> +        __vmread(GUEST_INTR_STATUS, &nrs->vmx.interrupt_status);

There is no explanation anywhere of how those states were selected.
Your discussion with Andrew leaves me with the impression that Andrew sees
more issues in the general save/restore path, while you only want to deal
with the requirements of your own usage. But according to v1, your usage only
cares about the interruptibility info. This implies that v5 is in a state
somewhere between your original intention and what Andrew actually wants...

Thanks
Kevin
Tamas K Lengyel April 8, 2022, 12:40 p.m. UTC | #3
On Thu, Apr 7, 2022 at 11:49 PM Tian, Kevin <kevin.tian@intel.com> wrote:
>
> > From: Lengyel, Tamas <tamas.lengyel@intel.com>
> > Sent: Friday, March 25, 2022 9:33 PM
> >
> > During VM forking and resetting a failed vmentry has been observed due
> > to the guest non-register state going out-of-sync with the guest register
> > state. For example, a VM fork reset right after a STI instruction can trigger
> > the failed entry. This is due to the guest non-register state not being saved
> > from the parent VM, thus the reset operation only copies the register state.
> >
> > Fix this by adding a new pair of hvm functions to get/set the guest
> > non-register state so that the overall vCPU state remains in sync.
> >
> > Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
> > ---
> > v5: Switch to internal-only hvm funcs instead of adding to hvm_hw_cpu
> > ---
> >  xen/arch/x86/hvm/vmx/vmx.c         | 32 ++++++++++++++++++++++++
> >  xen/arch/x86/include/asm/hvm/hvm.h | 40
> > ++++++++++++++++++++++++++++++
> >  xen/arch/x86/mm/mem_sharing.c      | 11 +++++++-
> >  3 files changed, 82 insertions(+), 1 deletion(-)
> >
> > diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> > index c075370f64..2685da16c8 100644
> > --- a/xen/arch/x86/hvm/vmx/vmx.c
> > +++ b/xen/arch/x86/hvm/vmx/vmx.c
> > @@ -1334,6 +1334,36 @@ static void cf_check vmx_set_interrupt_shadow(
> >      __vmwrite(GUEST_INTERRUPTIBILITY_INFO, intr_shadow);
> >  }
> >
> > +static void cf_check vmx_get_nonreg_state(struct vcpu *v,
> > +    struct hvm_vcpu_nonreg_state *nrs)
> > +{
> > +    vmx_vmcs_enter(v);
> > +
> > +    __vmread(GUEST_ACTIVITY_STATE, &nrs->vmx.activity_state);
> > +    __vmread(GUEST_INTERRUPTIBILITY_INFO, &nrs-
> > >vmx.interruptibility_info);
> > +    __vmread(GUEST_PENDING_DBG_EXCEPTIONS, &nrs->vmx.pending_dbg);
> > +
> > +    if ( cpu_has_vmx_virtual_intr_delivery )
> > +        __vmread(GUEST_INTR_STATUS, &nrs->vmx.interrupt_status);
>
> There is no explanation anywhere of how those states were selected.
> Your discussion with Andrew leaves me with the impression that Andrew sees
> more issues in the general save/restore path, while you only want to deal
> with the requirements of your own usage. But according to v1, your usage only
> cares about the interruptibility info. This implies that v5 is in a state
> somewhere between your original intention and what Andrew actually wants...

These fields are all guest non-register state, so they are not
completely arbitrary. It is true that at v1 only the interruptibility info
was observed to cause issues when it went out-of-sync after a
reset. Since then, pending_dbg was also found to need a reset
under some circumstances. So at this point I see no reason to wait to
include the other values in the reset. If you have any insight into why
those fields don't need to be kept in sync with the rest of the vCPU
state, please share.

As for the save/restore path concerns, I don't really have a clear
insight into what is needed to fix them. Furthermore, the proposed sanity
checking on these values, which would legitimately be needed for
save/restore, is pure overhead for our use-case. So the two paths
are better left separate in any case.

Tamas
Tamas K Lengyel April 18, 2022, 6:43 p.m. UTC | #4
On Fri, Mar 25, 2022 at 9:34 AM Tamas K Lengyel <tamas.lengyel@intel.com> wrote:
>
> During VM forking and resetting a failed vmentry has been observed due
> to the guest non-register state going out-of-sync with the guest register
> state. For example, a VM fork reset right after a STI instruction can trigger
> the failed entry. This is due to the guest non-register state not being saved
> from the parent VM, thus the reset operation only copies the register state.
>
> Fix this by adding a new pair of hvm functions to get/set the guest
> non-register state so that the overall vCPU state remains in sync.
>
> Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
> ---
> v5: Switch to internal-only hvm funcs instead of adding to hvm_hw_cpu

Patch ping.
Tian, Kevin April 20, 2022, 6:39 a.m. UTC | #5
> From: Tamas K Lengyel <tamas@tklengyel.com>
> Sent: Tuesday, April 19, 2022 2:43 AM
> 
> On Fri, Mar 25, 2022 at 9:34 AM Tamas K Lengyel <tamas.lengyel@intel.com>
> wrote:
> >
> > During VM forking and resetting a failed vmentry has been observed due
> > to the guest non-register state going out-of-sync with the guest register
> > state. For example, a VM fork reset right after a STI instruction can trigger
> > the failed entry. This is due to the guest non-register state not being saved
> > from the parent VM, thus the reset operation only copies the register state.
> >
> > Fix this by adding a new pair of hvm functions to get/set the guest
> > non-register state so that the overall vCPU state remains in sync.
> >
> > Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
> > ---
> > v5: Switch to internal-only hvm funcs instead of adding to hvm_hw_cpu
> 
> Patch ping.

I'd like to hear opinions from Andrew/Jan first. Obviously they have
different thoughts when reviewing the earlier versions.
Jan Beulich April 20, 2022, 6:50 a.m. UTC | #6
On 20.04.2022 08:39, Tian, Kevin wrote:
>> From: Tamas K Lengyel <tamas@tklengyel.com>
>> Sent: Tuesday, April 19, 2022 2:43 AM
>>
>> On Fri, Mar 25, 2022 at 9:34 AM Tamas K Lengyel <tamas.lengyel@intel.com>
>> wrote:
>>>
>>> During VM forking and resetting a failed vmentry has been observed due
>>> to the guest non-register state going out-of-sync with the guest register
>>> state. For example, a VM fork reset right after a STI instruction can trigger
>>> the failed entry. This is due to the guest non-register state not being saved
>>> from the parent VM, thus the reset operation only copies the register state.
>>>
>>> Fix this by adding a new pair of hvm functions to get/set the guest
>>> non-register state so that the overall vCPU state remains in sync.
>>>
>>> Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
>>> ---
>>> v5: Switch to internal-only hvm funcs instead of adding to hvm_hw_cpu
>>
>> Patch ping.
> 
> I'd like to hear opinions from Andrew/Jan first. Obviously they have
> different thoughts when reviewing the earlier versions.

Well, I certainly would prefer it if migration were taken care of at the same
time, but I can understand if Tamas doesn't want to put more time into
getting that case working. Plus, as I understand it, this solution to his
problem won't prevent the eventual wider-scope change from also being used
for the specific purpose here, perhaps by simply fully replacing what is
being done now.

Jan
Tamas K Lengyel April 26, 2022, 7:08 p.m. UTC | #7
On Wed, Apr 20, 2022 at 2:50 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 20.04.2022 08:39, Tian, Kevin wrote:
> >> From: Tamas K Lengyel <tamas@tklengyel.com>
> >> Sent: Tuesday, April 19, 2022 2:43 AM
> >>
> >> On Fri, Mar 25, 2022 at 9:34 AM Tamas K Lengyel <tamas.lengyel@intel.com>
> >> wrote:
> >>>
> >>> During VM forking and resetting a failed vmentry has been observed due
> >>> to the guest non-register state going out-of-sync with the guest register
> >>> state. For example, a VM fork reset right after a STI instruction can trigger
> >>> the failed entry. This is due to the guest non-register state not being saved
> >>> from the parent VM, thus the reset operation only copies the register state.
> >>>
> >>> Fix this by adding a new pair of hvm functions to get/set the guest
> >>> non-register state so that the overall vCPU state remains in sync.
> >>>
> >>> Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
> >>> ---
> >>> v5: Switch to internal-only hvm funcs instead of adding to hvm_hw_cpu
> >>
> >> Patch ping.
> >
> > I'd like to hear opinions from Andrew/Jan first. Obviously they have
> > different thoughts when reviewing the earlier versions.
>
> Well, I certainly would prefer if migration was taken care of at the same
> time, but I can understand if Tamas doesn't want to put more time into
> getting that case working. Plus, aiui, this solution to his problem won't
> prevent the eventual wider scope change to be used also for the specific
> purpose here, perhaps by simply fully replacing what is being done now.

Can we move forward with this patch then? As Jan points out, it
doesn't prevent anyone coming up with a fix to the migration case,
whatever shape that might take.

Thanks,
Tamas
Tian, Kevin April 27, 2022, 3:46 a.m. UTC | #8
> From: Lengyel, Tamas <tamas.lengyel@intel.com>
> Sent: Friday, March 25, 2022 9:33 PM
> 
> During VM forking and resetting a failed vmentry has been observed due
> to the guest non-register state going out-of-sync with the guest register
> state. For example, a VM fork reset right after a STI instruction can trigger
> the failed entry. This is due to the guest non-register state not being saved
> from the parent VM, thus the reset operation only copies the register state.
> 
> Fix this by adding a new pair of hvm functions to get/set the guest
> non-register state so that the overall vCPU state remains in sync.
> 
> Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>

Reviewed-by: Kevin Tian <kevin.tian@intel.com>

> ---
> v5: Switch to internal-only hvm funcs instead of adding to hvm_hw_cpu
> ---
>  xen/arch/x86/hvm/vmx/vmx.c         | 32 ++++++++++++++++++++++++
>  xen/arch/x86/include/asm/hvm/hvm.h | 40
> ++++++++++++++++++++++++++++++
>  xen/arch/x86/mm/mem_sharing.c      | 11 +++++++-
>  3 files changed, 82 insertions(+), 1 deletion(-)
> 
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index c075370f64..2685da16c8 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -1334,6 +1334,36 @@ static void cf_check vmx_set_interrupt_shadow(
>      __vmwrite(GUEST_INTERRUPTIBILITY_INFO, intr_shadow);
>  }
> 
> +static void cf_check vmx_get_nonreg_state(struct vcpu *v,
> +    struct hvm_vcpu_nonreg_state *nrs)
> +{
> +    vmx_vmcs_enter(v);
> +
> +    __vmread(GUEST_ACTIVITY_STATE, &nrs->vmx.activity_state);
> +    __vmread(GUEST_INTERRUPTIBILITY_INFO, &nrs-
> >vmx.interruptibility_info);
> +    __vmread(GUEST_PENDING_DBG_EXCEPTIONS, &nrs->vmx.pending_dbg);
> +
> +    if ( cpu_has_vmx_virtual_intr_delivery )
> +        __vmread(GUEST_INTR_STATUS, &nrs->vmx.interrupt_status);
> +
> +    vmx_vmcs_exit(v);
> +}
> +
> +static void cf_check vmx_set_nonreg_state(struct vcpu *v,
> +    struct hvm_vcpu_nonreg_state *nrs)
> +{
> +    vmx_vmcs_enter(v);
> +
> +    __vmwrite(GUEST_ACTIVITY_STATE, nrs->vmx.activity_state);
> +    __vmwrite(GUEST_INTERRUPTIBILITY_INFO, nrs-
> >vmx.interruptibility_info);
> +    __vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, nrs->vmx.pending_dbg);
> +
> +    if ( cpu_has_vmx_virtual_intr_delivery )
> +        __vmwrite(GUEST_INTR_STATUS, nrs->vmx.interrupt_status);
> +
> +    vmx_vmcs_exit(v);
> +}
> +
>  static void vmx_load_pdptrs(struct vcpu *v)
>  {
>      uint32_t cr3 = v->arch.hvm.guest_cr[3];
> @@ -2487,6 +2517,8 @@ static struct hvm_function_table
> __initdata_cf_clobber vmx_function_table = {
>      .load_cpu_ctxt        = vmx_load_vmcs_ctxt,
>      .get_interrupt_shadow = vmx_get_interrupt_shadow,
>      .set_interrupt_shadow = vmx_set_interrupt_shadow,
> +    .get_nonreg_state     = vmx_get_nonreg_state,
> +    .set_nonreg_state     = vmx_set_nonreg_state,
>      .guest_x86_mode       = vmx_guest_x86_mode,
>      .get_cpl              = _vmx_get_cpl,
>      .get_segment_register = vmx_get_segment_register,
> diff --git a/xen/arch/x86/include/asm/hvm/hvm.h
> b/xen/arch/x86/include/asm/hvm/hvm.h
> index 5b7ec0cf69..9dee0f87a3 100644
> --- a/xen/arch/x86/include/asm/hvm/hvm.h
> +++ b/xen/arch/x86/include/asm/hvm/hvm.h
> @@ -84,6 +84,17 @@ enum hvm_intblk {
>  /* update_guest_cr() flags. */
>  #define HVM_UPDATE_GUEST_CR3_NOFLUSH 0x00000001
> 
> +struct hvm_vcpu_nonreg_state {
> +    union {
> +        struct {
> +            uint64_t activity_state;
> +            uint64_t interruptibility_info;
> +            uint64_t pending_dbg;
> +            uint64_t interrupt_status;
> +        } vmx;
> +    };
> +};
> +
>  /*
>   * The hardware virtual machine (HVM) interface abstracts away from the
>   * x86/x86_64 CPU virtualization assist specifics. Currently this interface
> @@ -122,6 +133,10 @@ struct hvm_function_table {
>      /* Examine specifics of the guest state. */
>      unsigned int (*get_interrupt_shadow)(struct vcpu *v);
>      void (*set_interrupt_shadow)(struct vcpu *v, unsigned int intr_shadow);
> +    void (*get_nonreg_state)(struct vcpu *v,
> +                             struct hvm_vcpu_nonreg_state *nrs);
> +    void (*set_nonreg_state)(struct vcpu *v,
> +                             struct hvm_vcpu_nonreg_state *nrs);
>      int (*guest_x86_mode)(struct vcpu *v);
>      unsigned int (*get_cpl)(struct vcpu *v);
>      void (*get_segment_register)(struct vcpu *v, enum x86_segment seg,
> @@ -744,6 +759,20 @@ void hvm_set_reg(struct vcpu *v, unsigned int reg,
> uint64_t val);
>          d_->arch.hvm.pi_ops.vcpu_block(v_);                     \
>  })
> 
> +static inline void hvm_get_nonreg_state(struct vcpu *v,
> +                                        struct hvm_vcpu_nonreg_state *nrs)
> +{
> +    if ( hvm_funcs.get_nonreg_state )
> +        alternative_vcall(hvm_funcs.get_nonreg_state, v, nrs);
> +}
> +
> +static inline void hvm_set_nonreg_state(struct vcpu *v,
> +                                        struct hvm_vcpu_nonreg_state *nrs)
> +{
> +    if ( hvm_funcs.set_nonreg_state )
> +        alternative_vcall(hvm_funcs.set_nonreg_state, v, nrs);
> +}
> +
>  #else  /* CONFIG_HVM */
> 
>  #define hvm_enabled false
> @@ -863,6 +892,17 @@ static inline void hvm_set_reg(struct vcpu *v,
> unsigned int reg, uint64_t val)
>      ASSERT_UNREACHABLE();
>  }
> 
> +static inline void hvm_get_nonreg_state(struct vcpu *v,
> +                                        struct hvm_vcpu_nonreg_state *nrs)
> +{
> +    ASSERT_UNREACHABLE();
> +}
> +static inline void hvm_set_nonreg_state(struct vcpu *v,
> +                                        struct hvm_vcpu_nonreg_state *nrs)
> +{
> +    ASSERT_UNREACHABLE();
> +}
> +
>  #define is_viridian_domain(d) ((void)(d), false)
>  #define is_viridian_vcpu(v) ((void)(v), false)
>  #define has_viridian_time_ref_count(d) ((void)(d), false)
> diff --git a/xen/arch/x86/mm/mem_sharing.c
> b/xen/arch/x86/mm/mem_sharing.c
> index 15e6a7ed81..857accee58 100644
> --- a/xen/arch/x86/mm/mem_sharing.c
> +++ b/xen/arch/x86/mm/mem_sharing.c
> @@ -1643,6 +1643,13 @@ static int bring_up_vcpus(struct domain *cd,
> struct domain *d)
>      return 0;
>  }
> 
> +static void copy_vcpu_nonreg_state(struct vcpu *d_vcpu, struct vcpu
> *cd_vcpu)
> +{
> +    struct hvm_vcpu_nonreg_state nrs = {};
> +    hvm_get_nonreg_state(d_vcpu, &nrs);
> +    hvm_set_nonreg_state(cd_vcpu, &nrs);
> +}
> +
>  static int copy_vcpu_settings(struct domain *cd, const struct domain *d)
>  {
>      unsigned int i;
> @@ -1651,7 +1658,7 @@ static int copy_vcpu_settings(struct domain *cd,
> const struct domain *d)
> 
>      for ( i = 0; i < cd->max_vcpus; i++ )
>      {
> -        const struct vcpu *d_vcpu = d->vcpu[i];
> +        struct vcpu *d_vcpu = d->vcpu[i];
>          struct vcpu *cd_vcpu = cd->vcpu[i];
>          mfn_t vcpu_info_mfn;
> 
> @@ -1694,6 +1701,8 @@ static int copy_vcpu_settings(struct domain *cd,
> const struct domain *d)
> 
>          hvm_vmtrace_reset(cd_vcpu);
> 
> +        copy_vcpu_nonreg_state(d_vcpu, cd_vcpu);
> +
>          /*
>           * TODO: to support VMs with PV interfaces copy additional
>           * settings here, such as PV timers.
> --
> 2.25.1
Jan Beulich April 27, 2022, 7:07 a.m. UTC | #9
On 27.04.2022 05:46, Tian, Kevin wrote:
>> From: Lengyel, Tamas <tamas.lengyel@intel.com>
>> Sent: Friday, March 25, 2022 9:33 PM
>>
>> During VM forking and resetting a failed vmentry has been observed due
>> to the guest non-register state going out-of-sync with the guest register
>> state. For example, a VM fork reset right after a STI instruction can trigger
>> the failed entry. This is due to the guest non-register state not being saved
>> from the parent VM, thus the reset operation only copies the register state.
>>
>> Fix this by adding a new pair of hvm functions to get/set the guest
>> non-register state so that the overall vCPU state remains in sync.
>>
>> Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
> 
> Reviewed-by: Kevin Tian <kevin.tian@intel.com>

Acked-by: Jan Beulich <jbeulich@suse.com>
with ...

>> @@ -863,6 +892,17 @@ static inline void hvm_set_reg(struct vcpu *v,
>> unsigned int reg, uint64_t val)
>>      ASSERT_UNREACHABLE();
>>  }
>>
>> +static inline void hvm_get_nonreg_state(struct vcpu *v,
>> +                                        struct hvm_vcpu_nonreg_state *nrs)
>> +{
>> +    ASSERT_UNREACHABLE();
>> +}
>> +static inline void hvm_set_nonreg_state(struct vcpu *v,
>> +                                        struct hvm_vcpu_nonreg_state *nrs)
>> +{
>> +    ASSERT_UNREACHABLE();
>> +}

... these unnecessary stubs dropped (they should be introduced only
once actually needed, i.e. when a caller appears in a file which is
also built when !CONFIG_HVM), and ...

>> --- a/xen/arch/x86/mm/mem_sharing.c
>> +++ b/xen/arch/x86/mm/mem_sharing.c
>> @@ -1643,6 +1643,13 @@ static int bring_up_vcpus(struct domain *cd,
>> struct domain *d)
>>      return 0;
>>  }
>>
>> +static void copy_vcpu_nonreg_state(struct vcpu *d_vcpu, struct vcpu
>> *cd_vcpu)
>> +{
>> +    struct hvm_vcpu_nonreg_state nrs = {};
>> +    hvm_get_nonreg_state(d_vcpu, &nrs);

... this missing blank line inserted between these two lines. I'll
make both adjustments while committing.

Jan
Tamas K Lengyel April 27, 2022, 3:37 p.m. UTC | #10
On Wed, Apr 27, 2022 at 3:07 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 27.04.2022 05:46, Tian, Kevin wrote:
> >> From: Lengyel, Tamas <tamas.lengyel@intel.com>
> >> Sent: Friday, March 25, 2022 9:33 PM
> >>
> >> During VM forking and resetting a failed vmentry has been observed due
> >> to the guest non-register state going out-of-sync with the guest register
> >> state. For example, a VM fork reset right after a STI instruction can trigger
> >> the failed entry. This is due to the guest non-register state not being saved
> >> from the parent VM, thus the reset operation only copies the register state.
> >>
> >> Fix this by adding a new pair of hvm functions to get/set the guest
> >> non-register state so that the overall vCPU state remains in sync.
> >>
> >> Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
> >
> > Reviewed-by: Kevin Tian <kevin.tian@intel.com>
>
> Acked-by: Jan Beulich <jbeulich@suse.com>
> with ...
>
> >> @@ -863,6 +892,17 @@ static inline void hvm_set_reg(struct vcpu *v,
> >> unsigned int reg, uint64_t val)
> >>      ASSERT_UNREACHABLE();
> >>  }
> >>
> >> +static inline void hvm_get_nonreg_state(struct vcpu *v,
> >> +                                        struct hvm_vcpu_nonreg_state *nrs)
> >> +{
> >> +    ASSERT_UNREACHABLE();
> >> +}
> >> +static inline void hvm_set_nonreg_state(struct vcpu *v,
> >> +                                        struct hvm_vcpu_nonreg_state *nrs)
> >> +{
> >> +    ASSERT_UNREACHABLE();
> >> +}
>
> ... these unnecessary stubs dropped (they should be introduced only
> once actually needed, i.e. when a caller appears in a file which is
> also built when !CONFIG_HVM), and ...
>
> >> --- a/xen/arch/x86/mm/mem_sharing.c
> >> +++ b/xen/arch/x86/mm/mem_sharing.c
> >> @@ -1643,6 +1643,13 @@ static int bring_up_vcpus(struct domain *cd,
> >> struct domain *d)
> >>      return 0;
> >>  }
> >>
> >> +static void copy_vcpu_nonreg_state(struct vcpu *d_vcpu, struct vcpu
> >> *cd_vcpu)
> >> +{
> >> +    struct hvm_vcpu_nonreg_state nrs = {};
> >> +    hvm_get_nonreg_state(d_vcpu, &nrs);
>
> ... this missing blank line inserted between these two lines. I'll
> make both adjustments while committing.

Thanks, both changes are fine from my side.

Tamas
diff mbox series

Patch

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index c075370f64..2685da16c8 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1334,6 +1334,36 @@  static void cf_check vmx_set_interrupt_shadow(
     __vmwrite(GUEST_INTERRUPTIBILITY_INFO, intr_shadow);
 }
 
+static void cf_check vmx_get_nonreg_state(struct vcpu *v,
+    struct hvm_vcpu_nonreg_state *nrs)
+{
+    vmx_vmcs_enter(v);
+
+    __vmread(GUEST_ACTIVITY_STATE, &nrs->vmx.activity_state);
+    __vmread(GUEST_INTERRUPTIBILITY_INFO, &nrs->vmx.interruptibility_info);
+    __vmread(GUEST_PENDING_DBG_EXCEPTIONS, &nrs->vmx.pending_dbg);
+
+    if ( cpu_has_vmx_virtual_intr_delivery )
+        __vmread(GUEST_INTR_STATUS, &nrs->vmx.interrupt_status);
+
+    vmx_vmcs_exit(v);
+}
+
+static void cf_check vmx_set_nonreg_state(struct vcpu *v,
+    struct hvm_vcpu_nonreg_state *nrs)
+{
+    vmx_vmcs_enter(v);
+
+    __vmwrite(GUEST_ACTIVITY_STATE, nrs->vmx.activity_state);
+    __vmwrite(GUEST_INTERRUPTIBILITY_INFO, nrs->vmx.interruptibility_info);
+    __vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, nrs->vmx.pending_dbg);
+
+    if ( cpu_has_vmx_virtual_intr_delivery )
+        __vmwrite(GUEST_INTR_STATUS, nrs->vmx.interrupt_status);
+
+    vmx_vmcs_exit(v);
+}
+
 static void vmx_load_pdptrs(struct vcpu *v)
 {
     uint32_t cr3 = v->arch.hvm.guest_cr[3];
@@ -2487,6 +2517,8 @@  static struct hvm_function_table __initdata_cf_clobber vmx_function_table = {
     .load_cpu_ctxt        = vmx_load_vmcs_ctxt,
     .get_interrupt_shadow = vmx_get_interrupt_shadow,
     .set_interrupt_shadow = vmx_set_interrupt_shadow,
+    .get_nonreg_state     = vmx_get_nonreg_state,
+    .set_nonreg_state     = vmx_set_nonreg_state,
     .guest_x86_mode       = vmx_guest_x86_mode,
     .get_cpl              = _vmx_get_cpl,
     .get_segment_register = vmx_get_segment_register,
diff --git a/xen/arch/x86/include/asm/hvm/hvm.h b/xen/arch/x86/include/asm/hvm/hvm.h
index 5b7ec0cf69..9dee0f87a3 100644
--- a/xen/arch/x86/include/asm/hvm/hvm.h
+++ b/xen/arch/x86/include/asm/hvm/hvm.h
@@ -84,6 +84,17 @@  enum hvm_intblk {
 /* update_guest_cr() flags. */
 #define HVM_UPDATE_GUEST_CR3_NOFLUSH 0x00000001
 
+struct hvm_vcpu_nonreg_state {
+    union {
+        struct {
+            uint64_t activity_state;
+            uint64_t interruptibility_info;
+            uint64_t pending_dbg;
+            uint64_t interrupt_status;
+        } vmx;
+    };
+};
+
 /*
  * The hardware virtual machine (HVM) interface abstracts away from the
  * x86/x86_64 CPU virtualization assist specifics. Currently this interface
@@ -122,6 +133,10 @@  struct hvm_function_table {
     /* Examine specifics of the guest state. */
     unsigned int (*get_interrupt_shadow)(struct vcpu *v);
     void (*set_interrupt_shadow)(struct vcpu *v, unsigned int intr_shadow);
+    void (*get_nonreg_state)(struct vcpu *v,
+                             struct hvm_vcpu_nonreg_state *nrs);
+    void (*set_nonreg_state)(struct vcpu *v,
+                             struct hvm_vcpu_nonreg_state *nrs);
     int (*guest_x86_mode)(struct vcpu *v);
     unsigned int (*get_cpl)(struct vcpu *v);
     void (*get_segment_register)(struct vcpu *v, enum x86_segment seg,
@@ -744,6 +759,20 @@  void hvm_set_reg(struct vcpu *v, unsigned int reg, uint64_t val);
         d_->arch.hvm.pi_ops.vcpu_block(v_);                     \
 })
 
+static inline void hvm_get_nonreg_state(struct vcpu *v,
+                                        struct hvm_vcpu_nonreg_state *nrs)
+{
+    if ( hvm_funcs.get_nonreg_state )
+        alternative_vcall(hvm_funcs.get_nonreg_state, v, nrs);
+}
+
+static inline void hvm_set_nonreg_state(struct vcpu *v,
+                                        struct hvm_vcpu_nonreg_state *nrs)
+{
+    if ( hvm_funcs.set_nonreg_state )
+        alternative_vcall(hvm_funcs.set_nonreg_state, v, nrs);
+}
+
 #else  /* CONFIG_HVM */
 
 #define hvm_enabled false
@@ -863,6 +892,17 @@  static inline void hvm_set_reg(struct vcpu *v, unsigned int reg, uint64_t val)
     ASSERT_UNREACHABLE();
 }
 
+static inline void hvm_get_nonreg_state(struct vcpu *v,
+                                        struct hvm_vcpu_nonreg_state *nrs)
+{
+    ASSERT_UNREACHABLE();
+}
+static inline void hvm_set_nonreg_state(struct vcpu *v,
+                                        struct hvm_vcpu_nonreg_state *nrs)
+{
+    ASSERT_UNREACHABLE();
+}
+
 #define is_viridian_domain(d) ((void)(d), false)
 #define is_viridian_vcpu(v) ((void)(v), false)
 #define has_viridian_time_ref_count(d) ((void)(d), false)
diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
index 15e6a7ed81..857accee58 100644
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -1643,6 +1643,13 @@  static int bring_up_vcpus(struct domain *cd, struct domain *d)
     return 0;
 }
 
+static void copy_vcpu_nonreg_state(struct vcpu *d_vcpu, struct vcpu *cd_vcpu)
+{
+    struct hvm_vcpu_nonreg_state nrs = {};
+    hvm_get_nonreg_state(d_vcpu, &nrs);
+    hvm_set_nonreg_state(cd_vcpu, &nrs);
+}
+
 static int copy_vcpu_settings(struct domain *cd, const struct domain *d)
 {
     unsigned int i;
@@ -1651,7 +1658,7 @@  static int copy_vcpu_settings(struct domain *cd, const struct domain *d)
 
     for ( i = 0; i < cd->max_vcpus; i++ )
     {
-        const struct vcpu *d_vcpu = d->vcpu[i];
+        struct vcpu *d_vcpu = d->vcpu[i];
         struct vcpu *cd_vcpu = cd->vcpu[i];
         mfn_t vcpu_info_mfn;
 
@@ -1694,6 +1701,8 @@  static int copy_vcpu_settings(struct domain *cd, const struct domain *d)
 
         hvm_vmtrace_reset(cd_vcpu);
 
+        copy_vcpu_nonreg_state(d_vcpu, cd_vcpu);
+
         /*
          * TODO: to support VMs with PV interfaces copy additional
          * settings here, such as PV timers.