diff mbox series

PATCH] WHPX: TSC get and set should be dependent on VM state

Message ID SN4PR2101MB08804D23439166E81FF151F7C0EA0@SN4PR2101MB0880.namprd21.prod.outlook.com (mailing list archive)
State New, archived
Headers show
Series PATCH] WHPX: TSC get and set should be dependent on VM state | expand

Commit Message

Sunil Muthuswamy Feb. 26, 2020, 8:54 p.m. UTC
Currently, TSC is set as part of the VM runtime state. Setting TSC at
runtime is heavy and additionally can have side effects on the guest,
which are not very resilient to variances in the TSC. This patch uses
the VM state to determine whether to set TSC or not. Some minor
enhancements for getting TSC values as well that considers the VM state.

Additionally, while setting the TSC, the partition is suspended to
reduce the variance in the TSC value across vCPUs.

Signed-off-by: Sunil Muthuswamy <sunilmut@microsoft.com>
---
 include/sysemu/whpx.h      |   7 +++
 target/i386/whp-dispatch.h |   9 ++++
 target/i386/whpx-all.c     | 103 +++++++++++++++++++++++++++++++++----
 3 files changed, 110 insertions(+), 9 deletions(-)

Comments

Paolo Bonzini Feb. 28, 2020, 10:45 a.m. UTC | #1
On 26/02/20 21:54, Sunil Muthuswamy wrote:
> Currently, TSC is set as part of the VM runtime state. Setting TSC at
> runtime is heavy and additionally can have side effects on the guest,
> which are not very resilient to variances in the TSC. This patch uses
> the VM state to determine whether to set TSC or not. Some minor
> enhancements for getting TSC values as well that considers the VM state.
> 
> Additionally, while setting the TSC, the partition is suspended to
> reduce the variance in the TSC value across vCPUs.
> 
> Signed-off-by: Sunil Muthuswamy <sunilmut@microsoft.com>

Looks good.  Do you want me to queue this until you can have your GPG
key signed?  (And also, I can help you sign it of course).

Thanks,

> ---
>  include/sysemu/whpx.h      |   7 +++
>  target/i386/whp-dispatch.h |   9 ++++
>  target/i386/whpx-all.c     | 103 +++++++++++++++++++++++++++++++++----
>  3 files changed, 110 insertions(+), 9 deletions(-)
> 
> diff --git a/include/sysemu/whpx.h b/include/sysemu/whpx.h
> index 4794e8effe..a84b49e749 100644
> --- a/include/sysemu/whpx.h
> +++ b/include/sysemu/whpx.h
> @@ -35,4 +35,11 @@ int whpx_enabled(void);
>  
>  #endif /* CONFIG_WHPX */
>  
> +/* state subset only touched by the VCPU itself during runtime */
> +#define WHPX_SET_RUNTIME_STATE   1
> +/* state subset modified during VCPU reset */
> +#define WHPX_SET_RESET_STATE     2
> +/* full state set, modified during initialization or on vmload */
> +#define WHPX_SET_FULL_STATE      3
> +
>  #endif /* QEMU_WHPX_H */
> diff --git a/target/i386/whp-dispatch.h b/target/i386/whp-dispatch.h
> index 87d049ceab..e4695c349f 100644
> --- a/target/i386/whp-dispatch.h
> +++ b/target/i386/whp-dispatch.h
> @@ -23,6 +23,12 @@
>    X(HRESULT, WHvGetVirtualProcessorRegisters, (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, const WHV_REGISTER_NAME* RegisterNames, UINT32 RegisterCount, WHV_REGISTER_VALUE* RegisterValues)) \
>    X(HRESULT, WHvSetVirtualProcessorRegisters, (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, const WHV_REGISTER_NAME* RegisterNames, UINT32 RegisterCount, const WHV_REGISTER_VALUE* RegisterValues)) \
>  
> +/*
> + * These are supplemental functions that may not be present
> + * on all versions and are not critical for basic functionality.
> + */
> +#define LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(X) \
> +  X(HRESULT, WHvSuspendPartitionTime, (WHV_PARTITION_HANDLE Partition)) \
>  
>  #define LIST_WINHVEMULATION_FUNCTIONS(X) \
>    X(HRESULT, WHvEmulatorCreateEmulator, (const WHV_EMULATOR_CALLBACKS* Callbacks, WHV_EMULATOR_HANDLE* Emulator)) \
> @@ -40,10 +46,12 @@
>  /* Define function typedef */
>  LIST_WINHVPLATFORM_FUNCTIONS(WHP_DEFINE_TYPE)
>  LIST_WINHVEMULATION_FUNCTIONS(WHP_DEFINE_TYPE)
> +LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_DEFINE_TYPE)
>  
>  struct WHPDispatch {
>      LIST_WINHVPLATFORM_FUNCTIONS(WHP_DECLARE_MEMBER)
>      LIST_WINHVEMULATION_FUNCTIONS(WHP_DECLARE_MEMBER)
> +    LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_DECLARE_MEMBER)
>  };
>  
>  extern struct WHPDispatch whp_dispatch;
> @@ -53,6 +61,7 @@ bool init_whp_dispatch(void);
>  typedef enum WHPFunctionList {
>      WINHV_PLATFORM_FNS_DEFAULT,
>      WINHV_EMULATION_FNS_DEFAULT,
> +    WINHV_PLATFORM_FNS_SUPPLEMENTAL
>  } WHPFunctionList;
>  
>  #endif /* WHP_DISPATCH_H */
> diff --git a/target/i386/whpx-all.c b/target/i386/whpx-all.c
> index 35601b8176..6a885c0fb3 100644
> --- a/target/i386/whpx-all.c
> +++ b/target/i386/whpx-all.c
> @@ -114,7 +114,6 @@ static const WHV_REGISTER_NAME whpx_register_names[] = {
>      WHvX64RegisterXmmControlStatus,
>  
>      /* X64 MSRs */
> -    WHvX64RegisterTsc,
>      WHvX64RegisterEfer,
>  #ifdef TARGET_X86_64
>      WHvX64RegisterKernelGsBase,
> @@ -215,7 +214,44 @@ static SegmentCache whpx_seg_h2q(const WHV_X64_SEGMENT_REGISTER *hs)
>      return qs;
>  }
>  
> -static void whpx_set_registers(CPUState *cpu)
> +static int whpx_set_tsc(CPUState *cpu)
> +{
> +    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
> +    WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc;
> +    WHV_REGISTER_VALUE tsc_val;
> +    HRESULT hr;
> +    struct whpx_state *whpx = &whpx_global;
> +
> +    /*
> +     * Suspend the partition prior to setting the TSC to reduce the variance
> +     * in TSC across vCPUs. When the first vCPU runs post suspend, the
> +     * partition is automatically resumed.
> +     */
> +    if (whp_dispatch.WHvSuspendPartitionTime) {
> +
> +        /*
> +         * Unable to suspend partition while setting TSC is not a fatal
> +         * error. It just increases the likelihood of TSC variance between
> +         * vCPUs and some guest OS are able to handle that just fine.
> +         */
> +        hr = whp_dispatch.WHvSuspendPartitionTime(whpx->partition);
> +        if (FAILED(hr)) {
> +            warn_report("WHPX: Failed to suspend partition, hr=%08lx", hr);
> +        }
> +    }
> +
> +    tsc_val.Reg64 = env->tsc;
> +    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
> +        whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val);
> +    if (FAILED(hr)) {
> +        error_report("WHPX: Failed to set TSC, hr=%08lx", hr);
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static void whpx_set_registers(CPUState *cpu, int level)
>  {
>      struct whpx_state *whpx = &whpx_global;
>      struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
> @@ -230,6 +266,14 @@ static void whpx_set_registers(CPUState *cpu)
>  
>      assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
>  
> +    /*
> +     * Following MSRs have side effects on the guest or are too heavy for
> +     * runtime. Limit them to full state update.
> +     */
> +    if (level >= WHPX_SET_RESET_STATE) {
> +        whpx_set_tsc(cpu);
> +    }
> +
>      memset(&vcxt, 0, sizeof(struct whpx_register_set));
>  
>      v86 = (env->eflags & VM_MASK);
> @@ -330,8 +374,6 @@ static void whpx_set_registers(CPUState *cpu)
>      idx += 1;
>  
>      /* MSRs */
> -    assert(whpx_register_names[idx] == WHvX64RegisterTsc);
> -    vcxt.values[idx++].Reg64 = env->tsc;
>      assert(whpx_register_names[idx] == WHvX64RegisterEfer);
>      vcxt.values[idx++].Reg64 = env->efer;
>  #ifdef TARGET_X86_64
> @@ -379,6 +421,25 @@ static void whpx_set_registers(CPUState *cpu)
>      return;
>  }
>  
> +static int whpx_get_tsc(CPUState *cpu)
> +{
> +    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
> +    WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc;
> +    WHV_REGISTER_VALUE tsc_val;
> +    HRESULT hr;
> +    struct whpx_state *whpx = &whpx_global;
> +
> +    hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
> +        whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val);
> +    if (FAILED(hr)) {
> +        error_report("WHPX: Failed to get TSC, hr=%08lx", hr);
> +        return -1;
> +    }
> +
> +    env->tsc = tsc_val.Reg64;
> +    return 0;
> +}
> +
>  static void whpx_get_registers(CPUState *cpu)
>  {
>      struct whpx_state *whpx = &whpx_global;
> @@ -394,6 +455,11 @@ static void whpx_get_registers(CPUState *cpu)
>  
>      assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
>  
> +    if (!env->tsc_valid) {
> +        whpx_get_tsc(cpu);
> +        env->tsc_valid = !runstate_is_running();
> +    }
> +
>      hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
>          whpx->partition, cpu->cpu_index,
>          whpx_register_names,
> @@ -492,8 +558,6 @@ static void whpx_get_registers(CPUState *cpu)
>      idx += 1;
>  
>      /* MSRs */
> -    assert(whpx_register_names[idx] == WHvX64RegisterTsc);
> -    env->tsc = vcxt.values[idx++].Reg64;
>      assert(whpx_register_names[idx] == WHvX64RegisterEfer);
>      env->efer = vcxt.values[idx++].Reg64;
>  #ifdef TARGET_X86_64
> @@ -896,7 +960,7 @@ static int whpx_vcpu_run(CPUState *cpu)
>  
>      do {
>          if (cpu->vcpu_dirty) {
> -            whpx_set_registers(cpu);
> +            whpx_set_registers(cpu, WHPX_SET_RUNTIME_STATE);
>              cpu->vcpu_dirty = false;
>          }
>  
> @@ -1074,14 +1138,14 @@ static void do_whpx_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
>  static void do_whpx_cpu_synchronize_post_reset(CPUState *cpu,
>                                                 run_on_cpu_data arg)
>  {
> -    whpx_set_registers(cpu);
> +    whpx_set_registers(cpu, WHPX_SET_RESET_STATE);
>      cpu->vcpu_dirty = false;
>  }
>  
>  static void do_whpx_cpu_synchronize_post_init(CPUState *cpu,
>                                                run_on_cpu_data arg)
>  {
> -    whpx_set_registers(cpu);
> +    whpx_set_registers(cpu, WHPX_SET_FULL_STATE);
>      cpu->vcpu_dirty = false;
>  }
>  
> @@ -1123,6 +1187,15 @@ void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu)
>  
>  static Error *whpx_migration_blocker;
>  
> +static void whpx_cpu_update_state(void *opaque, int running, RunState state)
> +{
> +    CPUX86State *env = opaque;
> +
> +    if (running) {
> +        env->tsc_valid = false;
> +    }
> +}
> +
>  int whpx_init_vcpu(CPUState *cpu)
>  {
>      HRESULT hr;
> @@ -1178,6 +1251,7 @@ int whpx_init_vcpu(CPUState *cpu)
>  
>      cpu->vcpu_dirty = true;
>      cpu->hax_vcpu = (struct hax_vcpu_state *)vcpu;
> +    qemu_add_vm_change_state_handler(whpx_cpu_update_state, cpu->env_ptr);
>  
>      return 0;
>  }
> @@ -1367,6 +1441,10 @@ static bool load_whp_dispatch_fns(HMODULE *handle,
>  
>      #define WINHV_PLATFORM_DLL "WinHvPlatform.dll"
>      #define WINHV_EMULATION_DLL "WinHvEmulation.dll"
> +    #define WHP_LOAD_FIELD_OPTIONAL(return_type, function_name, signature) \
> +        whp_dispatch.function_name = \
> +            (function_name ## _t)GetProcAddress(hLib, #function_name); \
> +
>      #define WHP_LOAD_FIELD(return_type, function_name, signature) \
>          whp_dispatch.function_name = \
>              (function_name ## _t)GetProcAddress(hLib, #function_name); \
> @@ -1394,6 +1472,11 @@ static bool load_whp_dispatch_fns(HMODULE *handle,
>          WHP_LOAD_LIB(WINHV_EMULATION_DLL, hLib)
>          LIST_WINHVEMULATION_FUNCTIONS(WHP_LOAD_FIELD)
>          break;
> +
> +    case WINHV_PLATFORM_FNS_SUPPLEMENTAL:
> +        WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib)
> +        LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_LOAD_FIELD_OPTIONAL)
> +        break;
>      }
>  
>      *handle = hLib;
> @@ -1554,6 +1637,8 @@ bool init_whp_dispatch(void)
>          goto error;
>      }
>  
> +    assert(load_whp_dispatch_fns(&hWinHvPlatform,
> +        WINHV_PLATFORM_FNS_SUPPLEMENTAL));
>      whp_dispatch_initialized = true;
>  
>      return true;
>
Sunil Muthuswamy Feb. 28, 2020, 9:02 p.m. UTC | #2
> -----Original Message-----
> From: Paolo Bonzini <pbonzini@redhat.com>
> Sent: Friday, February 28, 2020 2:45 AM
> To: Sunil Muthuswamy <sunilmut@microsoft.com>; Richard Henderson <rth@twiddle.net>; Eduardo Habkost
> <ehabkost@redhat.com>
> Cc: qemu-devel@nongnu.org; Stefan Weil <sw@weilnetz.de>
> Subject: [EXTERNAL] Re: PATCH] WHPX: TSC get and set should be dependent on VM state
> 
> On 26/02/20 21:54, Sunil Muthuswamy wrote:
> > Currently, TSC is set as part of the VM runtime state. Setting TSC at
> > runtime is heavy and additionally can have side effects on the guest,
> > which are not very resilient to variances in the TSC. This patch uses
> > the VM state to determine whether to set TSC or not. Some minor
> > enhancements for getting TSC values as well that considers the VM state.
> >
> > Additionally, while setting the TSC, the partition is suspended to
> > reduce the variance in the TSC value across vCPUs.
> >
> > Signed-off-by: Sunil Muthuswamy <sunilmut@microsoft.com>
> 
> Looks good.  Do you want me to queue this until you can have your GPG
> key signed?  (And also, I can help you sign it of course).
> 

Yes, please. Thanks.

I haven't used GPG keys before. What would I be using it for?
 
> Thanks,
> 
> > ---
> >  include/sysemu/whpx.h      |   7 +++
> >  target/i386/whp-dispatch.h |   9 ++++
> >  target/i386/whpx-all.c     | 103 +++++++++++++++++++++++++++++++++----
> >  3 files changed, 110 insertions(+), 9 deletions(-)
> >
> > diff --git a/include/sysemu/whpx.h b/include/sysemu/whpx.h
> > index 4794e8effe..a84b49e749 100644
> > --- a/include/sysemu/whpx.h
> > +++ b/include/sysemu/whpx.h
> > @@ -35,4 +35,11 @@ int whpx_enabled(void);
> >
> >  #endif /* CONFIG_WHPX */
> >
> > +/* state subset only touched by the VCPU itself during runtime */
> > +#define WHPX_SET_RUNTIME_STATE   1
> > +/* state subset modified during VCPU reset */
> > +#define WHPX_SET_RESET_STATE     2
> > +/* full state set, modified during initialization or on vmload */
> > +#define WHPX_SET_FULL_STATE      3
> > +
> >  #endif /* QEMU_WHPX_H */
> > diff --git a/target/i386/whp-dispatch.h b/target/i386/whp-dispatch.h
> > index 87d049ceab..e4695c349f 100644
> > --- a/target/i386/whp-dispatch.h
> > +++ b/target/i386/whp-dispatch.h
> > @@ -23,6 +23,12 @@
> >    X(HRESULT, WHvGetVirtualProcessorRegisters, (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, const
> WHV_REGISTER_NAME* RegisterNames, UINT32 RegisterCount, WHV_REGISTER_VALUE* RegisterValues)) \
> >    X(HRESULT, WHvSetVirtualProcessorRegisters, (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, const
> WHV_REGISTER_NAME* RegisterNames, UINT32 RegisterCount, const WHV_REGISTER_VALUE* RegisterValues)) \
> >
> > +/*
> > + * These are supplemental functions that may not be present
> > + * on all versions and are not critical for basic functionality.
> > + */
> > +#define LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(X) \
> > +  X(HRESULT, WHvSuspendPartitionTime, (WHV_PARTITION_HANDLE Partition)) \
> >
> >  #define LIST_WINHVEMULATION_FUNCTIONS(X) \
> >    X(HRESULT, WHvEmulatorCreateEmulator, (const WHV_EMULATOR_CALLBACKS* Callbacks, WHV_EMULATOR_HANDLE*
> Emulator)) \
> > @@ -40,10 +46,12 @@
> >  /* Define function typedef */
> >  LIST_WINHVPLATFORM_FUNCTIONS(WHP_DEFINE_TYPE)
> >  LIST_WINHVEMULATION_FUNCTIONS(WHP_DEFINE_TYPE)
> > +LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_DEFINE_TYPE)
> >
> >  struct WHPDispatch {
> >      LIST_WINHVPLATFORM_FUNCTIONS(WHP_DECLARE_MEMBER)
> >      LIST_WINHVEMULATION_FUNCTIONS(WHP_DECLARE_MEMBER)
> > +    LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_DECLARE_MEMBER)
> >  };
> >
> >  extern struct WHPDispatch whp_dispatch;
> > @@ -53,6 +61,7 @@ bool init_whp_dispatch(void);
> >  typedef enum WHPFunctionList {
> >      WINHV_PLATFORM_FNS_DEFAULT,
> >      WINHV_EMULATION_FNS_DEFAULT,
> > +    WINHV_PLATFORM_FNS_SUPPLEMENTAL
> >  } WHPFunctionList;
> >
> >  #endif /* WHP_DISPATCH_H */
> > diff --git a/target/i386/whpx-all.c b/target/i386/whpx-all.c
> > index 35601b8176..6a885c0fb3 100644
> > --- a/target/i386/whpx-all.c
> > +++ b/target/i386/whpx-all.c
> > @@ -114,7 +114,6 @@ static const WHV_REGISTER_NAME whpx_register_names[] = {
> >      WHvX64RegisterXmmControlStatus,
> >
> >      /* X64 MSRs */
> > -    WHvX64RegisterTsc,
> >      WHvX64RegisterEfer,
> >  #ifdef TARGET_X86_64
> >      WHvX64RegisterKernelGsBase,
> > @@ -215,7 +214,44 @@ static SegmentCache whpx_seg_h2q(const WHV_X64_SEGMENT_REGISTER *hs)
> >      return qs;
> >  }
> >
> > -static void whpx_set_registers(CPUState *cpu)
> > +static int whpx_set_tsc(CPUState *cpu)
> > +{
> > +    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
> > +    WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc;
> > +    WHV_REGISTER_VALUE tsc_val;
> > +    HRESULT hr;
> > +    struct whpx_state *whpx = &whpx_global;
> > +
> > +    /*
> > +     * Suspend the partition prior to setting the TSC to reduce the variance
> > +     * in TSC across vCPUs. When the first vCPU runs post suspend, the
> > +     * partition is automatically resumed.
> > +     */
> > +    if (whp_dispatch.WHvSuspendPartitionTime) {
> > +
> > +        /*
> > +         * Unable to suspend partition while setting TSC is not a fatal
> > +         * error. It just increases the likelihood of TSC variance between
> > +         * vCPUs and some guest OS are able to handle that just fine.
> > +         */
> > +        hr = whp_dispatch.WHvSuspendPartitionTime(whpx->partition);
> > +        if (FAILED(hr)) {
> > +            warn_report("WHPX: Failed to suspend partition, hr=%08lx", hr);
> > +        }
> > +    }
> > +
> > +    tsc_val.Reg64 = env->tsc;
> > +    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
> > +        whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val);
> > +    if (FAILED(hr)) {
> > +        error_report("WHPX: Failed to set TSC, hr=%08lx", hr);
> > +        return -1;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +static void whpx_set_registers(CPUState *cpu, int level)
> >  {
> >      struct whpx_state *whpx = &whpx_global;
> >      struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
> > @@ -230,6 +266,14 @@ static void whpx_set_registers(CPUState *cpu)
> >
> >      assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
> >
> > +    /*
> > +     * Following MSRs have side effects on the guest or are too heavy for
> > +     * runtime. Limit them to full state update.
> > +     */
> > +    if (level >= WHPX_SET_RESET_STATE) {
> > +        whpx_set_tsc(cpu);
> > +    }
> > +
> >      memset(&vcxt, 0, sizeof(struct whpx_register_set));
> >
> >      v86 = (env->eflags & VM_MASK);
> > @@ -330,8 +374,6 @@ static void whpx_set_registers(CPUState *cpu)
> >      idx += 1;
> >
> >      /* MSRs */
> > -    assert(whpx_register_names[idx] == WHvX64RegisterTsc);
> > -    vcxt.values[idx++].Reg64 = env->tsc;
> >      assert(whpx_register_names[idx] == WHvX64RegisterEfer);
> >      vcxt.values[idx++].Reg64 = env->efer;
> >  #ifdef TARGET_X86_64
> > @@ -379,6 +421,25 @@ static void whpx_set_registers(CPUState *cpu)
> >      return;
> >  }
> >
> > +static int whpx_get_tsc(CPUState *cpu)
> > +{
> > +    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
> > +    WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc;
> > +    WHV_REGISTER_VALUE tsc_val;
> > +    HRESULT hr;
> > +    struct whpx_state *whpx = &whpx_global;
> > +
> > +    hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
> > +        whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val);
> > +    if (FAILED(hr)) {
> > +        error_report("WHPX: Failed to get TSC, hr=%08lx", hr);
> > +        return -1;
> > +    }
> > +
> > +    env->tsc = tsc_val.Reg64;
> > +    return 0;
> > +}
> > +
> >  static void whpx_get_registers(CPUState *cpu)
> >  {
> >      struct whpx_state *whpx = &whpx_global;
> > @@ -394,6 +455,11 @@ static void whpx_get_registers(CPUState *cpu)
> >
> >      assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
> >
> > +    if (!env->tsc_valid) {
> > +        whpx_get_tsc(cpu);
> > +        env->tsc_valid = !runstate_is_running();
> > +    }
> > +
> >      hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
> >          whpx->partition, cpu->cpu_index,
> >          whpx_register_names,
> > @@ -492,8 +558,6 @@ static void whpx_get_registers(CPUState *cpu)
> >      idx += 1;
> >
> >      /* MSRs */
> > -    assert(whpx_register_names[idx] == WHvX64RegisterTsc);
> > -    env->tsc = vcxt.values[idx++].Reg64;
> >      assert(whpx_register_names[idx] == WHvX64RegisterEfer);
> >      env->efer = vcxt.values[idx++].Reg64;
> >  #ifdef TARGET_X86_64
> > @@ -896,7 +960,7 @@ static int whpx_vcpu_run(CPUState *cpu)
> >
> >      do {
> >          if (cpu->vcpu_dirty) {
> > -            whpx_set_registers(cpu);
> > +            whpx_set_registers(cpu, WHPX_SET_RUNTIME_STATE);
> >              cpu->vcpu_dirty = false;
> >          }
> >
> > @@ -1074,14 +1138,14 @@ static void do_whpx_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
> >  static void do_whpx_cpu_synchronize_post_reset(CPUState *cpu,
> >                                                 run_on_cpu_data arg)
> >  {
> > -    whpx_set_registers(cpu);
> > +    whpx_set_registers(cpu, WHPX_SET_RESET_STATE);
> >      cpu->vcpu_dirty = false;
> >  }
> >
> >  static void do_whpx_cpu_synchronize_post_init(CPUState *cpu,
> >                                                run_on_cpu_data arg)
> >  {
> > -    whpx_set_registers(cpu);
> > +    whpx_set_registers(cpu, WHPX_SET_FULL_STATE);
> >      cpu->vcpu_dirty = false;
> >  }
> >
> > @@ -1123,6 +1187,15 @@ void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu)
> >
> >  static Error *whpx_migration_blocker;
> >
> > +static void whpx_cpu_update_state(void *opaque, int running, RunState state)
> > +{
> > +    CPUX86State *env = opaque;
> > +
> > +    if (running) {
> > +        env->tsc_valid = false;
> > +    }
> > +}
> > +
> >  int whpx_init_vcpu(CPUState *cpu)
> >  {
> >      HRESULT hr;
> > @@ -1178,6 +1251,7 @@ int whpx_init_vcpu(CPUState *cpu)
> >
> >      cpu->vcpu_dirty = true;
> >      cpu->hax_vcpu = (struct hax_vcpu_state *)vcpu;
> > +    qemu_add_vm_change_state_handler(whpx_cpu_update_state, cpu->env_ptr);
> >
> >      return 0;
> >  }
> > @@ -1367,6 +1441,10 @@ static bool load_whp_dispatch_fns(HMODULE *handle,
> >
> >      #define WINHV_PLATFORM_DLL "WinHvPlatform.dll"
> >      #define WINHV_EMULATION_DLL "WinHvEmulation.dll"
> > +    #define WHP_LOAD_FIELD_OPTIONAL(return_type, function_name, signature) \
> > +        whp_dispatch.function_name = \
> > +            (function_name ## _t)GetProcAddress(hLib, #function_name); \
> > +
> >      #define WHP_LOAD_FIELD(return_type, function_name, signature) \
> >          whp_dispatch.function_name = \
> >              (function_name ## _t)GetProcAddress(hLib, #function_name); \
> > @@ -1394,6 +1472,11 @@ static bool load_whp_dispatch_fns(HMODULE *handle,
> >          WHP_LOAD_LIB(WINHV_EMULATION_DLL, hLib)
> >          LIST_WINHVEMULATION_FUNCTIONS(WHP_LOAD_FIELD)
> >          break;
> > +
> > +    case WINHV_PLATFORM_FNS_SUPPLEMENTAL:
> > +        WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib)
> > +        LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_LOAD_FIELD_OPTIONAL)
> > +        break;
> >      }
> >
> >      *handle = hLib;
> > @@ -1554,6 +1637,8 @@ bool init_whp_dispatch(void)
> >          goto error;
> >      }
> >
> > +    assert(load_whp_dispatch_fns(&hWinHvPlatform,
> > +        WINHV_PLATFORM_FNS_SUPPLEMENTAL));
> >      whp_dispatch_initialized = true;
> >
> >      return true;
> >
Paolo Bonzini Feb. 29, 2020, 9:39 a.m. UTC | #3
On 28/02/20 22:02, Sunil Muthuswamy wrote:
>> -----Original Message-----
>> From: Paolo Bonzini <pbonzini@redhat.com>
>> Sent: Friday, February 28, 2020 2:45 AM
>> To: Sunil Muthuswamy <sunilmut@microsoft.com>; Richard Henderson <rth@twiddle.net>; Eduardo Habkost
>> <ehabkost@redhat.com>
>> Cc: qemu-devel@nongnu.org; Stefan Weil <sw@weilnetz.de>
>> Subject: [EXTERNAL] Re: PATCH] WHPX: TSC get and set should be dependent on VM state
>>
>> On 26/02/20 21:54, Sunil Muthuswamy wrote:
>>> Currently, TSC is set as part of the VM runtime state. Setting TSC at
>>> runtime is heavy and additionally can have side effects on the guest,
>>> which are not very resilient to variances in the TSC. This patch uses
>>> the VM state to determine whether to set TSC or not. Some minor
>>> enhancements for getting TSC values as well that considers the VM state.
>>>
>>> Additionally, while setting the TSC, the partition is suspended to
>>> reduce the variance in the TSC value across vCPUs.
>>>
>>> Signed-off-by: Sunil Muthuswamy <sunilmut@microsoft.com>
>>
>> Looks good.  Do you want me to queue this until you can have your GPG
>> key signed?  (And also, I can help you sign it of course).
>>
> 
> Yes, please. Thanks.
> 
> I haven't used GPG keys before. What would I be using it for?

You'd be using it to include a signed tags in a pull requests; that is,
the git tag that you ask to pull has a cryptographic signature attached
to it.

Paolo
Sunil Muthuswamy March 2, 2020, 7:59 p.m. UTC | #4
> >> Looks good.  Do you want me to queue this until you can have your GPG
> >> key signed?  (And also, I can help you sign it of course).
> >>
> >
> > Yes, please. Thanks.
> >
> > I haven't used GPG keys before. What would I be using it for?
> 
> You'd be using it to include a signed tags in a pull requests; that is,
> the git tag that you ask to pull has a cryptographic signature attached
> to it.

Great. Is there a link that I can use to read up on how to get the GPG key
and how to include the signature or what process should I be following?

> Paolo
Paolo Bonzini March 3, 2020, 5:52 p.m. UTC | #5
On 02/03/20 20:59, Sunil Muthuswamy wrote:
>> You'd be using it to include a signed tags in a pull requests; that is,
>> the git tag that you ask to pull has a cryptographic signature attached
>> to it.
> Great. Is there a link that I can use to read up on how to get the GPG key
> and how to include the signature or what process should I be following?

This guide seems good, though I haven't tried:

https://medium.com/@ryanmillerc/use-gpg-signing-keys-with-git-on-windows-10-github-4acbced49f68

You don't need the "git config --local commit.gpgsign true" command, but
you will then create a signed tag with

	git tag -s -f qemu-for-upstream
	# let's say "mirror" is your github repo
	git push mirror +tags/for-upstream

and send it to Peter.  I really think we should document this final step
("send it to Peter") better in the wiki, because the git tools for
sending pull requests leave a lot to be desired.

Paolo
Sunil Muthuswamy March 4, 2020, 10:44 p.m. UTC | #6
> -----Original Message-----
> From: Paolo Bonzini <paolo.bonzini@gmail.com> On Behalf Of Paolo Bonzini
> Sent: Tuesday, March 3, 2020 9:53 AM
> To: Sunil Muthuswamy <sunilmut@microsoft.com>; Richard Henderson <rth@twiddle.net>; Eduardo Habkost <ehabkost@redhat.com>
> Cc: qemu-devel@nongnu.org; Stefan Weil <sw@weilnetz.de>
> Subject: Re: [EXTERNAL] Re: PATCH] WHPX: TSC get and set should be dependent on VM state
> 
> On 02/03/20 20:59, Sunil Muthuswamy wrote:
> >> You'd be using it to include a signed tags in a pull requests; that is,
> >> the git tag that you ask to pull has a cryptographic signature attached
> >> to it.
> > Great. Is there a link that I can use to read up on how to get the GPG key
> > and how to include the signature or what process should I be following?
> 
> This guide seems good, though I haven't tried:
> 
> https://nam06.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmedium.com%2F%40ryanmillerc%2Fuse-gpg-signing-keys-with-git-
> on-windows-10-github-
> 4acbced49f68&amp;data=02%7C01%7Csunilmut%40microsoft.com%7C826eb46e9f9f44281bd008d7bf9bb70d%7C72f988bf86f141af91ab2
> d7cd011db47%7C1%7C0%7C637188548242292250&amp;sdata=E%2BQ0ar57y65EI6%2FDXXOnEqHM682SOhu1WyzrXrILWGs%3D&amp;res
> erved=0
> 
> You don't need the "git config --local commit.gpgsign true" command, but
> you will then create a signed tag with
> 
> 	git tag -s -f qemu-for-upstream
> 	# let's say "mirror" is your github repo
> 	git push mirror +tags/for-upstream
> 
> and send it to Peter.  I really think we should document this final step
> ("send it to Peter") better in the wiki, because the git tools for
> sending pull requests leave a lot to be desired.
> 
> Paolo

Thanks. Ok, I am setup with GPG. Where should I be sending the pull requests to? Who is "Peter"? Do I have to send it to you?
Paolo Bonzini March 4, 2020, 10:47 p.m. UTC | #7
On 04/03/20 23:44, Sunil Muthuswamy wrote:
>> You don't need the "git config --local commit.gpgsign true" command, but
>> you will then create a signed tag with
>>
>> 	git tag -s -f qemu-for-upstream
>> 	# let's say "mirror" is your github repo
>> 	git push mirror +tags/for-upstream
>>
>> and send it to Peter.  I really think we should document this final step
>> ("send it to Peter") better in the wiki, because the git tools for
>> sending pull requests leave a lot to be desired.
>
> Thanks. Ok, I am setup with GPG. Where should I be sending the pull requests to? Who is "Peter"? Do I have to send it to you?

Peter is Peter Maydell, but for now you can send them to me.  I'll get
round to documenting the remaining steps.

Unfortunately all the scripts I have for this use the Unix shell, but
they are just a few lines so I might try to rewrite them in Python.
This way you can use them from Windows without needing to bring up WSL
or anything like that.

Paolo
Sunil Muthuswamy July 14, 2020, 6:13 p.m. UTC | #8
> >
> > Thanks. Ok, I am setup with GPG. Where should I be sending the pull requests to? Who is "Peter"? Do I have to send it to you?
> 
> Peter is Peter Maydell, but for now you can send them to me.  I'll get
> round to documenting the remaining steps.
> 
> Unfortunately all the scripts I have for this use the Unix shell, but
> they are just a few lines so I might try to rewrite them in Python.
> This way you can use them from Windows without needing to bring up WSL
> or anything like that.
> 
> Paolo

Paolo, just wanted to make sure that I understood your request. You are asking
me to submit my WHPX changes as signed PRs. But, are you also asking that I
maintain a QEMU fork for all WHPX changes (as WHPX maintainer) and merge
those occasionally? Or, should the other WHPX changes from others be submitted
directly upstream?
diff mbox series

Patch

diff --git a/include/sysemu/whpx.h b/include/sysemu/whpx.h
index 4794e8effe..a84b49e749 100644
--- a/include/sysemu/whpx.h
+++ b/include/sysemu/whpx.h
@@ -35,4 +35,11 @@  int whpx_enabled(void);
 
 #endif /* CONFIG_WHPX */
 
+/* state subset only touched by the VCPU itself during runtime */
+#define WHPX_SET_RUNTIME_STATE   1
+/* state subset modified during VCPU reset */
+#define WHPX_SET_RESET_STATE     2
+/* full state set, modified during initialization or on vmload */
+#define WHPX_SET_FULL_STATE      3
+
 #endif /* QEMU_WHPX_H */
diff --git a/target/i386/whp-dispatch.h b/target/i386/whp-dispatch.h
index 87d049ceab..e4695c349f 100644
--- a/target/i386/whp-dispatch.h
+++ b/target/i386/whp-dispatch.h
@@ -23,6 +23,12 @@ 
   X(HRESULT, WHvGetVirtualProcessorRegisters, (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, const WHV_REGISTER_NAME* RegisterNames, UINT32 RegisterCount, WHV_REGISTER_VALUE* RegisterValues)) \
   X(HRESULT, WHvSetVirtualProcessorRegisters, (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, const WHV_REGISTER_NAME* RegisterNames, UINT32 RegisterCount, const WHV_REGISTER_VALUE* RegisterValues)) \
 
+/*
+ * These are supplemental functions that may not be present
+ * on all versions and are not critical for basic functionality.
+ */
+#define LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(X) \
+  X(HRESULT, WHvSuspendPartitionTime, (WHV_PARTITION_HANDLE Partition)) \
 
 #define LIST_WINHVEMULATION_FUNCTIONS(X) \
   X(HRESULT, WHvEmulatorCreateEmulator, (const WHV_EMULATOR_CALLBACKS* Callbacks, WHV_EMULATOR_HANDLE* Emulator)) \
@@ -40,10 +46,12 @@ 
 /* Define function typedef */
 LIST_WINHVPLATFORM_FUNCTIONS(WHP_DEFINE_TYPE)
 LIST_WINHVEMULATION_FUNCTIONS(WHP_DEFINE_TYPE)
+LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_DEFINE_TYPE)
 
 struct WHPDispatch {
     LIST_WINHVPLATFORM_FUNCTIONS(WHP_DECLARE_MEMBER)
     LIST_WINHVEMULATION_FUNCTIONS(WHP_DECLARE_MEMBER)
+    LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_DECLARE_MEMBER)
 };
 
 extern struct WHPDispatch whp_dispatch;
@@ -53,6 +61,7 @@  bool init_whp_dispatch(void);
 typedef enum WHPFunctionList {
     WINHV_PLATFORM_FNS_DEFAULT,
     WINHV_EMULATION_FNS_DEFAULT,
+    WINHV_PLATFORM_FNS_SUPPLEMENTAL
 } WHPFunctionList;
 
 #endif /* WHP_DISPATCH_H */
diff --git a/target/i386/whpx-all.c b/target/i386/whpx-all.c
index 35601b8176..6a885c0fb3 100644
--- a/target/i386/whpx-all.c
+++ b/target/i386/whpx-all.c
@@ -114,7 +114,6 @@  static const WHV_REGISTER_NAME whpx_register_names[] = {
     WHvX64RegisterXmmControlStatus,
 
     /* X64 MSRs */
-    WHvX64RegisterTsc,
     WHvX64RegisterEfer,
 #ifdef TARGET_X86_64
     WHvX64RegisterKernelGsBase,
@@ -215,7 +214,44 @@  static SegmentCache whpx_seg_h2q(const WHV_X64_SEGMENT_REGISTER *hs)
     return qs;
 }
 
-static void whpx_set_registers(CPUState *cpu)
+static int whpx_set_tsc(CPUState *cpu)
+{
+    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
+    WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc;
+    WHV_REGISTER_VALUE tsc_val;
+    HRESULT hr;
+    struct whpx_state *whpx = &whpx_global;
+
+    /*
+     * Suspend the partition prior to setting the TSC to reduce the variance
+     * in TSC across vCPUs. When the first vCPU runs post suspend, the
+     * partition is automatically resumed.
+     */
+    if (whp_dispatch.WHvSuspendPartitionTime) {
+
+        /*
+         * Unable to suspend partition while setting TSC is not a fatal
+         * error. It just increases the likelihood of TSC variance between
+         * vCPUs and some guest OS are able to handle that just fine.
+         */
+        hr = whp_dispatch.WHvSuspendPartitionTime(whpx->partition);
+        if (FAILED(hr)) {
+            warn_report("WHPX: Failed to suspend partition, hr=%08lx", hr);
+        }
+    }
+
+    tsc_val.Reg64 = env->tsc;
+    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
+        whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val);
+    if (FAILED(hr)) {
+        error_report("WHPX: Failed to set TSC, hr=%08lx", hr);
+        return -1;
+    }
+
+    return 0;
+}
+
+static void whpx_set_registers(CPUState *cpu, int level)
 {
     struct whpx_state *whpx = &whpx_global;
     struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
@@ -230,6 +266,14 @@  static void whpx_set_registers(CPUState *cpu)
 
     assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
 
+    /*
+     * Following MSRs have side effects on the guest or are too heavy for
+     * runtime. Limit them to full state update.
+     */
+    if (level >= WHPX_SET_RESET_STATE) {
+        whpx_set_tsc(cpu);
+    }
+
     memset(&vcxt, 0, sizeof(struct whpx_register_set));
 
     v86 = (env->eflags & VM_MASK);
@@ -330,8 +374,6 @@  static void whpx_set_registers(CPUState *cpu)
     idx += 1;
 
     /* MSRs */
-    assert(whpx_register_names[idx] == WHvX64RegisterTsc);
-    vcxt.values[idx++].Reg64 = env->tsc;
     assert(whpx_register_names[idx] == WHvX64RegisterEfer);
     vcxt.values[idx++].Reg64 = env->efer;
 #ifdef TARGET_X86_64
@@ -379,6 +421,25 @@  static void whpx_set_registers(CPUState *cpu)
     return;
 }
 
+static int whpx_get_tsc(CPUState *cpu)
+{
+    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
+    WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc;
+    WHV_REGISTER_VALUE tsc_val;
+    HRESULT hr;
+    struct whpx_state *whpx = &whpx_global;
+
+    hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
+        whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val);
+    if (FAILED(hr)) {
+        error_report("WHPX: Failed to get TSC, hr=%08lx", hr);
+        return -1;
+    }
+
+    env->tsc = tsc_val.Reg64;
+    return 0;
+}
+
 static void whpx_get_registers(CPUState *cpu)
 {
     struct whpx_state *whpx = &whpx_global;
@@ -394,6 +455,11 @@  static void whpx_get_registers(CPUState *cpu)
 
     assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
 
+    if (!env->tsc_valid) {
+        whpx_get_tsc(cpu);
+        env->tsc_valid = !runstate_is_running();
+    }
+
     hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
         whpx->partition, cpu->cpu_index,
         whpx_register_names,
@@ -492,8 +558,6 @@  static void whpx_get_registers(CPUState *cpu)
     idx += 1;
 
     /* MSRs */
-    assert(whpx_register_names[idx] == WHvX64RegisterTsc);
-    env->tsc = vcxt.values[idx++].Reg64;
     assert(whpx_register_names[idx] == WHvX64RegisterEfer);
     env->efer = vcxt.values[idx++].Reg64;
 #ifdef TARGET_X86_64
@@ -896,7 +960,7 @@  static int whpx_vcpu_run(CPUState *cpu)
 
     do {
         if (cpu->vcpu_dirty) {
-            whpx_set_registers(cpu);
+            whpx_set_registers(cpu, WHPX_SET_RUNTIME_STATE);
             cpu->vcpu_dirty = false;
         }
 
@@ -1074,14 +1138,14 @@  static void do_whpx_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
 static void do_whpx_cpu_synchronize_post_reset(CPUState *cpu,
                                                run_on_cpu_data arg)
 {
-    whpx_set_registers(cpu);
+    whpx_set_registers(cpu, WHPX_SET_RESET_STATE);
     cpu->vcpu_dirty = false;
 }
 
 static void do_whpx_cpu_synchronize_post_init(CPUState *cpu,
                                               run_on_cpu_data arg)
 {
-    whpx_set_registers(cpu);
+    whpx_set_registers(cpu, WHPX_SET_FULL_STATE);
     cpu->vcpu_dirty = false;
 }
 
@@ -1123,6 +1187,15 @@  void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu)
 
 static Error *whpx_migration_blocker;
 
+static void whpx_cpu_update_state(void *opaque, int running, RunState state)
+{
+    CPUX86State *env = opaque;
+
+    if (running) {
+        env->tsc_valid = false;
+    }
+}
+
 int whpx_init_vcpu(CPUState *cpu)
 {
     HRESULT hr;
@@ -1178,6 +1251,7 @@  int whpx_init_vcpu(CPUState *cpu)
 
     cpu->vcpu_dirty = true;
     cpu->hax_vcpu = (struct hax_vcpu_state *)vcpu;
+    qemu_add_vm_change_state_handler(whpx_cpu_update_state, cpu->env_ptr);
 
     return 0;
 }
@@ -1367,6 +1441,10 @@  static bool load_whp_dispatch_fns(HMODULE *handle,
 
     #define WINHV_PLATFORM_DLL "WinHvPlatform.dll"
     #define WINHV_EMULATION_DLL "WinHvEmulation.dll"
+    #define WHP_LOAD_FIELD_OPTIONAL(return_type, function_name, signature) \
+        whp_dispatch.function_name = \
+            (function_name ## _t)GetProcAddress(hLib, #function_name); \
+
     #define WHP_LOAD_FIELD(return_type, function_name, signature) \
         whp_dispatch.function_name = \
             (function_name ## _t)GetProcAddress(hLib, #function_name); \
@@ -1394,6 +1472,11 @@  static bool load_whp_dispatch_fns(HMODULE *handle,
         WHP_LOAD_LIB(WINHV_EMULATION_DLL, hLib)
         LIST_WINHVEMULATION_FUNCTIONS(WHP_LOAD_FIELD)
         break;
+
+    case WINHV_PLATFORM_FNS_SUPPLEMENTAL:
+        WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib)
+        LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_LOAD_FIELD_OPTIONAL)
+        break;
     }
 
     *handle = hLib;
@@ -1554,6 +1637,8 @@  bool init_whp_dispatch(void)
         goto error;
     }
 
+    assert(load_whp_dispatch_fns(&hWinHvPlatform,
+        WINHV_PLATFORM_FNS_SUPPLEMENTAL));
     whp_dispatch_initialized = true;
 
     return true;