
[v2,1/3] x86/vpmu: Add get/put_vpmu() and VPMU_AVAILABLE

Message ID 1487353212-4395-2-git-send-email-boris.ostrovsky@oracle.com (mailing list archive)
State New, archived

Commit Message

Boris Ostrovsky Feb. 17, 2017, 5:40 p.m. UTC
vpmu_enabled() (used by hvm/pv_cpuid() to properly report the 0xa leaf
for Intel processors) is based on the value of the VPMU_CONTEXT_ALLOCATED
bit. This is problematic:
* For HVM guests the VPMU context is allocated lazily, during the first
  access to VPMU MSRs. Since the leaf is typically queried before the guest
  attempts to read or write the MSRs, it is likely that CPUID will report
  no PMU support.
* For PV guests the context is allocated eagerly, but only in response to
  the guest's XENPMU_init hypercall. There is a chance that the guest will
  try to read CPUID before making this hypercall.

This patch introduces the VPMU_AVAILABLE flag, which is set (subject to
vpmu_mode constraints) during VCPU initialization for both PV and HVM
guests. Since this flag is expected to be managed together with vpmu_count,
get/put_vpmu() are added to simplify the code.

vpmu_enabled() (renamed to vpmu_available()) can now use this new flag.

(As a side effect this patch also fixes a race in pvpmu_init() where we
increment vpmu_count in vpmu_initialise() after checking vpmu_mode.)

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
v2:
* Rename VPMU_ENABLED to VPMU_AVAILABLE (and vpmu_enabled() to vpmu_available())
* Check VPMU_AVAILABLE flag in put_vpmu() under lock

 xen/arch/x86/cpu/vpmu.c    | 111 ++++++++++++++++++++++++++++++---------------
 xen/arch/x86/cpuid.c       |   8 ++--
 xen/arch/x86/domain.c      |   5 ++
 xen/arch/x86/hvm/svm/svm.c |   5 --
 xen/arch/x86/hvm/vmx/vmx.c |   5 --
 xen/include/asm-x86/vpmu.h |   6 ++-
 6 files changed, 88 insertions(+), 52 deletions(-)
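
For illustration, the resulting per-VCPU flow is roughly the following (a
simplified sketch of the patch below, not verbatim code):

    /* vcpu_initialise() -> vpmu_initialise(): */
    get_vpmu(v);                  /* take a vpmu_count reference and set
                                     VPMU_AVAILABLE, subject to vpmu_mode */
    if ( has_vlapic(v->domain) && /* HVM/PVH: arch state set up eagerly */
         vpmu_arch_initialise(v) )
        put_vpmu(v);              /* arch init failed: drop the reference */

    /* PV: arch state is set up later, from the XENPMU_init hypercall */
    /* pvpmu_init() -> */ if ( vpmu_arch_initialise(v) ) put_vpmu(v);

    /* vcpu_destroy() -> vpmu_destroy(): */
    vpmu_arch_destroy(v);
    put_vpmu(v);                  /* drop the reference, clear VPMU_AVAILABLE */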

Comments

Tian, Kevin Feb. 21, 2017, 8:09 a.m. UTC | #1
> From: Boris Ostrovsky [mailto:boris.ostrovsky@oracle.com]
> Sent: Saturday, February 18, 2017 1:40 AM
> 
> vpmu_enabled() (used by hvm/pv_cpuid() to properly report the 0xa leaf
> for Intel processors) is based on the value of the VPMU_CONTEXT_ALLOCATED
> bit. This is problematic:
> * For HVM guests the VPMU context is allocated lazily, during the first
>   access to VPMU MSRs. Since the leaf is typically queried before the guest
>   attempts to read or write the MSRs, it is likely that CPUID will report
>   no PMU support.
> * For PV guests the context is allocated eagerly, but only in response to
>   the guest's XENPMU_init hypercall. There is a chance that the guest will
>   try to read CPUID before making this hypercall.
>
> This patch introduces the VPMU_AVAILABLE flag, which is set (subject to
> vpmu_mode constraints) during VCPU initialization for both PV and HVM
> guests. Since this flag is expected to be managed together with vpmu_count,
> get/put_vpmu() are added to simplify the code.
>
> vpmu_enabled() (renamed to vpmu_available()) can now use this new flag.
>
> (As a side effect this patch also fixes a race in pvpmu_init() where we
> increment vpmu_count in vpmu_initialise() after checking vpmu_mode.)
> 
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> ---
> v2:
> * Rename VPMU_ENABLED to VPMU_AVAILABLE (and vpmu_enabled() to vpmu_available())
> * Check VPMU_AVAILABLE flag in put_vpmu() under lock
> 
>  xen/arch/x86/cpu/vpmu.c    | 111 ++++++++++++++++++++++++++++++---------------
>  xen/arch/x86/cpuid.c       |   8 ++--
>  xen/arch/x86/domain.c      |   5 ++
>  xen/arch/x86/hvm/svm/svm.c |   5 --
>  xen/arch/x86/hvm/vmx/vmx.c |   5 --
>  xen/include/asm-x86/vpmu.h |   6 ++-
>  6 files changed, 88 insertions(+), 52 deletions(-)
> 
> diff --git a/xen/arch/x86/cpu/vpmu.c b/xen/arch/x86/cpu/vpmu.c
> index 35a9403..b30769d 100644
> --- a/xen/arch/x86/cpu/vpmu.c
> +++ b/xen/arch/x86/cpu/vpmu.c
> @@ -456,32 +456,21 @@ int vpmu_load(struct vcpu *v, bool_t from_guest)
>      return 0;
>  }
> 
> -void vpmu_initialise(struct vcpu *v)
> +static int vpmu_arch_initialise(struct vcpu *v)
>  {
>      struct vpmu_struct *vpmu = vcpu_vpmu(v);
>      uint8_t vendor = current_cpu_data.x86_vendor;
>      int ret;
> -    bool_t is_priv_vpmu = is_hardware_domain(v->domain);
> 
>      BUILD_BUG_ON(sizeof(struct xen_pmu_intel_ctxt) > XENPMU_CTXT_PAD_SZ);
>      BUILD_BUG_ON(sizeof(struct xen_pmu_amd_ctxt) > XENPMU_CTXT_PAD_SZ);
>      BUILD_BUG_ON(sizeof(struct xen_pmu_regs) > XENPMU_REGS_PAD_SZ);
>      BUILD_BUG_ON(sizeof(struct compat_pmu_regs) > XENPMU_REGS_PAD_SZ);
> 
> -    ASSERT(!vpmu->flags && !vpmu->context);
> +    ASSERT(!(vpmu->flags & ~VPMU_AVAILABLE) && !vpmu->context);
> 
> -    if ( !is_priv_vpmu )
> -    {
> -        /*
> -         * Count active VPMUs so that we won't try to change vpmu_mode while
> -         * they are in use.
> -         * vpmu_mode can be safely updated while dom0's VPMUs are active and
> -         * so we don't need to include it in the count.
> -         */
> -        spin_lock(&vpmu_lock);
> -        vpmu_count++;
> -        spin_unlock(&vpmu_lock);
> -    }
> +    if ( !vpmu_available(v) )
> +        return 0;
> 
>      switch ( vendor )
>      {
> @@ -501,7 +490,7 @@ void vpmu_initialise(struct vcpu *v)
>              opt_vpmu_enabled = 0;
>              vpmu_mode = XENPMU_MODE_OFF;
>          }
> -        return; /* Don't bother restoring vpmu_count, VPMU is off forever */
> +        return -EINVAL;
>      }
> 
>      vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | APIC_LVT_MASKED;
> @@ -509,15 +498,63 @@ void vpmu_initialise(struct vcpu *v)
>      if ( ret )
>          printk(XENLOG_G_WARNING "VPMU: Initialization failed for %pv\n", v);
> 
> -    /* Intel needs to initialize VPMU ops even if VPMU is not in use */
> -    if ( !is_priv_vpmu &&
> -         (ret || (vpmu_mode == XENPMU_MODE_OFF) ||
> -          (vpmu_mode == XENPMU_MODE_ALL)) )
> +    return ret;
> +}
> +
> +static void get_vpmu(struct vcpu *v)
> +{
> +    spin_lock(&vpmu_lock);
> +
> +    /*
> +     * Count active VPMUs so that we won't try to change vpmu_mode while
> +     * they are in use.
> +     * vpmu_mode can be safely updated while dom0's VPMUs are active and
> +     * so we don't need to include it in the count.
> +     */
> +    if ( !is_hardware_domain(v->domain) &&
> +        (vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
> +    {
> +        vpmu_count++;
> +        vpmu_set(vcpu_vpmu(v), VPMU_AVAILABLE);
> +    }
> +    else if ( is_hardware_domain(v->domain) &&
> +              (vpmu_mode != XENPMU_MODE_OFF) )
> +        vpmu_set(vcpu_vpmu(v), VPMU_AVAILABLE);
> +
> +    spin_unlock(&vpmu_lock);
> +}
> +
> +static void put_vpmu(struct vcpu *v)
> +{
> +    spin_lock(&vpmu_lock);
> +
> +    if ( !vpmu_is_set(vcpu_vpmu(v), VPMU_AVAILABLE) )
> +        goto out;

just use !vpmu_available(v)
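
i.e., with the wrapper this same patch adds to vpmu.h:

    if ( !vpmu_available(v) )
        goto out;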

> +
> +    if ( !is_hardware_domain(v->domain) &&
> +         (vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
>      {
> -        spin_lock(&vpmu_lock);
>          vpmu_count--;
> -        spin_unlock(&vpmu_lock);
> +        vpmu_reset(vcpu_vpmu(v), VPMU_AVAILABLE);
>      }
> +    else if ( is_hardware_domain(v->domain) &&
> +              (vpmu_mode != XENPMU_MODE_OFF) )
> +        vpmu_reset(vcpu_vpmu(v), VPMU_AVAILABLE);
> +
> + out:
> +    spin_unlock(&vpmu_lock);
> +}
> +
> +void vpmu_initialise(struct vcpu *v)
> +{
> +    get_vpmu(v);
> +
> +    /*
> +     * Guests without LAPIC (i.e. PV) call vpmu_arch_initialise()
> +     * from pvpmu_init().
> +     */

The implication is that the PV VPMU is not counted then? I think that's
not aligned with the original policy, where only the privileged VPMU,
i.e. Dom0's, is not counted.

> +    if ( has_vlapic(v->domain) && vpmu_arch_initialise(v) )
> +        put_vpmu(v);
>  }
> 
>  static void vpmu_clear_last(void *arg)
> @@ -526,7 +563,7 @@ static void vpmu_clear_last(void *arg)
>          this_cpu(last_vcpu) = NULL;
>  }
> 
> -void vpmu_destroy(struct vcpu *v)
> +static void vpmu_arch_destroy(struct vcpu *v)
>  {
>      struct vpmu_struct *vpmu = vcpu_vpmu(v);
> 
> @@ -551,11 +588,13 @@ void vpmu_destroy(struct vcpu *v)
>                           vpmu_save_force, v, 1);
>           vpmu->arch_vpmu_ops->arch_vpmu_destroy(v);
>      }
> +}
> 
> -    spin_lock(&vpmu_lock);
> -    if ( !is_hardware_domain(v->domain) )
> -        vpmu_count--;
> -    spin_unlock(&vpmu_lock);
> +void vpmu_destroy(struct vcpu *v)
> +{
> +    vpmu_arch_destroy(v);
> +
> +    put_vpmu(v);
>  }
> 
>  static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
> @@ -565,13 +604,15 @@ static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
>      struct page_info *page;
>      uint64_t gfn = params->val;
> 
> -    if ( (vpmu_mode == XENPMU_MODE_OFF) ||
> -         ((vpmu_mode & XENPMU_MODE_ALL) && !is_hardware_domain(d)) )
> -        return -EINVAL;
> -
>      if ( (params->vcpu >= d->max_vcpus) || (d->vcpu[params->vcpu] == NULL) )
>          return -EINVAL;
> 
> +    v = d->vcpu[params->vcpu];
> +    vpmu = vcpu_vpmu(v);
> +
> +    if ( !vpmu_available(v) )
> +        return -ENOENT;
> +

I didn't see where get_vpmu() is invoked for PV. Then why would the
above condition ever be set here?

>      page = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC);
>      if ( !page )
>          return -EINVAL;
> @@ -582,9 +623,6 @@ static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
>          return -EINVAL;
>      }
> 
> -    v = d->vcpu[params->vcpu];
> -    vpmu = vcpu_vpmu(v);
> -
>      spin_lock(&vpmu->vpmu_lock);
> 
>      if ( v->arch.vpmu.xenpmu_data )
> @@ -602,7 +640,8 @@ static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
>          return -ENOMEM;
>      }
> 
> -    vpmu_initialise(v);
> +    if ( vpmu_arch_initialise(v) )
> +        put_vpmu(v);

Since there is no get_vpmu(), why should we put_vpmu() here?

I feel a bit lost about your handling of the PV case here...

Thanks
Kevin
Andrew Cooper Feb. 21, 2017, 11 a.m. UTC | #2
On 17/02/17 17:40, Boris Ostrovsky wrote:
> @@ -509,15 +498,63 @@ void vpmu_initialise(struct vcpu *v)
>      if ( ret )
>          printk(XENLOG_G_WARNING "VPMU: Initialization failed for %pv\n", v);
>  
> -    /* Intel needs to initialize VPMU ops even if VPMU is not in use */
> -    if ( !is_priv_vpmu &&
> -         (ret || (vpmu_mode == XENPMU_MODE_OFF) ||
> -          (vpmu_mode == XENPMU_MODE_ALL)) )
> +    return ret;
> +}
> +
> +static void get_vpmu(struct vcpu *v)
> +{
> +    spin_lock(&vpmu_lock);
> +
> +    /*
> +     * Count active VPMUs so that we won't try to change vpmu_mode while
> +     * they are in use.
> +     * vpmu_mode can be safely updated while dom0's VPMUs are active and
> +     * so we don't need to include it in the count.
> +     */
> +    if ( !is_hardware_domain(v->domain) &&
> +        (vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
> +    {
> +        vpmu_count++;
> +        vpmu_set(vcpu_vpmu(v), VPMU_AVAILABLE);
> +    }
> +    else if ( is_hardware_domain(v->domain) &&
> +              (vpmu_mode != XENPMU_MODE_OFF) )
> +        vpmu_set(vcpu_vpmu(v), VPMU_AVAILABLE);

Why do we recalculate AVAILABLE on every get()?

(In the absence of toolstack control from the CPUID side), surely it
should be set (or not) exactly once during domain creation, and be
unchanged thereafter?

~Andrew
Boris Ostrovsky Feb. 21, 2017, 2:09 p.m. UTC | #3
On 02/21/2017 03:09 AM, Tian, Kevin wrote:
>> From: Boris Ostrovsky [mailto:boris.ostrovsky@oracle.com]
>> Sent: Saturday, February 18, 2017 1:40 AM
>>
>> vpmu_enabled() (used by hvm/pv_cpuid() to properly report the 0xa leaf
>> for Intel processors) is based on the value of the VPMU_CONTEXT_ALLOCATED
>> bit. This is problematic:
>> * For HVM guests the VPMU context is allocated lazily, during the first
>>   access to VPMU MSRs. Since the leaf is typically queried before the guest
>>   attempts to read or write the MSRs, it is likely that CPUID will report
>>   no PMU support.
>> * For PV guests the context is allocated eagerly, but only in response to
>>   the guest's XENPMU_init hypercall. There is a chance that the guest will
>>   try to read CPUID before making this hypercall.
>>
>> This patch introduces the VPMU_AVAILABLE flag, which is set (subject to
>> vpmu_mode constraints) during VCPU initialization for both PV and HVM
>> guests. Since this flag is expected to be managed together with vpmu_count,
>> get/put_vpmu() are added to simplify the code.
>>
>> vpmu_enabled() (renamed to vpmu_available()) can now use this new flag.
>>
>> (As a side effect this patch also fixes a race in pvpmu_init() where we
>> increment vpmu_count in vpmu_initialise() after checking vpmu_mode.)
>>
>> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
>> ---
>> v2:
>> * Rename VPMU_ENABLED to VPMU_AVAILABLE (and vpmu_enabled() to vpmu_available())
>> * Check VPMU_AVAILABLE flag in put_vpmu() under lock
>>
>>  xen/arch/x86/cpu/vpmu.c    | 111 ++++++++++++++++++++++++++++++---------------
>>  xen/arch/x86/cpuid.c       |   8 ++--
>>  xen/arch/x86/domain.c      |   5 ++
>>  xen/arch/x86/hvm/svm/svm.c |   5 --
>>  xen/arch/x86/hvm/vmx/vmx.c |   5 --
>>  xen/include/asm-x86/vpmu.h |   6 ++-
>>  6 files changed, 88 insertions(+), 52 deletions(-)
>>
>> diff --git a/xen/arch/x86/cpu/vpmu.c b/xen/arch/x86/cpu/vpmu.c
>> index 35a9403..b30769d 100644
>> --- a/xen/arch/x86/cpu/vpmu.c
>> +++ b/xen/arch/x86/cpu/vpmu.c
>> @@ -456,32 +456,21 @@ int vpmu_load(struct vcpu *v, bool_t from_guest)
>>      return 0;
>>  }
>>
>> -void vpmu_initialise(struct vcpu *v)
>> +static int vpmu_arch_initialise(struct vcpu *v)
>>  {
>>      struct vpmu_struct *vpmu = vcpu_vpmu(v);
>>      uint8_t vendor = current_cpu_data.x86_vendor;
>>      int ret;
>> -    bool_t is_priv_vpmu = is_hardware_domain(v->domain);
>>
>>      BUILD_BUG_ON(sizeof(struct xen_pmu_intel_ctxt) > XENPMU_CTXT_PAD_SZ);
>>      BUILD_BUG_ON(sizeof(struct xen_pmu_amd_ctxt) > XENPMU_CTXT_PAD_SZ);
>>      BUILD_BUG_ON(sizeof(struct xen_pmu_regs) > XENPMU_REGS_PAD_SZ);
>>      BUILD_BUG_ON(sizeof(struct compat_pmu_regs) > XENPMU_REGS_PAD_SZ);
>>
>> -    ASSERT(!vpmu->flags && !vpmu->context);
>> +    ASSERT(!(vpmu->flags & ~VPMU_AVAILABLE) && !vpmu->context);
>>
>> -    if ( !is_priv_vpmu )
>> -    {
>> -        /*
>> -         * Count active VPMUs so that we won't try to change vpmu_mode while
>> -         * they are in use.
>> -         * vpmu_mode can be safely updated while dom0's VPMUs are active and
>> -         * so we don't need to include it in the count.
>> -         */
>> -        spin_lock(&vpmu_lock);
>> -        vpmu_count++;
>> -        spin_unlock(&vpmu_lock);
>> -    }
>> +    if ( !vpmu_available(v) )
>> +        return 0;
>>
>>      switch ( vendor )
>>      {
>> @@ -501,7 +490,7 @@ void vpmu_initialise(struct vcpu *v)
>>              opt_vpmu_enabled = 0;
>>              vpmu_mode = XENPMU_MODE_OFF;
>>          }
>> -        return; /* Don't bother restoring vpmu_count, VPMU is off forever */
>> +        return -EINVAL;
>>      }
>>
>>      vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | APIC_LVT_MASKED;
>> @@ -509,15 +498,63 @@ void vpmu_initialise(struct vcpu *v)
>>      if ( ret )
>>          printk(XENLOG_G_WARNING "VPMU: Initialization failed for %pv\n", v);
>>
>> -    /* Intel needs to initialize VPMU ops even if VPMU is not in use */
>> -    if ( !is_priv_vpmu &&
>> -         (ret || (vpmu_mode == XENPMU_MODE_OFF) ||
>> -          (vpmu_mode == XENPMU_MODE_ALL)) )
>> +    return ret;
>> +}
>> +
>> +static void get_vpmu(struct vcpu *v)
>> +{
>> +    spin_lock(&vpmu_lock);
>> +
>> +    /*
>> +     * Count active VPMUs so that we won't try to change vpmu_mode while
>> +     * they are in use.
>> +     * vpmu_mode can be safely updated while dom0's VPMUs are active and
>> +     * so we don't need to include it in the count.
>> +     */
>> +    if ( !is_hardware_domain(v->domain) &&
>> +        (vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
>> +    {
>> +        vpmu_count++;
>> +        vpmu_set(vcpu_vpmu(v), VPMU_AVAILABLE);
>> +    }
>> +    else if ( is_hardware_domain(v->domain) &&
>> +              (vpmu_mode != XENPMU_MODE_OFF) )
>> +        vpmu_set(vcpu_vpmu(v), VPMU_AVAILABLE);
>> +
>> +    spin_unlock(&vpmu_lock);
>> +}
>> +
>> +static void put_vpmu(struct vcpu *v)
>> +{
>> +    spin_lock(&vpmu_lock);
>> +
>> +    if ( !vpmu_is_set(vcpu_vpmu(v), VPMU_AVAILABLE) )
>> +        goto out;
> just use !vpmu_available(v)

Yes.

>
>> +
>> +    if ( !is_hardware_domain(v->domain) &&
>> +         (vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
>>      {
>> -        spin_lock(&vpmu_lock);
>>          vpmu_count--;
>> -        spin_unlock(&vpmu_lock);
>> +        vpmu_reset(vcpu_vpmu(v), VPMU_AVAILABLE);
>>      }
>> +    else if ( is_hardware_domain(v->domain) &&
>> +              (vpmu_mode != XENPMU_MODE_OFF) )
>> +        vpmu_reset(vcpu_vpmu(v), VPMU_AVAILABLE);
>> +
>> + out:
>> +    spin_unlock(&vpmu_lock);
>> +}
>> +
>> +void vpmu_initialise(struct vcpu *v)
>> +{
>> +    get_vpmu(v);
>> +
>> +    /*
>> +     * Guests without LAPIC (i.e. PV) call vpmu_arch_initialise()
>> +     * from pvpmu_init().
>> +     */
> The implication is that the PV VPMU is not counted then?

No. get_vpmu() is what does the counting now. Since we do
vcpu_initialise() -> vpmu_initialise() for all types of VCPUs, both PV and
HVM VPMUs are counted here. But we defer arch-specific initialization
(which doesn't do the counting) for PV until the hypercall.
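
In other words, the flow is roughly (a sketch of the call paths, not
verbatim code):

    /* HVM/PVH (has_vlapic() == true): */
    vcpu_initialise(v)
        -> vpmu_initialise(v)          /* get_vpmu(): counted, AVAILABLE set */
           -> vpmu_arch_initialise(v)

    /* PV: */
    vcpu_initialise(v)
        -> vpmu_initialise(v)          /* get_vpmu() only: counted here too */
    /* ... later, from the guest's XENPMU_init hypercall ... */
    pvpmu_init(d, params)
        -> vpmu_arch_initialise(v)     /* arch state allocated only now */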

> I think that's
> not aligned with the original policy, where only the privileged VPMU,
> i.e. Dom0's, is not counted.
>
>> +    if ( has_vlapic(v->domain) && vpmu_arch_initialise(v) )
>> +        put_vpmu(v);
>>  }
>>
>>  static void vpmu_clear_last(void *arg)
>> @@ -526,7 +563,7 @@ static void vpmu_clear_last(void *arg)
>>          this_cpu(last_vcpu) = NULL;
>>  }
>>
>> -void vpmu_destroy(struct vcpu *v)
>> +static void vpmu_arch_destroy(struct vcpu *v)
>>  {
>>      struct vpmu_struct *vpmu = vcpu_vpmu(v);
>>
>> @@ -551,11 +588,13 @@ void vpmu_destroy(struct vcpu *v)
>>                           vpmu_save_force, v, 1);
>>           vpmu->arch_vpmu_ops->arch_vpmu_destroy(v);
>>      }
>> +}
>>
>> -    spin_lock(&vpmu_lock);
>> -    if ( !is_hardware_domain(v->domain) )
>> -        vpmu_count--;
>> -    spin_unlock(&vpmu_lock);
>> +void vpmu_destroy(struct vcpu *v)
>> +{
>> +    vpmu_arch_destroy(v);
>> +
>> +    put_vpmu(v);
>>  }
>>
>>  static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
>> @@ -565,13 +604,15 @@ static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
>>      struct page_info *page;
>>      uint64_t gfn = params->val;
>>
>> -    if ( (vpmu_mode == XENPMU_MODE_OFF) ||
>> -         ((vpmu_mode & XENPMU_MODE_ALL) && !is_hardware_domain(d)) )
>> -        return -EINVAL;
>> -
>>      if ( (params->vcpu >= d->max_vcpus) || (d->vcpu[params->vcpu] == NULL) )
>>          return -EINVAL;
>>
>> +    v = d->vcpu[params->vcpu];
>> +    vpmu = vcpu_vpmu(v);
>> +
>> +    if ( !vpmu_available(v) )
>> +        return -ENOENT;
>> +
> I didn't see where get_vpmu() is invoked for PV. Then why would the
> above condition ever be set here?

get_vpmu() is called from vpmu_initialise().

-boris

>
>>      page = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC);
>>      if ( !page )
>>          return -EINVAL;
>> @@ -582,9 +623,6 @@ static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
>>          return -EINVAL;
>>      }
>>
>> -    v = d->vcpu[params->vcpu];
>> -    vpmu = vcpu_vpmu(v);
>> -
>>      spin_lock(&vpmu->vpmu_lock);
>>
>>      if ( v->arch.vpmu.xenpmu_data )
>> @@ -602,7 +640,8 @@ static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
>>          return -ENOMEM;
>>      }
>>
>> -    vpmu_initialise(v);
>> +    if ( vpmu_arch_initialise(v) )
>> +        put_vpmu(v);
> Since there is no get_vpmu(), why should we put_vpmu() here?
>
> I feel a bit lost about your handling of the PV case here...
>
> Thanks
> Kevin
Boris Ostrovsky Feb. 21, 2017, 2:12 p.m. UTC | #4
On 02/21/2017 06:00 AM, Andrew Cooper wrote:
> On 17/02/17 17:40, Boris Ostrovsky wrote:
>> @@ -509,15 +498,63 @@ void vpmu_initialise(struct vcpu *v)
>>      if ( ret )
>>          printk(XENLOG_G_WARNING "VPMU: Initialization failed for %pv\n", v);
>>  
>> -    /* Intel needs to initialize VPMU ops even if VPMU is not in use */
>> -    if ( !is_priv_vpmu &&
>> -         (ret || (vpmu_mode == XENPMU_MODE_OFF) ||
>> -          (vpmu_mode == XENPMU_MODE_ALL)) )
>> +    return ret;
>> +}
>> +
>> +static void get_vpmu(struct vcpu *v)
>> +{
>> +    spin_lock(&vpmu_lock);
>> +
>> +    /*
>> +     * Count active VPMUs so that we won't try to change vpmu_mode while
>> +     * they are in use.
>> +     * vpmu_mode can be safely updated while dom0's VPMUs are active and
>> +     * so we don't need to include it in the count.
>> +     */
>> +    if ( !is_hardware_domain(v->domain) &&
>> +        (vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
>> +    {
>> +        vpmu_count++;
>> +        vpmu_set(vcpu_vpmu(v), VPMU_AVAILABLE);
>> +    }
>> +    else if ( is_hardware_domain(v->domain) &&
>> +              (vpmu_mode != XENPMU_MODE_OFF) )
>> +        vpmu_set(vcpu_vpmu(v), VPMU_AVAILABLE);
> Why do we recalculate AVAILABLE on every get()?
>
> (In the absence of toolstack control from the CPUID side), surely it
> should be set (or not) exactly once during domain creation, and be
> unchanged thereafter?

Yes, that's exactly what's happening -- get_vpmu() is only called from
vcpu_initialise() -> vpmu_initialise(), and so we set VPMU_AVAILABLE only
once per VCPU.

I am not sure what you are asking.

-boris
Tian, Kevin Feb. 22, 2017, 3:05 a.m. UTC | #5
> From: Boris Ostrovsky [mailto:boris.ostrovsky@oracle.com]
> Sent: Tuesday, February 21, 2017 10:10 PM
> 
> On 02/21/2017 03:09 AM, Tian, Kevin wrote:
> >> From: Boris Ostrovsky [mailto:boris.ostrovsky@oracle.com]
> >> Sent: Saturday, February 18, 2017 1:40 AM
> >>
> >> vpmu_enabled() (used by hvm/pv_cpuid() to properly report the 0xa leaf
> >> for Intel processors) is based on the value of the VPMU_CONTEXT_ALLOCATED
> >> bit. This is problematic:
> >> * For HVM guests the VPMU context is allocated lazily, during the first
> >>   access to VPMU MSRs. Since the leaf is typically queried before the guest
> >>   attempts to read or write the MSRs, it is likely that CPUID will report
> >>   no PMU support.
> >> * For PV guests the context is allocated eagerly, but only in response to
> >>   the guest's XENPMU_init hypercall. There is a chance that the guest will
> >>   try to read CPUID before making this hypercall.
> >>
> >> This patch introduces the VPMU_AVAILABLE flag, which is set (subject to
> >> vpmu_mode constraints) during VCPU initialization for both PV and HVM
> >> guests. Since this flag is expected to be managed together with vpmu_count,
> >> get/put_vpmu() are added to simplify the code.
> >>
> >> vpmu_enabled() (renamed to vpmu_available()) can now use this new flag.
> >>
> >> (As a side effect this patch also fixes a race in pvpmu_init() where we
> >> increment vpmu_count in vpmu_initialise() after checking vpmu_mode.)
> >>
> >> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> >> ---
> >> v2:
> >> * Rename VPMU_ENABLED to VPMU_AVAILABLE (and vpmu_enabled() to vpmu_available())
> >> * Check VPMU_AVAILABLE flag in put_vpmu() under lock
> >>
> >>  xen/arch/x86/cpu/vpmu.c    | 111 ++++++++++++++++++++++++++++++---------------
> >>  xen/arch/x86/cpuid.c       |   8 ++--
> >>  xen/arch/x86/domain.c      |   5 ++
> >>  xen/arch/x86/hvm/svm/svm.c |   5 --
> >>  xen/arch/x86/hvm/vmx/vmx.c |   5 --
> >>  xen/include/asm-x86/vpmu.h |   6 ++-
> >>  6 files changed, 88 insertions(+), 52 deletions(-)
> >>
> >> diff --git a/xen/arch/x86/cpu/vpmu.c b/xen/arch/x86/cpu/vpmu.c
> >> index 35a9403..b30769d 100644
> >> --- a/xen/arch/x86/cpu/vpmu.c
> >> +++ b/xen/arch/x86/cpu/vpmu.c
> >> @@ -456,32 +456,21 @@ int vpmu_load(struct vcpu *v, bool_t from_guest)
> >>      return 0;
> >>  }
> >>
> >> -void vpmu_initialise(struct vcpu *v)
> >> +static int vpmu_arch_initialise(struct vcpu *v)
> >>  {
> >>      struct vpmu_struct *vpmu = vcpu_vpmu(v);
> >>      uint8_t vendor = current_cpu_data.x86_vendor;
> >>      int ret;
> >> -    bool_t is_priv_vpmu = is_hardware_domain(v->domain);
> >>
> >>      BUILD_BUG_ON(sizeof(struct xen_pmu_intel_ctxt) > XENPMU_CTXT_PAD_SZ);
> >>      BUILD_BUG_ON(sizeof(struct xen_pmu_amd_ctxt) > XENPMU_CTXT_PAD_SZ);
> >>      BUILD_BUG_ON(sizeof(struct xen_pmu_regs) > XENPMU_REGS_PAD_SZ);
> >>      BUILD_BUG_ON(sizeof(struct compat_pmu_regs) > XENPMU_REGS_PAD_SZ);
> >>
> >> -    ASSERT(!vpmu->flags && !vpmu->context);
> >> +    ASSERT(!(vpmu->flags & ~VPMU_AVAILABLE) && !vpmu->context);
> >>
> >> -    if ( !is_priv_vpmu )
> >> -    {
> >> -        /*
> >> -         * Count active VPMUs so that we won't try to change vpmu_mode while
> >> -         * they are in use.
> >> -         * vpmu_mode can be safely updated while dom0's VPMUs are active and
> >> -         * so we don't need to include it in the count.
> >> -         */
> >> -        spin_lock(&vpmu_lock);
> >> -        vpmu_count++;
> >> -        spin_unlock(&vpmu_lock);
> >> -    }
> >> +    if ( !vpmu_available(v) )
> >> +        return 0;
> >>
> >>      switch ( vendor )
> >>      {
> >> @@ -501,7 +490,7 @@ void vpmu_initialise(struct vcpu *v)
> >>              opt_vpmu_enabled = 0;
> >>              vpmu_mode = XENPMU_MODE_OFF;
> >>          }
> >> -        return; /* Don't bother restoring vpmu_count, VPMU is off forever */
> >> +        return -EINVAL;
> >>      }
> >>
> >>      vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | APIC_LVT_MASKED;
> >> @@ -509,15 +498,63 @@ void vpmu_initialise(struct vcpu *v)
> >>      if ( ret )
> >>          printk(XENLOG_G_WARNING "VPMU: Initialization failed for %pv\n", v);
> >>
> >> -    /* Intel needs to initialize VPMU ops even if VPMU is not in use */
> >> -    if ( !is_priv_vpmu &&
> >> -         (ret || (vpmu_mode == XENPMU_MODE_OFF) ||
> >> -          (vpmu_mode == XENPMU_MODE_ALL)) )
> >> +    return ret;
> >> +}
> >> +
> >> +static void get_vpmu(struct vcpu *v)
> >> +{
> >> +    spin_lock(&vpmu_lock);
> >> +
> >> +    /*
> >> +     * Count active VPMUs so that we won't try to change vpmu_mode while
> >> +     * they are in use.
> >> +     * vpmu_mode can be safely updated while dom0's VPMUs are active and
> >> +     * so we don't need to include it in the count.
> >> +     */
> >> +    if ( !is_hardware_domain(v->domain) &&
> >> +        (vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
> >> +    {
> >> +        vpmu_count++;
> >> +        vpmu_set(vcpu_vpmu(v), VPMU_AVAILABLE);
> >> +    }
> >> +    else if ( is_hardware_domain(v->domain) &&
> >> +              (vpmu_mode != XENPMU_MODE_OFF) )
> >> +        vpmu_set(vcpu_vpmu(v), VPMU_AVAILABLE);
> >> +
> >> +    spin_unlock(&vpmu_lock);
> >> +}
> >> +
> >> +static void put_vpmu(struct vcpu *v)
> >> +{
> >> +    spin_lock(&vpmu_lock);
> >> +
> >> +    if ( !vpmu_is_set(vcpu_vpmu(v), VPMU_AVAILABLE) )
> >> +        goto out;
> > just use !vpmu_available(v)
> 
> Yes.
> 
> >
> >> +
> >> +    if ( !is_hardware_domain(v->domain) &&
> >> +         (vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
> >>      {
> >> -        spin_lock(&vpmu_lock);
> >>          vpmu_count--;
> >> -        spin_unlock(&vpmu_lock);
> >> +        vpmu_reset(vcpu_vpmu(v), VPMU_AVAILABLE);
> >>      }
> >> +    else if ( is_hardware_domain(v->domain) &&
> >> +              (vpmu_mode != XENPMU_MODE_OFF) )
> >> +        vpmu_reset(vcpu_vpmu(v), VPMU_AVAILABLE);
> >> +
> >> + out:
> >> +    spin_unlock(&vpmu_lock);
> >> +}
> >> +
> >> +void vpmu_initialise(struct vcpu *v)
> >> +{
> >> +    get_vpmu(v);
> >> +
> >> +    /*
> >> +     * Guests without LAPIC (i.e. PV) call vpmu_arch_initialise()
> >> +     * from pvpmu_init().
> >> +     */
> > The implication is that the PV VPMU is not counted then?
> 
> No. get_vpmu() is what does the counting now. Since we do
> vcpu_initialise() -> vpmu_initialise() for all types of VCPUs, both PV and
> HVM VPMUs are counted here. But we defer arch-specific initialization
> (which doesn't do the counting) for PV until the hypercall.
> 

The earlier comment said vpmu_count is to count active VPMUs.
What's the definition of 'active' here? Is an uninitialized PV VPMU
also considered active?

Thanks
Kevin
Boris Ostrovsky Feb. 22, 2017, 2:06 p.m. UTC | #6
>>>> +
>>>> +void vpmu_initialise(struct vcpu *v)
>>>> +{
>>>> +    get_vpmu(v);
>>>> +
>>>> +    /*
>>>> +     * Guests without LAPIC (i.e. PV) call vpmu_arch_initialise()
>>>> +     * from pvpmu_init().
>>>> +     */
>>> The implication is that the PV VPMU is not counted then?
>> No. get_vpmu() is what does the counting now. Since we do
>> vcpu_initialise() -> vpmu_initialise() for all types of VCPUs, both PV and
>> HVM VPMUs are counted here. But we defer arch-specific initialization
>> (which doesn't do the counting) for PV until the hypercall.
>>
> The earlier comment said vpmu_count is to count active VPMUs.
> What's the definition of 'active' here? Is an uninitialized PV VPMU
> also considered active?

Yes. Whenever a VCPU with an appropriately defined CPUID leaf 0xa is
initialized, we should assume that it will eventually be used. I can
clarify the comment.

(In the second patch I claim that "appropriately defined" is version=0
but perhaps there is a better definition.)
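
To illustrate why the count matters, here is a hypothetical, simplified
sketch of the kind of check vpmu_count enables in the mode-set path (not
code from this patch):

    spin_lock(&vpmu_lock);
    if ( vpmu_count > 0 )
        ret = -EBUSY;           /* guest VPMUs are available; refuse to
                                   change vpmu_mode under them */
    else
        vpmu_mode = new_mode;   /* new_mode: hypothetical requested value */
    spin_unlock(&vpmu_lock);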

-boris

Patch

diff --git a/xen/arch/x86/cpu/vpmu.c b/xen/arch/x86/cpu/vpmu.c
index 35a9403..b30769d 100644
--- a/xen/arch/x86/cpu/vpmu.c
+++ b/xen/arch/x86/cpu/vpmu.c
@@ -456,32 +456,21 @@  int vpmu_load(struct vcpu *v, bool_t from_guest)
     return 0;
 }
 
-void vpmu_initialise(struct vcpu *v)
+static int vpmu_arch_initialise(struct vcpu *v)
 {
     struct vpmu_struct *vpmu = vcpu_vpmu(v);
     uint8_t vendor = current_cpu_data.x86_vendor;
     int ret;
-    bool_t is_priv_vpmu = is_hardware_domain(v->domain);
 
     BUILD_BUG_ON(sizeof(struct xen_pmu_intel_ctxt) > XENPMU_CTXT_PAD_SZ);
     BUILD_BUG_ON(sizeof(struct xen_pmu_amd_ctxt) > XENPMU_CTXT_PAD_SZ);
     BUILD_BUG_ON(sizeof(struct xen_pmu_regs) > XENPMU_REGS_PAD_SZ);
     BUILD_BUG_ON(sizeof(struct compat_pmu_regs) > XENPMU_REGS_PAD_SZ);
 
-    ASSERT(!vpmu->flags && !vpmu->context);
+    ASSERT(!(vpmu->flags & ~VPMU_AVAILABLE) && !vpmu->context);
 
-    if ( !is_priv_vpmu )
-    {
-        /*
-         * Count active VPMUs so that we won't try to change vpmu_mode while
-         * they are in use.
-         * vpmu_mode can be safely updated while dom0's VPMUs are active and
-         * so we don't need to include it in the count.
-         */
-        spin_lock(&vpmu_lock);
-        vpmu_count++;
-        spin_unlock(&vpmu_lock);
-    }
+    if ( !vpmu_available(v) )
+        return 0;
 
     switch ( vendor )
     {
@@ -501,7 +490,7 @@  void vpmu_initialise(struct vcpu *v)
             opt_vpmu_enabled = 0;
             vpmu_mode = XENPMU_MODE_OFF;
         }
-        return; /* Don't bother restoring vpmu_count, VPMU is off forever */
+        return -EINVAL;
     }
 
     vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | APIC_LVT_MASKED;
@@ -509,15 +498,63 @@  void vpmu_initialise(struct vcpu *v)
     if ( ret )
         printk(XENLOG_G_WARNING "VPMU: Initialization failed for %pv\n", v);
 
-    /* Intel needs to initialize VPMU ops even if VPMU is not in use */
-    if ( !is_priv_vpmu &&
-         (ret || (vpmu_mode == XENPMU_MODE_OFF) ||
-          (vpmu_mode == XENPMU_MODE_ALL)) )
+    return ret;
+}
+
+static void get_vpmu(struct vcpu *v)
+{
+    spin_lock(&vpmu_lock);
+
+    /*
+     * Count active VPMUs so that we won't try to change vpmu_mode while
+     * they are in use.
+     * vpmu_mode can be safely updated while dom0's VPMUs are active and
+     * so we don't need to include it in the count.
+     */
+    if ( !is_hardware_domain(v->domain) &&
+        (vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
+    {
+        vpmu_count++;
+        vpmu_set(vcpu_vpmu(v), VPMU_AVAILABLE);
+    }
+    else if ( is_hardware_domain(v->domain) &&
+              (vpmu_mode != XENPMU_MODE_OFF) )
+        vpmu_set(vcpu_vpmu(v), VPMU_AVAILABLE);
+
+    spin_unlock(&vpmu_lock);
+}
+
+static void put_vpmu(struct vcpu *v)
+{
+    spin_lock(&vpmu_lock);
+
+    if ( !vpmu_is_set(vcpu_vpmu(v), VPMU_AVAILABLE) )
+        goto out;
+
+    if ( !is_hardware_domain(v->domain) &&
+         (vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
     {
-        spin_lock(&vpmu_lock);
         vpmu_count--;
-        spin_unlock(&vpmu_lock);
+        vpmu_reset(vcpu_vpmu(v), VPMU_AVAILABLE);
     }
+    else if ( is_hardware_domain(v->domain) &&
+              (vpmu_mode != XENPMU_MODE_OFF) )
+        vpmu_reset(vcpu_vpmu(v), VPMU_AVAILABLE);
+
+ out:
+    spin_unlock(&vpmu_lock);
+}
+
+void vpmu_initialise(struct vcpu *v)
+{
+    get_vpmu(v);
+
+    /*
+     * Guests without LAPIC (i.e. PV) call vpmu_arch_initialise()
+     * from pvpmu_init().
+     */
+    if ( has_vlapic(v->domain) && vpmu_arch_initialise(v) )
+        put_vpmu(v);
 }
 
 static void vpmu_clear_last(void *arg)
@@ -526,7 +563,7 @@  static void vpmu_clear_last(void *arg)
         this_cpu(last_vcpu) = NULL;
 }
 
-void vpmu_destroy(struct vcpu *v)
+static void vpmu_arch_destroy(struct vcpu *v)
 {
     struct vpmu_struct *vpmu = vcpu_vpmu(v);
 
@@ -551,11 +588,13 @@  void vpmu_destroy(struct vcpu *v)
                          vpmu_save_force, v, 1);
          vpmu->arch_vpmu_ops->arch_vpmu_destroy(v);
     }
+}
 
-    spin_lock(&vpmu_lock);
-    if ( !is_hardware_domain(v->domain) )
-        vpmu_count--;
-    spin_unlock(&vpmu_lock);
+void vpmu_destroy(struct vcpu *v)
+{
+    vpmu_arch_destroy(v);
+
+    put_vpmu(v);
 }
 
 static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
@@ -565,13 +604,15 @@  static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
     struct page_info *page;
     uint64_t gfn = params->val;
 
-    if ( (vpmu_mode == XENPMU_MODE_OFF) ||
-         ((vpmu_mode & XENPMU_MODE_ALL) && !is_hardware_domain(d)) )
-        return -EINVAL;
-
     if ( (params->vcpu >= d->max_vcpus) || (d->vcpu[params->vcpu] == NULL) )
         return -EINVAL;
 
+    v = d->vcpu[params->vcpu];
+    vpmu = vcpu_vpmu(v);
+
+    if ( !vpmu_available(v) )
+        return -ENOENT;
+
     page = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC);
     if ( !page )
         return -EINVAL;
@@ -582,9 +623,6 @@  static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
         return -EINVAL;
     }
 
-    v = d->vcpu[params->vcpu];
-    vpmu = vcpu_vpmu(v);
-
     spin_lock(&vpmu->vpmu_lock);
 
     if ( v->arch.vpmu.xenpmu_data )
@@ -602,7 +640,8 @@  static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
         return -ENOMEM;
     }
 
-    vpmu_initialise(v);
+    if ( vpmu_arch_initialise(v) )
+        put_vpmu(v);
 
     spin_unlock(&vpmu->vpmu_lock);
 
@@ -626,7 +665,7 @@  static void pvpmu_finish(struct domain *d, xen_pmu_params_t *params)
     vpmu = vcpu_vpmu(v);
     spin_lock(&vpmu->vpmu_lock);
 
-    vpmu_destroy(v);
+    vpmu_arch_destroy(v);
     xenpmu_data = vpmu->xenpmu_data;
     vpmu->xenpmu_data = NULL;
 
diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c
index e0a387e..6d1946a 100644
--- a/xen/arch/x86/cpuid.c
+++ b/xen/arch/x86/cpuid.c
@@ -713,7 +713,7 @@  static void pv_cpuid(uint32_t leaf, uint32_t subleaf, struct cpuid_leaf *res)
             }
         }
 
-        if ( vpmu_enabled(curr) &&
+        if ( vpmu_available(curr) &&
              vpmu_is_set(vcpu_vpmu(curr), VPMU_CPU_HAS_DS) )
         {
             res->d |= cpufeat_mask(X86_FEATURE_DS);
@@ -726,7 +726,7 @@  static void pv_cpuid(uint32_t leaf, uint32_t subleaf, struct cpuid_leaf *res)
 
     case 0x0000000a: /* Architectural Performance Monitor Features (Intel) */
         if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
-             !vpmu_enabled(curr) )
+             !vpmu_available(curr) )
             goto unsupported;
 
         /* Report at most version 3 since that's all we currently emulate. */
@@ -793,7 +793,7 @@  static void hvm_cpuid(uint32_t leaf, uint32_t subleaf, struct cpuid_leaf *res)
         if ( !hap_enabled(d) && !hvm_pae_enabled(v) )
             res->d &= ~cpufeat_mask(X86_FEATURE_PSE36);
 
-        if ( vpmu_enabled(v) &&
+        if ( vpmu_available(v) &&
              vpmu_is_set(vcpu_vpmu(v), VPMU_CPU_HAS_DS) )
         {
             res->d |= cpufeat_mask(X86_FEATURE_DS);
@@ -811,7 +811,7 @@  static void hvm_cpuid(uint32_t leaf, uint32_t subleaf, struct cpuid_leaf *res)
         break;
 
     case 0x0000000a: /* Architectural Performance Monitor Features (Intel) */
-        if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || !vpmu_enabled(v) )
+        if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || !vpmu_available(v) )
         {
             *res = EMPTY_LEAF;
             break;
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 71c0e3c..292442a 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -458,6 +458,8 @@  int vcpu_initialise(struct vcpu *v)
         if ( is_pv_domain(d) )
             xfree(v->arch.pv_vcpu.trap_ctxt);
     }
+    else if ( !is_idle_domain(v->domain) )
+        vpmu_initialise(v);
 
     return rc;
 }
@@ -475,6 +477,9 @@  void vcpu_destroy(struct vcpu *v)
 
     vcpu_destroy_fpu(v);
 
+    if ( !is_idle_domain(v->domain) )
+        vpmu_destroy(v);
+
     if ( has_hvm_container_vcpu(v) )
         hvm_vcpu_destroy(v);
     else
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index ca2785c..27bfb3c 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -1169,10 +1169,6 @@  static int svm_vcpu_initialise(struct vcpu *v)
         return rc;
     }
 
-    /* PVH's VPMU is initialized via hypercall */
-    if ( has_vlapic(v->domain) )
-        vpmu_initialise(v);
-
     svm_guest_osvw_init(v);
 
     return 0;
@@ -1180,7 +1176,6 @@  static int svm_vcpu_initialise(struct vcpu *v)
 
 static void svm_vcpu_destroy(struct vcpu *v)
 {
-    vpmu_destroy(v);
     svm_destroy_vmcb(v);
     passive_domain_destroy(v);
 }
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 42f4fbd..d446e9e 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -330,10 +330,6 @@  static int vmx_vcpu_initialise(struct vcpu *v)
         }
     }
 
-    /* PVH's VPMU is initialized via hypercall */
-    if ( has_vlapic(v->domain) )
-        vpmu_initialise(v);
-
     vmx_install_vlapic_mapping(v);
 
     /* %eax == 1 signals full real-mode support to the guest loader. */
@@ -354,7 +350,6 @@  static void vmx_vcpu_destroy(struct vcpu *v)
      */
     vmx_vcpu_disable_pml(v);
     vmx_destroy_vmcs(v);
-    vpmu_destroy(v);
     passive_domain_destroy(v);
 }
 
diff --git a/xen/include/asm-x86/vpmu.h b/xen/include/asm-x86/vpmu.h
index e50618f..5e778ab 100644
--- a/xen/include/asm-x86/vpmu.h
+++ b/xen/include/asm-x86/vpmu.h
@@ -25,7 +25,7 @@ 
 
 #define vcpu_vpmu(vcpu)   (&(vcpu)->arch.vpmu)
 #define vpmu_vcpu(vpmu)   container_of((vpmu), struct vcpu, arch.vpmu)
-#define vpmu_enabled(vcpu) vpmu_is_set(vcpu_vpmu(vcpu), VPMU_CONTEXT_ALLOCATED)
+#define vpmu_available(vcpu) vpmu_is_set(vcpu_vpmu(vcpu), VPMU_AVAILABLE)
 
 #define MSR_TYPE_COUNTER            0
 #define MSR_TYPE_CTRL               1
@@ -74,6 +74,7 @@  struct vpmu_struct {
 #define VPMU_PASSIVE_DOMAIN_ALLOCATED       0x20
 /* PV(H) guests: VPMU registers are accessed by guest from shared page */
 #define VPMU_CACHED                         0x40
+#define VPMU_AVAILABLE                      0x80
 
 /* Intel-specific VPMU features */
 #define VPMU_CPU_HAS_DS                     0x100 /* Has Debug Store */
@@ -89,7 +90,8 @@  static inline void vpmu_reset(struct vpmu_struct *vpmu, const u32 mask)
 }
 static inline void vpmu_clear(struct vpmu_struct *vpmu)
 {
-    vpmu->flags = 0;
+    /* VPMU_AVAILABLE should be altered by get/put_vpmu(). */
+    vpmu->flags &= VPMU_AVAILABLE;
 }
 static inline bool_t vpmu_is_set(const struct vpmu_struct *vpmu, const u32 mask)
 {