
xen/mem_sharing: support forks with active vPMU state

Message ID 4f3ff38d8226d10dab3440f020c9ba7f07cab1fd.1658250756.git.tamas.lengyel@intel.com (mailing list archive)
State Superseded
Series xen/mem_sharing: support forks with active vPMU state

Commit Message

Tamas K Lengyel July 19, 2022, 5:18 p.m. UTC
Currently the vPMU state of a parent isn't copied to VM forks. To enable the
vPMU state to be copied to a fork VM we export certain vPMU functions. First,
the vPMU context needs to be allocated for the fork if the parent has one. For
this we introduce the vpmu_ops.allocate_context hook, whose backing code was
previously only invoked when the guest enabled the PMU on itself. Furthermore,
we export vpmu_save_force so that the PMU context can be saved on demand even
if no context switch has taken place on the parent's CPU yet. Additionally, we
make sure all relevant configuration MSRs are saved in the vPMU context so the
copy is complete and the fork starts with the same PMU configuration as the
parent.

Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
---
 xen/arch/x86/cpu/vpmu.c         | 12 ++++++++-
 xen/arch/x86/cpu/vpmu_intel.c   | 16 +++++++++++
 xen/arch/x86/include/asm/vpmu.h |  5 ++++
 xen/arch/x86/mm/mem_sharing.c   | 48 +++++++++++++++++++++++++++++++++
 4 files changed, 80 insertions(+), 1 deletion(-)

Comments

Andrew Cooper July 19, 2022, 6:22 p.m. UTC | #1
On 19/07/2022 18:18, Tamas K Lengyel wrote:
> diff --git a/xen/arch/x86/cpu/vpmu.c b/xen/arch/x86/cpu/vpmu.c
> index d2c03a1104..2b5d64a60d 100644
> --- a/xen/arch/x86/cpu/vpmu.c
> +++ b/xen/arch/x86/cpu/vpmu.c
> @@ -529,6 +529,16 @@ void vpmu_initialise(struct vcpu *v)
>          put_vpmu(v);
>  }
>  
> +void vpmu_allocate_context(struct vcpu *v)
> +{
> +    struct vpmu_struct *vpmu = vcpu_vpmu(v);
> +
> +    if ( vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
> +        return;
> +
> +    alternative_call(vpmu_ops.allocate_context, v);

You need to fill in an AMD pointer, or make this conditional.

All alternatives have NULL pointers turned into UDs.

Should be a two-liner on the AMD side.

> diff --git a/xen/arch/x86/cpu/vpmu_intel.c b/xen/arch/x86/cpu/vpmu_intel.c
> index 8612f46973..31dc0ee14b 100644
> --- a/xen/arch/x86/cpu/vpmu_intel.c
> +++ b/xen/arch/x86/cpu/vpmu_intel.c
>  static int core2_vpmu_verify(struct vcpu *v)
> @@ -474,7 +485,11 @@ static int core2_vpmu_alloc_resource(struct vcpu *v)
>                                      sizeof(uint64_t) * fixed_pmc_cnt;
>  
>      vpmu->context = core2_vpmu_cxt;
> +    vpmu->context_size = sizeof(struct xen_pmu_intel_ctxt) +
> +                         fixed_pmc_cnt * sizeof(uint64_t) +
> +                         arch_pmc_cnt * sizeof(struct xen_pmu_cntr_pair);

This wants deduplicating with the earlier calculation, surely?

>      vpmu->priv_context = p;
> +    vpmu->priv_context_size = sizeof(uint64_t);
>  
>      if ( !has_vlapic(v->domain) )
>      {
> @@ -882,6 +897,7 @@ static int cf_check vmx_vpmu_initialise(struct vcpu *v)
>  
>  static const struct arch_vpmu_ops __initconst_cf_clobber core2_vpmu_ops = {
>      .initialise = vmx_vpmu_initialise,
> +    .allocate_context = core2_vpmu_alloc_resource,

core2_vpmu_alloc_resource() needs to gain a cf_check to not explode on
TGL/SPR.

>      .do_wrmsr = core2_vpmu_do_wrmsr,
>      .do_rdmsr = core2_vpmu_do_rdmsr,
>      .do_interrupt = core2_vpmu_do_interrupt,
> diff --git a/xen/arch/x86/include/asm/vpmu.h b/xen/arch/x86/include/asm/vpmu.h
> index e5709bd44a..14d0939247 100644
> --- a/xen/arch/x86/include/asm/vpmu.h
> +++ b/xen/arch/x86/include/asm/vpmu.h
> @@ -106,8 +109,10 @@ void vpmu_lvtpc_update(uint32_t val);
>  int vpmu_do_msr(unsigned int msr, uint64_t *msr_content, bool is_write);
>  void vpmu_do_interrupt(struct cpu_user_regs *regs);
>  void vpmu_initialise(struct vcpu *v);
> +void vpmu_allocate_context(struct vcpu *v);
>  void vpmu_destroy(struct vcpu *v);
>  void vpmu_save(struct vcpu *v);
> +void vpmu_save_force(void *arg);

Needs the cf_check to compile.
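
i.e. the header declaration would need to match the cf_check'd definition
(a sketch of the one-line fix):

    void cf_check vpmu_save_force(void *arg);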

>  int vpmu_load(struct vcpu *v, bool_t from_guest);
>  void vpmu_dump(struct vcpu *v);
>  
> diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
> index 8f9d9ed9a9..39cd03abf7 100644
> --- a/xen/arch/x86/mm/mem_sharing.c
> +++ b/xen/arch/x86/mm/mem_sharing.c
> @@ -1653,6 +1653,50 @@ static void copy_vcpu_nonreg_state(struct vcpu *d_vcpu, struct vcpu *cd_vcpu)
>      hvm_set_nonreg_state(cd_vcpu, &nrs);
>  }
>  
> +static int copy_vpmu(struct vcpu *d_vcpu, struct vcpu *cd_vcpu)
> +{
> +    struct vpmu_struct *d_vpmu = vcpu_vpmu(d_vcpu);
> +    struct vpmu_struct *cd_vpmu = vcpu_vpmu(cd_vcpu);
> +
> +    if ( !vpmu_are_all_set(d_vpmu, VPMU_INITIALIZED | VPMU_CONTEXT_ALLOCATED) )
> +        return 0;
> +    if ( !vpmu_is_set(cd_vpmu, VPMU_CONTEXT_ALLOCATED) )
> +    {
> +        vpmu_allocate_context(cd_vcpu);
> +        if ( !vpmu_is_set(cd_vpmu, VPMU_CONTEXT_ALLOCATED) )
> +            return -ENOMEM;

vpmu_allocate_context() already checks VPMU_CONTEXT_ALLOCATED.  But
isn't the double check here redundant?

The subsequent check looks like you want to pass the hook's return value
up through vpmu_allocate_context().

(And if you feel like turning it from bool-as-int to something more
sane, say -errno, that would also be great.)
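
Something like this, perhaps (a rough sketch, assuming the hook itself were
also reworked to return 0 / -errno rather than bool-as-int):

    int vpmu_allocate_context(struct vcpu *v)
    {
        struct vpmu_struct *vpmu = vcpu_vpmu(v);

        if ( vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
            return 0;

        if ( !vpmu_ops.allocate_context )
            return -EOPNOTSUPP;

        /* Forward the hook's 0 / -errno result to the caller. */
        return alternative_call(vpmu_ops.allocate_context, v);
    }

with the allocation path in copy_vpmu() then collapsing to:

    int rc = vpmu_allocate_context(cd_vcpu);

    if ( rc )
        return rc;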

Otherwise, looks plausible.

~Andrew
Tamas K Lengyel July 19, 2022, 6:42 p.m. UTC | #2
On Tue, Jul 19, 2022 at 2:23 PM Andrew Cooper <Andrew.Cooper3@citrix.com> wrote:
>
> On 19/07/2022 18:18, Tamas K Lengyel wrote:
> > diff --git a/xen/arch/x86/cpu/vpmu.c b/xen/arch/x86/cpu/vpmu.c
> > index d2c03a1104..2b5d64a60d 100644
> > --- a/xen/arch/x86/cpu/vpmu.c
> > +++ b/xen/arch/x86/cpu/vpmu.c
> > @@ -529,6 +529,16 @@ void vpmu_initialise(struct vcpu *v)
> >          put_vpmu(v);
> >  }
> >
> > +void vpmu_allocate_context(struct vcpu *v)
> > +{
> > +    struct vpmu_struct *vpmu = vcpu_vpmu(v);
> > +
> > +    if ( vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
> > +        return;
> > +
> > +    alternative_call(vpmu_ops.allocate_context, v);
>
> You need to fill in an AMD pointer, or make this conditional.
>
> All alternatives have NULL pointers turned into UDs.
>
> Should be a two-liner on the AMD side.

There is no AMD implementation of this hook, so I'll just make the
call conditional on the pointer being non-NULL.
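
i.e. roughly (a sketch of what the next version might do):

    void vpmu_allocate_context(struct vcpu *v)
    {
        struct vpmu_struct *vpmu = vcpu_vpmu(v);

        if ( vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
            return;

        /* AMD provides no allocate_context hook, so only call it when present. */
        if ( vpmu_ops.allocate_context )
            alternative_call(vpmu_ops.allocate_context, v);
    }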

>
> > diff --git a/xen/arch/x86/cpu/vpmu_intel.c b/xen/arch/x86/cpu/vpmu_intel.c
> > index 8612f46973..31dc0ee14b 100644
> > --- a/xen/arch/x86/cpu/vpmu_intel.c
> > +++ b/xen/arch/x86/cpu/vpmu_intel.c
> >  static int core2_vpmu_verify(struct vcpu *v)
> > @@ -474,7 +485,11 @@ static int core2_vpmu_alloc_resource(struct vcpu *v)
> >                                      sizeof(uint64_t) * fixed_pmc_cnt;
> >
> >      vpmu->context = core2_vpmu_cxt;
> > +    vpmu->context_size = sizeof(struct xen_pmu_intel_ctxt) +
> > +                         fixed_pmc_cnt * sizeof(uint64_t) +
> > +                         arch_pmc_cnt * sizeof(struct xen_pmu_cntr_pair);
>
> This wants deduplicating with the earlier calculation, surely?

Sure.
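
Something along these lines (a sketch; the existing allocation and offset
setup in core2_vpmu_alloc_resource() is only approximated here):

    size_t size = sizeof(struct xen_pmu_intel_ctxt) +
                  fixed_pmc_cnt * sizeof(uint64_t) +
                  arch_pmc_cnt * sizeof(struct xen_pmu_cntr_pair);

    core2_vpmu_cxt = xzalloc_bytes(size);   /* stands in for the existing allocation */
    if ( !core2_vpmu_cxt )
        goto out_err;

    vpmu->context = core2_vpmu_cxt;
    vpmu->context_size = size;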

>
> >      vpmu->priv_context = p;
> > +    vpmu->priv_context_size = sizeof(uint64_t);
> >
> >      if ( !has_vlapic(v->domain) )
> >      {
> > @@ -882,6 +897,7 @@ static int cf_check vmx_vpmu_initialise(struct vcpu *v)
> >
> >  static const struct arch_vpmu_ops __initconst_cf_clobber core2_vpmu_ops = {
> >      .initialise = vmx_vpmu_initialise,
> > +    .allocate_context = core2_vpmu_alloc_resource,
>
> core2_vpmu_alloc_resource() needs to gain a cf_check to not explode on
> TGL/SPR.
>
> >      .do_wrmsr = core2_vpmu_do_wrmsr,
> >      .do_rdmsr = core2_vpmu_do_rdmsr,
> >      .do_interrupt = core2_vpmu_do_interrupt,
> > diff --git a/xen/arch/x86/include/asm/vpmu.h b/xen/arch/x86/include/asm/vpmu.h
> > index e5709bd44a..14d0939247 100644
> > --- a/xen/arch/x86/include/asm/vpmu.h
> > +++ b/xen/arch/x86/include/asm/vpmu.h
> > @@ -106,8 +109,10 @@ void vpmu_lvtpc_update(uint32_t val);
> >  int vpmu_do_msr(unsigned int msr, uint64_t *msr_content, bool is_write);
> >  void vpmu_do_interrupt(struct cpu_user_regs *regs);
> >  void vpmu_initialise(struct vcpu *v);
> > +void vpmu_allocate_context(struct vcpu *v);
> >  void vpmu_destroy(struct vcpu *v);
> >  void vpmu_save(struct vcpu *v);
> > +void vpmu_save_force(void *arg);
>
> Needs the cf_check to compile.
>
> >  int vpmu_load(struct vcpu *v, bool_t from_guest);
> >  void vpmu_dump(struct vcpu *v);
> >
> > diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
> > index 8f9d9ed9a9..39cd03abf7 100644
> > --- a/xen/arch/x86/mm/mem_sharing.c
> > +++ b/xen/arch/x86/mm/mem_sharing.c
> > @@ -1653,6 +1653,50 @@ static void copy_vcpu_nonreg_state(struct vcpu *d_vcpu, struct vcpu *cd_vcpu)
> >      hvm_set_nonreg_state(cd_vcpu, &nrs);
> >  }
> >
> > +static int copy_vpmu(struct vcpu *d_vcpu, struct vcpu *cd_vcpu)
> > +{
> > +    struct vpmu_struct *d_vpmu = vcpu_vpmu(d_vcpu);
> > +    struct vpmu_struct *cd_vpmu = vcpu_vpmu(cd_vcpu);
> > +
> > +    if ( !vpmu_are_all_set(d_vpmu, VPMU_INITIALIZED | VPMU_CONTEXT_ALLOCATED) )
> > +        return 0;
> > +    if ( !vpmu_is_set(cd_vpmu, VPMU_CONTEXT_ALLOCATED) )
> > +    {
> > +        vpmu_allocate_context(cd_vcpu);
> > +        if ( !vpmu_is_set(cd_vpmu, VPMU_CONTEXT_ALLOCATED) )
> > +            return -ENOMEM;
>
> vpmu_allocate_context() already checks VPMU_CONTEXT_ALLOCATED.  But
> isn't the double check here redundant?

True, I could drop the top-level check here.

>
> The subsequent check looks like you want to pass the hook's return value
> up through vpmu_allocate_context().
>
> (And if you feel like turning it from bool-as-int to something more
> sane, say -errno, that would also be great.)

Yeah, I wanted to avoid having to rework the currently backwards
meaning of the int values returned by the vpmu functions, which is why
I double-check that the allocation worked instead. If I did what you
recommend, this would be the only vpmu function that doesn't return
void and the only callback that returns error codes instead of boolean
success/failure. I'd rather keep the vpmu code self-consistent and
just live with the arguably somewhat odd-looking logic here.

Tamas
Jan Beulich July 20, 2022, 8:24 a.m. UTC | #3
On 19.07.2022 19:18, Tamas K Lengyel wrote:
> --- a/xen/arch/x86/include/asm/vpmu.h
> +++ b/xen/arch/x86/include/asm/vpmu.h
> @@ -40,6 +40,7 @@
>  /* Arch specific operations shared by all vpmus */
>  struct arch_vpmu_ops {
>      int (*initialise)(struct vcpu *v);
> +    int (*allocate_context)(struct vcpu *v);

I think this hook would best be compiled out when !MEM_SHARING, even
more so when it can be left with NULL in it.
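
A sketch of how that could look, assuming CONFIG_MEM_SHARING is the gating
Kconfig symbol:

    /* xen/arch/x86/include/asm/vpmu.h */
    #ifdef CONFIG_MEM_SHARING
        int (*allocate_context)(struct vcpu *v);
    #endif

    /* xen/arch/x86/cpu/vpmu_intel.c */
    #ifdef CONFIG_MEM_SHARING
        .allocate_context = core2_vpmu_alloc_resource,
    #endif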

Jan

Patch

diff --git a/xen/arch/x86/cpu/vpmu.c b/xen/arch/x86/cpu/vpmu.c
index d2c03a1104..2b5d64a60d 100644
--- a/xen/arch/x86/cpu/vpmu.c
+++ b/xen/arch/x86/cpu/vpmu.c
@@ -336,7 +336,7 @@  void vpmu_do_interrupt(struct cpu_user_regs *regs)
 #endif
 }
 
-static void cf_check vpmu_save_force(void *arg)
+void cf_check vpmu_save_force(void *arg)
 {
     struct vcpu *v = arg;
     struct vpmu_struct *vpmu = vcpu_vpmu(v);
@@ -529,6 +529,16 @@  void vpmu_initialise(struct vcpu *v)
         put_vpmu(v);
 }
 
+void vpmu_allocate_context(struct vcpu *v)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    if ( vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
+        return;
+
+    alternative_call(vpmu_ops.allocate_context, v);
+}
+
 static void cf_check vpmu_clear_last(void *arg)
 {
     if ( this_cpu(last_vcpu) == arg )
diff --git a/xen/arch/x86/cpu/vpmu_intel.c b/xen/arch/x86/cpu/vpmu_intel.c
index 8612f46973..31dc0ee14b 100644
--- a/xen/arch/x86/cpu/vpmu_intel.c
+++ b/xen/arch/x86/cpu/vpmu_intel.c
@@ -282,10 +282,17 @@  static inline void __core2_vpmu_save(struct vcpu *v)
     for ( i = 0; i < fixed_pmc_cnt; i++ )
         rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, fixed_counters[i]);
     for ( i = 0; i < arch_pmc_cnt; i++ )
+    {
         rdmsrl(MSR_IA32_PERFCTR0 + i, xen_pmu_cntr_pair[i].counter);
+        rdmsrl(MSR_P6_EVNTSEL(i), xen_pmu_cntr_pair[i].control);
+    }
 
     if ( !is_hvm_vcpu(v) )
         rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt->global_status);
+    /* Save MSR to private context to make it fork-friendly */
+    else if ( mem_sharing_enabled(v->domain) )
+        vmx_read_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL,
+                           &core2_vpmu_cxt->global_ctrl);
 }
 
 static int cf_check core2_vpmu_save(struct vcpu *v, bool to_guest)
@@ -346,6 +353,10 @@  static inline void __core2_vpmu_load(struct vcpu *v)
         core2_vpmu_cxt->global_ovf_ctrl = 0;
         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
     }
+    /* Restore MSR from context when used with a fork */
+    else if ( mem_sharing_is_fork(v->domain) )
+        vmx_write_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL,
+                            core2_vpmu_cxt->global_ctrl);
 }
 
 static int core2_vpmu_verify(struct vcpu *v)
@@ -474,7 +485,11 @@  static int core2_vpmu_alloc_resource(struct vcpu *v)
                                     sizeof(uint64_t) * fixed_pmc_cnt;
 
     vpmu->context = core2_vpmu_cxt;
+    vpmu->context_size = sizeof(struct xen_pmu_intel_ctxt) +
+                         fixed_pmc_cnt * sizeof(uint64_t) +
+                         arch_pmc_cnt * sizeof(struct xen_pmu_cntr_pair);
     vpmu->priv_context = p;
+    vpmu->priv_context_size = sizeof(uint64_t);
 
     if ( !has_vlapic(v->domain) )
     {
@@ -882,6 +897,7 @@  static int cf_check vmx_vpmu_initialise(struct vcpu *v)
 
 static const struct arch_vpmu_ops __initconst_cf_clobber core2_vpmu_ops = {
     .initialise = vmx_vpmu_initialise,
+    .allocate_context = core2_vpmu_alloc_resource,
     .do_wrmsr = core2_vpmu_do_wrmsr,
     .do_rdmsr = core2_vpmu_do_rdmsr,
     .do_interrupt = core2_vpmu_do_interrupt,
diff --git a/xen/arch/x86/include/asm/vpmu.h b/xen/arch/x86/include/asm/vpmu.h
index e5709bd44a..14d0939247 100644
--- a/xen/arch/x86/include/asm/vpmu.h
+++ b/xen/arch/x86/include/asm/vpmu.h
@@ -40,6 +40,7 @@ 
 /* Arch specific operations shared by all vpmus */
 struct arch_vpmu_ops {
     int (*initialise)(struct vcpu *v);
+    int (*allocate_context)(struct vcpu *v);
     int (*do_wrmsr)(unsigned int msr, uint64_t msr_content);
     int (*do_rdmsr)(unsigned int msr, uint64_t *msr_content);
     int (*do_interrupt)(struct cpu_user_regs *regs);
@@ -59,6 +60,8 @@  struct vpmu_struct {
     u32 hw_lapic_lvtpc;
     void *context;      /* May be shared with PV guest */
     void *priv_context; /* hypervisor-only */
+    size_t context_size;
+    size_t priv_context_size;
     struct xen_pmu_data *xenpmu_data;
     spinlock_t vpmu_lock;
 };
@@ -106,8 +109,10 @@  void vpmu_lvtpc_update(uint32_t val);
 int vpmu_do_msr(unsigned int msr, uint64_t *msr_content, bool is_write);
 void vpmu_do_interrupt(struct cpu_user_regs *regs);
 void vpmu_initialise(struct vcpu *v);
+void vpmu_allocate_context(struct vcpu *v);
 void vpmu_destroy(struct vcpu *v);
 void vpmu_save(struct vcpu *v);
+void vpmu_save_force(void *arg);
 int vpmu_load(struct vcpu *v, bool_t from_guest);
 void vpmu_dump(struct vcpu *v);
 
diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
index 8f9d9ed9a9..39cd03abf7 100644
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -1653,6 +1653,50 @@  static void copy_vcpu_nonreg_state(struct vcpu *d_vcpu, struct vcpu *cd_vcpu)
     hvm_set_nonreg_state(cd_vcpu, &nrs);
 }
 
+static int copy_vpmu(struct vcpu *d_vcpu, struct vcpu *cd_vcpu)
+{
+    struct vpmu_struct *d_vpmu = vcpu_vpmu(d_vcpu);
+    struct vpmu_struct *cd_vpmu = vcpu_vpmu(cd_vcpu);
+
+    if ( !vpmu_are_all_set(d_vpmu, VPMU_INITIALIZED | VPMU_CONTEXT_ALLOCATED) )
+        return 0;
+    if ( !vpmu_is_set(cd_vpmu, VPMU_CONTEXT_ALLOCATED) )
+    {
+        vpmu_allocate_context(cd_vcpu);
+        if ( !vpmu_is_set(cd_vpmu, VPMU_CONTEXT_ALLOCATED) )
+            return -ENOMEM;
+    }
+
+    /*
+     * The VPMU subsystem only saves the context when the CPU does a context
+     * switch. Otherwise, the relevant MSRs are not saved on vmexit.
+     * We force a save here in case the parent CPU context is still loaded.
+     */
+    if ( vpmu_is_set(d_vpmu, VPMU_CONTEXT_LOADED) )
+    {
+        int pcpu = smp_processor_id();
+
+        if ( d_vpmu->last_pcpu != pcpu )
+        {
+            on_selected_cpus(cpumask_of(d_vpmu->last_pcpu),
+                             vpmu_save_force, (void *)d_vcpu, 1);
+            vpmu_reset(d_vpmu, VPMU_CONTEXT_LOADED);
+        } else
+            vpmu_save(d_vcpu);
+    }
+
+    if ( vpmu_is_set(d_vpmu, VPMU_RUNNING) )
+        vpmu_set(cd_vpmu, VPMU_RUNNING);
+
+    /* Make sure context gets (re-)loaded when scheduled next */
+    vpmu_reset(cd_vpmu, VPMU_CONTEXT_LOADED);
+
+    memcpy(cd_vpmu->context, d_vpmu->context, d_vpmu->context_size);
+    memcpy(cd_vpmu->priv_context, d_vpmu->priv_context, d_vpmu->priv_context_size);
+
+    return 0;
+}
+
 static int copy_vcpu_settings(struct domain *cd, const struct domain *d)
 {
     unsigned int i;
@@ -1702,6 +1746,10 @@  static int copy_vcpu_settings(struct domain *cd, const struct domain *d)
             copy_domain_page(new_vcpu_info_mfn, vcpu_info_mfn);
         }
 
+        ret = copy_vpmu(d_vcpu, cd_vcpu);
+        if ( ret )
+            return ret;
+
         hvm_vmtrace_reset(cd_vcpu);
 
         copy_vcpu_nonreg_state(d_vcpu, cd_vcpu);