diff mbox

[v4] x86/vpmu: add cpu hot unplug notifier for vpmu

Message ID 1495485940-20803-1-git-send-email-luwei.kang@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Luwei Kang May 22, 2017, 8:45 p.m. UTC
Currently, hot-unplugging a physical CPU with vpmu enabled may cause
a system hang due to sending a remote call to an offlined pCPU. This
patch adds a cpu hot unplug notifier to save the vpmu context before
the cpu goes offline.

Consider one scenario: hot unplug pCPU N with vpmu enabled.
The vcpu which was running on this pCPU will be switched to another
online cpu. A remote call will be sent to pCPU N to save the
vpmu context before loading the vpmu context on the new pCPU.
The system will hang in function on_selected_cpus() because
pCPU N is offlined and cannot respond.

The purpose of adding a VPMU_CONTEXT_LOADED check in vpmu_arch_destroy()
before sending a remote call to save the vpmu context is:
a. When a vpmu context has been loaded on a remote pCPU, making a
   remote call to save the vpmu context and stop the counters is necessary.
b. The VPMU_CONTEXT_LOADED flag will be reset when a pCPU is offlined,
   so this check prevents sending a remote call to an offlined pCPU.

Signed-off-by: Luwei Kang <luwei.kang@intel.com>
---
v4:
 1.remove cpu_online() check in vpmu_load();
 2.remove "vpmu_" prefix;
 3.fix a coding style issue;
 4.add some commit message text about VPMU_CONTEXT_LOADED in vpmu_arch_destroy();
v3:
 1.add cpu_online() check in vpmu_load() and vpmu_arch_destroy();
 2.add vpmu_ prefix: rename cpu_callback() to vpmu_cpu_callback();
v2:
 1.fix some typos and coding style;
 2.change "switch" to "if" in cpu_callback() because there is just one case;
 3.add VPMU_CONTEXT_LOADED check before sending remote call in vpmu_arch_destroy();
---
 xen/arch/x86/cpu/vpmu.c | 45 +++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 41 insertions(+), 4 deletions(-)

Comments

Jan Beulich May 26, 2017, 2:05 p.m. UTC | #1
>>> On 22.05.17 at 22:45, <luwei.kang@intel.com> wrote:
> Currently, Hot unplug a physical CPU with vpmu enabled may cause
> system hang due to send a remote call to an offlined pCPU. This
> patch add a cpu hot unplug notifer to save vpmu context before
> cpu offline.
> 
> Consider one scenario, hot unplug pCPU N with vpmu enabled.
> The vcpu which running on this pCPU will be switch to other
> online cpu. A remote call will be send to pCPU N to save the
> vpmu context before loading the vpmu context on this pCPU.
> System will hang in function on_select_cpus() because of that
> pCPU is offlined and can not do any respond.
> 
> The purpose of add a VPMU_CONTEXT_LOADED check in vpmu_arch_destroy()
> before send a remote call to save vpmu contex is:
> a. when a vpmu context has been loaded in a remote pCPU, make a
>    remote call to save the vpmu contex and stop counters is necessary.
> b. VPMU_CONTEXT_LOADED flag will be reset if a pCPU is offlined.
>    this check will prevent send a remote call to an offlined pCPU.
> 
> Signed-off-by: Luwei Kang <luwei.kang@intel.com>

Acked-by: Jan Beulich <jbeulich@suse.com>
partly on the basis that Boris had agreed with how the change
is being done, so Boris, an R-b from you would be nice.

Jan
Boris Ostrovsky May 26, 2017, 2:22 p.m. UTC | #2
On 05/26/2017 10:05 AM, Jan Beulich wrote:
>>>> On 22.05.17 at 22:45, <luwei.kang@intel.com> wrote:
>> Currently, Hot unplug a physical CPU with vpmu enabled may cause
>> system hang due to send a remote call to an offlined pCPU. This
>> patch add a cpu hot unplug notifer to save vpmu context before
>> cpu offline.
>>
>> Consider one scenario, hot unplug pCPU N with vpmu enabled.
>> The vcpu which running on this pCPU will be switch to other
>> online cpu. A remote call will be send to pCPU N to save the
>> vpmu context before loading the vpmu context on this pCPU.
>> System will hang in function on_select_cpus() because of that
>> pCPU is offlined and can not do any respond.
>>
>> The purpose of add a VPMU_CONTEXT_LOADED check in vpmu_arch_destroy()
>> before send a remote call to save vpmu contex is:
>> a. when a vpmu context has been loaded in a remote pCPU, make a
>>    remote call to save the vpmu contex and stop counters is necessary.
>> b. VPMU_CONTEXT_LOADED flag will be reset if a pCPU is offlined.
>>    this check will prevent send a remote call to an offlined pCPU.
>>
>> Signed-off-by: Luwei Kang <luwei.kang@intel.com>
> Acked-by: Jan Beulich <jbeulich@suse.com>
> partly on the basis that Boris had agreed with how the change
> is being done, so Boris, an R-b from you would be nice.

Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
diff mbox

Patch

diff --git a/xen/arch/x86/cpu/vpmu.c b/xen/arch/x86/cpu/vpmu.c
index 03401fd..1f7830b 100644
--- a/xen/arch/x86/cpu/vpmu.c
+++ b/xen/arch/x86/cpu/vpmu.c
@@ -21,6 +21,7 @@ 
 #include <xen/xenoprof.h>
 #include <xen/event.h>
 #include <xen/guest_access.h>
+#include <xen/cpu.h>
 #include <asm/regs.h>
 #include <asm/types.h>
 #include <asm/msr.h>
@@ -575,15 +576,21 @@  static void vpmu_arch_destroy(struct vcpu *v)
      * We will test it again in vpmu_clear_last() with interrupts
      * disabled to make sure we don't clear someone else.
      */
-    if ( per_cpu(last_vcpu, vpmu->last_pcpu) == v )
+    if ( cpu_online(vpmu->last_pcpu) &&
+         per_cpu(last_vcpu, vpmu->last_pcpu) == v )
         on_selected_cpus(cpumask_of(vpmu->last_pcpu),
                          vpmu_clear_last, v, 1);
 
     if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_destroy )
     {
-        /* Unload VPMU first. This will stop counters */
-        on_selected_cpus(cpumask_of(vcpu_vpmu(v)->last_pcpu),
-                         vpmu_save_force, v, 1);
+        /*
+         * Unload VPMU first if VPMU_CONTEXT_LOADED being set.
+         * This will stop counters.
+         */
+        if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
+            on_selected_cpus(cpumask_of(vcpu_vpmu(v)->last_pcpu),
+                             vpmu_save_force, v, 1);
+
          vpmu->arch_vpmu_ops->arch_vpmu_destroy(v);
     }
 }
@@ -835,6 +842,33 @@  long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
     return ret;
 }
 
+static int cpu_callback(
+    struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+    unsigned int cpu = (unsigned long)hcpu;
+    struct vcpu *vcpu = per_cpu(last_vcpu, cpu);
+    struct vpmu_struct *vpmu;
+
+    if ( !vcpu )
+        return NOTIFY_DONE;
+
+    vpmu = vcpu_vpmu(vcpu);
+    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
+        return NOTIFY_DONE;
+
+    if ( action == CPU_DYING )
+    {
+        vpmu_save_force(vcpu);
+        vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
+    }
+
+    return NOTIFY_DONE;
+}
+
+static struct notifier_block cpu_nfb = {
+    .notifier_call = cpu_callback
+};
+
 static int __init vpmu_init(void)
 {
     int vendor = current_cpu_data.x86_vendor;
@@ -872,8 +906,11 @@  static int __init vpmu_init(void)
     }
 
     if ( vpmu_mode != XENPMU_MODE_OFF )
+    {
+        register_cpu_notifier(&cpu_nfb);
         printk(XENLOG_INFO "VPMU: version " __stringify(XENPMU_VER_MAJ) "."
                __stringify(XENPMU_VER_MIN) "\n");
+    }
     else
         opt_vpmu_enabled = 0;