diff mbox series

[v2,3/4] KVM: Rename and move CPUHP_AP_KVM_STARTING to ONLINE section

Message ID 20220118064430.3882337-4-chao.gao@intel.com (mailing list archive)
State New, archived
Headers show
Series Improve KVM's interaction with CPU hotplug | expand

Commit Message

Chao Gao Jan. 18, 2022, 6:44 a.m. UTC
The CPU STARTING section doesn't allow callbacks to fail. Move KVM's
hotplug callback to ONLINE section so that it can abort onlining a CPU in
certain cases to avoid potentially breaking VMs running on existing CPUs.
For example, when kvm fails to enable hardware virtualization on the
hotplugged CPU.

Place KVM's hotplug state before CPUHP_AP_SCHED_WAIT_EMPTY as it ensures
when offlining a CPU, all user tasks and non-pinned kernel tasks have left
the CPU, i.e. there cannot be a vCPU task around. So, it is safe for KVM's
CPU offline callback to disable hardware virtualization at that point.
Likewise, KVM's online callback can enable hardware virtualization before
any vCPU task gets a chance to run on hotplugged CPUs.

KVM's CPU hotplug callbacks are renamed as well.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Chao Gao <chao.gao@intel.com>
---
 include/linux/cpuhotplug.h |  2 +-
 virt/kvm/kvm_main.c        | 28 ++++++++++++++++++++--------
 2 files changed, 21 insertions(+), 9 deletions(-)

Comments

Sean Christopherson Feb. 9, 2022, 12:29 a.m. UTC | #1
On Tue, Jan 18, 2022, Chao Gao wrote:
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 148f7169b431..528741601122 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -4856,13 +4856,25 @@ static void hardware_enable_nolock(void *junk)
>  	}
>  }
>  
> -static int kvm_starting_cpu(unsigned int cpu)
> +static int kvm_online_cpu(unsigned int cpu)
>  {
> +	int ret = 0;
> +
>  	raw_spin_lock(&kvm_count_lock);
> -	if (kvm_usage_count)
> +	/*
> +	 * Abort the CPU online process if hardware virtualization cannot
> +	 * be enabled. Otherwise running VMs would encounter unrecoverable
> +	 * errors when scheduled to this CPU.
> +	 */
> +	if (kvm_usage_count) {


>  		hardware_enable_nolock(NULL);
> +		if (atomic_read(&hardware_enable_failed)) {

This needs:

		atomic_set(&hardware_enable_failed, 0);

otherwise failure to online one CPU will prevent onlining other non-broken CPUs.
It's probably worth adding a WARN_ON_ONCE above this too, e.g.

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 70e034cbe813..b25a00c76b3a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4863,8 +4863,11 @@ static int kvm_online_cpu(unsigned int cpu)
         * errors when scheduled to this CPU.
         */
        if (kvm_usage_count) {
+               WARN_ON_ONCE(atomic_read(&hardware_enable_failed));
+
                hardware_enable_nolock(NULL);
                if (atomic_read(&hardware_enable_failed)) {
+                       atomic_set(&hardware_enable_failed, 0);
                        ret = -EIO;
                        pr_warn("kvm: abort onlining CPU%d", cpu);
                }


> +			ret = -EIO;
> +			pr_warn("kvm: abort onlining CPU%d", cpu);

This is somewhat redundant with the pr_info() message in hardware_enable_nolock().
What about adding the below as a prep patch?  I think/hope it would be obvious to
the user/admin that onlining the CPU failed?  E.g. this for the output

  kvm: enabling virtualization on CPU2 failed during hardware_enable_all()

From: Sean Christopherson <seanjc@google.com>
Date: Tue, 8 Feb 2022 13:26:19 -0800
Subject: [PATCH] KVM: Provide more information in kernel log if hardware
 enabling fails

Provide the name of the calling function to hardware_enable_nolock() and
include it in the error message to provide additional information on
exactly what path failed.

Opportunistically bump the pr_info() to pr_warn(), failure to enable
virtualization support is warn-worthy as _something_ is wrong with the
system.

Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 virt/kvm/kvm_main.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index be614a6325e4..23481fd746aa 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4833,7 +4833,7 @@ static struct miscdevice kvm_dev = {
 	&kvm_chardev_ops,
 };

-static void hardware_enable_nolock(void *junk)
+static void hardware_enable_nolock(void *caller_name)
 {
 	int cpu = raw_smp_processor_id();
 	int r;
@@ -4848,7 +4848,8 @@ static void hardware_enable_nolock(void *junk)
 	if (r) {
 		cpumask_clear_cpu(cpu, cpus_hardware_enabled);
 		atomic_inc(&hardware_enable_failed);
-		pr_info("kvm: enabling virtualization on CPU%d failed\n", cpu);
+		pr_warn("kvm: enabling virtualization on CPU%d failed during %s()\n",
+			cpu, (const char *)caller_name);
 	}
 }

@@ -4856,7 +4857,7 @@ static int kvm_starting_cpu(unsigned int cpu)
 {
 	raw_spin_lock(&kvm_count_lock);
 	if (kvm_usage_count)
-		hardware_enable_nolock(NULL);
+		hardware_enable_nolock((void *)__func__);
 	raw_spin_unlock(&kvm_count_lock);
 	return 0;
 }
@@ -4905,7 +4906,7 @@ static int hardware_enable_all(void)
 	kvm_usage_count++;
 	if (kvm_usage_count == 1) {
 		atomic_set(&hardware_enable_failed, 0);
-		on_each_cpu(hardware_enable_nolock, NULL, 1);
+		on_each_cpu(hardware_enable_nolock, (void *)__func__, 1);

 		if (atomic_read(&hardware_enable_failed)) {
 			hardware_disable_all_nolock();
@@ -5530,7 +5531,7 @@ static void kvm_resume(void)
 #ifdef CONFIG_LOCKDEP
 		WARN_ON(lockdep_is_held(&kvm_count_lock));
 #endif
-		hardware_enable_nolock(NULL);
+		hardware_enable_nolock((void *)__func__);
 	}
 }


base-commit: 357ef9d9c0728bc2bbb9810c662263bba6b8dbc7
--
Chao Gao Feb. 9, 2022, 7:59 a.m. UTC | #2
On Wed, Feb 09, 2022 at 12:29:57AM +0000, Sean Christopherson wrote:
>On Tue, Jan 18, 2022, Chao Gao wrote:
>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>> index 148f7169b431..528741601122 100644
>> --- a/virt/kvm/kvm_main.c
>> +++ b/virt/kvm/kvm_main.c
>> @@ -4856,13 +4856,25 @@ static void hardware_enable_nolock(void *junk)
>>  	}
>>  }
>>  
>> -static int kvm_starting_cpu(unsigned int cpu)
>> +static int kvm_online_cpu(unsigned int cpu)
>>  {
>> +	int ret = 0;
>> +
>>  	raw_spin_lock(&kvm_count_lock);
>> -	if (kvm_usage_count)
>> +	/*
>> +	 * Abort the CPU online process if hardware virtualization cannot
>> +	 * be enabled. Otherwise running VMs would encounter unrecoverable
>> +	 * errors when scheduled to this CPU.
>> +	 */
>> +	if (kvm_usage_count) {
>
>
>>  		hardware_enable_nolock(NULL);
>> +		if (atomic_read(&hardware_enable_failed)) {
>
>This needs:
>
>		atomic_set(&hardware_enable_failed, 0);
>
>otherwise failure to online one CPU will prevent onlining other non-broken CPUs.
>It's probably worth adding a WARN_ON_ONCE above this too, e.g.

Thanks. All your comments to this series make sense. I just post a revised
version.
diff mbox series

Patch

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 773c83730906..14d354c8ce35 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -182,7 +182,6 @@  enum cpuhp_state {
 	CPUHP_AP_CSKY_TIMER_STARTING,
 	CPUHP_AP_TI_GP_TIMER_STARTING,
 	CPUHP_AP_HYPERV_TIMER_STARTING,
-	CPUHP_AP_KVM_STARTING,
 	CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING,
 	CPUHP_AP_KVM_ARM_VGIC_STARTING,
 	CPUHP_AP_KVM_ARM_TIMER_STARTING,
@@ -200,6 +199,7 @@  enum cpuhp_state {
 
 	/* Online section invoked on the hotplugged CPU from the hotplug thread */
 	CPUHP_AP_ONLINE_IDLE,
+	CPUHP_AP_KVM_ONLINE,
 	CPUHP_AP_SCHED_WAIT_EMPTY,
 	CPUHP_AP_SMPBOOT_THREADS,
 	CPUHP_AP_X86_VDSO_VMA_ONLINE,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 148f7169b431..528741601122 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4856,13 +4856,25 @@  static void hardware_enable_nolock(void *junk)
 	}
 }
 
-static int kvm_starting_cpu(unsigned int cpu)
+static int kvm_online_cpu(unsigned int cpu)
 {
+	int ret = 0;
+
 	raw_spin_lock(&kvm_count_lock);
-	if (kvm_usage_count)
+	/*
+	 * Abort the CPU online process if hardware virtualization cannot
+	 * be enabled. Otherwise running VMs would encounter unrecoverable
+	 * errors when scheduled to this CPU.
+	 */
+	if (kvm_usage_count) {
 		hardware_enable_nolock(NULL);
+		if (atomic_read(&hardware_enable_failed)) {
+			ret = -EIO;
+			pr_warn("kvm: abort onlining CPU%d", cpu);
+		}
+	}
 	raw_spin_unlock(&kvm_count_lock);
-	return 0;
+	return ret;
 }
 
 static void hardware_disable_nolock(void *junk)
@@ -4875,7 +4887,7 @@  static void hardware_disable_nolock(void *junk)
 	kvm_arch_hardware_disable();
 }
 
-static int kvm_dying_cpu(unsigned int cpu)
+static int kvm_offline_cpu(unsigned int cpu)
 {
 	raw_spin_lock(&kvm_count_lock);
 	if (kvm_usage_count)
@@ -5644,8 +5656,8 @@  int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 			goto out_free_2;
 	}
 
-	r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_STARTING, "kvm/cpu:starting",
-				      kvm_starting_cpu, kvm_dying_cpu);
+	r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_ONLINE, "kvm/cpu:online",
+				      kvm_online_cpu, kvm_offline_cpu);
 	if (r)
 		goto out_free_2;
 	register_reboot_notifier(&kvm_reboot_notifier);
@@ -5708,7 +5720,7 @@  int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 	kmem_cache_destroy(kvm_vcpu_cache);
 out_free_3:
 	unregister_reboot_notifier(&kvm_reboot_notifier);
-	cpuhp_remove_state_nocalls(CPUHP_AP_KVM_STARTING);
+	cpuhp_remove_state_nocalls(CPUHP_AP_KVM_ONLINE);
 out_free_2:
 	kvm_arch_hardware_unsetup();
 out_free_1:
@@ -5734,7 +5746,7 @@  void kvm_exit(void)
 	kvm_async_pf_deinit();
 	unregister_syscore_ops(&kvm_syscore_ops);
 	unregister_reboot_notifier(&kvm_reboot_notifier);
-	cpuhp_remove_state_nocalls(CPUHP_AP_KVM_STARTING);
+	cpuhp_remove_state_nocalls(CPUHP_AP_KVM_ONLINE);
 	on_each_cpu(hardware_disable_nolock, NULL, 1);
 	kvm_arch_hardware_unsetup();
 	kvm_arch_exit();