diff mbox series

[stable,v5.4+,1/3] x86/kvm: Teardown PV features on boot CPU as well

Message ID 20210520125625.12566-1-krzysztof.kozlowski@canonical.com (mailing list archive)
State New, archived
Headers show
Series [stable,v5.4+,1/3] x86/kvm: Teardown PV features on boot CPU as well | expand

Commit Message

Krzysztof Kozlowski May 20, 2021, 12:56 p.m. UTC
From: Vitaly Kuznetsov <vkuznets@redhat.com>

commit 8b79feffeca28c5459458fe78676b081e87c93a4 upstream.

Various PV features (Async PF, PV EOI, steal time) work through memory
shared with the hypervisor, and when we restore from hibernation we must
properly tear down all these features to make sure the hypervisor doesn't
write to stale locations after we jump to the previously hibernated kernel
(which can try to place anything there). For secondary CPUs the job is
already done by kvm_cpu_down_prepare(); register syscore ops to do
the same for the boot CPU.

Krzysztof:
This fixes memory corruption visible after second resume from
hibernation:

  BUG: Bad page state in process dbus-daemon  pfn:18b01
  page:ffffea000062c040 refcount:0 mapcount:0 mapping:0000000000000000 index:0x1 compound_mapcount: -30591
  flags: 0xfffffc0078141(locked|error|workingset|writeback|head|mappedtodisk|reclaim)
  raw: 000fffffc0078141 dead0000000002d0 dead000000000100 0000000000000000
  raw: 0000000000000001 0000000000000000 00000000ffffffff 0000000000000000
  page dumped because: PAGE_FLAGS_CHECK_AT_PREP flag set
  bad because of flags: 0x78141(locked|error|workingset|writeback|head|mappedtodisk|reclaim)

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20210414123544.1060604-3-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
[krzysztof: Extend the commit message]
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
---

Backport to v5.4 seems reasonable. It might make sense for earlier versions
as well, but this was not tested/investigated.

 arch/x86/kernel/kvm.c | 32 ++++++++++++++++++++++++++++----
 1 file changed, 28 insertions(+), 4 deletions(-)

Comments

Paolo Bonzini May 20, 2021, 4:47 p.m. UTC | #1
On 20/05/21 14:56, Krzysztof Kozlowski wrote:
> From: Vitaly Kuznetsov <vkuznets@redhat.com>
> 
> commit 8b79feffeca28c5459458fe78676b081e87c93a4 upstream.
> 
> Various PV features (Async PF, PV EOI, steal time) work through memory
> shared with hypervisor and when we restore from hibernation we must
> properly teardown all these features to make sure hypervisor doesn't
> write to stale locations after we jump to the previously hibernated kernel
> (which can try to place anything there). For secondary CPUs the job is
> already done by kvm_cpu_down_prepare(), register syscore ops to do
> the same for boot CPU.
> 
> Krzysztof:
> This fixes memory corruption visible after second resume from
> hibernation:

Hi, you should include a cover letter detailing the differences between 
the original patches and the backport.

(I'll review it anyway, but it would have helped).

Paolo

>    BUG: Bad page state in process dbus-daemon  pfn:18b01
>    page:ffffea000062c040 refcount:0 mapcount:0 mapping:0000000000000000 index:0x1 compound_mapcount: -30591
>    flags: 0xfffffc0078141(locked|error|workingset|writeback|head|mappedtodisk|reclaim)
>    raw: 000fffffc0078141 dead0000000002d0 dead000000000100 0000000000000000
>    raw: 0000000000000001 0000000000000000 00000000ffffffff 0000000000000000
>    page dumped because: PAGE_FLAGS_CHECK_AT_PREP flag set
>    bad because of flags: 0x78141(locked|error|workingset|writeback|head|mappedtodisk|reclaim)
> 
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
> Message-Id: <20210414123544.1060604-3-vkuznets@redhat.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
> [krzysztof: Extend the commit message]
> Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
> ---
> 
> Backport to v5.4 seems reasonable. Might have sense to earlier versions,
> but this was not tested/investigated.
> 
>   arch/x86/kernel/kvm.c | 32 ++++++++++++++++++++++++++++----
>   1 file changed, 28 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index e820568ed4d5..6b906a651fb1 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -24,6 +24,7 @@
>   #include <linux/debugfs.h>
>   #include <linux/nmi.h>
>   #include <linux/swait.h>
> +#include <linux/syscore_ops.h>
>   #include <asm/timer.h>
>   #include <asm/cpu.h>
>   #include <asm/traps.h>
> @@ -558,17 +559,21 @@ static void kvm_guest_cpu_offline(void)
>   
>   static int kvm_cpu_online(unsigned int cpu)
>   {
> -	local_irq_disable();
> +	unsigned long flags;
> +
> +	local_irq_save(flags);
>   	kvm_guest_cpu_init();
> -	local_irq_enable();
> +	local_irq_restore(flags);
>   	return 0;
>   }
>   
>   static int kvm_cpu_down_prepare(unsigned int cpu)
>   {
> -	local_irq_disable();
> +	unsigned long flags;
> +
> +	local_irq_save(flags);
>   	kvm_guest_cpu_offline();
> -	local_irq_enable();
> +	local_irq_restore(flags);
>   	return 0;
>   }
>   #endif
> @@ -606,6 +611,23 @@ static void kvm_flush_tlb_others(const struct cpumask *cpumask,
>   	native_flush_tlb_others(flushmask, info);
>   }
>   
> +static int kvm_suspend(void)
> +{
> +	kvm_guest_cpu_offline();
> +
> +	return 0;
> +}
> +
> +static void kvm_resume(void)
> +{
> +	kvm_cpu_online(raw_smp_processor_id());
> +}
> +
> +static struct syscore_ops kvm_syscore_ops = {
> +	.suspend	= kvm_suspend,
> +	.resume		= kvm_resume,
> +};
> +
>   static void __init kvm_guest_init(void)
>   {
>   	int i;
> @@ -649,6 +671,8 @@ static void __init kvm_guest_init(void)
>   	kvm_guest_cpu_init();
>   #endif
>   
> +	register_syscore_ops(&kvm_syscore_ops);
> +
>   	/*
>   	 * Hard lockup detection is enabled by default. Disable it, as guests
>   	 * can get false positives too easily, for example if the host is
>
Krzysztof Kozlowski May 20, 2021, 5:17 p.m. UTC | #2
On 20/05/2021 12:47, Paolo Bonzini wrote:
> On 20/05/21 14:56, Krzysztof Kozlowski wrote:
>> From: Vitaly Kuznetsov <vkuznets@redhat.com>
>>
>> commit 8b79feffeca28c5459458fe78676b081e87c93a4 upstream.
>>
>> Various PV features (Async PF, PV EOI, steal time) work through memory
>> shared with hypervisor and when we restore from hibernation we must
>> properly teardown all these features to make sure hypervisor doesn't
>> write to stale locations after we jump to the previously hibernated kernel
>> (which can try to place anything there). For secondary CPUs the job is
>> already done by kvm_cpu_down_prepare(), register syscore ops to do
>> the same for boot CPU.
>>
>> Krzysztof:
>> This fixes memory corruption visible after second resume from
>> hibernation:
> 
> Hi, you should include a cover letter detailing the differences between 
> the original patches and the backport.
> 
> (I'll review it anyway, but it would have helped).

My bad, I was actually not aware that the backport differs that much. I can
describe the differences in v2.

The patch context looks quite different, and now I see that
kvm_guest_cpu_offline() ends up within CONFIG_SMP for unclear reasons.
Let me try to fix it in v2.


Best regards,
Krzysztof
diff mbox series

Patch

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index e820568ed4d5..6b906a651fb1 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -24,6 +24,7 @@ 
 #include <linux/debugfs.h>
 #include <linux/nmi.h>
 #include <linux/swait.h>
+#include <linux/syscore_ops.h>
 #include <asm/timer.h>
 #include <asm/cpu.h>
 #include <asm/traps.h>
@@ -558,17 +559,21 @@  static void kvm_guest_cpu_offline(void)
 
 static int kvm_cpu_online(unsigned int cpu)
 {
-	local_irq_disable();
+	unsigned long flags;
+
+	local_irq_save(flags);
 	kvm_guest_cpu_init();
-	local_irq_enable();
+	local_irq_restore(flags);
 	return 0;
 }
 
 static int kvm_cpu_down_prepare(unsigned int cpu)
 {
-	local_irq_disable();
+	unsigned long flags;
+
+	local_irq_save(flags);
 	kvm_guest_cpu_offline();
-	local_irq_enable();
+	local_irq_restore(flags);
 	return 0;
 }
 #endif
@@ -606,6 +611,23 @@  static void kvm_flush_tlb_others(const struct cpumask *cpumask,
 	native_flush_tlb_others(flushmask, info);
 }
 
+static int kvm_suspend(void)
+{
+	kvm_guest_cpu_offline();
+
+	return 0;
+}
+
+static void kvm_resume(void)
+{
+	kvm_cpu_online(raw_smp_processor_id());
+}
+
+static struct syscore_ops kvm_syscore_ops = {
+	.suspend	= kvm_suspend,
+	.resume		= kvm_resume,
+};
+
 static void __init kvm_guest_init(void)
 {
 	int i;
@@ -649,6 +671,8 @@  static void __init kvm_guest_init(void)
 	kvm_guest_cpu_init();
 #endif
 
+	register_syscore_ops(&kvm_syscore_ops);
+
 	/*
 	 * Hard lockup detection is enabled by default. Disable it, as guests
 	 * can get false positives too easily, for example if the host is