[00/24] KVM: nSVM: event fixes and migration support
diff mbox

Message ID 20200520172145.23284-1-pbonzini@redhat.com
State New
Headers show

Commit Message

Paolo Bonzini May 20, 2020, 5:21 p.m. UTC
Large parts of this series were posted before (patches 1, 3-4-5 and
6-7-8-12-13-14).  This is basically what I'd like to get into 5.8 as
far as nested SVM is concerned; the fix for exception vmexits is related
to migration support, because it gets rid of the exit_required flag
and therefore consolidates the SVM migration format.

There are a couple more bugfixes (2 and 21), the latter of which actually
affects VMX as well.

The SVM migration data consists of:

- the GIF state

- the guest mode and nested-run-pending flags

- the host state from before VMRUN

- the nested VMCB control state

The last two items are conveniently packaged in VMCB format.  Compared
to the previous prototype, HF_HIF_MASK is removed since it is part of
"the host state from before VMRUN".

The patch has been tested with the QEMU changes after my signature,
where it also fixes system_reset while x86/svm.flat runs.

Paolo

Paolo Bonzini (24):
  KVM: nSVM: fix condition for filtering async PF
  KVM: nSVM: leave ASID aside in copy_vmcb_control_area
  KVM: nSVM: inject exceptions via svm_check_nested_events
  KVM: nSVM: remove exit_required
  KVM: nSVM: correctly inject INIT vmexits
  KVM: nSVM: move map argument out of enter_svm_guest_mode
  KVM: nSVM: extract load_nested_vmcb_control
  KVM: nSVM: extract preparation of VMCB for nested run
  KVM: nSVM: clean up tsc_offset update
  KVM: nSVM: pass vmcb_control_area to copy_vmcb_control_area
  KVM: nSVM: remove trailing padding for struct vmcb_control_area
  KVM: nSVM: save all control fields in svm->nested
  KVM: nSVM: do not reload pause filter fields from VMCB
  KVM: nSVM: remove HF_VINTR_MASK
  KVM: nSVM: remove HF_HIF_MASK
  KVM: nSVM: split nested_vmcb_check_controls
  KVM: nSVM: do all MMU switch work in init/uninit functions
  KVM: nSVM: leave guest mode when clearing EFER.SVME
  KVM: nSVM: extract svm_set_gif
  KVM: MMU: pass arbitrary CR0/CR4/EFER to kvm_init_shadow_mmu
  KVM: x86: always update CR3 in VMCB
  uaccess: add memzero_user
  selftests: kvm: add a SVM version of state-test
  KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE

 arch/x86/include/asm/kvm_host.h               |   2 -
 arch/x86/include/asm/svm.h                    |   9 +-
 arch/x86/include/uapi/asm/kvm.h               |  17 +-
 arch/x86/kvm/cpuid.h                          |   5 +
 arch/x86/kvm/irq.c                            |   1 +
 arch/x86/kvm/mmu.h                            |   2 +-
 arch/x86/kvm/mmu/mmu.c                        |  14 +-
 arch/x86/kvm/svm/nested.c                     | 525 +++++++++++-------
 arch/x86/kvm/svm/svm.c                        | 107 ++--
 arch/x86/kvm/svm/svm.h                        |  32 +-
 arch/x86/kvm/vmx/nested.c                     |   5 -
 arch/x86/kvm/vmx/vmx.c                        |   5 +-
 arch/x86/kvm/x86.c                            |   3 +-
 include/linux/uaccess.h                       |   1 +
 lib/usercopy.c                                |  63 +++
 .../testing/selftests/kvm/x86_64/state_test.c |  65 ++-
 16 files changed, 549 insertions(+), 307 deletions(-)

Comments

Maxim Levitsky May 20, 2020, 7:24 p.m. UTC | #1
On Wed, 2020-05-20 at 13:21 -0400, Paolo Bonzini wrote:
> Large parts of this series were posted before (patches 1, 3-4-5 and
> 6-7-8-12-13-14).  This is basically what I'd like to get into 5.8 as
> far as nested SVM is concerned; the fix for exception vmexits is related
> to migration support, because it gets rid of the exit_required flag
> and therefore consolidates the SVM migration format.
> 
> There are a couple more bugfixes (2 and 21), the latter of which actually
> affects VMX as well.
> 
> The SVM migration data consists of:
> 
> - the GIF state
> 
> - the guest mode and nested-run-pending flags
> 
> - the host state from before VMRUN
> 
> - the nested VMCB control state
> 
> The last two items are conveniently packaged in VMCB format.  Compared
> to the previous prototype, HF_HIF_MASK is removed since it is part of
> "the host state from before VMRUN".
> 
> The patch has been tested with the QEMU changes after my signature,
> where it also fixes system_reset while x86/svm.flat runs.
> 
> Paolo
> 
> Paolo Bonzini (24):
>   KVM: nSVM: fix condition for filtering async PF
>   KVM: nSVM: leave ASID aside in copy_vmcb_control_area
>   KVM: nSVM: inject exceptions via svm_check_nested_events
>   KVM: nSVM: remove exit_required
>   KVM: nSVM: correctly inject INIT vmexits
>   KVM: nSVM: move map argument out of enter_svm_guest_mode
>   KVM: nSVM: extract load_nested_vmcb_control
>   KVM: nSVM: extract preparation of VMCB for nested run
>   KVM: nSVM: clean up tsc_offset update
>   KVM: nSVM: pass vmcb_control_area to copy_vmcb_control_area
>   KVM: nSVM: remove trailing padding for struct vmcb_control_area
>   KVM: nSVM: save all control fields in svm->nested
>   KVM: nSVM: do not reload pause filter fields from VMCB
>   KVM: nSVM: remove HF_VINTR_MASK
>   KVM: nSVM: remove HF_HIF_MASK
>   KVM: nSVM: split nested_vmcb_check_controls
>   KVM: nSVM: do all MMU switch work in init/uninit functions
>   KVM: nSVM: leave guest mode when clearing EFER.SVME
>   KVM: nSVM: extract svm_set_gif
>   KVM: MMU: pass arbitrary CR0/CR4/EFER to kvm_init_shadow_mmu
>   KVM: x86: always update CR3 in VMCB
>   uaccess: add memzero_user
>   selftests: kvm: add a SVM version of state-test
>   KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE
> 
>  arch/x86/include/asm/kvm_host.h               |   2 -
>  arch/x86/include/asm/svm.h                    |   9 +-
>  arch/x86/include/uapi/asm/kvm.h               |  17 +-
>  arch/x86/kvm/cpuid.h                          |   5 +
>  arch/x86/kvm/irq.c                            |   1 +
>  arch/x86/kvm/mmu.h                            |   2 +-
>  arch/x86/kvm/mmu/mmu.c                        |  14 +-
>  arch/x86/kvm/svm/nested.c                     | 525 +++++++++++-------
>  arch/x86/kvm/svm/svm.c                        | 107 ++--
>  arch/x86/kvm/svm/svm.h                        |  32 +-
>  arch/x86/kvm/vmx/nested.c                     |   5 -
>  arch/x86/kvm/vmx/vmx.c                        |   5 +-
>  arch/x86/kvm/x86.c                            |   3 +-
>  include/linux/uaccess.h                       |   1 +
>  lib/usercopy.c                                |  63 +++
>  .../testing/selftests/kvm/x86_64/state_test.c |  65 ++-
>  16 files changed, 549 insertions(+), 307 deletions(-)
> 

I just smoke-tested this patch series on my system.

Patch 24 doesn't apply cleanly on top of kvm/queue, I appplied it manually,
due to missing KVM_STATE_NESTED_MTF_PENDING bit

Also patch 22 needes ALIGN_UP which is not on mainline.
Probably in linux-next?

With these fixes, I don't see #DE exceptions on a nested guest I try to run
however it still hangs, right around the time it tries to access PS/2 keyboard/mouse.

Best regards,
	Maxim Levitsky
Paolo Bonzini May 20, 2020, 8:42 p.m. UTC | #2
On 20/05/20 21:24, Maxim Levitsky wrote:
> Patch 24 doesn't apply cleanly on top of kvm/queue, I appplied it manually,
> due to missing KVM_STATE_NESTED_MTF_PENDING bit
> 
> Also patch 22 needes ALIGN_UP which is not on mainline.
> Probably in linux-next?

Just replace it with ALIGN.  (I tested it with memzero_user in
arch/x86/kvm/ for convenience, and the lib/ patch ended up out of sync
with the actual code).

> With these fixes, I don't see #DE exceptions on a nested guest I try to run
> however it still hangs, right around the time it tries to access PS/2 keyboard/mouse.

IIRC you said that the bug appeared with the vintr rework, and then went
from hang to #DE and now back to hang?  And the hang is reported by L2,
not L1?

In order to debug the hang, a good start would be to understand if it
also happens with vgif=0.  This is because with vgif=1 we use VINTR
intercepts even while GIF=0, so the whole thing is a bit more complicated.

Paolo
Maxim Levitsky May 20, 2020, 9:08 p.m. UTC | #3
On Wed, 2020-05-20 at 22:42 +0200, Paolo Bonzini wrote:
> On 20/05/20 21:24, Maxim Levitsky wrote:
> > Patch 24 doesn't apply cleanly on top of kvm/queue, I appplied it manually,
> > due to missing KVM_STATE_NESTED_MTF_PENDING bit
> > 
> > Also patch 22 needes ALIGN_UP which is not on mainline.
> > Probably in linux-next?
> 
> Just replace it with ALIGN.  (I tested it with memzero_user in
> arch/x86/kvm/ for convenience, and the lib/ patch ended up out of sync
> with the actual code).
That is exactly what I did.

> 
> > With these fixes, I don't see #DE exceptions on a nested guest I try to run
> > however it still hangs, right around the time it tries to access PS/2 keyboard/mouse.
> 
> IIRC you said that the bug appeared with the vintr rework, and then went
> from hang to #DE and now back to hang?  And the hang is reported by L2,
> not L1?
Yes, and now the hang appears to be deterministic. The initial hang could happen
randomally. I remember that once the nested guest got to getty prompt even and hanged
on shutdown. Now hang is deterministic when kernel prints something about PS/2.
I will re-check this tomorrow.

> 
> In order to debug the hang, a good start would be to understand if it
> also happens with vgif=0.  This is because with vgif=1 we use VINTR
> intercepts even while GIF=0, so the whole thing is a bit more complicated.
I will check this tomorrow as well.


Best regards,
	Maxim Levitsky

> 
> Paolo
>
Paolo Bonzini May 20, 2020, 9:15 p.m. UTC | #4
On 20/05/20 23:08, Maxim Levitsky wrote:
>> IIRC you said that the bug appeared with the vintr rework, and then went
>> from hang to #DE and now back to hang?  And the hang is reported by L2,
>> not L1?
> Yes, and now the hang appears to be deterministic.

Ok, that's actually progress.  Also because we can write a testcase.

Paolo

Patch
diff mbox

diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h
index 3f3f780c8c..c4a8c10e2d 100644
--- a/linux-headers/asm-x86/kvm.h
+++ b/linux-headers/asm-x86/kvm.h
@@ -385,18 +385,22 @@  struct kvm_sync_regs {
 #define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4)
 
 #define KVM_STATE_NESTED_FORMAT_VMX	0
-#define KVM_STATE_NESTED_FORMAT_SVM	1	/* unused */
+#define KVM_STATE_NESTED_FORMAT_SVM	1
 
 #define KVM_STATE_NESTED_GUEST_MODE	0x00000001
 #define KVM_STATE_NESTED_RUN_PENDING	0x00000002
 #define KVM_STATE_NESTED_EVMCS		0x00000004
 #define KVM_STATE_NESTED_MTF_PENDING	0x00000008
+#define KVM_STATE_NESTED_GIF_SET	0x00000100
 
 #define KVM_STATE_NESTED_SMM_GUEST_MODE	0x00000001
 #define KVM_STATE_NESTED_SMM_VMXON	0x00000002
 
 #define KVM_STATE_NESTED_VMX_VMCS_SIZE	0x1000
 
+#define KVM_STATE_NESTED_SVM_VMCB_SIZE	0x1000
+
+
 struct kvm_vmx_nested_state_data {
 	__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
 	__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
@@ -411,6 +415,15 @@  struct kvm_vmx_nested_state_hdr {
 	} smm;
 };
 
+struct kvm_svm_nested_state_data {
+	/* Save area only used if KVM_STATE_NESTED_RUN_PENDING.  */
+	__u8 vmcb12[KVM_STATE_NESTED_SVM_VMCB_SIZE];
+};
+
+struct kvm_svm_nested_state_hdr {
+	__u64 vmcb_pa;
+};
+
 /* for KVM_CAP_NESTED_STATE */
 struct kvm_nested_state {
 	__u16 flags;
@@ -419,6 +432,7 @@  struct kvm_nested_state {
 
 	union {
 		struct kvm_vmx_nested_state_hdr vmx;
+		struct kvm_svm_nested_state_hdr svm;
 
 		/* Pad the header to 128 bytes.  */
 		__u8 pad[120];
@@ -431,6 +445,7 @@  struct kvm_nested_state {
 	 */
 	union {
 		struct kvm_vmx_nested_state_data vmx[0];
+		struct kvm_svm_nested_state_data svm[0];
 	} data;
 };
 
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index 9804495a46..f4ff71da0b 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -1017,6 +1017,7 @@  struct kvm_ppc_resize_hpt {
 #define KVM_CAP_S390_VCPU_RESETS 179
 #define KVM_CAP_S390_PROTECTED 180
 #define KVM_CAP_PPC_SECURE_GUEST 181
+#define KVM_CAP_HALT_POLL 182
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index e818fc712a..9627f88ebf 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2103,6 +2103,11 @@  static inline bool cpu_has_vmx(CPUX86State *env)
     return env->features[FEAT_1_ECX] & CPUID_EXT_VMX;
 }
 
+static inline bool cpu_has_svm(CPUX86State *env)
+{
+    return env->features[FEAT_8000_0001_ECX] & CPUID_EXT3_SVM;
+}
+
 /*
  * In order for a vCPU to enter VMX operation it must have CR4.VMXE set.
  * Since it was set, CR4.VMXE must remain set as long as vCPU is in
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 9c256ab159..6833400191 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -5939,6 +5939,7 @@  static void x86_cpu_reset(DeviceState *dev)
     /* init to reset state */
 
     env->hflags2 |= HF2_GIF_MASK;
+    env->hflags &= ~HF_GUEST_MASK;
 
     cpu_x86_update_cr0(env, 0x60000010);
     env->a20_mask = ~0x0;
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index 4901c6dd74..599a34b49d 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -1834,16 +1834,18 @@  int kvm_arch_init_vcpu(CPUState *cs)
     if (max_nested_state_len > 0) {
         assert(max_nested_state_len >= offsetof(struct kvm_nested_state, data));
 
-        if (cpu_has_vmx(env)) {
+        if (cpu_has_vmx(env) || cpu_has_svm(env)) {
             struct kvm_vmx_nested_state_hdr *vmx_hdr;
 
             env->nested_state = g_malloc0(max_nested_state_len);
             env->nested_state->size = max_nested_state_len;
             env->nested_state->format = KVM_STATE_NESTED_FORMAT_VMX;
 
-            vmx_hdr = &env->nested_state->hdr.vmx;
-            vmx_hdr->vmxon_pa = -1ull;
-            vmx_hdr->vmcs12_pa = -1ull;
+            if (cpu_has_vmx(env)) {
+                    vmx_hdr = &env->nested_state->hdr.vmx;
+                    vmx_hdr->vmxon_pa = -1ull;
+                    vmx_hdr->vmcs12_pa = -1ull;
+            }
         }
     }
 
@@ -3847,6 +3849,20 @@  static int kvm_put_nested_state(X86CPU *cpu)
         return 0;
     }
 
+    /*
+     * Copy flags that are affected by reset from env->hflags and env->hflags2.
+     */
+    if (env->hflags & HF_GUEST_MASK) {
+        env->nested_state->flags |= KVM_STATE_NESTED_GUEST_MODE;
+    } else {
+        env->nested_state->flags &= ~KVM_STATE_NESTED_GUEST_MODE;
+    }
+    if (env->hflags2 & HF2_GIF_MASK) {
+        env->nested_state->flags |= KVM_STATE_NESTED_GIF_SET;
+    } else {
+        env->nested_state->flags &= ~KVM_STATE_NESTED_GIF_SET;
+    }
+
     assert(env->nested_state->size <= max_nested_state_len);
     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_NESTED_STATE, env->nested_state);
 }
@@ -3875,11 +3891,19 @@  static int kvm_get_nested_state(X86CPU *cpu)
         return ret;
     }
 
+    /*
+     * Copy flags that are affected by reset to env->hflags and env->hflags2.
+     */
     if (env->nested_state->flags & KVM_STATE_NESTED_GUEST_MODE) {
         env->hflags |= HF_GUEST_MASK;
     } else {
         env->hflags &= ~HF_GUEST_MASK;
     }
+    if (env->nested_state->flags & KVM_STATE_NESTED_GIF_SET) {
+        env->hflags2 |= HF2_GIF_MASK;
+    } else {
+        env->hflags2 &= ~HF2_GIF_MASK;
+    }
 
     return ret;
 }
@@ -3891,6 +3915,12 @@  int kvm_arch_put_registers(CPUState *cpu, int level)
 
     assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
 
+    /* must be before kvm_put_nested_state so that EFER.SVME is set */
+    ret = kvm_put_sregs(x86_cpu);
+    if (ret < 0) {
+        return ret;
+    }
+
     if (level >= KVM_PUT_RESET_STATE) {
         ret = kvm_put_nested_state(x86_cpu);
         if (ret < 0) {
@@ -3924,10 +3954,6 @@  int kvm_arch_put_registers(CPUState *cpu, int level)
     if (ret < 0) {
         return ret;
     }
-    ret = kvm_put_sregs(x86_cpu);
-    if (ret < 0) {
-        return ret;
-    }
     /* must be before kvm_put_msrs */
     ret = kvm_inject_mce_oldstyle(x86_cpu);
     if (ret < 0) {
diff --git a/target/i386/machine.c b/target/i386/machine.c
index 0c96531a56..8684a247c1 100644
--- a/target/i386/machine.c
+++ b/target/i386/machine.c
@@ -1071,13 +1071,40 @@  static const VMStateDescription vmstate_vmx_nested_state = {
     }
 };
 
+static bool svm_nested_state_needed(void *opaque)
+{
+    struct kvm_nested_state *nested_state = opaque;
+
+    /*
+     * HF2_GIF_MASK is relevant for non-guest mode but it is already
+     * serialized via hflags2.
+     */
+    return (nested_state->format == KVM_STATE_NESTED_FORMAT_SVM &&
+            nested_state->size > offsetof(struct kvm_nested_state, data));
+}
+
+static const VMStateDescription vmstate_svm_nested_state = {
+    .name = "cpu/kvm_nested_state/svm",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = svm_nested_state_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_U64(hdr.svm.vmcb_pa, struct kvm_nested_state),
+        VMSTATE_UINT8_ARRAY(data.svm[0].vmcb12,
+                            struct kvm_nested_state,
+                            KVM_STATE_NESTED_SVM_VMCB_SIZE),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static bool nested_state_needed(void *opaque)
 {
     X86CPU *cpu = opaque;
     CPUX86State *env = &cpu->env;
 
     return (env->nested_state &&
-            vmx_nested_state_needed(env->nested_state));
+            (vmx_nested_state_needed(env->nested_state) ||
+             svm_nested_state_needed(env->nested_state)));
 }
 
 static int nested_state_post_load(void *opaque, int version_id)
@@ -1139,6 +1166,7 @@  static const VMStateDescription vmstate_kvm_nested_state = {
     },
     .subsections = (const VMStateDescription*[]) {
         &vmstate_vmx_nested_state,
+        &vmstate_svm_nested_state,
         NULL
     }
 };