[QEMU,v2,2/2] KVM: i386: Add support for save and restore nested state
diff mbox series

Message ID 20180916124631.39016-3-liran.alon@oracle.com
State New
Headers show
Series
  • : KVM: i386: Add support for save and restore nested state
Related show

Commit Message

Liran Alon Sept. 16, 2018, 12:46 p.m. UTC
Kernel commit 8fcc4b5923af ("kvm: nVMX: Introduce KVM_CAP_NESTED_STATE")
introduced new IOCTLs to extract and restore KVM internal state used to
run a VM that is in VMX operation.

Utilize these IOCTLs to add support of migration of VMs which are
running nested hypervisors.

Reviewed-by: Nikita Leshchenko <nikita.leshchenko@oracle.com>
Reviewed-by: Patrick Colp <patrick.colp@oracle.com>
Reviewed-by: Mihai Carabas <mihai.carabas@oracle.com>
Signed-off-by: Liran Alon <liran.alon@oracle.com>
---
 accel/kvm/kvm-all.c   | 15 +++++++++++
 include/sysemu/kvm.h  |  1 +
 target/i386/cpu.h     |  2 ++
 target/i386/kvm.c     | 58 ++++++++++++++++++++++++++++++++++++++++
 target/i386/machine.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 149 insertions(+)

Patch
diff mbox series

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index de12f78eb8e4..fe6377ce9bcc 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -87,6 +87,7 @@  struct KVMState
 #ifdef KVM_CAP_SET_GUEST_DEBUG
     struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
 #endif
+    uint32_t max_nested_state_len;
     int many_ioeventfds;
     int intx_set_mask;
     bool sync_mmu;
@@ -1628,6 +1629,15 @@  static int kvm_init(MachineState *ms)
     s->debugregs = kvm_check_extension(s, KVM_CAP_DEBUGREGS);
 #endif
 
+    ret = kvm_check_extension(s, KVM_CAP_NESTED_STATE);
+    if (ret < 0) {
+        fprintf(stderr,
+                "kvm failed to get max size of nested state (%d)",
+                ret);
+        goto err;
+    }
+    s->max_nested_state_len = (uint32_t)ret;
+
 #ifdef KVM_CAP_IRQ_ROUTING
     kvm_direct_msi_allowed = (kvm_check_extension(s, KVM_CAP_SIGNAL_MSI) > 0);
 #endif
@@ -2187,6 +2197,11 @@  int kvm_has_debugregs(void)
     return kvm_state->debugregs;
 }
 
+uint32_t kvm_max_nested_state_length(void)
+{
+    return kvm_state->max_nested_state_len;
+}
+
 int kvm_has_many_ioeventfds(void)
 {
     if (!kvm_enabled()) {
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 0b64b8e06786..352c7fd4e3d2 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -210,6 +210,7 @@  bool kvm_has_sync_mmu(void);
 int kvm_has_vcpu_events(void);
 int kvm_has_robust_singlestep(void);
 int kvm_has_debugregs(void);
+uint32_t kvm_max_nested_state_length(void);
 int kvm_has_pit_state2(void);
 int kvm_has_many_ioeventfds(void);
 int kvm_has_gsi_routing(void);
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 6e4c2b02f947..3b97b5b280f0 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1330,6 +1330,8 @@  typedef struct CPUX86State {
 #if defined(CONFIG_KVM) || defined(CONFIG_HVF)
     void *xsave_buf;
 #endif
+    struct kvm_nested_state *nested_state;
+    uint32_t nested_state_len;   /* needed for migration */
 #if defined(CONFIG_HVF)
     HVFX86EmulatorState *hvf_emul;
 #endif
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index c1cd8c461fe4..aeb55b5ed6f5 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -1191,6 +1191,22 @@  int kvm_arch_init_vcpu(CPUState *cs)
     if (has_xsave) {
         env->xsave_buf = qemu_memalign(4096, sizeof(struct kvm_xsave));
     }
+
+    env->nested_state_len = kvm_max_nested_state_length();
+    if (env->nested_state_len > 0) {
+        uint32_t min_nested_state_len =
+            offsetof(struct kvm_nested_state, size) + sizeof(uint32_t);
+
+        /*
+         * Verify nested state length cover at least the size
+         * field of struct kvm_nested_state
+         */
+        assert(env->nested_state_len >= min_nested_state_len);
+
+        env->nested_state = g_malloc0(env->nested_state_len);
+        env->nested_state->size = env->nested_state_len;
+    }
+
     cpu->kvm_msr_buf = g_malloc0(MSR_BUF_SIZE);
 
     if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP)) {
@@ -2867,6 +2883,39 @@  static int kvm_get_debugregs(X86CPU *cpu)
     return 0;
 }
 
+static int kvm_put_nested_state(X86CPU *cpu)
+{
+    CPUX86State *env = &cpu->env;
+
+    if (kvm_max_nested_state_length() == 0) {
+        return 0;
+    }
+
+    assert(env->nested_state->size <= env->nested_state_len);
+    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_NESTED_STATE, env->nested_state);
+}
+
+static int kvm_get_nested_state(X86CPU *cpu)
+{
+    CPUX86State *env = &cpu->env;
+
+    if (kvm_max_nested_state_length() == 0) {
+        return 0;
+    }
+
+
+    /*
+     * It is possible that migration restored a smaller size into
+     * nested_state->size than what our kernel support.
+     * We preserve migration origin nested_state->size for
+     * call to KVM_SET_NESTED_STATE but wish that our next call
+     * to KVM_GET_NESTED_STATE will use max size our kernel support.
+     */
+    env->nested_state->size = env->nested_state_len;
+
+    return kvm_vcpu_ioctl(CPU(cpu), KVM_GET_NESTED_STATE, env->nested_state);
+}
+
 int kvm_arch_put_registers(CPUState *cpu, int level)
 {
     X86CPU *x86_cpu = X86_CPU(cpu);
@@ -2874,6 +2923,11 @@  int kvm_arch_put_registers(CPUState *cpu, int level)
 
     assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
 
+    ret = kvm_put_nested_state(x86_cpu);
+    if (ret < 0) {
+        return ret;
+    }
+
     if (level >= KVM_PUT_RESET_STATE) {
         ret = kvm_put_msr_feature_control(x86_cpu);
         if (ret < 0) {
@@ -2989,6 +3043,10 @@  int kvm_arch_get_registers(CPUState *cs)
     if (ret < 0) {
         goto out;
     }
+    ret = kvm_get_nested_state(cpu);
+    if (ret < 0) {
+        goto out;
+    }
     ret = 0;
  out:
     cpu_sync_bndcs_hflags(&cpu->env);
diff --git a/target/i386/machine.c b/target/i386/machine.c
index 084c2c73a8f7..781de40dfcbe 100644
--- a/target/i386/machine.c
+++ b/target/i386/machine.c
@@ -842,6 +842,78 @@  static const VMStateDescription vmstate_tsc_khz = {
     }
 };
 
+static int nested_state_post_load(void *opaque, int version_id)
+{
+    X86CPU *cpu = opaque;
+    CPUX86State *env = &cpu->env;
+    uint32_t min_nested_state_len =
+        offsetof(struct kvm_nested_state, size) + sizeof(uint32_t);
+    uint32_t max_nested_state_len = kvm_max_nested_state_length();
+
+    /*
+     * If our kernel don't support setting nested state
+     * and we have received nested state from migration stream,
+     * we need to fail migration
+     */
+    if (max_nested_state_len == 0) {
+        error_report("Received nested state when "
+                     "kernel cannot restore it");
+        return -EINVAL;
+    }
+
+    /*
+     * Verify that the size of received buffer covers the
+     * struct size field and that the size specified
+     * in given struct is set to no more than the size
+     * that our kernel support
+     */
+    if (env->nested_state_len < min_nested_state_len) {
+        error_report("Received nested state size less than min: "
+                     "len=%d, min=%d",
+                     env->nested_state_len, min_nested_state_len);
+        return -EINVAL;
+    }
+    if (env->nested_state->size > max_nested_state_len) {
+        error_report("Recieved unsupported nested state size: "
+                     "nested_state->size=%d, max=%d",
+                     env->nested_state->size, max_nested_state_len);
+        return -EINVAL;
+    }
+
+    /*
+     * Reallocate nested_state buffer to always remain
+     * in max size which our kernel can support
+     */
+    env->nested_state_len = max_nested_state_len;
+    env->nested_state = g_realloc(env->nested_state,
+                                  env->nested_state_len);
+    assert(env->nested_state);
+
+    return 0;
+}
+
+static bool nested_state_needed(void *opaque)
+{
+    X86CPU *cpu = opaque;
+    CPUX86State *env = &cpu->env;
+    return (env->nested_state_len > 0);
+}
+
+static const VMStateDescription vmstate_nested_state = {
+    .name = "cpu/nested_state",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .post_load = nested_state_post_load,
+    .needed = nested_state_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(env.nested_state_len, X86CPU),
+        VMSTATE_VBUFFER_ALLOC_UINT32(env.nested_state, X86CPU,
+                                     0, NULL,
+                                     env.nested_state_len),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static bool mcg_ext_ctl_needed(void *opaque)
 {
     X86CPU *cpu = opaque;
@@ -1080,6 +1152,7 @@  VMStateDescription vmstate_x86_cpu = {
         &vmstate_msr_intel_pt,
         &vmstate_msr_virt_ssbd,
         &vmstate_svm_npt,
+        &vmstate_nested_state,
         NULL
     }
 };