[RFC,08/13] kvm: Add Mirror VM support.

Message ID c81a02bfd698ed366bf2d61a36adcbb8ca21eb9c.1629118207.git.ashish.kalra@amd.com (mailing list archive)
State New, archived
Series Add support for Mirror VM.

Commit Message

Kalra, Ashish Aug. 16, 2021, 1:29 p.m. UTC
From: Ashish Kalra <ashish.kalra@amd.com>

Add a new kvm_mirror_vcpu_thread_fn(), which is QEMU's mirror vCPU
thread, and a corresponding kvm_init_mirror_vcpu(), which creates the
vCPUs for the mirror VM.  Also add a separate KVM run loop,
kvm_mirror_cpu_exec(), which differs from the main KVM run loop in
that it currently handles mainly IO and MMIO exits and does not handle
any interrupt exits, since the mirror VM has no interrupt controller.
This mirror vCPU run loop can be further optimized.
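
Roughly, the new run loop reduces to the shape below (a condensed
sketch of the kvm_mirror_cpu_exec() added in this patch; pre/post-run
hooks, signal handling and several exit reasons are omitted, and the
variables come from the enclosing function as in the full version):

    /* Condensed outline only -- see the full kvm-all.c hunk below. */
    int ret;
    do {
        if (cpu->vcpu_dirty) {
            /* mirror variant of put_registers: no MSRs, no irqchip state */
            kvm_arch_mirror_put_registers(cpu, KVM_PUT_RUNTIME_STATE);
            cpu->vcpu_dirty = false;
        }
        kvm_vcpu_ioctl(cpu, KVM_RUN, 0);
        switch (run->exit_reason) {
        case KVM_EXIT_IO:       /* full version calls kvm_handle_io() */
        case KVM_EXIT_MMIO:     /* ... or address_space_rw(), outside BQL */
            ret = 0;
            break;
        default:
            /* shutdown, system events and errors get dedicated
             * handling in the full version */
            ret = kvm_arch_handle_exit(cpu, run);
            break;
        }
    } while (ret == 0);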

Also add a separate kvm_arch_put_registers() variant,
kvm_arch_mirror_put_registers(), for mirror vCPUs: we currently don't
save/restore MSRs for mirror vCPUs, and kvm_put_msrs() fails for them
because the mirror VM has no interrupt controller such as the
in-kernel irqchip.
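
For context, the choice between the regular and the mirror vCPU
thread is driven purely by the new mirror_vcpu flag tested in
kvm_start_vcpu_thread().  The code that actually creates mirror vCPUs
and sets that flag lives in other patches of this series, so the
caller below is only a hypothetical sketch of the expected wiring:

    /*
     * Hypothetical caller -- illustration only.  This patch adds just
     * the mirror_vcpu-based thread dispatch, kvm_init_mirror_vcpu()
     * and the mirror run loop; mirror-VM CPU creation itself is part
     * of other patches in the series.
     */
    CPUState *cs = CPU(object_new(ms->cpu_type));
    cs->mirror_vcpu = true;   /* kvm_start_vcpu_thread() will then pick
                                 kvm_mirror_vcpu_thread_fn() */
    qdev_realize_and_unref(DEVICE(cs), NULL, &error_fatal);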

Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
---
 accel/kvm/kvm-accel-ops.c |  45 ++++++++-
 accel/kvm/kvm-all.c       | 191 +++++++++++++++++++++++++++++++++++++-
 accel/kvm/kvm-cpus.h      |   2 +
 include/sysemu/kvm.h      |   1 +
 target/i386/kvm/kvm.c     |  42 +++++++++
 5 files changed, 277 insertions(+), 4 deletions(-)

Patch

diff --git a/accel/kvm/kvm-accel-ops.c b/accel/kvm/kvm-accel-ops.c
index 7516c67a3f..e49a14e58c 100644
--- a/accel/kvm/kvm-accel-ops.c
+++ b/accel/kvm/kvm-accel-ops.c
@@ -61,6 +61,42 @@  static void *kvm_vcpu_thread_fn(void *arg)
     return NULL;
 }
 
+static void *kvm_mirror_vcpu_thread_fn(void *arg)
+{
+    CPUState *cpu = arg;
+    int r;
+
+    rcu_register_thread();
+
+    qemu_mutex_lock_iothread();
+    qemu_thread_get_self(cpu->thread);
+    cpu->thread_id = qemu_get_thread_id();
+    cpu->can_do_io = 1;
+
+    r = kvm_init_mirror_vcpu(cpu, &error_fatal);
+    kvm_init_cpu_signals(cpu);
+
+    /* signal CPU creation */
+    cpu_thread_signal_created(cpu);
+    qemu_guest_random_seed_thread_part2(cpu->random_seed);
+
+    do {
+        if (cpu_can_run(cpu)) {
+            r = kvm_mirror_cpu_exec(cpu);
+            if (r == EXCP_DEBUG) {
+                cpu_handle_guest_debug(cpu);
+            }
+        }
+        qemu_wait_io_event(cpu);
+    } while (!cpu->unplug || cpu_can_run(cpu));
+
+    kvm_destroy_vcpu(cpu);
+    qemu_mutex_unlock_iothread();
+    cpu_thread_signal_destroyed(cpu);
+    rcu_unregister_thread();
+    return NULL;
+}
+
 static void kvm_start_vcpu_thread(CPUState *cpu)
 {
     char thread_name[VCPU_THREAD_NAME_SIZE];
@@ -70,8 +106,13 @@  static void kvm_start_vcpu_thread(CPUState *cpu)
     qemu_cond_init(cpu->halt_cond);
     snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
              cpu->cpu_index);
-    qemu_thread_create(cpu->thread, thread_name, kvm_vcpu_thread_fn,
-                       cpu, QEMU_THREAD_JOINABLE);
+    if (!cpu->mirror_vcpu) {
+        qemu_thread_create(cpu->thread, thread_name, kvm_vcpu_thread_fn,
+                            cpu, QEMU_THREAD_JOINABLE);
+    } else {
+        qemu_thread_create(cpu->thread, thread_name, kvm_mirror_vcpu_thread_fn,
+                           cpu, QEMU_THREAD_JOINABLE);
+    }
 }
 
 static void kvm_accel_ops_class_init(ObjectClass *oc, void *data)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 4bc5971881..f14b33dde1 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2294,6 +2294,56 @@  bool kvm_vcpu_id_is_valid(int vcpu_id)
     return vcpu_id >= 0 && vcpu_id < kvm_max_vcpu_id(s);
 }
 
+int kvm_init_mirror_vcpu(CPUState *cpu, Error **errp)
+{
+    KVMState *s = kvm_state;
+    long mmap_size;
+    int ret;
+
+    ret = kvm_mirror_vm_ioctl(s, KVM_CREATE_VCPU, kvm_arch_vcpu_id(cpu));
+    if (ret < 0) {
+        error_setg_errno(errp, -ret,
+                         "kvm_init_mirror_vcpu: KVM_CREATE_VCPU failed");
+        goto err;
+    }
+
+    cpu->kvm_fd = ret;
+    cpu->kvm_state = s;
+    cpu->vcpu_dirty = true;
+
+    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
+    if (mmap_size < 0) {
+        ret = mmap_size;
+        error_setg_errno(errp, -mmap_size,
+                         "kvm_init_mirror_vcpu: KVM_GET_VCPU_MMAP_SIZE failed");
+        goto err;
+    }
+
+    cpu->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+                        cpu->kvm_fd, 0);
+    if (cpu->kvm_run == MAP_FAILED) {
+        ret = -errno;
+        error_setg_errno(errp, ret,
+                         "kvm_init_mirror_vcpu: mmap'ing vcpu state failed");
+        goto err;
+    }
+
+    if (s->coalesced_mmio && !s->coalesced_mmio_ring) {
+        s->coalesced_mmio_ring =
+            (void *)cpu->kvm_run + s->coalesced_mmio * PAGE_SIZE;
+    }
+
+    ret = kvm_arch_init_vcpu(cpu);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret,
+                         "kvm_init_mirror_vcpu: kvm_arch_init_vcpu failed (%lu)",
+                         kvm_arch_vcpu_id(cpu));
+    }
+
+err:
+    return ret;
+}
+
 static int kvm_init(MachineState *ms)
 {
     MachineClass *mc = MACHINE_GET_CLASS(ms);
@@ -2717,7 +2767,11 @@  void kvm_cpu_synchronize_state(CPUState *cpu)
 
 static void do_kvm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg)
 {
-    kvm_arch_put_registers(cpu, KVM_PUT_RESET_STATE);
+    if (!cpu->mirror_vcpu) {
+        kvm_arch_put_registers(cpu, KVM_PUT_RESET_STATE);
+    } else {
+        kvm_arch_mirror_put_registers(cpu, KVM_PUT_RESET_STATE);
+    }
     cpu->vcpu_dirty = false;
 }
 
@@ -2728,7 +2782,11 @@  void kvm_cpu_synchronize_post_reset(CPUState *cpu)
 
 static void do_kvm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
 {
-    kvm_arch_put_registers(cpu, KVM_PUT_FULL_STATE);
+    if (!cpu->mirror_vcpu) {
+        kvm_arch_put_registers(cpu, KVM_PUT_FULL_STATE);
+    } else {
+        kvm_arch_mirror_put_registers(cpu, KVM_PUT_FULL_STATE);
+    }
     cpu->vcpu_dirty = false;
 }
 
@@ -2968,6 +3026,136 @@  int kvm_cpu_exec(CPUState *cpu)
     return ret;
 }
 
+int kvm_mirror_cpu_exec(CPUState *cpu)
+{
+    struct kvm_run *run = cpu->kvm_run;
+    int ret, run_ret = 0;
+
+    DPRINTF("kvm_mirror_cpu_exec()\n");
+    assert(cpu->mirror_vcpu);
+
+    qemu_mutex_unlock_iothread();
+    cpu_exec_start(cpu);
+
+    do {
+        MemTxAttrs attrs;
+
+        if (cpu->vcpu_dirty) {
+            kvm_arch_mirror_put_registers(cpu, KVM_PUT_RUNTIME_STATE);
+            cpu->vcpu_dirty = false;
+        }
+
+        kvm_arch_pre_run(cpu, run);
+        if (qatomic_read(&cpu->exit_request)) {
+            DPRINTF("interrupt exit requested\n");
+            /*
+             * KVM requires us to reenter the kernel after IO exits to complete
+             * instruction emulation. This self-signal will ensure that we
+             * leave ASAP again.
+             */
+            kvm_cpu_kick_self();
+        }
+
+        /*
+         * Read cpu->exit_request before KVM_RUN reads run->immediate_exit.
+         * Matching barrier in kvm_eat_signals.
+         */
+        smp_rmb();
+
+        run_ret = kvm_vcpu_ioctl(cpu, KVM_RUN, 0);
+
+        attrs = kvm_arch_post_run(cpu, run);
+
+        if (run_ret < 0) {
+            if (run_ret == -EINTR || run_ret == -EAGAIN) {
+                DPRINTF("io window exit\n");
+                kvm_eat_signals(cpu);
+                ret = EXCP_INTERRUPT;
+                break;
+            }
+            fprintf(stderr, "error: kvm run failed %s\n",
+                    strerror(-run_ret));
+            ret = -1;
+            break;
+        }
+
+        trace_kvm_run_exit(cpu->cpu_index, run->exit_reason);
+        switch (run->exit_reason) {
+        case KVM_EXIT_IO:
+            DPRINTF("handle_io\n");
+            /* Called outside BQL */
+            kvm_handle_io(run->io.port, attrs,
+                          (uint8_t *)run + run->io.data_offset,
+                          run->io.direction,
+                          run->io.size,
+                          run->io.count);
+            ret = 0;
+            break;
+        case KVM_EXIT_MMIO:
+            DPRINTF("handle_mmio\n");
+            /* Called outside BQL */
+            address_space_rw(&address_space_memory,
+                             run->mmio.phys_addr, attrs,
+                             run->mmio.data,
+                             run->mmio.len,
+                             run->mmio.is_write);
+            ret = 0;
+            break;
+        case KVM_EXIT_SHUTDOWN:
+            DPRINTF("shutdown\n");
+            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
+            ret = EXCP_INTERRUPT;
+            break;
+        case KVM_EXIT_UNKNOWN:
+            fprintf(stderr, "KVM: unknown exit, hardware reason %" PRIx64 "\n",
+                    (uint64_t)run->hw.hardware_exit_reason);
+            ret = -1;
+            break;
+        case KVM_EXIT_INTERNAL_ERROR:
+            ret = kvm_handle_internal_error(cpu, run);
+            break;
+        case KVM_EXIT_SYSTEM_EVENT:
+            switch (run->system_event.type) {
+            case KVM_SYSTEM_EVENT_SHUTDOWN:
+                qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
+                ret = EXCP_INTERRUPT;
+                break;
+            case KVM_SYSTEM_EVENT_RESET:
+                qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
+                ret = EXCP_INTERRUPT;
+                break;
+            case KVM_SYSTEM_EVENT_CRASH:
+                kvm_cpu_synchronize_state(cpu);
+                qemu_mutex_lock_iothread();
+                qemu_system_guest_panicked(cpu_get_crash_info(cpu));
+                qemu_mutex_unlock_iothread();
+                ret = 0;
+                break;
+            default:
+                DPRINTF("kvm_arch_handle_exit\n");
+                ret = kvm_arch_handle_exit(cpu, run);
+                break;
+            }
+            break;
+        default:
+            DPRINTF("kvm_arch_handle_exit\n");
+            ret = kvm_arch_handle_exit(cpu, run);
+            break;
+        }
+    } while (ret == 0);
+
+    cpu_exec_end(cpu);
+    qemu_mutex_lock_iothread();
+
+    if (ret < 0) {
+        cpu_dump_state(cpu, stderr, CPU_DUMP_CODE);
+        vm_stop(RUN_STATE_INTERNAL_ERROR);
+    }
+
+    qatomic_set(&cpu->exit_request, 0);
+    return ret;
+}
+
 int kvm_ioctl(KVMState *s, int type, ...)
 {
     int ret;
diff --git a/accel/kvm/kvm-cpus.h b/accel/kvm/kvm-cpus.h
index bf0bd1bee4..c8c7e52bcd 100644
--- a/accel/kvm/kvm-cpus.h
+++ b/accel/kvm/kvm-cpus.h
@@ -13,7 +13,9 @@ 
 #include "sysemu/cpus.h"
 
 int kvm_init_vcpu(CPUState *cpu, Error **errp);
+int kvm_init_mirror_vcpu(CPUState *cpu, Error **errp);
 int kvm_cpu_exec(CPUState *cpu);
+int kvm_mirror_cpu_exec(CPUState *cpu);
 void kvm_destroy_vcpu(CPUState *cpu);
 void kvm_cpu_synchronize_post_reset(CPUState *cpu);
 void kvm_cpu_synchronize_post_init(CPUState *cpu);
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 6847ffcdfd..03e7b5afa0 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -352,6 +352,7 @@  int kvm_arch_get_registers(CPUState *cpu);
 #define KVM_PUT_FULL_STATE      3
 
 int kvm_arch_put_registers(CPUState *cpu, int level);
+int kvm_arch_mirror_put_registers(CPUState *cpu, int level);
 
 int kvm_arch_init(MachineState *ms, KVMState *s);
 
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index e69abe48e3..d6d52a06bc 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -4154,6 +4154,48 @@  int kvm_arch_put_registers(CPUState *cpu, int level)
     return 0;
 }
 
+int kvm_arch_mirror_put_registers(CPUState *cpu, int level)
+{
+    X86CPU *x86_cpu = X86_CPU(cpu);
+    int ret;
+
+    assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
+
+    /* put sregs first, keeping the ordering of kvm_arch_put_registers() */
+    ret = kvm_put_sregs(x86_cpu);
+    if (ret < 0) {
+        return ret;
+    }
+
+    if (level == KVM_PUT_FULL_STATE) {
+        /*
+         * We don't check for kvm_arch_set_tsc_khz() errors here,
+         * because TSC frequency mismatch shouldn't abort migration,
+         * unless the user explicitly asked for a more strict TSC
+         * setting (e.g. using an explicit "tsc-freq" option).
+         */
+        kvm_arch_set_tsc_khz(cpu);
+    }
+
+    ret = kvm_getput_regs(x86_cpu, 1);
+    if (ret < 0) {
+        return ret;
+    }
+    ret = kvm_put_xsave(x86_cpu);
+    if (ret < 0) {
+        return ret;
+    }
+    ret = kvm_put_xcrs(x86_cpu);
+    if (ret < 0) {
+        return ret;
+    }
+    ret = kvm_put_debugregs(x86_cpu);
+    if (ret < 0) {
+        return ret;
+    }
+    return 0;
+}
+
 int kvm_arch_get_registers(CPUState *cs)
 {
     X86CPU *cpu = X86_CPU(cs);