[v4] i386: hvf: Implement CPU kick

Message ID 20200729124832.79375-1-r.bolshakov@yadro.com (mailing list archive)
State New, archived
Series: [v4] i386: hvf: Implement CPU kick

Commit Message

Roman Bolshakov July 29, 2020, 12:48 p.m. UTC
HVF doesn't have a CPU kick, and without it it's not possible to perform
an action on a vCPU thread until a VM-exit happens. The kick is also
needed for timely interrupt delivery.

The existing implementation of the CPU kick sends SIG_IPI (aka SIGUSR1)
to the vCPU thread, but that is different from what hv_vcpu_interrupt()
does. The latter results in an invocation of mp_cpus_kick() in the XNU
kernel [1].
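
For reference, hv_vcpu_interrupt() takes an array of vCPU IDs plus a
count, so kicking a single vCPU looks like the sketch below (a minimal
illustration of the documented Hypervisor.framework API; kick_one() is
a made-up helper name):

    #include <assert.h>
    #include <Hypervisor/hv.h>

    /* Force a VM-exit on one HVF vCPU currently running guest code. */
    static void kick_one(hv_vcpuid_t vcpu)
    {
        hv_return_t r = hv_vcpu_interrupt(&vcpu, 1);
        assert(r == HV_SUCCESS);
    }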

mp_cpus_kick() sends an IPI through the host LAPIC to the HVF vCPU,
and the kick interrupt leads to a VM-exit because the "external-interrupt
exiting" VM-execution control is enabled for HVF.

hv_vcpu_interrupt() has no effect if it's delivered while the vCPU is
outside of guest mode, so to avoid kick loss it is complemented with a
SIG_IPI handler and a zero-valued VMX-preemption timer. If the kick
happens outside of hv_vcpu_run(), the signal handler re-queues the kick
by setting exit_request. exit_request is cleared when the request is
satisfied, i.e. when the vCPU thread returns with EXCP_INTERRUPT.
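
The kick-loss avoidance is essentially a Dekker-style handshake between
the kicker and the vCPU thread. A self-contained sketch with C11
atomics (the patch itself uses QEMU's atomic_set()/atomic_read() and
smp_mb(); the names below are illustrative):

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_bool exit_request; /* set by the kicker */
    static atomic_bool in_guest;     /* set by the vCPU thread */

    /* Kicker side (SIG_IPI handler): request an exit, then check
     * whether the vCPU is about to enter the guest and needs the
     * zero-valued VMX-preemption timer armed to force an exit. */
    static void kicker(void)
    {
        atomic_store(&exit_request, true);
        if (atomic_load(&in_guest)) {
            /* arm the VMX-preemption timer via wvmcs() here */
        }
    }

    /* vCPU side: announce guest entry, then re-check for a pending
     * kick, so a kick sent just before the announcement isn't lost. */
    static bool may_enter_guest(void)
    {
        atomic_store(&in_guest, true);
        if (atomic_load(&exit_request)) {
            atomic_store(&in_guest, false);
            return false; /* satisfy the kick with EXCP_INTERRUPT */
        }
        return true; /* a later kick lands as hv_vcpu_interrupt() */
    }

The seq_cst store/load ordering here plays the role of the smp_mb()
pairs in the patch: each side publishes its flag before reading the
other's, so at least one side always observes the kick.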

So we get the following scenarios, time/location-wise, for the kick:

1) The vCPU thread is well before hv_vcpu_run(), then exit_request is
   recorded. As soon as the vCPU thread approaches hv_vcpu_run(), the
   exit request is satisfied.

2) The vCPU thread is about to enter the guest, then the VMX-preemption
   timer is enabled to force an immediate VM-exit. The timer is then
   cleared in the VM-exit handler and the vCPU thread exits (see the
   sketch after this list).

3) The guest is running, then hv_vcpu_run() is interrupted by
   hv_vcpu_interrupt() and the vCPU thread quits.

4) The vCPU thread has just made a VM-exit, then exit_request is
   recorded and the VMX-preemption timer is enabled, but the exit
   request won't be satisfied until the next iteration of the vCPU
   thread; no kick loss happens.

5) The vCPU thread is well past hv_vcpu_run(), then exit_request is
   recorded and the VMX-preemption timer is not enabled. The exit
   request will be satisfied on the next iteration of the vCPU thread,
   like in 4). The kick is not lost.

6) If an external interrupt happens, the exit request is satisfied and
   the VMX-preemption timer is cleared, i.e. kicks are coalesced with
   interrupts.
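
Arming an immediate exit is cheap because VMCS_PREEMPTION_TIMER_VALUE
is preloaded with zero at vCPU init; the kick path only has to flip the
pin-based control. A sketch using the rvmcs()/wvmcs() accessors that
the patch uses (assuming they behave as plain VMCS read/write helpers):

    /* With the preemption timer value preloaded to 0, setting this
     * control makes the next VM-entry exit immediately with
     * EXIT_REASON_VMX_PREEMPT. */
    static void arm_immediate_exit(hv_vcpuid_t vcpu)
    {
        uint64_t pin = rvmcs(vcpu, VMCS_PIN_BASED_CTLS);
        wvmcs(vcpu, VMCS_PIN_BASED_CTLS,
              pin | VMCS_PIN_BASED_CTLS_VMX_PREEMPT_TIMER);
    }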

While at it, correct the type of hvf_fd to match hv_vcpuid_t, to avoid
compilation warnings.
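
(For reference, hv_vcpuid_t is declared as a plain unsigned integer in
the Hypervisor framework headers; quoted from memory, so verify against
<Hypervisor/hv_types.h>:)

    typedef unsigned hv_vcpuid_t;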

1. https://opensource.apple.com/source/xnu/xnu-6153.81.5/osfmk/i386/mp.c

Cc: Cameron Esfahani <dirty@apple.com>
Signed-off-by: Roman Bolshakov <r.bolshakov@yadro.com>
---

Hi,

This version is just for review. There's an issue where interrupts
(x86_gsi_interrupt) don't reach HVF (i.e. no interrupts are loaded into
the VMCS interruption info field). I've been triaging the issue for a
while, and the dtrace patches are a byproduct of the triage efforts.
Once I figure out where interrupts are lost/suppressed (which won't
happen until the end of next week due to vacation), I'll send v5 with
the kick patch and the interrupt delivery fix.

Thanks,
Roman

Changes since v3:
 - Replaced TLS pthread_key_t with current_cpu (Paolo)
 - Eliminated race condition by signalling through atomic variables (Paolo)

Changes since v2:
 - Reworked the workaround to minimize the kick-loss race. Use signals
   to interrupt the vCPU thread outside of hv_vcpu_run() and turn the
   VMX-preemption timer on/off, while the timer value is always zero.
   v3 also assumes that the VMX-preemption timer control is always
   available on hardware that supports HVF.

Changes since v1:
 - Reduced kick loss race (Paolo) and removed SIG_IPI blocking

 include/hw/core/cpu.h  |  2 +-
 include/sysemu/hvf.h   |  1 +
 softmmu/cpus.c         | 13 ++++++---
 target/i386/cpu.h      |  1 +
 target/i386/hvf/hvf.c  | 60 ++++++++++++++++++++++++++++++++++++++++--
 target/i386/hvf/vmcs.h |  1 +
 6 files changed, 71 insertions(+), 7 deletions(-)

Patch

diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 8f145733ce..deaf9fe7ca 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -454,7 +454,7 @@  struct CPUState {
 
     struct hax_vcpu_state *hax_vcpu;
 
-    int hvf_fd;
+    unsigned hvf_fd;
 
     /* track IOMMUs whose translations we've cached in the TCG TLB */
     GArray *iommu_notifiers;
diff --git a/include/sysemu/hvf.h b/include/sysemu/hvf.h
index 6d3ee4fdb7..a1ab61403f 100644
--- a/include/sysemu/hvf.h
+++ b/include/sysemu/hvf.h
@@ -25,6 +25,7 @@  extern bool hvf_allowed;
 
 int hvf_init_vcpu(CPUState *);
 int hvf_vcpu_exec(CPUState *);
+void hvf_vcpu_kick(CPUState *);
 void hvf_cpu_synchronize_state(CPUState *);
 void hvf_cpu_synchronize_post_reset(CPUState *);
 void hvf_cpu_synchronize_post_init(CPUState *);
diff --git a/softmmu/cpus.c b/softmmu/cpus.c
index a802e899ab..a2c3ed93fe 100644
--- a/softmmu/cpus.c
+++ b/softmmu/cpus.c
@@ -1708,10 +1708,15 @@  static void qemu_cpu_kick_thread(CPUState *cpu)
         return;
     }
     cpu->thread_kicked = true;
-    err = pthread_kill(cpu->thread->thread, SIG_IPI);
-    if (err && err != ESRCH) {
-        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
-        exit(1);
+
+    if (hvf_enabled()) {
+        hvf_vcpu_kick(cpu);
+    } else {
+        err = pthread_kill(cpu->thread->thread, SIG_IPI);
+        if (err && err != ESRCH) {
+            fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
+            exit(1);
+        }
     }
 #else /* _WIN32 */
     if (!qemu_cpu_is_self(cpu)) {
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index e1a5c174dc..ef7dbcb8dc 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1608,6 +1608,7 @@  typedef struct CPUX86State {
     struct kvm_nested_state *nested_state;
 #endif
 #if defined(CONFIG_HVF)
+    bool hvf_in_guest;
     HVFX86LazyFlags hvf_lflags;
     void *hvf_mmio_buf;
 #endif
diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
index d81f569aed..1d77245bca 100644
--- a/target/i386/hvf/hvf.c
+++ b/target/i386/hvf/hvf.c
@@ -458,8 +458,27 @@  void hvf_vcpu_destroy(CPUState *cpu)
     assert_hvf_ok(ret);
 }
 
-static void dummy_signal(int sig)
+static void hvf_handle_ipi(int sig)
 {
+    if (!current_cpu) {
+        return;
+    }
+
+    /*
+     * skip object type check to avoid a deadlock in
+     * qemu_log/vfprintf/flockfile.
+     */
+    X86CPU *x86_cpu = (X86CPU *)current_cpu;
+    CPUX86State *env = &x86_cpu->env;
+
+    atomic_set(&current_cpu->exit_request, true);
+    /* Write cpu->exit_request before reading env->hvf_in_guest */
+    smp_mb();
+    if (atomic_read(&env->hvf_in_guest)) {
+        wvmcs(current_cpu->hvf_fd, VMCS_PIN_BASED_CTLS,
+              rvmcs(current_cpu->hvf_fd, VMCS_PIN_BASED_CTLS)
+                | VMCS_PIN_BASED_CTLS_VMX_PREEMPT_TIMER);
+    }
 }
 
 int hvf_init_vcpu(CPUState *cpu)
@@ -474,16 +493,18 @@  int hvf_init_vcpu(CPUState *cpu)
     struct sigaction sigact;
 
     memset(&sigact, 0, sizeof(sigact));
-    sigact.sa_handler = dummy_signal;
+    sigact.sa_handler = hvf_handle_ipi;
     sigaction(SIG_IPI, &sigact, NULL);
 
     pthread_sigmask(SIG_BLOCK, NULL, &set);
     sigdelset(&set, SIG_IPI);
+    pthread_sigmask(SIG_SETMASK, &set, NULL);
 
     init_emu();
     init_decoder();
 
     hvf_state->hvf_caps = g_new0(struct hvf_vcpu_caps, 1);
+    env->hvf_in_guest = false;
     env->hvf_mmio_buf = g_new(char, 4096);
 
     r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf_fd, HV_VCPU_DEFAULT);
@@ -529,6 +550,7 @@  int hvf_init_vcpu(CPUState *cpu)
     wvmcs(cpu->hvf_fd, VMCS_EXCEPTION_BITMAP, 0); /* Double fault */
 
     wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);
+    wvmcs(cpu->hvf_fd, VMCS_PREEMPTION_TIMER_VALUE, 0);
 
     x86cpu = X86_CPU(cpu);
     x86cpu->env.xsave_buf = qemu_memalign(4096, 4096);
@@ -631,7 +653,20 @@  int hvf_vcpu_exec(CPUState *cpu)
             return EXCP_HLT;
         }
 
+        atomic_set(&env->hvf_in_guest, true);
+        /* Read cpu->exit_request after writing env->hvf_in_guest */
+        smp_mb();
+        if (atomic_read(&cpu->exit_request)) {
+            atomic_set(&env->hvf_in_guest, false);
+            qemu_mutex_lock_iothread();
+            wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS,
+                  rvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS)
+                    & ~VMCS_PIN_BASED_CTLS_VMX_PREEMPT_TIMER);
+            atomic_set(&cpu->exit_request, false);
+            return EXCP_INTERRUPT;
+        }
         hv_return_t r  = hv_vcpu_run(cpu->hvf_fd);
+        atomic_store_release(&env->hvf_in_guest, false);
         assert_hvf_ok(r);
 
         /* handle VMEXIT */
@@ -774,7 +809,12 @@  int hvf_vcpu_exec(CPUState *cpu)
             vmx_clear_nmi_window_exiting(cpu);
             ret = EXCP_INTERRUPT;
             break;
+        case EXIT_REASON_VMX_PREEMPT:
         case EXIT_REASON_EXT_INTR:
+            wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS,
+                  rvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS)
+                    & ~VMCS_PIN_BASED_CTLS_VMX_PREEMPT_TIMER);
+            atomic_set(&cpu->exit_request, false);
             /* force exit and allow io handling */
             ret = EXCP_INTERRUPT;
             break;
@@ -872,6 +912,22 @@  int hvf_vcpu_exec(CPUState *cpu)
     return ret;
 }
 
+void hvf_vcpu_kick(CPUState *cpu)
+{
+    hv_return_t err;
+
+    err = pthread_kill(cpu->thread->thread, SIG_IPI);
+    if (err) {
+        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
+        exit(1);
+    }
+    err = hv_vcpu_interrupt(&cpu->hvf_fd, 1);
+    if (err) {
+        fprintf(stderr, "qemu:%s error %#x\n", __func__, err);
+        exit(1);
+    }
+}
+
 bool hvf_allowed;
 
 static int hvf_accel_init(MachineState *ms)
diff --git a/target/i386/hvf/vmcs.h b/target/i386/hvf/vmcs.h
index 42de7ebc3a..6615365023 100644
--- a/target/i386/hvf/vmcs.h
+++ b/target/i386/hvf/vmcs.h
@@ -349,6 +349,7 @@ 
 #define VMCS_PIN_BASED_CTLS_EXTINT            (1 << 0)
 #define VMCS_PIN_BASED_CTLS_NMI               (1 << 3)
 #define VMCS_PIN_BASED_CTLS_VNMI              (1 << 5)
+#define VMCS_PIN_BASED_CTLS_VMX_PREEMPT_TIMER (1 << 6)
 
 #define VMCS_PRI_PROC_BASED_CTLS_INT_WINDOW_EXITING (1 << 2)
 #define VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET (1 << 3)