diff mbox

virtio: Use ioeventfd for virtqueue notify

Message ID 1285855312-11739-1-git-send-email-stefanha@linux.vnet.ibm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Stefan Hajnoczi Sept. 30, 2010, 2:01 p.m. UTC
None
diff mbox

Patch

diff --git a/hw/vhost.c b/hw/vhost.c
index 1b8624d..f127a07 100644
--- a/hw/vhost.c
+++ b/hw/vhost.c
@@ -517,7 +517,7 @@  static int vhost_virtqueue_init(struct vhost_dev *dev,
         goto fail_guest_notifier;
     }
 
-    r = vdev->binding->set_host_notifier(vdev->binding_opaque, idx, true);
+    r = virtio_set_host_notifier(vdev, idx, true);
     if (r < 0) {
         fprintf(stderr, "Error binding host notifier: %d\n", -r);
         goto fail_host_notifier;
@@ -539,7 +539,7 @@  static int vhost_virtqueue_init(struct vhost_dev *dev,
 
 fail_call:
 fail_kick:
-    vdev->binding->set_host_notifier(vdev->binding_opaque, idx, false);
+    virtio_set_host_notifier(vdev, idx, false);
 fail_host_notifier:
     vdev->binding->set_guest_notifier(vdev->binding_opaque, idx, false);
 fail_guest_notifier:
@@ -575,7 +575,7 @@  static void vhost_virtqueue_cleanup(struct vhost_dev *dev,
     }
     assert (r >= 0);
 
-    r = vdev->binding->set_host_notifier(vdev->binding_opaque, idx, false);
+    r = virtio_set_host_notifier(vdev, idx, false);
     if (r < 0) {
         fprintf(stderr, "vhost VQ %d host cleanup failed: %d\n", idx, r);
         fflush(stderr);
diff --git a/hw/virtio.c b/hw/virtio.c
index fbef788..f075b3a 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -16,6 +16,7 @@ 
 #include "trace.h"
 #include "virtio.h"
 #include "sysemu.h"
+#include "kvm.h"
 
 /* The alignment to use between consumer and producer parts of vring.
  * x86 pagesize again. */
@@ -77,6 +78,11 @@  struct VirtQueue
     VirtIODevice *vdev;
     EventNotifier guest_notifier;
     EventNotifier host_notifier;
+    enum {
+        HOST_NOTIFIER_DEASSIGNED,   /* inactive */
+        HOST_NOTIFIER_ASSIGNED,     /* active */
+        HOST_NOTIFIER_OFFLIMITS,    /* active but outside our control */
+    } host_notifier_state;
 };
 
 /* virt queue functions */
@@ -453,6 +459,93 @@  void virtio_update_irq(VirtIODevice *vdev)
     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
 }
 
+/* Service virtqueue notify from a host notifier */
+static void virtio_read_host_notifier(void *opaque)
+{
+    VirtQueue *vq = opaque;
+    EventNotifier *notifier = virtio_queue_get_host_notifier(vq);
+    if (event_notifier_test_and_clear(notifier)) {
+        if (vq->vring.desc) {
+            vq->handle_output(vq->vdev, vq);
+        }
+    }
+}
+
+/* Transition between host notifier states */
+static int virtio_set_host_notifier_state(VirtIODevice *vdev, int n, int state)
+{
+    VirtQueue *vq = &vdev->vq[n];
+    EventNotifier *notifier = virtio_queue_get_host_notifier(vq);
+    int rc;
+
+    if (!kvm_enabled()) {
+        return -ENOSYS;
+    }
+
+    /* If the number of ioeventfds is limited, use them for vhost only */
+    if (state == HOST_NOTIFIER_ASSIGNED && !kvm_has_many_iobus_devs()) {
+        state = HOST_NOTIFIER_DEASSIGNED;
+    }
+
+    /* Ignore if no state change */
+    if (vq->host_notifier_state == state) {
+        return 0;
+    }
+
+    /* Disable read handler if transitioning away from assigned */
+    if (vq->host_notifier_state == HOST_NOTIFIER_ASSIGNED) {
+        qemu_set_fd_handler(event_notifier_get_fd(notifier), NULL, NULL, NULL);
+    }
+
+    /* Toggle host notifier if transitioning to or from deassigned */
+    if (state == HOST_NOTIFIER_DEASSIGNED ||
+        vq->host_notifier_state == HOST_NOTIFIER_DEASSIGNED) {
+        rc = vdev->binding->set_host_notifier(vdev->binding_opaque, n,
+                state != HOST_NOTIFIER_DEASSIGNED);
+        if (rc < 0) {
+            return rc;
+        }
+    }
+
+    /* Enable read handler if transitioning to assigned */
+    if (state == HOST_NOTIFIER_ASSIGNED) {
+        qemu_set_fd_handler(event_notifier_get_fd(notifier),
+                            virtio_read_host_notifier, NULL, vq);
+    }
+
+    vq->host_notifier_state = state;
+    return 0;
+}
+
+/* Try to assign/deassign host notifiers for all virtqueues */
+static void virtio_set_host_notifiers(VirtIODevice *vdev, bool assigned)
+{
+    int state = assigned ? HOST_NOTIFIER_ASSIGNED : HOST_NOTIFIER_DEASSIGNED;
+    int i;
+
+    if (!vdev->binding->set_host_notifier) {
+        return;
+    }
+
+    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
+        if (vdev->vq[i].host_notifier_state == HOST_NOTIFIER_OFFLIMITS) {
+            continue;
+        }
+
+        if (!vdev->vq[i].vring.desc) {
+            continue;
+        }
+
+        virtio_set_host_notifier_state(vdev, i, state);
+    }
+}
+
+int virtio_set_host_notifier(VirtIODevice *vdev, int n, bool assigned)
+{
+    int state = assigned ? HOST_NOTIFIER_OFFLIMITS : HOST_NOTIFIER_ASSIGNED;
+    return virtio_set_host_notifier_state(vdev, n, state);
+}
+
 void virtio_reset(void *opaque)
 {
     VirtIODevice *vdev = opaque;
@@ -467,6 +560,7 @@  void virtio_reset(void *opaque)
     vdev->isr = 0;
     vdev->config_vector = VIRTIO_NO_VECTOR;
     virtio_notify_vector(vdev, vdev->config_vector);
+    virtio_set_host_notifiers(vdev, false);
 
     for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
         vdev->vq[i].vring.desc = 0;
@@ -592,6 +686,16 @@  void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
         vdev->vq[n].vector = vector;
 }
 
+void virtio_set_status(VirtIODevice *vdev, uint8_t val)
+{
+    virtio_set_host_notifiers(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
+
+    if (vdev->set_status) {
+        vdev->set_status(vdev, val);
+    }
+    vdev->status = val;
+}
+
 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                             void (*handle_output)(VirtIODevice *, VirtQueue *))
 {
@@ -719,6 +823,7 @@  int virtio_load(VirtIODevice *vdev, QEMUFile *f)
     }
 
     virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
+    virtio_set_host_notifiers(vdev, vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
     return 0;
 }
 
@@ -746,6 +851,7 @@  VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
     for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
         vdev->vq[i].vdev = vdev;
+        vdev->vq[i].host_notifier_state = HOST_NOTIFIER_DEASSIGNED;
     }
 
     vdev->name = name;
diff --git a/hw/virtio.h b/hw/virtio.h
index 96514e6..d76157e 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -125,13 +125,7 @@  struct VirtIODevice
     uint16_t device_id;
 };
 
-static inline void virtio_set_status(VirtIODevice *vdev, uint8_t val)
-{
-    if (vdev->set_status) {
-        vdev->set_status(vdev, val);
-    }
-    vdev->status = val;
-}
+void virtio_set_status(VirtIODevice *vdev, uint8_t val);
 
 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                             void (*handle_output)(VirtIODevice *,
@@ -217,6 +211,7 @@  target_phys_addr_t virtio_queue_get_ring_size(VirtIODevice *vdev, int n);
 uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n);
 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx);
 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n);
+int virtio_set_host_notifier(VirtIODevice *vdev, int n, bool assigned);
 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq);
 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq);
 void virtio_irq(VirtQueue *vq);
diff --git a/kvm-all.c b/kvm-all.c
index 1cc696f..2f09e34 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -24,6 +24,7 @@ 
 #include "qemu-barrier.h"
 #include "sysemu.h"
 #include "hw/hw.h"
+#include "hw/event_notifier.h"
 #include "gdbstub.h"
 #include "kvm.h"
 #include "bswap.h"
@@ -72,6 +73,7 @@  struct KVMState
     int irqchip_in_kernel;
     int pit_in_kernel;
     int xsave, xcrs;
+    int many_iobus_devs;
 };
 
 static KVMState *kvm_state;
@@ -423,6 +425,36 @@  int kvm_check_extension(KVMState *s, unsigned int extension)
     return ret;
 }
 
+static int kvm_check_many_iobus_devs(void)
+{
+    /* Older kernels have a 6 device limit on the KVM io bus.  In that case
+     * creating many ioeventfds must be avoided.  This tests checks for the
+     * limitation.
+     */
+    EventNotifier notifiers[7];
+    int i, ret = 0;
+    for (i = 0; i < ARRAY_SIZE(notifiers); i++) {
+        ret = event_notifier_init(&notifiers[i], 0);
+        if (ret < 0) {
+            break;
+        }
+        ret = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(&notifiers[i]), 0, i, true);
+        if (ret < 0) {
+            event_notifier_cleanup(&notifiers[i]);
+            break;
+        }
+    }
+
+    /* Decide whether many devices are supported or not */
+    ret = i == ARRAY_SIZE(notifiers);
+
+    while (i-- > 0) {
+        kvm_set_ioeventfd_pio_word(event_notifier_get_fd(&notifiers[i]), 0, i, false);
+        event_notifier_cleanup(&notifiers[i]);
+    }
+    return ret;
+}
+
 static void kvm_set_phys_mem(target_phys_addr_t start_addr,
 			     ram_addr_t size,
 			     ram_addr_t phys_offset)
@@ -699,6 +731,8 @@  int kvm_init(int smp_cpus)
     kvm_state = s;
     cpu_register_phys_memory_client(&kvm_cpu_phys_memory_client);
 
+    s->many_iobus_devs = kvm_check_many_iobus_devs();
+
     return 0;
 
 err:
@@ -1028,6 +1062,11 @@  int kvm_has_xcrs(void)
     return kvm_state->xcrs;
 }
 
+int kvm_has_many_iobus_devs(void)
+{
+    return kvm_state->many_iobus_devs;
+}
+
 void kvm_setup_guest_memory(void *start, size_t size)
 {
     if (!kvm_has_sync_mmu()) {
diff --git a/kvm-stub.c b/kvm-stub.c
index d45f9fa..b0887fb 100644
--- a/kvm-stub.c
+++ b/kvm-stub.c
@@ -99,6 +99,11 @@  int kvm_has_robust_singlestep(void)
     return 0;
 }
 
+int kvm_has_many_iobus_devs(void)
+{
+    return 0;
+}
+
 void kvm_setup_guest_memory(void *start, size_t size)
 {
 }
diff --git a/kvm.h b/kvm.h
index 50b6c01..f405906 100644
--- a/kvm.h
+++ b/kvm.h
@@ -42,6 +42,7 @@  int kvm_has_robust_singlestep(void);
 int kvm_has_debugregs(void);
 int kvm_has_xsave(void);
 int kvm_has_xcrs(void);
+int kvm_has_many_iobus_devs(void);
 
 #ifdef NEED_CPU_H
 int kvm_init_vcpu(CPUState *env);