From patchwork Thu Sep 30 14:01:52 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Stefan Hajnoczi X-Patchwork-Id: 220332 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter1.kernel.org (8.14.4/8.14.3) with ESMTP id o8UE2RR8025930 for ; Thu, 30 Sep 2010 14:02:28 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1756123Ab0I3OCX (ORCPT ); Thu, 30 Sep 2010 10:02:23 -0400 Received: from mtagate1.de.ibm.com ([195.212.17.161]:54000 "EHLO mtagate1.de.ibm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752905Ab0I3OCW (ORCPT ); Thu, 30 Sep 2010 10:02:22 -0400 Received: from d12nrmr1607.megacenter.de.ibm.com (d12nrmr1607.megacenter.de.ibm.com [9.149.167.49]) by mtagate1.de.ibm.com (8.13.1/8.13.1) with ESMTP id o8UE2FXQ013282 for ; Thu, 30 Sep 2010 14:02:15 GMT Received: from d12av04.megacenter.de.ibm.com (d12av04.megacenter.de.ibm.com [9.149.165.229]) by d12nrmr1607.megacenter.de.ibm.com (8.13.8/8.13.8/NCO v10.0) with ESMTP id o8UE2FPH3985412 for ; Thu, 30 Sep 2010 16:02:15 +0200 Received: from d12av04.megacenter.de.ibm.com (loopback [127.0.0.1]) by d12av04.megacenter.de.ibm.com (8.12.11.20060308/8.13.3) with ESMTP id o8UE2ELJ020689 for ; Thu, 30 Sep 2010 16:02:15 +0200 Received: from stefan-thinkpad.manchester-maybrook.uk.ibm.com (dyn-9-174-219-27.manchester-maybrook.uk.ibm.com [9.174.219.27]) by d12av04.megacenter.de.ibm.com (8.12.11.20060308/8.12.11) with ESMTP id o8UE2EpZ020666; Thu, 30 Sep 2010 16:02:14 +0200 From: Stefan Hajnoczi To: Cc: , "Michael S. 
Tsirkin" , Anthony Liguori , Khoa Huynh , Sridhar Samudrala , Steve Dobbelstein , Stefan Hajnoczi Subject: [PATCH] virtio: Use ioeventfd for virtqueue notify Date: Thu, 30 Sep 2010 15:01:52 +0100 Message-Id: <1285855312-11739-1-git-send-email-stefanha@linux.vnet.ibm.com> X-Mailer: git-send-email 1.7.1 Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.3 (demeter1.kernel.org [140.211.167.41]); Thu, 30 Sep 2010 14:02:28 +0000 (UTC) diff --git a/hw/vhost.c b/hw/vhost.c index 1b8624d..f127a07 100644 --- a/hw/vhost.c +++ b/hw/vhost.c @@ -517,7 +517,7 @@ static int vhost_virtqueue_init(struct vhost_dev *dev, goto fail_guest_notifier; } - r = vdev->binding->set_host_notifier(vdev->binding_opaque, idx, true); + r = virtio_set_host_notifier(vdev, idx, true); if (r < 0) { fprintf(stderr, "Error binding host notifier: %d\n", -r); goto fail_host_notifier; @@ -539,7 +539,7 @@ static int vhost_virtqueue_init(struct vhost_dev *dev, fail_call: fail_kick: - vdev->binding->set_host_notifier(vdev->binding_opaque, idx, false); + virtio_set_host_notifier(vdev, idx, false); fail_host_notifier: vdev->binding->set_guest_notifier(vdev->binding_opaque, idx, false); fail_guest_notifier: @@ -575,7 +575,7 @@ static void vhost_virtqueue_cleanup(struct vhost_dev *dev, } assert (r >= 0); - r = vdev->binding->set_host_notifier(vdev->binding_opaque, idx, false); + r = virtio_set_host_notifier(vdev, idx, false); if (r < 0) { fprintf(stderr, "vhost VQ %d host cleanup failed: %d\n", idx, r); fflush(stderr); diff --git a/hw/virtio.c b/hw/virtio.c index fbef788..f075b3a 100644 --- a/hw/virtio.c +++ b/hw/virtio.c @@ -16,6 +16,7 @@ #include "trace.h" #include "virtio.h" #include "sysemu.h" +#include "kvm.h" /* The alignment to use between consumer and producer parts of vring. * x86 pagesize again. 
*/ @@ -77,6 +78,11 @@ struct VirtQueue VirtIODevice *vdev; EventNotifier guest_notifier; EventNotifier host_notifier; + enum { + HOST_NOTIFIER_DEASSIGNED, /* inactive */ + HOST_NOTIFIER_ASSIGNED, /* active */ + HOST_NOTIFIER_OFFLIMITS, /* active but outside our control */ + } host_notifier_state; }; /* virt queue functions */ @@ -453,6 +459,93 @@ void virtio_update_irq(VirtIODevice *vdev) virtio_notify_vector(vdev, VIRTIO_NO_VECTOR); } +/* Service virtqueue notify from a host notifier */ +static void virtio_read_host_notifier(void *opaque) +{ + VirtQueue *vq = opaque; + EventNotifier *notifier = virtio_queue_get_host_notifier(vq); + if (event_notifier_test_and_clear(notifier)) { + if (vq->vring.desc) { + vq->handle_output(vq->vdev, vq); + } + } +} + +/* Transition between host notifier states */ +static int virtio_set_host_notifier_state(VirtIODevice *vdev, int n, int state) +{ + VirtQueue *vq = &vdev->vq[n]; + EventNotifier *notifier = virtio_queue_get_host_notifier(vq); + int rc; + + if (!kvm_enabled()) { + return -ENOSYS; + } + + /* If the number of ioeventfds is limited, use them for vhost only */ + if (state == HOST_NOTIFIER_ASSIGNED && !kvm_has_many_iobus_devs()) { + state = HOST_NOTIFIER_DEASSIGNED; + } + + /* Ignore if no state change */ + if (vq->host_notifier_state == state) { + return 0; + } + + /* Disable read handler if transitioning away from assigned */ + if (vq->host_notifier_state == HOST_NOTIFIER_ASSIGNED) { + qemu_set_fd_handler(event_notifier_get_fd(notifier), NULL, NULL, NULL); + } + + /* Toggle host notifier if transitioning to or from deassigned */ + if (state == HOST_NOTIFIER_DEASSIGNED || + vq->host_notifier_state == HOST_NOTIFIER_DEASSIGNED) { + rc = vdev->binding->set_host_notifier(vdev->binding_opaque, n, + state != HOST_NOTIFIER_DEASSIGNED); + if (rc < 0) { + return rc; + } + } + + /* Enable read handler if transitioning to assigned */ + if (state == HOST_NOTIFIER_ASSIGNED) { + qemu_set_fd_handler(event_notifier_get_fd(notifier), + 
virtio_read_host_notifier, NULL, vq); + } + + vq->host_notifier_state = state; + return 0; +} + +/* Try to assign/deassign host notifiers for all virtqueues */ +static void virtio_set_host_notifiers(VirtIODevice *vdev, bool assigned) +{ + int state = assigned ? HOST_NOTIFIER_ASSIGNED : HOST_NOTIFIER_DEASSIGNED; + int i; + + if (!vdev->binding->set_host_notifier) { + return; + } + + for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { + if (vdev->vq[i].host_notifier_state == HOST_NOTIFIER_OFFLIMITS) { + continue; + } + + if (!vdev->vq[i].vring.desc) { + continue; + } + + virtio_set_host_notifier_state(vdev, i, state); + } +} + +int virtio_set_host_notifier(VirtIODevice *vdev, int n, bool assigned) +{ + int state = assigned ? HOST_NOTIFIER_OFFLIMITS : HOST_NOTIFIER_ASSIGNED; + return virtio_set_host_notifier_state(vdev, n, state); +} + void virtio_reset(void *opaque) { VirtIODevice *vdev = opaque; @@ -467,6 +560,7 @@ void virtio_reset(void *opaque) vdev->isr = 0; vdev->config_vector = VIRTIO_NO_VECTOR; virtio_notify_vector(vdev, vdev->config_vector); + virtio_set_host_notifiers(vdev, false); for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { vdev->vq[i].vring.desc = 0; @@ -592,6 +686,16 @@ void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector) vdev->vq[n].vector = vector; } +void virtio_set_status(VirtIODevice *vdev, uint8_t val) +{ + virtio_set_host_notifiers(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK); + + if (vdev->set_status) { + vdev->set_status(vdev, val); + } + vdev->status = val; +} + VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, void (*handle_output)(VirtIODevice *, VirtQueue *)) { @@ -719,6 +823,7 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) } virtio_notify_vector(vdev, VIRTIO_NO_VECTOR); + virtio_set_host_notifiers(vdev, vdev->status & VIRTIO_CONFIG_S_DRIVER_OK); return 0; } @@ -746,6 +851,7 @@ VirtIODevice *virtio_common_init(const char *name, uint16_t device_id, for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { vdev->vq[i].vector = 
VIRTIO_NO_VECTOR; vdev->vq[i].vdev = vdev; + vdev->vq[i].host_notifier_state = HOST_NOTIFIER_DEASSIGNED; } vdev->name = name; diff --git a/hw/virtio.h b/hw/virtio.h index 96514e6..d76157e 100644 --- a/hw/virtio.h +++ b/hw/virtio.h @@ -125,13 +125,7 @@ struct VirtIODevice uint16_t device_id; }; -static inline void virtio_set_status(VirtIODevice *vdev, uint8_t val) -{ - if (vdev->set_status) { - vdev->set_status(vdev, val); - } - vdev->status = val; -} +void virtio_set_status(VirtIODevice *vdev, uint8_t val); VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, void (*handle_output)(VirtIODevice *, @@ -217,6 +211,7 @@ target_phys_addr_t virtio_queue_get_ring_size(VirtIODevice *vdev, int n); uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n); void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx); VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n); +int virtio_set_host_notifier(VirtIODevice *vdev, int n, bool assigned); EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq); EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq); void virtio_irq(VirtQueue *vq); diff --git a/kvm-all.c b/kvm-all.c index 1cc696f..2f09e34 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -24,6 +24,7 @@ #include "qemu-barrier.h" #include "sysemu.h" #include "hw/hw.h" +#include "hw/event_notifier.h" #include "gdbstub.h" #include "kvm.h" #include "bswap.h" @@ -72,6 +73,7 @@ struct KVMState int irqchip_in_kernel; int pit_in_kernel; int xsave, xcrs; + int many_iobus_devs; }; static KVMState *kvm_state; @@ -423,6 +425,36 @@ int kvm_check_extension(KVMState *s, unsigned int extension) return ret; } +static int kvm_check_many_iobus_devs(void) +{ + /* Older kernels have a 6 device limit on the KVM io bus. In that case + * creating many ioeventfds must be avoided. This test checks for the + * limitation. 
+ */ + EventNotifier notifiers[7]; + int i, ret = 0; + for (i = 0; i < ARRAY_SIZE(notifiers); i++) { + ret = event_notifier_init(&notifiers[i], 0); + if (ret < 0) { + break; + } + ret = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(&notifiers[i]), 0, i, true); + if (ret < 0) { + event_notifier_cleanup(&notifiers[i]); + break; + } + } + + /* Decide whether many devices are supported or not */ + ret = i == ARRAY_SIZE(notifiers); + + while (i-- > 0) { + kvm_set_ioeventfd_pio_word(event_notifier_get_fd(&notifiers[i]), 0, i, false); + event_notifier_cleanup(&notifiers[i]); + } + return ret; +} + static void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size, ram_addr_t phys_offset) @@ -699,6 +731,8 @@ int kvm_init(int smp_cpus) kvm_state = s; cpu_register_phys_memory_client(&kvm_cpu_phys_memory_client); + s->many_iobus_devs = kvm_check_many_iobus_devs(); + return 0; err: @@ -1028,6 +1062,11 @@ int kvm_has_xcrs(void) return kvm_state->xcrs; } +int kvm_has_many_iobus_devs(void) +{ + return kvm_state->many_iobus_devs; +} + void kvm_setup_guest_memory(void *start, size_t size) { if (!kvm_has_sync_mmu()) { diff --git a/kvm-stub.c b/kvm-stub.c index d45f9fa..b0887fb 100644 --- a/kvm-stub.c +++ b/kvm-stub.c @@ -99,6 +99,11 @@ int kvm_has_robust_singlestep(void) return 0; } +int kvm_has_many_iobus_devs(void) +{ + return 0; +} + void kvm_setup_guest_memory(void *start, size_t size) { } diff --git a/kvm.h b/kvm.h index 50b6c01..f405906 100644 --- a/kvm.h +++ b/kvm.h @@ -42,6 +42,7 @@ int kvm_has_robust_singlestep(void); int kvm_has_debugregs(void); int kvm_has_xsave(void); int kvm_has_xcrs(void); +int kvm_has_many_iobus_devs(void); #ifdef NEED_CPU_H int kvm_init_vcpu(CPUState *env);