From patchwork Mon Oct 15 20:28:15 2012
X-Patchwork-Submitter: Alex Williamson
X-Patchwork-Id: 1596521
From: Alex Williamson <alex.williamson@redhat.com>
Subject: [PATCH] vfio-pci: Add KVM INTx acceleration
To: alex.williamson@redhat.com
Cc: qemu-devel@nongnu.org, kvm@vger.kernel.org, mst@redhat.com
Date: Mon, 15 Oct 2012 14:28:15 -0600
Message-ID: <20121015202031.23323.72827.stgit@bling.home>
User-Agent: StGIT/0.14.3

This makes use of the new level irqfd support, enabling bypass of QEMU
userspace on both INTx injection and unmask.  This significantly boosts
the performance of devices making use of legacy interrupts.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---

My INTx routing workaround below will probably raise some eyebrows, but
I don't feel it's worth subjecting users to core dumps if they want to
try vfio-pci on new platforms.  INTx routing is part of some larger
plan, but until that plan materializes we have to try to avoid the API
unless we think there's a good chance it might be there.  I'll accept
the maintenance of updating a whitelist in the interim.  Thanks,

Alex

 hw/vfio_pci.c |  224 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 224 insertions(+)
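For reference, the level/resample irqfd pairing this relies on boils down
to something like the sketch below.  This is illustrative only and not
part of the patch; vm_fd, gsi and setup_level_irqfd() are placeholders
for a KVM VM descriptor, a guest IRQ number and a helper name invented
for the example.

    /*
     * Standalone sketch of registering a level-triggered irqfd with a
     * resample eventfd (assumes a kernel with KVM_CAP_IRQFD_RESAMPLE).
     */
    #include <sys/eventfd.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int setup_level_irqfd(int vm_fd, unsigned int gsi,
                                 int *trigger_fd, int *resample_fd)
    {
        struct kvm_irqfd irqfd = { .gsi = gsi, .flags = KVM_IRQFD_FLAG_RESAMPLE };

        /* eventfd the device backend signals to assert the level interrupt */
        *trigger_fd = eventfd(0, EFD_CLOEXEC);
        /* eventfd KVM signals on guest EOI so the device can be unmasked */
        *resample_fd = eventfd(0, EFD_CLOEXEC);
        if (*trigger_fd < 0 || *resample_fd < 0) {
            return -1;
        }

        irqfd.fd = *trigger_fd;
        irqfd.resamplefd = *resample_fd;

        /* Once registered, assert -> EOI -> unmask stays in the kernel */
        return ioctl(vm_fd, KVM_IRQFD, &irqfd);
    }

In the patch below, the trigger side is vdev->intx.interrupt and the
resample side is vdev->intx.unmask, which is also handed to VFIO via
VFIO_IRQ_SET_ACTION_UNMASK so the device interrupt is re-enabled on EOI
without bouncing through userspace.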
diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
index 639371e..777a5f8 100644
--- a/hw/vfio_pci.c
+++ b/hw/vfio_pci.c
@@ -154,6 +154,53 @@ static uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
 static void vfio_mmap_set_enabled(VFIODevice *vdev, bool enabled);
 
 /*
+ * PCI code refuses to make it possible to probe whether the chipset
+ * supports pci_device_route_intx_to_irq() and booby traps the call
+ * to assert if it doesn't.  For us, this is just an optimization, so
+ * only enable it when we know it's present.  Unfortunately PCIBus is
+ * private, so we can't just look at the function pointer.
+ */
+static bool vfio_pci_bus_has_intx_route(PCIDevice *pdev)
+{
+#ifdef CONFIG_KVM
+    BusState *bus = qdev_get_parent_bus(&pdev->qdev);
+    DeviceState *dev;
+
+    if (!kvm_irqchip_in_kernel() ||
+        !kvm_check_extension(kvm_state, KVM_CAP_IRQFD_RESAMPLE)) {
+        return false;
+    }
+
+    for (; bus->parent; bus = qdev_get_parent_bus(dev)) {
+
+        dev = bus->parent;
+
+        if (!strncmp("i440FX-pcihost", object_get_typename(OBJECT(dev)), 14)) {
+            return true;
+        }
+    }
+
+    error_report("vfio-pci: VM chipset does not support INTx routing, "
+                 "using slow INTx mode\n");
+#endif
+    return false;
+}
+
+static PCIINTxRoute vfio_pci_device_route_intx_to_irq(PCIDevice *pdev, int pin)
+{
+    if (!vfio_pci_bus_has_intx_route(pdev)) {
+        return (PCIINTxRoute) { .mode = PCI_INTX_DISABLED, .irq = -1 };
+    }
+
+    return pci_device_route_intx_to_irq(pdev, pin);
+}
+
+static bool vfio_pci_intx_route_changed(PCIINTxRoute *old, PCIINTxRoute *new)
+{
+    return old->mode != new->mode || old->irq != new->irq;
+}
+
+/*
  * Common VFIO interrupt disable
  */
 static void vfio_disable_irqindex(VFIODevice *vdev, int index)
@@ -185,6 +232,21 @@ static void vfio_unmask_intx(VFIODevice *vdev)
     ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
 }
 
+#ifdef CONFIG_KVM
+static void vfio_mask_intx(VFIODevice *vdev)
+{
+    struct vfio_irq_set irq_set = {
+        .argsz = sizeof(irq_set),
+        .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK,
+        .index = VFIO_PCI_INTX_IRQ_INDEX,
+        .start = 0,
+        .count = 1,
+    };
+
+    ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
+}
+#endif
+
 /*
  * Disabling BAR mmaping can be slow, but toggling it around INTx can
  * also be a huge overhead.  We try to get the best of both worlds by
@@ -248,6 +310,161 @@ static void vfio_eoi(VFIODevice *vdev)
     vfio_unmask_intx(vdev);
 }
 
+static void vfio_enable_intx_kvm(VFIODevice *vdev)
+{
+#ifdef CONFIG_KVM
+    struct kvm_irqfd irqfd = {
+        .fd = event_notifier_get_fd(&vdev->intx.interrupt),
+        .gsi = vdev->intx.route.irq,
+        .flags = KVM_IRQFD_FLAG_RESAMPLE,
+    };
+    struct vfio_irq_set *irq_set;
+    int ret, argsz;
+    int32_t *pfd;
+
+    if (!kvm_irqchip_in_kernel() ||
+        vdev->intx.route.mode != PCI_INTX_ENABLED ||
+        !kvm_check_extension(kvm_state, KVM_CAP_IRQFD_RESAMPLE)) {
+        return;
+    }
+
+    /* Get to a known interrupt state */
+    qemu_set_fd_handler(irqfd.fd, NULL, NULL, vdev);
+    vfio_mask_intx(vdev);
+    vdev->intx.pending = false;
+    qemu_set_irq(vdev->pdev.irq[vdev->intx.pin], 0);
+
+    /* Get an eventfd for resample/unmask */
+    if (event_notifier_init(&vdev->intx.unmask, 0)) {
+        error_report("vfio: Error: event_notifier_init failed eoi\n");
+        goto fail;
+    }
+
+    /* KVM triggers it, VFIO listens for it */
+    irqfd.resamplefd = event_notifier_get_fd(&vdev->intx.unmask);
+
+    if (kvm_vm_ioctl(kvm_state, KVM_IRQFD, &irqfd)) {
+        error_report("vfio: Error: Failed to setup resample irqfd: %m\n");
+        goto fail_irqfd;
+    }
+
+    argsz = sizeof(*irq_set) + sizeof(*pfd);
+
+    irq_set = g_malloc0(argsz);
+    irq_set->argsz = argsz;
+    irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_UNMASK;
+    irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
+    irq_set->start = 0;
+    irq_set->count = 1;
+    pfd = (int32_t *)&irq_set->data;
+
+    *pfd = irqfd.resamplefd;
+
+    ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
+    g_free(irq_set);
+    if (ret) {
+        error_report("vfio: Error: Failed to setup INTx unmask fd: %m\n");
+        goto fail_vfio;
+    }
+
+    /* Let'em rip */
+    vfio_unmask_intx(vdev);
+
+    vdev->intx.kvm_accel = true;
+
+    DPRINTF("%s(%04x:%02x:%02x.%x) KVM INTx accel enabled\n",
+            __func__, vdev->host.domain, vdev->host.bus,
+            vdev->host.slot, vdev->host.function);
+
+    return;
+
+fail_vfio:
+    irqfd.flags = KVM_IRQFD_FLAG_DEASSIGN;
+    kvm_vm_ioctl(kvm_state, KVM_IRQFD, &irqfd);
+fail_irqfd:
+    event_notifier_cleanup(&vdev->intx.unmask);
+fail:
+    qemu_set_fd_handler(irqfd.fd, vfio_intx_interrupt, NULL, vdev);
+    vfio_unmask_intx(vdev);
+#endif
+}
+
+static void vfio_disable_intx_kvm(VFIODevice *vdev)
+{
+#ifdef CONFIG_KVM
+    struct kvm_irqfd irqfd = {
+        .fd = event_notifier_get_fd(&vdev->intx.interrupt),
+        .gsi = vdev->intx.route.irq,
+        .flags = KVM_IRQFD_FLAG_DEASSIGN,
+    };
+
+    if (!vdev->intx.kvm_accel) {
+        return;
+    }
+
+    /*
+     * Get to a known state, hardware masked, QEMU ready to accept new
+     * interrupts, QEMU IRQ de-asserted.
+     */
+    vfio_mask_intx(vdev);
+    vdev->intx.pending = false;
+    qemu_set_irq(vdev->pdev.irq[vdev->intx.pin], 0);
+
+    /* Tell KVM to stop listening for an INTx irqfd */
+    if (kvm_vm_ioctl(kvm_state, KVM_IRQFD, &irqfd)) {
+        error_report("vfio: Error: Failed to disable INTx irqfd: %m\n");
+    }
+
+    /* We only need to close the eventfd for VFIO to cleanup the kernel side */
+    event_notifier_cleanup(&vdev->intx.unmask);
+
+    /* QEMU starts listening for interrupt events. */
+    qemu_set_fd_handler(irqfd.fd, vfio_intx_interrupt, NULL, vdev);
+
+    vdev->intx.kvm_accel = false;
+
+    /* If we've missed an event, let it re-fire through QEMU */
+    vfio_unmask_intx(vdev);
+
+    DPRINTF("%s(%04x:%02x:%02x.%x) KVM INTx accel disabled\n",
+            __func__, vdev->host.domain, vdev->host.bus,
+            vdev->host.slot, vdev->host.function);
+#endif
+}
+
+static void vfio_update_irq(PCIDevice *pdev)
+{
+    VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
+    PCIINTxRoute route;
+
+    if (vdev->interrupt != VFIO_INT_INTx) {
+        return;
+    }
+
+    route = vfio_pci_device_route_intx_to_irq(&vdev->pdev, vdev->intx.pin);
+
+    if (!vfio_pci_intx_route_changed(&vdev->intx.route, &route)) {
+        return; /* Nothing changed */
+    }
+
+    DPRINTF("%s(%04x:%02x:%02x.%x) IRQ moved %d -> %d\n", __func__,
+            vdev->host.domain, vdev->host.bus, vdev->host.slot,
+            vdev->host.function, vdev->intx.route.irq, route.irq);
+
+    vfio_disable_intx_kvm(vdev);
+
+    vdev->intx.route = route;
+
+    if (route.mode != PCI_INTX_ENABLED) {
+        return;
+    }
+
+    vfio_enable_intx_kvm(vdev);
+
+    /* Re-enable the interrupt in case we missed an EOI */
+    vfio_eoi(vdev);
+}
+
 static int vfio_enable_intx(VFIODevice *vdev)
 {
     uint8_t pin = vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1);
@@ -262,6 +479,9 @@ static int vfio_enable_intx(VFIODevice *vdev)
     vfio_disable_interrupts(vdev);
 
     vdev->intx.pin = pin - 1; /* Pin A (1) -> irq[0] */
+    vdev->intx.route = vfio_pci_device_route_intx_to_irq(&vdev->pdev,
+                                                         vdev->intx.pin);
+
     ret = event_notifier_init(&vdev->intx.interrupt, 0);
     if (ret) {
         error_report("vfio: Error: event_notifier_init failed\n");
@@ -290,6 +510,8 @@ static int vfio_enable_intx(VFIODevice *vdev)
         return -errno;
     }
 
+    vfio_enable_intx_kvm(vdev);
+
     vdev->interrupt = VFIO_INT_INTx;
 
     DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain,
@@ -303,6 +525,7 @@ static void vfio_disable_intx(VFIODevice *vdev)
     int fd;
 
     qemu_del_timer(vdev->intx.mmap_timer);
+    vfio_disable_intx_kvm(vdev);
     vfio_disable_irqindex(vdev, VFIO_PCI_INTX_IRQ_INDEX);
     vdev->intx.pending = false;
     qemu_set_irq(vdev->pdev.irq[vdev->intx.pin], 0);
@@ -1870,6 +2093,7 @@ static int vfio_initfn(PCIDevice *pdev)
     if (vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1)) {
         vdev->intx.mmap_timer = qemu_new_timer_ms(vm_clock,
                                                   vfio_intx_mmap_enable, vdev);
+        pci_device_set_intx_routing_notifier(&vdev->pdev, vfio_update_irq);
         ret = vfio_enable_intx(vdev);
         if (ret) {
             goto out_teardown;