@@ -45,6 +45,9 @@
#define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug"
+/* KVM route changes begun but not yet committed; protected by BQL */
+static KVMRouteChange vfio_route_change;
+
static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
static void vfio_msi_disable_common(VFIOPCIDevice *vdev);
@@ -413,33 +416,36 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix)
static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector,
int vector_n, bool msix)
{
- KVMRouteChange c;
- int virq;
-
if ((msix && vdev->no_kvm_msix) || (!msix && vdev->no_kvm_msi)) {
return;
}
- if (event_notifier_init(&vector->kvm_interrupt, 0)) {
+ vector->virq = kvm_irqchip_add_msi_route(&vfio_route_change,
+ vector_n, &vdev->pdev);
+}
+/* Attach a notifier to an already-added virq; on failure, release the virq. */
+static void vfio_connect_kvm_msi_virq(VFIOMSIVector *vector)
+{
+ if (vector->virq < 0) {
return;
}
- c = kvm_irqchip_begin_route_changes(kvm_state);
- virq = kvm_irqchip_add_msi_route(&c, vector_n, &vdev->pdev);
- if (virq < 0) {
- event_notifier_cleanup(&vector->kvm_interrupt);
- return;
+ if (event_notifier_init(&vector->kvm_interrupt, 0)) {
+ goto fail_notifier;
}
- kvm_irqchip_commit_route_changes(&c);
if (kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, &vector->kvm_interrupt,
- NULL, virq) < 0) {
- kvm_irqchip_release_virq(kvm_state, virq);
- event_notifier_cleanup(&vector->kvm_interrupt);
- return;
+ NULL, vector->virq) < 0) {
+ goto fail_kvm;
}
- vector->virq = virq;
+ return;
+/* Undo in reverse order of setup; both paths release the virq. */
+fail_kvm:
+ event_notifier_cleanup(&vector->kvm_interrupt);
+fail_notifier:
+ kvm_irqchip_release_virq(kvm_state, vector->virq);
+ vector->virq = -1;
}
static void vfio_remove_kvm_msi_virq(VFIOMSIVector *vector)
@@ -494,7 +500,14 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
}
} else {
if (msg) {
- vfio_add_kvm_msi_virq(vdev, vector, nr, true);
+ if (vdev->defer_kvm_irq_routing) {
+ vfio_add_kvm_msi_virq(vdev, vector, nr, true);
+ } else {
+ vfio_route_change = kvm_irqchip_begin_route_changes(kvm_state);
+ vfio_add_kvm_msi_virq(vdev, vector, nr, true);
+ kvm_irqchip_commit_route_changes(&vfio_route_change);
+ vfio_connect_kvm_msi_virq(vector);
+ }
}
}
@@ -504,11 +517,13 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
* increase them as needed.
*/
if (vdev->nr_vectors < nr + 1) {
- vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX);
vdev->nr_vectors = nr + 1;
- ret = vfio_enable_vectors(vdev, true);
- if (ret) {
- error_report("vfio: failed to enable vectors, %d", ret);
+ if (!vdev->defer_kvm_irq_routing) {
+ vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX);
+ ret = vfio_enable_vectors(vdev, true);
+ if (ret) {
+ error_report("vfio: failed to enable vectors, %d", ret);
+ }
}
} else {
Error *err = NULL;
@@ -570,6 +585,27 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
}
}
+static void vfio_prepare_kvm_msi_virq_batch(VFIOPCIDevice *vdev)
+{
+ assert(!vdev->defer_kvm_irq_routing); /* batches must not nest */
+ vdev->defer_kvm_irq_routing = true; /* add routes only, commit later */
+ vfio_route_change = kvm_irqchip_begin_route_changes(kvm_state);
+}
+
+static void vfio_commit_kvm_msi_virq_batch(VFIOPCIDevice *vdev)
+{
+ int i;
+
+ assert(vdev->defer_kvm_irq_routing); /* requires an open batch */
+ vdev->defer_kvm_irq_routing = false;
+ /* A single commit covers every route added during the batch. */
+ kvm_irqchip_commit_route_changes(&vfio_route_change);
+ /* Connect each vector; those with virq < 0 are skipped by the helper. */
+ for (i = 0; i < vdev->nr_vectors; i++) {
+ vfio_connect_kvm_msi_virq(&vdev->msi_vectors[i]);
+ }
+}
+
static void vfio_msix_enable(VFIOPCIDevice *vdev)
{
vfio_disable_interrupts(vdev);
@@ -579,26 +615,45 @@ static void vfio_msix_enable(VFIOPCIDevice *vdev)
vdev->interrupt = VFIO_INT_MSIX;
/*
- * Some communication channels between VF & PF or PF & fw rely on the
- * physical state of the device and expect that enabling MSI-X from the
- * guest enables the same on the host. When our guest is Linux, the
- * guest driver call to pci_enable_msix() sets the enabling bit in the
- * MSI-X capability, but leaves the vector table masked. We therefore
- * can't rely on a vector_use callback (from request_irq() in the guest)
- * to switch the physical device into MSI-X mode because that may come a
- * long time after pci_enable_msix(). This code enables vector 0 with
- * triggering to userspace, then immediately release the vector, leaving
- * the physical device with no vectors enabled, but MSI-X enabled, just
- * like the guest view.
+ * Setting vector notifiers triggers synchronous vector-use
+ * callbacks for each active vector. Deferring the KVM route
+ * commit so that it happens once, rather than once per vector,
+ * provides a substantial performance improvement.
*/
- vfio_msix_vector_do_use(&vdev->pdev, 0, NULL, NULL);
- vfio_msix_vector_release(&vdev->pdev, 0);
+ vfio_prepare_kvm_msi_virq_batch(vdev);
if (msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use,
vfio_msix_vector_release, NULL)) {
error_report("vfio: msix_set_vector_notifiers failed");
}
+ vfio_commit_kvm_msi_virq_batch(vdev);
+
+ if (vdev->nr_vectors) {
+ int ret;
+
+ ret = vfio_enable_vectors(vdev, true);
+ if (ret) {
+ error_report("vfio: failed to enable vectors, %d", ret);
+ }
+ } else {
+ /*
+ * Some communication channels between VF & PF or PF & fw rely on the
+ * physical state of the device and expect that enabling MSI-X from the
+ * guest enables the same on the host. When our guest is Linux, the
+ * guest driver call to pci_enable_msix() sets the enabling bit in the
+ * MSI-X capability, but leaves the vector table masked. We therefore
+ * can't rely on a vector_use callback (from request_irq() in the guest)
+ * to switch the physical device into MSI-X mode because that may come a
+ * long time after pci_enable_msix(). This code enables vector 0 with
+ * triggering to userspace, then immediately releases the vector,
+ * leaving the physical device with no vectors enabled, but MSI-X
+ * enabled, just like the guest view.
+ */
+ vfio_msix_vector_do_use(&vdev->pdev, 0, NULL, NULL);
+ vfio_msix_vector_release(&vdev->pdev, 0);
+ }
+
trace_vfio_msix_enable(vdev->vbasedev.name);
}
@@ -608,6 +663,13 @@ static void vfio_msi_enable(VFIOPCIDevice *vdev)
vfio_disable_interrupts(vdev);
+ /*
+ * Setting up each vector requires adding a KVM route. Deferring the
+ * KVM route commit so that it happens once, rather than once per
+ * vector, provides a substantial performance improvement.
+ */
+ vfio_prepare_kvm_msi_virq_batch(vdev);
+
vdev->nr_vectors = msi_nr_vectors_allocated(&vdev->pdev);
retry:
vdev->msi_vectors = g_new0(VFIOMSIVector, vdev->nr_vectors);
@@ -633,6 +695,8 @@ retry:
vfio_add_kvm_msi_virq(vdev, vector, i, false);
}
+ vfio_commit_kvm_msi_virq_batch(vdev);
+
/* Set interrupt type prior to possible interrupts */
vdev->interrupt = VFIO_INT_MSI;
@@ -19,6 +19,7 @@
#include "qemu/queue.h"
#include "qemu/timer.h"
#include "qom/object.h"
+#include "sysemu/kvm.h"
#define PCI_ANY_ID (~0)
@@ -171,6 +172,7 @@ struct VFIOPCIDevice {
bool no_kvm_ioeventfd;
bool no_vfio_ioeventfd;
bool enable_ramfb;
+ bool defer_kvm_irq_routing;
VFIODisplay *dpy;
Notifier irqchip_change_notifier;
};