diff mbox series

[v3,9/9] vfio: defer to commit kvm irq routing when enable msi/msix

Message ID 20210920230202.1439-10-longpeng2@huawei.com (mailing list archive)
State New, archived
Headers show
Series optimize the downtime for vfio migration | expand

Commit Message

Longpeng(Mike) Sept. 20, 2021, 11:02 p.m. UTC
In the migration resume phase, all unmasked msix vectors need to be
set up when loading the VF state. However, the setup operation takes
longer if the VM has more VFs and each VF has more unmasked
vectors.

The hot spot is kvm_irqchip_commit_routes: on each invocation it scans
and updates all irqfds that are already assigned, so more vectors
means more time is needed to process them.

vfio_pci_load_config
  vfio_msix_enable
    msix_set_vector_notifiers
      for (vector = 0; vector < dev->msix_entries_nr; vector++) {
        vfio_msix_vector_do_use
          vfio_add_kvm_msi_virq
            kvm_irqchip_commit_routes <-- expensive
      }

We can reduce the cost by only committing once outside the loop. The
routes are cached in kvm_state; we commit them first and then bind
the irqfd for each vector.

The test VM has 128 vcpus and 8 VFs (each one has 65 vectors).
We measured the cost of vfio_msix_enable for each VF, and
we can see that the cost can be reduced by 90+%.

VF      Count of irqfds[*]  Original        With this patch

1st           65            8               2
2nd           130           15              2
3rd           195           22              2
4th           260           24              3
5th           325           36              2
6th           390           44              3
7th           455           51              3
8th           520           58              4
Total                       258ms           21ms

[*] Count of irqfds
How many irqfds are already assigned and need to be processed in this
round.

The optimization can be applied to the msi type too.

Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com>
---
 hw/vfio/pci.c | 36 ++++++++++++++++++++++++++++--------
 1 file changed, 28 insertions(+), 8 deletions(-)

Comments

Alex Williamson Oct. 1, 2021, 11:04 p.m. UTC | #1
On Tue, 21 Sep 2021 07:02:02 +0800
"Longpeng(Mike)" <longpeng2@huawei.com> wrote:

> In migration resume phase, all unmasked msix vectors need to be
> setup when load the VF state. However, the setup operation would

s/load/loading/

> take longer if the VM has more VFs and each VF has more unmasked
> vectors.
> 
> The hot spot is kvm_irqchip_commit_routes, it'll scan and update
> all irqfds that already assigned each invocation, so more vectors

s/that/that are/

> means need more time to process them.
> 
> vfio_pci_load_config
>   vfio_msix_enable
>     msix_set_vector_notifiers
>       for (vector = 0; vector < dev->msix_entries_nr; vector++) {
>         vfio_msix_vector_do_use
>           vfio_add_kvm_msi_virq
>             kvm_irqchip_commit_routes <-- expensive
>       }
> 
> We can reduce the cost by only commit once outside the loop. The

s/commit/committing/

> routes is cached in kvm_state, we commit them first and then bind

s/is/are/

> irqfd for each vector.
> 
> The test VM has 128 vcpus and 8 VF (each one has 65 vectors),
> we measure the cost of the vfio_msix_enable for each VF, and
> we can see 90+% costs can be reduce.
> 
> VF      Count of irqfds[*]  Original        With this patch
> 
> 1st           65            8               2
> 2nd           130           15              2
> 3rd           195           22              2
> 4th           260           24              3
> 5th           325           36              2
> 6th           390           44              3
> 7th           455           51              3
> 8th           520           58              4
> Total                       258ms           21ms
> 
> [*] Count of irqfds
> How many irqfds that already assigned and need to process in this
> round.
> 
> The optimition can be applied to msi type too.

s/optimition/optimization/

> 
> Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com>
> ---
>  hw/vfio/pci.c | 36 ++++++++++++++++++++++++++++--------
>  1 file changed, 28 insertions(+), 8 deletions(-)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 2de1cc5425..b26129bddf 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -513,11 +513,13 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
>       * increase them as needed.
>       */
>      if (vdev->nr_vectors < nr + 1) {
> -        vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX);
>          vdev->nr_vectors = nr + 1;
> -        ret = vfio_enable_vectors(vdev, true);
> -        if (ret) {
> -            error_report("vfio: failed to enable vectors, %d", ret);
> +        if (!vdev->defer_kvm_irq_routing) {
> +            vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX);
> +            ret = vfio_enable_vectors(vdev, true);
> +            if (ret) {
> +                error_report("vfio: failed to enable vectors, %d", ret);
> +            }
>          }
>      } else {
>          Error *err = NULL;
> @@ -579,8 +581,7 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
>      }
>  }
>  
> -/* TODO: invoked when enclabe msi/msix vectors */
> -static __attribute__((unused)) void vfio_commit_kvm_msi_virq(VFIOPCIDevice *vdev)
> +static void vfio_commit_kvm_msi_virq(VFIOPCIDevice *vdev)
>  {
>      int i;
>      VFIOMSIVector *vector;
> @@ -610,6 +611,9 @@ static __attribute__((unused)) void vfio_commit_kvm_msi_virq(VFIOPCIDevice *vdev
>  
>  static void vfio_msix_enable(VFIOPCIDevice *vdev)
>  {
> +    PCIDevice *pdev = &vdev->pdev;
> +    int ret;
> +
>      vfio_disable_interrupts(vdev);
>  
>      vdev->msi_vectors = g_new0(VFIOMSIVector, vdev->msix->entries);
> @@ -632,11 +636,22 @@ static void vfio_msix_enable(VFIOPCIDevice *vdev)
>      vfio_msix_vector_do_use(&vdev->pdev, 0, NULL, NULL);
>      vfio_msix_vector_release(&vdev->pdev, 0);
>  
> -    if (msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use,
> -                                  vfio_msix_vector_release, NULL)) {

A comment would be useful here, maybe something like:

    /*
     * Setting vector notifiers triggers synchronous vector-use
     * callbacks for each active vector.  Deferring to commit the KVM
     * routes once rather than per vector provides a substantial
     * performance improvement.
     */

> +    vdev->defer_kvm_irq_routing = true;
> +
> +    ret = msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use,
> +                                    vfio_msix_vector_release, NULL);
> +    if (ret < 0) {
>          error_report("vfio: msix_set_vector_notifiers failed");
> +    } else if (!pdev->msix_function_masked) {
> +        vfio_commit_kvm_msi_virq(vdev);
> +        vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX);

Couldn't we also optimize the do_use/release on vector 0 above to avoid
this gratuitous disable here?  We only want to make sure MSIX is always
enabled on the device when we exit this function, so maybe that code
becomes an "else" branch below?

> +        ret = vfio_enable_vectors(vdev, true);
> +        if (ret) {
> +            error_report("vfio: failed to enable vectors, %d", ret);
> +        }
>      }
>  
> +    vdev->defer_kvm_irq_routing = false;
>      trace_vfio_msix_enable(vdev->vbasedev.name);
>  }
>  
> @@ -645,6 +660,7 @@ static void vfio_msi_enable(VFIOPCIDevice *vdev)
>      int ret, i;
>  
>      vfio_disable_interrupts(vdev);
> +    vdev->defer_kvm_irq_routing = true;
>  
>      vdev->nr_vectors = msi_nr_vectors_allocated(&vdev->pdev);
>  retry:
> @@ -671,6 +687,8 @@ retry:
>          vfio_add_kvm_msi_virq(vdev, vector, i, false);
>      }
>  
> +    vfio_commit_kvm_msi_virq(vdev);
> +
>      /* Set interrupt type prior to possible interrupts */
>      vdev->interrupt = VFIO_INT_MSI;
>  
> @@ -697,9 +715,11 @@ retry:
>           */
>          error_report("vfio: Error: Failed to enable MSI");
>  
> +        vdev->defer_kvm_irq_routing = false;
>          return;
>      }
>  
> +    vdev->defer_kvm_irq_routing = false;

Why wouldn't we clear the flag in vfio_commit_kvm_msi_virq()?  It
almost feels like there should be a vfio_prepare_kvm_msi_virq_batch()
that enables the flag and an unconditional
vfio_commit_kvm_msi_virq_batch() that clears the flag and decides if
further work is necessary.  Thanks,

Alex

>      trace_vfio_msi_enable(vdev->vbasedev.name, vdev->nr_vectors);
>  }
>
Longpeng(Mike) Oct. 5, 2021, 1:10 p.m. UTC | #2
> -----Original Message-----
> From: Alex Williamson [mailto:alex.williamson@redhat.com]
> Sent: Saturday, October 2, 2021 7:05 AM
> To: Longpeng (Mike, Cloud Infrastructure Service Product Dept.)
> <longpeng2@huawei.com>
> Cc: philmd@redhat.com; pbonzini@redhat.com; marcel.apfelbaum@gmail.com;
> mst@redhat.com; qemu-devel@nongnu.org; Gonglei (Arei)
> <arei.gonglei@huawei.com>; chenjiashang <chenjiashang@huawei.com>
> Subject: Re: [PATCH v3 9/9] vfio: defer to commit kvm irq routing when enable
> msi/msix
> 
> On Tue, 21 Sep 2021 07:02:02 +0800
> "Longpeng(Mike)" <longpeng2@huawei.com> wrote:
> 
> > In migration resume phase, all unmasked msix vectors need to be
> > setup when load the VF state. However, the setup operation would
> 
> s/load/loading/
> 
> > take longer if the VM has more VFs and each VF has more unmasked
> > vectors.
> >
> > The hot spot is kvm_irqchip_commit_routes, it'll scan and update
> > all irqfds that already assigned each invocation, so more vectors
> 
> s/that/that are/
> 
> > means need more time to process them.
> >
> > vfio_pci_load_config
> >   vfio_msix_enable
> >     msix_set_vector_notifiers
> >       for (vector = 0; vector < dev->msix_entries_nr; vector++) {
> >         vfio_msix_vector_do_use
> >           vfio_add_kvm_msi_virq
> >             kvm_irqchip_commit_routes <-- expensive
> >       }
> >
> > We can reduce the cost by only commit once outside the loop. The
> 
> s/commit/committing/
> 

OK, will fix in the next version, thanks.

> > routes is cached in kvm_state, we commit them first and then bind
> 
> s/is/are/
> 

OK.

> > irqfd for each vector.
> >
> > The test VM has 128 vcpus and 8 VF (each one has 65 vectors),
> > we measure the cost of the vfio_msix_enable for each VF, and
> > we can see 90+% costs can be reduce.
> >
> > VF      Count of irqfds[*]  Original        With this patch
> >
> > 1st           65            8               2
> > 2nd           130           15              2
> > 3rd           195           22              2
> > 4th           260           24              3
> > 5th           325           36              2
> > 6th           390           44              3
> > 7th           455           51              3
> > 8th           520           58              4
> > Total                       258ms           21ms
> >
> > [*] Count of irqfds
> > How many irqfds that already assigned and need to process in this
> > round.
> >
> > The optimition can be applied to msi type too.
> 
> s/optimition/optimization/
> 

OK, thanks.

> >
> > Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com>
> > ---
> >  hw/vfio/pci.c | 36 ++++++++++++++++++++++++++++--------
> >  1 file changed, 28 insertions(+), 8 deletions(-)
> >
> > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > index 2de1cc5425..b26129bddf 100644
> > --- a/hw/vfio/pci.c
> > +++ b/hw/vfio/pci.c
> > @@ -513,11 +513,13 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev,
> unsigned int nr,
> >       * increase them as needed.
> >       */
> >      if (vdev->nr_vectors < nr + 1) {
> > -        vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX);
> >          vdev->nr_vectors = nr + 1;
> > -        ret = vfio_enable_vectors(vdev, true);
> > -        if (ret) {
> > -            error_report("vfio: failed to enable vectors, %d", ret);
> > +        if (!vdev->defer_kvm_irq_routing) {
> > +            vfio_disable_irqindex(&vdev->vbasedev,
> VFIO_PCI_MSIX_IRQ_INDEX);
> > +            ret = vfio_enable_vectors(vdev, true);
> > +            if (ret) {
> > +                error_report("vfio: failed to enable vectors, %d", ret);
> > +            }
> >          }
> >      } else {
> >          Error *err = NULL;
> > @@ -579,8 +581,7 @@ static void vfio_msix_vector_release(PCIDevice *pdev,
> unsigned int nr)
> >      }
> >  }
> >
> > -/* TODO: invoked when enclabe msi/msix vectors */
> > -static __attribute__((unused)) void vfio_commit_kvm_msi_virq(VFIOPCIDevice
> *vdev)
> > +static void vfio_commit_kvm_msi_virq(VFIOPCIDevice *vdev)
> >  {
> >      int i;
> >      VFIOMSIVector *vector;
> > @@ -610,6 +611,9 @@ static __attribute__((unused)) void
> vfio_commit_kvm_msi_virq(VFIOPCIDevice *vdev
> >
> >  static void vfio_msix_enable(VFIOPCIDevice *vdev)
> >  {
> > +    PCIDevice *pdev = &vdev->pdev;
> > +    int ret;
> > +
> >      vfio_disable_interrupts(vdev);
> >
> >      vdev->msi_vectors = g_new0(VFIOMSIVector, vdev->msix->entries);
> > @@ -632,11 +636,22 @@ static void vfio_msix_enable(VFIOPCIDevice *vdev)
> >      vfio_msix_vector_do_use(&vdev->pdev, 0, NULL, NULL);
> >      vfio_msix_vector_release(&vdev->pdev, 0);
> >
> > -    if (msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use,
> > -                                  vfio_msix_vector_release, NULL)) {
> 
> A comment would be useful here, maybe something like:
> 
>     /*
>      * Setting vector notifiers triggers synchronous vector-use
>      * callbacks for each active vector.  Deferring to commit the KVM
>      * routes once rather than per vector provides a substantial
>      * performance improvement.
>      */
> 

Will add in the next version.

> > +    vdev->defer_kvm_irq_routing = true;
> > +
> > +    ret = msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use,
> > +                                    vfio_msix_vector_release, NULL);
> > +    if (ret < 0) {
> >          error_report("vfio: msix_set_vector_notifiers failed");
> > +    } else if (!pdev->msix_function_masked) {
> > +        vfio_commit_kvm_msi_virq(vdev);
> > +        vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX);
> 
> Couldn't we also optimize the do_use/release on vector 0 above to avoid
> this gratuitous disable here? We only want to make sure MSIX is always

It seems the disable here can simply be removed, because we already disable
interrupts at the beginning of vfio_msix_enable()?

> enabled on the device when we exit this function, so maybe that code
> becomes an "else" branch below?
> 

Do you mean something like:

if (ret < 0) {
    ....
} else if (!pdev->msix_function_masked) {
    ....
    ret = vfio_enable_vectors(vdev, true);
    ....
} else {
    /* do_use/release on vector 0 */
}

We'll get '-EINVAL' if we invoke vfio_enable_vectors with vdev->nr_vectors=0.
This could not happen before, but it can with this change. So maybe the "else if"
condition should be changed to
"!pdev->msix_function_masked && vdev->nr_vectors"?


> > +        ret = vfio_enable_vectors(vdev, true);
> > +        if (ret) {
> > +            error_report("vfio: failed to enable vectors, %d", ret);
> > +        }
> >      }
> >
> > +    vdev->defer_kvm_irq_routing = false;
> >      trace_vfio_msix_enable(vdev->vbasedev.name);
> >  }
> >
> > @@ -645,6 +660,7 @@ static void vfio_msi_enable(VFIOPCIDevice *vdev)
> >      int ret, i;
> >
> >      vfio_disable_interrupts(vdev);
> > +    vdev->defer_kvm_irq_routing = true;
> >
> >      vdev->nr_vectors = msi_nr_vectors_allocated(&vdev->pdev);
> >  retry:
> > @@ -671,6 +687,8 @@ retry:
> >          vfio_add_kvm_msi_virq(vdev, vector, i, false);
> >      }
> >
> > +    vfio_commit_kvm_msi_virq(vdev);
> > +
> >      /* Set interrupt type prior to possible interrupts */
> >      vdev->interrupt = VFIO_INT_MSI;
> >
> > @@ -697,9 +715,11 @@ retry:
> >           */
> >          error_report("vfio: Error: Failed to enable MSI");
> >
> > +        vdev->defer_kvm_irq_routing = false;
> >          return;
> >      }
> >
> > +    vdev->defer_kvm_irq_routing = false;
> 
> Why wouldn't we clear the flag in vfio_commit_kvm_msi_virq()?  It
> almost feels like there should be a vfio_prepare_kvm_msi_virq_batch()
> that enables the flag and an unconditional
> vfio_commit_kvm_msi_virq_batch() that clears the flag and decides if
> further work is necessary.  Thanks,
> 
> Alex
> 
> >      trace_vfio_msi_enable(vdev->vbasedev.name, vdev->nr_vectors);
> >  }
> >
diff mbox series

Patch

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 2de1cc5425..b26129bddf 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -513,11 +513,13 @@  static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
      * increase them as needed.
      */
     if (vdev->nr_vectors < nr + 1) {
-        vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX);
         vdev->nr_vectors = nr + 1;
-        ret = vfio_enable_vectors(vdev, true);
-        if (ret) {
-            error_report("vfio: failed to enable vectors, %d", ret);
+        if (!vdev->defer_kvm_irq_routing) {
+            vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX);
+            ret = vfio_enable_vectors(vdev, true);
+            if (ret) {
+                error_report("vfio: failed to enable vectors, %d", ret);
+            }
         }
     } else {
         Error *err = NULL;
@@ -579,8 +581,7 @@  static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
     }
 }
 
-/* TODO: invoked when enclabe msi/msix vectors */
-static __attribute__((unused)) void vfio_commit_kvm_msi_virq(VFIOPCIDevice *vdev)
+static void vfio_commit_kvm_msi_virq(VFIOPCIDevice *vdev)
 {
     int i;
     VFIOMSIVector *vector;
@@ -610,6 +611,9 @@  static __attribute__((unused)) void vfio_commit_kvm_msi_virq(VFIOPCIDevice *vdev
 
 static void vfio_msix_enable(VFIOPCIDevice *vdev)
 {
+    PCIDevice *pdev = &vdev->pdev;
+    int ret;
+
     vfio_disable_interrupts(vdev);
 
     vdev->msi_vectors = g_new0(VFIOMSIVector, vdev->msix->entries);
@@ -632,11 +636,22 @@  static void vfio_msix_enable(VFIOPCIDevice *vdev)
     vfio_msix_vector_do_use(&vdev->pdev, 0, NULL, NULL);
     vfio_msix_vector_release(&vdev->pdev, 0);
 
-    if (msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use,
-                                  vfio_msix_vector_release, NULL)) {
+    vdev->defer_kvm_irq_routing = true;
+
+    ret = msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use,
+                                    vfio_msix_vector_release, NULL);
+    if (ret < 0) {
         error_report("vfio: msix_set_vector_notifiers failed");
+    } else if (!pdev->msix_function_masked) {
+        vfio_commit_kvm_msi_virq(vdev);
+        vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX);
+        ret = vfio_enable_vectors(vdev, true);
+        if (ret) {
+            error_report("vfio: failed to enable vectors, %d", ret);
+        }
     }
 
+    vdev->defer_kvm_irq_routing = false;
     trace_vfio_msix_enable(vdev->vbasedev.name);
 }
 
@@ -645,6 +660,7 @@  static void vfio_msi_enable(VFIOPCIDevice *vdev)
     int ret, i;
 
     vfio_disable_interrupts(vdev);
+    vdev->defer_kvm_irq_routing = true;
 
     vdev->nr_vectors = msi_nr_vectors_allocated(&vdev->pdev);
 retry:
@@ -671,6 +687,8 @@  retry:
         vfio_add_kvm_msi_virq(vdev, vector, i, false);
     }
 
+    vfio_commit_kvm_msi_virq(vdev);
+
     /* Set interrupt type prior to possible interrupts */
     vdev->interrupt = VFIO_INT_MSI;
 
@@ -697,9 +715,11 @@  retry:
          */
         error_report("vfio: Error: Failed to enable MSI");
 
+        vdev->defer_kvm_irq_routing = false;
         return;
     }
 
+    vdev->defer_kvm_irq_routing = false;
     trace_vfio_msi_enable(vdev->vbasedev.name, vdev->nr_vectors);
 }