diff mbox series

[v4,18/19] spapr: Handle irq backend changes with VFIO PCI devices

Message ID 20191009060818.29719-19-david@gibson.dropbear.id.au (mailing list archive)
State New, archived
Headers show
Series spapr: IRQ subsystem cleanup | expand

Commit Message

David Gibson Oct. 9, 2019, 6:08 a.m. UTC
pseries machine type can have one of two different interrupt controllers in
use depending on feature negotiation with the guest.  Usually this is
invisible to devices, because they route to a common set of qemu_irqs which
in turn dispatch to the correct back end.

VFIO passthrough devices, however, wire themselves up directly to the KVM
irqchip for performance, which means they are affected by this change in
interrupt controller.

Luckily, there's a notifier chain that will tell VFIO devices to update
their mappings - we just need to call it whenever the intc backend might
change.

In addition, we make sure we set an active intc earlier, because otherwise
vfio can issue a false warning, because it doesn't think a KVM irqchip is
in use (which is essentially for good INTx performance).

Cc: Alex Williamson <alex.williamson@redhat.com>
Cc: Alexey Kardashevskiy <aik@ozlabs.ru>

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/ppc/spapr_irq.c     | 6 ++++++
 hw/ppc/spapr_pci.c     | 9 +++++++++
 include/hw/ppc/spapr.h | 1 +
 3 files changed, 16 insertions(+)

Comments

David Gibson Oct. 9, 2019, 8:57 a.m. UTC | #1
On Wed, Oct 09, 2019 at 05:08:17PM +1100, David Gibson wrote:
> pseries machine type can have one of two different interrupt controllers in
> use depending on feature negotiation with the guest.  Usually this is
> invisible to devices, because they route to a common set of qemu_irqs which
> in turn dispatch to the correct back end.
> 
> VFIO passthrough devices, however, wire themselves up directly to the KVM
> irqchip for performance, which means they are affected by this change in
> interrupt controller.
> 
> Luckily, there's a notifier chain that will tell VFIO devices to update
> their mappings - we just need to call it whenever the intc backend might
> change.
> 
> In addition, we make sure we set an active intc earlier, because otherwise
> vfio can issue a false warning, because it doesn't think a KVM irqchip is
> in use (which is essentially for good INTx performance).
> 
> Cc: Alex Williamson <alex.williamson@redhat.com>
> Cc: Alexey Kardashevskiy <aik@ozlabs.ru>
> 
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

For reference, the reason I described this as RFC only in the cover
letter, is that this doesn't work as it stands.

With this patch, when we switch intc we call the intx_routing
notifiers, and vfio_intx_update() in particular.  However that exits
early without calling vfio_intx_enable_kvm() - the bit we actually
need - because the test on pci_intx_route_changed() thinks there's
nothing to do.  The difficulty is that our use case isn't really the
same as the x86 one this notifier path was designed for: they're
changing routing of INTx to global interrupts.  For us the routing to
global interrupts remains the same, but the interrupt controller
handling those global interrupts has changed.

> ---
>  hw/ppc/spapr_irq.c     | 6 ++++++
>  hw/ppc/spapr_pci.c     | 9 +++++++++
>  include/hw/ppc/spapr.h | 1 +
>  3 files changed, 16 insertions(+)
> 
> diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
> index 473fc8780a..7964e4a1b8 100644
> --- a/hw/ppc/spapr_irq.c
> +++ b/hw/ppc/spapr_irq.c
> @@ -409,6 +409,12 @@ static void set_active_intc(SpaprMachineState *spapr,
>      }
>  
>      spapr->active_intc = new_intc;
> +
> +    /*
> +     * We've changed the interrupt routing at the KVM level, let VFIO
> +     * devices know they need to readjust.
> +     */
> +    spapr_pci_fire_intx_routing_notifiers(spapr);
>  }
>  
>  void spapr_irq_update_active_intc(SpaprMachineState *spapr)
> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> index cc0e7829b6..3bcf6325d4 100644
> --- a/hw/ppc/spapr_pci.c
> +++ b/hw/ppc/spapr_pci.c
> @@ -93,6 +93,15 @@ PCIDevice *spapr_pci_find_dev(SpaprMachineState *spapr, uint64_t buid,
>      return pci_find_device(phb->bus, bus_num, devfn);
>  }
>  
> +void spapr_pci_fire_intx_routing_notifiers(SpaprMachineState *spapr)
> +{
> +    SpaprPhbState *sphb;
> +
> +    QLIST_FOREACH(sphb, &spapr->phbs, list) {
> +        pci_bus_fire_intx_routing_notifier(PCI_HOST_BRIDGE(sphb)->bus);
> +    }
> +}
> +
>  static uint32_t rtas_pci_cfgaddr(uint32_t arg)
>  {
>      /* This handles the encoding of extended config space addresses */
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index d3b4dd7de3..66b68fdd5e 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -805,6 +805,7 @@ void spapr_clear_pending_events(SpaprMachineState *spapr);
>  int spapr_max_server_number(SpaprMachineState *spapr);
>  void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
>                        uint64_t pte0, uint64_t pte1);
> +void spapr_pci_fire_intx_routing_notifiers(SpaprMachineState *spapr);
>  
>  /* DRC callbacks. */
>  void spapr_core_release(DeviceState *dev);
diff mbox series

Patch

diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
index 473fc8780a..7964e4a1b8 100644
--- a/hw/ppc/spapr_irq.c
+++ b/hw/ppc/spapr_irq.c
@@ -409,6 +409,12 @@  static void set_active_intc(SpaprMachineState *spapr,
     }
 
     spapr->active_intc = new_intc;
+
+    /*
+     * We've changed the interrupt routing at the KVM level, let VFIO
+     * devices know they need to readjust.
+     */
+    spapr_pci_fire_intx_routing_notifiers(spapr);
 }
 
 void spapr_irq_update_active_intc(SpaprMachineState *spapr)
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index cc0e7829b6..3bcf6325d4 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -93,6 +93,15 @@  PCIDevice *spapr_pci_find_dev(SpaprMachineState *spapr, uint64_t buid,
     return pci_find_device(phb->bus, bus_num, devfn);
 }
 
+void spapr_pci_fire_intx_routing_notifiers(SpaprMachineState *spapr)
+{
+    SpaprPhbState *sphb;
+
+    QLIST_FOREACH(sphb, &spapr->phbs, list) {
+        pci_bus_fire_intx_routing_notifier(PCI_HOST_BRIDGE(sphb)->bus);
+    }
+}
+
 static uint32_t rtas_pci_cfgaddr(uint32_t arg)
 {
     /* This handles the encoding of extended config space addresses */
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index d3b4dd7de3..66b68fdd5e 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -805,6 +805,7 @@  void spapr_clear_pending_events(SpaprMachineState *spapr);
 int spapr_max_server_number(SpaprMachineState *spapr);
 void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
                       uint64_t pte0, uint64_t pte1);
+void spapr_pci_fire_intx_routing_notifiers(SpaprMachineState *spapr);
 
 /* DRC callbacks. */
 void spapr_core_release(DeviceState *dev);