diff mbox

[RFC,2/2] pci: add PCIIOMMUOps and PCIIOMMUIntRemapFunc

Message ID 1455704742-21171-3-git-send-email-peterx@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Peter Xu Feb. 17, 2016, 10:25 a.m. UTC
This patch extends the current PCI IOMMU function into an operation list;
one new op is added to do interrupt remapping.

Currently it is not working since int_remap is always NULL. It only
provides an interface to extend PCI MSI to support interrupt remapping in
the future.

One helper function, pci_setup_iommu_ops(), is introduced. We can use this
instead of the original pci_setup_iommu() to add interrupt
remapping on specific platforms.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 hw/pci/pci.c             | 21 ++++++++++++++++-----
 include/hw/pci/pci.h     | 10 ++++++++++
 include/hw/pci/pci_bus.h |  2 +-
 3 files changed, 27 insertions(+), 6 deletions(-)

Comments

Paolo Bonzini Feb. 17, 2016, 7:46 p.m. UTC | #1
On 17/02/2016 11:25, Peter Xu wrote:
> This patch extended the current PCI IOMMU functions into operation list,
> one new op is added to do interrupt remapping.
> 
> Currently it is not working since int_remap is always NULL. It only
> provide a interface to extend PCI MSI to support interrupt remapping in
> the future.
> 
> One helper function pci_setup_iommu_ops() is introduced. We can use this
> instead of the origin pci_setup_iommu() one to extend interrupt
> remapping on specific platform.

For MSI, I think interrupt remapping can be done directly in the IOMMU
MemoryRegion.  You can just overlay a new MemoryRegion on top of the
IOMMU region where MSIs are sent (that's around 0xFEE00000, I don't
remember where exactly).  It will catch interrupts sent by the device,
remap them and forward them to the right interrupt destination in the host.

I'm not sure about INTX interrupts, but I think that the host kernel
remaps them simply by virtualizing the IOAPIC's redirection table.

Paolo
Peter Xu Feb. 18, 2016, 4:54 a.m. UTC | #2
On Wed, Feb 17, 2016 at 08:46:18PM +0100, Paolo Bonzini wrote:
> 
> 
> On 17/02/2016 11:25, Peter Xu wrote:
> > This patch extended the current PCI IOMMU functions into operation list,
> > one new op is added to do interrupt remapping.
> > 
> > Currently it is not working since int_remap is always NULL. It only
> > provide a interface to extend PCI MSI to support interrupt remapping in
> > the future.
> > 
> > One helper function pci_setup_iommu_ops() is introduced. We can use this
> > instead of the origin pci_setup_iommu() one to extend interrupt
> > remapping on specific platform.
> 
> For MSI, I think interrupt remapping can be done directly in the IOMMU
> MemoryRegion.  You can just overlay a new MemoryRegion on top of the
> IOMMU region where MSIs are sent (that's around 0xFEE00000, I don't
> remember where exactly).  It will catch interrupts sent by the device,
> remap them and forward them to the right interrupt destination in the host.

Yes, it should be 0xfee00000. I'd say this is a much better idea, so
that I can leverage the current memory region code and avoid touching
PCI at all.

If the work is in the IOMMU part, I think I can send my next RFC
together with basic IR (interrupt remapping) support.

> 
> I'm not sure about INTX interrupts, but I think that the host kernel
> remaps them simply by virtualizing the IOAPIC's redirection table.

Yes, my understanding is that the IOAPIC handles all INTX
interrupts. To remap these interrupts, we just need a translation
for the IOAPIC IRQ table entries before the interrupts are delivered
to the APIC bus.

Some code changes will be needed in ACPI too, to enumerate the IOAPIC
device in the DMAR region, so that we can declare that "this IOMMU owns
the default IOAPIC".

If so, I can call the vtd_* functions in ioapic_service() directly, right?
IIUC the IOAPIC should be Intel-specific too?

Thanks!
Peter

> 
> Paolo
diff mbox

Patch

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 3f58bd4..65046e4 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2404,21 +2404,32 @@  AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
     PCIBus *bus = PCI_BUS(dev->bus);
     PCIBus *iommu_bus = bus;
 
-    while(iommu_bus && !iommu_bus->iommu_fn && iommu_bus->parent_dev) {
+    while(iommu_bus && !iommu_bus->iommu_ops.as_lookup && \
+          iommu_bus->parent_dev) {
         iommu_bus = PCI_BUS(iommu_bus->parent_dev->bus);
     }
-    if (iommu_bus && iommu_bus->iommu_fn) {
-        return iommu_bus->iommu_fn(bus, iommu_bus->iommu_opaque, dev->devfn);
+    if (iommu_bus && iommu_bus->iommu_ops.as_lookup) {
+        return iommu_bus->iommu_ops.as_lookup(bus, iommu_bus->iommu_opaque,
+                                              dev->devfn);
     }
     return &address_space_memory;
 }
 
-void pci_setup_iommu(PCIBus *bus, PCIIOMMUASLookupFunc fn, void *opaque)
+void pci_setup_iommu_ops(PCIBus *bus, PCIIOMMUOps *ops, void *opaque)
 {
-    bus->iommu_fn = fn;
+    bus->iommu_ops = *ops;
     bus->iommu_opaque = opaque;
 }
 
+void pci_setup_iommu(PCIBus *bus, PCIIOMMUASLookupFunc fn, void *opaque)
+{
+    PCIIOMMUOps ops = {
+        .as_lookup = fn,
+        .int_remap = NULL,
+    };
+    pci_setup_iommu_ops(bus, &ops, opaque);
+}
+
 static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
 {
     Range *range = opaque;
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 846afee..3636151 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -418,10 +418,20 @@  void pci_bus_get_w64_range(PCIBus *bus, Range *range);
 
 void pci_device_deassert_intx(PCIDevice *dev);
 
+/* IOMMU op to find address space for device */
 typedef AddressSpace *(*PCIIOMMUASLookupFunc)(PCIBus *, void *, int);
+/* IOMMU op to do interrupt remapping */
+typedef int (*PCIIOMMUIntRemapFunc)(void *, MSIMessage *origin, MSIMessage *out);
+
+struct PCIIOMMUOps {
+    PCIIOMMUASLookupFunc as_lookup;
+    PCIIOMMUIntRemapFunc int_remap;
+};
+typedef struct PCIIOMMUOps PCIIOMMUOps;
 
 AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
 void pci_setup_iommu(PCIBus *bus, PCIIOMMUASLookupFunc fn, void *opaque);
+void pci_setup_iommu_ops(PCIBus *bus, PCIIOMMUOps *ops, void *opaque);
 
 static inline void
 pci_set_byte(uint8_t *config, uint8_t val)
diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
index a8ab9c2..034a411 100644
--- a/include/hw/pci/pci_bus.h
+++ b/include/hw/pci/pci_bus.h
@@ -20,7 +20,7 @@  typedef struct PCIBusClass {
 
 struct PCIBus {
     BusState qbus;
-    PCIIOMMUASLookupFunc iommu_fn;
+    PCIIOMMUOps iommu_ops;
     void *iommu_opaque;
     uint8_t devfn_min;
     pci_set_irq_fn set_irq;