diff mbox

[v2,23/30] xen/x86: route legacy PCI interrupts to Dom0

Message ID 1474991845-27962-24-git-send-email-roger.pau@citrix.com (mailing list archive)
State New, archived
Headers show

Commit Message

Roger Pau Monné Sept. 27, 2016, 3:57 p.m. UTC
This is done by adding some Dom0 specific logic to the IO APIC emulation inside
of Xen, so that writes to the IO APIC registers that should unmask an
interrupt will take care of setting up this interrupt with Xen. A Dom0
specific EOI handler also has to be used, since Xen doesn't know the
topology of the PCI devices and it just has to pass through what Dom0 does.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Paul Durrant <paul.durrant@citrix.com>
---
 xen/arch/x86/hvm/irq.c       |   9 +++
 xen/arch/x86/hvm/vioapic.c   |  28 ++++++++-
 xen/arch/x86/physdev.c       |   4 --
 xen/drivers/passthrough/io.c | 144 ++++++++++++++++++++++++++++++++++++++-----
 xen/include/asm-x86/hvm/io.h |   2 +
 xen/include/asm-x86/irq.h    |   5 ++
 xen/include/xen/hvm/irq.h    |   3 +
 xen/include/xen/iommu.h      |   1 +
 8 files changed, 177 insertions(+), 19 deletions(-)

Comments

Jan Beulich Oct. 10, 2016, 1:37 p.m. UTC | #1
>>> On 27.09.16 at 17:57, <roger.pau@citrix.com> wrote:
> This is done adding some Dom0 specific logic to the IO APIC emulation inside
> of Xen, so that writes to the IO APIC registers that should unmask an
> interrupt will take care of setting up this interrupt with Xen. A Dom0
> specific EOI handler also has to be used, since Xen doesn't know the
> topology of the PCI devices and it just has to passthrough what Dom0 does.

Without having looked at the patch (yet) I have a hard time seeing
the connection between EOI and PCI topology. I therefore think the
description needs improvement.

> --- a/xen/arch/x86/hvm/vioapic.c
> +++ b/xen/arch/x86/hvm/vioapic.c
> @@ -148,6 +148,29 @@ static void vioapic_write_redirent(
>          unmasked = unmasked && !ent.fields.mask;
>      }
>  
> +    if ( is_hardware_domain(d) && unmasked )
> +    {
> +        int ret, gsi;
> +
> +        /* Interrupt has been unmasked */
> +        gsi = idx;
> +        ret = mp_register_gsi(gsi, ent.fields.trig_mode, ent.fields.polarity);
> +        if ( ret && ret != -EEXIST )
> +        {
> +            gdprintk(XENLOG_WARNING,
> +                     "%s: error registering GSI %d\n", __func__, ret);

The message text suggests the number is the GSI, whereas it really
looks to be an error code (and I guess you really mean to log both).
Also please no unnecessary new uses of __func__.

> +        }
> +        if ( !ret )
> +        {
> +            ret = physdev_map_pirq(DOMID_SELF, MAP_PIRQ_TYPE_GSI, &gsi, &gsi,
> +                                   NULL);
> +            BUG_ON(ret);
> +
> +            ret = pt_irq_bind_hw_domain(gsi);
> +            BUG_ON(ret);

Why BUG_ON() (in both cases)? I don't think we're necessarily hosed
just because of one IRQ setup failure.

> @@ -409,7 +432,10 @@ void vioapic_update_EOI(struct domain *d, u8 vector)
>          if ( iommu_enabled )
>          {
>              spin_unlock(&d->arch.hvm_domain.irq_lock);
> -            hvm_dpci_eoi(d, gsi, ent);
> +            if ( is_hardware_domain(d) )
> +                hvm_hw_dpci_eoi(d, gsi, ent);
> +            else
> +                hvm_dpci_eoi(d, gsi, ent);

This looks like you rather want to make the distinction inside the
called function.

> --- a/xen/drivers/passthrough/io.c
> +++ b/xen/drivers/passthrough/io.c
> @@ -159,26 +159,29 @@ static int pt_irq_guest_eoi(struct domain *d, struct hvm_pirq_dpci *pirq_dpci,
>  static void pt_irq_time_out(void *data)
>  {
>      struct hvm_pirq_dpci *irq_map = data;
> -    const struct hvm_irq_dpci *dpci;
>      const struct dev_intx_gsi_link *digl;
>  
>      spin_lock(&irq_map->dom->event_lock);
>  
> -    dpci = domain_get_irq_dpci(irq_map->dom);
> -    ASSERT(dpci);
> -    list_for_each_entry ( digl, &irq_map->digl_list, list )
> +    if ( !is_hardware_domain(irq_map->dom) )
>      {
> -        unsigned int guest_gsi = hvm_pci_intx_gsi(digl->device, digl->intx);
> -        const struct hvm_girq_dpci_mapping *girq;
> -
> -        list_for_each_entry ( girq, &dpci->girq[guest_gsi], list )
> +        const struct hvm_irq_dpci *dpci = domain_get_irq_dpci(irq_map->dom);
> +        ASSERT(dpci);

Blank line between declarations and statements please.

> +        list_for_each_entry ( digl, &irq_map->digl_list, list )
>          {
> -            struct pirq *pirq = pirq_info(irq_map->dom, girq->machine_gsi);
> +            unsigned int guest_gsi = hvm_pci_intx_gsi(digl->device, digl->intx);
> +            const struct hvm_girq_dpci_mapping *girq;
> +
> +            list_for_each_entry ( girq, &dpci->girq[guest_gsi], list )
> +            {
> +                struct pirq *pirq = pirq_info(irq_map->dom, girq->machine_gsi);
>  
> -            pirq_dpci(pirq)->flags |= HVM_IRQ_DPCI_EOI_LATCH;
> +                pirq_dpci(pirq)->flags |= HVM_IRQ_DPCI_EOI_LATCH;
> +            }
> +            hvm_pci_intx_deassert(irq_map->dom, digl->device, digl->intx);
>          }
> -        hvm_pci_intx_deassert(irq_map->dom, digl->device, digl->intx);
> -    }
> +    } else

Coding style.

> +        irq_map->flags |= HVM_IRQ_DPCI_EOI_LATCH;

And I'm afraid I can't conclude anyway why you do what you do
here, as you don't really describe your changes in any detail.

> @@ -557,6 +560,85 @@ int pt_irq_create_bind(
>      return 0;
>  }
>  
> +int pt_irq_bind_hw_domain(int gsi)
> +{
> +    struct domain *d = hardware_domain;
> +    struct hvm_pirq_dpci *pirq_dpci;
> +    struct hvm_irq_dpci *hvm_irq_dpci;
> +    struct pirq *info;
> +    int rc;
> +
> +    if ( gsi < 0 || gsi >= d->nr_pirqs )
> +        return -EINVAL;
> +
> +restart:

Labels (if they're needed at all) indented by at least one blank
please.

And I'm afraid I'm giving up again.

Jan
diff mbox

Patch

diff --git a/xen/arch/x86/hvm/irq.c b/xen/arch/x86/hvm/irq.c
index 5323d7c..be9b648 100644
--- a/xen/arch/x86/hvm/irq.c
+++ b/xen/arch/x86/hvm/irq.c
@@ -88,6 +88,15 @@  void hvm_pci_intx_assert(
     spin_unlock(&d->arch.hvm_domain.irq_lock);
 }
 
+void hvm_hw_gsi_assert(struct domain *d, unsigned int gsi)
+{
+
+    ASSERT(is_hardware_domain(d));
+    spin_lock(&d->arch.hvm_domain.irq_lock);
+    assert_gsi(d, gsi);
+    spin_unlock(&d->arch.hvm_domain.irq_lock);
+}
+
 static void __hvm_pci_intx_deassert(
     struct domain *d, unsigned int device, unsigned int intx)
 {
diff --git a/xen/arch/x86/hvm/vioapic.c b/xen/arch/x86/hvm/vioapic.c
index 611be87..18305be 100644
--- a/xen/arch/x86/hvm/vioapic.c
+++ b/xen/arch/x86/hvm/vioapic.c
@@ -148,6 +148,29 @@  static void vioapic_write_redirent(
         unmasked = unmasked && !ent.fields.mask;
     }
 
+    if ( is_hardware_domain(d) && unmasked )
+    {
+        int ret, gsi;
+
+        /* Interrupt has been unmasked */
+        gsi = idx;
+        ret = mp_register_gsi(gsi, ent.fields.trig_mode, ent.fields.polarity);
+        if ( ret && ret != -EEXIST )
+        {
+            gdprintk(XENLOG_WARNING,
+                     "%s: error registering GSI %d\n", __func__, ret);
+        }
+        if ( !ret )
+        {
+            ret = physdev_map_pirq(DOMID_SELF, MAP_PIRQ_TYPE_GSI, &gsi, &gsi,
+                                   NULL);
+            BUG_ON(ret);
+
+            ret = pt_irq_bind_hw_domain(gsi);
+            BUG_ON(ret);
+        }
+    }
+
     *pent = ent;
 
     if ( idx == 0 )
@@ -409,7 +432,10 @@  void vioapic_update_EOI(struct domain *d, u8 vector)
         if ( iommu_enabled )
         {
             spin_unlock(&d->arch.hvm_domain.irq_lock);
-            hvm_dpci_eoi(d, gsi, ent);
+            if ( is_hardware_domain(d) )
+                hvm_hw_dpci_eoi(d, gsi, ent);
+            else
+                hvm_dpci_eoi(d, gsi, ent);
             spin_lock(&d->arch.hvm_domain.irq_lock);
         }
 
diff --git a/xen/arch/x86/physdev.c b/xen/arch/x86/physdev.c
index 0bea6e1..27dcbf4 100644
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -19,10 +19,6 @@ 
 #include <xsm/xsm.h>
 #include <asm/p2m.h>
 
-int physdev_map_pirq(domid_t, int type, int *index, int *pirq_p,
-                     struct msi_info *);
-int physdev_unmap_pirq(domid_t, int pirq);
-
 #include "x86_64/mmconfig.h"
 
 #ifndef COMPAT
diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
index 66577b6..edd8dbd 100644
--- a/xen/drivers/passthrough/io.c
+++ b/xen/drivers/passthrough/io.c
@@ -159,26 +159,29 @@  static int pt_irq_guest_eoi(struct domain *d, struct hvm_pirq_dpci *pirq_dpci,
 static void pt_irq_time_out(void *data)
 {
     struct hvm_pirq_dpci *irq_map = data;
-    const struct hvm_irq_dpci *dpci;
     const struct dev_intx_gsi_link *digl;
 
     spin_lock(&irq_map->dom->event_lock);
 
-    dpci = domain_get_irq_dpci(irq_map->dom);
-    ASSERT(dpci);
-    list_for_each_entry ( digl, &irq_map->digl_list, list )
+    if ( !is_hardware_domain(irq_map->dom) )
     {
-        unsigned int guest_gsi = hvm_pci_intx_gsi(digl->device, digl->intx);
-        const struct hvm_girq_dpci_mapping *girq;
-
-        list_for_each_entry ( girq, &dpci->girq[guest_gsi], list )
+        const struct hvm_irq_dpci *dpci = domain_get_irq_dpci(irq_map->dom);
+        ASSERT(dpci);
+        list_for_each_entry ( digl, &irq_map->digl_list, list )
         {
-            struct pirq *pirq = pirq_info(irq_map->dom, girq->machine_gsi);
+            unsigned int guest_gsi = hvm_pci_intx_gsi(digl->device, digl->intx);
+            const struct hvm_girq_dpci_mapping *girq;
+
+            list_for_each_entry ( girq, &dpci->girq[guest_gsi], list )
+            {
+                struct pirq *pirq = pirq_info(irq_map->dom, girq->machine_gsi);
 
-            pirq_dpci(pirq)->flags |= HVM_IRQ_DPCI_EOI_LATCH;
+                pirq_dpci(pirq)->flags |= HVM_IRQ_DPCI_EOI_LATCH;
+            }
+            hvm_pci_intx_deassert(irq_map->dom, digl->device, digl->intx);
         }
-        hvm_pci_intx_deassert(irq_map->dom, digl->device, digl->intx);
-    }
+    } else
+        irq_map->flags |= HVM_IRQ_DPCI_EOI_LATCH;
 
     pt_pirq_iterate(irq_map->dom, pt_irq_guest_eoi, NULL);
 
@@ -557,6 +560,85 @@  int pt_irq_create_bind(
     return 0;
 }
 
+int pt_irq_bind_hw_domain(int gsi)
+{
+    struct domain *d = hardware_domain;
+    struct hvm_pirq_dpci *pirq_dpci;
+    struct hvm_irq_dpci *hvm_irq_dpci;
+    struct pirq *info;
+    int rc;
+
+    if ( gsi < 0 || gsi >= d->nr_pirqs )
+        return -EINVAL;
+
+restart:
+    spin_lock(&d->event_lock);
+
+    hvm_irq_dpci = domain_get_irq_dpci(d);
+    if ( hvm_irq_dpci == NULL )
+    {
+        unsigned int i;
+
+        hvm_irq_dpci = xzalloc(struct hvm_irq_dpci);
+        if ( hvm_irq_dpci == NULL )
+        {
+            spin_unlock(&d->event_lock);
+            return -ENOMEM;
+        }
+        for ( i = 0; i < NR_HVM_IRQS; i++ )
+            INIT_LIST_HEAD(&hvm_irq_dpci->girq[i]);
+
+        d->arch.hvm_domain.irq.dpci = hvm_irq_dpci;
+    }
+
+    info = pirq_get_info(d, gsi);
+    if ( !info )
+    {
+        spin_unlock(&d->event_lock);
+        return -ENOMEM;
+    }
+    pirq_dpci = pirq_dpci(info);
+
+    /*
+     * A crude 'while' loop with us dropping the spinlock and giving
+     * the softirq_dpci a chance to run.
+     * We MUST check for this condition as the softirq could be scheduled
+     * and hasn't run yet. Note that this code replaced tasklet_kill which
+     * would have spun forever and would do the same thing (wait to flush out
+     * outstanding hvm_dirq_assist calls).
+     */
+    if ( pt_pirq_softirq_active(pirq_dpci) )
+    {
+        spin_unlock(&d->event_lock);
+        cpu_relax();
+        goto restart;
+    }
+
+    pirq_dpci->dom = d;
+    pirq_dpci->flags = HVM_IRQ_DPCI_MAPPED |
+                       HVM_IRQ_DPCI_MACH_PCI |
+                       HVM_IRQ_DPCI_GUEST_PCI;
+
+    /* Init timer before binding */
+    if ( pt_irq_need_timer(pirq_dpci->flags) )
+        init_timer(&pirq_dpci->timer, pt_irq_time_out, pirq_dpci, 0);
+
+    rc = pirq_guest_bind(d->vcpu[0], info, gsi > 15 ? BIND_PIRQ__WILL_SHARE :
+                                                      0);
+    if ( unlikely(rc) )
+    {
+        if ( pt_irq_need_timer(pirq_dpci->flags) )
+            kill_timer(&pirq_dpci->timer);
+        pirq_dpci->dom = NULL;
+        pirq_cleanup_check(info, d);
+        spin_unlock(&d->event_lock);
+        return rc;
+    }
+
+    spin_unlock(&d->event_lock);
+    return 0;
+}
+
 int pt_irq_destroy_bind(
     struct domain *d, xen_domctl_bind_pt_irq_t *pt_irq_bind)
 {
@@ -819,11 +901,19 @@  static void hvm_dirq_assist(struct domain *d, struct hvm_pirq_dpci *pirq_dpci)
             return;
         }
 
-        list_for_each_entry ( digl, &pirq_dpci->digl_list, list )
+        if ( is_hardware_domain(d) )
         {
-            hvm_pci_intx_assert(d, digl->device, digl->intx);
+            hvm_hw_gsi_assert(d, pirq->pirq);
             pirq_dpci->pending++;
         }
+        else
+        {
+            list_for_each_entry ( digl, &pirq_dpci->digl_list, list )
+            {
+                hvm_pci_intx_assert(d, digl->device, digl->intx);
+                pirq_dpci->pending++;
+            }
+        }
 
         if ( pirq_dpci->flags & HVM_IRQ_DPCI_TRANSLATE )
         {
@@ -899,6 +989,32 @@  unlock:
     spin_unlock(&d->event_lock);
 }
 
+void hvm_hw_dpci_eoi(struct domain *d, unsigned int gsi,
+                     const union vioapic_redir_entry *ent)
+{
+    struct pirq *pirq = pirq_info(d, gsi);
+    struct hvm_pirq_dpci *pirq_dpci;
+
+    ASSERT(is_hardware_domain(d) && iommu_enabled);
+
+    if ( pirq == NULL )
+        return;
+
+    pirq_dpci = pirq_dpci(pirq);
+    ASSERT(pirq_dpci != NULL);
+
+    spin_lock(&d->event_lock);
+    if ( --pirq_dpci->pending || (ent && ent->fields.mask) ||
+         !pt_irq_need_timer(pirq_dpci->flags) )
+        goto unlock;
+
+    stop_timer(&pirq_dpci->timer);
+    pirq_guest_eoi(pirq);
+
+unlock:
+    spin_unlock(&d->event_lock);
+}
+
 /*
  * Note: 'pt_pirq_softirq_reset' can clear the STATE_SCHED before we get to
  * doing it. If that is the case we let 'pt_pirq_softirq_reset' do ref-counting.
diff --git a/xen/include/asm-x86/hvm/io.h b/xen/include/asm-x86/hvm/io.h
index 25af036..bfd76ff 100644
--- a/xen/include/asm-x86/hvm/io.h
+++ b/xen/include/asm-x86/hvm/io.h
@@ -126,6 +126,8 @@  int handle_pio(uint16_t port, unsigned int size, int dir);
 void hvm_interrupt_post(struct vcpu *v, int vector, int type);
 void hvm_dpci_eoi(struct domain *d, unsigned int guest_irq,
                   const union vioapic_redir_entry *ent);
+void hvm_hw_dpci_eoi(struct domain *d, unsigned int gsi,
+                     const union vioapic_redir_entry *ent);
 void msix_write_completion(struct vcpu *);
 void msixtbl_init(struct domain *d);
 
diff --git a/xen/include/asm-x86/irq.h b/xen/include/asm-x86/irq.h
index 7efdd37..07f21ab 100644
--- a/xen/include/asm-x86/irq.h
+++ b/xen/include/asm-x86/irq.h
@@ -201,4 +201,9 @@  bool_t cpu_has_pending_apic_eoi(void);
 
 static inline void arch_move_irqs(struct vcpu *v) { }
 
+struct msi_info;
+int physdev_map_pirq(domid_t, int type, int *index, int *pirq_p,
+                     struct msi_info *);
+int physdev_unmap_pirq(domid_t, int pirq);
+
 #endif /* _ASM_HW_IRQ_H */
diff --git a/xen/include/xen/hvm/irq.h b/xen/include/xen/hvm/irq.h
index 4c9cb20..2ffaf35 100644
--- a/xen/include/xen/hvm/irq.h
+++ b/xen/include/xen/hvm/irq.h
@@ -122,6 +122,9 @@  void hvm_isa_irq_assert(
 void hvm_isa_irq_deassert(
     struct domain *d, unsigned int isa_irq);
 
+/* Modify state of a hardware domain GSI */
+void hvm_hw_gsi_assert(struct domain *d, unsigned int gsi);
+
 void hvm_set_pci_link_route(struct domain *d, u8 link, u8 isa_irq);
 
 int hvm_inject_msi(struct domain *d, uint64_t addr, uint32_t data);
diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
index 5803e3f..07c6c40 100644
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -114,6 +114,7 @@  struct pirq;
 int hvm_do_IRQ_dpci(struct domain *, struct pirq *);
 int pt_irq_create_bind(struct domain *, xen_domctl_bind_pt_irq_t *);
 int pt_irq_destroy_bind(struct domain *, xen_domctl_bind_pt_irq_t *);
+int pt_irq_bind_hw_domain(int gsi);
 
 void hvm_dpci_isairq_eoi(struct domain *d, unsigned int isairq);
 struct hvm_irq_dpci *domain_get_irq_dpci(const struct domain *);