diff mbox

PVH Dom0 Intel IOMMU issues

Message ID 20170417103833.vwofneibjm46zbty@dhcp-3-128.uk.xensource.com (mailing list archive)
State New, archived
Headers show

Commit Message

Roger Pau Monné April 17, 2017, 10:38 a.m. UTC
On Mon, Apr 17, 2017 at 10:49:45AM +0800, Chao Gao wrote:
> On Mon, Apr 17, 2017 at 09:38:54AM +0100, Roger Pau Monné wrote:
> >On Mon, Apr 17, 2017 at 09:03:12AM +0800, Chao Gao wrote:
> >> On Mon, Apr 17, 2017 at 08:47:48AM +0100, Roger Pau Monné wrote:
> >> >On Mon, Apr 17, 2017 at 07:32:45AM +0800, Chao Gao wrote:
> >> >> On Fri, Apr 14, 2017 at 04:34:41PM +0100, Roger Pau Monné wrote:
> >> >> >Hello,
> >> >> >
> >> >> >Although PVHv2 Dom0 is not yet finished, I've been trying the current code on
> >> >> >different hardware, and found that with pre-Haswell Intel hardware PVHv2 Dom0
> >> >> >completely freezes the box when calling iommu_hwdom_init in dom0_construct_pvh.
> >> >> >OTOH the same doesn't happen when using a newer CPU (ie: haswell or newer).
> >> >> >
> >> >> >I'm not able to debug that in any meaningful way because the box seems to lock
> >> >> >up completely, even the watchdog NMI stops working. Here is the boot log, up to
> >> >> >the point where it freezes:
> >> >> 
> >> >> I try "dom0=pvh" with my skylake. An assertion failed. Is it a software bug?
> >> >> 
> >
> >It seems like we are not properly adding/accounting the vIO APICs, but I cannot
> >really see how. I have another patch for you to try below.
> >
> >Thanks, Roger.
> >
> >---8<---
> >	diff --git a/xen/arch/x86/hvm/vioapic.c b/xen/arch/x86/hvm/vioapic.c
> >index 527ac2aadd..40075e2756 100644
> >--- a/xen/arch/x86/hvm/vioapic.c
> >+++ b/xen/arch/x86/hvm/vioapic.c
> >@@ -610,11 +610,15 @@ int vioapic_init(struct domain *d)
> >            xzalloc_array(struct hvm_vioapic *, nr_vioapics)) == NULL) )
> >         return -ENOMEM;
> > 
> >+    printk("Adding %u vIO APICs\n", nr_vioapics);
> >+
> >     for ( i = 0; i < nr_vioapics; i++ )
> >     {
> >         unsigned int nr_pins = is_hardware_domain(d) ? nr_ioapic_entries[i] :
> >             ARRAY_SIZE(domain_vioapic(d, 0)->domU.redirtbl);
> > 
> >+        printk("vIO APIC %u has %u pins\n", i, nr_pins);
> >+
> >         if ( (domain_vioapic(d, i) =
> >               xmalloc_bytes(hvm_vioapic_size(nr_pins))) == NULL )
> >         {
> >@@ -623,8 +627,12 @@ int vioapic_init(struct domain *d)
> >         }
> >         domain_vioapic(d, i)->nr_pins = nr_pins;
> >         nr_gsis += nr_pins;
> >+        printk("nr_gsis: %u\n", nr_gsis);
> >     }
> > 
> >+    printk("domain nr_gsis: %u vioapic gsis: %u nr_irqs_gsi: %u highest_gsi: %u\n",
> >+           hvm_domain_irq(d)->nr_gsis, nr_gsis, nr_irqs_gsi, highest_gsi());
> >+
> >     ASSERT(hvm_domain_irq(d)->nr_gsis == nr_gsis);
> > 
> >     d->arch.hvm_domain.nr_vioapics = nr_vioapics;
> >
> 
> Please Cc or To me.  Is there some holes in all physical IOAPICs gsi ranges?

That's weird, my MUA (Mutt) seems to automatically remove your address from the
"To:" field. I have no idea why it does that.

So yes, your box has as GSI gap which is not handled by any IO APIC. TBH, I
didn't even knew that was possible. In any case, patch below should solve it.

---8<---
commit f52d05fca03440d771eb56077c9d60bb630eb423

Comments

Chao Gao April 17, 2017, 5:12 a.m. UTC | #1
On Mon, Apr 17, 2017 at 11:38:33AM +0100, Roger Pau Monné wrote:
>On Mon, Apr 17, 2017 at 10:49:45AM +0800, Chao Gao wrote:
>> On Mon, Apr 17, 2017 at 09:38:54AM +0100, Roger Pau Monné wrote:
>> >On Mon, Apr 17, 2017 at 09:03:12AM +0800, Chao Gao wrote:
>> >> On Mon, Apr 17, 2017 at 08:47:48AM +0100, Roger Pau Monné wrote:
>> >> >On Mon, Apr 17, 2017 at 07:32:45AM +0800, Chao Gao wrote:
>> >> >> On Fri, Apr 14, 2017 at 04:34:41PM +0100, Roger Pau Monné wrote:
>> >> >> >Hello,
>> >> >> >
>> >> >> >Although PVHv2 Dom0 is not yet finished, I've been trying the current code on
>> >> >> >different hardware, and found that with pre-Haswell Intel hardware PVHv2 Dom0
>> >> >> >completely freezes the box when calling iommu_hwdom_init in dom0_construct_pvh.
>> >> >> >OTOH the same doesn't happen when using a newer CPU (ie: haswell or newer).
>> >> >> >
>> >> >> >I'm not able to debug that in any meaningful way because the box seems to lock
>> >> >> >up completely, even the watchdog NMI stops working. Here is the boot log, up to
>> >> >> >the point where it freezes:
>> >> >> 
>> >> >> I try "dom0=pvh" with my skylake. An assertion failed. Is it a software bug?
>> >> >> 
>> >
>> >It seems like we are not properly adding/accounting the vIO APICs, but I cannot
>> >really see how. I have another patch for you to try below.
>> >
>> >Thanks, Roger.
>> >
>> >---8<---
>> >	diff --git a/xen/arch/x86/hvm/vioapic.c b/xen/arch/x86/hvm/vioapic.c
>> >index 527ac2aadd..40075e2756 100644
>> >--- a/xen/arch/x86/hvm/vioapic.c
>> >+++ b/xen/arch/x86/hvm/vioapic.c
>> >@@ -610,11 +610,15 @@ int vioapic_init(struct domain *d)
>> >            xzalloc_array(struct hvm_vioapic *, nr_vioapics)) == NULL) )
>> >         return -ENOMEM;
>> > 
>> >+    printk("Adding %u vIO APICs\n", nr_vioapics);
>> >+
>> >     for ( i = 0; i < nr_vioapics; i++ )
>> >     {
>> >         unsigned int nr_pins = is_hardware_domain(d) ? nr_ioapic_entries[i] :
>> >             ARRAY_SIZE(domain_vioapic(d, 0)->domU.redirtbl);
>> > 
>> >+        printk("vIO APIC %u has %u pins\n", i, nr_pins);
>> >+
>> >         if ( (domain_vioapic(d, i) =
>> >               xmalloc_bytes(hvm_vioapic_size(nr_pins))) == NULL )
>> >         {
>> >@@ -623,8 +627,12 @@ int vioapic_init(struct domain *d)
>> >         }
>> >         domain_vioapic(d, i)->nr_pins = nr_pins;
>> >         nr_gsis += nr_pins;
>> >+        printk("nr_gsis: %u\n", nr_gsis);
>> >     }
>> > 
>> >+    printk("domain nr_gsis: %u vioapic gsis: %u nr_irqs_gsi: %u highest_gsi: %u\n",
>> >+           hvm_domain_irq(d)->nr_gsis, nr_gsis, nr_irqs_gsi, highest_gsi());
>> >+
>> >     ASSERT(hvm_domain_irq(d)->nr_gsis == nr_gsis);
>> > 
>> >     d->arch.hvm_domain.nr_vioapics = nr_vioapics;
>> >
>> 
>> Please Cc or To me.  Is there some holes in all physical IOAPICs gsi ranges?
>
>That's weird, my MUA (Mutt) seems to automatically remove your address from the
>"To:" field. I have no idea why it does that.
>
>So yes, your box has as GSI gap which is not handled by any IO APIC. TBH, I
>didn't even knew that was possible. In any case, patch below should solve it.
>
>---8<---
>commit f52d05fca03440d771eb56077c9d60bb630eb423
>diff --git a/xen/arch/x86/hvm/vioapic.c b/xen/arch/x86/hvm/vioapic.c
>index 5157db7a4e..ec87a97651 100644
>--- a/xen/arch/x86/hvm/vioapic.c
>+++ b/xen/arch/x86/hvm/vioapic.c
>@@ -64,37 +64,23 @@ static struct hvm_vioapic *addr_vioapic(const struct domain *d,
> struct hvm_vioapic *gsi_vioapic(const struct domain *d, unsigned int gsi,
>                                 unsigned int *pin)
> {
>-    unsigned int i, base_gsi = 0;
>+    unsigned int i;
> 
>     for ( i = 0; i < d->arch.hvm_domain.nr_vioapics; i++ )
>     {
>         struct hvm_vioapic *vioapic = domain_vioapic(d, i);
> 
>-        if ( gsi >= base_gsi && gsi < base_gsi + vioapic->nr_pins )
>+        if ( gsi >= vioapic->base_gsi &&
>+             gsi < vioapic->base_gsi + vioapic->nr_pins )
>         {
>-            *pin = gsi - base_gsi;
>+            *pin = gsi - vioapic->base_gsi;
>             return vioapic;
>         }
>-
>-        base_gsi += vioapic->nr_pins;
>     }
> 
>     return NULL;
> }
> 
>-static unsigned int base_gsi(const struct domain *d,
>-                             const struct hvm_vioapic *vioapic)
>-{
>-    unsigned int nr_vioapics = d->arch.hvm_domain.nr_vioapics;
>-    unsigned int base_gsi = 0, i = 0;
>-    const struct hvm_vioapic *tmp;
>-
>-    while ( i < nr_vioapics && (tmp = domain_vioapic(d, i++)) != vioapic )
>-        base_gsi += tmp->nr_pins;
>-
>-    return base_gsi;
>-}
>-
> static uint32_t vioapic_read_indirect(const struct hvm_vioapic *vioapic)
> {
>     uint32_t result = 0;
>@@ -180,7 +166,7 @@ static void vioapic_write_redirent(
>     struct hvm_irq *hvm_irq = hvm_domain_irq(d);
>     union vioapic_redir_entry *pent, ent;
>     int unmasked = 0;
>-    unsigned int gsi = base_gsi(d, vioapic) + idx;
>+    unsigned int gsi = vioapic->base_gsi + idx;
> 
>     spin_lock(&d->arch.hvm_domain.irq_lock);
> 
>@@ -340,7 +326,7 @@ static void vioapic_deliver(struct hvm_vioapic *vioapic, unsigned int pin)
>     struct domain *d = vioapic_domain(vioapic);
>     struct vlapic *target;
>     struct vcpu *v;
>-    unsigned int irq = base_gsi(d, vioapic) + pin;
>+    unsigned int irq = vioapic->base_gsi + pin;
> 
>     ASSERT(spin_is_locked(&d->arch.hvm_domain.irq_lock));
> 
>@@ -451,7 +437,7 @@ void vioapic_update_EOI(struct domain *d, u8 vector)
> {
>     struct hvm_irq *hvm_irq = hvm_domain_irq(d);
>     union vioapic_redir_entry *ent;
>-    unsigned int i, base_gsi = 0;
>+    unsigned int i;
> 
>     ASSERT(has_vioapic(d));
> 
>@@ -473,19 +459,18 @@ void vioapic_update_EOI(struct domain *d, u8 vector)
>             if ( iommu_enabled )
>             {
>                 spin_unlock(&d->arch.hvm_domain.irq_lock);
>-                hvm_dpci_eoi(d, base_gsi + pin, ent);
>+                hvm_dpci_eoi(d, vioapic->base_gsi + pin, ent);
>                 spin_lock(&d->arch.hvm_domain.irq_lock);
>             }
> 
>             if ( (ent->fields.trig_mode == VIOAPIC_LEVEL_TRIG) &&
>                  !ent->fields.mask &&
>-                 hvm_irq->gsi_assert_count[base_gsi + pin] )
>+                 hvm_irq->gsi_assert_count[vioapic->base_gsi + pin] )
>             {
>                 ent->fields.remote_irr = 1;
>                 vioapic_deliver(vioapic, pin);
>             }
>         }
>-        base_gsi += vioapic->nr_pins;
>     }
> 
>     spin_unlock(&d->arch.hvm_domain.irq_lock);
>@@ -554,6 +539,7 @@ void vioapic_reset(struct domain *d)
>         {
>             vioapic->base_address = mp_ioapics[i].mpc_apicaddr;
>             vioapic->id = mp_ioapics[i].mpc_apicid;
>+            vioapic->base_gsi = io_apic_gsi_base(i);
>         }
>         vioapic->nr_pins = nr_pins;
>         vioapic->domain = d;
>@@ -601,7 +587,12 @@ int vioapic_init(struct domain *d)
>         nr_gsis += nr_pins;
>     }
> 
>-    ASSERT(hvm_domain_irq(d)->nr_gsis == nr_gsis);
>+    /*
>+     * NB: hvm_domain_irq(d)->nr_gsis is actually the highest GSI + 1, but
>+     * there might be holes in this range (ie: GSIs that don't belong to any
>+     * vIO APIC).
>+     */
>+    ASSERT(hvm_domain_irq(d)->nr_gsis >= nr_gsis);
> 
>     d->arch.hvm_domain.nr_vioapics = nr_vioapics;
>     vioapic_reset(d);
>diff --git a/xen/include/asm-x86/hvm/vioapic.h b/xen/include/asm-x86/hvm/vioapic.h
>index 8ec91d2bd1..2ceb60eaef 100644
>--- a/xen/include/asm-x86/hvm/vioapic.h
>+++ b/xen/include/asm-x86/hvm/vioapic.h
>@@ -50,6 +50,7 @@
> struct hvm_vioapic {
>     struct domain *domain;
>     uint32_t nr_pins;
>+    unsigned int base_gsi;
>     union {
>         XEN_HVM_VIOAPIC(,);
>         struct hvm_hw_vioapic domU;
>

It works. I can test for you when you send out a formal patch.

Thanks
Chao
Chao Gao April 17, 2017, 5:47 a.m. UTC | #2
On Mon, Apr 17, 2017 at 01:21:01PM +0100, Roger Pau Monné wrote:
>On Mon, Apr 17, 2017 at 01:12:27PM +0800, Chao Gao wrote:
>[...]
>> It works. I can test for you when you send out a formal patch.
>
>Thanks for the testing, will send formal patch shortly.
>
>Have you been able to reproduce the IOMMU issue with that, or you just hit the
>panic at the end of PVH Dom0 build?

No, I haven't. The output is some like ELFxxx not found, I think, due to lack
of pvh domain0 kernel. As mentioned before, my platform is skylake.

Thanks
Chao
Chao Gao April 17, 2017, 6:18 a.m. UTC | #3
On Mon, Apr 17, 2017 at 01:57:10PM +0100, Roger Pau Monné wrote:
>On Mon, Apr 17, 2017 at 01:47:47PM +0800, Chao Gao wrote:
>> On Mon, Apr 17, 2017 at 01:21:01PM +0100, Roger Pau Monné wrote:
>> >On Mon, Apr 17, 2017 at 01:12:27PM +0800, Chao Gao wrote:
>> >[...]
>> >> It works. I can test for you when you send out a formal patch.
>> >
>> >Thanks for the testing, will send formal patch shortly.
>> >
>> >Have you been able to reproduce the IOMMU issue with that, or you just hit the
>> >panic at the end of PVH Dom0 build?
>> 
>> No, I haven't. The output is some like ELFxxx not found, I think, due to lack
>> of pvh domain0 kernel. As mentioned before, my platform is skylake.
>
>Right, if you get to the ELF stuff it means the IOMMU has been initialized
>successfully. Skylake is post-haswell, so I don't think it's going to exhibit
>those issues. Is there any chance you can test on something older
>(pre-haswell?).

I am not sure that I can find a pre-haswell machine. Will try later.

Thanks
Chao
Roger Pau Monné April 17, 2017, 12:21 p.m. UTC | #4
On Mon, Apr 17, 2017 at 01:12:27PM +0800, Chao Gao wrote:
[...]
> It works. I can test for you when you send out a formal patch.

Thanks for the testing, will send formal patch shortly.

Have you been able to reproduce the IOMMU issue with that, or you just hit the
panic at the end of PVH Dom0 build?

Roger.
Roger Pau Monné April 17, 2017, 12:57 p.m. UTC | #5
On Mon, Apr 17, 2017 at 01:47:47PM +0800, Chao Gao wrote:
> On Mon, Apr 17, 2017 at 01:21:01PM +0100, Roger Pau Monné wrote:
> >On Mon, Apr 17, 2017 at 01:12:27PM +0800, Chao Gao wrote:
> >[...]
> >> It works. I can test for you when you send out a formal patch.
> >
> >Thanks for the testing, will send formal patch shortly.
> >
> >Have you been able to reproduce the IOMMU issue with that, or you just hit the
> >panic at the end of PVH Dom0 build?
> 
> No, I haven't. The output is some like ELFxxx not found, I think, due to lack
> of pvh domain0 kernel. As mentioned before, my platform is skylake.

Right, if you get to the ELF stuff it means the IOMMU has been initialized
successfully. Skylake is post-haswell, so I don't think it's going to exhibit
those issues. Is there any chance you can test on something older
(pre-haswell?).

Thanks, Roger.
diff mbox

Patch

diff --git a/xen/arch/x86/hvm/vioapic.c b/xen/arch/x86/hvm/vioapic.c
index 5157db7a4e..ec87a97651 100644
--- a/xen/arch/x86/hvm/vioapic.c
+++ b/xen/arch/x86/hvm/vioapic.c
@@ -64,37 +64,23 @@  static struct hvm_vioapic *addr_vioapic(const struct domain *d,
 struct hvm_vioapic *gsi_vioapic(const struct domain *d, unsigned int gsi,
                                 unsigned int *pin)
 {
-    unsigned int i, base_gsi = 0;
+    unsigned int i;
 
     for ( i = 0; i < d->arch.hvm_domain.nr_vioapics; i++ )
     {
         struct hvm_vioapic *vioapic = domain_vioapic(d, i);
 
-        if ( gsi >= base_gsi && gsi < base_gsi + vioapic->nr_pins )
+        if ( gsi >= vioapic->base_gsi &&
+             gsi < vioapic->base_gsi + vioapic->nr_pins )
         {
-            *pin = gsi - base_gsi;
+            *pin = gsi - vioapic->base_gsi;
             return vioapic;
         }
-
-        base_gsi += vioapic->nr_pins;
     }
 
     return NULL;
 }
 
-static unsigned int base_gsi(const struct domain *d,
-                             const struct hvm_vioapic *vioapic)
-{
-    unsigned int nr_vioapics = d->arch.hvm_domain.nr_vioapics;
-    unsigned int base_gsi = 0, i = 0;
-    const struct hvm_vioapic *tmp;
-
-    while ( i < nr_vioapics && (tmp = domain_vioapic(d, i++)) != vioapic )
-        base_gsi += tmp->nr_pins;
-
-    return base_gsi;
-}
-
 static uint32_t vioapic_read_indirect(const struct hvm_vioapic *vioapic)
 {
     uint32_t result = 0;
@@ -180,7 +166,7 @@  static void vioapic_write_redirent(
     struct hvm_irq *hvm_irq = hvm_domain_irq(d);
     union vioapic_redir_entry *pent, ent;
     int unmasked = 0;
-    unsigned int gsi = base_gsi(d, vioapic) + idx;
+    unsigned int gsi = vioapic->base_gsi + idx;
 
     spin_lock(&d->arch.hvm_domain.irq_lock);
 
@@ -340,7 +326,7 @@  static void vioapic_deliver(struct hvm_vioapic *vioapic, unsigned int pin)
     struct domain *d = vioapic_domain(vioapic);
     struct vlapic *target;
     struct vcpu *v;
-    unsigned int irq = base_gsi(d, vioapic) + pin;
+    unsigned int irq = vioapic->base_gsi + pin;
 
     ASSERT(spin_is_locked(&d->arch.hvm_domain.irq_lock));
 
@@ -451,7 +437,7 @@  void vioapic_update_EOI(struct domain *d, u8 vector)
 {
     struct hvm_irq *hvm_irq = hvm_domain_irq(d);
     union vioapic_redir_entry *ent;
-    unsigned int i, base_gsi = 0;
+    unsigned int i;
 
     ASSERT(has_vioapic(d));
 
@@ -473,19 +459,18 @@  void vioapic_update_EOI(struct domain *d, u8 vector)
             if ( iommu_enabled )
             {
                 spin_unlock(&d->arch.hvm_domain.irq_lock);
-                hvm_dpci_eoi(d, base_gsi + pin, ent);
+                hvm_dpci_eoi(d, vioapic->base_gsi + pin, ent);
                 spin_lock(&d->arch.hvm_domain.irq_lock);
             }
 
             if ( (ent->fields.trig_mode == VIOAPIC_LEVEL_TRIG) &&
                  !ent->fields.mask &&
-                 hvm_irq->gsi_assert_count[base_gsi + pin] )
+                 hvm_irq->gsi_assert_count[vioapic->base_gsi + pin] )
             {
                 ent->fields.remote_irr = 1;
                 vioapic_deliver(vioapic, pin);
             }
         }
-        base_gsi += vioapic->nr_pins;
     }
 
     spin_unlock(&d->arch.hvm_domain.irq_lock);
@@ -554,6 +539,7 @@  void vioapic_reset(struct domain *d)
         {
             vioapic->base_address = mp_ioapics[i].mpc_apicaddr;
             vioapic->id = mp_ioapics[i].mpc_apicid;
+            vioapic->base_gsi = io_apic_gsi_base(i);
         }
         vioapic->nr_pins = nr_pins;
         vioapic->domain = d;
@@ -601,7 +587,12 @@  int vioapic_init(struct domain *d)
         nr_gsis += nr_pins;
     }
 
-    ASSERT(hvm_domain_irq(d)->nr_gsis == nr_gsis);
+    /*
+     * NB: hvm_domain_irq(d)->nr_gsis is actually the highest GSI + 1, but
+     * there might be holes in this range (ie: GSIs that don't belong to any
+     * vIO APIC).
+     */
+    ASSERT(hvm_domain_irq(d)->nr_gsis >= nr_gsis);
 
     d->arch.hvm_domain.nr_vioapics = nr_vioapics;
     vioapic_reset(d);
diff --git a/xen/include/asm-x86/hvm/vioapic.h b/xen/include/asm-x86/hvm/vioapic.h
index 8ec91d2bd1..2ceb60eaef 100644
--- a/xen/include/asm-x86/hvm/vioapic.h
+++ b/xen/include/asm-x86/hvm/vioapic.h
@@ -50,6 +50,7 @@ 
 struct hvm_vioapic {
     struct domain *domain;
     uint32_t nr_pins;
+    unsigned int base_gsi;
     union {
         XEN_HVM_VIOAPIC(,);
         struct hvm_hw_vioapic domU;