diff mbox

[v2,3/4] x86/vtd: introduce a PVH implementation of iommu_inclusive_mapping

Message ID 20170811164320.92899-4-roger.pau@citrix.com (mailing list archive)
State New, archived
Headers show

Commit Message

Roger Pau Monné Aug. 11, 2017, 4:43 p.m. UTC
On certain Intel systems, as far as I can tell almost all pre-Haswell ones,
trying to boot a PVH Dom0 will freeze the box completely, up to the point that
not even the watchdog works. The freeze happens exactly when enabling the DMA
remapping in the IOMMU, the last line seen is:

(XEN) [VT-D]iommu_enable_translation: iommu->reg = ffff82c00021b000

In order to workaround this (which seems to be a lack of proper RMRR entries,
plus the IOMMU being unable to generate faults and freezing the entire system)
add a PVH specific implementation of iommu_inclusive_mapping, that maps
non-RAM, non-unusable regions into Dom0 p2m. Note that care is taken to not map
device MMIO regions that Xen is emulating, like the local APIC or the IO APIC.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
Cc: Kevin Tian <kevin.tian@intel.com>
---
 xen/drivers/passthrough/vtd/extern.h  |  1 +
 xen/drivers/passthrough/vtd/iommu.c   |  2 ++
 xen/drivers/passthrough/vtd/x86/vtd.c | 39 +++++++++++++++++++++++++++++++++++
 3 files changed, 42 insertions(+)

Comments

Tian, Kevin Aug. 17, 2017, 3:28 a.m. UTC | #1
> From: Roger Pau Monne [mailto:roger.pau@citrix.com]

> Sent: Saturday, August 12, 2017 12:43 AM

> 

> On certain Intel systems, as far as I can tell almost all pre-Haswell ones,

> trying to boot a PVH Dom0 will freeze the box completely, up to the point

> that

> not even the watchdog works. The freeze happens exactly when enabling

> the DMA

> remapping in the IOMMU, the last line seen is:

> 

> (XEN) [VT-D]iommu_enable_translation: iommu->reg = ffff82c00021b000

> 

> In order to workaround this (which seems to be a lack of proper RMRR

> entries,


since you position this patch as 'workaround', what is the side-effect with
such workaround? Do you want to restrict such workaround only for old
boxes?

better you can also put some comment in the code so others can understand
why pvh requires its own way when reading the code.

> plus the IOMMU being unable to generate faults and freezing the entire

> system)

> add a PVH specific implementation of iommu_inclusive_mapping, that

> maps

> non-RAM, non-unusable regions into Dom0 p2m. Note that care is taken to

> not map

> device MMIO regions that Xen is emulating, like the local APIC or the IO

> APIC.

> 

> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>

> ---

> Cc: Kevin Tian <kevin.tian@intel.com>

> ---

>  xen/drivers/passthrough/vtd/extern.h  |  1 +

>  xen/drivers/passthrough/vtd/iommu.c   |  2 ++

>  xen/drivers/passthrough/vtd/x86/vtd.c | 39

> +++++++++++++++++++++++++++++++++++

>  3 files changed, 42 insertions(+)

> 

> diff --git a/xen/drivers/passthrough/vtd/extern.h

> b/xen/drivers/passthrough/vtd/extern.h

> index fb7edfaef9..0eaf8956ff 100644

> --- a/xen/drivers/passthrough/vtd/extern.h

> +++ b/xen/drivers/passthrough/vtd/extern.h

> @@ -100,5 +100,6 @@ bool_t platform_supports_intremap(void);

>  bool_t platform_supports_x2apic(void);

> 

>  void vtd_set_hwdom_mapping(struct domain *d);

> +void vtd_set_pvh_hwdom_mapping(struct domain *d);

> 

>  #endif // _VTD_EXTERN_H_

> diff --git a/xen/drivers/passthrough/vtd/iommu.c

> b/xen/drivers/passthrough/vtd/iommu.c

> index daaed0abbd..8ed28defe2 100644

> --- a/xen/drivers/passthrough/vtd/iommu.c

> +++ b/xen/drivers/passthrough/vtd/iommu.c

> @@ -1303,6 +1303,8 @@ static void __hwdom_init

> intel_iommu_hwdom_init(struct domain *d)

>          /* Set up 1:1 page table for hardware domain. */

>          vtd_set_hwdom_mapping(d);

>      }

> +    else if ( is_hvm_domain(d) )

> +        vtd_set_pvh_hwdom_mapping(d);


Can you elaborate a bit here? Current condition is:

    if ( !iommu_passthrough && !need_iommu(d) )
    {
        /* Set up 1:1 page table for hardware domain. */
        vtd_set_hwdom_mapping(d);
    }

So you assume for PVH above condition will never be true?

> 

>      setup_hwdom_pci_devices(d, setup_hwdom_device);

>      setup_hwdom_rmrr(d);

> diff --git a/xen/drivers/passthrough/vtd/x86/vtd.c

> b/xen/drivers/passthrough/vtd/x86/vtd.c

> index 88a60b3307..79c9b0526f 100644

> --- a/xen/drivers/passthrough/vtd/x86/vtd.c

> +++ b/xen/drivers/passthrough/vtd/x86/vtd.c

> @@ -21,10 +21,12 @@

>  #include <xen/softirq.h>

>  #include <xen/domain_page.h>

>  #include <asm/paging.h>

> +#include <xen/iocap.h>

>  #include <xen/iommu.h>

>  #include <xen/irq.h>

>  #include <xen/numa.h>

>  #include <asm/fixmap.h>

> +#include <asm/p2m.h>

>  #include <asm/setup.h>

>  #include "../iommu.h"

>  #include "../dmar.h"

> @@ -159,3 +161,40 @@ void __hwdom_init

> vtd_set_hwdom_mapping(struct domain *d)

>      }

>  }

> 

> +void __hwdom_init vtd_set_pvh_hwdom_mapping(struct domain *d)

> +{

> +    unsigned long pfn;

> +

> +    BUG_ON(!is_hardware_domain(d));

> +

> +    if ( !iommu_inclusive_mapping )

> +        return;

> +

> +    /* NB: the low 1MB is already mapped in pvh_setup_p2m. */

> +    for ( pfn = PFN_DOWN(MB(1)); pfn < PFN_DOWN(GB(4)); pfn++ )

> +    {

> +        p2m_access_t a;

> +        int rc;

> +

> +        if ( !(pfn & 0xfff) )

> +            process_pending_softirqs();

> +

> +        /* Skip RAM, ACPI and unusable regions. */

> +        if ( page_is_ram_type(pfn, RAM_TYPE_CONVENTIONAL) ||

> +             page_is_ram_type(pfn, RAM_TYPE_UNUSABLE) ||

> +             page_is_ram_type(pfn, RAM_TYPE_ACPI) ||

> +             !iomem_access_permitted(d, pfn, pfn) )

> +            continue;


I'm a bit confused here. So you only handle RESERVED memory
type here, which doesn't match the definition of inclusive mapping.

/*
 * iommu_inclusive_mapping: when set, all memory below 4GB is included in dom0
 * 1:1 iommu mappings except xen and unusable regions.
 */

there must be some background which I missed...

> +

> +        ASSERT(!xen_in_range(pfn));

> +

> +        a = rangeset_contains_range(mmio_ro_ranges, pfn, pfn) ?

> p2m_access_r

> +                                                              : p2m_access_rw;

> +        rc = set_identity_p2m_entry(d, pfn, a, 0);

> +        if ( rc )

> +           printk(XENLOG_WARNING VTDPREFIX

> +                  " d%d: IOMMU mapping failed pfn %#lx: %d\n",

> +                  d->domain_id, pfn, rc);

> +    }

> +}

> +

> --

> 2.11.0 (Apple Git-81)
Roger Pau Monné Aug. 17, 2017, 9:39 a.m. UTC | #2
On Thu, Aug 17, 2017 at 03:28:29AM +0000, Tian, Kevin wrote:
> > From: Roger Pau Monne [mailto:roger.pau@citrix.com]
> > Sent: Saturday, August 12, 2017 12:43 AM
> > 
> > On certain Intel systems, as far as I can tell almost all pre-Haswell ones,
> > trying to boot a PVH Dom0 will freeze the box completely, up to the point
> > that
> > not even the watchdog works. The freeze happens exactly when enabling
> > the DMA
> > remapping in the IOMMU, the last line seen is:
> > 
> > (XEN) [VT-D]iommu_enable_translation: iommu->reg = ffff82c00021b000
> > 
> > In order to workaround this (which seems to be a lack of proper RMRR
> > entries,
> 
> since you position this patch as 'workaround', what is the side-effect with
> such workaround?

Side effect is that Xen basically maps everything below 4GB to the PVH
Dom0 p2m. It's farily similar to what's already done for PV Dom0.

> Do you want to restrict such workaround only for old
> boxes?

Hm, I don't think so, the more that I don't think it's feasible to
identify the broken boxes from Xen's PoV.

> better you can also put some comment in the code so others can understand
> why pvh requires its own way when reading the code.
> 
> > plus the IOMMU being unable to generate faults and freezing the entire
> > system)
> > add a PVH specific implementation of iommu_inclusive_mapping, that
> > maps
> > non-RAM, non-unusable regions into Dom0 p2m. Note that care is taken to
> > not map
> > device MMIO regions that Xen is emulating, like the local APIC or the IO
> > APIC.
> > 
> > Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> > ---
> > Cc: Kevin Tian <kevin.tian@intel.com>
> > ---
> >  xen/drivers/passthrough/vtd/extern.h  |  1 +
> >  xen/drivers/passthrough/vtd/iommu.c   |  2 ++
> >  xen/drivers/passthrough/vtd/x86/vtd.c | 39
> > +++++++++++++++++++++++++++++++++++
> >  3 files changed, 42 insertions(+)
> > 
> > diff --git a/xen/drivers/passthrough/vtd/extern.h
> > b/xen/drivers/passthrough/vtd/extern.h
> > index fb7edfaef9..0eaf8956ff 100644
> > --- a/xen/drivers/passthrough/vtd/extern.h
> > +++ b/xen/drivers/passthrough/vtd/extern.h
> > @@ -100,5 +100,6 @@ bool_t platform_supports_intremap(void);
> >  bool_t platform_supports_x2apic(void);
> > 
> >  void vtd_set_hwdom_mapping(struct domain *d);
> > +void vtd_set_pvh_hwdom_mapping(struct domain *d);
> > 
> >  #endif // _VTD_EXTERN_H_
> > diff --git a/xen/drivers/passthrough/vtd/iommu.c
> > b/xen/drivers/passthrough/vtd/iommu.c
> > index daaed0abbd..8ed28defe2 100644
> > --- a/xen/drivers/passthrough/vtd/iommu.c
> > +++ b/xen/drivers/passthrough/vtd/iommu.c
> > @@ -1303,6 +1303,8 @@ static void __hwdom_init
> > intel_iommu_hwdom_init(struct domain *d)
> >          /* Set up 1:1 page table for hardware domain. */
> >          vtd_set_hwdom_mapping(d);
> >      }
> > +    else if ( is_hvm_domain(d) )
> > +        vtd_set_pvh_hwdom_mapping(d);
> 
> Can you elaborate a bit here? Current condition is:
> 
>     if ( !iommu_passthrough && !need_iommu(d) )
>     {
>         /* Set up 1:1 page table for hardware domain. */
>         vtd_set_hwdom_mapping(d);
>     }
> 
> So you assume for PVH above condition will never be true?

No, PVH Dom0 always requires an iommu, so the above condition will
never be true for a PVH Dom0.

> > 
> >      setup_hwdom_pci_devices(d, setup_hwdom_device);
> >      setup_hwdom_rmrr(d);
> > diff --git a/xen/drivers/passthrough/vtd/x86/vtd.c
> > b/xen/drivers/passthrough/vtd/x86/vtd.c
> > index 88a60b3307..79c9b0526f 100644
> > --- a/xen/drivers/passthrough/vtd/x86/vtd.c
> > +++ b/xen/drivers/passthrough/vtd/x86/vtd.c
> > @@ -21,10 +21,12 @@
> >  #include <xen/softirq.h>
> >  #include <xen/domain_page.h>
> >  #include <asm/paging.h>
> > +#include <xen/iocap.h>
> >  #include <xen/iommu.h>
> >  #include <xen/irq.h>
> >  #include <xen/numa.h>
> >  #include <asm/fixmap.h>
> > +#include <asm/p2m.h>
> >  #include <asm/setup.h>
> >  #include "../iommu.h"
> >  #include "../dmar.h"
> > @@ -159,3 +161,40 @@ void __hwdom_init
> > vtd_set_hwdom_mapping(struct domain *d)
> >      }
> >  }
> > 
> > +void __hwdom_init vtd_set_pvh_hwdom_mapping(struct domain *d)
> > +{
> > +    unsigned long pfn;
> > +
> > +    BUG_ON(!is_hardware_domain(d));
> > +
> > +    if ( !iommu_inclusive_mapping )
> > +        return;
> > +
> > +    /* NB: the low 1MB is already mapped in pvh_setup_p2m. */
> > +    for ( pfn = PFN_DOWN(MB(1)); pfn < PFN_DOWN(GB(4)); pfn++ )
> > +    {
> > +        p2m_access_t a;
> > +        int rc;
> > +
> > +        if ( !(pfn & 0xfff) )
> > +            process_pending_softirqs();
> > +
> > +        /* Skip RAM, ACPI and unusable regions. */
> > +        if ( page_is_ram_type(pfn, RAM_TYPE_CONVENTIONAL) ||
> > +             page_is_ram_type(pfn, RAM_TYPE_UNUSABLE) ||
> > +             page_is_ram_type(pfn, RAM_TYPE_ACPI) ||
> > +             !iomem_access_permitted(d, pfn, pfn) )
> > +            continue;
> 
> I'm a bit confused here. So you only handle RESERVED memory
> type here, which doesn't match the definition of inclusive mapping.
> 
> /*
>  * iommu_inclusive_mapping: when set, all memory below 4GB is included in dom0
>  * 1:1 iommu mappings except xen and unusable regions.
>  */
> 
> there must be some background which I missed...

Right, RAM and ACPI regions are already mapped by the Dom0 builder, so
the only thing left are reserved regions not being used by Xen.

I can expand the comment above to say:

/*
 * Skip RAM, ACPI and unusable regions because they have been already
 * mapped by the PVH Dom0 builder.
 */

Does that seem better?

Roger.
Jan Beulich Aug. 22, 2017, 12:31 p.m. UTC | #3
>>> On 11.08.17 at 18:43, <roger.pau@citrix.com> wrote:
> On certain Intel systems, as far as I can tell almost all pre-Haswell ones,
> trying to boot a PVH Dom0 will freeze the box completely, up to the point that
> not even the watchdog works. The freeze happens exactly when enabling the DMA
> remapping in the IOMMU, the last line seen is:
> 
> (XEN) [VT-D]iommu_enable_translation: iommu->reg = ffff82c00021b000
> 
> In order to workaround this (which seems to be a lack of proper RMRR entries,
> plus the IOMMU being unable to generate faults and freezing the entire system)
> add a PVH specific implementation of iommu_inclusive_mapping, that maps
> non-RAM, non-unusable regions into Dom0 p2m. Note that care is taken to not map
> device MMIO regions that Xen is emulating, like the local APIC or the IO APIC.
> 
> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>

I don't mean to object to the patch, but it certainly would be helpful
to understand the behavior a little better, in particular also to
perhaps be able to derive what RMRRs are missing (which could
then be added via command line option instead of this all-or-norhing
approach). Kevin, could you perhaps help here?

Jan
Roger Pau Monné Aug. 22, 2017, 2:01 p.m. UTC | #4
On Tue, Aug 22, 2017 at 06:31:27AM -0600, Jan Beulich wrote:
> >>> On 11.08.17 at 18:43, <roger.pau@citrix.com> wrote:
> > On certain Intel systems, as far as I can tell almost all pre-Haswell ones,
> > trying to boot a PVH Dom0 will freeze the box completely, up to the point that
> > not even the watchdog works. The freeze happens exactly when enabling the DMA
> > remapping in the IOMMU, the last line seen is:
> > 
> > (XEN) [VT-D]iommu_enable_translation: iommu->reg = ffff82c00021b000
> > 
> > In order to workaround this (which seems to be a lack of proper RMRR entries,
> > plus the IOMMU being unable to generate faults and freezing the entire system)
> > add a PVH specific implementation of iommu_inclusive_mapping, that maps
> > non-RAM, non-unusable regions into Dom0 p2m. Note that care is taken to not map
> > device MMIO regions that Xen is emulating, like the local APIC or the IO APIC.
> > 
> > Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> 
> I don't mean to object to the patch, but it certainly would be helpful
> to understand the behavior a little better, in particular also to
> perhaps be able to derive what RMRRs are missing (which could
> then be added via command line option instead of this all-or-norhing
> approach). Kevin, could you perhaps help here?

I tied that, but since the system freezes completely I have no idea
what's missing. It's quite clear to me that it's related to the IOMMU
and it's inability to properly generate a fault, but further than that
I have no other clue.

Roger.
Jan Beulich Aug. 23, 2017, 8:18 a.m. UTC | #5
>>> On 22.08.17 at 16:01, <roger.pau@citrix.com> wrote:
> On Tue, Aug 22, 2017 at 06:31:27AM -0600, Jan Beulich wrote:
>> >>> On 11.08.17 at 18:43, <roger.pau@citrix.com> wrote:
>> > On certain Intel systems, as far as I can tell almost all pre-Haswell ones,
>> > trying to boot a PVH Dom0 will freeze the box completely, up to the point that
>> > not even the watchdog works. The freeze happens exactly when enabling the DMA
>> > remapping in the IOMMU, the last line seen is:
>> > 
>> > (XEN) [VT-D]iommu_enable_translation: iommu->reg = ffff82c00021b000
>> > 
>> > In order to workaround this (which seems to be a lack of proper RMRR entries,
>> > plus the IOMMU being unable to generate faults and freezing the entire system)
>> > add a PVH specific implementation of iommu_inclusive_mapping, that maps
>> > non-RAM, non-unusable regions into Dom0 p2m. Note that care is taken to not map
>> > device MMIO regions that Xen is emulating, like the local APIC or the IO APIC.
>> > 
>> > Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
>> 
>> I don't mean to object to the patch, but it certainly would be helpful
>> to understand the behavior a little better, in particular also to
>> perhaps be able to derive what RMRRs are missing (which could
>> then be added via command line option instead of this all-or-norhing
>> approach). Kevin, could you perhaps help here?
> 
> I tied that, but since the system freezes completely I have no idea
> what's missing. It's quite clear to me that it's related to the IOMMU
> and it's inability to properly generate a fault, but further than that
> I have no other clue.

Hence my request for Kevin to help (perhaps indirectly by pulling
in other Intel folks). Someone being able to check what the chipset
actually does or being able to observe what's going on in a logic
analyzer should be able to explain the observed behavior.

Jan
Tian, Kevin Aug. 28, 2017, 6:13 a.m. UTC | #6
> From: Roger Pau Monne [mailto:roger.pau@citrix.com]
> Sent: Thursday, August 17, 2017 5:39 PM
> 
> > >
> > > +void __hwdom_init vtd_set_pvh_hwdom_mapping(struct domain *d)
> > > +{
> > > +    unsigned long pfn;
> > > +
> > > +    BUG_ON(!is_hardware_domain(d));
> > > +
> > > +    if ( !iommu_inclusive_mapping )
> > > +        return;
> > > +
> > > +    /* NB: the low 1MB is already mapped in pvh_setup_p2m. */
> > > +    for ( pfn = PFN_DOWN(MB(1)); pfn < PFN_DOWN(GB(4)); pfn++ )
> > > +    {
> > > +        p2m_access_t a;
> > > +        int rc;
> > > +
> > > +        if ( !(pfn & 0xfff) )
> > > +            process_pending_softirqs();
> > > +
> > > +        /* Skip RAM, ACPI and unusable regions. */
> > > +        if ( page_is_ram_type(pfn, RAM_TYPE_CONVENTIONAL) ||
> > > +             page_is_ram_type(pfn, RAM_TYPE_UNUSABLE) ||
> > > +             page_is_ram_type(pfn, RAM_TYPE_ACPI) ||
> > > +             !iomem_access_permitted(d, pfn, pfn) )
> > > +            continue;
> >
> > I'm a bit confused here. So you only handle RESERVED memory
> > type here, which doesn't match the definition of inclusive mapping.
> >
> > /*
> >  * iommu_inclusive_mapping: when set, all memory below 4GB is
> included in dom0
> >  * 1:1 iommu mappings except xen and unusable regions.
> >  */
> >
> > there must be some background which I missed...
> 
> Right, RAM and ACPI regions are already mapped by the Dom0 builder, so
> the only thing left are reserved regions not being used by Xen.
> 
> I can expand the comment above to say:
> 
> /*
>  * Skip RAM, ACPI and unusable regions because they have been already
>  * mapped by the PVH Dom0 builder.
>  */
> 
> Does that seem better?
> 
> Roger.

yes it's better. btw if you can limit the function name further then 
it might be clearer, e.g. vtd_set_pvh_hwdom_reserved_mapping
Tian, Kevin Aug. 28, 2017, 6:14 a.m. UTC | #7
> From: Jan Beulich [mailto:JBeulich@suse.com]

> Sent: Wednesday, August 23, 2017 4:19 PM

> To: Roger Pau Monne <roger.pau@citrix.com>

> Cc: Tian, Kevin <kevin.tian@intel.com>; xen-devel@lists.xenproject.org

> Subject: Re: [Xen-devel] [PATCH v2 3/4] x86/vtd: introduce a PVH

> implementation of iommu_inclusive_mapping

> 

> >>> On 22.08.17 at 16:01, <roger.pau@citrix.com> wrote:

> > On Tue, Aug 22, 2017 at 06:31:27AM -0600, Jan Beulich wrote:

> >> >>> On 11.08.17 at 18:43, <roger.pau@citrix.com> wrote:

> >> > On certain Intel systems, as far as I can tell almost all pre-Haswell ones,

> >> > trying to boot a PVH Dom0 will freeze the box completely, up to the

> point that

> >> > not even the watchdog works. The freeze happens exactly when

> enabling the DMA

> >> > remapping in the IOMMU, the last line seen is:

> >> >

> >> > (XEN) [VT-D]iommu_enable_translation: iommu->reg =

> ffff82c00021b000

> >> >

> >> > In order to workaround this (which seems to be a lack of proper RMRR

> entries,

> >> > plus the IOMMU being unable to generate faults and freezing the

> entire system)

> >> > add a PVH specific implementation of iommu_inclusive_mapping, that

> maps

> >> > non-RAM, non-unusable regions into Dom0 p2m. Note that care is

> taken to not map

> >> > device MMIO regions that Xen is emulating, like the local APIC or the IO

> APIC.

> >> >

> >> > Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>

> >>

> >> I don't mean to object to the patch, but it certainly would be helpful

> >> to understand the behavior a little better, in particular also to

> >> perhaps be able to derive what RMRRs are missing (which could

> >> then be added via command line option instead of this all-or-norhing

> >> approach). Kevin, could you perhaps help here?

> >

> > I tied that, but since the system freezes completely I have no idea

> > what's missing. It's quite clear to me that it's related to the IOMMU

> > and it's inability to properly generate a fault, but further than that

> > I have no other clue.

> 

> Hence my request for Kevin to help (perhaps indirectly by pulling

> in other Intel folks). Someone being able to check what the chipset

> actually does or being able to observe what's going on in a logic

> analyzer should be able to explain the observed behavior.

> 


We don't have logic analyzer specifically to examine VTd, but yes
we can help have a try whether it's reproducible in our side and
then do some analysis.

what's the hardware configuration?

Thanks
Kevin
Roger Pau Monné Aug. 29, 2017, 7:39 a.m. UTC | #8
On Mon, Aug 28, 2017 at 06:14:45AM +0000, Tian, Kevin wrote:
> > From: Jan Beulich [mailto:JBeulich@suse.com]
> > Sent: Wednesday, August 23, 2017 4:19 PM
> > To: Roger Pau Monne <roger.pau@citrix.com>
> > Cc: Tian, Kevin <kevin.tian@intel.com>; xen-devel@lists.xenproject.org
> > Subject: Re: [Xen-devel] [PATCH v2 3/4] x86/vtd: introduce a PVH
> > implementation of iommu_inclusive_mapping
> > 
> > >>> On 22.08.17 at 16:01, <roger.pau@citrix.com> wrote:
> > > On Tue, Aug 22, 2017 at 06:31:27AM -0600, Jan Beulich wrote:
> > >> >>> On 11.08.17 at 18:43, <roger.pau@citrix.com> wrote:
> > >> > On certain Intel systems, as far as I can tell almost all pre-Haswell ones,
> > >> > trying to boot a PVH Dom0 will freeze the box completely, up to the
> > point that
> > >> > not even the watchdog works. The freeze happens exactly when
> > enabling the DMA
> > >> > remapping in the IOMMU, the last line seen is:
> > >> >
> > >> > (XEN) [VT-D]iommu_enable_translation: iommu->reg =
> > ffff82c00021b000
> > >> >
> > >> > In order to workaround this (which seems to be a lack of proper RMRR
> > entries,
> > >> > plus the IOMMU being unable to generate faults and freezing the
> > entire system)
> > >> > add a PVH specific implementation of iommu_inclusive_mapping, that
> > maps
> > >> > non-RAM, non-unusable regions into Dom0 p2m. Note that care is
> > taken to not map
> > >> > device MMIO regions that Xen is emulating, like the local APIC or the IO
> > APIC.
> > >> >
> > >> > Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> > >>
> > >> I don't mean to object to the patch, but it certainly would be helpful
> > >> to understand the behavior a little better, in particular also to
> > >> perhaps be able to derive what RMRRs are missing (which could
> > >> then be added via command line option instead of this all-or-norhing
> > >> approach). Kevin, could you perhaps help here?
> > >
> > > I tied that, but since the system freezes completely I have no idea
> > > what's missing. It's quite clear to me that it's related to the IOMMU
> > > and it's inability to properly generate a fault, but further than that
> > > I have no other clue.
> > 
> > Hence my request for Kevin to help (perhaps indirectly by pulling
> > in other Intel folks). Someone being able to check what the chipset
> > actually does or being able to observe what's going on in a logic
> > analyzer should be able to explain the observed behavior.
> > 
> 
> We don't have logic analyzer specifically to examine VTd, but yes
> we can help have a try whether it's reproducible in our side and
> then do some analysis.
> 
> what's the hardware configuration?

It's a Dell Precision T3600 with Xeon(R) CPU E5-1607 0 @ 3.00GHz, a
C600/X79 chipset and BIOS version A14.

Just trying to boot a Xen kernel with dom0=pvh will show the issue
(ie: you won't be able to reach the panic at the end of the Dom0
builder because the box will get stuck in the iommu_hwdom_init
call).

Roger.
diff mbox

Patch

diff --git a/xen/drivers/passthrough/vtd/extern.h b/xen/drivers/passthrough/vtd/extern.h
index fb7edfaef9..0eaf8956ff 100644
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -100,5 +100,6 @@  bool_t platform_supports_intremap(void);
 bool_t platform_supports_x2apic(void);
 
 void vtd_set_hwdom_mapping(struct domain *d);
+void vtd_set_pvh_hwdom_mapping(struct domain *d);
 
 #endif // _VTD_EXTERN_H_
diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
index daaed0abbd..8ed28defe2 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1303,6 +1303,8 @@  static void __hwdom_init intel_iommu_hwdom_init(struct domain *d)
         /* Set up 1:1 page table for hardware domain. */
         vtd_set_hwdom_mapping(d);
     }
+    else if ( is_hvm_domain(d) )
+        vtd_set_pvh_hwdom_mapping(d);
 
     setup_hwdom_pci_devices(d, setup_hwdom_device);
     setup_hwdom_rmrr(d);
diff --git a/xen/drivers/passthrough/vtd/x86/vtd.c b/xen/drivers/passthrough/vtd/x86/vtd.c
index 88a60b3307..79c9b0526f 100644
--- a/xen/drivers/passthrough/vtd/x86/vtd.c
+++ b/xen/drivers/passthrough/vtd/x86/vtd.c
@@ -21,10 +21,12 @@ 
 #include <xen/softirq.h>
 #include <xen/domain_page.h>
 #include <asm/paging.h>
+#include <xen/iocap.h>
 #include <xen/iommu.h>
 #include <xen/irq.h>
 #include <xen/numa.h>
 #include <asm/fixmap.h>
+#include <asm/p2m.h>
 #include <asm/setup.h>
 #include "../iommu.h"
 #include "../dmar.h"
@@ -159,3 +161,40 @@  void __hwdom_init vtd_set_hwdom_mapping(struct domain *d)
     }
 }
 
+void __hwdom_init vtd_set_pvh_hwdom_mapping(struct domain *d)
+{
+    unsigned long pfn;
+
+    BUG_ON(!is_hardware_domain(d));
+
+    if ( !iommu_inclusive_mapping )
+        return;
+
+    /* NB: the low 1MB is already mapped in pvh_setup_p2m. */
+    for ( pfn = PFN_DOWN(MB(1)); pfn < PFN_DOWN(GB(4)); pfn++ )
+    {
+        p2m_access_t a;
+        int rc;
+
+        if ( !(pfn & 0xfff) )
+            process_pending_softirqs();
+
+        /* Skip RAM, ACPI and unusable regions. */
+        if ( page_is_ram_type(pfn, RAM_TYPE_CONVENTIONAL) ||
+             page_is_ram_type(pfn, RAM_TYPE_UNUSABLE) ||
+             page_is_ram_type(pfn, RAM_TYPE_ACPI) ||
+             !iomem_access_permitted(d, pfn, pfn) )
+            continue;
+
+        ASSERT(!xen_in_range(pfn));
+
+        a = rangeset_contains_range(mmio_ro_ranges, pfn, pfn) ? p2m_access_r
+                                                              : p2m_access_rw;
+        rc = set_identity_p2m_entry(d, pfn, a, 0);
+        if ( rc )
+           printk(XENLOG_WARNING VTDPREFIX
+                  " d%d: IOMMU mapping failed pfn %#lx: %d\n",
+                  d->domain_id, pfn, rc);
+    }
+}
+