
[v3,2/9] x86/ecam: add handlers for the PVH Dom0 MMCFG areas

Message ID 20170427143546.14662-3-roger.pau@citrix.com (mailing list archive)
State New, archived

Commit Message

Roger Pau Monné April 27, 2017, 2:35 p.m. UTC
Introduce a set of handlers for accesses to the ECAM areas. Those areas are
set up based on the contents of the hardware MMCFG tables, and the list of
handled ECAM areas is stored inside the hvm_domain struct.

Reads and writes are forwarded to the generic vpci handlers once the address
has been decoded in order to obtain the device and register the guest is
trying to access.
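
For reference, a minimal sketch of the decoding this relies on (illustrative
only; it mirrors the vpci_ecam_decode_addr() helper in the patch below):

    /* Sketch: how an ECAM offset maps to bus/devfn/register. */
    static void ecam_decode(paddr_t base, unsigned int start_bus,
                            unsigned long addr, unsigned int *bus,
                            unsigned int *devfn, unsigned int *reg)
    {
        addr -= base;
        *bus = ((addr >> 20) & 0xff) + start_bus; /* bits 27:20 */
        *devfn = (addr >> 12) & 0xff;             /* bits 19:12 */
        *reg = addr & 0xfff;                      /* bits 11:0  */
    }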

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Paul Durrant <paul.durrant@citrix.com>
---
Changes since v1:
 - Added locking.
---
 xen/arch/x86/hvm/dom0_build.c    |  27 ++++++++
 xen/arch/x86/hvm/hvm.c           |  10 +++
 xen/arch/x86/hvm/io.c            | 139 +++++++++++++++++++++++++++++++++++++++
 xen/include/asm-x86/hvm/domain.h |  11 ++++
 xen/include/asm-x86/hvm/io.h     |   5 ++
 5 files changed, 192 insertions(+)

Comments

Jan Beulich May 19, 2017, 1:25 p.m. UTC | #1
>>> On 27.04.17 at 16:35, <roger.pau@citrix.com> wrote:
> @@ -1048,6 +1050,24 @@ static int __init pvh_setup_acpi(struct domain *d, 
> paddr_t start_info)
>      return 0;
>  }
>  
> +int __init pvh_setup_ecam(struct domain *d)

While I won't object to the term ecam in title and description,
please use mmcfg uniformly in code - that's the way we name
the thing everywhere else.

> +{
> +    unsigned int i;
> +    int rc;
> +
> +    for ( i = 0; i < pci_mmcfg_config_num; i++ )
> +    {
> +        rc = register_vpci_ecam_handler(d, pci_mmcfg_config[i].address,
> +                                        pci_mmcfg_config[i].start_bus_number,
> +                                        pci_mmcfg_config[i].end_bus_number,
> +                                        pci_mmcfg_config[i].pci_segment);
> +        if ( rc )
> +            return rc;
> +    }
> +
> +    return 0;
> +}

What about regions becoming available only post-boot?

> @@ -752,6 +754,14 @@ void hvm_domain_destroy(struct domain *d)
>          list_del(&ioport->list);
>          xfree(ioport);
>      }
> +
> +    list_for_each_entry_safe ( ecam, etmp, &d->arch.hvm_domain.ecam_regions,
> +                               next )
> +    {
> +        list_del(&ecam->next);
> +        xfree(ecam);
> +    }
> +
>  }

Stray blank line. Of course the addition is of questionable use
anyway as long as all of this is Dom0 only.

> --- a/xen/arch/x86/hvm/io.c
> +++ b/xen/arch/x86/hvm/io.c
> @@ -403,6 +403,145 @@ void register_vpci_portio_handler(struct domain *d)
>      handler->ops = &vpci_portio_ops;
>  }
>  
> +/* Handlers to trap PCI ECAM config accesses. */
> +static struct hvm_ecam *vpci_ecam_find(struct domain *d, unsigned long addr)

Logically d should be a pointer to const, and I think no caller really
needs you to return a pointer to non-const.

> +{
> +    struct hvm_ecam *ecam = NULL;

Pointless initializer.

> +static void vpci_ecam_decode_addr(struct hvm_ecam *ecam, unsigned long addr,

const

> +static int vpci_ecam_accept(struct vcpu *v, unsigned long addr)
> +{
> +    struct domain *d = v->domain;
> +    int found;
> +
> +    vpci_lock(d);
> +    found = !!vpci_ecam_find(v->domain, addr);

Please use the local variable consistently.

> +static int vpci_ecam_read(struct vcpu *v, unsigned long addr,

Did I overlook this in patch 1? Why is this a vcpu instead of a
domain parameter? All of PCI is (virtual) machine wide...

> +                          unsigned int len, unsigned long *data)
> +{
> +    struct domain *d = v->domain;
> +    struct hvm_ecam *ecam;
> +    unsigned int bus, devfn, reg;
> +    uint32_t data32;
> +    int rc;
> +
> +    vpci_lock(d);
> +    ecam = vpci_ecam_find(d, addr);
> +    if ( !ecam )
> +    {
> +        vpci_unlock(d);
> +        return X86EMUL_UNHANDLEABLE;
> +    }
> +
> +    vpci_ecam_decode_addr(ecam, addr, &bus, &devfn, &reg);
> +
> +    if ( vpci_access_check(reg, len) || reg >= 0xfff )

So this function iirc allows only 1-, 2-, and 4-byte accesses. Other
than with port I/O, MMCFG allows wider ones, and once again I
don't think hardware would raise any kind of fault in such a case.
The general expectation is for the fabric to split such accesses.

Also the reg check is once again off by one.

> +int register_vpci_ecam_handler(struct domain *d, paddr_t addr,
> +                               unsigned int start_bus, unsigned int end_bus,
> +                               unsigned int seg)
> +{
> +    struct hvm_ecam *ecam;
> +
> +    ASSERT(is_hardware_domain(d));
> +
> +    vpci_lock(d);
> +    if ( vpci_ecam_find(d, addr) )
> +    {
> +        vpci_unlock(d);
> +        return -EEXIST;
> +    }
> +
> +    ecam = xzalloc(struct hvm_ecam);

xmalloc() would again suffice afaict.

> --- a/xen/include/asm-x86/hvm/domain.h
> +++ b/xen/include/asm-x86/hvm/domain.h
> @@ -100,6 +100,14 @@ struct hvm_pi_ops {
>      void (*do_resume)(struct vcpu *v);
>  };
>  
> +struct hvm_ecam {
> +    paddr_t addr;
> +    size_t size;
> +    unsigned int bus;
> +    unsigned int segment;
> +    struct list_head next;
> +};

If you moved the addition to hvm_domain_destroy() into a function
in hvm/io.c, this type could be private to that latter file afaict.

Jan
Roger Pau Monné June 20, 2017, 11:56 a.m. UTC | #2
On Fri, May 19, 2017 at 07:25:22AM -0600, Jan Beulich wrote:
> >>> On 27.04.17 at 16:35, <roger.pau@citrix.com> wrote:
> > @@ -1048,6 +1050,24 @@ static int __init pvh_setup_acpi(struct domain *d, 
> > paddr_t start_info)
> >      return 0;
> >  }
> >  
> > +int __init pvh_setup_ecam(struct domain *d)
> 
> While I won't object to the term ecam in title and description,
> please use mmcfg uniformly in code - that's the way we name
> the thing everywhere else.

OK.

> > +{
> > +    unsigned int i;
> > +    int rc;
> > +
> > +    for ( i = 0; i < pci_mmcfg_config_num; i++ )
> > +    {
> > +        rc = register_vpci_ecam_handler(d, pci_mmcfg_config[i].address,
> > +                                        pci_mmcfg_config[i].start_bus_number,
> > +                                        pci_mmcfg_config[i].end_bus_number,
> > +                                        pci_mmcfg_config[i].pci_segment);
> > +        if ( rc )
> > +            return rc;
> > +    }
> > +
> > +    return 0;
> > +}
> 
> What about regions becoming available only post-boot?

This is not yet supported. It needs to be implemented using the
PHYSDEVOP_pci_mmcfg_reserved hypercall.
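
For context, a rough sketch of how that wiring could eventually look; the
PHYSDEVOP_pci_mmcfg_reserved case and pci_mmcfg_reserved() already exist in
xen/arch/x86/physdev.c, while the call into register_vpci_ecam_handler() is
purely illustrative and not part of this patch:

    case PHYSDEVOP_pci_mmcfg_reserved: {
        struct physdev_pci_mmcfg_reserved info;

        ret = -EFAULT;
        if ( copy_from_guest(&info, arg, 1) )
            break;

        ret = pci_mmcfg_reserved(info.address, info.segment,
                                 info.start_bus, info.end_bus, info.flags);
        /* Hypothetical: mirror the new region into the vPCI MMCFG list. */
        if ( !ret && is_hvm_domain(hardware_domain) )
            ret = register_vpci_ecam_handler(hardware_domain, info.address,
                                             info.start_bus, info.end_bus,
                                             info.segment);
        break;
    }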

> > @@ -752,6 +754,14 @@ void hvm_domain_destroy(struct domain *d)
> >          list_del(&ioport->list);
> >          xfree(ioport);
> >      }
> > +
> > +    list_for_each_entry_safe ( ecam, etmp, &d->arch.hvm_domain.ecam_regions,
> > +                               next )
> > +    {
> > +        list_del(&ecam->next);
> > +        xfree(ecam);
> > +    }
> > +
> >  }
> 
> Stray blank line. Of course the addition is of questionable use
> anyway as long as all of this is Dom0 only.

Right, I just felt it would be better to do proper cleanup since it's
just a couple of lines.

> > --- a/xen/arch/x86/hvm/io.c
> > +++ b/xen/arch/x86/hvm/io.c
> > @@ -403,6 +403,145 @@ void register_vpci_portio_handler(struct domain *d)
> >      handler->ops = &vpci_portio_ops;
> >  }
> >  
> > +/* Handlers to trap PCI ECAM config accesses. */
> > +static struct hvm_ecam *vpci_ecam_find(struct domain *d, unsigned long addr)
> 
> Logically d should be a pointer to const, and I think no caller really
> needs you to return a pointer to non-const.
> > +{
> > +    struct hvm_ecam *ecam = NULL;
> 
> Pointless initializer.
> 
> > +static void vpci_ecam_decode_addr(struct hvm_ecam *ecam, unsigned long addr,
> 
> const

Fixed all the above.

> > +static int vpci_ecam_accept(struct vcpu *v, unsigned long addr)
> > +{
> > +    struct domain *d = v->domain;
> > +    int found;
> > +
> > +    vpci_lock(d);
> > +    found = !!vpci_ecam_find(v->domain, addr);
> 
> Please use the local variable consistently.
> 
> > +static int vpci_ecam_read(struct vcpu *v, unsigned long addr,
> 
> Did I overlook this in patch 1? Why is this a vcpu instead of a
> domain parameter? All of PCI is (virtual) machine wide...

That's what the hvm_mmio_ops struct expects (vcpu instead of domain),
which is where this function is used.
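
For reference, the callback types in question (as declared in
xen/include/asm-x86/hvm/io.h at this point in time) all take a vcpu, roughly:

    typedef int (*hvm_mmio_read_t)(struct vcpu *v, unsigned long addr,
                                   unsigned int length, unsigned long *val);
    typedef int (*hvm_mmio_write_t)(struct vcpu *v, unsigned long addr,
                                    unsigned int length, unsigned long val);
    typedef bool_t (*hvm_mmio_accept_t)(struct vcpu *v, unsigned long addr);

    struct hvm_mmio_ops {
        hvm_mmio_accept_t check;
        hvm_mmio_read_t   read;
        hvm_mmio_write_t  write;
    };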

> > +                          unsigned int len, unsigned long *data)
> > +{
> > +    struct domain *d = v->domain;
> > +    struct hvm_ecam *ecam;
> > +    unsigned int bus, devfn, reg;
> > +    uint32_t data32;
> > +    int rc;
> > +
> > +    vpci_lock(d);
> > +    ecam = vpci_ecam_find(d, addr);
> > +    if ( !ecam )
> > +    {
> > +        vpci_unlock(d);
> > +        return X86EMUL_UNHANDLEABLE;
> > +    }
> > +
> > +    vpci_ecam_decode_addr(ecam, addr, &bus, &devfn, &reg);
> > +
> > +    if ( vpci_access_check(reg, len) || reg >= 0xfff )
> 
> So this function iirc allows only 1-, 2-, and 4-byte accesses. Other
> than with port I/O, MMCFG allows wider ones, and once again I
> don't think hardware would raise any kind of fault in such a case.
> The general expectation is for the fabric to split such accesses.

Hm, the PCIe spec is not authoritative in this regard; it states that
supporting 8B accesses is not mandatory. Xen/Linux/FreeBSD will never
attempt any access > 4B, hence I haven't coded this case.

Would you be fine with leaving this for later, or would you rather
have it implemented as part of this series?

> Also the reg check is once again off by one.

This is now gone, since reg cannot be > 0xfff in any case.

> > +int register_vpci_ecam_handler(struct domain *d, paddr_t addr,
> > +                               unsigned int start_bus, unsigned int end_bus,
> > +                               unsigned int seg)
> > +{
> > +    struct hvm_ecam *ecam;
> > +
> > +    ASSERT(is_hardware_domain(d));
> > +
> > +    vpci_lock(d);
> > +    if ( vpci_ecam_find(d, addr) )
> > +    {
> > +        vpci_unlock(d);
> > +        return -EEXIST;
> > +    }
> > +
> > +    ecam = xzalloc(struct hvm_ecam);
> 
> xmalloc() would again suffice afaict.

Right.

> > --- a/xen/include/asm-x86/hvm/domain.h
> > +++ b/xen/include/asm-x86/hvm/domain.h
> > @@ -100,6 +100,14 @@ struct hvm_pi_ops {
> >      void (*do_resume)(struct vcpu *v);
> >  };
> >  
> > +struct hvm_ecam {
> > +    paddr_t addr;
> > +    size_t size;
> > +    unsigned int bus;
> > +    unsigned int segment;
> > +    struct list_head next;
> > +};
> 
> If you moved the addition to hvm_domain_destroy() into a function
> in hvm/io.c, this type could be private to that latter file afaict.

Yes, I've now done that and named the function destroy_vpci_mmcfg.
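
A minimal sketch of what such a helper could look like, assuming the struct
is also renamed to hvm_mmcfg as requested above (the exact shape in the next
revision may differ):

    /* In hvm/io.c, keeping the struct private to that file. */
    void destroy_vpci_mmcfg(struct list_head *domain_mmcfg)
    {
        while ( !list_empty(domain_mmcfg) )
        {
            struct hvm_mmcfg *mmcfg = list_first_entry(domain_mmcfg,
                                                       struct hvm_mmcfg, next);

            list_del(&mmcfg->next);
            xfree(mmcfg);
        }
    }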

Thanks, Roger.
Jan Beulich June 20, 2017, 1:14 p.m. UTC | #3
>>> On 20.06.17 at 13:56, <roger.pau@citrix.com> wrote:
> On Fri, May 19, 2017 at 07:25:22AM -0600, Jan Beulich wrote:
>> >>> On 27.04.17 at 16:35, <roger.pau@citrix.com> wrote:
>> > +{
>> > +    unsigned int i;
>> > +    int rc;
>> > +
>> > +    for ( i = 0; i < pci_mmcfg_config_num; i++ )
>> > +    {
>> > +        rc = register_vpci_ecam_handler(d, pci_mmcfg_config[i].address,
>> > +                                        pci_mmcfg_config[i].start_bus_number,
>> > +                                        pci_mmcfg_config[i].end_bus_number,
>> > +                                        pci_mmcfg_config[i].pci_segment);
>> > +        if ( rc )
>> > +            return rc;
>> > +    }
>> > +
>> > +    return 0;
>> > +}
>> 
>> What about regions becoming available only post-boot?
> 
> This is not yet supported. It needs to be implemented using the
> PHYSDEVOP_pci_mmcfg_reserved hypercall.

But then the patch here is incomplete.

>> > +                          unsigned int len, unsigned long *data)
>> > +{
>> > +    struct domain *d = v->domain;
>> > +    struct hvm_ecam *ecam;
>> > +    unsigned int bus, devfn, reg;
>> > +    uint32_t data32;
>> > +    int rc;
>> > +
>> > +    vpci_lock(d);
>> > +    ecam = vpci_ecam_find(d, addr);
>> > +    if ( !ecam )
>> > +    {
>> > +        vpci_unlock(d);
>> > +        return X86EMUL_UNHANDLEABLE;
>> > +    }
>> > +
>> > +    vpci_ecam_decode_addr(ecam, addr, &bus, &devfn, &reg);
>> > +
>> > +    if ( vpci_access_check(reg, len) || reg >= 0xfff )
>> 
>> So this function iirc allows only 1-, 2-, and 4-byte accesses. Other
>> than with port I/O, MMCFG allows wider ones, and once again I
>> don't think hardware would raise any kind of fault in such a case.
>> The general expectation is for the fabric to split such accesses.
> 
> Hm, the PCIe spec is not authoritative in this regard; it states that
> supporting 8B accesses is not mandatory. Xen/Linux/FreeBSD will never
> attempt any access > 4B, hence I haven't coded this case.
> 
> Would you be fine with leaving this for later, or would you rather
> have it implemented as part of this series?

Since it shouldn't be meaningfully more code, I'd prefer if it was
done right away. Otherwise I'd have to ask for a "fixme" comment,
and I'd rather avoid such considering the PVHv1 history.

Jan
Roger Pau Monné June 20, 2017, 3:04 p.m. UTC | #4
On Tue, Jun 20, 2017 at 07:14:07AM -0600, Jan Beulich wrote:
> >>> On 20.06.17 at 13:56, <roger.pau@citrix.com> wrote:
> > On Fri, May 19, 2017 at 07:25:22AM -0600, Jan Beulich wrote:
> >> >>> On 27.04.17 at 16:35, <roger.pau@citrix.com> wrote:
> >> > +{
> >> > +    unsigned int i;
> >> > +    int rc;
> >> > +
> >> > +    for ( i = 0; i < pci_mmcfg_config_num; i++ )
> >> > +    {
> >> > +        rc = register_vpci_ecam_handler(d, pci_mmcfg_config[i].address,
> >> > +                                        pci_mmcfg_config[i].start_bus_number,
> >> > +                                        pci_mmcfg_config[i].end_bus_number,
> >> > +                                        pci_mmcfg_config[i].pci_segment);
> >> > +        if ( rc )
> >> > +            return rc;
> >> > +    }
> >> > +
> >> > +    return 0;
> >> > +}
> >> 
> >> What about regions becoming available only post-boot?
> > 
> > This is not yet supported. It needs to be implemented using the
> > PHYSDEVOP_pci_mmcfg_reserved hypercall.
> 
> But then the patch here is incomplete.

OK, I don't think it's going to be a lot of code, it's just
registering extra MMCFG regions.

> >> > +                          unsigned int len, unsigned long *data)
> >> > +{
> >> > +    struct domain *d = v->domain;
> >> > +    struct hvm_ecam *ecam;
> >> > +    unsigned int bus, devfn, reg;
> >> > +    uint32_t data32;
> >> > +    int rc;
> >> > +
> >> > +    vpci_lock(d);
> >> > +    ecam = vpci_ecam_find(d, addr);
> >> > +    if ( !ecam )
> >> > +    {
> >> > +        vpci_unlock(d);
> >> > +        return X86EMUL_UNHANDLEABLE;
> >> > +    }
> >> > +
> >> > +    vpci_ecam_decode_addr(ecam, addr, &bus, &devfn, &reg);
> >> > +
> >> > +    if ( vpci_access_check(reg, len) || reg >= 0xfff )
> >> 
> >> So this function iirc allows only 1-, 2-, and 4-byte accesses. Other
> >> than with port I/O, MMCFG allows wider ones, and once again I
> >> don't think hardware would raise any kind of fault in such a case.
> >> The general expectation is for the fabric to split such accesses.
> > 
> > Hm, the PCIe spec is not authoritative in this regard; it states that
> > supporting 8B accesses is not mandatory. Xen/Linux/FreeBSD will never
> > attempt any access > 4B, hence I haven't coded this case.
> > 
> > Would you be fine with leaving this for later, or would you rather
> > have it implemented as part of this series?
> 
> Since it shouldn't be meaningfully more code, I'd prefer if it was
> done right away. Otherwise I'd have to ask for a "fixme" comment,
> and I'd rather avoid such considering the PVHv1 history.

NP, I've just added it. I have however implemented it by splitting the
access into two 4-byte accesses and performing two calls to
vpci_{read/write}.
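
Roughly like the following for the read side (an illustrative sketch reusing
the names from the patch below; the actual v4 code may differ):

    /* Hypothetical 8-byte handling: split into two 4-byte vpci reads. */
    if ( len == 8 )
    {
        uint32_t lo, hi;

        rc = xen_vpci_read(ecam->segment, bus, devfn, reg, 4, &lo);
        if ( !rc )
            rc = xen_vpci_read(ecam->segment, bus, devfn, reg + 4, 4, &hi);
        if ( !rc )
            *data = ((uint64_t)hi << 32) | lo;
    }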

Thanks, Roger.

Patch

diff --git a/xen/arch/x86/hvm/dom0_build.c b/xen/arch/x86/hvm/dom0_build.c
index 020c355faf..b8999a053a 100644
--- a/xen/arch/x86/hvm/dom0_build.c
+++ b/xen/arch/x86/hvm/dom0_build.c
@@ -38,6 +38,8 @@ 
 #include <public/hvm/hvm_info_table.h>
 #include <public/hvm/hvm_vcpu.h>
 
+#include "../x86_64/mmconfig.h"
+
 /*
  * Have the TSS cover the ISA port range, which makes it
  * - 104 bytes base structure
@@ -1048,6 +1050,24 @@  static int __init pvh_setup_acpi(struct domain *d, paddr_t start_info)
     return 0;
 }
 
+int __init pvh_setup_ecam(struct domain *d)
+{
+    unsigned int i;
+    int rc;
+
+    for ( i = 0; i < pci_mmcfg_config_num; i++ )
+    {
+        rc = register_vpci_ecam_handler(d, pci_mmcfg_config[i].address,
+                                        pci_mmcfg_config[i].start_bus_number,
+                                        pci_mmcfg_config[i].end_bus_number,
+                                        pci_mmcfg_config[i].pci_segment);
+        if ( rc )
+            return rc;
+    }
+
+    return 0;
+}
+
 int __init dom0_construct_pvh(struct domain *d, const module_t *image,
                               unsigned long image_headroom,
                               module_t *initrd,
@@ -1090,6 +1110,13 @@  int __init dom0_construct_pvh(struct domain *d, const module_t *image,
         return rc;
     }
 
+    rc = pvh_setup_ecam(d);
+    if ( rc )
+    {
+        printk("Failed to setup Dom0 PCI ECAM areas: %d\n", rc);
+        return rc;
+    }
+
     panic("Building a PVHv2 Dom0 is not yet supported.");
     return 0;
 }
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 7f3322ede6..ef3ad2a615 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -613,6 +613,7 @@  int hvm_domain_initialise(struct domain *d)
     spin_lock_init(&d->arch.hvm_domain.write_map.lock);
     INIT_LIST_HEAD(&d->arch.hvm_domain.write_map.list);
     INIT_LIST_HEAD(&d->arch.hvm_domain.g2m_ioport_list);
+    INIT_LIST_HEAD(&d->arch.hvm_domain.ecam_regions);
 
     hvm_init_cacheattr_region_list(d);
 
@@ -725,6 +726,7 @@  void hvm_domain_destroy(struct domain *d)
 {
     struct list_head *ioport_list, *tmp;
     struct g2m_ioport *ioport;
+    struct hvm_ecam *ecam, *etmp;
 
     xfree(d->arch.hvm_domain.io_handler);
     d->arch.hvm_domain.io_handler = NULL;
@@ -752,6 +754,14 @@  void hvm_domain_destroy(struct domain *d)
         list_del(&ioport->list);
         xfree(ioport);
     }
+
+    list_for_each_entry_safe ( ecam, etmp, &d->arch.hvm_domain.ecam_regions,
+                               next )
+    {
+        list_del(&ecam->next);
+        xfree(ecam);
+    }
+
 }
 
 static int hvm_save_tsc_adjust(struct domain *d, hvm_domain_context_t *h)
diff --git a/xen/arch/x86/hvm/io.c b/xen/arch/x86/hvm/io.c
index 80f842b092..1f138989ff 100644
--- a/xen/arch/x86/hvm/io.c
+++ b/xen/arch/x86/hvm/io.c
@@ -403,6 +403,145 @@  void register_vpci_portio_handler(struct domain *d)
     handler->ops = &vpci_portio_ops;
 }
 
+/* Handlers to trap PCI ECAM config accesses. */
+static struct hvm_ecam *vpci_ecam_find(struct domain *d, unsigned long addr)
+{
+    struct hvm_ecam *ecam = NULL;
+
+    ASSERT(vpci_locked(d));
+    list_for_each_entry ( ecam, &d->arch.hvm_domain.ecam_regions, next )
+        if ( addr >= ecam->addr && addr < ecam->addr + ecam->size )
+            return ecam;
+
+    return NULL;
+}
+
+static void vpci_ecam_decode_addr(struct hvm_ecam *ecam, unsigned long addr,
+                                  unsigned int *bus, unsigned int *devfn,
+                                  unsigned int *reg)
+{
+    addr -= ecam->addr;
+    *bus = ((addr >> 20) & 0xff) + ecam->bus;
+    *devfn = (addr >> 12) & 0xff;
+    *reg = addr & 0xfff;
+}
+
+static int vpci_ecam_accept(struct vcpu *v, unsigned long addr)
+{
+    struct domain *d = v->domain;
+    int found;
+
+    vpci_lock(d);
+    found = !!vpci_ecam_find(v->domain, addr);
+    vpci_unlock(d);
+
+    return found;
+}
+
+static int vpci_ecam_read(struct vcpu *v, unsigned long addr,
+                          unsigned int len, unsigned long *data)
+{
+    struct domain *d = v->domain;
+    struct hvm_ecam *ecam;
+    unsigned int bus, devfn, reg;
+    uint32_t data32;
+    int rc;
+
+    vpci_lock(d);
+    ecam = vpci_ecam_find(d, addr);
+    if ( !ecam )
+    {
+        vpci_unlock(d);
+        return X86EMUL_UNHANDLEABLE;
+    }
+
+    vpci_ecam_decode_addr(ecam, addr, &bus, &devfn, &reg);
+
+    if ( vpci_access_check(reg, len) || reg >= 0xfff )
+    {
+        vpci_unlock(d);
+        return X86EMUL_UNHANDLEABLE;
+    }
+
+    rc = xen_vpci_read(ecam->segment, bus, devfn, reg, len, &data32);
+    if ( !rc )
+        *data = data32;
+    vpci_unlock(d);
+
+    return rc ? X86EMUL_UNHANDLEABLE : X86EMUL_OKAY;
+}
+
+static int vpci_ecam_write(struct vcpu *v, unsigned long addr,
+                           unsigned int len, unsigned long data)
+{
+    struct domain *d = v->domain;
+    struct hvm_ecam *ecam;
+    unsigned int bus, devfn, reg;
+    int rc;
+
+    vpci_lock(d);
+    ecam = vpci_ecam_find(d, addr);
+    if ( !ecam )
+    {
+        vpci_unlock(d);
+        return X86EMUL_UNHANDLEABLE;
+    }
+
+    vpci_ecam_decode_addr(ecam, addr, &bus, &devfn, &reg);
+
+    if ( vpci_access_check(reg, len) || reg >= 0xfff )
+    {
+        vpci_unlock(d);
+        return X86EMUL_UNHANDLEABLE;
+    }
+
+    rc = xen_vpci_write(ecam->segment, bus, devfn, reg, len, data);
+    vpci_unlock(d);
+
+    return rc ? X86EMUL_UNHANDLEABLE : X86EMUL_OKAY;
+}
+
+static const struct hvm_mmio_ops vpci_ecam_ops = {
+    .check = vpci_ecam_accept,
+    .read = vpci_ecam_read,
+    .write = vpci_ecam_write,
+};
+
+int register_vpci_ecam_handler(struct domain *d, paddr_t addr,
+                               unsigned int start_bus, unsigned int end_bus,
+                               unsigned int seg)
+{
+    struct hvm_ecam *ecam;
+
+    ASSERT(is_hardware_domain(d));
+
+    vpci_lock(d);
+    if ( vpci_ecam_find(d, addr) )
+    {
+        vpci_unlock(d);
+        return -EEXIST;
+    }
+
+    ecam = xzalloc(struct hvm_ecam);
+    if ( !ecam )
+    {
+        vpci_unlock(d);
+        return -ENOMEM;
+    }
+
+    if ( list_empty(&d->arch.hvm_domain.ecam_regions) )
+        register_mmio_handler(d, &vpci_ecam_ops);
+
+    ecam->addr = addr + (start_bus << 20);
+    ecam->bus = start_bus;
+    ecam->segment = seg;
+    ecam->size = (end_bus - start_bus + 1) << 20;
+    list_add(&ecam->next, &d->arch.hvm_domain.ecam_regions);
+    vpci_unlock(d);
+
+    return 0;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
index cbf4170789..b69c33df3c 100644
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -100,6 +100,14 @@  struct hvm_pi_ops {
     void (*do_resume)(struct vcpu *v);
 };
 
+struct hvm_ecam {
+    paddr_t addr;
+    size_t size;
+    unsigned int bus;
+    unsigned int segment;
+    struct list_head next;
+};
+
 struct hvm_domain {
     /* Guest page range used for non-default ioreq servers */
     struct {
@@ -187,6 +195,9 @@  struct hvm_domain {
     /* Lock for the PCI emulation layer (vPCI). */
     spinlock_t vpci_lock;
 
+    /* List of ECAM (MMCFG) regions trapped by Xen. */
+    struct list_head ecam_regions;
+
     /* List of permanently write-mapped pages. */
     struct {
         spinlock_t lock;
diff --git a/xen/include/asm-x86/hvm/io.h b/xen/include/asm-x86/hvm/io.h
index 9ed1bf2e06..f9b9829eaf 100644
--- a/xen/include/asm-x86/hvm/io.h
+++ b/xen/include/asm-x86/hvm/io.h
@@ -163,6 +163,11 @@  void register_g2m_portio_handler(struct domain *d);
 /* HVM port IO handler for PCI accesses. */
 void register_vpci_portio_handler(struct domain *d);
 
+/* HVM MMIO handler for PCI ECAM accesses. */
+int register_vpci_ecam_handler(struct domain *d, paddr_t addr,
+                               unsigned int start_bus, unsigned int end_bus,
+                               unsigned int seg);
+
 #endif /* __ASM_X86_HVM_IO_H__ */