[v2,8/9] vpci/msi: add MSI handlers

Message ID 20170420151743.90889-9-roger.pau@citrix.com (mailing list archive)
State New, archived

Commit Message

Roger Pau Monné April 20, 2017, 3:17 p.m. UTC
Add handlers for the MSI control, address, data and mask fields in order to
detect accesses to them and set up the interrupts as requested by the guest.

Note that the pending register is not trapped, and the guest can freely
read/write to it.

Whether Xen provides this functionality (MSI emulation) to Dom0 is controlled
by the "msi" option of the dom0 command-line parameter. When this option is
disabled Xen hides the MSI capability structure from Dom0.
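
For example, with this patch applied the MSI emulation can be turned off for a
PVH Dom0 by booting Xen with something along the lines of "dom0=pvh,no-msi"
(see the xen-command-line.markdown change below).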

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
Jan Beulich <jbeulich@suse.com>
Andrew Cooper <andrew.cooper3@citrix.com>
Paul Durrant <paul.durrant@citrix.com>
---
NB: I've only been able to test this with devices using a single MSI interrupt
and no mask register. I will try to find hardware that supports the mask
register and more than one vector, but I cannot make any promises.

If there are doubts about the untested parts, we could always force Xen to
report no per-vector masking support and only one available vector, but I would
rather avoid doing that.
---
 docs/misc/xen-command-line.markdown |   9 +-
 xen/arch/x86/dom0_build.c           |  12 +-
 xen/arch/x86/hvm/vmsi.c             |  21 ++
 xen/drivers/vpci/Makefile           |   2 +-
 xen/drivers/vpci/capabilities.c     |   7 +-
 xen/drivers/vpci/msi.c              | 469 ++++++++++++++++++++++++++++++++++++
 xen/include/asm-x86/hvm/io.h        |   4 +
 xen/include/asm-x86/msi.h           |   2 +
 xen/include/xen/hvm/irq.h           |   1 +
 xen/include/xen/vpci.h              |  26 ++
 10 files changed, 545 insertions(+), 8 deletions(-)
 create mode 100644 xen/drivers/vpci/msi.c

Comments

Julien Grall April 24, 2017, 3:31 p.m. UTC | #1
Hi Roger,

On 20/04/17 16:17, Roger Pau Monne wrote:
> diff --git a/xen/drivers/vpci/msi.c b/xen/drivers/vpci/msi.c
> new file mode 100644
> index 0000000000..aea6c68907
> --- /dev/null
> +++ b/xen/drivers/vpci/msi.c
> @@ -0,0 +1,469 @@
> +/*
> + * Handlers for accesses to the MSI capability structure.
> + *
> + * Copyright (C) 2017 Citrix Systems R&D
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms and conditions of the GNU General Public
> + * License, version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public
> + * License along with this program; If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include <xen/sched.h>
> +#include <xen/vpci.h>
> +#include <asm/msi.h>
> +#include <xen/keyhandler.h>
> +
> +static void vpci_msi_mask_pirq(int pirq, bool mask)
> +{
> +        struct pirq *pinfo = pirq_info(current->domain, pirq);

We don't have pirqs on ARM and don't plan to introduce them for MSI, as
interrupts will be handled directly by a virtual interrupt controller
(see the vITS series [1]).

It would be nice if you could make the vPCI code architecture-agnostic. We
would be happy to help here.

> +        struct irq_desc *desc;
> +        unsigned long flags;
> +        int irq;
> +
> +        ASSERT(pinfo);
> +        irq = pinfo->arch.irq;
> +        ASSERT(irq < nr_irqs);
> +
> +        desc = irq_to_desc(irq);

Similarly we don't have irq_desc for MSI.

> +        ASSERT(desc);
> +
> +        spin_lock_irqsave(&desc->lock, flags);
> +        guest_mask_msi_irq(desc, mask);
> +        spin_unlock_irqrestore(&desc->lock, flags);
> +}
> +

[...]

> +static int vpci_init_msi(struct pci_dev *pdev)
> +{
> +    uint8_t seg = pdev->seg, bus = pdev->bus;
> +    uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
> +    struct vpci_msi *msi = NULL;
> +    unsigned int msi_offset;
> +    uint16_t control;
> +    int rc;
> +
> +    msi_offset = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSI);
> +    if ( !msi_offset )
> +        return 0;
> +
> +    if ( !dom0_msi )

I would introduce a helper to allow a per-architecture decision. Likely
on ARM MSI will be enabled by default.

[...]

> diff --git a/xen/include/asm-x86/hvm/io.h b/xen/include/asm-x86/hvm/io.h
> index 0434aca706..899e37ae0f 100644
> --- a/xen/include/asm-x86/hvm/io.h
> +++ b/xen/include/asm-x86/hvm/io.h
> @@ -126,6 +126,10 @@ void hvm_dpci_eoi(struct domain *d, unsigned int guest_irq,
>  void msix_write_completion(struct vcpu *);
>  void msixtbl_init(struct domain *d);
>
> +/* Get the vector/flags from a MSI address/data fields. */
> +unsigned int msi_vector(uint16_t data);
> +unsigned int msi_flags(uint16_t data, uint64_t addr);

Should not those 2 helpers go in msi.h?

> +
>  enum stdvga_cache_state {
>      STDVGA_CACHE_UNINITIALIZED,
>      STDVGA_CACHE_ENABLED,
> diff --git a/xen/include/asm-x86/msi.h b/xen/include/asm-x86/msi.h
> index a5de6a1328..dcbec8cf04 100644
> --- a/xen/include/asm-x86/msi.h
> +++ b/xen/include/asm-x86/msi.h
> @@ -251,4 +251,6 @@ void ack_nonmaskable_msi_irq(struct irq_desc *);
>  void end_nonmaskable_msi_irq(struct irq_desc *, u8 vector);
>  void set_msi_affinity(struct irq_desc *, const cpumask_t *);
>
> +extern bool dom0_msi;
> +
>  #endif /* __ASM_MSI_H */

Cheers,

[1] 
https://lists.xenproject.org/archives/html/xen-devel/2017-04/msg01672.html
Roger Pau Monné April 25, 2017, 11:49 a.m. UTC | #2
On Mon, Apr 24, 2017 at 04:31:57PM +0100, Julien Grall wrote:
> Hi Roger,
> 
> On 20/04/17 16:17, Roger Pau Monne wrote:
> > diff --git a/xen/drivers/vpci/msi.c b/xen/drivers/vpci/msi.c
> > new file mode 100644
> > index 0000000000..aea6c68907
> > --- /dev/null
> > +++ b/xen/drivers/vpci/msi.c
> > @@ -0,0 +1,469 @@
> > +/*
> > + * Handlers for accesses to the MSI capability structure.
> > + *
> > + * Copyright (C) 2017 Citrix Systems R&D
> > + *
> > + * This program is free software; you can redistribute it and/or
> > + * modify it under the terms and conditions of the GNU General Public
> > + * License, version 2, as published by the Free Software Foundation.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU General Public
> > + * License along with this program; If not, see <http://www.gnu.org/licenses/>.
> > + */
> > +
> > +#include <xen/sched.h>
> > +#include <xen/vpci.h>
> > +#include <asm/msi.h>
> > +#include <xen/keyhandler.h>
> > +
> > +static void vpci_msi_mask_pirq(int pirq, bool mask)
> > +{
> > +        struct pirq *pinfo = pirq_info(current->domain, pirq);
> 
> We don't have pirqs on ARM and don't plan to introduce them for MSI, as
> interrupts will be handled directly by a virtual interrupt controller (see
> the vITS series [1]).
> 
> It would be nice if you could make the vPCI code architecture-agnostic. We
> would be happy to help here.
>
> > +        struct irq_desc *desc;
> > +        unsigned long flags;
> > +        int irq;
> > +
> > +        ASSERT(pinfo);
> > +        irq = pinfo->arch.irq;
> > +        ASSERT(irq < nr_irqs);
> > +
> > +        desc = irq_to_desc(irq);
> 
> Similarly we don't have irq_desc for MSI.

OK, I've moved all the arch-specific functions into vmsi.c, and introduced a
vpci_arch_msi struct in order to store the PIRQ on x86.
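
Something along these lines (just a sketch; the exact layout will be whatever
ends up in v3):

    /* x86 instance of the arch-specific vPCI MSI state (illustrative only). */
    struct vpci_arch_msi {
        int pirq;   /* PIRQ backing the emulated MSI, -1 when unbound. */
    };

That way the common MSI code only stores a struct vpci_arch_msi and calls arch
helpers to bind/mask it, and ARM can supply its own definition.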

> > +        ASSERT(desc);
> > +
> > +        spin_lock_irqsave(&desc->lock, flags);
> > +        guest_mask_msi_irq(desc, mask);
> > +        spin_unlock_irqrestore(&desc->lock, flags);
> > +}
> > +
> 
> [...]
> 
> > +static int vpci_init_msi(struct pci_dev *pdev)
> > +{
> > +    uint8_t seg = pdev->seg, bus = pdev->bus;
> > +    uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
> > +    struct vpci_msi *msi = NULL;
> > +    unsigned int msi_offset;
> > +    uint16_t control;
> > +    int rc;
> > +
> > +    msi_offset = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSI);
> > +    if ( !msi_offset )
> > +        return 0;
> > +
> > +    if ( !dom0_msi )
> 
> I would introduce a helper to allow a per-architecture decision. Likely on
> ARM MSI will be enabled by default.

dom0_msi is also enabled by default on x86.

> > diff --git a/xen/include/asm-x86/hvm/io.h b/xen/include/asm-x86/hvm/io.h
> > index 0434aca706..899e37ae0f 100644
> > --- a/xen/include/asm-x86/hvm/io.h
> > +++ b/xen/include/asm-x86/hvm/io.h
> > @@ -126,6 +126,10 @@ void hvm_dpci_eoi(struct domain *d, unsigned int guest_irq,
> >  void msix_write_completion(struct vcpu *);
> >  void msixtbl_init(struct domain *d);
> > 
> > +/* Get the vector/flags from a MSI address/data fields. */
> > +unsigned int msi_vector(uint16_t data);
> > +unsigned int msi_flags(uint16_t data, uint64_t addr);
> 
> Should not those 2 helpers go in msi.h?

The other guest-related MSI functions are in io.h; msi.h seems to only contain
functions that deal with the hardware itself (although I could be wrong).

Thanks, Roger.
Julien Grall April 25, 2017, noon UTC | #3
Hi Roger,

On 25/04/17 12:49, Roger Pau Monne wrote:
> On Mon, Apr 24, 2017 at 04:31:57PM +0100, Julien Grall wrote:
>>> +static int vpci_init_msi(struct pci_dev *pdev)
>>> +{
>>> +    uint8_t seg = pdev->seg, bus = pdev->bus;
>>> +    uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
>>> +    struct vpci_msi *msi = NULL;
>>> +    unsigned int msi_offset;
>>> +    uint16_t control;
>>> +    int rc;
>>> +
>>> +    msi_offset = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSI);
>>> +    if ( !msi_offset )
>>> +        return 0;
>>> +
>>> +    if ( !dom0_msi )
>>
>> I would introduce a helper to allow a per-architecture decision. Likely on
>> ARM MSI will be enabled by default.
>
> dom0_msi is also enabled by default on x86.

Sorry, by default I meant that they will never be disabled on ARM. So you
could introduce a helper similar to is_domain_direct_mapped to avoid the
introduction of dom0_msi for ARM.
Roger Pau Monné April 25, 2017, 1:19 p.m. UTC | #4
On Tue, Apr 25, 2017 at 01:00:06PM +0100, Julien Grall wrote:
> Hi Roger,
> 
> On 25/04/17 12:49, Roger Pau Monne wrote:
> > On Mon, Apr 24, 2017 at 04:31:57PM +0100, Julien Grall wrote:
> > > > +static int vpci_init_msi(struct pci_dev *pdev)
> > > > +{
> > > > +    uint8_t seg = pdev->seg, bus = pdev->bus;
> > > > +    uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
> > > > +    struct vpci_msi *msi = NULL;
> > > > +    unsigned int msi_offset;
> > > > +    uint16_t control;
> > > > +    int rc;
> > > > +
> > > > +    msi_offset = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSI);
> > > > +    if ( !msi_offset )
> > > > +        return 0;
> > > > +
> > > > +    if ( !dom0_msi )
> > > 
> > > I would introduce a helper to allow a per-architecture decision. Likely on
> > > ARM MSI will be enabled by default.
> > 
> > dom0_msi is also enabled by default on x86.
> 
> Sorry, by default I meant that they will never be disabled on ARM. So you
> could introduce a helper similar to is_domain_direct_mapped to avoid the
> introduction of dom0_msi for ARM.

OK, no problem. I've added two vpci_msi{x}_enabled macros that you can replace
with 'true' if you wish.
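
Roughly (a sketch only; the exact form may still change in v3):

    /* x86 (illustrative): */
    #define vpci_msi_enabled dom0_msi
    /* The MSI-X counterpart follows the same pattern in the MSI-X patch. */

On ARM both macros could simply expand to true, so dom0_msi wouldn't need to
exist there at all.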

Roger.

Patch

diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
index 450b222734..38a8d05e63 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -660,7 +660,7 @@  affinities to prefer but be not limited to the specified node(s).
 Pin dom0 vcpus to their respective pcpus
 
 ### dom0
-> `= List of [ pvh | shadow ]`
+> `= List of [ pvh | shadow | msi ]`
 
 > Sub-options:
 
@@ -677,6 +677,13 @@  Flag that makes a dom0 boot in PVHv2 mode.
 Flag that makes a dom0 use shadow paging. Only works when "pvh" is
 enabled.
 
+> `msi`
+
+> Default: `true`
+
+Enable or disable (using the `no-` prefix) the MSI emulation inside of
+Xen for a PVH Dom0. Note that this option has no effect on a PV Dom0.
+
 ### dtuart (ARM)
 > `= path [:options]`
 
diff --git a/xen/arch/x86/dom0_build.c b/xen/arch/x86/dom0_build.c
index cc8acad688..01afcf6215 100644
--- a/xen/arch/x86/dom0_build.c
+++ b/xen/arch/x86/dom0_build.c
@@ -176,29 +176,37 @@  struct vcpu *__init alloc_dom0_vcpu0(struct domain *dom0)
 bool __initdata opt_dom0_shadow;
 #endif
 bool __initdata dom0_pvh;
+bool __initdata dom0_msi = true;
 
 /*
  * List of parameters that affect Dom0 creation:
  *
  *  - pvh               Create a PVHv2 Dom0.
  *  - shadow            Use shadow paging for Dom0.
+ *  - msi               MSI functionality.
  */
 static void __init parse_dom0_param(char *s)
 {
     char *ss;
+    bool enabled;
 
     do {
+        enabled = !!strncmp(s, "no-", 3);
+        if ( !enabled )
+            s += 3;
 
         ss = strchr(s, ',');
         if ( ss )
             *ss = '\0';
 
         if ( !strcmp(s, "pvh") )
-            dom0_pvh = true;
+            dom0_pvh = enabled;
 #ifdef CONFIG_SHADOW_PAGING
         else if ( !strcmp(s, "shadow") )
-            opt_dom0_shadow = true;
+            opt_dom0_shadow = enabled;
 #endif
+        else if ( !strcmp(s, "msi") )
+            dom0_msi = enabled;
 
         s = ss + 1;
     } while ( ss );
diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c
index a36692c313..614d975efe 100644
--- a/xen/arch/x86/hvm/vmsi.c
+++ b/xen/arch/x86/hvm/vmsi.c
@@ -622,3 +622,24 @@  void msix_write_completion(struct vcpu *v)
     if ( msixtbl_write(v, ctrl_address, 4, 0) != X86EMUL_OKAY )
         gdprintk(XENLOG_WARNING, "MSI-X write completion failure\n");
 }
+
+unsigned int msi_vector(uint16_t data)
+{
+    return (data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
+}
+
+unsigned int msi_flags(uint16_t data, uint64_t addr)
+{
+    unsigned int rh, dm, dest_id, deliv_mode, trig_mode;
+
+    rh = (addr >> MSI_ADDR_REDIRECTION_SHIFT) & 0x1;
+    dm = (addr >> MSI_ADDR_DESTMODE_SHIFT) & 0x1;
+    dest_id = (addr & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
+    deliv_mode = (data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x7;
+    trig_mode = (data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
+
+    return dest_id | (rh << GFLAGS_SHIFT_RH) | (dm << GFLAGS_SHIFT_DM) |
+           (deliv_mode << GFLAGS_SHIFT_DELIV_MODE) |
+           (trig_mode << GFLAGS_SHIFT_TRG_MODE);
+}
+
diff --git a/xen/drivers/vpci/Makefile b/xen/drivers/vpci/Makefile
index c3f3085c93..ef4fc6caf3 100644
--- a/xen/drivers/vpci/Makefile
+++ b/xen/drivers/vpci/Makefile
@@ -1 +1 @@ 
-obj-y += vpci.o header.o capabilities.o
+obj-y += vpci.o header.o capabilities.o msi.o
diff --git a/xen/drivers/vpci/capabilities.c b/xen/drivers/vpci/capabilities.c
index 204355e673..ad9f45c2e1 100644
--- a/xen/drivers/vpci/capabilities.c
+++ b/xen/drivers/vpci/capabilities.c
@@ -109,7 +109,7 @@  static int vpci_index_capabilities(struct pci_dev *pdev)
     return 0;
 }
 
-static void vpci_mask_capability(struct pci_dev *pdev, uint8_t cap_id)
+void xen_vpci_mask_capability(struct pci_dev *pdev, uint8_t cap_id)
 {
     struct vpci_capability *cap;
     uint8_t cap_offset;
@@ -138,9 +138,8 @@  static int vpci_capabilities_init(struct pci_dev *pdev)
     if ( rc )
         return rc;
 
-    /* Mask MSI and MSI-X capabilities until Xen handles them. */
-    vpci_mask_capability(pdev, PCI_CAP_ID_MSI);
-    vpci_mask_capability(pdev, PCI_CAP_ID_MSIX);
+    /* Mask MSI-X capability until Xen handles it. */
+    xen_vpci_mask_capability(pdev, PCI_CAP_ID_MSIX);
 
     return 0;
 }
diff --git a/xen/drivers/vpci/msi.c b/xen/drivers/vpci/msi.c
new file mode 100644
index 0000000000..aea6c68907
--- /dev/null
+++ b/xen/drivers/vpci/msi.c
@@ -0,0 +1,469 @@ 
+/*
+ * Handlers for accesses to the MSI capability structure.
+ *
+ * Copyright (C) 2017 Citrix Systems R&D
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms and conditions of the GNU General Public
+ * License, version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <xen/sched.h>
+#include <xen/vpci.h>
+#include <asm/msi.h>
+#include <xen/keyhandler.h>
+
+static void vpci_msi_mask_pirq(int pirq, bool mask)
+{
+        struct pirq *pinfo = pirq_info(current->domain, pirq);
+        struct irq_desc *desc;
+        unsigned long flags;
+        int irq;
+
+        ASSERT(pinfo);
+        irq = pinfo->arch.irq;
+        ASSERT(irq < nr_irqs);
+
+        desc = irq_to_desc(irq);
+        ASSERT(desc);
+
+        spin_lock_irqsave(&desc->lock, flags);
+        guest_mask_msi_irq(desc, mask);
+        spin_unlock_irqrestore(&desc->lock, flags);
+}
+
+/* Handlers for the MSI control field (PCI_MSI_FLAGS). */
+static int vpci_msi_control_read(struct pci_dev *pdev, unsigned int reg,
+                                 union vpci_val *val, void *data)
+{
+    struct vpci_msi *msi = data;
+
+    if ( msi->enabled )
+        val->word |= PCI_MSI_FLAGS_ENABLE;
+    if ( msi->masking )
+        val->word |= PCI_MSI_FLAGS_MASKBIT;
+    if ( msi->address64 )
+        val->word |= PCI_MSI_FLAGS_64BIT;
+
+    /* Set multiple message capable. */
+    val->word |= ((fls(msi->max_vectors) - 1) << 1) & PCI_MSI_FLAGS_QMASK;
+
+    /* Set current number of configured vectors. */
+    val->word |= ((fls(msi->guest_vectors) - 1) << 4) & PCI_MSI_FLAGS_QSIZE;
+
+    return 0;
+}
+
+static int vpci_msi_control_write(struct pci_dev *pdev, unsigned int reg,
+                                  union vpci_val val, void *data)
+{
+    struct vpci_msi *msi = data;
+    unsigned int i, vectors = 1 << ((val.word & PCI_MSI_FLAGS_QSIZE) >> 4);
+    int rc;
+
+    if ( vectors > msi->max_vectors )
+        return -EINVAL;
+
+    msi->guest_vectors = vectors;
+
+    if ( !((val.word ^ msi->enabled) & PCI_MSI_FLAGS_ENABLE) )
+        return 0;
+
+    if ( val.word & PCI_MSI_FLAGS_ENABLE )
+    {
+        int index = -1;
+        struct msi_info msi_info = {
+            .seg = pdev->seg,
+            .bus = pdev->bus,
+            .devfn = pdev->devfn,
+            .entry_nr = vectors,
+        };
+
+        ASSERT(!msi->enabled);
+
+        /* Get a PIRQ. */
+        rc = allocate_and_map_msi_pirq(pdev->domain, &index, &msi->pirq,
+                                       &msi_info);
+        if ( rc )
+        {
+            dprintk(XENLOG_ERR, "%04x:%02x:%02x.%u: failed to map PIRQ: %d\n",
+                    pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+                    PCI_FUNC(pdev->devfn), rc);
+            return rc;
+        }
+
+        ASSERT(msi->pirq != -1);
+        ASSERT(msi->vectors == 0);
+        msi->vectors = vectors;
+
+        for ( i = 0; i < vectors; i++ )
+        {
+            xen_domctl_bind_pt_irq_t bind = {
+                .hvm_domid = DOMID_SELF,
+                .machine_irq = msi->pirq + i,
+                .irq_type = PT_IRQ_TYPE_MSI,
+                .u.msi.gvec = msi_vector(msi->data) + i,
+                .u.msi.gflags = msi_flags(msi->data, msi->address),
+            };
+
+            pcidevs_lock();
+            rc = pt_irq_create_bind(pdev->domain, &bind);
+            if ( rc )
+            {
+                dprintk(XENLOG_ERR,
+                        "%04x:%02x:%02x.%u: failed to bind PIRQ %u: %d\n",
+                        pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+                        PCI_FUNC(pdev->devfn), msi->pirq + i, rc);
+                spin_lock(&pdev->domain->event_lock);
+                unmap_domain_pirq(pdev->domain, msi->pirq);
+                spin_unlock(&pdev->domain->event_lock);
+                pcidevs_unlock();
+                msi->pirq = -1;
+                msi->vectors = 0;
+                return rc;
+            }
+            pcidevs_unlock();
+        }
+
+        /* Apply the mask bits. */
+        if ( msi->masking )
+        {
+            uint32_t mask = msi->mask;
+
+            while ( mask )
+            {
+                unsigned int i = ffs(mask);
+
+                vpci_msi_mask_pirq(msi->pirq + i, true);
+                __clear_bit(i, &mask);
+            }
+        }
+
+        __msi_set_enable(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+                         PCI_FUNC(pdev->devfn), reg - PCI_MSI_FLAGS, 1);
+        msi->enabled = true;
+    }
+    else
+    {
+        ASSERT(msi->enabled);
+        __msi_set_enable(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+                         PCI_FUNC(pdev->devfn), reg - PCI_MSI_FLAGS, 0);
+
+        for ( i = 0; i < msi->vectors; i++ )
+        {
+            xen_domctl_bind_pt_irq_t bind = {
+                .hvm_domid = DOMID_SELF,
+                .machine_irq = msi->pirq + i,
+                .irq_type = PT_IRQ_TYPE_MSI,
+            };
+
+            pcidevs_lock();
+            pt_irq_destroy_bind(pdev->domain, &bind);
+            pcidevs_unlock();
+        }
+
+        pcidevs_lock();
+        spin_lock(&pdev->domain->event_lock);
+        unmap_domain_pirq(pdev->domain, msi->pirq);
+        spin_unlock(&pdev->domain->event_lock);
+        pcidevs_unlock();
+
+        msi->pirq = -1;
+        msi->vectors = 0;
+        msi->enabled = false;
+    }
+
+    return 0;
+}
+
+/* Handlers for the address field (32bit or low part of a 64bit address). */
+static int vpci_msi_address_read(struct pci_dev *pdev, unsigned int reg,
+                                 union vpci_val *val, void *data)
+{
+    struct vpci_msi *msi = data;
+
+    val->double_word = msi->address;
+
+    return 0;
+}
+
+static int vpci_msi_address_write(struct pci_dev *pdev, unsigned int reg,
+                                  union vpci_val val, void *data)
+{
+    struct vpci_msi *msi = data;
+
+    /* Clear low part. */
+    msi->address &= ~GENMASK(31, 0);
+    msi->address |= val.double_word;
+
+    return 0;
+}
+
+/* Handlers for the high part of a 64bit address field. */
+static int vpci_msi_address_upper_read(struct pci_dev *pdev, unsigned int reg,
+                                       union vpci_val *val, void *data)
+{
+    struct vpci_msi *msi = data;
+
+    val->double_word = msi->address >> 32;
+
+    return 0;
+}
+
+static int vpci_msi_address_upper_write(struct pci_dev *pdev, unsigned int reg,
+                                        union vpci_val val, void *data)
+{
+    struct vpci_msi *msi = data;
+
+    /* Clear high part. */
+    msi->address &= ~GENMASK(63, 32);
+    msi->address |= (uint64_t)val.double_word << 32;
+
+    return 0;
+}
+
+/* Handlers for the data field. */
+static int vpci_msi_data_read(struct pci_dev *pdev, unsigned int reg,
+                              union vpci_val *val, void *data)
+{
+    struct vpci_msi *msi = data;
+
+    val->word = msi->data;
+
+    return 0;
+}
+
+static int vpci_msi_data_write(struct pci_dev *pdev, unsigned int reg,
+                               union vpci_val val, void *data)
+{
+    struct vpci_msi *msi = data;
+
+    msi->data = val.word;
+
+    return 0;
+}
+
+static int vpci_msi_mask_read(struct pci_dev *pdev, unsigned int reg,
+                              union vpci_val *val, void *data)
+{
+    struct vpci_msi *msi = data;
+
+    val->double_word = msi->mask;
+
+    return 0;
+}
+
+static int vpci_msi_mask_write(struct pci_dev *pdev, unsigned int reg,
+                               union vpci_val val, void *data)
+{
+    struct vpci_msi *msi = data;
+    uint32_t dmask;
+
+    dmask = msi->mask ^ val.double_word;
+
+    if ( !dmask )
+        return 0;
+
+    while ( dmask && msi->pirq != -1 )
+    {
+        unsigned int i = ffs(dmask);
+
+        vpci_msi_mask_pirq(msi->pirq + i, !test_bit(i, &msi->mask));
+        __clear_bit(i, &dmask);
+    }
+
+    msi->mask = val.double_word;
+    return 0;
+}
+
+static int vpci_init_msi(struct pci_dev *pdev)
+{
+    uint8_t seg = pdev->seg, bus = pdev->bus;
+    uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
+    struct vpci_msi *msi = NULL;
+    unsigned int msi_offset;
+    uint16_t control;
+    int rc;
+
+    msi_offset = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSI);
+    if ( !msi_offset )
+        return 0;
+
+    if ( !dom0_msi )
+    {
+        xen_vpci_mask_capability(pdev, PCI_CAP_ID_MSI);
+        return 0;
+    }
+
+    msi = xzalloc(struct vpci_msi);
+    if ( !msi )
+        return -ENOMEM;
+
+    control = pci_conf_read16(seg, bus, slot, func,
+                              msi_control_reg(msi_offset));
+
+    rc = xen_vpci_add_register(pdev, vpci_msi_control_read,
+                               vpci_msi_control_write,
+                               msi_control_reg(msi_offset), 2, msi);
+    if ( rc )
+    {
+        dprintk(XENLOG_ERR,
+                "%04x:%02x:%02x.%u: failed to add handler for MSI control: %d\n",
+                seg, bus, slot, func, rc);
+        goto error;
+    }
+
+    /* Get the maximum number of vectors the device supports. */
+    msi->max_vectors = multi_msi_capable(control);
+    ASSERT(msi->max_vectors <= 32);
+
+    /* Initial value after reset. */
+    msi->guest_vectors = 1;
+
+    /* No PIRQ bind yet. */
+    msi->pirq = -1;
+
+    if ( is_64bit_address(control) )
+        msi->address64 = true;
+    if ( is_mask_bit_support(control) )
+        msi->masking = true;
+
+    rc = xen_vpci_add_register(pdev, vpci_msi_address_read,
+                               vpci_msi_address_write,
+                               msi_lower_address_reg(msi_offset), 4, msi);
+    if ( rc )
+    {
+        dprintk(XENLOG_ERR,
+                "%04x:%02x:%02x.%u: failed to add handler for MSI address: %d\n",
+                seg, bus, slot, func, rc);
+        goto error;
+    }
+
+    rc = xen_vpci_add_register(pdev, vpci_msi_data_read, vpci_msi_data_write,
+                               msi_data_reg(msi_offset, msi->address64), 2,
+                               msi);
+    if ( rc )
+    {
+        dprintk(XENLOG_ERR,
+                "%04x:%02x:%02x.%u: failed to add handler for MSI address: %d\n",
+                seg, bus, slot, func, rc);
+        goto error;
+    }
+
+    if ( msi->address64 )
+    {
+        rc = xen_vpci_add_register(pdev, vpci_msi_address_upper_read,
+                                   vpci_msi_address_upper_write,
+                                   msi_upper_address_reg(msi_offset), 4, msi);
+        if ( rc )
+        {
+            dprintk(XENLOG_ERR,
+                    "%04x:%02x:%02x.%u: failed to add handler for MSI address: %d\n",
+                    seg, bus, slot, func, rc);
+            goto error;
+        }
+    }
+
+    if ( msi->masking )
+    {
+        rc = xen_vpci_add_register(pdev, vpci_msi_mask_read,
+                                   vpci_msi_mask_write,
+                                   msi_mask_bits_reg(msi_offset,
+                                                     msi->address64), 4, msi);
+        if ( rc )
+        {
+            dprintk(XENLOG_ERR,
+                    "%04x:%02x:%02x.%u: failed to add handler for MSI mask: %d\n",
+                    seg, bus, slot, func, rc);
+            goto error;
+        }
+    }
+
+    pdev->vpci->msi = msi;
+
+    return 0;
+
+ error:
+    ASSERT(rc);
+    xfree(msi);
+    return rc;
+}
+
+REGISTER_VPCI_INIT(vpci_init_msi, false);
+
+static void vpci_dump_msi(unsigned char key)
+{
+    struct domain *d;
+    struct pci_dev *pdev;
+
+    printk("Guest MSI information:\n");
+
+    for_each_domain ( d )
+    {
+        if ( !has_vpci(d) )
+            continue;
+
+        vpci_lock(d);
+        list_for_each_entry ( pdev, &d->arch.pdev_list, domain_list)
+        {
+            uint8_t seg = pdev->seg, bus = pdev->bus;
+            uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
+            struct vpci_msi *msi = pdev->vpci->msi;
+            uint16_t data;
+            uint64_t addr;
+
+            if ( !msi )
+                continue;
+
+            printk("Device %04x:%02x:%02x.%u\n", seg, bus, slot, func);
+
+            printk("Enabled: %u Supports masking: %u 64-bit addresses: %u\n",
+                   msi->enabled, msi->masking, msi->address64);
+            printk("Max vectors: %u guest vectors: %u enabled vectors: %u\n",
+                   msi->max_vectors, msi->guest_vectors, msi->vectors);
+
+            data = msi->data;
+            addr = msi->address;
+            printk("vec=%#02x%7s%6s%3sassert%5s%7s dest_id=%lu pirq=%d\n",
+                   (data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT,
+                   data & MSI_DATA_DELIVERY_LOWPRI ? "lowest" : "fixed",
+                   data & MSI_DATA_TRIGGER_LEVEL ? "level" : "edge",
+                   data & MSI_DATA_LEVEL_ASSERT ? "" : "de",
+                   addr & MSI_ADDR_DESTMODE_LOGIC ? "log" : "phys",
+                   addr & MSI_ADDR_REDIRECTION_LOWPRI ? "lowest" : "cpu",
+                   (addr & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT,
+                   msi->pirq);
+
+            if ( msi->masking )
+                printk("mask=%#032x\n", msi->mask);
+            printk("\n");
+        }
+        vpci_unlock(d);
+    }
+}
+
+static int __init vpci_msi_setup_keyhandler(void)
+{
+    register_keyhandler('Z', vpci_dump_msi, "dump guest MSI state", 1);
+    return 0;
+}
+__initcall(vpci_msi_setup_keyhandler);
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
+
diff --git a/xen/include/asm-x86/hvm/io.h b/xen/include/asm-x86/hvm/io.h
index 0434aca706..899e37ae0f 100644
--- a/xen/include/asm-x86/hvm/io.h
+++ b/xen/include/asm-x86/hvm/io.h
@@ -126,6 +126,10 @@  void hvm_dpci_eoi(struct domain *d, unsigned int guest_irq,
 void msix_write_completion(struct vcpu *);
 void msixtbl_init(struct domain *d);
 
+/* Get the vector/flags from a MSI address/data fields. */
+unsigned int msi_vector(uint16_t data);
+unsigned int msi_flags(uint16_t data, uint64_t addr);
+
 enum stdvga_cache_state {
     STDVGA_CACHE_UNINITIALIZED,
     STDVGA_CACHE_ENABLED,
diff --git a/xen/include/asm-x86/msi.h b/xen/include/asm-x86/msi.h
index a5de6a1328..dcbec8cf04 100644
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -251,4 +251,6 @@  void ack_nonmaskable_msi_irq(struct irq_desc *);
 void end_nonmaskable_msi_irq(struct irq_desc *, u8 vector);
 void set_msi_affinity(struct irq_desc *, const cpumask_t *);
 
+extern bool dom0_msi;
+
 #endif /* __ASM_MSI_H */
diff --git a/xen/include/xen/hvm/irq.h b/xen/include/xen/hvm/irq.h
index 0d2c72c109..37dfb3b6c5 100644
--- a/xen/include/xen/hvm/irq.h
+++ b/xen/include/xen/hvm/irq.h
@@ -58,6 +58,7 @@  struct dev_intx_gsi_link {
 #define VMSI_TRIG_MODE    0x8000
 
 #define GFLAGS_SHIFT_RH             8
+#define GFLAGS_SHIFT_DM             9
 #define GFLAGS_SHIFT_DELIV_MODE     12
 #define GFLAGS_SHIFT_TRG_MODE       15
 
diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
index 75564b9d93..277e860d25 100644
--- a/xen/include/xen/vpci.h
+++ b/xen/include/xen/vpci.h
@@ -88,9 +88,35 @@  struct vpci {
 
     /* List of capabilities supported by the device. */
     struct list_head cap_list;
+
+    /* MSI data. */
+    struct vpci_msi {
+        /* Maximum number of vectors supported by the device. */
+        unsigned int max_vectors;
+        /* Current guest-written number of vectors. */
+        unsigned int guest_vectors;
+        /* Number of vectors configured. */
+        unsigned int vectors;
+        /* Address and data fields. */
+        uint64_t address;
+        uint16_t data;
+        /* PIRQ */
+        int pirq;
+        /* Mask bitfield. */
+        uint32_t mask;
+        /* MSI enabled? */
+        bool enabled;
+        /* Supports per-vector masking? */
+        bool masking;
+        /* 64-bit address capable? */
+        bool address64;
+    } *msi;
 #endif
 };
 
+/* Mask a PCI capability. */
+void xen_vpci_mask_capability(struct pci_dev *pdev, uint8_t cap_id);
+
 #endif
 
 /*