diff mbox

[V2,21/25] tools/libxc: Add a new interface to bind remapping format msi with pirq

Message ID 1502310866-10450-22-git-send-email-tianyu.lan@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

lan,Tianyu Aug. 9, 2017, 8:34 p.m. UTC
From: Chao Gao <chao.gao@intel.com>

Introduce a new binding relationship and provide a new interface to
manage the new relationship.

Signed-off-by: Chao Gao <chao.gao@intel.com>
Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
---
 tools/libxc/include/xenctrl.h |  17 ++++++
 tools/libxc/xc_domain.c       |  53 +++++++++++++++++
 xen/drivers/passthrough/io.c  | 135 +++++++++++++++++++++++++++++++++++-------
 xen/include/public/domctl.h   |   7 +++
 xen/include/xen/hvm/irq.h     |   7 +++
 5 files changed, 198 insertions(+), 21 deletions(-)

Comments

Roger Pau Monné Aug. 23, 2017, 10:41 a.m. UTC | #1
On Wed, Aug 09, 2017 at 04:34:22PM -0400, Lan Tianyu wrote:
> From: Chao Gao <chao.gao@intel.com>
> 
> Introduce a new binding relationship and provide a new interface to
> manage the new relationship.
> 
> Signed-off-by: Chao Gao <chao.gao@intel.com>
> Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
> ---
>  tools/libxc/include/xenctrl.h |  17 ++++++
>  tools/libxc/xc_domain.c       |  53 +++++++++++++++++
>  xen/drivers/passthrough/io.c  | 135 +++++++++++++++++++++++++++++++++++-------
>  xen/include/public/domctl.h   |   7 +++
>  xen/include/xen/hvm/irq.h     |   7 +++
>  5 files changed, 198 insertions(+), 21 deletions(-)
> 
> diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
> index dfaa9d5..b0a9437 100644
> --- a/tools/libxc/include/xenctrl.h
> +++ b/tools/libxc/include/xenctrl.h
> @@ -1720,6 +1720,15 @@ int xc_domain_ioport_mapping(xc_interface *xch,
>                               uint32_t nr_ports,
>                               uint32_t add_mapping);
>  
> +int xc_domain_update_msi_irq_remapping(
> +    xc_interface *xch,
> +    uint32_t domid,
> +    uint32_t pirq,
> +    uint32_t source_id,
> +    uint32_t data,
> +    uint64_t addr,
> +    uint64_t gtable);
> +
>  int xc_domain_update_msi_irq(
>      xc_interface *xch,
>      uint32_t domid,
> @@ -1734,6 +1743,14 @@ int xc_domain_unbind_msi_irq(xc_interface *xch,
>                               uint32_t pirq,
>                               uint32_t gflags);
>  
> +int xc_domain_unbind_msi_irq_remapping(
> +    xc_interface *xch,
> +    uint32_t domid,
> +    uint32_t pirq,
> +    uint32_t source_id,
> +    uint32_t data,
> +    uint64_t addr);

I think this doesn't match the coding style, but it seems like the
surrounding functions also use it, so I will let the maintainers
decide whether this is fine or not.

>  int xc_domain_bind_pt_irq(xc_interface *xch,
>                            uint32_t domid,
>                            uint8_t machine_irq,
> diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
> index 3bab4e8..4b6a510 100644
> --- a/tools/libxc/xc_domain.c
> +++ b/tools/libxc/xc_domain.c
> @@ -1702,8 +1702,34 @@ int xc_deassign_dt_device(
>      return rc;
>  }
>  
> +int xc_domain_update_msi_irq_remapping(
> +    xc_interface *xch,
> +    uint32_t domid,
> +    uint32_t pirq,
> +    uint32_t source_id,
> +    uint32_t data,
> +    uint64_t addr,
> +    uint64_t gtable)
> +{
> +    int rc;
> +    xen_domctl_bind_pt_irq_t *bind;

No newline.

> +    DECLARE_DOMCTL;
>  
> +    domctl.cmd = XEN_DOMCTL_bind_pt_irq;
> +    domctl.domain = (domid_t)domid;
>  
> +    bind = &(domctl.u.bind_pt_irq);
> +    bind->irq_type = PT_IRQ_TYPE_MSI_IR;
> +    bind->machine_irq = pirq;
> +    bind->u.msi_ir.source_id = source_id;
> +    bind->u.msi_ir.data = data;
> +    bind->u.msi_ir.addr = addr;
> +    bind->u.msi_ir.gtable = gtable;
> +
> +    rc = do_domctl(xch, &domctl);
> +    return rc;
> +}
>  
>  int xc_domain_update_msi_irq(
>      xc_interface *xch,
> @@ -1732,6 +1758,33 @@ int xc_domain_update_msi_irq(
>      return rc;
>  }
>  
> +int xc_domain_unbind_msi_irq_remapping(
> +    xc_interface *xch,
> +    uint32_t domid,
> +    uint32_t pirq,
> +    uint32_t source_id,
> +    uint32_t data,
> +    uint64_t addr)
> +{
> +    int rc;
> +    xen_domctl_bind_pt_irq_t *bind;
> +
> +    DECLARE_DOMCTL;
> +
> +    domctl.cmd = XEN_DOMCTL_unbind_pt_irq;
> +    domctl.domain = (domid_t)domid;
> +
> +    bind = &(domctl.u.bind_pt_irq);
> +    bind->irq_type = PT_IRQ_TYPE_MSI_IR;
> +    bind->machine_irq = pirq;
> +    bind->u.msi_ir.source_id = source_id;
> +    bind->u.msi_ir.data = data;
> +    bind->u.msi_ir.addr = addr;
> +
> +    rc = do_domctl(xch, &domctl);
> +    return rc;
> +}
> +
>  int xc_domain_unbind_msi_irq(
>      xc_interface *xch,
>      uint32_t domid,
> diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
> index 4d457f6..0510887 100644
> --- a/xen/drivers/passthrough/io.c
> +++ b/xen/drivers/passthrough/io.c
> @@ -276,6 +276,92 @@ static struct vcpu *vector_hashing_dest(const struct domain *d,
>      return dest;
>  }
>  
> +static inline void set_hvm_gmsi_info(struct hvm_gmsi_info *msi,
> +                                     xen_domctl_bind_pt_irq_t *pt_irq_bind)

Inline? I would rather let the compiler decide IMHO.

> +{
> +    if ( pt_irq_bind->irq_type == PT_IRQ_TYPE_MSI )

A switch seems like a better choice here.

> +    {
> +        msi->legacy.gvec = pt_irq_bind->u.msi.gvec;
> +        msi->legacy.gflags = pt_irq_bind->u.msi.gflags;
> +    }
> +    else if ( pt_irq_bind->irq_type == PT_IRQ_TYPE_MSI_IR )
> +    {
> +        msi->intremap.source_id = pt_irq_bind->u.msi_ir.source_id;
> +        msi->intremap.data = pt_irq_bind->u.msi_ir.data;
> +        msi->intremap.addr = pt_irq_bind->u.msi_ir.addr;
> +    }
> +    else
> +        BUG();

ASSERT_UNREACHABLE();

> +}
> +
> +static inline void clear_hvm_gmsi_info(struct hvm_gmsi_info *msi, int irq_type)

No inline.

> +{
> +    if ( irq_type == PT_IRQ_TYPE_MSI )

Same here (switch + ASSERT). Maybe a memset would be faster here?

> +    {
> +        msi->legacy.gvec = 0;
> +        msi->legacy.gflags = 0;
> +    }
> +    else if ( irq_type == PT_IRQ_TYPE_MSI_IR )
> +    {
> +        msi->intremap.source_id = 0;
> +        msi->intremap.data = 0;
> +        msi->intremap.addr = 0;
> +    }
> +    else
> +        BUG();
> +}
> +
> +static inline bool hvm_gmsi_info_need_update(struct hvm_gmsi_info *msi,

No inline.

> +                                         xen_domctl_bind_pt_irq_t *pt_irq_bind)
> +{
> +    if ( pt_irq_bind->irq_type == PT_IRQ_TYPE_MSI )

switch please.

> +        return ((msi->legacy.gvec != pt_irq_bind->u.msi.gvec) ||
> +                (msi->legacy.gflags != pt_irq_bind->u.msi.gflags));
> +    else if ( pt_irq_bind->irq_type == PT_IRQ_TYPE_MSI_IR )

Unneeded else.

> +        return ((msi->intremap.source_id != pt_irq_bind->u.msi_ir.source_id) ||
> +                (msi->intremap.data != pt_irq_bind->u.msi_ir.data) ||
> +                (msi->intremap.addr != pt_irq_bind->u.msi_ir.addr));
> +    BUG();

ASSERT_UNREACHABLE and newline.

> +    return 0;
> +}
> +
> +static int pirq_dpci_2_msi_attr(struct domain *d,
> +                                struct hvm_pirq_dpci *pirq_dpci, uint8_t *gvec,
> +                                uint8_t *dest, uint8_t *dm, uint8_t *dlm)
> +{
> +    int rc = 0;
> +
> +    if ( pirq_dpci->flags & HVM_IRQ_DPCI_GUEST_MSI )
> +    {
> +        *gvec = pirq_dpci->gmsi.legacy.gvec;
> +        *dest = pirq_dpci->gmsi.legacy.gflags & VMSI_DEST_ID_MASK;
> +        *dm = !!(pirq_dpci->gmsi.legacy.gflags & VMSI_DM_MASK);
> +        *dlm = (pirq_dpci->gmsi.legacy.gflags & VMSI_DELIV_MASK) >>
> +                GFLAGS_SHIFT_DELIV_MODE;

MASK_EXTR.

> +    }
> +    else if ( pirq_dpci->flags & HVM_IRQ_DPCI_GUEST_MSI_IR )
> +    {
> +        struct irq_remapping_request request;
> +        struct irq_remapping_info irq_info;
> +
> +        irq_request_msi_fill(&request, pirq_dpci->gmsi.intremap.source_id,
> +                             pirq_dpci->gmsi.intremap.addr,
> +                             pirq_dpci->gmsi.intremap.data);
> +        /* Currently, only viommu 0 is supported */
> +        rc = viommu_get_irq_info(d, 0, &request, &irq_info);
> +        if ( !rc )

I don't like the !rc construct, I would rather have:

if ( rc )
    return rc;

*gvec = ...;

But that's my personal taste, you should wait for maintainers to
express their opinions.

> +        {
> +            *gvec = irq_info.vector;
> +            *dest = irq_info.dest;
> +            *dm = irq_info.dest_mode;
> +            *dlm = irq_info.delivery_mode;
> +        }
> +    }
> +    else
> +        BUG();

ASSERT_UNREACHABLE();

> +    return rc;
> +}
> +
>  int pt_irq_create_bind(
>      struct domain *d, xen_domctl_bind_pt_irq_t *pt_irq_bind)
>  {
> @@ -339,17 +425,21 @@ int pt_irq_create_bind(
>      switch ( pt_irq_bind->irq_type )
>      {
>      case PT_IRQ_TYPE_MSI:
> +    case PT_IRQ_TYPE_MSI_IR:
>      {
> -        uint8_t dest, dest_mode, delivery_mode;
> +        uint8_t dest = 0, dest_mode = 0, delivery_mode = 0, gvec;

Why do you need those initializations now? They where unneeded before
and I don't see you using those variables.

>          int dest_vcpu_id;
>          const struct vcpu *vcpu;
> +        bool ir = (pt_irq_bind->irq_type == PT_IRQ_TYPE_MSI_IR);
> +        uint64_t gtable = ir ? pt_irq_bind->u.msi_ir.gtable :
> +                          pt_irq_bind->u.msi.gtable;
>  
>          if ( !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
>          {
>              pirq_dpci->flags = HVM_IRQ_DPCI_MAPPED | HVM_IRQ_DPCI_MACH_MSI |
> -                               HVM_IRQ_DPCI_GUEST_MSI;
> -            pirq_dpci->gmsi.legacy.gvec = pt_irq_bind->u.msi.gvec;
> -            pirq_dpci->gmsi.legacy.gflags = pt_irq_bind->u.msi.gflags;
> +                               (ir ? HVM_IRQ_DPCI_GUEST_MSI_IR :
> +                                HVM_IRQ_DPCI_GUEST_MSI);
> +            set_hvm_gmsi_info(&pirq_dpci->gmsi, pt_irq_bind);
>              /*
>               * 'pt_irq_create_bind' can be called after 'pt_irq_destroy_bind'.
>               * The 'pirq_cleanup_check' which would free the structure is only
> @@ -364,9 +454,9 @@ int pt_irq_create_bind(
>              pirq_dpci->dom = d;
>              /* bind after hvm_irq_dpci is setup to avoid race with irq handler*/
>              rc = pirq_guest_bind(d->vcpu[0], info, 0);
> -            if ( rc == 0 && pt_irq_bind->u.msi.gtable )
> +            if ( rc == 0 && gtable )
>              {
> -                rc = msixtbl_pt_register(d, info, pt_irq_bind->u.msi.gtable);
> +                rc = msixtbl_pt_register(d, info, gtable);
>                  if ( unlikely(rc) )
>                  {
>                      pirq_guest_unbind(d, info);
> @@ -381,8 +471,7 @@ int pt_irq_create_bind(
>              }
>              if ( unlikely(rc) )
>              {
> -                pirq_dpci->gmsi.legacy.gflags = 0;
> -                pirq_dpci->gmsi.legacy.gvec = 0;
> +                clear_hvm_gmsi_info(&pirq_dpci->gmsi, pt_irq_bind->irq_type);
>                  pirq_dpci->dom = NULL;
>                  pirq_dpci->flags = 0;
>                  pirq_cleanup_check(info, d);
> @@ -392,7 +481,8 @@ int pt_irq_create_bind(
>          }
>          else
>          {
> -            uint32_t mask = HVM_IRQ_DPCI_MACH_MSI | HVM_IRQ_DPCI_GUEST_MSI;
> +            uint32_t mask = HVM_IRQ_DPCI_MACH_MSI |
> +                     (ir ? HVM_IRQ_DPCI_GUEST_MSI_IR : HVM_IRQ_DPCI_GUEST_MSI);

Maybe:

uint32_t mask = (ir ? HVM_IRQ_DPCI_GUEST_MSI_IR
                    : HVM_IRQ_DPCI_GUEST_MSI) |
                HVM_IRQ_DPCI_MACH_MSI;

>  
>              if ( (pirq_dpci->flags & mask) != mask )
>              {
> @@ -401,29 +491,31 @@ int pt_irq_create_bind(
>              }
>  
>              /* If pirq is already mapped as vmsi, update guest data/addr. */
> -            if ( pirq_dpci->gmsi.legacy.gvec != pt_irq_bind->u.msi.gvec ||
> -                 pirq_dpci->gmsi.legacy.gflags != pt_irq_bind->u.msi.gflags )
> +            if ( hvm_gmsi_info_need_update(&pirq_dpci->gmsi, pt_irq_bind) )
>              {
>                  /* Directly clear pending EOIs before enabling new MSI info. */
>                  pirq_guest_eoi(info);
>  
> -                pirq_dpci->gmsi.legacy.gvec = pt_irq_bind->u.msi.gvec;
> -                pirq_dpci->gmsi.legacy.gflags = pt_irq_bind->u.msi.gflags;
> +                set_hvm_gmsi_info(&pirq_dpci->gmsi, pt_irq_bind);
>              }
>          }
>          /* Calculate dest_vcpu_id for MSI-type pirq migration. */
> -        dest = pirq_dpci->gmsi.legacy.gflags & VMSI_DEST_ID_MASK;
> -        dest_mode = !!(pirq_dpci->gmsi.legacy.gflags & VMSI_DM_MASK);
> -        delivery_mode = (pirq_dpci->gmsi.legacy.gflags & VMSI_DELIV_MASK) >>
> -                         GFLAGS_SHIFT_DELIV_MODE;
> -
> -        dest_vcpu_id = hvm_girq_dest_2_vcpu_id(d, dest, dest_mode);
> +        rc = pirq_dpci_2_msi_attr(d, pirq_dpci, &gvec, &dest, &dest_mode,
> +                                  &delivery_mode);
> +        if ( unlikely(rc) )
> +        {
> +            spin_unlock(&d->event_lock);
> +            return -EFAULT;

Return rc? Or else you are losing the return value for no apparent
reason.

> +        }
> +        else

Unneeded else branch.

> +            dest_vcpu_id = hvm_girq_dest_2_vcpu_id(d, dest, dest_mode);
>          pirq_dpci->gmsi.dest_vcpu_id = dest_vcpu_id;
>          spin_unlock(&d->event_lock);
>  
>          pirq_dpci->gmsi.posted = false;
>          vcpu = (dest_vcpu_id >= 0) ? d->vcpu[dest_vcpu_id] : NULL;
> -        if ( iommu_intpost )
> +        /* Currently, don't use interrupt posting for guest's remapping MSIs */
> +        if ( iommu_intpost && !ir )
>          {
>              if ( delivery_mode == dest_LowestPrio )
>                  vcpu = vector_hashing_dest(d, dest, dest_mode,
> @@ -435,7 +527,7 @@ int pt_irq_create_bind(
>              hvm_migrate_pirqs(d->vcpu[dest_vcpu_id]);
>  
>          /* Use interrupt posting if it is supported. */
> -        if ( iommu_intpost )
> +        if ( iommu_intpost && !ir )

So with interrupt remapping posted interrupts are not available...

>              pi_update_irte(vcpu ? &vcpu->arch.hvm_vmx.pi_desc : NULL,
>                             info, pirq_dpci->gmsi.legacy.gvec);
>  
> @@ -627,6 +719,7 @@ int pt_irq_destroy_bind(
>          }
>          break;
>      case PT_IRQ_TYPE_MSI:
> +    case PT_IRQ_TYPE_MSI_IR:
>          break;
>      default:
>          return -EOPNOTSUPP;
> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
> index 4b10f26..1adf032 100644
> --- a/xen/include/public/domctl.h
> +++ b/xen/include/public/domctl.h
> @@ -555,6 +555,7 @@ typedef enum pt_irq_type_e {
>      PT_IRQ_TYPE_MSI,
>      PT_IRQ_TYPE_MSI_TRANSLATE,
>      PT_IRQ_TYPE_SPI,    /* ARM: valid range 32-1019 */
> +    PT_IRQ_TYPE_MSI_IR,
>  } pt_irq_type_t;
>  struct xen_domctl_bind_pt_irq {
>      uint32_t machine_irq;
> @@ -575,6 +576,12 @@ struct xen_domctl_bind_pt_irq {
>              uint64_aligned_t gtable;
>          } msi;
>          struct {
> +            uint32_t source_id;
> +            uint32_t data;
> +            uint64_t addr;
> +            uint64_aligned_t gtable;

uint64_aligned_t? Please use uint64_t.

> +        } msi_ir;
> +        struct {
>              uint16_t spi;
>          } spi;
>      } u;
> diff --git a/xen/include/xen/hvm/irq.h b/xen/include/xen/hvm/irq.h
> index 5e736f8..884e092 100644
> --- a/xen/include/xen/hvm/irq.h
> +++ b/xen/include/xen/hvm/irq.h
> @@ -41,6 +41,7 @@ struct dev_intx_gsi_link {
>  #define _HVM_IRQ_DPCI_GUEST_PCI_SHIFT           4
>  #define _HVM_IRQ_DPCI_GUEST_MSI_SHIFT           5
>  #define _HVM_IRQ_DPCI_IDENTITY_GSI_SHIFT        6
> +#define _HVM_IRQ_DPCI_GUEST_MSI_IR_SHIFT        7 

Trailing space.

Roger.
Chao Gao Aug. 25, 2017, 7:28 a.m. UTC | #2
On Wed, Aug 23, 2017 at 11:41:25AM +0100, Roger Pau Monné wrote:
>On Wed, Aug 09, 2017 at 04:34:22PM -0400, Lan Tianyu wrote:
>> From: Chao Gao <chao.gao@intel.com>
>> 
>> Introduce a new binding relationship and provide a new interface to
>> manage the new relationship.
>> 
>> Signed-off-by: Chao Gao <chao.gao@intel.com>
>> Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
>> ---
>>          pirq_dpci->gmsi.posted = false;
>>          vcpu = (dest_vcpu_id >= 0) ? d->vcpu[dest_vcpu_id] : NULL;
>> -        if ( iommu_intpost )
>> +        /* Currently, don't use interrupt posting for guest's remapping MSIs */
>> +        if ( iommu_intpost && !ir )
>>          {
>>              if ( delivery_mode == dest_LowestPrio )
>>                  vcpu = vector_hashing_dest(d, dest, dest_mode,
>> @@ -435,7 +527,7 @@ int pt_irq_create_bind(
>>              hvm_migrate_pirqs(d->vcpu[dest_vcpu_id]);
>>  
>>          /* Use interrupt posting if it is supported. */
>> -        if ( iommu_intpost )
>> +        if ( iommu_intpost && !ir )
>
>So with interrupt remapping posted interrupts are not available...

Yes. We want to make thing simple. Currently, all vIRTE isn't
cached by vvtd and thus we needn't do anything when guest try to flush
vIRTE. If we use posted interrupt here, it means some information will
be cached by physical VTd. In that case, we should push effort to flush
correspond phyiscal IRTE. We don't include these patches in this
series.

Thanks
Chao
Roger Pau Monné Aug. 25, 2017, 9:35 a.m. UTC | #3
On Fri, Aug 25, 2017 at 03:28:46PM +0800, Chao Gao wrote:
> On Wed, Aug 23, 2017 at 11:41:25AM +0100, Roger Pau Monné wrote:
> >On Wed, Aug 09, 2017 at 04:34:22PM -0400, Lan Tianyu wrote:
> >> From: Chao Gao <chao.gao@intel.com>
> >> 
> >> Introduce a new binding relationship and provide a new interface to
> >> manage the new relationship.
> >> 
> >> Signed-off-by: Chao Gao <chao.gao@intel.com>
> >> Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
> >> ---
> >>          pirq_dpci->gmsi.posted = false;
> >>          vcpu = (dest_vcpu_id >= 0) ? d->vcpu[dest_vcpu_id] : NULL;
> >> -        if ( iommu_intpost )
> >> +        /* Currently, don't use interrupt posting for guest's remapping MSIs */
> >> +        if ( iommu_intpost && !ir )
> >>          {
> >>              if ( delivery_mode == dest_LowestPrio )
> >>                  vcpu = vector_hashing_dest(d, dest, dest_mode,
> >> @@ -435,7 +527,7 @@ int pt_irq_create_bind(
> >>              hvm_migrate_pirqs(d->vcpu[dest_vcpu_id]);
> >>  
> >>          /* Use interrupt posting if it is supported. */
> >> -        if ( iommu_intpost )
> >> +        if ( iommu_intpost && !ir )
> >
> >So with interrupt remapping posted interrupts are not available...
> 
> Yes. We want to make thing simple. Currently, all vIRTE isn't
> cached by vvtd and thus we needn't do anything when guest try to flush
> vIRTE. If we use posted interrupt here, it means some information will
> be cached by physical VTd. In that case, we should push effort to flush
> correspond phyiscal IRTE. We don't include these patches in this
> series.

Might be worth adding a "FIXME".

Thanks, Roger.
diff mbox

Patch

diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index dfaa9d5..b0a9437 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -1720,6 +1720,15 @@  int xc_domain_ioport_mapping(xc_interface *xch,
                              uint32_t nr_ports,
                              uint32_t add_mapping);
 
+int xc_domain_update_msi_irq_remapping(
+    xc_interface *xch,
+    uint32_t domid,
+    uint32_t pirq,
+    uint32_t source_id,
+    uint32_t data,
+    uint64_t addr,
+    uint64_t gtable);
+
 int xc_domain_update_msi_irq(
     xc_interface *xch,
     uint32_t domid,
@@ -1734,6 +1743,14 @@  int xc_domain_unbind_msi_irq(xc_interface *xch,
                              uint32_t pirq,
                              uint32_t gflags);
 
+int xc_domain_unbind_msi_irq_remapping(
+    xc_interface *xch,
+    uint32_t domid,
+    uint32_t pirq,
+    uint32_t source_id,
+    uint32_t data,
+    uint64_t addr);
+
 int xc_domain_bind_pt_irq(xc_interface *xch,
                           uint32_t domid,
                           uint8_t machine_irq,
diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
index 3bab4e8..4b6a510 100644
--- a/tools/libxc/xc_domain.c
+++ b/tools/libxc/xc_domain.c
@@ -1702,8 +1702,34 @@  int xc_deassign_dt_device(
     return rc;
 }
 
+int xc_domain_update_msi_irq_remapping(
+    xc_interface *xch,
+    uint32_t domid,
+    uint32_t pirq,
+    uint32_t source_id,
+    uint32_t data,
+    uint64_t addr,
+    uint64_t gtable)
+{
+    int rc;
+    xen_domctl_bind_pt_irq_t *bind;
+
+    DECLARE_DOMCTL;
 
+    domctl.cmd = XEN_DOMCTL_bind_pt_irq;
+    domctl.domain = (domid_t)domid;
 
+    bind = &(domctl.u.bind_pt_irq);
+    bind->irq_type = PT_IRQ_TYPE_MSI_IR;
+    bind->machine_irq = pirq;
+    bind->u.msi_ir.source_id = source_id;
+    bind->u.msi_ir.data = data;
+    bind->u.msi_ir.addr = addr;
+    bind->u.msi_ir.gtable = gtable;
+
+    rc = do_domctl(xch, &domctl);
+    return rc;
+}
 
 int xc_domain_update_msi_irq(
     xc_interface *xch,
@@ -1732,6 +1758,33 @@  int xc_domain_update_msi_irq(
     return rc;
 }
 
+int xc_domain_unbind_msi_irq_remapping(
+    xc_interface *xch,
+    uint32_t domid,
+    uint32_t pirq,
+    uint32_t source_id,
+    uint32_t data,
+    uint64_t addr)
+{
+    int rc;
+    xen_domctl_bind_pt_irq_t *bind;
+
+    DECLARE_DOMCTL;
+
+    domctl.cmd = XEN_DOMCTL_unbind_pt_irq;
+    domctl.domain = (domid_t)domid;
+
+    bind = &(domctl.u.bind_pt_irq);
+    bind->irq_type = PT_IRQ_TYPE_MSI_IR;
+    bind->machine_irq = pirq;
+    bind->u.msi_ir.source_id = source_id;
+    bind->u.msi_ir.data = data;
+    bind->u.msi_ir.addr = addr;
+
+    rc = do_domctl(xch, &domctl);
+    return rc;
+}
+
 int xc_domain_unbind_msi_irq(
     xc_interface *xch,
     uint32_t domid,
diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
index 4d457f6..0510887 100644
--- a/xen/drivers/passthrough/io.c
+++ b/xen/drivers/passthrough/io.c
@@ -276,6 +276,92 @@  static struct vcpu *vector_hashing_dest(const struct domain *d,
     return dest;
 }
 
+static inline void set_hvm_gmsi_info(struct hvm_gmsi_info *msi,
+                                     xen_domctl_bind_pt_irq_t *pt_irq_bind)
+{
+    if ( pt_irq_bind->irq_type == PT_IRQ_TYPE_MSI )
+    {
+        msi->legacy.gvec = pt_irq_bind->u.msi.gvec;
+        msi->legacy.gflags = pt_irq_bind->u.msi.gflags;
+    }
+    else if ( pt_irq_bind->irq_type == PT_IRQ_TYPE_MSI_IR )
+    {
+        msi->intremap.source_id = pt_irq_bind->u.msi_ir.source_id;
+        msi->intremap.data = pt_irq_bind->u.msi_ir.data;
+        msi->intremap.addr = pt_irq_bind->u.msi_ir.addr;
+    }
+    else
+        BUG();
+}
+
+static inline void clear_hvm_gmsi_info(struct hvm_gmsi_info *msi, int irq_type)
+{
+    if ( irq_type == PT_IRQ_TYPE_MSI )
+    {
+        msi->legacy.gvec = 0;
+        msi->legacy.gflags = 0;
+    }
+    else if ( irq_type == PT_IRQ_TYPE_MSI_IR )
+    {
+        msi->intremap.source_id = 0;
+        msi->intremap.data = 0;
+        msi->intremap.addr = 0;
+    }
+    else
+        BUG();
+}
+
+static inline bool hvm_gmsi_info_need_update(struct hvm_gmsi_info *msi,
+                                         xen_domctl_bind_pt_irq_t *pt_irq_bind)
+{
+    if ( pt_irq_bind->irq_type == PT_IRQ_TYPE_MSI )
+        return ((msi->legacy.gvec != pt_irq_bind->u.msi.gvec) ||
+                (msi->legacy.gflags != pt_irq_bind->u.msi.gflags));
+    else if ( pt_irq_bind->irq_type == PT_IRQ_TYPE_MSI_IR )
+        return ((msi->intremap.source_id != pt_irq_bind->u.msi_ir.source_id) ||
+                (msi->intremap.data != pt_irq_bind->u.msi_ir.data) ||
+                (msi->intremap.addr != pt_irq_bind->u.msi_ir.addr));
+    BUG();
+    return 0;
+}
+
+static int pirq_dpci_2_msi_attr(struct domain *d,
+                                struct hvm_pirq_dpci *pirq_dpci, uint8_t *gvec,
+                                uint8_t *dest, uint8_t *dm, uint8_t *dlm)
+{
+    int rc = 0;
+
+    if ( pirq_dpci->flags & HVM_IRQ_DPCI_GUEST_MSI )
+    {
+        *gvec = pirq_dpci->gmsi.legacy.gvec;
+        *dest = pirq_dpci->gmsi.legacy.gflags & VMSI_DEST_ID_MASK;
+        *dm = !!(pirq_dpci->gmsi.legacy.gflags & VMSI_DM_MASK);
+        *dlm = (pirq_dpci->gmsi.legacy.gflags & VMSI_DELIV_MASK) >>
+                GFLAGS_SHIFT_DELIV_MODE;
+    }
+    else if ( pirq_dpci->flags & HVM_IRQ_DPCI_GUEST_MSI_IR )
+    {
+        struct irq_remapping_request request;
+        struct irq_remapping_info irq_info;
+
+        irq_request_msi_fill(&request, pirq_dpci->gmsi.intremap.source_id,
+                             pirq_dpci->gmsi.intremap.addr,
+                             pirq_dpci->gmsi.intremap.data);
+        /* Currently, only viommu 0 is supported */
+        rc = viommu_get_irq_info(d, 0, &request, &irq_info);
+        if ( !rc )
+        {
+            *gvec = irq_info.vector;
+            *dest = irq_info.dest;
+            *dm = irq_info.dest_mode;
+            *dlm = irq_info.delivery_mode;
+        }
+    }
+    else
+        BUG();
+    return rc;
+}
+
 int pt_irq_create_bind(
     struct domain *d, xen_domctl_bind_pt_irq_t *pt_irq_bind)
 {
@@ -339,17 +425,21 @@  int pt_irq_create_bind(
     switch ( pt_irq_bind->irq_type )
     {
     case PT_IRQ_TYPE_MSI:
+    case PT_IRQ_TYPE_MSI_IR:
     {
-        uint8_t dest, dest_mode, delivery_mode;
+        uint8_t dest = 0, dest_mode = 0, delivery_mode = 0, gvec;
         int dest_vcpu_id;
         const struct vcpu *vcpu;
+        bool ir = (pt_irq_bind->irq_type == PT_IRQ_TYPE_MSI_IR);
+        uint64_t gtable = ir ? pt_irq_bind->u.msi_ir.gtable :
+                          pt_irq_bind->u.msi.gtable;
 
         if ( !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
         {
             pirq_dpci->flags = HVM_IRQ_DPCI_MAPPED | HVM_IRQ_DPCI_MACH_MSI |
-                               HVM_IRQ_DPCI_GUEST_MSI;
-            pirq_dpci->gmsi.legacy.gvec = pt_irq_bind->u.msi.gvec;
-            pirq_dpci->gmsi.legacy.gflags = pt_irq_bind->u.msi.gflags;
+                               (ir ? HVM_IRQ_DPCI_GUEST_MSI_IR :
+                                HVM_IRQ_DPCI_GUEST_MSI);
+            set_hvm_gmsi_info(&pirq_dpci->gmsi, pt_irq_bind);
             /*
              * 'pt_irq_create_bind' can be called after 'pt_irq_destroy_bind'.
              * The 'pirq_cleanup_check' which would free the structure is only
@@ -364,9 +454,9 @@  int pt_irq_create_bind(
             pirq_dpci->dom = d;
             /* bind after hvm_irq_dpci is setup to avoid race with irq handler*/
             rc = pirq_guest_bind(d->vcpu[0], info, 0);
-            if ( rc == 0 && pt_irq_bind->u.msi.gtable )
+            if ( rc == 0 && gtable )
             {
-                rc = msixtbl_pt_register(d, info, pt_irq_bind->u.msi.gtable);
+                rc = msixtbl_pt_register(d, info, gtable);
                 if ( unlikely(rc) )
                 {
                     pirq_guest_unbind(d, info);
@@ -381,8 +471,7 @@  int pt_irq_create_bind(
             }
             if ( unlikely(rc) )
             {
-                pirq_dpci->gmsi.legacy.gflags = 0;
-                pirq_dpci->gmsi.legacy.gvec = 0;
+                clear_hvm_gmsi_info(&pirq_dpci->gmsi, pt_irq_bind->irq_type);
                 pirq_dpci->dom = NULL;
                 pirq_dpci->flags = 0;
                 pirq_cleanup_check(info, d);
@@ -392,7 +481,8 @@  int pt_irq_create_bind(
         }
         else
         {
-            uint32_t mask = HVM_IRQ_DPCI_MACH_MSI | HVM_IRQ_DPCI_GUEST_MSI;
+            uint32_t mask = HVM_IRQ_DPCI_MACH_MSI |
+                     (ir ? HVM_IRQ_DPCI_GUEST_MSI_IR : HVM_IRQ_DPCI_GUEST_MSI);
 
             if ( (pirq_dpci->flags & mask) != mask )
             {
@@ -401,29 +491,31 @@  int pt_irq_create_bind(
             }
 
             /* If pirq is already mapped as vmsi, update guest data/addr. */
-            if ( pirq_dpci->gmsi.legacy.gvec != pt_irq_bind->u.msi.gvec ||
-                 pirq_dpci->gmsi.legacy.gflags != pt_irq_bind->u.msi.gflags )
+            if ( hvm_gmsi_info_need_update(&pirq_dpci->gmsi, pt_irq_bind) )
             {
                 /* Directly clear pending EOIs before enabling new MSI info. */
                 pirq_guest_eoi(info);
 
-                pirq_dpci->gmsi.legacy.gvec = pt_irq_bind->u.msi.gvec;
-                pirq_dpci->gmsi.legacy.gflags = pt_irq_bind->u.msi.gflags;
+                set_hvm_gmsi_info(&pirq_dpci->gmsi, pt_irq_bind);
             }
         }
         /* Calculate dest_vcpu_id for MSI-type pirq migration. */
-        dest = pirq_dpci->gmsi.legacy.gflags & VMSI_DEST_ID_MASK;
-        dest_mode = !!(pirq_dpci->gmsi.legacy.gflags & VMSI_DM_MASK);
-        delivery_mode = (pirq_dpci->gmsi.legacy.gflags & VMSI_DELIV_MASK) >>
-                         GFLAGS_SHIFT_DELIV_MODE;
-
-        dest_vcpu_id = hvm_girq_dest_2_vcpu_id(d, dest, dest_mode);
+        rc = pirq_dpci_2_msi_attr(d, pirq_dpci, &gvec, &dest, &dest_mode,
+                                  &delivery_mode);
+        if ( unlikely(rc) )
+        {
+            spin_unlock(&d->event_lock);
+            return -EFAULT;
+        }
+        else
+            dest_vcpu_id = hvm_girq_dest_2_vcpu_id(d, dest, dest_mode);
         pirq_dpci->gmsi.dest_vcpu_id = dest_vcpu_id;
         spin_unlock(&d->event_lock);
 
         pirq_dpci->gmsi.posted = false;
         vcpu = (dest_vcpu_id >= 0) ? d->vcpu[dest_vcpu_id] : NULL;
-        if ( iommu_intpost )
+        /* Currently, don't use interrupt posting for guest's remapping MSIs */
+        if ( iommu_intpost && !ir )
         {
             if ( delivery_mode == dest_LowestPrio )
                 vcpu = vector_hashing_dest(d, dest, dest_mode,
@@ -435,7 +527,7 @@  int pt_irq_create_bind(
             hvm_migrate_pirqs(d->vcpu[dest_vcpu_id]);
 
         /* Use interrupt posting if it is supported. */
-        if ( iommu_intpost )
+        if ( iommu_intpost && !ir )
             pi_update_irte(vcpu ? &vcpu->arch.hvm_vmx.pi_desc : NULL,
                            info, pirq_dpci->gmsi.legacy.gvec);
 
@@ -627,6 +719,7 @@  int pt_irq_destroy_bind(
         }
         break;
     case PT_IRQ_TYPE_MSI:
+    case PT_IRQ_TYPE_MSI_IR:
         break;
     default:
         return -EOPNOTSUPP;
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 4b10f26..1adf032 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -555,6 +555,7 @@  typedef enum pt_irq_type_e {
     PT_IRQ_TYPE_MSI,
     PT_IRQ_TYPE_MSI_TRANSLATE,
     PT_IRQ_TYPE_SPI,    /* ARM: valid range 32-1019 */
+    PT_IRQ_TYPE_MSI_IR,
 } pt_irq_type_t;
 struct xen_domctl_bind_pt_irq {
     uint32_t machine_irq;
@@ -575,6 +576,12 @@  struct xen_domctl_bind_pt_irq {
             uint64_aligned_t gtable;
         } msi;
         struct {
+            uint32_t source_id;
+            uint32_t data;
+            uint64_t addr;
+            uint64_aligned_t gtable;
+        } msi_ir;
+        struct {
             uint16_t spi;
         } spi;
     } u;
diff --git a/xen/include/xen/hvm/irq.h b/xen/include/xen/hvm/irq.h
index 5e736f8..884e092 100644
--- a/xen/include/xen/hvm/irq.h
+++ b/xen/include/xen/hvm/irq.h
@@ -41,6 +41,7 @@  struct dev_intx_gsi_link {
 #define _HVM_IRQ_DPCI_GUEST_PCI_SHIFT           4
 #define _HVM_IRQ_DPCI_GUEST_MSI_SHIFT           5
 #define _HVM_IRQ_DPCI_IDENTITY_GSI_SHIFT        6
+#define _HVM_IRQ_DPCI_GUEST_MSI_IR_SHIFT        7 
 #define _HVM_IRQ_DPCI_TRANSLATE_SHIFT          15
 #define HVM_IRQ_DPCI_MACH_PCI        (1 << _HVM_IRQ_DPCI_MACH_PCI_SHIFT)
 #define HVM_IRQ_DPCI_MACH_MSI        (1 << _HVM_IRQ_DPCI_MACH_MSI_SHIFT)
@@ -49,6 +50,7 @@  struct dev_intx_gsi_link {
 #define HVM_IRQ_DPCI_GUEST_PCI       (1 << _HVM_IRQ_DPCI_GUEST_PCI_SHIFT)
 #define HVM_IRQ_DPCI_GUEST_MSI       (1 << _HVM_IRQ_DPCI_GUEST_MSI_SHIFT)
 #define HVM_IRQ_DPCI_IDENTITY_GSI    (1 << _HVM_IRQ_DPCI_IDENTITY_GSI_SHIFT)
+#define HVM_IRQ_DPCI_GUEST_MSI_IR    (1 << _HVM_IRQ_DPCI_GUEST_MSI_IR_SHIFT)
 #define HVM_IRQ_DPCI_TRANSLATE       (1 << _HVM_IRQ_DPCI_TRANSLATE_SHIFT)
 
 #define VMSI_DEST_ID_MASK 0xff
@@ -67,6 +69,11 @@  struct hvm_gmsi_info {
             uint32_t gvec;
             uint32_t gflags;
         } legacy;
+        struct {
+            uint32_t source_id;
+            uint32_t data;
+            uint64_t addr;
+        } intremap;
     };
     int dest_vcpu_id; /* -1 :multi-dest, non-negative: dest_vcpu_id */
     bool posted; /* directly deliver to guest via VT-d PI? */