diff mbox series

xen/vpci: msix: move x86 specific code to x86 file

Message ID bfb2b7f282249cee8b6ff15d424a2b7d823ac743.1639478564.git.rahul.singh@arm.com (mailing list archive)
State New, archived
Headers show
Series xen/vpci: msix: move x86 specific code to x86 file | expand

Commit Message

Rahul Singh Dec. 14, 2021, 10:45 a.m. UTC
vpci/msix.c file will be used for arm architecture when vpci msix
support will be added to ARM, but there is x86 specific code in this
file.

Move x86 specific code to the x86_msix.c file to make sure common code
will be used for other architecture.

No functional change intended.

Signed-off-by: Rahul Singh <rahul.singh@arm.com>
---
 xen/arch/x86/msi.c                       |   2 +-
 xen/drivers/passthrough/amd/iommu_init.c |   1 +
 xen/drivers/vpci/Makefile                |   1 +
 xen/drivers/vpci/msi.c                   |   3 +-
 xen/drivers/vpci/msix.c                  | 134 +++++---------------
 xen/drivers/vpci/x86_msix.c              | 155 +++++++++++++++++++++++
 xen/include/asm-x86/msi.h                |  28 ----
 xen/include/xen/msi.h                    |  28 ++++
 xen/include/xen/vpci.h                   |  21 +++
 9 files changed, 239 insertions(+), 134 deletions(-)
 create mode 100644 xen/drivers/vpci/x86_msix.c

Comments

Roger Pau Monné Dec. 14, 2021, 12:37 p.m. UTC | #1
On Tue, Dec 14, 2021 at 10:45:17AM +0000, Rahul Singh wrote:
> vpci/msix.c file will be used for arm architecture when vpci msix
> support will be added to ARM, but there is x86 specific code in this
> file.
> 
> Move x86 specific code to the x86_msix.c file to make sure common code
> will be used for other architecture.
> 
> No functional change intended.
> 
> Signed-off-by: Rahul Singh <rahul.singh@arm.com>
> ---
>  xen/arch/x86/msi.c                       |   2 +-
>  xen/drivers/passthrough/amd/iommu_init.c |   1 +
>  xen/drivers/vpci/Makefile                |   1 +
>  xen/drivers/vpci/msi.c                   |   3 +-
>  xen/drivers/vpci/msix.c                  | 134 +++++---------------
>  xen/drivers/vpci/x86_msix.c              | 155 +++++++++++++++++++++++

This should go into xen/arch/x86/hvm/vmsi.c there's already vPCI MSI
specific code in there.

>  xen/include/asm-x86/msi.h                |  28 ----
>  xen/include/xen/msi.h                    |  28 ++++
>  xen/include/xen/vpci.h                   |  21 +++
>  9 files changed, 239 insertions(+), 134 deletions(-)
>  create mode 100644 xen/drivers/vpci/x86_msix.c
> 
> diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c
> index 5febc0ea4b..2b120f897f 100644
> --- a/xen/arch/x86/msi.c
> +++ b/xen/arch/x86/msi.c
> @@ -23,7 +23,7 @@
>  #include <asm/io.h>
>  #include <asm/smp.h>
>  #include <asm/desc.h>
> -#include <asm/msi.h>
> +#include <xen/msi.h>

You likely need to move this up to the xen/ prefixed include block.

>  #include <asm/fixmap.h>
>  #include <asm/p2m.h>
>  #include <mach_apic.h>
> diff --git a/xen/drivers/passthrough/amd/iommu_init.c b/xen/drivers/passthrough/amd/iommu_init.c
> index 559a734bda..fc385959c7 100644
> --- a/xen/drivers/passthrough/amd/iommu_init.c
> +++ b/xen/drivers/passthrough/amd/iommu_init.c
> @@ -20,6 +20,7 @@
>  #include <xen/acpi.h>
>  #include <xen/delay.h>
>  #include <xen/keyhandler.h>
> +#include <xen/msi.h>
>  
>  #include "iommu.h"

Might be better to replace the asm/msi.h include in iommu.h with
xen/msi.h instead (or just add the xen/msi.h include instead of
replacing it).

>  
> diff --git a/xen/drivers/vpci/Makefile b/xen/drivers/vpci/Makefile
> index 1a1413b93e..543c265199 100644
> --- a/xen/drivers/vpci/Makefile
> +++ b/xen/drivers/vpci/Makefile
> @@ -1,2 +1,3 @@
>  obj-y += vpci.o header.o
>  obj-$(CONFIG_HAS_PCI_MSI) += msi.o msix.o
> +obj-$(CONFIG_X86) += x86_msix.o
> diff --git a/xen/drivers/vpci/msi.c b/xen/drivers/vpci/msi.c
> index 5757a7aed2..8fc82a9b8d 100644
> --- a/xen/drivers/vpci/msi.c
> +++ b/xen/drivers/vpci/msi.c
> @@ -16,12 +16,11 @@
>   * License along with this program; If not, see <http://www.gnu.org/licenses/>.
>   */
>  
> +#include <xen/msi.h>
>  #include <xen/sched.h>
>  #include <xen/softirq.h>
>  #include <xen/vpci.h>
>  
> -#include <asm/msi.h>
> -
>  static uint32_t control_read(const struct pci_dev *pdev, unsigned int reg,
>                               void *data)
>  {
> diff --git a/xen/drivers/vpci/msix.c b/xen/drivers/vpci/msix.c
> index 846f1b8d70..7a9b02f1a5 100644
> --- a/xen/drivers/vpci/msix.c
> +++ b/xen/drivers/vpci/msix.c
> @@ -17,15 +17,24 @@
>   * License along with this program; If not, see <http://www.gnu.org/licenses/>.
>   */
>  
> +#include <xen/msi.h>
>  #include <xen/sched.h>
>  #include <xen/vpci.h>
>  
> -#include <asm/msi.h>
>  #include <asm/p2m.h>
>  
> -#define VMSIX_ADDR_IN_RANGE(addr, vpci, nr)                               \
> -    ((addr) >= vmsix_table_addr(vpci, nr) &&                              \
> -     (addr) < vmsix_table_addr(vpci, nr) + vmsix_table_size(vpci, nr))
> +/*
> + * The return value is different for the MMIO handler on ARM and x86
> + * architecture. To make the code common for both architectures create
> + * generic return code with architecture dependent values.
> + */
> +#ifdef CONFIG_X86
> +#define VPCI_EMUL_OKAY      X86EMUL_OKAY
> +#define VPCI_EMUL_RETRY     X86EMUL_RETRY
> +#else
> +#define VPCI_EMUL_OKAY      1
> +#define VPCI_EMUL_RETRY     VPCI_EMUL_OKAY
> +#endif

Since msix_{read/write} are no longer directly used by the MMIO
handlers you might as well just return an error code (or a boolean)
and let the caller translate that into the per-arch return code.

>  
>  static uint32_t control_read(const struct pci_dev *pdev, unsigned int reg,
>                               void *data)
> @@ -138,29 +147,6 @@ static void control_write(const struct pci_dev *pdev, unsigned int reg,
>          pci_conf_write16(pdev->sbdf, reg, val);
>  }
>  
> -static struct vpci_msix *msix_find(const struct domain *d, unsigned long addr)
> -{
> -    struct vpci_msix *msix;
> -
> -    list_for_each_entry ( msix, &d->arch.hvm.msix_tables, next )
> -    {
> -        const struct vpci_bar *bars = msix->pdev->vpci->header.bars;
> -        unsigned int i;
> -
> -        for ( i = 0; i < ARRAY_SIZE(msix->tables); i++ )
> -            if ( bars[msix->tables[i] & PCI_MSIX_BIRMASK].enabled &&
> -                 VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, i) )
> -                return msix;
> -    }
> -
> -    return NULL;
> -}
> -
> -static int msix_accept(struct vcpu *v, unsigned long addr)
> -{
> -    return !!msix_find(v->domain, addr);
> -}
> -
>  static bool access_allowed(const struct pci_dev *pdev, unsigned long addr,
>                             unsigned int len)
>  {
> @@ -182,21 +168,19 @@ static struct vpci_msix_entry *get_entry(struct vpci_msix *msix,
>      return &msix->entries[(addr - start) / PCI_MSIX_ENTRY_SIZE];
>  }
>  
> -static int msix_read(struct vcpu *v, unsigned long addr, unsigned int len,
> -                     unsigned long *data)
> +int msix_read(struct vpci_msix *msix, unsigned long addr, unsigned int len,

This now requires a vpci_ prefix, since it's a global function.
Plain msix_{read,write} is way too generic.

> +              unsigned long *data)
>  {
> -    const struct domain *d = v->domain;
> -    struct vpci_msix *msix = msix_find(d, addr);
>      const struct vpci_msix_entry *entry;
>      unsigned int offset;
>  
>      *data = ~0ul;
>  
>      if ( !msix )
> -        return X86EMUL_RETRY;
> +        return VPCI_EMUL_RETRY;
>  
>      if ( !access_allowed(msix->pdev, addr, len) )
> -        return X86EMUL_OKAY;
> +        return VPCI_EMUL_OKAY;
>  
>      if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
>      {
> @@ -210,11 +194,11 @@ static int msix_read(struct vcpu *v, unsigned long addr, unsigned int len,
>          switch ( len )
>          {
>          case 4:
> -            *data = readl(addr);
> +            *data = vpci_arch_readl(addr);

Why do you need a vpci wrapper around the read/write handlers? AFAICT
arm64 also has {read,write}{l,q}. And you likely want to protect the
64bit read with CONFIG_64BIT if this code is to be made available to
arm32.

>              break;
>  
>          case 8:
> -            *data = readq(addr);
> +            *data = vpci_arch_readq(addr);
>              break;
>  
>          default:
> @@ -222,7 +206,7 @@ static int msix_read(struct vcpu *v, unsigned long addr, unsigned int len,
>              break;
>          }
>  
> -        return X86EMUL_OKAY;
> +        return VPCI_EMUL_OKAY;
>      }
>  
>      spin_lock(&msix->pdev->vpci->lock);
> @@ -256,22 +240,20 @@ static int msix_read(struct vcpu *v, unsigned long addr, unsigned int len,
>      }
>      spin_unlock(&msix->pdev->vpci->lock);
>  
> -    return X86EMUL_OKAY;
> +    return VPCI_EMUL_OKAY;
>  }
>  
> -static int msix_write(struct vcpu *v, unsigned long addr, unsigned int len,
> -                      unsigned long data)
> +int msix_write(const struct domain *d, struct vpci_msix *msix,
> +               unsigned long addr, unsigned int len, unsigned long data)
>  {
> -    const struct domain *d = v->domain;
> -    struct vpci_msix *msix = msix_find(d, addr);
>      struct vpci_msix_entry *entry;
>      unsigned int offset;
>  
>      if ( !msix )
> -        return X86EMUL_RETRY;
> +        return VPCI_EMUL_RETRY;
>  
>      if ( !access_allowed(msix->pdev, addr, len) )
> -        return X86EMUL_OKAY;
> +        return VPCI_EMUL_OKAY;
>  
>      if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
>      {
> @@ -281,11 +263,11 @@ static int msix_write(struct vcpu *v, unsigned long addr, unsigned int len,
>              switch ( len )
>              {
>              case 4:
> -                writel(data, addr);
> +                vpci_arch_writel(data, addr);
>                  break;
>  
>              case 8:
> -                writeq(data, addr);
> +                vpci_arch_writeq(data, addr);
>                  break;
>  
>              default:
> @@ -294,7 +276,7 @@ static int msix_write(struct vcpu *v, unsigned long addr, unsigned int len,
>              }
>          }
>  
> -        return X86EMUL_OKAY;
> +        return VPCI_EMUL_OKAY;
>      }
>  
>      spin_lock(&msix->pdev->vpci->lock);
> @@ -372,60 +354,7 @@ static int msix_write(struct vcpu *v, unsigned long addr, unsigned int len,
>      }
>      spin_unlock(&msix->pdev->vpci->lock);
>  
> -    return X86EMUL_OKAY;
> -}
> -
> -static const struct hvm_mmio_ops vpci_msix_table_ops = {
> -    .check = msix_accept,
> -    .read = msix_read,
> -    .write = msix_write,
> -};
> -
> -int vpci_make_msix_hole(const struct pci_dev *pdev)
> -{
> -    struct domain *d = pdev->domain;
> -    unsigned int i;
> -
> -    if ( !pdev->vpci->msix )
> -        return 0;
> -
> -    /* Make sure there's a hole for the MSIX table/PBA in the p2m. */
> -    for ( i = 0; i < ARRAY_SIZE(pdev->vpci->msix->tables); i++ )
> -    {
> -        unsigned long start = PFN_DOWN(vmsix_table_addr(pdev->vpci, i));
> -        unsigned long end = PFN_DOWN(vmsix_table_addr(pdev->vpci, i) +
> -                                     vmsix_table_size(pdev->vpci, i) - 1);
> -
> -        for ( ; start <= end; start++ )
> -        {
> -            p2m_type_t t;
> -            mfn_t mfn = get_gfn_query(d, start, &t);
> -
> -            switch ( t )
> -            {
> -            case p2m_mmio_dm:
> -            case p2m_invalid:
> -                break;
> -            case p2m_mmio_direct:
> -                if ( mfn_x(mfn) == start )
> -                {
> -                    clear_identity_p2m_entry(d, start);
> -                    break;
> -                }
> -                /* fallthrough. */
> -            default:
> -                put_gfn(d, start);
> -                gprintk(XENLOG_WARNING,
> -                        "%pp: existing mapping (mfn: %" PRI_mfn
> -                        "type: %d) at %#lx clobbers MSIX MMIO area\n",
> -                        &pdev->sbdf, mfn_x(mfn), t, start);
> -                return -EEXIST;
> -            }
> -            put_gfn(d, start);
> -        }
> -    }
> -
> -    return 0;
> +    return VPCI_EMUL_OKAY;
>  }
>  
>  static int init_msix(struct pci_dev *pdev)
> @@ -472,11 +401,10 @@ static int init_msix(struct pci_dev *pdev)
>          vpci_msix_arch_init_entry(&msix->entries[i]);
>      }
>  
> -    if ( list_empty(&d->arch.hvm.msix_tables) )
> -        register_mmio_handler(d, &vpci_msix_table_ops);
> +    register_msix_mmio_handler(d);
> +    vpci_msix_add_to_msix_table(msix, d);
>  
>      pdev->vpci->msix = msix;
> -    list_add(&msix->next, &d->arch.hvm.msix_tables);

You could likely do the registering of the handler and the addition of
the table in the same handler: vpci_msix_arch_register or similar.

Thanks, Roger.
Jan Beulich Dec. 14, 2021, 2:15 p.m. UTC | #2
On 14.12.2021 11:45, Rahul Singh wrote:
> --- a/xen/drivers/vpci/msix.c
> +++ b/xen/drivers/vpci/msix.c
> @@ -17,15 +17,24 @@
>   * License along with this program; If not, see <http://www.gnu.org/licenses/>.
>   */
>  
> +#include <xen/msi.h>
>  #include <xen/sched.h>
>  #include <xen/vpci.h>
>  
> -#include <asm/msi.h>
>  #include <asm/p2m.h>
>  
> -#define VMSIX_ADDR_IN_RANGE(addr, vpci, nr)                               \
> -    ((addr) >= vmsix_table_addr(vpci, nr) &&                              \
> -     (addr) < vmsix_table_addr(vpci, nr) + vmsix_table_size(vpci, nr))
> +/*
> + * The return value is different for the MMIO handler on ARM and x86
> + * architecture. To make the code common for both architectures create
> + * generic return code with architecture dependent values.
> + */
> +#ifdef CONFIG_X86
> +#define VPCI_EMUL_OKAY      X86EMUL_OKAY
> +#define VPCI_EMUL_RETRY     X86EMUL_RETRY
> +#else
> +#define VPCI_EMUL_OKAY      1
> +#define VPCI_EMUL_RETRY     VPCI_EMUL_OKAY
> +#endif

In addition to what Roger has said, and taking the above as an example,
I think you want to split this change. The change in return value naming
could likely quite well be a separate thing. And then it'll be easier to
see which other suggested changes are really movement of x86-specific
stuff (looking over it I wasn't convinced everything you move really is).

> @@ -472,11 +401,10 @@ static int init_msix(struct pci_dev *pdev)
>          vpci_msix_arch_init_entry(&msix->entries[i]);
>      }
>  
> -    if ( list_empty(&d->arch.hvm.msix_tables) )
> -        register_mmio_handler(d, &vpci_msix_table_ops);
> +    register_msix_mmio_handler(d);
> +    vpci_msix_add_to_msix_table(msix, d);
>  
>      pdev->vpci->msix = msix;
> -    list_add(&msix->next, &d->arch.hvm.msix_tables);
>  
>      return 0;

May I ask that you don't alter the order of operations? I take it that
vpci_msix_add_to_msix_table() is the replacement of the list_add().
That should occur only after pdev->vpci has been updated. I could in
fact imagine that, in cases like this one, barriers may need adding
for Arm.

> --- /dev/null
> +++ b/xen/drivers/vpci/x86_msix.c
> @@ -0,0 +1,155 @@
> +/*
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms and conditions of the GNU General Public
> + * License, version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public
> + * License along with this program; If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include <xen/sched.h>
> +#include <xen/vpci.h>
> +
> +#include <asm/msi.h>
> +#include <asm/p2m.h>
> +
> +u32 vpci_arch_readl(unsigned long addr)

Nit: No new uses of u<N> please; these are being phased out, with
uint<N>_t being the intended types.

> +{
> +    return readl(addr);
> +}
> +
> +u64 vpci_arch_readq(unsigned long addr)
> +{
> +    return readq(addr);
> +}
> +
> +void vpci_arch_writel(u32 data, unsigned long addr)
> +{
> +    writel(data, addr);
> +}
> +
> +void vpci_arch_writeq(u64 data, unsigned long addr)
> +{
> +    writeq(data, addr);
> +}

Functions like these (if, as Roger said, they need abstracting in the
first place) or ...

> +void register_msix_mmio_handler(struct domain *d)
> +{
> +    if ( list_empty(&d->arch.hvm.msix_tables) )
> +        register_mmio_handler(d, &vpci_msix_table_ops);
> +}
> +
> +void vpci_msix_add_to_msix_table(struct vpci_msix *msix,
> +                                 struct domain *d)
> +{
> +    list_add(&msix->next, &d->arch.hvm.msix_tables);
> +}

... these would imo better be inline helpers.

> --- a/xen/include/asm-x86/msi.h
> +++ b/xen/include/asm-x86/msi.h
> @@ -148,34 +148,6 @@ int msi_free_irq(struct msi_desc *entry);
>   */
>  #define NR_HP_RESERVED_VECTORS 	20
>  
> -#define msi_control_reg(base)		(base + PCI_MSI_FLAGS)
> -#define msi_lower_address_reg(base)	(base + PCI_MSI_ADDRESS_LO)
> -#define msi_upper_address_reg(base)	(base + PCI_MSI_ADDRESS_HI)
> -#define msi_data_reg(base, is64bit)	\
> -	( (is64bit == 1) ? base+PCI_MSI_DATA_64 : base+PCI_MSI_DATA_32 )
> -#define msi_mask_bits_reg(base, is64bit) \
> -	( (is64bit == 1) ? base+PCI_MSI_MASK_BIT : base+PCI_MSI_MASK_BIT-4)
> -#define msi_pending_bits_reg(base, is64bit) \
> -	((base) + PCI_MSI_MASK_BIT + ((is64bit) ? 4 : 0))
> -#define msi_disable(control)		control &= ~PCI_MSI_FLAGS_ENABLE
> -#define multi_msi_capable(control) \
> -	(1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1))
> -#define multi_msi_enable(control, num) \
> -	control |= (((fls(num) - 1) << 4) & PCI_MSI_FLAGS_QSIZE);
> -#define is_64bit_address(control)	(!!(control & PCI_MSI_FLAGS_64BIT))
> -#define is_mask_bit_support(control)	(!!(control & PCI_MSI_FLAGS_MASKBIT))
> -#define msi_enable(control, num) multi_msi_enable(control, num); \
> -	control |= PCI_MSI_FLAGS_ENABLE
> -
> -#define msix_control_reg(base)		(base + PCI_MSIX_FLAGS)
> -#define msix_table_offset_reg(base)	(base + PCI_MSIX_TABLE)
> -#define msix_pba_offset_reg(base)	(base + PCI_MSIX_PBA)
> -#define msix_enable(control)	 	control |= PCI_MSIX_FLAGS_ENABLE
> -#define msix_disable(control)	 	control &= ~PCI_MSIX_FLAGS_ENABLE
> -#define msix_table_size(control) 	((control & PCI_MSIX_FLAGS_QSIZE)+1)
> -#define msix_unmask(address)	 	(address & ~PCI_MSIX_VECTOR_BITMASK)
> -#define msix_mask(address)		(address | PCI_MSIX_VECTOR_BITMASK)
> -
>  /*
>   * MSI Defined Data Structures
>   */
> diff --git a/xen/include/xen/msi.h b/xen/include/xen/msi.h
> index c903d0050c..1c22c9a4a7 100644
> --- a/xen/include/xen/msi.h
> +++ b/xen/include/xen/msi.h
> @@ -3,6 +3,34 @@
>  
>  #include <xen/pci.h>
>  
> +#define msi_control_reg(base)       (base + PCI_MSI_FLAGS)
> +#define msi_lower_address_reg(base) (base + PCI_MSI_ADDRESS_LO)
> +#define msi_upper_address_reg(base) (base + PCI_MSI_ADDRESS_HI)
> +#define msi_data_reg(base, is64bit) \
> +	( (is64bit == 1) ? base+PCI_MSI_DATA_64 : base+PCI_MSI_DATA_32 )

As you move this code, please tidy it style-wise. For the construct
here, for example this would mean

#define msi_data_reg(base, is64bit) \
    ((is64bit) ? (base) + PCI_MSI_DATA_64 : (base) + PCI_MSI_DATA_32)

or perhaps even

#define msi_data_reg(base, is64bit) \
    ((base) + ((is64bit) ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32))

Further items would want similar adjustments.

Jan
Rahul Singh Dec. 16, 2021, 10:18 a.m. UTC | #3
Hi Roger,

Thanks for reviewing the code.

> On 14 Dec 2021, at 12:37 pm, Roger Pau Monné <roger.pau@citrix.com> wrote:
> 
> On Tue, Dec 14, 2021 at 10:45:17AM +0000, Rahul Singh wrote:
>> vpci/msix.c file will be used for arm architecture when vpci msix
>> support will be added to ARM, but there is x86 specific code in this
>> file.
>> 
>> Move x86 specific code to the x86_msix.c file to make sure common code
>> will be used for other architecture.
>> 
>> No functional change intended.
>> 
>> Signed-off-by: Rahul Singh <rahul.singh@arm.com>
>> ---
>> xen/arch/x86/msi.c                       |   2 +-
>> xen/drivers/passthrough/amd/iommu_init.c |   1 +
>> xen/drivers/vpci/Makefile                |   1 +
>> xen/drivers/vpci/msi.c                   |   3 +-
>> xen/drivers/vpci/msix.c                  | 134 +++++---------------
>> xen/drivers/vpci/x86_msix.c              | 155 +++++++++++++++++++++++
> 
> This should go into xen/arch/x86/hvm/vmsi.c there's already vPCI MSI
> specific code in there.
Ok.
>> xen/include/asm-x86/msi.h                |  28 ----
>> xen/include/xen/msi.h                    |  28 ++++
>> xen/include/xen/vpci.h                   |  21 +++
>> 9 files changed, 239 insertions(+), 134 deletions(-)
>> create mode 100644 xen/drivers/vpci/x86_msix.c
>> 
>> diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c
>> index 5febc0ea4b..2b120f897f 100644
>> --- a/xen/arch/x86/msi.c
>> +++ b/xen/arch/x86/msi.c
>> @@ -23,7 +23,7 @@
>> #include <asm/io.h>
>> #include <asm/smp.h>
>> #include <asm/desc.h>
>> -#include <asm/msi.h>
>> +#include <xen/msi.h>
> 
> You likely need to move this up to the xen/ prefixed include block.
Ok.
> 
>> #include <asm/fixmap.h>
>> #include <asm/p2m.h>
>> #include <mach_apic.h>
>> diff --git a/xen/drivers/passthrough/amd/iommu_init.c b/xen/drivers/passthrough/amd/iommu_init.c
>> index 559a734bda..fc385959c7 100644
>> --- a/xen/drivers/passthrough/amd/iommu_init.c
>> +++ b/xen/drivers/passthrough/amd/iommu_init.c
>> @@ -20,6 +20,7 @@
>> #include <xen/acpi.h>
>> #include <xen/delay.h>
>> #include <xen/keyhandler.h>
>> +#include <xen/msi.h>
>> 
>> #include "iommu.h"
> 
> Might be better to replace the asm/msi.h include in iommu.h with
> xen/msi.h instead (or just add the xen/msi.h include instead of
> replacing it).

Ok.
> 
>> 
>> diff --git a/xen/drivers/vpci/Makefile b/xen/drivers/vpci/Makefile
>> index 1a1413b93e..543c265199 100644
>> --- a/xen/drivers/vpci/Makefile
>> +++ b/xen/drivers/vpci/Makefile
>> @@ -1,2 +1,3 @@
>> obj-y += vpci.o header.o
>> obj-$(CONFIG_HAS_PCI_MSI) += msi.o msix.o
>> +obj-$(CONFIG_X86) += x86_msix.o
>> diff --git a/xen/drivers/vpci/msi.c b/xen/drivers/vpci/msi.c
>> index 5757a7aed2..8fc82a9b8d 100644
>> --- a/xen/drivers/vpci/msi.c
>> +++ b/xen/drivers/vpci/msi.c
>> @@ -16,12 +16,11 @@
>>  * License along with this program; If not, see <http://www.gnu.org/licenses/>.
>>  */
>> 
>> +#include <xen/msi.h>
>> #include <xen/sched.h>
>> #include <xen/softirq.h>
>> #include <xen/vpci.h>
>> 
>> -#include <asm/msi.h>
>> -
>> static uint32_t control_read(const struct pci_dev *pdev, unsigned int reg,
>>                              void *data)
>> {
>> diff --git a/xen/drivers/vpci/msix.c b/xen/drivers/vpci/msix.c
>> index 846f1b8d70..7a9b02f1a5 100644
>> --- a/xen/drivers/vpci/msix.c
>> +++ b/xen/drivers/vpci/msix.c
>> @@ -17,15 +17,24 @@
>>  * License along with this program; If not, see <http://www.gnu.org/licenses/>.
>>  */
>> 
>> +#include <xen/msi.h>
>> #include <xen/sched.h>
>> #include <xen/vpci.h>
>> 
>> -#include <asm/msi.h>
>> #include <asm/p2m.h>
>> 
>> -#define VMSIX_ADDR_IN_RANGE(addr, vpci, nr)                               \
>> -    ((addr) >= vmsix_table_addr(vpci, nr) &&                              \
>> -     (addr) < vmsix_table_addr(vpci, nr) + vmsix_table_size(vpci, nr))
>> +/*
>> + * The return value is different for the MMIO handler on ARM and x86
>> + * architecture. To make the code common for both architectures create
>> + * generic return code with architecture dependent values.
>> + */
>> +#ifdef CONFIG_X86
>> +#define VPCI_EMUL_OKAY      X86EMUL_OKAY
>> +#define VPCI_EMUL_RETRY     X86EMUL_RETRY
>> +#else
>> +#define VPCI_EMUL_OKAY      1
>> +#define VPCI_EMUL_RETRY     VPCI_EMUL_OKAY
>> +#endif
> 
> Since msix_{read/write} are no longer directly used by the MMIO
> handlers you might as well just return an error code (or a boolean)
> and let the caller translate that into the per-arch return code.

Ok.
> 
>> 
>> static uint32_t control_read(const struct pci_dev *pdev, unsigned int reg,
>>                              void *data)
>> @@ -138,29 +147,6 @@ static void control_write(const struct pci_dev *pdev, unsigned int reg,
>>         pci_conf_write16(pdev->sbdf, reg, val);
>> }
>> 
>> -static struct vpci_msix *msix_find(const struct domain *d, unsigned long addr)
>> -{
>> -    struct vpci_msix *msix;
>> -
>> -    list_for_each_entry ( msix, &d->arch.hvm.msix_tables, next )
>> -    {
>> -        const struct vpci_bar *bars = msix->pdev->vpci->header.bars;
>> -        unsigned int i;
>> -
>> -        for ( i = 0; i < ARRAY_SIZE(msix->tables); i++ )
>> -            if ( bars[msix->tables[i] & PCI_MSIX_BIRMASK].enabled &&
>> -                 VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, i) )
>> -                return msix;
>> -    }
>> -
>> -    return NULL;
>> -}
>> -
>> -static int msix_accept(struct vcpu *v, unsigned long addr)
>> -{
>> -    return !!msix_find(v->domain, addr);
>> -}
>> -
>> static bool access_allowed(const struct pci_dev *pdev, unsigned long addr,
>>                            unsigned int len)
>> {
>> @@ -182,21 +168,19 @@ static struct vpci_msix_entry *get_entry(struct vpci_msix *msix,
>>     return &msix->entries[(addr - start) / PCI_MSIX_ENTRY_SIZE];
>> }
>> 
>> -static int msix_read(struct vcpu *v, unsigned long addr, unsigned int len,
>> -                     unsigned long *data)
>> +int msix_read(struct vpci_msix *msix, unsigned long addr, unsigned int len,
> 
> This now requires a vpci_ prefix, since it's a global function.
> Plain msix_{read,write} is way too generic.
Ack. 
> 
>> +              unsigned long *data)
>> {
>> -    const struct domain *d = v->domain;
>> -    struct vpci_msix *msix = msix_find(d, addr);
>>     const struct vpci_msix_entry *entry;
>>     unsigned int offset;
>> 
>>     *data = ~0ul;
>> 
>>     if ( !msix )
>> -        return X86EMUL_RETRY;
>> +        return VPCI_EMUL_RETRY;
>> 
>>     if ( !access_allowed(msix->pdev, addr, len) )
>> -        return X86EMUL_OKAY;
>> +        return VPCI_EMUL_OKAY;
>> 
>>     if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
>>     {
>> @@ -210,11 +194,11 @@ static int msix_read(struct vcpu *v, unsigned long addr, unsigned int len,
>>         switch ( len )
>>         {
>>         case 4:
>> -            *data = readl(addr);
>> +            *data = vpci_arch_readl(addr);
> 
> Why do you need a vpci wrapper around the read/write handlers? AFAICT
> arm64 also has {read,write}{l,q}. And you likely want to protect the
> 64bit read with CONFIG_64BIT if this code is to be made available to
> arm32.

I need the wrapper because the {read,write}{l,q} function arguments differ between ARM and x86.
On ARM the {read,write}{l,q} functions take a pointer to the address, whereas on x86 the
{read,write}{l,q} functions take the address itself.

> 
>>             break;
>> 
>>         case 8:
>> -            *data = readq(addr);
>> +            *data = vpci_arch_readq(addr);
>>             break;
>> 
>>         default:
>> @@ -222,7 +206,7 @@ static int msix_read(struct vcpu *v, unsigned long addr, unsigned int len,
>>             break;
>>         }
>> 
>> -        return X86EMUL_OKAY;
>> +        return VPCI_EMUL_OKAY;
>>     }
>> 
>>     spin_lock(&msix->pdev->vpci->lock);
>> @@ -256,22 +240,20 @@ static int msix_read(struct vcpu *v, unsigned long addr, unsigned int len,
>>     }
>>     spin_unlock(&msix->pdev->vpci->lock);
>> 
>> -    return X86EMUL_OKAY;
>> +    return VPCI_EMUL_OKAY;
>> }
>> 
>> -static int msix_write(struct vcpu *v, unsigned long addr, unsigned int len,
>> -                      unsigned long data)
>> +int msix_write(const struct domain *d, struct vpci_msix *msix,
>> +               unsigned long addr, unsigned int len, unsigned long data)
>> {
>> -    const struct domain *d = v->domain;
>> -    struct vpci_msix *msix = msix_find(d, addr);
>>     struct vpci_msix_entry *entry;
>>     unsigned int offset;
>> 
>>     if ( !msix )
>> -        return X86EMUL_RETRY;
>> +        return VPCI_EMUL_RETRY;
>> 
>>     if ( !access_allowed(msix->pdev, addr, len) )
>> -        return X86EMUL_OKAY;
>> +        return VPCI_EMUL_OKAY;
>> 
>>     if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
>>     {
>> @@ -281,11 +263,11 @@ static int msix_write(struct vcpu *v, unsigned long addr, unsigned int len,
>>             switch ( len )
>>             {
>>             case 4:
>> -                writel(data, addr);
>> +                vpci_arch_writel(data, addr);
>>                 break;
>> 
>>             case 8:
>> -                writeq(data, addr);
>> +                vpci_arch_writeq(data, addr);
>>                 break;
>> 
>>             default:
>> @@ -294,7 +276,7 @@ static int msix_write(struct vcpu *v, unsigned long addr, unsigned int len,
>>             }
>>         }
>> 
>> -        return X86EMUL_OKAY;
>> +        return VPCI_EMUL_OKAY;
>>     }
>> 
>>     spin_lock(&msix->pdev->vpci->lock);
>> @@ -372,60 +354,7 @@ static int msix_write(struct vcpu *v, unsigned long addr, unsigned int len,
>>     }
>>     spin_unlock(&msix->pdev->vpci->lock);
>> 
>> -    return X86EMUL_OKAY;
>> -}
>> -
>> -static const struct hvm_mmio_ops vpci_msix_table_ops = {
>> -    .check = msix_accept,
>> -    .read = msix_read,
>> -    .write = msix_write,
>> -};
>> -
>> -int vpci_make_msix_hole(const struct pci_dev *pdev)
>> -{
>> -    struct domain *d = pdev->domain;
>> -    unsigned int i;
>> -
>> -    if ( !pdev->vpci->msix )
>> -        return 0;
>> -
>> -    /* Make sure there's a hole for the MSIX table/PBA in the p2m. */
>> -    for ( i = 0; i < ARRAY_SIZE(pdev->vpci->msix->tables); i++ )
>> -    {
>> -        unsigned long start = PFN_DOWN(vmsix_table_addr(pdev->vpci, i));
>> -        unsigned long end = PFN_DOWN(vmsix_table_addr(pdev->vpci, i) +
>> -                                     vmsix_table_size(pdev->vpci, i) - 1);
>> -
>> -        for ( ; start <= end; start++ )
>> -        {
>> -            p2m_type_t t;
>> -            mfn_t mfn = get_gfn_query(d, start, &t);
>> -
>> -            switch ( t )
>> -            {
>> -            case p2m_mmio_dm:
>> -            case p2m_invalid:
>> -                break;
>> -            case p2m_mmio_direct:
>> -                if ( mfn_x(mfn) == start )
>> -                {
>> -                    clear_identity_p2m_entry(d, start);
>> -                    break;
>> -                }
>> -                /* fallthrough. */
>> -            default:
>> -                put_gfn(d, start);
>> -                gprintk(XENLOG_WARNING,
>> -                        "%pp: existing mapping (mfn: %" PRI_mfn
>> -                        "type: %d) at %#lx clobbers MSIX MMIO area\n",
>> -                        &pdev->sbdf, mfn_x(mfn), t, start);
>> -                return -EEXIST;
>> -            }
>> -            put_gfn(d, start);
>> -        }
>> -    }
>> -
>> -    return 0;
>> +    return VPCI_EMUL_OKAY;
>> }
>> 
>> static int init_msix(struct pci_dev *pdev)
>> @@ -472,11 +401,10 @@ static int init_msix(struct pci_dev *pdev)
>>         vpci_msix_arch_init_entry(&msix->entries[i]);
>>     }
>> 
>> -    if ( list_empty(&d->arch.hvm.msix_tables) )
>> -        register_mmio_handler(d, &vpci_msix_table_ops);
>> +    register_msix_mmio_handler(d);
>> +    vpci_msix_add_to_msix_table(msix, d);
>> 
>>     pdev->vpci->msix = msix;
>> -    list_add(&msix->next, &d->arch.hvm.msix_tables);
> 
> You could likely do the registering of the handler and the addition of
> the table in the same handler: vpci_msix_arch_register or similar.

Ok.

Regards,
Rahul
> 
> Thanks, Roger.
Rahul Singh Dec. 16, 2021, 10:28 a.m. UTC | #4
Hi Jan,

Thanks for reviewing the code.

> On 14 Dec 2021, at 2:15 pm, Jan Beulich <jbeulich@suse.com> wrote:
> 
> On 14.12.2021 11:45, Rahul Singh wrote:
>> --- a/xen/drivers/vpci/msix.c
>> +++ b/xen/drivers/vpci/msix.c
>> @@ -17,15 +17,24 @@
>>  * License along with this program; If not, see <http://www.gnu.org/licenses/>.
>>  */
>> 
>> +#include <xen/msi.h>
>> #include <xen/sched.h>
>> #include <xen/vpci.h>
>> 
>> -#include <asm/msi.h>
>> #include <asm/p2m.h>
>> 
>> -#define VMSIX_ADDR_IN_RANGE(addr, vpci, nr)                               \
>> -    ((addr) >= vmsix_table_addr(vpci, nr) &&                              \
>> -     (addr) < vmsix_table_addr(vpci, nr) + vmsix_table_size(vpci, nr))
>> +/*
>> + * The return value is different for the MMIO handler on ARM and x86
>> + * architecture. To make the code common for both architectures create
>> + * generic return code with architecture dependent values.
>> + */
>> +#ifdef CONFIG_X86
>> +#define VPCI_EMUL_OKAY      X86EMUL_OKAY
>> +#define VPCI_EMUL_RETRY     X86EMUL_RETRY
>> +#else
>> +#define VPCI_EMUL_OKAY      1
>> +#define VPCI_EMUL_RETRY     VPCI_EMUL_OKAY
>> +#endif
> 
> In addition to what Roger has said, at the example of the above I think
> you want to split this change. The change in return value naming could
> likely quite well be a separate thing. And then it'll be easier to see
> which other suggested changes are really movement of x86-specific stuff
> (looking over it I wasn't convinced everything you move really is).
> 

Ack. I will split the changes in the next version.

>> @@ -472,11 +401,10 @@ static int init_msix(struct pci_dev *pdev)
>>         vpci_msix_arch_init_entry(&msix->entries[i]);
>>     }
>> 
>> -    if ( list_empty(&d->arch.hvm.msix_tables) )
>> -        register_mmio_handler(d, &vpci_msix_table_ops);
>> +    register_msix_mmio_handler(d);
>> +    vpci_msix_add_to_msix_table(msix, d);
>> 
>>     pdev->vpci->msix = msix;
>> -    list_add(&msix->next, &d->arch.hvm.msix_tables);
>> 
>>     return 0;
> 
> May I ask that you don't alter the order of operations? I take it that
> vpci_msix_add_to_msix_table() is the replacement of the list_add().
> That should occur only after pdev->vpci has been updated. I could in
> fact imagine that in cases like this one for Arm barriers may need
> adding.

Yes, I will not change the order; I will fix this in the next version.
> 
>> --- /dev/null
>> +++ b/xen/drivers/vpci/x86_msix.c
>> @@ -0,0 +1,155 @@
>> +/*
>> + * This program is free software; you can redistribute it and/or
>> + * modify it under the terms and conditions of the GNU General Public
>> + * License, version 2, as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU General Public
>> + * License along with this program; If not, see <http://www.gnu.org/licenses/>.
>> + */
>> +
>> +#include <xen/sched.h>
>> +#include <xen/vpci.h>
>> +
>> +#include <asm/msi.h>
>> +#include <asm/p2m.h>
>> +
>> +u32 vpci_arch_readl(unsigned long addr)
> 
> Nit: No new uses of u<N> please; these are being phased out, with
> uint<N>_t being the intended types.

Ack .
> 
>> +{
>> +    return readl(addr);
>> +}
>> +
>> +u64 vpci_arch_readq(unsigned long addr)
>> +{
>> +    return readq(addr);
>> +}
>> +
>> +void vpci_arch_writel(u32 data, unsigned long addr)
>> +{
>> +    writel(data, addr);
>> +}
>> +
>> +void vpci_arch_writeq(u64 data, unsigned long addr)
>> +{
>> +    writeq(data, addr);
>> +}
> 
> Functions like these (if, as Roger said, they need abstracting in the
> first place) or ...
> 
>> +void register_msix_mmio_handler(struct domain *d)
>> +{
>> +    if ( list_empty(&d->arch.hvm.msix_tables) )
>> +        register_mmio_handler(d, &vpci_msix_table_ops);
>> +}
>> +
>> +void vpci_msix_add_to_msix_table(struct vpci_msix *msix,
>> +                                 struct domain *d)
>> +{
>> +    list_add(&msix->next, &d->arch.hvm.msix_tables);
>> +}
> 
> ... these would imo better be inline helpers.

Ack.
> 
>> --- a/xen/include/asm-x86/msi.h
>> +++ b/xen/include/asm-x86/msi.h
>> @@ -148,34 +148,6 @@ int msi_free_irq(struct msi_desc *entry);
>>  */
>> #define NR_HP_RESERVED_VECTORS 	20
>> 
>> -#define msi_control_reg(base)		(base + PCI_MSI_FLAGS)
>> -#define msi_lower_address_reg(base)	(base + PCI_MSI_ADDRESS_LO)
>> -#define msi_upper_address_reg(base)	(base + PCI_MSI_ADDRESS_HI)
>> -#define msi_data_reg(base, is64bit)	\
>> -	( (is64bit == 1) ? base+PCI_MSI_DATA_64 : base+PCI_MSI_DATA_32 )
>> -#define msi_mask_bits_reg(base, is64bit) \
>> -	( (is64bit == 1) ? base+PCI_MSI_MASK_BIT : base+PCI_MSI_MASK_BIT-4)
>> -#define msi_pending_bits_reg(base, is64bit) \
>> -	((base) + PCI_MSI_MASK_BIT + ((is64bit) ? 4 : 0))
>> -#define msi_disable(control)		control &= ~PCI_MSI_FLAGS_ENABLE
>> -#define multi_msi_capable(control) \
>> -	(1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1))
>> -#define multi_msi_enable(control, num) \
>> -	control |= (((fls(num) - 1) << 4) & PCI_MSI_FLAGS_QSIZE);
>> -#define is_64bit_address(control)	(!!(control & PCI_MSI_FLAGS_64BIT))
>> -#define is_mask_bit_support(control)	(!!(control & PCI_MSI_FLAGS_MASKBIT))
>> -#define msi_enable(control, num) multi_msi_enable(control, num); \
>> -	control |= PCI_MSI_FLAGS_ENABLE
>> -
>> -#define msix_control_reg(base)		(base + PCI_MSIX_FLAGS)
>> -#define msix_table_offset_reg(base)	(base + PCI_MSIX_TABLE)
>> -#define msix_pba_offset_reg(base)	(base + PCI_MSIX_PBA)
>> -#define msix_enable(control)	 	control |= PCI_MSIX_FLAGS_ENABLE
>> -#define msix_disable(control)	 	control &= ~PCI_MSIX_FLAGS_ENABLE
>> -#define msix_table_size(control) 	((control & PCI_MSIX_FLAGS_QSIZE)+1)
>> -#define msix_unmask(address)	 	(address & ~PCI_MSIX_VECTOR_BITMASK)
>> -#define msix_mask(address)		(address | PCI_MSIX_VECTOR_BITMASK)
>> -
>> /*
>>  * MSI Defined Data Structures
>>  */
>> diff --git a/xen/include/xen/msi.h b/xen/include/xen/msi.h
>> index c903d0050c..1c22c9a4a7 100644
>> --- a/xen/include/xen/msi.h
>> +++ b/xen/include/xen/msi.h
>> @@ -3,6 +3,34 @@
>> 
>> #include <xen/pci.h>
>> 
>> +#define msi_control_reg(base)       (base + PCI_MSI_FLAGS)
>> +#define msi_lower_address_reg(base) (base + PCI_MSI_ADDRESS_LO)
>> +#define msi_upper_address_reg(base) (base + PCI_MSI_ADDRESS_HI)
>> +#define msi_data_reg(base, is64bit) \
>> +	( (is64bit == 1) ? base+PCI_MSI_DATA_64 : base+PCI_MSI_DATA_32 )
> 
> As you move this code, please tidy it style-wise. For the construct
> here, for example this would mean
> 
> #define msi_data_reg(base, is64bit) \
>    ((is64bit) ? (base) + PCI_MSI_DATA_64 : (base) + PCI_MSI_DATA_32)
> 
> or perhaps even
> 
> #define msi_data_reg(base, is64bit) \
>    ((base) + ((is64bit) ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32))
> 
> Further items would want similar adjustments.

Ok. Let me try to fix this in the next version.

Regards,
Rahul
> 
> Jan
>
Roger Pau Monné Dec. 16, 2021, 11:01 a.m. UTC | #5
On Thu, Dec 16, 2021 at 10:18:32AM +0000, Rahul Singh wrote:
> Hi Roger,
> 
> Thanks for reviewing the code.
> 
> > On 14 Dec 2021, at 12:37 pm, Roger Pau Monné <roger.pau@citrix.com> wrote:
> > 
> > On Tue, Dec 14, 2021 at 10:45:17AM +0000, Rahul Singh wrote:
> >> +              unsigned long *data)
> >> {
> >> -    const struct domain *d = v->domain;
> >> -    struct vpci_msix *msix = msix_find(d, addr);
> >>     const struct vpci_msix_entry *entry;
> >>     unsigned int offset;
> >> 
> >>     *data = ~0ul;
> >> 
> >>     if ( !msix )
> >> -        return X86EMUL_RETRY;
> >> +        return VPCI_EMUL_RETRY;
> >> 
> >>     if ( !access_allowed(msix->pdev, addr, len) )
> >> -        return X86EMUL_OKAY;
> >> +        return VPCI_EMUL_OKAY;
> >> 
> >>     if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
> >>     {
> >> @@ -210,11 +194,11 @@ static int msix_read(struct vcpu *v, unsigned long addr, unsigned int len,
> >>         switch ( len )
> >>         {
> >>         case 4:
> >> -            *data = readl(addr);
> >> +            *data = vpci_arch_readl(addr);
> > 
> > Why do you need a vpci wrapper around the read/write handlers? AFAICT
> > arm64 also has {read,write}{l,q}. And you likely want to protect the
> > 64bit read with CONFIG_64BIT if this code is to be made available to
> > arm32.
> 
> I need the wrapper because {read,write}{l,q} function argument is different for ARM and x86.
> > ARM {read,write}{l,q} function argument is pointer to the address whereas X86 {read,write}{l,q}
> function argument is address itself.

Oh, that's a shame. I don't think there's a need to tag those helpers
with the vpci_ prefix though. Could we maybe introduce
bus_{read,write}{b,w,l,q} helpers that take the same parameters on all
arches?

It would be even better to fix the current ones so they take the same
parameters on x86 and Arm, but that would mean changing all the call
places in one of the arches.

Thanks, Roger.
Jan Beulich Dec. 16, 2021, 1:37 p.m. UTC | #6
On 16.12.2021 12:01, Roger Pau Monné wrote:
> On Thu, Dec 16, 2021 at 10:18:32AM +0000, Rahul Singh wrote:
>> Hi Roger,
>>
>> Thanks for reviewing the code.
>>
>>> On 14 Dec 2021, at 12:37 pm, Roger Pau Monné <roger.pau@citrix.com> wrote:
>>>
>>> On Tue, Dec 14, 2021 at 10:45:17AM +0000, Rahul Singh wrote:
>>>> +              unsigned long *data)
>>>> {
>>>> -    const struct domain *d = v->domain;
>>>> -    struct vpci_msix *msix = msix_find(d, addr);
>>>>     const struct vpci_msix_entry *entry;
>>>>     unsigned int offset;
>>>>
>>>>     *data = ~0ul;
>>>>
>>>>     if ( !msix )
>>>> -        return X86EMUL_RETRY;
>>>> +        return VPCI_EMUL_RETRY;
>>>>
>>>>     if ( !access_allowed(msix->pdev, addr, len) )
>>>> -        return X86EMUL_OKAY;
>>>> +        return VPCI_EMUL_OKAY;
>>>>
>>>>     if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
>>>>     {
>>>> @@ -210,11 +194,11 @@ static int msix_read(struct vcpu *v, unsigned long addr, unsigned int len,
>>>>         switch ( len )
>>>>         {
>>>>         case 4:
>>>> -            *data = readl(addr);
>>>> +            *data = vpci_arch_readl(addr);
>>>
>>> Why do you need a vpci wrapper around the read/write handlers? AFAICT
>>> arm64 also has {read,write}{l,q}. And you likely want to protect the
>>> 64bit read with CONFIG_64BIT if this code is to be made available to
>>> arm32.
>>
>> I need the wrapper because {read,write}{l,q} function argument is different for ARM and x86.
>> ARM {read,write}{l,q} function argument is pointer to the address whereas X86 {read,write}{l,q}
>> function argument is address itself.
> 
> Oh, that's a shame. I don't think there's a need to tag those helpers
> with the vpci_ prefix though. Could we maybe introduce
> bus_{read,write}{b,w,l,q} helpers that take the same parameters on all
> arches?
> 
> It would be even better to fix the current ones so they take the same
> parameters on x86 and Arm, but that would mean changing all the call
> places in one of the arches.

Yet still: +1 for removing the extra level of indirection. Imo these
trivial helpers should never have diverged between arches; I have
always been under the impression that on Linux they can be used by
arch-independent code (or else drivers would be quite hard to write).

Jan
Julien Grall Dec. 17, 2021, 2:32 p.m. UTC | #7
Hi Jan,

On 16/12/2021 13:37, Jan Beulich wrote:
> On 16.12.2021 12:01, Roger Pau Monné wrote:
>> On Thu, Dec 16, 2021 at 10:18:32AM +0000, Rahul Singh wrote:
>>> Hi Roger,
>>>
>>> Thanks for reviewing the code.
>>>
>>>> On 14 Dec 2021, at 12:37 pm, Roger Pau Monné <roger.pau@citrix.com> wrote:
>>>>
>>>> On Tue, Dec 14, 2021 at 10:45:17AM +0000, Rahul Singh wrote:
>>>>> +              unsigned long *data)
>>>>> {
>>>>> -    const struct domain *d = v->domain;
>>>>> -    struct vpci_msix *msix = msix_find(d, addr);
>>>>>      const struct vpci_msix_entry *entry;
>>>>>      unsigned int offset;
>>>>>
>>>>>      *data = ~0ul;
>>>>>
>>>>>      if ( !msix )
>>>>> -        return X86EMUL_RETRY;
>>>>> +        return VPCI_EMUL_RETRY;
>>>>>
>>>>>      if ( !access_allowed(msix->pdev, addr, len) )
>>>>> -        return X86EMUL_OKAY;
>>>>> +        return VPCI_EMUL_OKAY;
>>>>>
>>>>>      if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
>>>>>      {
>>>>> @@ -210,11 +194,11 @@ static int msix_read(struct vcpu *v, unsigned long addr, unsigned int len,
>>>>>          switch ( len )
>>>>>          {
>>>>>          case 4:
>>>>> -            *data = readl(addr);
>>>>> +            *data = vpci_arch_readl(addr);
>>>>
>>>> Why do you need a vpci wrapper around the read/write handlers? AFAICT
>>>> arm64 also has {read,write}{l,q}. And you likely want to protect the
>>>> 64bit read with CONFIG_64BIT if this code is to be made available to
>>>> arm32.
>>>
>>> I need the wrapper because {read,write}{l,q} function argument is different for ARM and x86.
>>> ARM {read,write}{l,q} function argument is pointer to the address whereas X86 {read,write}{l,q}
>>> function argument is address itself.
>>
>> Oh, that's a shame. I don't think there's a need to tag those helpers
>> with the vpci_ prefix though. Could we maybe introduce
>> bus_{read,write}{b,w,l,q} helpers that take the same parameters on all
>> arches?
>>
>> It would be even better to fix the current ones so they take the same
>> parameters on x86 and Arm, but that would mean changing all the call
>> places in one of the arches.
> 
> Yet still: +1 for removing the extra level of indirection. Imo these
> trivial helpers should never have diverged between arches; I have
> always been under the impression that on Linux they can be used by
> arch-independent code (or else drivers would be quite hard to write).

So technically both helpers are able to cope with a pointer. The x86 one
also allows passing an address.

From a brief look at the x86 code, it looks like most of the users are using
a pointer. However, the vPCI msix code is one example where addresses
are passed.

AFAICT, the read*/write* helpers on Linux only work with pointers. So I
think the actions should be:
    1) Modify the vPCI MSIx code to use a pointer
    2) Modify the x86 read*/write* helpers to forbid any access other
than through a pointer.

Cheers,
Rahul Singh Dec. 17, 2021, 2:58 p.m. UTC | #8
Hi Julien

> On 17 Dec 2021, at 2:32 pm, Julien Grall <julien@xen.org> wrote:
> 
> Hi Jan,
> 
> On 16/12/2021 13:37, Jan Beulich wrote:
>> On 16.12.2021 12:01, Roger Pau Monné wrote:
>>> On Thu, Dec 16, 2021 at 10:18:32AM +0000, Rahul Singh wrote:
>>>> Hi Roger,
>>>> 
>>>> Thanks for reviewing the code.
>>>> 
>>>>> On 14 Dec 2021, at 12:37 pm, Roger Pau Monné <roger.pau@citrix.com> wrote:
>>>>> 
>>>>> On Tue, Dec 14, 2021 at 10:45:17AM +0000, Rahul Singh wrote:
>>>>>> +              unsigned long *data)
>>>>>> {
>>>>>> -    const struct domain *d = v->domain;
>>>>>> -    struct vpci_msix *msix = msix_find(d, addr);
>>>>>>     const struct vpci_msix_entry *entry;
>>>>>>     unsigned int offset;
>>>>>> 
>>>>>>     *data = ~0ul;
>>>>>> 
>>>>>>     if ( !msix )
>>>>>> -        return X86EMUL_RETRY;
>>>>>> +        return VPCI_EMUL_RETRY;
>>>>>> 
>>>>>>     if ( !access_allowed(msix->pdev, addr, len) )
>>>>>> -        return X86EMUL_OKAY;
>>>>>> +        return VPCI_EMUL_OKAY;
>>>>>> 
>>>>>>     if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
>>>>>>     {
>>>>>> @@ -210,11 +194,11 @@ static int msix_read(struct vcpu *v, unsigned long addr, unsigned int len,
>>>>>>         switch ( len )
>>>>>>         {
>>>>>>         case 4:
>>>>>> -            *data = readl(addr);
>>>>>> +            *data = vpci_arch_readl(addr);
>>>>> 
>>>>> Why do you need a vpci wrapper around the read/write handlers? AFAICT
>>>>> arm64 also has {read,write}{l,q}. And you likely want to protect the
>>>>> 64bit read with CONFIG_64BIT if this code is to be made available to
>>>>> arm32.
>>>> 
>>>> I need the wrapper because {read,write}{l,q} function argument is different for ARM and x86.
>>>> ARM {read,write}{l,q} function argument is pointer to the address whereas X86 {read,write}{l,q}
>>>> function argument is address itself.
>>> 
>>> Oh, that's a shame. I don't think there's a need to tag those helpers
>>> with the vpci_ prefix though. Could we maybe introduce
>>> bus_{read,write}{b,w,l,q} helpers that take the same parameters on all
>>> arches?
>>> 
>>> It would be even better to fix the current ones so they take the same
>>> parameters on x86 and Arm, but that would mean changing all the call
>>> places in one of the arches.
>> Yet still: +1 for removing the extra level of indirection. Imo these
>> trivial helpers should never have diverged between arches; I have
>> always been under the impression that on Linux they can be used by
>> arch-independent code (or else drivers would be quite hard to write).
> 
> So technically both helpers are able to cope with pointer. The x86 one is also allowing to pass an address.
> 
> From a brief look at the x86, it looks like most of the users are using a pointer. However, the vPCI msix code is one example where addresses are passed.

Yes you are right.
> 
> AFAICT, the read*/write* helpers on Linux only works with pointers. So I think the actions should be:
>   1) Modify the vPCI MSIx code to use pointer

I am also thinking of changing msix_read/write to use a pointer to the address, to avoid a change in {read,write}{b,w,l,q}.
If everyone is OK with that, I will send the next version with this modification.

Regards,
Rahul
>   2) Modify the x86 read*/write* helpers to forbid any access other than pointer.
> 
> Cheers,
> 
> -- 
> Julien Grall
Jan Beulich Dec. 21, 2021, 7:41 a.m. UTC | #9
On 17.12.2021 15:32, Julien Grall wrote:
> On 16/12/2021 13:37, Jan Beulich wrote:
>> On 16.12.2021 12:01, Roger Pau Monné wrote:
>>> On Thu, Dec 16, 2021 at 10:18:32AM +0000, Rahul Singh wrote:
>>>>> On 14 Dec 2021, at 12:37 pm, Roger Pau Monné <roger.pau@citrix.com> wrote:
>>>>> On Tue, Dec 14, 2021 at 10:45:17AM +0000, Rahul Singh wrote:
>>>>>> +              unsigned long *data)
>>>>>> {
>>>>>> -    const struct domain *d = v->domain;
>>>>>> -    struct vpci_msix *msix = msix_find(d, addr);
>>>>>>      const struct vpci_msix_entry *entry;
>>>>>>      unsigned int offset;
>>>>>>
>>>>>>      *data = ~0ul;
>>>>>>
>>>>>>      if ( !msix )
>>>>>> -        return X86EMUL_RETRY;
>>>>>> +        return VPCI_EMUL_RETRY;
>>>>>>
>>>>>>      if ( !access_allowed(msix->pdev, addr, len) )
>>>>>> -        return X86EMUL_OKAY;
>>>>>> +        return VPCI_EMUL_OKAY;
>>>>>>
>>>>>>      if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
>>>>>>      {
>>>>>> @@ -210,11 +194,11 @@ static int msix_read(struct vcpu *v, unsigned long addr, unsigned int len,
>>>>>>          switch ( len )
>>>>>>          {
>>>>>>          case 4:
>>>>>> -            *data = readl(addr);
>>>>>> +            *data = vpci_arch_readl(addr);
>>>>>
>>>>> Why do you need a vpci wrapper around the read/write handlers? AFAICT
>>>>> arm64 also has {read,write}{l,q}. And you likely want to protect the
>>>>> 64bit read with CONFIG_64BIT if this code is to be made available to
>>>>> arm32.
>>>>
>>>> I need the wrapper because {read,write}{l,q} function argument is different for ARM and x86.
>>>> ARM {read,write}{l,q} function argument is pointer to the address whereas X86 {read,write}{l,q}
>>>> function argument is address itself.
>>>
>>> Oh, that's a shame. I don't think there's a need to tag those helpers
>>> with the vpci_ prefix though. Could we maybe introduce
>>> bus_{read,write}{b,w,l,q} helpers that take the same parameters on all
>>> arches?
>>>
>>> It would be even better to fix the current ones so they take the same
>>> parameters on x86 and Arm, but that would mean changing all the call
>>> places in one of the arches.
>>
>> Yet still: +1 for removing the extra level of indirection. Imo these
>> trivial helpers should never have diverged between arches; I have
>> always been under the impression that on Linux they can be used by
>> arch-independent code (or else drivers would be quite hard to write).
> 
> So technically both helpers are able to cope with pointer. The x86 one 
> is also allowing to pass an address.
> 
>  From a brief look at the x86, it looks like most of the users are using 
> a pointer. However, the vPCI msix code is one example where addresses 
> are passed.

Okay, first of all I need to clean up some confusion caused by Rahul
saying "pointer to the address": That's where my "extra level of
indirection" came from. I would really wish one wouldn't need to go
to the code and verify such basic statements. There's no "pointer
to the address" here. The question is whether the argument has to
be a pointer (Arm) or is convertible to a pointer (x86). Therefore
...

> AFAICT, the read*/write* helpers on Linux only works with pointers. So I 
> think the actions should be:
>     1) Modify the vPCI MSIx code to use pointer
>     2) Modify the x86 read*/write* helpers to forbid any access other 
> than pointer.

... I'd suggest to go with 1), to avoid impacting other x86 code.
Longer term I wouldn't mind switching to 2) (unless vPCI really is
the only place using non-pointer arguments, in which case doing
the 2nd step right away [but still in a separate patch] would seem
quite reasonable).

Jan
Rahul Singh Dec. 21, 2021, 9:35 a.m. UTC | #10
Hi Jan

> On 21 Dec 2021, at 7:41 am, Jan Beulich <jbeulich@suse.com> wrote:
> 
> On 17.12.2021 15:32, Julien Grall wrote:
>> On 16/12/2021 13:37, Jan Beulich wrote:
>>> On 16.12.2021 12:01, Roger Pau Monné wrote:
>>>> On Thu, Dec 16, 2021 at 10:18:32AM +0000, Rahul Singh wrote:
>>>>>> On 14 Dec 2021, at 12:37 pm, Roger Pau Monné <roger.pau@citrix.com> wrote:
>>>>>> On Tue, Dec 14, 2021 at 10:45:17AM +0000, Rahul Singh wrote:
>>>>>>> +              unsigned long *data)
>>>>>>> {
>>>>>>> -    const struct domain *d = v->domain;
>>>>>>> -    struct vpci_msix *msix = msix_find(d, addr);
>>>>>>>     const struct vpci_msix_entry *entry;
>>>>>>>     unsigned int offset;
>>>>>>> 
>>>>>>>     *data = ~0ul;
>>>>>>> 
>>>>>>>     if ( !msix )
>>>>>>> -        return X86EMUL_RETRY;
>>>>>>> +        return VPCI_EMUL_RETRY;
>>>>>>> 
>>>>>>>     if ( !access_allowed(msix->pdev, addr, len) )
>>>>>>> -        return X86EMUL_OKAY;
>>>>>>> +        return VPCI_EMUL_OKAY;
>>>>>>> 
>>>>>>>     if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
>>>>>>>     {
>>>>>>> @@ -210,11 +194,11 @@ static int msix_read(struct vcpu *v, unsigned long addr, unsigned int len,
>>>>>>>         switch ( len )
>>>>>>>         {
>>>>>>>         case 4:
>>>>>>> -            *data = readl(addr);
>>>>>>> +            *data = vpci_arch_readl(addr);
>>>>>> 
>>>>>> Why do you need a vpci wrapper around the read/write handlers? AFAICT
>>>>>> arm64 also has {read,write}{l,q}. And you likely want to protect the
>>>>>> 64bit read with CONFIG_64BIT if this code is to be made available to
>>>>>> arm32.
>>>>> 
>>>>> I need the wrapper because {read,write}{l,q} function argument is different for ARM and x86.
>>>>> ARM {read,write}{l,q} function argument is pointer to the address whereas X86 {read,write}{l,q}
>>>>> function argument is address itself.
>>>> 
>>>> Oh, that's a shame. I don't think there's a need to tag those helpers
>>>> with the vpci_ prefix though. Could we maybe introduce
>>>> bus_{read,write}{b,w,l,q} helpers that take the same parameters on all
>>>> arches?
>>>> 
>>>> It would be even better to fix the current ones so they take the same
>>>> parameters on x86 and Arm, but that would mean changing all the call
>>>> places in one of the arches.
>>> 
>>> Yet still: +1 for removing the extra level of indirection. Imo these
>>> trivial helpers should never have diverged between arches; I have
>>> always been under the impression that on Linux they can be used by
>>> arch-independent code (or else drivers would be quite hard to write).
>> 
>> So technically both helpers are able to cope with pointer. The x86 one 
>> is also allowing to pass an address.
>> 
>> From a brief look at the x86, it looks like most of the users are using 
>> a pointer. However, the vPCI msix code is one example where addresses 
>> are passed.
> 
> Okay, first of all I need to clean up some confusion caused by Rahul
> saying "pointer to the address":

Sorry for the confusion.
> That's where my "extra level of
> indirection" came from. I would really wish one wouldn't need to go
> to the code and verify such basic statements. There's no "pointer
> to the address" here. The question is whether the argument has to
> be a pointer (Arm) or is convertible to a pointer (x86). Therefore
> ...
> 
>> AFAICT, the read*/write* helpers on Linux only works with pointers. So I 
>> think the actions should be:
>>    1) Modify the vPCI MSIx code to use pointer
>>    2) Modify the x86 read*/write* helpers to forbid any access other 
>> than pointer.
> 
> ... I'd suggest to go with 1), to avoid impacting other x86 code.
> Longer term I wouldn't mind switching to 2) (unless vPCI really is
> the only place using non-pointer arguments, in which case doing
> the 2nd step right away [but still in a separate patch] would seem
> quite reasonable).

I will choose option 1 for now, to avoid any x86-specific change to
{read,write}{b,w,l,q}.

Regards,
Rahul
> Jan
>
diff mbox series

Patch

diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c
index 5febc0ea4b..2b120f897f 100644
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -23,7 +23,7 @@ 
 #include <asm/io.h>
 #include <asm/smp.h>
 #include <asm/desc.h>
-#include <asm/msi.h>
+#include <xen/msi.h>
 #include <asm/fixmap.h>
 #include <asm/p2m.h>
 #include <mach_apic.h>
diff --git a/xen/drivers/passthrough/amd/iommu_init.c b/xen/drivers/passthrough/amd/iommu_init.c
index 559a734bda..fc385959c7 100644
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -20,6 +20,7 @@ 
 #include <xen/acpi.h>
 #include <xen/delay.h>
 #include <xen/keyhandler.h>
+#include <xen/msi.h>
 
 #include "iommu.h"
 
diff --git a/xen/drivers/vpci/Makefile b/xen/drivers/vpci/Makefile
index 1a1413b93e..543c265199 100644
--- a/xen/drivers/vpci/Makefile
+++ b/xen/drivers/vpci/Makefile
@@ -1,2 +1,3 @@ 
 obj-y += vpci.o header.o
 obj-$(CONFIG_HAS_PCI_MSI) += msi.o msix.o
+obj-$(CONFIG_X86) += x86_msix.o
diff --git a/xen/drivers/vpci/msi.c b/xen/drivers/vpci/msi.c
index 5757a7aed2..8fc82a9b8d 100644
--- a/xen/drivers/vpci/msi.c
+++ b/xen/drivers/vpci/msi.c
@@ -16,12 +16,11 @@ 
  * License along with this program; If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <xen/msi.h>
 #include <xen/sched.h>
 #include <xen/softirq.h>
 #include <xen/vpci.h>
 
-#include <asm/msi.h>
-
 static uint32_t control_read(const struct pci_dev *pdev, unsigned int reg,
                              void *data)
 {
diff --git a/xen/drivers/vpci/msix.c b/xen/drivers/vpci/msix.c
index 846f1b8d70..7a9b02f1a5 100644
--- a/xen/drivers/vpci/msix.c
+++ b/xen/drivers/vpci/msix.c
@@ -17,15 +17,24 @@ 
  * License along with this program; If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <xen/msi.h>
 #include <xen/sched.h>
 #include <xen/vpci.h>
 
-#include <asm/msi.h>
 #include <asm/p2m.h>
 
-#define VMSIX_ADDR_IN_RANGE(addr, vpci, nr)                               \
-    ((addr) >= vmsix_table_addr(vpci, nr) &&                              \
-     (addr) < vmsix_table_addr(vpci, nr) + vmsix_table_size(vpci, nr))
+/*
+ * The MMIO handlers on ARM and x86 use different return values. To keep
+ * this code common to both architectures, define generic return codes
+ * whose values are architecture dependent.
+ */
+#ifdef CONFIG_X86
+#define VPCI_EMUL_OKAY      X86EMUL_OKAY
+#define VPCI_EMUL_RETRY     X86EMUL_RETRY
+#else
+#define VPCI_EMUL_OKAY      1
+#define VPCI_EMUL_RETRY     VPCI_EMUL_OKAY
+#endif
 
 static uint32_t control_read(const struct pci_dev *pdev, unsigned int reg,
                              void *data)
@@ -138,29 +147,6 @@  static void control_write(const struct pci_dev *pdev, unsigned int reg,
         pci_conf_write16(pdev->sbdf, reg, val);
 }
 
-static struct vpci_msix *msix_find(const struct domain *d, unsigned long addr)
-{
-    struct vpci_msix *msix;
-
-    list_for_each_entry ( msix, &d->arch.hvm.msix_tables, next )
-    {
-        const struct vpci_bar *bars = msix->pdev->vpci->header.bars;
-        unsigned int i;
-
-        for ( i = 0; i < ARRAY_SIZE(msix->tables); i++ )
-            if ( bars[msix->tables[i] & PCI_MSIX_BIRMASK].enabled &&
-                 VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, i) )
-                return msix;
-    }
-
-    return NULL;
-}
-
-static int msix_accept(struct vcpu *v, unsigned long addr)
-{
-    return !!msix_find(v->domain, addr);
-}
-
 static bool access_allowed(const struct pci_dev *pdev, unsigned long addr,
                            unsigned int len)
 {
@@ -182,21 +168,19 @@  static struct vpci_msix_entry *get_entry(struct vpci_msix *msix,
     return &msix->entries[(addr - start) / PCI_MSIX_ENTRY_SIZE];
 }
 
-static int msix_read(struct vcpu *v, unsigned long addr, unsigned int len,
-                     unsigned long *data)
+int msix_read(struct vpci_msix *msix, unsigned long addr, unsigned int len,
+              unsigned long *data)
 {
-    const struct domain *d = v->domain;
-    struct vpci_msix *msix = msix_find(d, addr);
     const struct vpci_msix_entry *entry;
     unsigned int offset;
 
     *data = ~0ul;
 
     if ( !msix )
-        return X86EMUL_RETRY;
+        return VPCI_EMUL_RETRY;
 
     if ( !access_allowed(msix->pdev, addr, len) )
-        return X86EMUL_OKAY;
+        return VPCI_EMUL_OKAY;
 
     if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
     {
@@ -210,11 +194,11 @@  static int msix_read(struct vcpu *v, unsigned long addr, unsigned int len,
         switch ( len )
         {
         case 4:
-            *data = readl(addr);
+            *data = vpci_arch_readl(addr);
             break;
 
         case 8:
-            *data = readq(addr);
+            *data = vpci_arch_readq(addr);
             break;
 
         default:
@@ -222,7 +206,7 @@  static int msix_read(struct vcpu *v, unsigned long addr, unsigned int len,
             break;
         }
 
-        return X86EMUL_OKAY;
+        return VPCI_EMUL_OKAY;
     }
 
     spin_lock(&msix->pdev->vpci->lock);
@@ -256,22 +240,20 @@  static int msix_read(struct vcpu *v, unsigned long addr, unsigned int len,
     }
     spin_unlock(&msix->pdev->vpci->lock);
 
-    return X86EMUL_OKAY;
+    return VPCI_EMUL_OKAY;
 }
 
-static int msix_write(struct vcpu *v, unsigned long addr, unsigned int len,
-                      unsigned long data)
+int msix_write(const struct domain *d, struct vpci_msix *msix,
+               unsigned long addr, unsigned int len, unsigned long data)
 {
-    const struct domain *d = v->domain;
-    struct vpci_msix *msix = msix_find(d, addr);
     struct vpci_msix_entry *entry;
     unsigned int offset;
 
     if ( !msix )
-        return X86EMUL_RETRY;
+        return VPCI_EMUL_RETRY;
 
     if ( !access_allowed(msix->pdev, addr, len) )
-        return X86EMUL_OKAY;
+        return VPCI_EMUL_OKAY;
 
     if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
     {
@@ -281,11 +263,11 @@  static int msix_write(struct vcpu *v, unsigned long addr, unsigned int len,
             switch ( len )
             {
             case 4:
-                writel(data, addr);
+                vpci_arch_writel(data, addr);
                 break;
 
             case 8:
-                writeq(data, addr);
+                vpci_arch_writeq(data, addr);
                 break;
 
             default:
@@ -294,7 +276,7 @@  static int msix_write(struct vcpu *v, unsigned long addr, unsigned int len,
             }
         }
 
-        return X86EMUL_OKAY;
+        return VPCI_EMUL_OKAY;
     }
 
     spin_lock(&msix->pdev->vpci->lock);
@@ -372,60 +354,7 @@  static int msix_write(struct vcpu *v, unsigned long addr, unsigned int len,
     }
     spin_unlock(&msix->pdev->vpci->lock);
 
-    return X86EMUL_OKAY;
-}
-
-static const struct hvm_mmio_ops vpci_msix_table_ops = {
-    .check = msix_accept,
-    .read = msix_read,
-    .write = msix_write,
-};
-
-int vpci_make_msix_hole(const struct pci_dev *pdev)
-{
-    struct domain *d = pdev->domain;
-    unsigned int i;
-
-    if ( !pdev->vpci->msix )
-        return 0;
-
-    /* Make sure there's a hole for the MSIX table/PBA in the p2m. */
-    for ( i = 0; i < ARRAY_SIZE(pdev->vpci->msix->tables); i++ )
-    {
-        unsigned long start = PFN_DOWN(vmsix_table_addr(pdev->vpci, i));
-        unsigned long end = PFN_DOWN(vmsix_table_addr(pdev->vpci, i) +
-                                     vmsix_table_size(pdev->vpci, i) - 1);
-
-        for ( ; start <= end; start++ )
-        {
-            p2m_type_t t;
-            mfn_t mfn = get_gfn_query(d, start, &t);
-
-            switch ( t )
-            {
-            case p2m_mmio_dm:
-            case p2m_invalid:
-                break;
-            case p2m_mmio_direct:
-                if ( mfn_x(mfn) == start )
-                {
-                    clear_identity_p2m_entry(d, start);
-                    break;
-                }
-                /* fallthrough. */
-            default:
-                put_gfn(d, start);
-                gprintk(XENLOG_WARNING,
-                        "%pp: existing mapping (mfn: %" PRI_mfn
-                        "type: %d) at %#lx clobbers MSIX MMIO area\n",
-                        &pdev->sbdf, mfn_x(mfn), t, start);
-                return -EEXIST;
-            }
-            put_gfn(d, start);
-        }
-    }
-
-    return 0;
+    return VPCI_EMUL_OKAY;
 }
 
 static int init_msix(struct pci_dev *pdev)
@@ -472,11 +401,10 @@  static int init_msix(struct pci_dev *pdev)
         vpci_msix_arch_init_entry(&msix->entries[i]);
     }
 
-    if ( list_empty(&d->arch.hvm.msix_tables) )
-        register_mmio_handler(d, &vpci_msix_table_ops);
+    register_msix_mmio_handler(d);
+    vpci_msix_add_to_msix_table(msix, d);
 
     pdev->vpci->msix = msix;
-    list_add(&msix->next, &d->arch.hvm.msix_tables);
 
     return 0;
 }
diff --git a/xen/drivers/vpci/x86_msix.c b/xen/drivers/vpci/x86_msix.c
new file mode 100644
index 0000000000..b38b52e410
--- /dev/null
+++ b/xen/drivers/vpci/x86_msix.c
@@ -0,0 +1,155 @@ 
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms and conditions of the GNU General Public
+ * License, version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <xen/sched.h>
+#include <xen/vpci.h>
+
+#include <asm/msi.h>
+#include <asm/p2m.h>
+
+u32 vpci_arch_readl(unsigned long addr)
+{
+    return readl(addr);
+}
+
+u64 vpci_arch_readq(unsigned long addr)
+{
+    return readq(addr);
+}
+
+void vpci_arch_writel(u32 data, unsigned long addr)
+{
+    writel(data, addr);
+}
+
+void vpci_arch_writeq(u64 data, unsigned long addr)
+{
+    writeq(data, addr);
+}
+
+int vpci_make_msix_hole(const struct pci_dev *pdev)
+{
+    struct domain *d = pdev->domain;
+    unsigned int i;
+
+    if ( !pdev->vpci->msix )
+        return 0;
+
+    /* Make sure there's a hole for the MSIX table/PBA in the p2m. */
+    for ( i = 0; i < ARRAY_SIZE(pdev->vpci->msix->tables); i++ )
+    {
+        unsigned long start = PFN_DOWN(vmsix_table_addr(pdev->vpci, i));
+        unsigned long end = PFN_DOWN(vmsix_table_addr(pdev->vpci, i) +
+                                     vmsix_table_size(pdev->vpci, i) - 1);
+
+        for ( ; start <= end; start++ )
+        {
+            p2m_type_t t;
+            mfn_t mfn = get_gfn_query(d, start, &t);
+
+            switch ( t )
+            {
+            case p2m_mmio_dm:
+            case p2m_invalid:
+                break;
+            case p2m_mmio_direct:
+                if ( mfn_x(mfn) == start )
+                {
+                    clear_identity_p2m_entry(d, start);
+                    break;
+                }
+                /* fallthrough. */
+            default:
+                put_gfn(d, start);
+                gprintk(XENLOG_WARNING,
+                        "%pp: existing mapping (mfn: %" PRI_mfn
+                        "type: %d) at %#lx clobbers MSIX MMIO area\n",
+                        &pdev->sbdf, mfn_x(mfn), t, start);
+                return -EEXIST;
+            }
+            put_gfn(d, start);
+        }
+    }
+
+    return 0;
+}
+
+struct vpci_msix *msix_find(const struct domain *d, unsigned long addr)
+{
+    struct vpci_msix *msix;
+
+    list_for_each_entry ( msix, &d->arch.hvm.msix_tables, next )
+    {
+        const struct vpci_bar *bars = msix->pdev->vpci->header.bars;
+        unsigned int i;
+
+        for ( i = 0; i < ARRAY_SIZE(msix->tables); i++ )
+            if ( bars[msix->tables[i] & PCI_MSIX_BIRMASK].enabled &&
+                 VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, i) )
+                return msix;
+    }
+
+    return NULL;
+}
+
+static int x86_msix_accept(struct vcpu *v, unsigned long addr)
+{
+    return !!msix_find(v->domain, addr);
+}
+
+static int x86_msix_write(struct vcpu *v, unsigned long addr, unsigned int len,
+                          unsigned long data)
+{
+    const struct domain *d = v->domain;
+    struct vpci_msix *msix = msix_find(d, addr);
+
+    return msix_write(d, msix, addr, len, data);
+}
+
+static int x86_msix_read(struct vcpu *v, unsigned long addr, unsigned int len,
+                         unsigned long *data)
+{
+    const struct domain *d = v->domain;
+    struct vpci_msix *msix = msix_find(d, addr);
+
+    return msix_read(msix, addr, len, data);
+}
+
+static const struct hvm_mmio_ops vpci_msix_table_ops = {
+    .check = x86_msix_accept,
+    .read = x86_msix_read,
+    .write = x86_msix_write,
+};
+
+void register_msix_mmio_handler(struct domain *d)
+{
+    if ( list_empty(&d->arch.hvm.msix_tables) )
+        register_mmio_handler(d, &vpci_msix_table_ops);
+}
+
+void vpci_msix_add_to_msix_table(struct vpci_msix *msix,
+                                 struct domain *d)
+{
+    list_add(&msix->next, &d->arch.hvm.msix_tables);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/asm-x86/msi.h b/xen/include/asm-x86/msi.h
index e228b0f3f3..0a7912e9be 100644
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -148,34 +148,6 @@  int msi_free_irq(struct msi_desc *entry);
  */
 #define NR_HP_RESERVED_VECTORS 	20
 
-#define msi_control_reg(base)		(base + PCI_MSI_FLAGS)
-#define msi_lower_address_reg(base)	(base + PCI_MSI_ADDRESS_LO)
-#define msi_upper_address_reg(base)	(base + PCI_MSI_ADDRESS_HI)
-#define msi_data_reg(base, is64bit)	\
-	( (is64bit == 1) ? base+PCI_MSI_DATA_64 : base+PCI_MSI_DATA_32 )
-#define msi_mask_bits_reg(base, is64bit) \
-	( (is64bit == 1) ? base+PCI_MSI_MASK_BIT : base+PCI_MSI_MASK_BIT-4)
-#define msi_pending_bits_reg(base, is64bit) \
-	((base) + PCI_MSI_MASK_BIT + ((is64bit) ? 4 : 0))
-#define msi_disable(control)		control &= ~PCI_MSI_FLAGS_ENABLE
-#define multi_msi_capable(control) \
-	(1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1))
-#define multi_msi_enable(control, num) \
-	control |= (((fls(num) - 1) << 4) & PCI_MSI_FLAGS_QSIZE);
-#define is_64bit_address(control)	(!!(control & PCI_MSI_FLAGS_64BIT))
-#define is_mask_bit_support(control)	(!!(control & PCI_MSI_FLAGS_MASKBIT))
-#define msi_enable(control, num) multi_msi_enable(control, num); \
-	control |= PCI_MSI_FLAGS_ENABLE
-
-#define msix_control_reg(base)		(base + PCI_MSIX_FLAGS)
-#define msix_table_offset_reg(base)	(base + PCI_MSIX_TABLE)
-#define msix_pba_offset_reg(base)	(base + PCI_MSIX_PBA)
-#define msix_enable(control)	 	control |= PCI_MSIX_FLAGS_ENABLE
-#define msix_disable(control)	 	control &= ~PCI_MSIX_FLAGS_ENABLE
-#define msix_table_size(control) 	((control & PCI_MSIX_FLAGS_QSIZE)+1)
-#define msix_unmask(address)	 	(address & ~PCI_MSIX_VECTOR_BITMASK)
-#define msix_mask(address)		(address | PCI_MSIX_VECTOR_BITMASK)
-
 /*
  * MSI Defined Data Structures
  */
diff --git a/xen/include/xen/msi.h b/xen/include/xen/msi.h
index c903d0050c..1c22c9a4a7 100644
--- a/xen/include/xen/msi.h
+++ b/xen/include/xen/msi.h
@@ -3,6 +3,34 @@ 
 
 #include <xen/pci.h>
 
+#define msi_control_reg(base)       (base + PCI_MSI_FLAGS)
+#define msi_lower_address_reg(base) (base + PCI_MSI_ADDRESS_LO)
+#define msi_upper_address_reg(base) (base + PCI_MSI_ADDRESS_HI)
+#define msi_data_reg(base, is64bit) \
+	( (is64bit == 1) ? base+PCI_MSI_DATA_64 : base+PCI_MSI_DATA_32 )
+#define msi_mask_bits_reg(base, is64bit) \
+	( (is64bit == 1) ? base+PCI_MSI_MASK_BIT : base+PCI_MSI_MASK_BIT-4)
+#define msi_pending_bits_reg(base, is64bit) \
+	((base) + PCI_MSI_MASK_BIT + ((is64bit) ? 4 : 0))
+#define msi_disable(control)        control &= ~PCI_MSI_FLAGS_ENABLE
+#define multi_msi_capable(control) \
+	(1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1))
+#define multi_msi_enable(control, num) \
+	control |= (((fls(num) - 1) << 4) & PCI_MSI_FLAGS_QSIZE);
+#define is_64bit_address(control)   (!!(control & PCI_MSI_FLAGS_64BIT))
+#define is_mask_bit_support(control)    (!!(control & PCI_MSI_FLAGS_MASKBIT))
+#define msi_enable(control, num) multi_msi_enable(control, num); \
+	control |= PCI_MSI_FLAGS_ENABLE
+
+#define msix_control_reg(base)      (base + PCI_MSIX_FLAGS)
+#define msix_table_offset_reg(base) (base + PCI_MSIX_TABLE)
+#define msix_pba_offset_reg(base)   (base + PCI_MSIX_PBA)
+#define msix_enable(control)        control |= PCI_MSIX_FLAGS_ENABLE
+#define msix_disable(control)       control &= ~PCI_MSIX_FLAGS_ENABLE
+#define msix_table_size(control)    ((control & PCI_MSIX_FLAGS_QSIZE)+1)
+#define msix_unmask(address)        (address & ~PCI_MSIX_VECTOR_BITMASK)
+#define msix_mask(address)          (address | PCI_MSIX_VECTOR_BITMASK)
+
 #ifdef CONFIG_HAS_PCI_MSI
 
 #include <asm/msi.h>
diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
index 9ea66e033f..4cb7665d54 100644
--- a/xen/include/xen/vpci.h
+++ b/xen/include/xen/vpci.h
@@ -150,6 +150,11 @@  struct vpci_vcpu {
 };
 
 #ifdef __XEN__
+
+#define VMSIX_ADDR_IN_RANGE(addr, vpci, nr)                               \
+    ((addr) >= vmsix_table_addr(vpci, nr) &&                              \
+     (addr) < vmsix_table_addr(vpci, nr) + vmsix_table_size(vpci, nr))
+
 void vpci_dump_msi(void);
 
 /* Make sure there's a hole in the p2m for the MSIX mmio areas. */
@@ -220,6 +225,22 @@  bool vpci_ecam_write(pci_sbdf_t sbdf, unsigned int reg, unsigned int len,
 bool vpci_ecam_read(pci_sbdf_t sbdf, unsigned int reg, unsigned int len,
                     unsigned long *data);
 
+void register_msix_mmio_handler(struct domain *d);
+
+void vpci_msix_add_to_msix_table(struct vpci_msix *msix, struct domain *d);
+
+int msix_write(const struct domain *d, struct vpci_msix *msix,
+               unsigned long addr, unsigned int len, unsigned long data);
+
+int msix_read(struct vpci_msix *msix, unsigned long addr, unsigned int len,
+              unsigned long *data);
+
+u32 vpci_arch_readl(unsigned long addr);
+u64 vpci_arch_readq(unsigned long addr);
+
+void vpci_arch_writel(u32 data, unsigned long addr);
+void vpci_arch_writeq(u64 data, unsigned long addr);
+
 #endif /* __XEN__ */
 
 #else /* !CONFIG_HAS_VPCI */