diff mbox series

x86/msi: dynamically map pages for MSI-X tables

Message ID 20230426145520.40554-1-hakor@amazon.com (mailing list archive)
State New, archived
Headers show
Series x86/msi: dynamically map pages for MSI-X tables | expand

Commit Message

Ruben Hakobyan April 26, 2023, 2:55 p.m. UTC
Xen reserves a constant number of pages that can be used for mapping
MSI-X tables. This limit is defined by FIX_MSIX_MAX_PAGES in fixmap.h.

Reserving a fixed number of pages could result in an -ENOMEM if a
device requests a new page when the fixmap limit is exhausted and will
necessitate manually adjusting the limit before compilation.

To avoid the issues with the current fixmap implementation, we modify
the MSI-X page mapping logic to instead dynamically map new pages when
they are needed by making use of ioremap().

Signed-off-by: Ruben Hakobyan <hakor@amazon.com>
---
 xen/arch/x86/include/asm/fixmap.h |  2 -
 xen/arch/x86/include/asm/msi.h    |  5 +--
 xen/arch/x86/msi.c                | 69 ++++++++-----------------------
 3 files changed, 19 insertions(+), 57 deletions(-)

Comments

Roger Pau Monné May 2, 2023, 10:10 a.m. UTC | #1
On Wed, Apr 26, 2023 at 02:55:20PM +0000, Ruben Hakobyan wrote:
> Xen reserves a constant number of pages that can be used for mapping
> MSI-X tables. This limit is defined by FIX_MSIX_MAX_PAGES in fixmap.h.
> 
> Reserving a fixed number of pages could result in an -ENOMEM if a
> device requests a new page when the fixmap limit is exhausted and will
> necessitate manually adjusting the limit before compilation.
> 
> To avoid the issues with the current fixmap implementation, we modify
> the MSI-X page mapping logic to instead dynamically map new pages when
> they are needed by making use of ioremap().

I wonder if Arm plans to reuse this code, and whether then arm32 would
better keep the fixmap implementation to avoid exhausting virtual
address space in that case.

This also has the side effect of ioremap() now possibly allocating a
page in order to fill the page table for the newly allocated VA.

> Signed-off-by: Ruben Hakobyan <hakor@amazon.com>
> ---
>  xen/arch/x86/include/asm/fixmap.h |  2 -
>  xen/arch/x86/include/asm/msi.h    |  5 +--
>  xen/arch/x86/msi.c                | 69 ++++++++-----------------------
>  3 files changed, 19 insertions(+), 57 deletions(-)
> 
> diff --git a/xen/arch/x86/include/asm/fixmap.h b/xen/arch/x86/include/asm/fixmap.h
> index 516ec3fa6c..139c3e2dcc 100644
> --- a/xen/arch/x86/include/asm/fixmap.h
> +++ b/xen/arch/x86/include/asm/fixmap.h
> @@ -61,8 +61,6 @@ enum fixed_addresses {
>      FIX_ACPI_END = FIX_ACPI_BEGIN + NUM_FIXMAP_ACPI_PAGES - 1,
>      FIX_HPET_BASE,
>      FIX_TBOOT_SHARED_BASE,
> -    FIX_MSIX_IO_RESERV_BASE,
> -    FIX_MSIX_IO_RESERV_END = FIX_MSIX_IO_RESERV_BASE + FIX_MSIX_MAX_PAGES -1,
>      FIX_TBOOT_MAP_ADDRESS,
>      FIX_APEI_RANGE_BASE,
>      FIX_APEI_RANGE_END = FIX_APEI_RANGE_BASE + FIX_APEI_RANGE_MAX -1,
> diff --git a/xen/arch/x86/include/asm/msi.h b/xen/arch/x86/include/asm/msi.h
> index a53ade95c9..16c80c9883 100644
> --- a/xen/arch/x86/include/asm/msi.h
> +++ b/xen/arch/x86/include/asm/msi.h
> @@ -55,9 +55,6 @@
>  #define	 MSI_ADDR_DEST_ID_MASK		0x00ff000
>  #define  MSI_ADDR_DEST_ID(dest)		(((dest) << MSI_ADDR_DEST_ID_SHIFT) & MSI_ADDR_DEST_ID_MASK)
>  
> -/* MAX fixed pages reserved for mapping MSIX tables. */
> -#define FIX_MSIX_MAX_PAGES              512
> -
>  struct msi_info {
>      pci_sbdf_t sbdf;
>      int irq;
> @@ -213,7 +210,7 @@ struct arch_msix {
>          unsigned long first, last;
>      } table, pba;
>      int table_refcnt[MAX_MSIX_TABLE_PAGES];
> -    int table_idx[MAX_MSIX_TABLE_PAGES];
> +    void __iomem *table_va[MAX_MSIX_TABLE_PAGES];
>      spinlock_t table_lock;
>      bool host_maskall, guest_maskall;
>      domid_t warned;
> diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c
> index d0bf63df1d..8128274c07 100644
> --- a/xen/arch/x86/msi.c
> +++ b/xen/arch/x86/msi.c
> @@ -24,7 +24,6 @@
>  #include <asm/smp.h>
>  #include <asm/desc.h>
>  #include <asm/msi.h>
> -#include <asm/fixmap.h>
>  #include <asm/p2m.h>
>  #include <mach_apic.h>
>  #include <io_ports.h>
> @@ -39,75 +38,44 @@ boolean_param("msi", use_msi);
>  
>  static void __pci_disable_msix(struct msi_desc *);
>  
> -/* bitmap indicate which fixed map is free */
> -static DEFINE_SPINLOCK(msix_fixmap_lock);
> -static DECLARE_BITMAP(msix_fixmap_pages, FIX_MSIX_MAX_PAGES);
> -
> -static int msix_fixmap_alloc(void)
> -{
> -    int i, rc = -ENOMEM;
> -
> -    spin_lock(&msix_fixmap_lock);
> -    for ( i = 0; i < FIX_MSIX_MAX_PAGES; i++ )
> -        if ( !test_bit(i, &msix_fixmap_pages) )
> -            break;
> -    if ( i == FIX_MSIX_MAX_PAGES )
> -        goto out;
> -    rc = FIX_MSIX_IO_RESERV_BASE + i;
> -    set_bit(i, &msix_fixmap_pages);
> -
> - out:
> -    spin_unlock(&msix_fixmap_lock);
> -    return rc;
> -}
> -
> -static void msix_fixmap_free(int idx)
> -{
> -    spin_lock(&msix_fixmap_lock);
> -    if ( idx >= FIX_MSIX_IO_RESERV_BASE )
> -        clear_bit(idx - FIX_MSIX_IO_RESERV_BASE, &msix_fixmap_pages);
> -    spin_unlock(&msix_fixmap_lock);
> -}
> -
> -static int msix_get_fixmap(struct arch_msix *msix, u64 table_paddr,
> +static void __iomem *msix_map_table(struct arch_msix *msix, u64 table_paddr,

I think msix_{get,put}_entry() might be better, as you are not mapping
and unmapping the table at every call.

>                             u64 entry_paddr)
>  {
>      long nr_page;
> -    int idx;
> +    void __iomem *va = NULL;
>  
>      nr_page = (entry_paddr >> PAGE_SHIFT) - (table_paddr >> PAGE_SHIFT);
>  
>      if ( nr_page < 0 || nr_page >= MAX_MSIX_TABLE_PAGES )
> -        return -EINVAL;
> +        return NULL;
>  
>      spin_lock(&msix->table_lock);
>      if ( msix->table_refcnt[nr_page]++ == 0 )
>      {
> -        idx = msix_fixmap_alloc();
> -        if ( idx < 0 )
> +        va = ioremap(entry_paddr, PAGE_SIZE);

You are missing an 'entry_paddr & PAGE_MASK' here AFAICT, or else
ioremap() won't return a page-aligned address if entry is not the
first one on the requested page when mapping.

> +        if ( va == NULL )
>          {
>              msix->table_refcnt[nr_page]--;
>              goto out;
>          }
> -        set_fixmap_nocache(idx, entry_paddr);
> -        msix->table_idx[nr_page] = idx;
> +        msix->table_va[nr_page] = va;
>      }
>      else
> -        idx = msix->table_idx[nr_page];
> +        va = msix->table_va[nr_page];
>  
>   out:
>      spin_unlock(&msix->table_lock);
> -    return idx;
> +    return va;
>  }
>  
> -static void msix_put_fixmap(struct arch_msix *msix, int idx)
> +static void msix_unmap_table(struct arch_msix *msix, void __iomem *va)

va can be made const here.

>  {
>      int i;
>  
>      spin_lock(&msix->table_lock);
>      for ( i = 0; i < MAX_MSIX_TABLE_PAGES; i++ )
>      {
> -        if ( msix->table_idx[i] == idx )
> +        if ( msix->table_va[i] == va )
>              break;
>      }
>      if ( i == MAX_MSIX_TABLE_PAGES )
> @@ -115,9 +83,8 @@ static void msix_put_fixmap(struct arch_msix *msix, int idx)
>  
>      if ( --msix->table_refcnt[i] == 0 )
>      {
> -        clear_fixmap(idx);
> -        msix_fixmap_free(idx);
> -        msix->table_idx[i] = 0;
> +        vunmap(va);

iounmap()

> +        msix->table_va[i] = NULL;
>      }
>  
>   out:
> @@ -568,8 +535,8 @@ int msi_free_irq(struct msi_desc *entry)
>      }
>  
>      if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
> -        msix_put_fixmap(entry->dev->msix,
> -                        virt_to_fix((unsigned long)entry->mask_base));
> +        msix_unmap_table(entry->dev->msix,
> +                       (void*)((unsigned long)entry->mask_base & PAGE_MASK));

Did you consider calling this msix_unmap_entry() and just pass the
entry VA to the function and get the page from there.

round_pgdown() might be helpful here otherwise.

>      list_del(&entry->list);
>      xfree(entry);
> @@ -892,10 +859,10 @@ static int msix_capability_init(struct pci_dev *dev,
>      {
>          /* Map MSI-X table region */
>          u64 entry_paddr = table_paddr + msi->entry_nr * PCI_MSIX_ENTRY_SIZE;
> -        int idx = msix_get_fixmap(msix, table_paddr, entry_paddr);
> +        void __iomem *va = msix_map_table(msix, table_paddr, entry_paddr);
>          void __iomem *base;
>  
> -        if ( idx < 0 )
> +        if ( va == NULL )
>          {
>              if ( zap_on_error )
>              {
> @@ -907,9 +874,9 @@ static int msix_capability_init(struct pci_dev *dev,
>  
>              pci_conf_write16(dev->sbdf, msix_control_reg(pos), control);
>              xfree(entry);
> -            return idx;
> +            return -ENOMEM;
>          }
> -        base = fix_to_virt(idx) + (entry_paddr & (PAGE_SIZE - 1));
> +        base = va + (entry_paddr & (PAGE_SIZE - 1));

Now that msix_map_table() returns a virtual address, you could likely
do the adjustment in there and return the entry VA from
msix_map_table() or equivalent? (see my naming suggestion above)

Otherwise please use ~PAGE_MASK.

Thanks, Roger.
Jan Beulich May 2, 2023, 10:18 a.m. UTC | #2
On 02.05.2023 12:10, Roger Pau Monné wrote:
> On Wed, Apr 26, 2023 at 02:55:20PM +0000, Ruben Hakobyan wrote:
>> Xen reserves a constant number of pages that can be used for mapping
>> MSI-X tables. This limit is defined by FIX_MSIX_MAX_PAGES in fixmap.h.
>>
>> Reserving a fixed number of pages could result in an -ENOMEM if a
>> device requests a new page when the fixmap limit is exhausted and will
>> necessitate manually adjusting the limit before compilation.
>>
>> To avoid the issues with the current fixmap implementation, we modify
>> the MSI-X page mapping logic to instead dynamically map new pages when
>> they are needed by making use of ioremap().
> 
> I wonder if Arm plans to reuse this code, and whether then arm32 would
> better keep the fixmap implementation to avoid exhausting virtual
> address space in that case.

I think this would then need to be something that 32-bit architectures
do specially. Right now aiui PCI (and hence MSI-X) work on Arm targets
only Arm64.

> This also has the side effect of ioremap() now possibly allocating a
> page in order to fill the page table for the newly allocated VA.

Indeed, but I think the (vague) plan to switch to ioremap() has been
around for a pretty long time (perhaps forever since 32-bit support
was purged).

Jan
Roger Pau Monné May 2, 2023, 10:22 a.m. UTC | #3
On Tue, May 02, 2023 at 12:18:06PM +0200, Jan Beulich wrote:
> On 02.05.2023 12:10, Roger Pau Monné wrote:
> > On Wed, Apr 26, 2023 at 02:55:20PM +0000, Ruben Hakobyan wrote:
> >> Xen reserves a constant number of pages that can be used for mapping
> >> MSI-X tables. This limit is defined by FIX_MSIX_MAX_PAGES in fixmap.h.
> >>
> >> Reserving a fixed number of pages could result in an -ENOMEM if a
> >> device requests a new page when the fixmap limit is exhausted and will
> >> necessitate manually adjusting the limit before compilation.
> >>
> >> To avoid the issues with the current fixmap implementation, we modify
> >> the MSI-X page mapping logic to instead dynamically map new pages when
> >> they are needed by making use of ioremap().
> > 
> > I wonder if Arm plans to reuse this code, and whether then arm32 would
> > better keep the fixmap implementation to avoid exhausting virtual
> > address space in that case.
> 
> I think this would then need to be something that 32-bit architectures
> do specially. Right now aiui PCI (and hence MSI-X) work on Arm targets
> only Arm64.
> 
> > This also has the side effect of ioremap() now possibly allocating a
> > page in order to fill the page table for the newly allocated VA.
> 
> Indeed, but I think the (vague) plan to switch to ioremap() has been
> around for a pretty long time (perhaps forever since 32-bit support
> was purged).

Yup, I'm not saying the above should block the patch, but might be
worth mentioning in the commit message.

Roger.
diff mbox series

Patch

diff --git a/xen/arch/x86/include/asm/fixmap.h b/xen/arch/x86/include/asm/fixmap.h
index 516ec3fa6c..139c3e2dcc 100644
--- a/xen/arch/x86/include/asm/fixmap.h
+++ b/xen/arch/x86/include/asm/fixmap.h
@@ -61,8 +61,6 @@  enum fixed_addresses {
     FIX_ACPI_END = FIX_ACPI_BEGIN + NUM_FIXMAP_ACPI_PAGES - 1,
     FIX_HPET_BASE,
     FIX_TBOOT_SHARED_BASE,
-    FIX_MSIX_IO_RESERV_BASE,
-    FIX_MSIX_IO_RESERV_END = FIX_MSIX_IO_RESERV_BASE + FIX_MSIX_MAX_PAGES -1,
     FIX_TBOOT_MAP_ADDRESS,
     FIX_APEI_RANGE_BASE,
     FIX_APEI_RANGE_END = FIX_APEI_RANGE_BASE + FIX_APEI_RANGE_MAX -1,
diff --git a/xen/arch/x86/include/asm/msi.h b/xen/arch/x86/include/asm/msi.h
index a53ade95c9..16c80c9883 100644
--- a/xen/arch/x86/include/asm/msi.h
+++ b/xen/arch/x86/include/asm/msi.h
@@ -55,9 +55,6 @@ 
 #define	 MSI_ADDR_DEST_ID_MASK		0x00ff000
 #define  MSI_ADDR_DEST_ID(dest)		(((dest) << MSI_ADDR_DEST_ID_SHIFT) & MSI_ADDR_DEST_ID_MASK)
 
-/* MAX fixed pages reserved for mapping MSIX tables. */
-#define FIX_MSIX_MAX_PAGES              512
-
 struct msi_info {
     pci_sbdf_t sbdf;
     int irq;
@@ -213,7 +210,7 @@  struct arch_msix {
         unsigned long first, last;
     } table, pba;
     int table_refcnt[MAX_MSIX_TABLE_PAGES];
-    int table_idx[MAX_MSIX_TABLE_PAGES];
+    void __iomem *table_va[MAX_MSIX_TABLE_PAGES];
     spinlock_t table_lock;
     bool host_maskall, guest_maskall;
     domid_t warned;
diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c
index d0bf63df1d..8128274c07 100644
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -24,7 +24,6 @@ 
 #include <asm/smp.h>
 #include <asm/desc.h>
 #include <asm/msi.h>
-#include <asm/fixmap.h>
 #include <asm/p2m.h>
 #include <mach_apic.h>
 #include <io_ports.h>
@@ -39,75 +38,44 @@  boolean_param("msi", use_msi);
 
 static void __pci_disable_msix(struct msi_desc *);
 
-/* bitmap indicate which fixed map is free */
-static DEFINE_SPINLOCK(msix_fixmap_lock);
-static DECLARE_BITMAP(msix_fixmap_pages, FIX_MSIX_MAX_PAGES);
-
-static int msix_fixmap_alloc(void)
-{
-    int i, rc = -ENOMEM;
-
-    spin_lock(&msix_fixmap_lock);
-    for ( i = 0; i < FIX_MSIX_MAX_PAGES; i++ )
-        if ( !test_bit(i, &msix_fixmap_pages) )
-            break;
-    if ( i == FIX_MSIX_MAX_PAGES )
-        goto out;
-    rc = FIX_MSIX_IO_RESERV_BASE + i;
-    set_bit(i, &msix_fixmap_pages);
-
- out:
-    spin_unlock(&msix_fixmap_lock);
-    return rc;
-}
-
-static void msix_fixmap_free(int idx)
-{
-    spin_lock(&msix_fixmap_lock);
-    if ( idx >= FIX_MSIX_IO_RESERV_BASE )
-        clear_bit(idx - FIX_MSIX_IO_RESERV_BASE, &msix_fixmap_pages);
-    spin_unlock(&msix_fixmap_lock);
-}
-
-static int msix_get_fixmap(struct arch_msix *msix, u64 table_paddr,
+static void __iomem *msix_map_table(struct arch_msix *msix, u64 table_paddr,
                            u64 entry_paddr)
 {
     long nr_page;
-    int idx;
+    void __iomem *va = NULL;
 
     nr_page = (entry_paddr >> PAGE_SHIFT) - (table_paddr >> PAGE_SHIFT);
 
     if ( nr_page < 0 || nr_page >= MAX_MSIX_TABLE_PAGES )
-        return -EINVAL;
+        return NULL;
 
     spin_lock(&msix->table_lock);
     if ( msix->table_refcnt[nr_page]++ == 0 )
     {
-        idx = msix_fixmap_alloc();
-        if ( idx < 0 )
+        va = ioremap(entry_paddr, PAGE_SIZE);
+        if ( va == NULL )
         {
             msix->table_refcnt[nr_page]--;
             goto out;
         }
-        set_fixmap_nocache(idx, entry_paddr);
-        msix->table_idx[nr_page] = idx;
+        msix->table_va[nr_page] = va;
     }
     else
-        idx = msix->table_idx[nr_page];
+        va = msix->table_va[nr_page];
 
  out:
     spin_unlock(&msix->table_lock);
-    return idx;
+    return va;
 }
 
-static void msix_put_fixmap(struct arch_msix *msix, int idx)
+static void msix_unmap_table(struct arch_msix *msix, void __iomem *va)
 {
     int i;
 
     spin_lock(&msix->table_lock);
     for ( i = 0; i < MAX_MSIX_TABLE_PAGES; i++ )
     {
-        if ( msix->table_idx[i] == idx )
+        if ( msix->table_va[i] == va )
             break;
     }
     if ( i == MAX_MSIX_TABLE_PAGES )
@@ -115,9 +83,8 @@  static void msix_put_fixmap(struct arch_msix *msix, int idx)
 
     if ( --msix->table_refcnt[i] == 0 )
     {
-        clear_fixmap(idx);
-        msix_fixmap_free(idx);
-        msix->table_idx[i] = 0;
+        vunmap(va);
+        msix->table_va[i] = NULL;
     }
 
  out:
@@ -568,8 +535,8 @@  int msi_free_irq(struct msi_desc *entry)
     }
 
     if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
-        msix_put_fixmap(entry->dev->msix,
-                        virt_to_fix((unsigned long)entry->mask_base));
+        msix_unmap_table(entry->dev->msix,
+                       (void*)((unsigned long)entry->mask_base & PAGE_MASK));
 
     list_del(&entry->list);
     xfree(entry);
@@ -892,10 +859,10 @@  static int msix_capability_init(struct pci_dev *dev,
     {
         /* Map MSI-X table region */
         u64 entry_paddr = table_paddr + msi->entry_nr * PCI_MSIX_ENTRY_SIZE;
-        int idx = msix_get_fixmap(msix, table_paddr, entry_paddr);
+        void __iomem *va = msix_map_table(msix, table_paddr, entry_paddr);
         void __iomem *base;
 
-        if ( idx < 0 )
+        if ( va == NULL )
         {
             if ( zap_on_error )
             {
@@ -907,9 +874,9 @@  static int msix_capability_init(struct pci_dev *dev,
 
             pci_conf_write16(dev->sbdf, msix_control_reg(pos), control);
             xfree(entry);
-            return idx;
+            return -ENOMEM;
         }
-        base = fix_to_virt(idx) + (entry_paddr & (PAGE_SIZE - 1));
+        base = va + (entry_paddr & (PAGE_SIZE - 1));
 
         /* Mask interrupt here */
         writel(1, base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);