Message ID | 20230426145520.40554-1-hakor@amazon.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | x86/msi: dynamically map pages for MSI-X tables | expand |
On Wed, Apr 26, 2023 at 02:55:20PM +0000, Ruben Hakobyan wrote: > Xen reserves a constant number of pages that can be used for mapping > MSI-X tables. This limit is defined by FIX_MSIX_MAX_PAGES in fixmap.h. > > Reserving a fixed number of pages could result in an -ENOMEM if a > device requests a new page when the fixmap limit is exhausted and will > necessitate manually adjusting the limit before compilation. > > To avoid the issues with the current fixmap implementation, we modify > the MSI-X page mapping logic to instead dynamically map new pages when > they are needed by making use of ioremap(). I wonder if Arm plans to reuse this code, and whether then arm32 would better keep the fixmap implementation to avoid exhausting virtual address space in that case. This also has the side effect of ioremap() now possibly allocating a page in order to fill the page table for the newly allocated VA. > Signed-off-by: Ruben Hakobyan <hakor@amazon.com> > --- > xen/arch/x86/include/asm/fixmap.h | 2 - > xen/arch/x86/include/asm/msi.h | 5 +-- > xen/arch/x86/msi.c | 69 ++++++++----------------------- > 3 files changed, 19 insertions(+), 57 deletions(-) > > diff --git a/xen/arch/x86/include/asm/fixmap.h b/xen/arch/x86/include/asm/fixmap.h > index 516ec3fa6c..139c3e2dcc 100644 > --- a/xen/arch/x86/include/asm/fixmap.h > +++ b/xen/arch/x86/include/asm/fixmap.h > @@ -61,8 +61,6 @@ enum fixed_addresses { > FIX_ACPI_END = FIX_ACPI_BEGIN + NUM_FIXMAP_ACPI_PAGES - 1, > FIX_HPET_BASE, > FIX_TBOOT_SHARED_BASE, > - FIX_MSIX_IO_RESERV_BASE, > - FIX_MSIX_IO_RESERV_END = FIX_MSIX_IO_RESERV_BASE + FIX_MSIX_MAX_PAGES -1, > FIX_TBOOT_MAP_ADDRESS, > FIX_APEI_RANGE_BASE, > FIX_APEI_RANGE_END = FIX_APEI_RANGE_BASE + FIX_APEI_RANGE_MAX -1, > diff --git a/xen/arch/x86/include/asm/msi.h b/xen/arch/x86/include/asm/msi.h > index a53ade95c9..16c80c9883 100644 > --- a/xen/arch/x86/include/asm/msi.h > +++ b/xen/arch/x86/include/asm/msi.h > @@ -55,9 +55,6 @@ > #define MSI_ADDR_DEST_ID_MASK 0x00ff000 > 
#define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & MSI_ADDR_DEST_ID_MASK) > > -/* MAX fixed pages reserved for mapping MSIX tables. */ > -#define FIX_MSIX_MAX_PAGES 512 > - > struct msi_info { > pci_sbdf_t sbdf; > int irq; > @@ -213,7 +210,7 @@ struct arch_msix { > unsigned long first, last; > } table, pba; > int table_refcnt[MAX_MSIX_TABLE_PAGES]; > - int table_idx[MAX_MSIX_TABLE_PAGES]; > + void __iomem *table_va[MAX_MSIX_TABLE_PAGES]; > spinlock_t table_lock; > bool host_maskall, guest_maskall; > domid_t warned; > diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c > index d0bf63df1d..8128274c07 100644 > --- a/xen/arch/x86/msi.c > +++ b/xen/arch/x86/msi.c > @@ -24,7 +24,6 @@ > #include <asm/smp.h> > #include <asm/desc.h> > #include <asm/msi.h> > -#include <asm/fixmap.h> > #include <asm/p2m.h> > #include <mach_apic.h> > #include <io_ports.h> > @@ -39,75 +38,44 @@ boolean_param("msi", use_msi); > > static void __pci_disable_msix(struct msi_desc *); > > -/* bitmap indicate which fixed map is free */ > -static DEFINE_SPINLOCK(msix_fixmap_lock); > -static DECLARE_BITMAP(msix_fixmap_pages, FIX_MSIX_MAX_PAGES); > - > -static int msix_fixmap_alloc(void) > -{ > - int i, rc = -ENOMEM; > - > - spin_lock(&msix_fixmap_lock); > - for ( i = 0; i < FIX_MSIX_MAX_PAGES; i++ ) > - if ( !test_bit(i, &msix_fixmap_pages) ) > - break; > - if ( i == FIX_MSIX_MAX_PAGES ) > - goto out; > - rc = FIX_MSIX_IO_RESERV_BASE + i; > - set_bit(i, &msix_fixmap_pages); > - > - out: > - spin_unlock(&msix_fixmap_lock); > - return rc; > -} > - > -static void msix_fixmap_free(int idx) > -{ > - spin_lock(&msix_fixmap_lock); > - if ( idx >= FIX_MSIX_IO_RESERV_BASE ) > - clear_bit(idx - FIX_MSIX_IO_RESERV_BASE, &msix_fixmap_pages); > - spin_unlock(&msix_fixmap_lock); > -} > - > -static int msix_get_fixmap(struct arch_msix *msix, u64 table_paddr, > +static void __iomem *msix_map_table(struct arch_msix *msix, u64 table_paddr, I think msix_{get,put}_entry() might be better, as you are not 
mapping and unmapping the table at every call. > u64 entry_paddr) > { > long nr_page; > - int idx; > + void __iomem *va = NULL; > > nr_page = (entry_paddr >> PAGE_SHIFT) - (table_paddr >> PAGE_SHIFT); > > if ( nr_page < 0 || nr_page >= MAX_MSIX_TABLE_PAGES ) > - return -EINVAL; > + return NULL; > > spin_lock(&msix->table_lock); > if ( msix->table_refcnt[nr_page]++ == 0 ) > { > - idx = msix_fixmap_alloc(); > - if ( idx < 0 ) > + va = ioremap(entry_paddr, PAGE_SIZE); You are missing an 'entry_paddr & PAGE_MASK' here AFAICT, or else ioremap() won't return a page-aligned address if entry is not the first one on the requested page when mapping. > + if ( va == NULL ) > { > msix->table_refcnt[nr_page]--; > goto out; > } > - set_fixmap_nocache(idx, entry_paddr); > - msix->table_idx[nr_page] = idx; > + msix->table_va[nr_page] = va; > } > else > - idx = msix->table_idx[nr_page]; > + va = msix->table_va[nr_page]; > > out: > spin_unlock(&msix->table_lock); > - return idx; > + return va; > } > > -static void msix_put_fixmap(struct arch_msix *msix, int idx) > +static void msix_unmap_table(struct arch_msix *msix, void __iomem *va) va can be made const here. 
> { > int i; > > spin_lock(&msix->table_lock); > for ( i = 0; i < MAX_MSIX_TABLE_PAGES; i++ ) > { > - if ( msix->table_idx[i] == idx ) > + if ( msix->table_va[i] == va ) > break; > } > if ( i == MAX_MSIX_TABLE_PAGES ) > @@ -115,9 +83,8 @@ static void msix_put_fixmap(struct arch_msix *msix, int idx) > > if ( --msix->table_refcnt[i] == 0 ) > { > - clear_fixmap(idx); > - msix_fixmap_free(idx); > - msix->table_idx[i] = 0; > + vunmap(va); iounmap() > + msix->table_va[i] = NULL; > } > > out: > @@ -568,8 +535,8 @@ int msi_free_irq(struct msi_desc *entry) > } > > if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX ) > - msix_put_fixmap(entry->dev->msix, > - virt_to_fix((unsigned long)entry->mask_base)); > + msix_unmap_table(entry->dev->msix, > + (void*)((unsigned long)entry->mask_base & PAGE_MASK)); Did you consider calling this msix_unmap_entry() and just pass the entry VA to the function and get the page from there. round_pgdown() might be helpful here otherwise. > list_del(&entry->list); > xfree(entry); > @@ -892,10 +859,10 @@ static int msix_capability_init(struct pci_dev *dev, > { > /* Map MSI-X table region */ > u64 entry_paddr = table_paddr + msi->entry_nr * PCI_MSIX_ENTRY_SIZE; > - int idx = msix_get_fixmap(msix, table_paddr, entry_paddr); > + void __iomem *va = msix_map_table(msix, table_paddr, entry_paddr); > void __iomem *base; > > - if ( idx < 0 ) > + if ( va == NULL ) > { > if ( zap_on_error ) > { > @@ -907,9 +874,9 @@ static int msix_capability_init(struct pci_dev *dev, > > pci_conf_write16(dev->sbdf, msix_control_reg(pos), control); > xfree(entry); > - return idx; > + return -ENOMEM; > } > - base = fix_to_virt(idx) + (entry_paddr & (PAGE_SIZE - 1)); > + base = va + (entry_paddr & (PAGE_SIZE - 1)); Now that msix_map_table() returns a virtual address, you could likely do the adjustment in there an return the entry VA from msix_map_table() or equivalent? (see my naming suggestion above) Otherwise please use ~PAGE_MASK. Thanks, Roger.
On 02.05.2023 12:10, Roger Pau Monné wrote: > On Wed, Apr 26, 2023 at 02:55:20PM +0000, Ruben Hakobyan wrote: >> Xen reserves a constant number of pages that can be used for mapping >> MSI-X tables. This limit is defined by FIX_MSIX_MAX_PAGES in fixmap.h. >> >> Reserving a fixed number of pages could result in an -ENOMEM if a >> device requests a new page when the fixmap limit is exhausted and will >> necessitate manually adjusting the limit before compilation. >> >> To avoid the issues with the current fixmap implementation, we modify >> the MSI-X page mapping logic to instead dynamically map new pages when >> they are needed by making use of ioremap(). > > I wonder if Arm plans to reuse this code, and whether then arm32 would > better keep the fixmap implementation to avoid exhausting virtual > address space in that case. I think this would then need to be something that 32-bit architectures do specially. Right now aiui PCI (and hence MSI-X) work on Arm targets only Arm64. > This also have the side effect of ioremap() now possibly allocating a > page in order to fill the page table for the newly allocated VA. Indeed, but I think the (vague) plan to switch to ioremap() has been around for a pretty long time (perhaps forever since 32-bit support was purged). Jan
On Tue, May 02, 2023 at 12:18:06PM +0200, Jan Beulich wrote: > On 02.05.2023 12:10, Roger Pau Monné wrote: > > On Wed, Apr 26, 2023 at 02:55:20PM +0000, Ruben Hakobyan wrote: > >> Xen reserves a constant number of pages that can be used for mapping > >> MSI-X tables. This limit is defined by FIX_MSIX_MAX_PAGES in fixmap.h. > >> > >> Reserving a fixed number of pages could result in an -ENOMEM if a > >> device requests a new page when the fixmap limit is exhausted and will > >> necessitate manually adjusting the limit before compilation. > >> > >> To avoid the issues with the current fixmap implementation, we modify > >> the MSI-X page mapping logic to instead dynamically map new pages when > >> they are needed by making use of ioremap(). > > > > I wonder if Arm plans to reuse this code, and whether then arm32 would > > better keep the fixmap implementation to avoid exhausting virtual > > address space in that case. > > I think this would then need to be something that 32-bit architectures > do specially. Right now aiui PCI (and hence MSI-X) work on Arm targets > only Arm64. > > > This also have the side effect of ioremap() now possibly allocating a > > page in order to fill the page table for the newly allocated VA. > > Indeed, but I think the (vague) plan to switch to ioremap() has been > around for a pretty long time (perhaps forever since 32-bit support > was purged). Yup, I'm not saying the above should block the patch, but might be worth mentioning in the commit message. Roger.
diff --git a/xen/arch/x86/include/asm/fixmap.h b/xen/arch/x86/include/asm/fixmap.h index 516ec3fa6c..139c3e2dcc 100644 --- a/xen/arch/x86/include/asm/fixmap.h +++ b/xen/arch/x86/include/asm/fixmap.h @@ -61,8 +61,6 @@ enum fixed_addresses { FIX_ACPI_END = FIX_ACPI_BEGIN + NUM_FIXMAP_ACPI_PAGES - 1, FIX_HPET_BASE, FIX_TBOOT_SHARED_BASE, - FIX_MSIX_IO_RESERV_BASE, - FIX_MSIX_IO_RESERV_END = FIX_MSIX_IO_RESERV_BASE + FIX_MSIX_MAX_PAGES -1, FIX_TBOOT_MAP_ADDRESS, FIX_APEI_RANGE_BASE, FIX_APEI_RANGE_END = FIX_APEI_RANGE_BASE + FIX_APEI_RANGE_MAX -1, diff --git a/xen/arch/x86/include/asm/msi.h b/xen/arch/x86/include/asm/msi.h index a53ade95c9..16c80c9883 100644 --- a/xen/arch/x86/include/asm/msi.h +++ b/xen/arch/x86/include/asm/msi.h @@ -55,9 +55,6 @@ #define MSI_ADDR_DEST_ID_MASK 0x00ff000 #define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & MSI_ADDR_DEST_ID_MASK) -/* MAX fixed pages reserved for mapping MSIX tables. */ -#define FIX_MSIX_MAX_PAGES 512 - struct msi_info { pci_sbdf_t sbdf; int irq; @@ -213,7 +210,7 @@ struct arch_msix { unsigned long first, last; } table, pba; int table_refcnt[MAX_MSIX_TABLE_PAGES]; - int table_idx[MAX_MSIX_TABLE_PAGES]; + void __iomem *table_va[MAX_MSIX_TABLE_PAGES]; spinlock_t table_lock; bool host_maskall, guest_maskall; domid_t warned; diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c index d0bf63df1d..8128274c07 100644 --- a/xen/arch/x86/msi.c +++ b/xen/arch/x86/msi.c @@ -24,7 +24,6 @@ #include <asm/smp.h> #include <asm/desc.h> #include <asm/msi.h> -#include <asm/fixmap.h> #include <asm/p2m.h> #include <mach_apic.h> #include <io_ports.h> @@ -39,75 +38,44 @@ boolean_param("msi", use_msi); static void __pci_disable_msix(struct msi_desc *); -/* bitmap indicate which fixed map is free */ -static DEFINE_SPINLOCK(msix_fixmap_lock); -static DECLARE_BITMAP(msix_fixmap_pages, FIX_MSIX_MAX_PAGES); - -static int msix_fixmap_alloc(void) -{ - int i, rc = -ENOMEM; - - spin_lock(&msix_fixmap_lock); - for ( i = 0; i < 
FIX_MSIX_MAX_PAGES; i++ ) - if ( !test_bit(i, &msix_fixmap_pages) ) - break; - if ( i == FIX_MSIX_MAX_PAGES ) - goto out; - rc = FIX_MSIX_IO_RESERV_BASE + i; - set_bit(i, &msix_fixmap_pages); - - out: - spin_unlock(&msix_fixmap_lock); - return rc; -} - -static void msix_fixmap_free(int idx) -{ - spin_lock(&msix_fixmap_lock); - if ( idx >= FIX_MSIX_IO_RESERV_BASE ) - clear_bit(idx - FIX_MSIX_IO_RESERV_BASE, &msix_fixmap_pages); - spin_unlock(&msix_fixmap_lock); -} - -static int msix_get_fixmap(struct arch_msix *msix, u64 table_paddr, +static void __iomem *msix_map_table(struct arch_msix *msix, u64 table_paddr, u64 entry_paddr) { long nr_page; - int idx; + void __iomem *va = NULL; nr_page = (entry_paddr >> PAGE_SHIFT) - (table_paddr >> PAGE_SHIFT); if ( nr_page < 0 || nr_page >= MAX_MSIX_TABLE_PAGES ) - return -EINVAL; + return NULL; spin_lock(&msix->table_lock); if ( msix->table_refcnt[nr_page]++ == 0 ) { - idx = msix_fixmap_alloc(); - if ( idx < 0 ) + va = ioremap(entry_paddr, PAGE_SIZE); + if ( va == NULL ) { msix->table_refcnt[nr_page]--; goto out; } - set_fixmap_nocache(idx, entry_paddr); - msix->table_idx[nr_page] = idx; + msix->table_va[nr_page] = va; } else - idx = msix->table_idx[nr_page]; + va = msix->table_va[nr_page]; out: spin_unlock(&msix->table_lock); - return idx; + return va; } -static void msix_put_fixmap(struct arch_msix *msix, int idx) +static void msix_unmap_table(struct arch_msix *msix, void __iomem *va) { int i; spin_lock(&msix->table_lock); for ( i = 0; i < MAX_MSIX_TABLE_PAGES; i++ ) { - if ( msix->table_idx[i] == idx ) + if ( msix->table_va[i] == va ) break; } if ( i == MAX_MSIX_TABLE_PAGES ) @@ -115,9 +83,8 @@ static void msix_put_fixmap(struct arch_msix *msix, int idx) if ( --msix->table_refcnt[i] == 0 ) { - clear_fixmap(idx); - msix_fixmap_free(idx); - msix->table_idx[i] = 0; + vunmap(va); + msix->table_va[i] = NULL; } out: @@ -568,8 +535,8 @@ int msi_free_irq(struct msi_desc *entry) } if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX ) - 
msix_put_fixmap(entry->dev->msix, - virt_to_fix((unsigned long)entry->mask_base)); + msix_unmap_table(entry->dev->msix, + (void*)((unsigned long)entry->mask_base & PAGE_MASK)); list_del(&entry->list); xfree(entry); @@ -892,10 +859,10 @@ static int msix_capability_init(struct pci_dev *dev, { /* Map MSI-X table region */ u64 entry_paddr = table_paddr + msi->entry_nr * PCI_MSIX_ENTRY_SIZE; - int idx = msix_get_fixmap(msix, table_paddr, entry_paddr); + void __iomem *va = msix_map_table(msix, table_paddr, entry_paddr); void __iomem *base; - if ( idx < 0 ) + if ( va == NULL ) { if ( zap_on_error ) { @@ -907,9 +874,9 @@ static int msix_capability_init(struct pci_dev *dev, pci_conf_write16(dev->sbdf, msix_control_reg(pos), control); xfree(entry); - return idx; + return -ENOMEM; } - base = fix_to_virt(idx) + (entry_paddr & (PAGE_SIZE - 1)); + base = va + (entry_paddr & (PAGE_SIZE - 1)); /* Mask interrupt here */ writel(1, base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
Xen reserves a constant number of pages that can be used for mapping MSI-X tables. This limit is defined by FIX_MSIX_MAX_PAGES in fixmap.h. Reserving a fixed number of pages could result in an -ENOMEM if a device requests a new page when the fixmap limit is exhausted and will necessitate manually adjusting the limit before compilation. To avoid the issues with the current fixmap implementation, we modify the MSI-X page mapping logic to instead dynamically map new pages when they are needed by making use of ioremap(). Signed-off-by: Ruben Hakobyan <hakor@amazon.com> --- xen/arch/x86/include/asm/fixmap.h | 2 - xen/arch/x86/include/asm/msi.h | 5 +-- xen/arch/x86/msi.c | 69 ++++++++----------------------- 3 files changed, 19 insertions(+), 57 deletions(-)