diff mbox series

[RFC,02/10] xen: pci: add pci_seg->alldevs_lock

Message ID 20220831141040.13231-3-volodymyr_babchuk@epam.com (mailing list archive)
State New, archived
Headers show
Series Rework PCI locking | expand

Commit Message

Volodymyr Babchuk Aug. 31, 2022, 2:10 p.m. UTC
This lock protects alldevs_list of struct pci_seg. As this, it should
be used when we are adding, removing on enumerating PCI devices
assigned to a PCI segment.

Radix tree that stores PCI segment has own locking mechanism, also
pci_seg structures are only allocated and newer freed, so we need no
additional locking to access pci_seg structures. But we need a lock
that protects alldevs_list field.

This enables more granular locking instead of one huge pcidevs_lock
that locks entire PCI subsystem.  Please note that pcidevs_lock() is
still used, we are going to remove it in subsequent patches.

Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com>
---
 xen/drivers/passthrough/pci.c | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

Comments

Stefano Stabellini Jan. 26, 2023, 11:40 p.m. UTC | #1
On Wed, 31 Aug 2022, Volodymyr Babchuk wrote:
> This lock protects alldevs_list of struct pci_seg. As this, it should
> be used when we are adding, removing on enumerating PCI devices
> assigned to a PCI segment.
> 
> Radix tree that stores PCI segment has own locking mechanism, also
> pci_seg structures are only allocated and newer freed, so we need no
> additional locking to access pci_seg structures. But we need a lock
> that protects alldevs_list field.
> 
> This enables more granular locking instead of one huge pcidevs_lock
> that locks entire PCI subsystem.  Please note that pcidevs_lock() is
> still used, we are going to remove it in subsequent patches.
> 
> Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com>
> ---
>  xen/drivers/passthrough/pci.c | 20 +++++++++++++++++++-
>  1 file changed, 19 insertions(+), 1 deletion(-)
> 
> diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
> index 4366f8f965..2dfa1c2875 100644
> --- a/xen/drivers/passthrough/pci.c
> +++ b/xen/drivers/passthrough/pci.c
> @@ -38,6 +38,7 @@
>  
>  struct pci_seg {
>      struct list_head alldevs_list;
> +    spinlock_t alldevs_lock;
>      u16 nr;
>      unsigned long *ro_map;
>      /* bus2bridge_lock protects bus2bridge array */
> @@ -93,6 +94,7 @@ static struct pci_seg *alloc_pseg(u16 seg)
>      pseg->nr = seg;
>      INIT_LIST_HEAD(&pseg->alldevs_list);
>      spin_lock_init(&pseg->bus2bridge_lock);
> +    spin_lock_init(&pseg->alldevs_lock);
>  
>      if ( radix_tree_insert(&pci_segments, seg, pseg) )
>      {
> @@ -385,9 +387,13 @@ static struct pci_dev *alloc_pdev(struct pci_seg *pseg, u8 bus, u8 devfn)
>      unsigned int pos;
>      int rc;
>  
> +    spin_lock(&pseg->alldevs_lock);
>      list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
>          if ( pdev->bus == bus && pdev->devfn == devfn )
> +        {
> +            spin_unlock(&pseg->alldevs_lock);
>              return pdev;
> +        }
>  
>      pdev = xzalloc(struct pci_dev);
>      if ( !pdev )

Here there is a missing spin_unlock on the error path


> @@ -404,10 +410,12 @@ static struct pci_dev *alloc_pdev(struct pci_seg *pseg, u8 bus, u8 devfn)
>      if ( rc )
>      {
>          xfree(pdev);
> +        spin_unlock(&pseg->alldevs_lock);
>          return NULL;
>      }
>  
>      list_add(&pdev->alldevs_list, &pseg->alldevs_list);
> +    spin_unlock(&pseg->alldevs_lock);
>  
>      /* update bus2bridge */
>      switch ( pdev->type = pdev_type(pseg->nr, bus, devfn) )
> @@ -611,15 +619,20 @@ struct pci_dev *pci_get_pdev(struct domain *d, pci_sbdf_t sbdf)
>       */
>      if ( !d || is_hardware_domain(d) )
>      {
> -        const struct pci_seg *pseg = get_pseg(sbdf.seg);
> +        struct pci_seg *pseg = get_pseg(sbdf.seg);
>  
>          if ( !pseg )
>              return NULL;
>  
> +        spin_lock(&pseg->alldevs_lock);
>          list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
>              if ( pdev->sbdf.bdf == sbdf.bdf &&
>                   (!d || pdev->domain == d) )
> +            {
> +                spin_unlock(&pseg->alldevs_lock);
>                  return pdev;
> +            }
> +        spin_unlock(&pseg->alldevs_lock);
>      }
>      else
>      {
> @@ -893,6 +906,7 @@ int pci_remove_device(u16 seg, u8 bus, u8 devfn)
>          return -ENODEV;
>  
>      pcidevs_lock();
> +    spin_lock(&pseg->alldevs_lock);
>      list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
>          if ( pdev->bus == bus && pdev->devfn == devfn )
>          {
> @@ -907,10 +921,12 @@ int pci_remove_device(u16 seg, u8 bus, u8 devfn)
>              }
>              printk(XENLOG_DEBUG "PCI remove device %pp\n", &pdev->sbdf);
>              free_pdev(pseg, pdev);
> +            list_del(&pdev->alldevs_list);

use after free: free_pdev is freeing pdef

>              break;
>          }
>  
>      pcidevs_unlock();
> +    spin_unlock(&pseg->alldevs_lock);

lock inversion


>      return ret;
>  }
>  
> @@ -1363,6 +1379,7 @@ static int cf_check _dump_pci_devices(struct pci_seg *pseg, void *arg)
>  
>      printk("==== segment %04x ====\n", pseg->nr);
>  
> +    spin_lock(&pseg->alldevs_lock);
>      list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
>      {
>          printk("%pp - ", &pdev->sbdf);
> @@ -1376,6 +1393,7 @@ static int cf_check _dump_pci_devices(struct pci_seg *pseg, void *arg)
>          pdev_dump_msi(pdev);
>          printk("\n");
>      }
> +    spin_unlock(&pseg->alldevs_lock);
>  
>      return 0;
>  }
> -- 
> 2.36.1
>
Jan Beulich Feb. 28, 2023, 4:32 p.m. UTC | #2
On 31.08.2022 16:10, Volodymyr Babchuk wrote:
> This lock protects alldevs_list of struct pci_seg. As this, it should
> be used when we are adding, removing on enumerating PCI devices
> assigned to a PCI segment.
> 
> Radix tree that stores PCI segment has own locking mechanism, also
> pci_seg structures are only allocated and newer freed, so we need no
> additional locking to access pci_seg structures. But we need a lock
> that protects alldevs_list field.
> 
> This enables more granular locking instead of one huge pcidevs_lock
> that locks entire PCI subsystem.  Please note that pcidevs_lock() is
> still used, we are going to remove it in subsequent patches.

Just a thought: To limit the scope of the steps taken, would it be a
possibility (and useful) to move from the global to the per-segment
lock, extending what this per-segment lock is actually protecting?
And only then take further steps, as already done in later parts of
this series?

Jan
diff mbox series

Patch

diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 4366f8f965..2dfa1c2875 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -38,6 +38,7 @@ 
 
 struct pci_seg {
     struct list_head alldevs_list;
+    spinlock_t alldevs_lock;
     u16 nr;
     unsigned long *ro_map;
     /* bus2bridge_lock protects bus2bridge array */
@@ -93,6 +94,7 @@  static struct pci_seg *alloc_pseg(u16 seg)
     pseg->nr = seg;
     INIT_LIST_HEAD(&pseg->alldevs_list);
     spin_lock_init(&pseg->bus2bridge_lock);
+    spin_lock_init(&pseg->alldevs_lock);
 
     if ( radix_tree_insert(&pci_segments, seg, pseg) )
     {
@@ -385,9 +387,13 @@  static struct pci_dev *alloc_pdev(struct pci_seg *pseg, u8 bus, u8 devfn)
     unsigned int pos;
     int rc;
 
+    spin_lock(&pseg->alldevs_lock);
     list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
         if ( pdev->bus == bus && pdev->devfn == devfn )
+        {
+            spin_unlock(&pseg->alldevs_lock);
             return pdev;
+        }
 
     pdev = xzalloc(struct pci_dev);
     if ( !pdev )
@@ -404,10 +410,12 @@  static struct pci_dev *alloc_pdev(struct pci_seg *pseg, u8 bus, u8 devfn)
     if ( rc )
     {
         xfree(pdev);
+        spin_unlock(&pseg->alldevs_lock);
         return NULL;
     }
 
     list_add(&pdev->alldevs_list, &pseg->alldevs_list);
+    spin_unlock(&pseg->alldevs_lock);
 
     /* update bus2bridge */
     switch ( pdev->type = pdev_type(pseg->nr, bus, devfn) )
@@ -611,15 +619,20 @@  struct pci_dev *pci_get_pdev(struct domain *d, pci_sbdf_t sbdf)
      */
     if ( !d || is_hardware_domain(d) )
     {
-        const struct pci_seg *pseg = get_pseg(sbdf.seg);
+        struct pci_seg *pseg = get_pseg(sbdf.seg);
 
         if ( !pseg )
             return NULL;
 
+        spin_lock(&pseg->alldevs_lock);
         list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
             if ( pdev->sbdf.bdf == sbdf.bdf &&
                  (!d || pdev->domain == d) )
+            {
+                spin_unlock(&pseg->alldevs_lock);
                 return pdev;
+            }
+        spin_unlock(&pseg->alldevs_lock);
     }
     else
     {
@@ -893,6 +906,7 @@  int pci_remove_device(u16 seg, u8 bus, u8 devfn)
         return -ENODEV;
 
     pcidevs_lock();
+    spin_lock(&pseg->alldevs_lock);
     list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
         if ( pdev->bus == bus && pdev->devfn == devfn )
         {
@@ -907,10 +921,12 @@  int pci_remove_device(u16 seg, u8 bus, u8 devfn)
             }
             printk(XENLOG_DEBUG "PCI remove device %pp\n", &pdev->sbdf);
             free_pdev(pseg, pdev);
+            list_del(&pdev->alldevs_list);
             break;
         }
 
     pcidevs_unlock();
+    spin_unlock(&pseg->alldevs_lock);
     return ret;
 }
 
@@ -1363,6 +1379,7 @@  static int cf_check _dump_pci_devices(struct pci_seg *pseg, void *arg)
 
     printk("==== segment %04x ====\n", pseg->nr);
 
+    spin_lock(&pseg->alldevs_lock);
     list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
     {
         printk("%pp - ", &pdev->sbdf);
@@ -1376,6 +1393,7 @@  static int cf_check _dump_pci_devices(struct pci_seg *pseg, void *arg)
         pdev_dump_msi(pdev);
         printk("\n");
     }
+    spin_unlock(&pseg->alldevs_lock);
 
     return 0;
 }