diff mbox series

[RFC,FUTURE,1/3] domctl/pci: add ability to provide/request a virtual SBDF

Message ID 20231213234345.779722-2-volodymyr_babchuk@epam.com (mailing list archive)
State New
Headers show
Series Make vSBDF configurable by toolstack | expand

Commit Message

Volodymyr Babchuk Dec. 13, 2023, 11:44 p.m. UTC
With CONFIG_HAS_VPCI_GUEST_SUPPORT enabled, hypervisor will assign a
passed-through PCI device to a guest using virtual/guest SBDF
number. Right now hypervisor automatically allocates next free
SBDF. But there are cases mentioned in [1] when user should be able to
control SBDF assigned to the passed-through device.

To enable this, extend assign_device domctl call with optional
parameter virtual_sbdf. When this parameter is set to
XEN_DOMCTL_DEV_SDBF_ANY, hypervisor will act as previously, but it
will return allocated vSBDF back to the toolstack. Alternatively,
toolstack might provide desired vSBDF and hypervisor will try to use
it, if it is free and falls into permitted range.

[1] https://lore.kernel.org/all/d6a58e73-da51-40f1-a2f7-576274945585@xen.org/

Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com>
---
 tools/libs/ctrl/xc_domain.c   |  1 +
 xen/drivers/passthrough/pci.c | 17 ++++++++++------
 xen/drivers/vpci/vpci.c       | 38 +++++++++++++++++++++++++++--------
 xen/include/public/domctl.h   |  9 ++++++++-
 xen/include/xen/vpci.h        |  4 ++--
 5 files changed, 52 insertions(+), 17 deletions(-)

Comments

Jan Beulich April 22, 2024, 3:28 p.m. UTC | #1
On 14.12.2023 00:44, Volodymyr Babchuk wrote:
> With CONFIG_HAS_VPCI_GUEST_SUPPORT enabled, hypervisor will assign a
> passed-through PCI device to a guest using virtual/guest SBDF
> number. Right now hypervisor automatically allocates next free
> SBDF. But there are cases mentioned in [1] when user should be able to
> control SBDF assigned to the passed-through device.
> 
> To enable this, extend assign_device domctl call with optional
> parameter virtual_sbdf. When this parameter is set to
> XEN_DOMCTL_DEV_SDBF_ANY, hypervisor will act as previously, but it
> will return allocated vSBDF back to the toolstack. Alternatively,
> toolstack might provide desired vSBDF and hypervisor will try to use
> it, if it is free and falls into permitted range.
> 
> [1] https://lore.kernel.org/all/d6a58e73-da51-40f1-a2f7-576274945585@xen.org/
> 
> Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com>

It's been a while since this was sent, so comments below may have been given
by others already. I'm sorry for the redundancy if so.

> --- a/xen/drivers/vpci/vpci.c
> +++ b/xen/drivers/vpci/vpci.c
> @@ -37,7 +37,7 @@ extern vpci_register_init_t *const __end_vpci_array[];
>  #define NUM_VPCI_INIT (__end_vpci_array - __start_vpci_array)
>  
>  #ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT
> -static int add_virtual_device(struct pci_dev *pdev)
> +static int add_virtual_device(struct pci_dev *pdev, pci_sbdf_t *vsbdf)
>  {
>      struct domain *d = pdev->domain;
>      unsigned int new_dev_number;
> @@ -57,13 +57,35 @@ static int add_virtual_device(struct pci_dev *pdev)
>                   &pdev->sbdf);
>          return -EOPNOTSUPP;
>      }
> -    new_dev_number = find_first_zero_bit(d->vpci_dev_assigned_map,
> -                                         VPCI_MAX_VIRT_DEV);
> -    if ( new_dev_number == VPCI_MAX_VIRT_DEV )
> -        return -ENOSPC;
>  
> -    __set_bit(new_dev_number, &d->vpci_dev_assigned_map);
> +    if ( !vsbdf || vsbdf->sbdf == XEN_DOMCTL_DEV_SDBF_ANY )
> +    {
> +        new_dev_number = find_first_zero_bit(d->vpci_dev_assigned_map,
> +                                             VPCI_MAX_VIRT_DEV);
> +        if ( new_dev_number == VPCI_MAX_VIRT_DEV )
> +            return -ENOSPC;
>  
> +        if ( vsbdf )
> +            *vsbdf = PCI_SBDF(0, 0, new_dev_number, 0);
> +    }
> +    else
> +    {
> +        if ( vsbdf->seg != 0 || vsbdf->bus != 0 || vsbdf->fn != 0 )
> +        {
> +            gdprintk(XENLOG_ERR,
> +                     "vSBDF %pp: segment, bus and function should be 0\n",
> +                     vsbdf);
> +            return -EOPNOTSUPP;
> +        }
> +        new_dev_number = vsbdf->dev;
> +        if ( test_bit(new_dev_number, &d->vpci_dev_assigned_map) )
> +        {
> +            gdprintk(XENLOG_ERR, "vSBDF %pp already assigned\n", vsbdf);
> +            return -EOPNOTSUPP;
> +        }
> +    }
> +
> +    __set_bit(new_dev_number, &d->vpci_dev_assigned_map);
>      /*
>       * Both segment and bus number are 0:
>       *  - we emulate a single host bridge for the guest, e.g. segment 0

Please can a blank line remain to live ahead of this comment?

> --- a/xen/include/public/domctl.h
> +++ b/xen/include/public/domctl.h
> @@ -504,7 +504,12 @@ struct xen_domctl_sendtrigger {
>  
>  
>  /* Assign a device to a guest. Sets up IOMMU structures. */
> -/* XEN_DOMCTL_assign_device */
> +/* XEN_DOMCTL_assign_device
> + * when assigning a PCI device, it is possible to either request
> + * or provide a virtual SBDF. When virtual_sbdf equals to
> + * XEN_DOMCTL_DEV_SDBF_ANY, hypervisor will return allocated
> + * vSBDF back.
> + */
>  /*
>   * XEN_DOMCTL_test_assign_device: Pass DOMID_INVALID to find out whether the
>   * given device is assigned to any DomU at all. Pass a specific domain ID to
> @@ -528,6 +533,8 @@ struct xen_domctl_assign_device {
>      union {
>          struct {
>              uint32_t machine_sbdf;   /* machine PCI ID of assigned device */
> +            uint32_t virtual_sbdf;   /* IN/OUT virtual SBDF of the device */
> +#define XEN_DOMCTL_DEV_SDBF_ANY     0xFFFFFFFF /* request a free SBDF */
>          } pci;

Such a struct change needs to come with an interface version bump, I
guess.

> --- a/xen/include/xen/vpci.h
> +++ b/xen/include/xen/vpci.h
> @@ -33,7 +33,7 @@ typedef int vpci_register_init_t(struct pci_dev *dev);
>                 __used_section(".data.vpci." p) = x
>  
>  /* Assign vPCI to device by adding handlers to device. */
> -int __must_check vpci_assign_device(struct pci_dev *pdev);
> +int __must_check vpci_assign_device(struct pci_dev *pdev, pci_sbdf_t *vsbdf);
>  
>  /* Remove all handlers and free vpci related structures. */
>  void vpci_deassign_device(struct pci_dev *pdev);
> @@ -265,7 +265,7 @@ bool vpci_ecam_read(pci_sbdf_t sbdf, unsigned int reg, unsigned int len,
>  #else /* !CONFIG_HAS_VPCI */
>  struct vpci_vcpu {};
>  
> -static inline int vpci_assign_device(struct pci_dev *pdev)
> +static inline int vpci_assign_device(struct pci_dev *pdev, pci_sbdf_t *vsbdf)
>  {
>      return 0;
>  }

Can this stub really return "success" without even touching *vsbdf? At
the very least the public header comment saying "hypervisor will return"
isn't quite right if this path is taken. Perhaps when HAS_VPCI=n there
should be a requirement for the caller to pass NULL? Yet even then the
domctl interface wouldn't do what it (currently) promises. So perhaps
you can't really extend an existing domctl here.

Jan
diff mbox series

Patch

diff --git a/tools/libs/ctrl/xc_domain.c b/tools/libs/ctrl/xc_domain.c
index f2d9d14b4d..2405e39517 100644
--- a/tools/libs/ctrl/xc_domain.c
+++ b/tools/libs/ctrl/xc_domain.c
@@ -1505,6 +1505,7 @@  int xc_assign_device(
     domctl.domain = domid;
     domctl.u.assign_device.dev = XEN_DOMCTL_DEV_PCI;
     domctl.u.assign_device.u.pci.machine_sbdf = machine_sbdf;
+    domctl.u.assign_device.u.pci.virtual_sbdf = XEN_DOMCTL_DEV_SDBF_ANY;
     domctl.u.assign_device.flags = flags;
 
     return do_domctl(xch, &domctl);
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index a3312fdab2..9ea18c39f4 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -755,7 +755,7 @@  int pci_add_device(u16 seg, u8 bus, u8 devfn,
          * For devices not discovered by Xen during boot, add vPCI handlers
          * when Dom0 first informs Xen about such devices.
          */
-        ret = vpci_assign_device(pdev);
+        ret = vpci_assign_device(pdev, NULL);
         if ( ret )
         {
             list_del(&pdev->domain_list);
@@ -891,7 +891,7 @@  static int deassign_device(struct domain *d, uint16_t seg, uint8_t bus,
     pdev->fault.count = 0;
 
     write_lock(&target->pci_lock);
-    ret = vpci_assign_device(pdev);
+    ret = vpci_assign_device(pdev, NULL);
     write_unlock(&target->pci_lock);
 
  out:
@@ -1154,7 +1154,7 @@  static void __hwdom_init setup_one_hwdom_device(const struct setup_hwdom *ctxt,
               PCI_SLOT(devfn) == PCI_SLOT(pdev->devfn) );
 
     write_lock(&ctxt->d->pci_lock);
-    err = vpci_assign_device(pdev);
+    err = vpci_assign_device(pdev, NULL);
     write_unlock(&ctxt->d->pci_lock);
     if ( err )
         printk(XENLOG_ERR "setup of vPCI for d%d failed: %d\n",
@@ -1461,7 +1461,8 @@  static int device_assigned(u16 seg, u8 bus, u8 devfn)
 }
 
 /* Caller should hold the pcidevs_lock */
-static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag)
+static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag,
+                         pci_sbdf_t *vsbdf)
 {
     const struct domain_iommu *hd = dom_iommu(d);
     struct pci_dev *pdev;
@@ -1515,7 +1516,7 @@  static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag)
     }
 
     write_lock(&d->pci_lock);
-    rc = vpci_assign_device(pdev);
+    rc = vpci_assign_device(pdev, vsbdf);
     write_unlock(&d->pci_lock);
 
  done:
@@ -1616,6 +1617,7 @@  int iommu_do_pci_domctl(
     u8 bus, devfn;
     int ret = 0;
     uint32_t machine_sbdf;
+    pci_sbdf_t virtual_sbdf;
 
     switch ( domctl->cmd )
     {
@@ -1675,6 +1677,7 @@  int iommu_do_pci_domctl(
             break;
 
         machine_sbdf = domctl->u.assign_device.u.pci.machine_sbdf;
+        virtual_sbdf.sbdf = domctl->u.assign_device.u.pci.virtual_sbdf;
 
         ret = xsm_assign_device(XSM_HOOK, d, machine_sbdf);
         if ( ret )
@@ -1696,11 +1699,13 @@  int iommu_do_pci_domctl(
             }
         }
         else if ( !ret )
-            ret = assign_device(d, seg, bus, devfn, flags);
+            ret = assign_device(d, seg, bus, devfn, flags, &virtual_sbdf);
         pcidevs_unlock();
         if ( ret == -ERESTART )
             ret = hypercall_create_continuation(__HYPERVISOR_domctl,
                                                 "h", u_domctl);
+
+        domctl->u.assign_device.u.pci.virtual_sbdf = virtual_sbdf.sbdf;
         break;
 
     case XEN_DOMCTL_deassign_device:
diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
index 7c0b610ccc..12963b77c3 100644
--- a/xen/drivers/vpci/vpci.c
+++ b/xen/drivers/vpci/vpci.c
@@ -37,7 +37,7 @@  extern vpci_register_init_t *const __end_vpci_array[];
 #define NUM_VPCI_INIT (__end_vpci_array - __start_vpci_array)
 
 #ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT
-static int add_virtual_device(struct pci_dev *pdev)
+static int add_virtual_device(struct pci_dev *pdev, pci_sbdf_t *vsbdf)
 {
     struct domain *d = pdev->domain;
     unsigned int new_dev_number;
@@ -57,13 +57,35 @@  static int add_virtual_device(struct pci_dev *pdev)
                  &pdev->sbdf);
         return -EOPNOTSUPP;
     }
-    new_dev_number = find_first_zero_bit(d->vpci_dev_assigned_map,
-                                         VPCI_MAX_VIRT_DEV);
-    if ( new_dev_number == VPCI_MAX_VIRT_DEV )
-        return -ENOSPC;
 
-    __set_bit(new_dev_number, &d->vpci_dev_assigned_map);
+    if ( !vsbdf || vsbdf->sbdf == XEN_DOMCTL_DEV_SDBF_ANY )
+    {
+        new_dev_number = find_first_zero_bit(d->vpci_dev_assigned_map,
+                                             VPCI_MAX_VIRT_DEV);
+        if ( new_dev_number == VPCI_MAX_VIRT_DEV )
+            return -ENOSPC;
 
+        if ( vsbdf )
+            *vsbdf = PCI_SBDF(0, 0, new_dev_number, 0);
+    }
+    else
+    {
+        if ( vsbdf->seg != 0 || vsbdf->bus != 0 || vsbdf->fn != 0 )
+        {
+            gdprintk(XENLOG_ERR,
+                     "vSBDF %pp: segment, bus and function should be 0\n",
+                     vsbdf);
+            return -EOPNOTSUPP;
+        }
+        new_dev_number = vsbdf->dev;
+        if ( test_bit(new_dev_number, &d->vpci_dev_assigned_map) )
+        {
+            gdprintk(XENLOG_ERR, "vSBDF %pp already assigned\n", vsbdf);
+            return -EOPNOTSUPP;
+        }
+    }
+
+    __set_bit(new_dev_number, &d->vpci_dev_assigned_map);
     /*
      * Both segment and bus number are 0:
      *  - we emulate a single host bridge for the guest, e.g. segment 0
@@ -148,7 +170,7 @@  void vpci_deassign_device(struct pci_dev *pdev)
     pdev->vpci = NULL;
 }
 
-int vpci_assign_device(struct pci_dev *pdev)
+int vpci_assign_device(struct pci_dev *pdev, pci_sbdf_t *vsbdf)
 {
     unsigned int i;
     const unsigned long *ro_map;
@@ -176,7 +198,7 @@  int vpci_assign_device(struct pci_dev *pdev)
 
 #ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT
     pdev->vpci->guest_sbdf.sbdf = ~0;
-    rc = add_virtual_device(pdev);
+    rc = add_virtual_device(pdev, vsbdf);
     if ( rc )
         goto out;
 #endif
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index a33f9ec32b..60a59ce378 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -504,7 +504,12 @@  struct xen_domctl_sendtrigger {
 
 
 /* Assign a device to a guest. Sets up IOMMU structures. */
-/* XEN_DOMCTL_assign_device */
+/* XEN_DOMCTL_assign_device
+ * when assigning a PCI device, it is possible to either request
+ * or provide a virtual SBDF. When virtual_sbdf equals to
+ * XEN_DOMCTL_DEV_SDBF_ANY, hypervisor will return allocated
+ * vSBDF back.
+ */
 /*
  * XEN_DOMCTL_test_assign_device: Pass DOMID_INVALID to find out whether the
  * given device is assigned to any DomU at all. Pass a specific domain ID to
@@ -528,6 +533,8 @@  struct xen_domctl_assign_device {
     union {
         struct {
             uint32_t machine_sbdf;   /* machine PCI ID of assigned device */
+            uint32_t virtual_sbdf;   /* IN/OUT virtual SBDF of the device */
+#define XEN_DOMCTL_DEV_SDBF_ANY     0xFFFFFFFF /* request a free SBDF */
         } pci;
         struct {
             uint32_t size; /* Length of the path */
diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
index ec6598237b..f66a641e4f 100644
--- a/xen/include/xen/vpci.h
+++ b/xen/include/xen/vpci.h
@@ -33,7 +33,7 @@  typedef int vpci_register_init_t(struct pci_dev *dev);
                __used_section(".data.vpci." p) = x
 
 /* Assign vPCI to device by adding handlers to device. */
-int __must_check vpci_assign_device(struct pci_dev *pdev);
+int __must_check vpci_assign_device(struct pci_dev *pdev, pci_sbdf_t *vsbdf);
 
 /* Remove all handlers and free vpci related structures. */
 void vpci_deassign_device(struct pci_dev *pdev);
@@ -265,7 +265,7 @@  bool vpci_ecam_read(pci_sbdf_t sbdf, unsigned int reg, unsigned int len,
 #else /* !CONFIG_HAS_VPCI */
 struct vpci_vcpu {};
 
-static inline int vpci_assign_device(struct pci_dev *pdev)
+static inline int vpci_assign_device(struct pci_dev *pdev, pci_sbdf_t *vsbdf)
 {
     return 0;
 }