diff mbox series

[RFC,XEN,v3,1/3] xen/vpci: Clear all vpci status of device

Message ID 20231210164009.1551147-2-Jiqian.Chen@amd.com (mailing list archive)
State Superseded
Headers show
Series Support device passthrough when dom0 is PVH on Xen | expand

Commit Message

Chen, Jiqian Dec. 10, 2023, 4:40 p.m. UTC
When a device has been reset on dom0 side, the vpci on Xen
side won't get notification, so the cached state in vpci is
all out of date compare with the real device state.
To solve that problem, add a new hypercall to clear all vpci
device state. When the state of device is reset on dom0 side,
dom0 can call this hypercall to notify vpci.

Co-developed-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
---
 xen/arch/x86/hvm/hypercall.c |  1 +
 xen/drivers/pci/physdev.c    | 35 +++++++++++++++++++++++++++++++++++
 xen/drivers/vpci/vpci.c      |  9 +++++++++
 xen/include/public/physdev.h |  8 ++++++++
 xen/include/xen/vpci.h       |  6 ++++++
 5 files changed, 59 insertions(+)

Comments

Roger Pau Monné Dec. 11, 2023, 3:22 p.m. UTC | #1
On Mon, Dec 11, 2023 at 12:40:07AM +0800, Jiqian Chen wrote:
> When a device has been reset on dom0 side, the vpci on Xen
> side won't get notification, so the cached state in vpci is
> all out of date compare with the real device state.
> To solve that problem, add a new hypercall to clear all vpci
> device state. When the state of device is reset on dom0 side,
> dom0 can call this hypercall to notify vpci.
> 
> Co-developed-by: Huang Rui <ray.huang@amd.com>
> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
> ---
>  xen/arch/x86/hvm/hypercall.c |  1 +
>  xen/drivers/pci/physdev.c    | 35 +++++++++++++++++++++++++++++++++++
>  xen/drivers/vpci/vpci.c      |  9 +++++++++
>  xen/include/public/physdev.h |  8 ++++++++
>  xen/include/xen/vpci.h       |  6 ++++++
>  5 files changed, 59 insertions(+)
> 
> diff --git a/xen/arch/x86/hvm/hypercall.c b/xen/arch/x86/hvm/hypercall.c
> index eeb73e1aa5..6ad5b4d5f1 100644
> --- a/xen/arch/x86/hvm/hypercall.c
> +++ b/xen/arch/x86/hvm/hypercall.c
> @@ -84,6 +84,7 @@ long hvm_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
>      case PHYSDEVOP_pci_mmcfg_reserved:
>      case PHYSDEVOP_pci_device_add:
>      case PHYSDEVOP_pci_device_remove:
> +    case PHYSDEVOP_pci_device_state_reset:
>      case PHYSDEVOP_dbgp_op:
>          if ( !is_hardware_domain(currd) )
>              return -ENOSYS;
> diff --git a/xen/drivers/pci/physdev.c b/xen/drivers/pci/physdev.c
> index 42db3e6d13..6ee2edb86a 100644
> --- a/xen/drivers/pci/physdev.c
> +++ b/xen/drivers/pci/physdev.c
> @@ -2,6 +2,7 @@
>  #include <xen/guest_access.h>
>  #include <xen/hypercall.h>
>  #include <xen/init.h>
> +#include <xen/vpci.h>
>  
>  #ifndef COMPAT
>  typedef long ret_t;
> @@ -67,6 +68,40 @@ ret_t pci_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
>          break;
>      }
>  
> +    case PHYSDEVOP_pci_device_state_reset: {
> +        struct physdev_pci_device dev;
> +        struct pci_dev *pdev;
> +
> +        if ( !is_pci_passthrough_enabled() )
> +            return -EOPNOTSUPP;
> +
> +        ret = -EFAULT;
> +        if ( copy_from_guest(&dev, arg, 1) != 0 )
> +            break;
> +
> +        ret = xsm_resource_setup_pci(XSM_PRIV,
> +                                     (dev.seg << 16) | (dev.bus << 8) |
> +                                     dev.devfn);
> +        if ( ret )
> +            break;
> +
> +        pcidevs_lock();
> +        pdev = pci_get_pdev(NULL, PCI_SBDF(dev.seg, dev.bus, dev.devfn));
> +        if ( !pdev )
> +        {
> +            ret = -ENODEV;
> +            pcidevs_unlock();

Nit: assigning -ENODEV could be done after dropping the lock.

> +            break;
> +        }
> +
> +        ret = vpci_reset_device_state(pdev);
> +        if ( ret )
> +            printk(XENLOG_ERR "PCI reset device %pp state failed\n",
> +                   &pdev->sbdf);

Please do the print outside of the locked region, there's no need to
hold the pcidevs lock after the vpci_reset_device_state() call if you
use the data in the local 'dev' variable to print the SBDF.

It would also be fine if you want to introduce a local sbdf variable
that you can use both here and in the call to pci_get_pdev() above.

I think it's also easier to parse if the SBDF is at the begging of the
message:

"%pp: failed to reset PCI device state\n"

That's however a question of taste.

> +        pcidevs_unlock();
> +        break;
> +    }
> +
>      default:
>          ret = -ENOSYS;
>          break;
> diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
> index 72ef277c4f..3c64cb10cc 100644
> --- a/xen/drivers/vpci/vpci.c
> +++ b/xen/drivers/vpci/vpci.c
> @@ -107,6 +107,15 @@ int vpci_add_handlers(struct pci_dev *pdev)
>  
>      return rc;
>  }
> +
> +int vpci_reset_device_state(struct pci_dev *pdev)
> +{
> +    ASSERT(pcidevs_locked());
> +
> +    vpci_remove_device(pdev);
> +    return vpci_add_handlers(pdev);
> +}
> +
>  #endif /* __XEN__ */
>  
>  static int vpci_register_cmp(const struct vpci_register *r1,
> diff --git a/xen/include/public/physdev.h b/xen/include/public/physdev.h
> index f0c0d4727c..92c2f28bca 100644
> --- a/xen/include/public/physdev.h
> +++ b/xen/include/public/physdev.h
> @@ -296,6 +296,14 @@ DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_add_t);
>   */
>  #define PHYSDEVOP_prepare_msix          30
>  #define PHYSDEVOP_release_msix          31
> +/*
> + * On PVH dom0, when device is reset, the vpci on Xen side
> + * won't get notification, so that the cached state in vpci is
> + * all out of date with the real device state. Use this to reset
> + * the vpci state of device.
> + */

I get this feeling this is too specific to vpci, when the hypercall
itself could be used for other purposes in the future.  I would
instead write:

/*
 * Notify the hypervisor that a PCI device has been reset, so that any
 * internally cached state is regenerated.  Should be called after any
 * device reset performed by the hardware domain.
 */

> +#define PHYSDEVOP_pci_device_state_reset     32
> +
>  struct physdev_pci_device {
>      /* IN */
>      uint16_t seg;
> diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
> index d20c301a3d..d6377424f0 100644
> --- a/xen/include/xen/vpci.h
> +++ b/xen/include/xen/vpci.h
> @@ -30,6 +30,7 @@ int __must_check vpci_add_handlers(struct pci_dev *pdev);
>  
>  /* Remove all handlers and free vpci related structures. */
>  void vpci_remove_device(struct pci_dev *pdev);
> +int vpci_reset_device_state(struct pci_dev *pdev);

__must_check please.

Thanks, Roger.
Chen, Jiqian Dec. 12, 2023, 6:44 a.m. UTC | #2
On 2023/12/11 23:22, Roger Pau Monné wrote:
> On Mon, Dec 11, 2023 at 12:40:07AM +0800, Jiqian Chen wrote:
>> When a device has been reset on dom0 side, the vpci on Xen
>> side won't get notification, so the cached state in vpci is
>> all out of date compare with the real device state.
>> To solve that problem, add a new hypercall to clear all vpci
>> device state. When the state of device is reset on dom0 side,
>> dom0 can call this hypercall to notify vpci.
>>
>> Co-developed-by: Huang Rui <ray.huang@amd.com>
>> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
>> ---
>>  xen/arch/x86/hvm/hypercall.c |  1 +
>>  xen/drivers/pci/physdev.c    | 35 +++++++++++++++++++++++++++++++++++
>>  xen/drivers/vpci/vpci.c      |  9 +++++++++
>>  xen/include/public/physdev.h |  8 ++++++++
>>  xen/include/xen/vpci.h       |  6 ++++++
>>  5 files changed, 59 insertions(+)
>>
>> diff --git a/xen/arch/x86/hvm/hypercall.c b/xen/arch/x86/hvm/hypercall.c
>> index eeb73e1aa5..6ad5b4d5f1 100644
>> --- a/xen/arch/x86/hvm/hypercall.c
>> +++ b/xen/arch/x86/hvm/hypercall.c
>> @@ -84,6 +84,7 @@ long hvm_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
>>      case PHYSDEVOP_pci_mmcfg_reserved:
>>      case PHYSDEVOP_pci_device_add:
>>      case PHYSDEVOP_pci_device_remove:
>> +    case PHYSDEVOP_pci_device_state_reset:
>>      case PHYSDEVOP_dbgp_op:
>>          if ( !is_hardware_domain(currd) )
>>              return -ENOSYS;
>> diff --git a/xen/drivers/pci/physdev.c b/xen/drivers/pci/physdev.c
>> index 42db3e6d13..6ee2edb86a 100644
>> --- a/xen/drivers/pci/physdev.c
>> +++ b/xen/drivers/pci/physdev.c
>> @@ -2,6 +2,7 @@
>>  #include <xen/guest_access.h>
>>  #include <xen/hypercall.h>
>>  #include <xen/init.h>
>> +#include <xen/vpci.h>
>>  
>>  #ifndef COMPAT
>>  typedef long ret_t;
>> @@ -67,6 +68,40 @@ ret_t pci_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
>>          break;
>>      }
>>  
>> +    case PHYSDEVOP_pci_device_state_reset: {
>> +        struct physdev_pci_device dev;
>> +        struct pci_dev *pdev;
>> +
>> +        if ( !is_pci_passthrough_enabled() )
>> +            return -EOPNOTSUPP;
>> +
>> +        ret = -EFAULT;
>> +        if ( copy_from_guest(&dev, arg, 1) != 0 )
>> +            break;
>> +
>> +        ret = xsm_resource_setup_pci(XSM_PRIV,
>> +                                     (dev.seg << 16) | (dev.bus << 8) |
>> +                                     dev.devfn);
>> +        if ( ret )
>> +            break;
>> +
>> +        pcidevs_lock();
>> +        pdev = pci_get_pdev(NULL, PCI_SBDF(dev.seg, dev.bus, dev.devfn));
>> +        if ( !pdev )
>> +        {
>> +            ret = -ENODEV;
>> +            pcidevs_unlock();
> 
> Nit: assigning -ENODEV could be done after dropping the lock.
I will adjust the sequence in next version, thanks.

> 
>> +            break;
>> +        }
>> +
>> +        ret = vpci_reset_device_state(pdev);
>> +        if ( ret )
>> +            printk(XENLOG_ERR "PCI reset device %pp state failed\n",
>> +                   &pdev->sbdf);
> 
> Please do the print outside of the locked region, there's no need to
> hold the pcidevs lock after the vpci_reset_device_state() call if you
> use the data in the local 'dev' variable to print the SBDF.
> 
> It would also be fine if you want to introduce a local sbdf variable
> that you can use both here and in the call to pci_get_pdev() above.
> 
> I think it's also easier to parse if the SBDF is at the begging of the
> message:
> 
> "%pp: failed to reset PCI device state\n"
> 
> That's however a question of taste.
Nice suggestion, I will adjust according to all what you said. Thanks.

> 
>> +        pcidevs_unlock();
>> +        break;
>> +    }
>> +
>>      default:
>>          ret = -ENOSYS;
>>          break;
>> diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
>> index 72ef277c4f..3c64cb10cc 100644
>> --- a/xen/drivers/vpci/vpci.c
>> +++ b/xen/drivers/vpci/vpci.c
>> @@ -107,6 +107,15 @@ int vpci_add_handlers(struct pci_dev *pdev)
>>  
>>      return rc;
>>  }
>> +
>> +int vpci_reset_device_state(struct pci_dev *pdev)
>> +{
>> +    ASSERT(pcidevs_locked());
>> +
>> +    vpci_remove_device(pdev);
>> +    return vpci_add_handlers(pdev);
>> +}
>> +
>>  #endif /* __XEN__ */
>>  
>>  static int vpci_register_cmp(const struct vpci_register *r1,
>> diff --git a/xen/include/public/physdev.h b/xen/include/public/physdev.h
>> index f0c0d4727c..92c2f28bca 100644
>> --- a/xen/include/public/physdev.h
>> +++ b/xen/include/public/physdev.h
>> @@ -296,6 +296,14 @@ DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_add_t);
>>   */
>>  #define PHYSDEVOP_prepare_msix          30
>>  #define PHYSDEVOP_release_msix          31
>> +/*
>> + * On PVH dom0, when device is reset, the vpci on Xen side
>> + * won't get notification, so that the cached state in vpci is
>> + * all out of date with the real device state. Use this to reset
>> + * the vpci state of device.
>> + */
> 
> I get this feeling this is too specific to vpci, when the hypercall
> itself could be used for other purposes in the future.  I would
> instead write:
> 
> /*
>  * Notify the hypervisor that a PCI device has been reset, so that any
>  * internally cached state is regenerated.  Should be called after any
>  * device reset performed by the hardware domain.
>  */
Thanks, will use this in next version.

> 
>> +#define PHYSDEVOP_pci_device_state_reset     32
>> +
>>  struct physdev_pci_device {
>>      /* IN */
>>      uint16_t seg;
>> diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
>> index d20c301a3d..d6377424f0 100644
>> --- a/xen/include/xen/vpci.h
>> +++ b/xen/include/xen/vpci.h
>> @@ -30,6 +30,7 @@ int __must_check vpci_add_handlers(struct pci_dev *pdev);
>>  
>>  /* Remove all handlers and free vpci related structures. */
>>  void vpci_remove_device(struct pci_dev *pdev);
>> +int vpci_reset_device_state(struct pci_dev *pdev);
> 
> __must_check please.
> 
> Thanks, Roger.
diff mbox series

Patch

diff --git a/xen/arch/x86/hvm/hypercall.c b/xen/arch/x86/hvm/hypercall.c
index eeb73e1aa5..6ad5b4d5f1 100644
--- a/xen/arch/x86/hvm/hypercall.c
+++ b/xen/arch/x86/hvm/hypercall.c
@@ -84,6 +84,7 @@  long hvm_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
     case PHYSDEVOP_pci_mmcfg_reserved:
     case PHYSDEVOP_pci_device_add:
     case PHYSDEVOP_pci_device_remove:
+    case PHYSDEVOP_pci_device_state_reset:
     case PHYSDEVOP_dbgp_op:
         if ( !is_hardware_domain(currd) )
             return -ENOSYS;
diff --git a/xen/drivers/pci/physdev.c b/xen/drivers/pci/physdev.c
index 42db3e6d13..6ee2edb86a 100644
--- a/xen/drivers/pci/physdev.c
+++ b/xen/drivers/pci/physdev.c
@@ -2,6 +2,7 @@ 
 #include <xen/guest_access.h>
 #include <xen/hypercall.h>
 #include <xen/init.h>
+#include <xen/vpci.h>
 
 #ifndef COMPAT
 typedef long ret_t;
@@ -67,6 +68,40 @@  ret_t pci_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
         break;
     }
 
+    case PHYSDEVOP_pci_device_state_reset: {
+        struct physdev_pci_device dev;
+        struct pci_dev *pdev;
+
+        if ( !is_pci_passthrough_enabled() )
+            return -EOPNOTSUPP;
+
+        ret = -EFAULT;
+        if ( copy_from_guest(&dev, arg, 1) != 0 )
+            break;
+
+        ret = xsm_resource_setup_pci(XSM_PRIV,
+                                     (dev.seg << 16) | (dev.bus << 8) |
+                                     dev.devfn);
+        if ( ret )
+            break;
+
+        pcidevs_lock();
+        pdev = pci_get_pdev(NULL, PCI_SBDF(dev.seg, dev.bus, dev.devfn));
+        if ( !pdev )
+        {
+            ret = -ENODEV;
+            pcidevs_unlock();
+            break;
+        }
+
+        ret = vpci_reset_device_state(pdev);
+        if ( ret )
+            printk(XENLOG_ERR "PCI reset device %pp state failed\n",
+                   &pdev->sbdf);
+        pcidevs_unlock();
+        break;
+    }
+
     default:
         ret = -ENOSYS;
         break;
diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
index 72ef277c4f..3c64cb10cc 100644
--- a/xen/drivers/vpci/vpci.c
+++ b/xen/drivers/vpci/vpci.c
@@ -107,6 +107,15 @@  int vpci_add_handlers(struct pci_dev *pdev)
 
     return rc;
 }
+
+int vpci_reset_device_state(struct pci_dev *pdev)
+{
+    ASSERT(pcidevs_locked());
+
+    vpci_remove_device(pdev);
+    return vpci_add_handlers(pdev);
+}
+
 #endif /* __XEN__ */
 
 static int vpci_register_cmp(const struct vpci_register *r1,
diff --git a/xen/include/public/physdev.h b/xen/include/public/physdev.h
index f0c0d4727c..92c2f28bca 100644
--- a/xen/include/public/physdev.h
+++ b/xen/include/public/physdev.h
@@ -296,6 +296,14 @@  DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_add_t);
  */
 #define PHYSDEVOP_prepare_msix          30
 #define PHYSDEVOP_release_msix          31
+/*
+ * On PVH dom0, when device is reset, the vpci on Xen side
+ * won't get notification, so that the cached state in vpci is
+ * all out of date with the real device state. Use this to reset
+ * the vpci state of device.
+ */
+#define PHYSDEVOP_pci_device_state_reset     32
+
 struct physdev_pci_device {
     /* IN */
     uint16_t seg;
diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
index d20c301a3d..d6377424f0 100644
--- a/xen/include/xen/vpci.h
+++ b/xen/include/xen/vpci.h
@@ -30,6 +30,7 @@  int __must_check vpci_add_handlers(struct pci_dev *pdev);
 
 /* Remove all handlers and free vpci related structures. */
 void vpci_remove_device(struct pci_dev *pdev);
+int vpci_reset_device_state(struct pci_dev *pdev);
 
 /* Add/remove a register handler. */
 int __must_check vpci_add_register_mask(struct vpci *vpci,
@@ -262,6 +263,11 @@  static inline int vpci_add_handlers(struct pci_dev *pdev)
 
 static inline void vpci_remove_device(struct pci_dev *pdev) { }
 
+static inline int vpci_reset_device_state(struct pci_dev *pdev)
+{
+    return 0;
+}
+
 static inline void vpci_dump_msi(void) { }
 
 static inline uint32_t vpci_read(pci_sbdf_t sbdf, unsigned int reg,