diff mbox series

[XEN,v11,1/8] xen/vpci: Clear all vpci status of device

Message ID 20240630123344.20623-2-Jiqian.Chen@amd.com (mailing list archive)
State New
Headers show
Series Support device passthrough when dom0 is PVH on Xen | expand

Commit Message

Chen, Jiqian June 30, 2024, 12:33 p.m. UTC
When a device has been reset on the dom0 side, vpci on the Xen
side is not notified, so the state cached in vpci is out of
date compared with the real device state.
To solve that problem, add a new hypercall to clear all vpci
device state. When the state of a device is reset on the dom0
side, dom0 can call this hypercall to notify vpci.

Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
Signed-off-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
Reviewed-by: Stewart Hildebrand <stewart.hildebrand@amd.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
---
 xen/arch/x86/hvm/hypercall.c |  1 +
 xen/drivers/pci/physdev.c    | 58 ++++++++++++++++++++++++++++++++++++
 xen/drivers/vpci/vpci.c      | 10 +++++++
 xen/include/public/physdev.h | 20 +++++++++++++
 xen/include/xen/vpci.h       |  8 +++++
 5 files changed, 97 insertions(+)

Comments

Jan Beulich July 1, 2024, 7:18 a.m. UTC | #1
On 30.06.2024 14:33, Jiqian Chen wrote:
> When a device has been reset on dom0 side, the vpci on Xen
> side won't get notification, so the cached state in vpci is
> all out of date compare with the real device state.
> To solve that problem, add a new hypercall to clear all vpci
> device state. When the state of device is reset on dom0 side,
> dom0 can call this hypercall to notify vpci.

While the description properly talks about all of this being about device
reset, the title suggests otherwise (leaving open what the context is, thus
- to me at least - suggesting it's during vPCI init for a particular
device).

> @@ -67,6 +68,63 @@ ret_t pci_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
>          break;
>      }
>  
> +    case PHYSDEVOP_pci_device_state_reset:
> +    {
> +        struct pci_device_state_reset dev_reset;
> +        struct pci_dev *pdev;
> +        pci_sbdf_t sbdf;
> +
> +        ret = -EOPNOTSUPP;
> +        if ( !is_pci_passthrough_enabled() )
> +            break;
> +
> +        ret = -EFAULT;
> +        if ( copy_from_guest(&dev_reset, arg, 1) != 0 )
> +            break;
> +
> +        sbdf = PCI_SBDF(dev_reset.dev.seg,
> +                        dev_reset.dev.bus,
> +                        dev_reset.dev.devfn);
> +
> +        ret = xsm_resource_setup_pci(XSM_PRIV, sbdf.sbdf);
> +        if ( ret )
> +            break;
> +
> +        pcidevs_lock();
> +        pdev = pci_get_pdev(NULL, sbdf);
> +        if ( !pdev )
> +        {
> +            pcidevs_unlock();
> +            ret = -ENODEV;
> +            break;
> +        }
> +
> +        write_lock(&pdev->domain->pci_lock);
> +        pcidevs_unlock();
> +        /* Implement FLR, other reset types may be implemented in future */

The comment isn't in sync with the code anymore.

> +        switch ( dev_reset.reset_type )
> +        {
> +        case PCI_DEVICE_STATE_RESET_COLD:
> +        case PCI_DEVICE_STATE_RESET_WARM:
> +        case PCI_DEVICE_STATE_RESET_HOT:
> +        case PCI_DEVICE_STATE_RESET_FLR:
> +        {

This brace isn't needed while at the same time it is confusing.

> +            ret = vpci_reset_device_state(pdev, dev_reset.reset_type);
> +            if ( ret )
> +                dprintk(XENLOG_ERR,
> +                        "%pp: failed to reset vPCI device state\n", &sbdf);

I question the need for a log message here.

> --- a/xen/include/public/physdev.h
> +++ b/xen/include/public/physdev.h
> @@ -296,6 +296,13 @@ DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_add_t);
>   */
>  #define PHYSDEVOP_prepare_msix          30
>  #define PHYSDEVOP_release_msix          31
> +/*
> + * Notify the hypervisor that a PCI device has been reset, so that any
> + * internally cached state is regenerated.  Should be called after any
> + * device reset performed by the hardware domain.
> + */
> +#define PHYSDEVOP_pci_device_state_reset 32
> +
>  struct physdev_pci_device {
>      /* IN */
>      uint16_t seg;
> @@ -305,6 +312,19 @@ struct physdev_pci_device {
>  typedef struct physdev_pci_device physdev_pci_device_t;
>  DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_t);
>  
> +struct pci_device_state_reset {
> +    physdev_pci_device_t dev;
> +#define _PCI_DEVICE_STATE_RESET_COLD 0
> +#define PCI_DEVICE_STATE_RESET_COLD  (1U<<_PCI_DEVICE_STATE_RESET_COLD)
> +#define _PCI_DEVICE_STATE_RESET_WARM 1
> +#define PCI_DEVICE_STATE_RESET_WARM  (1U<<_PCI_DEVICE_STATE_RESET_WARM)
> +#define _PCI_DEVICE_STATE_RESET_HOT  2
> +#define PCI_DEVICE_STATE_RESET_HOT   (1U<<_PCI_DEVICE_STATE_RESET_HOT)
> +#define _PCI_DEVICE_STATE_RESET_FLR  3
> +#define PCI_DEVICE_STATE_RESET_FLR   (1U<<_PCI_DEVICE_STATE_RESET_FLR)
> +    uint32_t reset_type;
> +};

Do we really need the _PCI_DEVICE_STATE_RESET_* bit positions as separate
#define-s? I can't spot any use anywhere.

Jan
Chen, Jiqian July 2, 2024, 2:59 a.m. UTC | #2
On 2024/7/1 15:18, Jan Beulich wrote:
> On 30.06.2024 14:33, Jiqian Chen wrote:
>> When a device has been reset on dom0 side, the vpci on Xen
>> side won't get notification, so the cached state in vpci is
>> all out of date compare with the real device state.
>> To solve that problem, add a new hypercall to clear all vpci
>> device state. When the state of device is reset on dom0 side,
>> dom0 can call this hypercall to notify vpci.
> 
> While the description properly talks about all of this being about device
> reset, the title suggests otherwise (leaving open what the context is, thus
> - to me at least - suggesting it's during vPCI init for a particular
> device).
Change title to "xen/pci: Add hypercall to support reset of pcidev" ?

> 
>> @@ -67,6 +68,63 @@ ret_t pci_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
>>          break;
>>      }
>>  
>> +    case PHYSDEVOP_pci_device_state_reset:
>> +    {
>> +        struct pci_device_state_reset dev_reset;
>> +        struct pci_dev *pdev;
>> +        pci_sbdf_t sbdf;
>> +
>> +        ret = -EOPNOTSUPP;
>> +        if ( !is_pci_passthrough_enabled() )
>> +            break;
>> +
>> +        ret = -EFAULT;
>> +        if ( copy_from_guest(&dev_reset, arg, 1) != 0 )
>> +            break;
>> +
>> +        sbdf = PCI_SBDF(dev_reset.dev.seg,
>> +                        dev_reset.dev.bus,
>> +                        dev_reset.dev.devfn);
>> +
>> +        ret = xsm_resource_setup_pci(XSM_PRIV, sbdf.sbdf);
>> +        if ( ret )
>> +            break;
>> +
>> +        pcidevs_lock();
>> +        pdev = pci_get_pdev(NULL, sbdf);
>> +        if ( !pdev )
>> +        {
>> +            pcidevs_unlock();
>> +            ret = -ENODEV;
>> +            break;
>> +        }
>> +
>> +        write_lock(&pdev->domain->pci_lock);
>> +        pcidevs_unlock();
>> +        /* Implement FLR, other reset types may be implemented in future */
> 
> The comment isn't in sync with the code anymore.
Change to "/* vpci_reset_device_state is called by default for all reset types, other specific operations can be added later as needed */" ?

> 
>> +        switch ( dev_reset.reset_type )
>> +        {
>> +        case PCI_DEVICE_STATE_RESET_COLD:
>> +        case PCI_DEVICE_STATE_RESET_WARM:
>> +        case PCI_DEVICE_STATE_RESET_HOT:
>> +        case PCI_DEVICE_STATE_RESET_FLR:
>> +        {
> 
> This brace isn't needed while at the same time it is confusing.
> 
>> +            ret = vpci_reset_device_state(pdev, dev_reset.reset_type);
>> +            if ( ret )
>> +                dprintk(XENLOG_ERR,
>> +                        "%pp: failed to reset vPCI device state\n", &sbdf);
> 
> I question the need for a log message here.
OK, will delete it in next version.

> 
>> --- a/xen/include/public/physdev.h
>> +++ b/xen/include/public/physdev.h
>> @@ -296,6 +296,13 @@ DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_add_t);
>>   */
>>  #define PHYSDEVOP_prepare_msix          30
>>  #define PHYSDEVOP_release_msix          31
>> +/*
>> + * Notify the hypervisor that a PCI device has been reset, so that any
>> + * internally cached state is regenerated.  Should be called after any
>> + * device reset performed by the hardware domain.
>> + */
>> +#define PHYSDEVOP_pci_device_state_reset 32
>> +
>>  struct physdev_pci_device {
>>      /* IN */
>>      uint16_t seg;
>> @@ -305,6 +312,19 @@ struct physdev_pci_device {
>>  typedef struct physdev_pci_device physdev_pci_device_t;
>>  DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_t);
>>  
>> +struct pci_device_state_reset {
>> +    physdev_pci_device_t dev;
>> +#define _PCI_DEVICE_STATE_RESET_COLD 0
>> +#define PCI_DEVICE_STATE_RESET_COLD  (1U<<_PCI_DEVICE_STATE_RESET_COLD)
>> +#define _PCI_DEVICE_STATE_RESET_WARM 1
>> +#define PCI_DEVICE_STATE_RESET_WARM  (1U<<_PCI_DEVICE_STATE_RESET_WARM)
>> +#define _PCI_DEVICE_STATE_RESET_HOT  2
>> +#define PCI_DEVICE_STATE_RESET_HOT   (1U<<_PCI_DEVICE_STATE_RESET_HOT)
>> +#define _PCI_DEVICE_STATE_RESET_FLR  3
>> +#define PCI_DEVICE_STATE_RESET_FLR   (1U<<_PCI_DEVICE_STATE_RESET_FLR)
>> +    uint32_t reset_type;
>> +};
> 
> Do we really need the _PCI_DEVICE_STATE_RESET_* bit positions as separate
> #define-s? I can't spot any use anywhere.
I thought it was a coding style.
I will delete them in next version.

> 
> Jan
Jan Beulich July 2, 2024, 8:40 a.m. UTC | #3
On 02.07.2024 04:59, Chen, Jiqian wrote:
> On 2024/7/1 15:18, Jan Beulich wrote:
>> On 30.06.2024 14:33, Jiqian Chen wrote:
>>> When a device has been reset on dom0 side, the vpci on Xen
>>> side won't get notification, so the cached state in vpci is
>>> all out of date compare with the real device state.
>>> To solve that problem, add a new hypercall to clear all vpci
>>> device state. When the state of device is reset on dom0 side,
>>> dom0 can call this hypercall to notify vpci.
>>
>> While the description properly talks about all of this being about device
>> reset, the title suggests otherwise (leaving open what the context is, thus
>> - to me at least - suggesting it's during vPCI init for a particular
>> device).
> Change title to "xen/pci: Add hypercall to support reset of pcidev" ?

Perhaps.

>>> @@ -67,6 +68,63 @@ ret_t pci_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
>>>          break;
>>>      }
>>>  
>>> +    case PHYSDEVOP_pci_device_state_reset:
>>> +    {
>>> +        struct pci_device_state_reset dev_reset;
>>> +        struct pci_dev *pdev;
>>> +        pci_sbdf_t sbdf;
>>> +
>>> +        ret = -EOPNOTSUPP;
>>> +        if ( !is_pci_passthrough_enabled() )
>>> +            break;
>>> +
>>> +        ret = -EFAULT;
>>> +        if ( copy_from_guest(&dev_reset, arg, 1) != 0 )
>>> +            break;
>>> +
>>> +        sbdf = PCI_SBDF(dev_reset.dev.seg,
>>> +                        dev_reset.dev.bus,
>>> +                        dev_reset.dev.devfn);
>>> +
>>> +        ret = xsm_resource_setup_pci(XSM_PRIV, sbdf.sbdf);
>>> +        if ( ret )
>>> +            break;
>>> +
>>> +        pcidevs_lock();
>>> +        pdev = pci_get_pdev(NULL, sbdf);
>>> +        if ( !pdev )
>>> +        {
>>> +            pcidevs_unlock();
>>> +            ret = -ENODEV;
>>> +            break;
>>> +        }
>>> +
>>> +        write_lock(&pdev->domain->pci_lock);
>>> +        pcidevs_unlock();
>>> +        /* Implement FLR, other reset types may be implemented in future */
>>
>> The comment isn't in sync with the code anymore.
> Change to "/* vpci_reset_device_state is called by default for all reset types, other specific operations can be added later as needed */" ?

Counter question: Is such a comment really adding any value?

Jan
diff mbox series

Patch

diff --git a/xen/arch/x86/hvm/hypercall.c b/xen/arch/x86/hvm/hypercall.c
index 7fb3136f0c7c..0fab670a4871 100644
--- a/xen/arch/x86/hvm/hypercall.c
+++ b/xen/arch/x86/hvm/hypercall.c
@@ -83,6 +83,7 @@  long hvm_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
     case PHYSDEVOP_pci_mmcfg_reserved:
     case PHYSDEVOP_pci_device_add:
     case PHYSDEVOP_pci_device_remove:
+    case PHYSDEVOP_pci_device_state_reset:
     case PHYSDEVOP_dbgp_op:
         if ( !is_hardware_domain(currd) )
             return -ENOSYS;
diff --git a/xen/drivers/pci/physdev.c b/xen/drivers/pci/physdev.c
index 42db3e6d133c..19a755d1c127 100644
--- a/xen/drivers/pci/physdev.c
+++ b/xen/drivers/pci/physdev.c
@@ -2,6 +2,7 @@ 
 #include <xen/guest_access.h>
 #include <xen/hypercall.h>
 #include <xen/init.h>
+#include <xen/vpci.h>
 
 #ifndef COMPAT
 typedef long ret_t;
@@ -67,6 +68,63 @@  ret_t pci_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
         break;
     }
 
+    case PHYSDEVOP_pci_device_state_reset:
+    {
+        struct pci_device_state_reset dev_reset;
+        struct pci_dev *pdev;
+        pci_sbdf_t sbdf;
+
+        ret = -EOPNOTSUPP;
+        if ( !is_pci_passthrough_enabled() )
+            break;
+
+        ret = -EFAULT;
+        if ( copy_from_guest(&dev_reset, arg, 1) != 0 )
+            break;
+
+        sbdf = PCI_SBDF(dev_reset.dev.seg,
+                        dev_reset.dev.bus,
+                        dev_reset.dev.devfn);
+
+        ret = xsm_resource_setup_pci(XSM_PRIV, sbdf.sbdf);
+        if ( ret )
+            break;
+
+        pcidevs_lock();
+        pdev = pci_get_pdev(NULL, sbdf);
+        if ( !pdev )
+        {
+            pcidevs_unlock();
+            ret = -ENODEV;
+            break;
+        }
+
+        write_lock(&pdev->domain->pci_lock);
+        pcidevs_unlock();
+        /* Implement FLR, other reset types may be implemented in future */
+        switch ( dev_reset.reset_type )
+        {
+        case PCI_DEVICE_STATE_RESET_COLD:
+        case PCI_DEVICE_STATE_RESET_WARM:
+        case PCI_DEVICE_STATE_RESET_HOT:
+        case PCI_DEVICE_STATE_RESET_FLR:
+        {
+            ret = vpci_reset_device_state(pdev, dev_reset.reset_type);
+            if ( ret )
+                dprintk(XENLOG_ERR,
+                        "%pp: failed to reset vPCI device state\n", &sbdf);
+            break;
+        }
+
+        default:
+            ret = -EOPNOTSUPP;
+            break;
+        }
+        write_unlock(&pdev->domain->pci_lock);
+
+        break;
+    }
+
     default:
         ret = -ENOSYS;
         break;
diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
index 1e6aa5d799b9..7e914d1eff9f 100644
--- a/xen/drivers/vpci/vpci.c
+++ b/xen/drivers/vpci/vpci.c
@@ -172,6 +172,16 @@  int vpci_assign_device(struct pci_dev *pdev)
 
     return rc;
 }
+
+int vpci_reset_device_state(struct pci_dev *pdev,
+                            uint32_t reset_type)
+{
+    ASSERT(rw_is_write_locked(&pdev->domain->pci_lock));
+
+    vpci_deassign_device(pdev);
+    return vpci_assign_device(pdev);
+}
+
 #endif /* __XEN__ */
 
 static int vpci_register_cmp(const struct vpci_register *r1,
diff --git a/xen/include/public/physdev.h b/xen/include/public/physdev.h
index f0c0d4727c0b..ddbcdfb05248 100644
--- a/xen/include/public/physdev.h
+++ b/xen/include/public/physdev.h
@@ -296,6 +296,13 @@  DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_add_t);
  */
 #define PHYSDEVOP_prepare_msix          30
 #define PHYSDEVOP_release_msix          31
+/*
+ * Notify the hypervisor that a PCI device has been reset, so that any
+ * internally cached state is regenerated.  Should be called after any
+ * device reset performed by the hardware domain.
+ */
+#define PHYSDEVOP_pci_device_state_reset 32
+
 struct physdev_pci_device {
     /* IN */
     uint16_t seg;
@@ -305,6 +312,19 @@  struct physdev_pci_device {
 typedef struct physdev_pci_device physdev_pci_device_t;
 DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_t);
 
+struct pci_device_state_reset {
+    physdev_pci_device_t dev;
+#define _PCI_DEVICE_STATE_RESET_COLD 0
+#define PCI_DEVICE_STATE_RESET_COLD  (1U<<_PCI_DEVICE_STATE_RESET_COLD)
+#define _PCI_DEVICE_STATE_RESET_WARM 1
+#define PCI_DEVICE_STATE_RESET_WARM  (1U<<_PCI_DEVICE_STATE_RESET_WARM)
+#define _PCI_DEVICE_STATE_RESET_HOT  2
+#define PCI_DEVICE_STATE_RESET_HOT   (1U<<_PCI_DEVICE_STATE_RESET_HOT)
+#define _PCI_DEVICE_STATE_RESET_FLR  3
+#define PCI_DEVICE_STATE_RESET_FLR   (1U<<_PCI_DEVICE_STATE_RESET_FLR)
+    uint32_t reset_type;
+};
+
 #define PHYSDEVOP_DBGP_RESET_PREPARE    1
 #define PHYSDEVOP_DBGP_RESET_DONE       2
 
diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
index da8d0f41e6f4..6be812dbc04a 100644
--- a/xen/include/xen/vpci.h
+++ b/xen/include/xen/vpci.h
@@ -38,6 +38,8 @@  int __must_check vpci_assign_device(struct pci_dev *pdev);
 
 /* Remove all handlers and free vpci related structures. */
 void vpci_deassign_device(struct pci_dev *pdev);
+int __must_check vpci_reset_device_state(struct pci_dev *pdev,
+                                         uint32_t reset_type);
 
 /* Add/remove a register handler. */
 int __must_check vpci_add_register_mask(struct vpci *vpci,
@@ -282,6 +284,12 @@  static inline int vpci_assign_device(struct pci_dev *pdev)
 
 static inline void vpci_deassign_device(struct pci_dev *pdev) { }
 
+static inline int __must_check vpci_reset_device_state(struct pci_dev *pdev,
+                                                       uint32_t reset_type)
+{
+    return 0;
+}
+
 static inline void vpci_dump_msi(void) { }
 
 static inline uint32_t vpci_read(pci_sbdf_t sbdf, unsigned int reg,