diff mbox series

[RFC,09/13] vfio/pci: introduce CXL device awareness

Message ID 20240920223446.1908673-10-zhiw@nvidia.com (mailing list archive)
State New, archived
Headers show
Series vfio: introduce vfio-cxl to support CXL type-2 accelerator passthrough | expand

Commit Message

Zhi Wang Sept. 20, 2024, 10:34 p.m. UTC
CXL device programming interfaces are built upon PCI interfaces. Thus
the vfio-pci-core can be leveraged to handle a CXL device.

However, CXL device also has difference with PCI devicce:

- No INTX support, only MSI/MSIX is supported.
- Resest is one via CXL reset. FLR only resets CXL.io.

Introduce the CXL device awareness to the vfio-pci-core. Expose a new
VFIO device flags to the userspace to identify the VFIO device is a CXL
device. Disable INTX support in the vfio-pci-core. Disable FLR reset for
the CXL device as the kernel CXL core hasn't support CXL reset yet.
Disable mmap support on the CXL MMIO BAR in vfio-pci-core.

Signed-off-by: Zhi Wang <zhiw@nvidia.com>
---
 drivers/vfio/pci/vfio_cxl_core.c |  8 ++++++
 drivers/vfio/pci/vfio_pci_core.c | 42 +++++++++++++++++++++-----------
 include/linux/vfio_pci_core.h    |  2 ++
 include/uapi/linux/vfio.h        |  1 +
 4 files changed, 39 insertions(+), 14 deletions(-)

Comments

Alex Williamson Oct. 11, 2024, 8:37 p.m. UTC | #1
On Fri, 20 Sep 2024 15:34:42 -0700
Zhi Wang <zhiw@nvidia.com> wrote:

> CXL device programming interfaces are built upon PCI interfaces. Thus
> the vfio-pci-core can be leveraged to handle a CXL device.
> 
> However, CXL device also has difference with PCI devicce:
> 
> - No INTX support, only MSI/MSIX is supported.
> - Resest is one via CXL reset. FLR only resets CXL.io.
> 
> Introduce the CXL device awareness to the vfio-pci-core. Expose a new
> VFIO device flags to the userspace to identify the VFIO device is a CXL
> device. Disable INTX support in the vfio-pci-core. Disable FLR reset for
> the CXL device as the kernel CXL core hasn't support CXL reset yet.
> Disable mmap support on the CXL MMIO BAR in vfio-pci-core.

Why are we disabling mmap on the entire BAR when we have sparse mmap
support to handle disabling mmap only on a portion of the BAR, which
seems to be the case needed here.  Am I mistaken?
 
> Signed-off-by: Zhi Wang <zhiw@nvidia.com>
> ---
>  drivers/vfio/pci/vfio_cxl_core.c |  8 ++++++
>  drivers/vfio/pci/vfio_pci_core.c | 42 +++++++++++++++++++++-----------
>  include/linux/vfio_pci_core.h    |  2 ++
>  include/uapi/linux/vfio.h        |  1 +
>  4 files changed, 39 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/vfio/pci/vfio_cxl_core.c b/drivers/vfio/pci/vfio_cxl_core.c
> index bbb968cb1b70..d8b51f8792a2 100644
> --- a/drivers/vfio/pci/vfio_cxl_core.c
> +++ b/drivers/vfio/pci/vfio_cxl_core.c
> @@ -391,6 +391,8 @@ int vfio_cxl_core_enable(struct vfio_pci_core_device *core_dev)
>  	if (ret)
>  		return ret;
>  
> +	vfio_pci_core_enable_cxl(core_dev);
> +
>  	ret = vfio_pci_core_enable(core_dev);
>  	if (ret)
>  		goto err_pci_core_enable;
> @@ -618,6 +620,12 @@ ssize_t vfio_cxl_core_write(struct vfio_device *core_vdev, const char __user *bu
>  }
>  EXPORT_SYMBOL_GPL(vfio_cxl_core_write);
>  
> +void vfio_pci_core_enable_cxl(struct vfio_pci_core_device *core_dev)
> +{
> +	core_dev->has_cxl = true;
> +}
> +EXPORT_SYMBOL(vfio_pci_core_enable_cxl);
> +
>  MODULE_LICENSE("GPL");
>  MODULE_AUTHOR(DRIVER_AUTHOR);
>  MODULE_DESCRIPTION(DRIVER_DESC);
> diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
> index 9373942f1acb..e0f23b538858 100644
> --- a/drivers/vfio/pci/vfio_pci_core.c
> +++ b/drivers/vfio/pci/vfio_pci_core.c
> @@ -126,6 +126,9 @@ static void vfio_pci_probe_mmaps(struct vfio_pci_core_device *vdev)
>  		if (!(res->flags & IORESOURCE_MEM))
>  			goto no_mmap;
>  
> +		if (vdev->has_cxl && bar == vdev->cxl.comp_reg_bar)
> +			goto no_mmap;
> +
>  		/*
>  		 * The PCI core shouldn't set up a resource with a
>  		 * type but zero size. But there may be bugs that
> @@ -487,10 +490,15 @@ int vfio_pci_core_enable(struct vfio_pci_core_device *vdev)
>  	if (ret)
>  		goto out_power;
>  
> -	/* If reset fails because of the device lock, fail this path entirely */
> -	ret = pci_try_reset_function(pdev);
> -	if (ret == -EAGAIN)
> -		goto out_disable_device;
> +	if (!vdev->has_cxl) {
> +		/* If reset fails because of the device lock, fail this path entirely */
> +		ret = pci_try_reset_function(pdev);
> +		if (ret == -EAGAIN)
> +			goto out_disable_device;
> +	} else {
> +		/* CXL Reset is missing in CXL core. FLR only resets CXL.io path. */
> +		ret = -ENODEV;
> +	}

Seems like this should perhaps be a prerequisite to exposing the device
to userspace, otherwise how is the state sanitized between uses?

>  
>  	vdev->reset_works = !ret;
>  	pci_save_state(pdev);
> @@ -498,14 +506,17 @@ int vfio_pci_core_enable(struct vfio_pci_core_device *vdev)
>  	if (!vdev->pci_saved_state)
>  		pci_dbg(pdev, "%s: Couldn't store saved state\n", __func__);
>  
> -	if (likely(!nointxmask)) {
> -		if (vfio_pci_nointx(pdev)) {
> -			pci_info(pdev, "Masking broken INTx support\n");
> -			vdev->nointx = true;
> -			pci_intx(pdev, 0);
> -		} else
> -			vdev->pci_2_3 = pci_intx_mask_supported(pdev);
> -	}
> +	if (!vdev->has_cxl) {
> +		if (likely(!nointxmask)) {
> +			if (vfio_pci_nointx(pdev)) {
> +				pci_info(pdev, "Masking broken INTx support\n");
> +				vdev->nointx = true;
> +				pci_intx(pdev, 0);
> +			} else
> +				vdev->pci_2_3 = pci_intx_mask_supported(pdev);
> +		}
> +	} else
> +		vdev->nointx = true; /* CXL device doesn't have INTX. */
>  

Why do we need to do anything here?  nointx is for exposing a device
with INTx support as if it does not have INTx support.  If a CXL device
simply does not support INTx, like SR-IOV VFs, this is not necessary,
the interrupt pin register should be zero.  Is that not the case on a
CXL device?

>  	pci_read_config_word(pdev, PCI_COMMAND, &cmd);
>  	if (vdev->pci_2_3 && (cmd & PCI_COMMAND_INTX_DISABLE)) {
> @@ -541,7 +552,6 @@ int vfio_pci_core_enable(struct vfio_pci_core_device *vdev)
>  	if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev))
>  		vdev->has_vga = true;
>  
> -

Gratuitous whitespace change.

>  	return 0;
>  
>  out_free_zdev:
> @@ -657,7 +667,8 @@ void vfio_pci_core_disable(struct vfio_pci_core_device *vdev)
>  	 * Disable INTx and MSI, presumably to avoid spurious interrupts
>  	 * during reset.  Stolen from pci_reset_function()
>  	 */
> -	pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);
> +	if (!vdev->nointx)
> +		pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);

No, this is not what nointx is for.  Regardless it should be a no-op on
a device that doesn't support INTx.

>  
>  	/*
>  	 * Try to get the locks ourselves to prevent a deadlock. The
> @@ -973,6 +984,9 @@ static int vfio_pci_ioctl_get_info(struct vfio_pci_core_device *vdev,
>  	if (vdev->reset_works)
>  		info.flags |= VFIO_DEVICE_FLAGS_RESET;
>  
> +	if (vdev->has_cxl)
> +		info.flags |= VFIO_DEVICE_FLAGS_CXL;
> +

So the proposal is that a vfio-cxl device will expose *both* PCI and
CXL device compatibility flags?  That means existing userspace
expecting only a PCI device will try to make use of this.  Shouldn't
the device be bound to vfio-pci rather than a vfio-cxl driver for that,
if it's even valid?

>  	info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions;
>  	info.num_irqs = VFIO_PCI_NUM_IRQS;
>  
> diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
> index 9d295ca9382a..e5646aad3eb3 100644
> --- a/include/linux/vfio_pci_core.h
> +++ b/include/linux/vfio_pci_core.h
> @@ -113,6 +113,7 @@ struct vfio_pci_core_device {
>  	bool			needs_pm_restore:1;
>  	bool			pm_intx_masked:1;
>  	bool			pm_runtime_engaged:1;
> +	bool			has_cxl:1;

I wonder if has_cxl is based on has_vga, I would have expected is_cxl.
A PCI device that supports VGA is a much more discrete add-on, versus
my limited understanding of CXL is that it is a CXL device where PCI is
mostly just the configuration and enumeration interface, ie. CXL.io.

>  	struct pci_saved_state	*pci_saved_state;
>  	struct pci_saved_state	*pm_save;
>  	int			ioeventfds_nr;
> @@ -208,5 +209,6 @@ ssize_t vfio_cxl_core_read(struct vfio_device *core_vdev, char __user *buf,
>  			   size_t count, loff_t *ppos);
>  ssize_t vfio_cxl_core_write(struct vfio_device *core_vdev, const char __user *buf,
>  			    size_t count, loff_t *ppos);
> +void vfio_pci_core_enable_cxl(struct vfio_pci_core_device *core_dev);
>  
>  #endif /* VFIO_PCI_CORE_H */
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 71f766c29060..0895183feaac 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -214,6 +214,7 @@ struct vfio_device_info {
>  #define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6)	/* vfio-fsl-mc device */
>  #define VFIO_DEVICE_FLAGS_CAPS	(1 << 7)	/* Info supports caps */
>  #define VFIO_DEVICE_FLAGS_CDX	(1 << 8)	/* vfio-cdx device */
> +#define VFIO_DEVICE_FLAGS_CXL	(1 << 9)	/* Device supports CXL support */

Comment wording.  Thanks,

Alex

>  	__u32	num_regions;	/* Max region index + 1 */
>  	__u32	num_irqs;	/* Max IRQ index + 1 */
>  	__u32   cap_offset;	/* Offset within info struct of first cap */
diff mbox series

Patch

diff --git a/drivers/vfio/pci/vfio_cxl_core.c b/drivers/vfio/pci/vfio_cxl_core.c
index bbb968cb1b70..d8b51f8792a2 100644
--- a/drivers/vfio/pci/vfio_cxl_core.c
+++ b/drivers/vfio/pci/vfio_cxl_core.c
@@ -391,6 +391,8 @@  int vfio_cxl_core_enable(struct vfio_pci_core_device *core_dev)
 	if (ret)
 		return ret;
 
+	vfio_pci_core_enable_cxl(core_dev);
+
 	ret = vfio_pci_core_enable(core_dev);
 	if (ret)
 		goto err_pci_core_enable;
@@ -618,6 +620,12 @@  ssize_t vfio_cxl_core_write(struct vfio_device *core_vdev, const char __user *bu
 }
 EXPORT_SYMBOL_GPL(vfio_cxl_core_write);
 
+void vfio_pci_core_enable_cxl(struct vfio_pci_core_device *core_dev)
+{
+	core_dev->has_cxl = true;
+}
+EXPORT_SYMBOL(vfio_pci_core_enable_cxl);
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(DRIVER_DESC);
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 9373942f1acb..e0f23b538858 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -126,6 +126,9 @@  static void vfio_pci_probe_mmaps(struct vfio_pci_core_device *vdev)
 		if (!(res->flags & IORESOURCE_MEM))
 			goto no_mmap;
 
+		if (vdev->has_cxl && bar == vdev->cxl.comp_reg_bar)
+			goto no_mmap;
+
 		/*
 		 * The PCI core shouldn't set up a resource with a
 		 * type but zero size. But there may be bugs that
@@ -487,10 +490,15 @@  int vfio_pci_core_enable(struct vfio_pci_core_device *vdev)
 	if (ret)
 		goto out_power;
 
-	/* If reset fails because of the device lock, fail this path entirely */
-	ret = pci_try_reset_function(pdev);
-	if (ret == -EAGAIN)
-		goto out_disable_device;
+	if (!vdev->has_cxl) {
+		/* If reset fails because of the device lock, fail this path entirely */
+		ret = pci_try_reset_function(pdev);
+		if (ret == -EAGAIN)
+			goto out_disable_device;
+	} else {
+		/* CXL Reset is missing in CXL core. FLR only resets CXL.io path. */
+		ret = -ENODEV;
+	}
 
 	vdev->reset_works = !ret;
 	pci_save_state(pdev);
@@ -498,14 +506,17 @@  int vfio_pci_core_enable(struct vfio_pci_core_device *vdev)
 	if (!vdev->pci_saved_state)
 		pci_dbg(pdev, "%s: Couldn't store saved state\n", __func__);
 
-	if (likely(!nointxmask)) {
-		if (vfio_pci_nointx(pdev)) {
-			pci_info(pdev, "Masking broken INTx support\n");
-			vdev->nointx = true;
-			pci_intx(pdev, 0);
-		} else
-			vdev->pci_2_3 = pci_intx_mask_supported(pdev);
-	}
+	if (!vdev->has_cxl) {
+		if (likely(!nointxmask)) {
+			if (vfio_pci_nointx(pdev)) {
+				pci_info(pdev, "Masking broken INTx support\n");
+				vdev->nointx = true;
+				pci_intx(pdev, 0);
+			} else
+				vdev->pci_2_3 = pci_intx_mask_supported(pdev);
+		}
+	} else
+		vdev->nointx = true; /* CXL device doesn't have INTX. */
 
 	pci_read_config_word(pdev, PCI_COMMAND, &cmd);
 	if (vdev->pci_2_3 && (cmd & PCI_COMMAND_INTX_DISABLE)) {
@@ -541,7 +552,6 @@  int vfio_pci_core_enable(struct vfio_pci_core_device *vdev)
 	if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev))
 		vdev->has_vga = true;
 
-
 	return 0;
 
 out_free_zdev:
@@ -657,7 +667,8 @@  void vfio_pci_core_disable(struct vfio_pci_core_device *vdev)
 	 * Disable INTx and MSI, presumably to avoid spurious interrupts
 	 * during reset.  Stolen from pci_reset_function()
 	 */
-	pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);
+	if (!vdev->nointx)
+		pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);
 
 	/*
 	 * Try to get the locks ourselves to prevent a deadlock. The
@@ -973,6 +984,9 @@  static int vfio_pci_ioctl_get_info(struct vfio_pci_core_device *vdev,
 	if (vdev->reset_works)
 		info.flags |= VFIO_DEVICE_FLAGS_RESET;
 
+	if (vdev->has_cxl)
+		info.flags |= VFIO_DEVICE_FLAGS_CXL;
+
 	info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions;
 	info.num_irqs = VFIO_PCI_NUM_IRQS;
 
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index 9d295ca9382a..e5646aad3eb3 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -113,6 +113,7 @@  struct vfio_pci_core_device {
 	bool			needs_pm_restore:1;
 	bool			pm_intx_masked:1;
 	bool			pm_runtime_engaged:1;
+	bool			has_cxl:1;
 	struct pci_saved_state	*pci_saved_state;
 	struct pci_saved_state	*pm_save;
 	int			ioeventfds_nr;
@@ -208,5 +209,6 @@  ssize_t vfio_cxl_core_read(struct vfio_device *core_vdev, char __user *buf,
 			   size_t count, loff_t *ppos);
 ssize_t vfio_cxl_core_write(struct vfio_device *core_vdev, const char __user *buf,
 			    size_t count, loff_t *ppos);
+void vfio_pci_core_enable_cxl(struct vfio_pci_core_device *core_dev);
 
 #endif /* VFIO_PCI_CORE_H */
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 71f766c29060..0895183feaac 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -214,6 +214,7 @@  struct vfio_device_info {
 #define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6)	/* vfio-fsl-mc device */
 #define VFIO_DEVICE_FLAGS_CAPS	(1 << 7)	/* Info supports caps */
 #define VFIO_DEVICE_FLAGS_CDX	(1 << 8)	/* vfio-cdx device */
+#define VFIO_DEVICE_FLAGS_CXL	(1 << 9)	/* Device supports CXL support */
 	__u32	num_regions;	/* Max region index + 1 */
 	__u32	num_irqs;	/* Max IRQ index + 1 */
 	__u32   cap_offset;	/* Offset within info struct of first cap */