[RFC,08/13] vfio/cxl: emulate HDM decoder registers

Message ID 20240920223446.1908673-9-zhiw@nvidia.com (mailing list archive)
State New
Series vfio: introduce vfio-cxl to support CXL type-2 accelerator passthrough

Commit Message

Zhi Wang Sept. 20, 2024, 10:34 p.m. UTC
To directly access the device memory, the HDM decoder registers on the
path from the CXL root port to the device need to be configured when
creating a CXL region.

However, the physical HDM decoders are owned by the kernel CXL core,
which programs them when creating and configuring a CXL region. The VM
is therefore forbidden from accessing and configuring the physical HDM
decoder registers, so the HDM decoder registers in the CXL component
register block have to be emulated.
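
For background, the HDM decoder capability in the component register
block has a fixed layout: a global control register at offset 0x4,
followed by one 0x20-byte register set per decoder starting at offset
0x10, whose control register carries the LOCK, COMMIT, COMMITTED and
COMMIT_ERROR bits. A minimal sketch of the commit handshake the
emulation provides (emulate_ctrl_write() is a hypothetical helper for
illustration; the offsets and bits match the macros added to
cxl_accel_pci.h below):

  /* Guest sets COMMIT; the emulation reflects COMMITTED back. */
  static void emulate_ctrl_write(u32 *ctrl, u32 v)
  {
  	/* A locked and committed decoder ignores further writes. */
  	if ((*ctrl & CXL_HDM_DECODER0_CTRL_LOCK) &&
  	    (*ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED))
  		return;

  	/* The emulated commit always succeeds. */
  	v &= ~CXL_HDM_DECODER0_CTRL_COMMIT_ERROR;
  	if (v & CXL_HDM_DECODER0_CTRL_COMMIT)
  		v |= CXL_HDM_DECODER0_CTRL_COMMITTED;
  	else
  		v &= ~CXL_HDM_DECODER0_CTRL_COMMITTED;

  	*ctrl = v;
  }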

Emulate the HDM decoder registers in the vfio-cxl-core: locate the BAR
where the component registers sit, take a snapshot of the component
registers before initializing the CXL device, and emulate the HDM
decoder registers when the VM accesses them via
vfio_device_ops->{read, write}.
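
The read and write paths then reduce to a simple dispatch: 4-byte
aligned accesses to the component-register BAR that fall inside the
HDM decoder block are served from the snapshot, everything else is
passed through to vfio_pci_rw(). Condensed for illustration (cxl_rw()
is a hypothetical helper; the real code keeps separate read and write
entry points):

  static ssize_t cxl_rw(struct vfio_pci_core_device *vdev,
  			char __user *buf, size_t count, loff_t *ppos,
  			bool write)
  {
  	struct vfio_cxl *cxl = &vdev->cxl;
  	u64 pos = *ppos & VFIO_PCI_OFFSET_MASK;

  	/* Accesses to any other BAR are pure passthrough. */
  	if (VFIO_PCI_OFFSET_TO_INDEX(*ppos) != cxl->comp_reg_bar)
  		return vfio_pci_rw(vdev, buf, count, ppos, write);

  	if (is_hdm_regblock(cxl, pos, count))
  		return emulate_hdm_regblock(&vdev->vdev, buf, count,
  					    ppos, write);

  	return vfio_pci_rw(vdev, buf, count, ppos, write);
  }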

Signed-off-by: Zhi Wang <zhiw@nvidia.com>
---
 drivers/vfio/pci/vfio_cxl_core.c | 208 ++++++++++++++++++++++++++++++-
 include/linux/cxl_accel_pci.h    |   6 +
 include/linux/vfio_pci_core.h    |   5 +
 3 files changed, 216 insertions(+), 3 deletions(-)

Patch

diff --git a/drivers/vfio/pci/vfio_cxl_core.c b/drivers/vfio/pci/vfio_cxl_core.c
index 68a935515256..bbb968cb1b70 100644
--- a/drivers/vfio/pci/vfio_cxl_core.c
+++ b/drivers/vfio/pci/vfio_cxl_core.c
@@ -283,6 +283,90 @@  static const struct vfio_pci_regops vfio_cxl_regops = {
 	.release	= vfio_cxl_region_release,
 };
 
+static int find_bar(struct pci_dev *pdev, u64 *offset, int *bar, u64 size)
+{
+	u64 start, end, flags;
+	int index, i;
+
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+		index = i + PCI_STD_RESOURCES;
+		flags = pci_resource_flags(pdev, index);
+
+		start = pci_resource_start(pdev, index);
+		end = pci_resource_end(pdev, index);
+
+		if (*offset >= start && *offset + size - 1 <= end)
+			break;
+
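+		/* Skip the upper half of a 64-bit BAR. */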
+		if (flags & IORESOURCE_MEM_64)
+			i++;
+	}
+
+	if (i >= PCI_STD_NUM_BARS)
+		return -ENODEV;
+
+	*offset = *offset - start;
+	*bar = index;
+
+	return 0;
+}
+
+static int find_comp_regs(struct vfio_pci_core_device *core_dev)
+{
+	struct vfio_cxl *cxl = &core_dev->cxl;
+	struct pci_dev *pdev = core_dev->pdev;
+	u64 offset;
+	int ret, bar;
+
+	ret = cxl_find_comp_regblock_offset(pdev, &offset);
+	if (ret)
+		return ret;
+
+	ret = find_bar(pdev, &offset, &bar, SZ_64K);
+	if (ret)
+		return ret;
+
+	cxl->comp_reg_bar = bar;
+	cxl->comp_reg_offset = offset;
+	cxl->comp_reg_size = SZ_64K;
+	return 0;
+}
+
+static void clean_virt_comp_regs(struct vfio_pci_core_device *core_dev)
+{
+	struct vfio_cxl *cxl = &core_dev->cxl;
+
+	kvfree(cxl->comp_reg_virt);
+}
+
+static int setup_virt_comp_regs(struct vfio_pci_core_device *core_dev)
+{
+	struct vfio_cxl *cxl = &core_dev->cxl;
+	struct pci_dev *pdev = core_dev->pdev;
+	u64 offset = cxl->comp_reg_offset;
+	int bar = cxl->comp_reg_bar;
+	u64 size = cxl->comp_reg_size;
+	void *regs;
+	unsigned int i;
+
+	cxl->comp_reg_virt = kvzalloc(size, GFP_KERNEL);
+	if (!cxl->comp_reg_virt)
+		return -ENOMEM;
+
+	regs = ioremap(pci_resource_start(pdev, bar) + offset, size);
+	if (!regs) {
+		kvfree(cxl->comp_reg_virt);
+		return -EFAULT;
+	}
+
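+	/* Snapshot the component registers, one dword at a time. */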
+	for (i = 0; i < size; i += 4)
+		*(u32 *)(cxl->comp_reg_virt + i) = readl(regs + i);
+
+	iounmap(regs);
+
+	return 0;
+}
+
 int vfio_cxl_core_enable(struct vfio_pci_core_device *core_dev)
 {
 	struct vfio_cxl *cxl = &core_dev->cxl;
@@ -299,10 +383,18 @@  int vfio_cxl_core_enable(struct vfio_pci_core_device *core_dev)
 	if (!cxl->region.size)
 		return -EINVAL;
 
-	ret = vfio_pci_core_enable(core_dev);
+	ret = find_comp_regs(core_dev);
+	if (ret)
+		return ret;
+
+	ret = setup_virt_comp_regs(core_dev);
 	if (ret)
 		return ret;
 
+	ret = vfio_pci_core_enable(core_dev);
+	if (ret)
+		goto err_pci_core_enable;
+
 	ret = enable_cxl(core_dev, dvsec);
 	if (ret)
 		goto err_enable_cxl_device;
@@ -324,6 +416,8 @@  int vfio_cxl_core_enable(struct vfio_pci_core_device *core_dev)
 	disable_cxl(core_dev);
 err_enable_cxl_device:
 	vfio_pci_core_disable(core_dev);
+err_pci_core_enable:
+	clean_virt_comp_regs(core_dev);
 	return ret;
 }
 EXPORT_SYMBOL(vfio_cxl_core_enable);
@@ -341,6 +435,7 @@  void vfio_cxl_core_close_device(struct vfio_device *vdev)
 
 	disable_cxl(core_dev);
 	vfio_pci_core_close_device(vdev);
+	clean_virt_comp_regs(core_dev);
 }
 EXPORT_SYMBOL(vfio_cxl_core_close_device);
 
@@ -396,13 +491,102 @@  void vfio_cxl_core_set_driver_hdm_cap(struct vfio_pci_core_device *core_dev)
 }
 EXPORT_SYMBOL(vfio_cxl_core_set_driver_hdm_cap);
 
+static bool is_hdm_regblock(struct vfio_cxl *cxl, u64 offset, size_t count)
+{
+	return offset >= cxl->hdm_reg_offset &&
+	       offset + count <= cxl->hdm_reg_offset +
+	       cxl->hdm_reg_size;
+}
+
+static void write_hdm_decoder_global(void *virt, u64 offset, u32 v)
+{
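+	/* Only bits 1:0 of the global control register are writable. */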
+	if (offset == 0x4)
+		*(u32 *)(virt + offset) = v & GENMASK(1, 0);
+}
+
+static void write_hdm_decoder_n(void *virt, u64 offset, u32 v)
+{
+	u32 cur, index;
+
+	index = (offset - 0x10) / 0x20;
+
+	/* Locked and committed decoders must not be modified. */
+	cur = *(u32 *)(virt + CXL_HDM_DECODER0_CTRL_OFFSET(index));
+
+	if (cur & CXL_HDM_DECODER0_CTRL_LOCK &&
+	    cur & CXL_HDM_DECODER0_CTRL_COMMITTED)
+		return;
+
+	/* emulate HDM_DECODER_CTRL. */
+	if (offset == CXL_HDM_DECODER0_CTRL_OFFSET(index)) {
+		v &= ~CXL_HDM_DECODER0_CTRL_COMMIT_ERROR;
+
+		/* commit/de-commit */
+		if (v & CXL_HDM_DECODER0_CTRL_COMMIT)
+			v |= CXL_HDM_DECODER0_CTRL_COMMITTED;
+		else
+			v &= ~CXL_HDM_DECODER0_CTRL_COMMITTED;
+	}
+	*(u32 *)(virt + offset) = v;
+}
+
+static ssize_t
+emulate_hdm_regblock(struct vfio_device *vdev, char __user *buf,
+		     size_t count, loff_t *ppos, bool write)
+{
+	struct vfio_pci_core_device *core_dev =
+		container_of(vdev, struct vfio_pci_core_device, vdev);
+	struct vfio_cxl *cxl = &core_dev->cxl;
+	u64 pos = *ppos & VFIO_PCI_OFFSET_MASK;
+	void *hdm_reg_virt;
+	u64 hdm_offset;
+	u32 v;
+
+	hdm_offset = pos - cxl->hdm_reg_offset;
+	hdm_reg_virt = cxl->comp_reg_virt +
+		       (cxl->hdm_reg_offset - cxl->comp_reg_offset);
+
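+	/* The access is served from the snapshot, not the device. */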
+	if (!write) {
+		v = *(u32 *)(hdm_reg_virt + hdm_offset);
+
+		if (copy_to_user(buf, &v, 4))
+			return -EFAULT;
+	} else {
+		if (copy_from_user(&v, buf, 4))
+			return -EFAULT;
+
+		if (hdm_offset < 0x10)
+			write_hdm_decoder_global(hdm_reg_virt, hdm_offset, v);
+		else
+			write_hdm_decoder_n(hdm_reg_virt, hdm_offset, v);
+	}
+	return count;
+}
+
 ssize_t vfio_cxl_core_read(struct vfio_device *core_vdev, char __user *buf,
 		size_t count, loff_t *ppos)
 {
 	struct vfio_pci_core_device *vdev =
 		container_of(core_vdev, struct vfio_pci_core_device, vdev);
+	struct vfio_cxl *cxl = &vdev->cxl;
+	u64 pos = *ppos & VFIO_PCI_OFFSET_MASK;
+	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
+
+	if (!count)
+		return 0;
+
+	if (index != cxl->comp_reg_bar)
+		return vfio_pci_rw(vdev, buf, count, ppos, false);
+
+	if (WARN_ON_ONCE(!IS_ALIGNED(pos, 4) || count != 4))
+		return -EINVAL;
 
-	return vfio_pci_rw(vdev, buf, count, ppos, false);
+	if (is_hdm_regblock(cxl, pos, count))
+		return emulate_hdm_regblock(core_vdev, buf, count,
+					    ppos, false);
+	else
+		return vfio_pci_rw(vdev, buf, count, ppos, false);
 }
 EXPORT_SYMBOL_GPL(vfio_cxl_core_read);
 
@@ -411,8 +595,26 @@  ssize_t vfio_cxl_core_write(struct vfio_device *core_vdev, const char __user *bu
 {
 	struct vfio_pci_core_device *vdev =
 		container_of(core_vdev, struct vfio_pci_core_device, vdev);
+	struct vfio_cxl *cxl = &vdev->cxl;
+	u64 pos = *ppos & VFIO_PCI_OFFSET_MASK;
+	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
+
+	if (!count)
+		return 0;
+
+	if (index != cxl->comp_reg_bar)
+		return vfio_pci_rw(vdev, (char __user *)buf, count, ppos,
+				   true);
+
+	if (WARN_ON_ONCE(!IS_ALIGNED(pos, 4) || count != 4))
+		return -EINVAL;
 
-	return vfio_pci_rw(vdev, (char __user *)buf, count, ppos, true);
+	if (is_hdm_regblock(cxl, pos, count))
+		return emulate_hdm_regblock(core_vdev, (char __user *)buf,
+					    count, ppos, true);
+	else
+		return vfio_pci_rw(vdev, (char __user *)buf, count, ppos,
+				   true);
 }
 EXPORT_SYMBOL_GPL(vfio_cxl_core_write);
 
diff --git a/include/linux/cxl_accel_pci.h b/include/linux/cxl_accel_pci.h
index c337ae8797e6..090f60fb9a3f 100644
--- a/include/linux/cxl_accel_pci.h
+++ b/include/linux/cxl_accel_pci.h
@@ -20,4 +20,10 @@ 
 #define   CXL_DVSEC_RANGE_BASE_LOW(i)	(0x24 + (i * 0x10))
 #define     CXL_DVSEC_MEM_BASE_LOW_MASK	GENMASK(31, 28)
 
+#define CXL_HDM_DECODER0_CTRL_OFFSET(i) (0x20 * (i) + 0x20)
+#define   CXL_HDM_DECODER0_CTRL_LOCK BIT(8)
+#define   CXL_HDM_DECODER0_CTRL_COMMIT BIT(9)
+#define   CXL_HDM_DECODER0_CTRL_COMMITTED BIT(10)
+#define   CXL_HDM_DECODER0_CTRL_COMMIT_ERROR BIT(11)
+
 #endif
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index 64ccdcdfa95e..9d295ca9382a 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -62,6 +62,11 @@  struct vfio_cxl {
 	u8 caps;
 	u64 dpa_size;
 
+	int comp_reg_bar;
+	u64 comp_reg_offset;
+	u64 comp_reg_size;
+	void *comp_reg_virt;
+
 	u32 hdm_count;
 	u64 hdm_reg_offset;
 	u64 hdm_reg_size;