diff mbox series

[RFC,1/1] vfio: support CXL device in VFIO stub

Message ID 20240921071440.1915876-2-zhiw@nvidia.com (mailing list archive)
State New
Headers show
Series Introduce vfio-cxl to support CXL type-2 device passthrough | expand

Commit Message

Zhi Wang Sept. 21, 2024, 7:14 a.m. UTC
To support CXL device passthrough, vfio-cxl-core is introduced. This
is the QEMU part.

Get the CXL caps from the vfio-cxl-core. Trap and emulate the HDM
decoder registers. Map the HDM decoders when the guest commits an HDM
decoder.

Signed-off-by: Zhi Wang <zhiw@nvidia.com>
---
 hw/vfio/common.c              |   3 +
 hw/vfio/pci.c                 | 134 ++++++++++++++++++++++++++++++++++
 hw/vfio/pci.h                 |  10 +++
 include/hw/pci/pci.h          |   2 +
 include/hw/vfio/vfio-common.h |   1 +
 linux-headers/linux/vfio.h    |  14 ++++
 6 files changed, 164 insertions(+)
diff mbox series

Patch

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 9aac21abb7..6dea606f62 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -237,6 +237,9 @@  void vfio_region_write(void *opaque, hwaddr addr,
         break;
     }
 
+    if (region->notify_change)
+        region->notify_change(opaque, addr, data, size);
+
     if (pwrite(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) {
         error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64
                      ",%d) failed: %m",
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index a205c6b113..431a588252 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -23,6 +23,7 @@ 
 #include <sys/ioctl.h>
 
 #include "hw/hw.h"
+#include "hw/cxl/cxl_component.h"
 #include "hw/pci/msi.h"
 #include "hw/pci/msix.h"
 #include "hw/pci/pci_bridge.h"
@@ -2743,6 +2744,72 @@  int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
     return 0;
 }
 
+/*
+ * Read one 32-bit value from a VFIO region through the device fd.
+ *
+ * @region: region to read from; its fd_offset is added to @offset
+ * @val:    where the 4 bytes returned by pread() are stored
+ * @offset: byte offset inside the region
+ *
+ * Returns true when exactly 4 bytes were read, false (after reporting
+ * the error) otherwise.
+ *
+ * NOTE(review): the bytes are stored as read, without any byte-order
+ * conversion -- confirm whether a le32_to_cpu() is wanted here for
+ * big-endian hosts.
+ */
+static bool read_region(VFIORegion *region, uint32_t *val, uint64_t offset)
+{
+    VFIODevice *vbasedev = region->vbasedev;
+    ssize_t nread;
+
+    nread = pread(vbasedev->fd, val, sizeof(*val),
+                  region->fd_offset + offset);
+    if (nread != (ssize_t)sizeof(*val)) {
+        /*
+         * *val is deliberately not printed: on failure nothing valid was
+         * stored there and the caller's variable may be uninitialized.
+         * PRIx64 keeps the format correct where uint64_t is not "long".
+         */
+        error_report("%s(%s, 0x%"PRIx64", %zu) failed: %m",
+                     __func__, vbasedev->name, offset, sizeof(*val));
+        return false;
+    }
+
+    return true;
+}
+
+/*
+ * HDM decoder register layout used below, relative to
+ * cxl->hdm_regs_offset: decoder n starts at 0x10 + 0x20 * n, with
+ * BASE_LO/BASE_HI at +0x00/+0x04 and the control register at +0x10.
+ */
+enum {
+    VFIO_CXL_HDM_DECODER_FIRST  = 0x10,
+    VFIO_CXL_HDM_DECODER_STRIDE = 0x20,
+    VFIO_CXL_HDM_BASE_LO        = 0x00,
+    VFIO_CXL_HDM_BASE_HI        = 0x04,
+    VFIO_CXL_HDM_CTRL           = 0x10,
+
+    /* Control register bits. */
+    VFIO_CXL_HDM_CTRL_LOCK      = 1 << 8,   /* lock on commit */
+    VFIO_CXL_HDM_CTRL_COMMIT    = 1 << 9,
+    VFIO_CXL_HDM_CTRL_COMMITTED = 1 << 10,
+};
+
+/* BASE_LO carries address bits 31:28 in register bits 31:28. */
+#define VFIO_CXL_HDM_BASE_LO_MASK 0xf0000000u
+
+/*
+ * notify_change hook for the BAR that contains the HDM decoder registers.
+ *
+ * It is invoked from vfio_region_write() for every write to that BAR,
+ * before the write is forwarded to the kernel.  Only 32-bit writes to a
+ * decoder control register are acted upon:
+ *
+ *  - not committed -> COMMIT set:   map the CXL region into the PCI bus
+ *    memory address space at the base programmed in BASE_HI/BASE_LO;
+ *  - committed -> COMMIT cleared:   unmap it again, unless the decoder is
+ *    locked.
+ *
+ * @opaque: the VFIORegion of the BAR
+ * @addr:   offset of the write inside the BAR
+ * @data:   value being written
+ * @size:   access size in bytes
+ *
+ * NOTE(review): there is a single cxl->region for all decoders, so only
+ * one mapping can exist at a time; a commit while it is already mapped is
+ * ignored.
+ */
+static void vfio_cxl_hdm_regs_changed(void *opaque, hwaddr addr,
+                                      uint64_t data, unsigned size)
+{
+    VFIORegion *region = opaque;
+    VFIODevice *vbasedev = region->vbasedev;
+    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
+    VFIOCXL *cxl = &vdev->cxl;
+    MemoryRegion *sysmem = pci_get_bus(&vdev->pdev)->address_space_mem;
+    uint64_t offset, decoder, decoder_regs;
+    uint32_t cur_val, write_val;
+
+    /*
+     * Writes outside the HDM register block (or to the capability
+     * registers in front of decoder 0) are none of our business.  Without
+     * this check the unsigned arithmetic below wraps and every 32-byte
+     * aligned write in the BAR would look like a control register write.
+     */
+    if (addr < cxl->hdm_regs_offset ||
+        addr - cxl->hdm_regs_offset >= cxl->hdm_regs_size) {
+        return;
+    }
+    offset = addr - cxl->hdm_regs_offset;
+    if (offset < VFIO_CXL_HDM_DECODER_FIRST) {
+        return;
+    }
+
+    if (size != 4 || (addr & 0x3)) {
+        error_report("hdm_regs_changed: unsupported size or unaligned addr!");
+        return;
+    }
+
+    decoder = (offset - VFIO_CXL_HDM_DECODER_FIRST) /
+              VFIO_CXL_HDM_DECODER_STRIDE;
+    decoder_regs = cxl->hdm_regs_offset + VFIO_CXL_HDM_DECODER_FIRST +
+                   decoder * VFIO_CXL_HDM_DECODER_STRIDE;
+
+    /* Only the control register triggers (un)mapping. */
+    if (addr != decoder_regs + VFIO_CXL_HDM_CTRL) {
+        return;
+    }
+
+    /* Nothing to map if the CXL region was set up without a MemoryRegion. */
+    if (!cxl->region.mem) {
+        return;
+    }
+
+    write_val = (uint32_t)data;
+    /* The write has not reached the kernel yet: this is the old value. */
+    if (!read_region(region, &cur_val, addr)) {
+        return;
+    }
+
+    if (!(cur_val & VFIO_CXL_HDM_CTRL_COMMITTED) &&
+        (write_val & VFIO_CXL_HDM_CTRL_COMMIT)) {
+        /* not commit -> commit */
+        uint32_t base_hi, base_lo;
+        uint64_t base;
+
+        if (memory_region_is_mapped(cxl->region.mem)) {
+            return;
+        }
+
+        if (!read_region(region, &base_lo,
+                         decoder_regs + VFIO_CXL_HDM_BASE_LO) ||
+            !read_region(region, &base_hi,
+                         decoder_regs + VFIO_CXL_HDM_BASE_HI)) {
+            return;
+        }
+
+        base = ((uint64_t)base_hi << 32) |
+               (base_lo & VFIO_CXL_HDM_BASE_LO_MASK);
+
+        memory_region_transaction_begin();
+        memory_region_add_subregion_overlap(sysmem, base,
+                                            cxl->region.mem, 0);
+        memory_region_transaction_commit();
+    } else if ((cur_val & VFIO_CXL_HDM_CTRL_COMMITTED) &&
+               !(write_val & VFIO_CXL_HDM_CTRL_COMMIT)) {
+        /* commit -> not commit */
+
+        /* locked */
+        if (cur_val & VFIO_CXL_HDM_CTRL_LOCK) {
+            return;
+        }
+
+        /* Deleting a subregion that was never added would assert. */
+        if (!memory_region_is_mapped(cxl->region.mem)) {
+            return;
+        }
+
+        memory_region_transaction_begin();
+        memory_region_del_subregion(sysmem, cxl->region.mem);
+        memory_region_transaction_commit();
+    }
+}
+
 static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
 {
     VFIODevice *vbasedev = &vdev->vbasedev;
@@ -2780,6 +2847,11 @@  static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
         }
 
         QLIST_INIT(&vdev->bars[i].quirks);
+
+        if (vbasedev->flags & VFIO_DEVICE_FLAGS_CXL &&
+            i == vdev->cxl.hdm_regs_bar_index) {
+            vdev->bars[i].region.notify_change = vfio_cxl_hdm_regs_changed;
+        }
     }
 
     ret = vfio_get_region_info(vbasedev,
@@ -2974,6 +3046,62 @@  static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
     vdev->req_enabled = false;
 }
 
+/*
+ * Query the CXL capability of a VFIO device and set up its CXL region.
+ *
+ * Does nothing for devices that do not advertise VFIO_DEVICE_FLAGS_CXL.
+ * Otherwise the fields of the VFIO_DEVICE_INFO_CAP_CXL capability are
+ * copied into vdev->cxl, the device region of type
+ * (VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_CXL) and subtype
+ * VFIO_REGION_SUBTYPE_CXL is looked up, set up as vdev->cxl.region and
+ * mmap'ed.
+ *
+ * Returns 0 on success (including the non-CXL case), -ENODEV when the
+ * device info or the CXL capability is missing, the error returned by
+ * the region lookup/setup helpers, or -EFAULT when the mmap fails.
+ */
+static int vfio_cxl_setup(VFIOPCIDevice *vdev)
+{
+    VFIODevice *vbasedev = &vdev->vbasedev;
+    VFIOCXL *cxl = &vdev->cxl;
+    struct vfio_device_info_cap_cxl *cap;
+    g_autofree struct vfio_device_info *info = NULL;
+    /*
+     * g_autofree releases region_info on every exit path; freeing it by
+     * hand only after a successful vfio_region_setup() leaked it when
+     * that call failed.
+     */
+    g_autofree struct vfio_region_info *region_info = NULL;
+    struct vfio_info_cap_header *hdr;
+    int ret;
+
+    if (!(vbasedev->flags & VFIO_DEVICE_FLAGS_CXL)) {
+        return 0;
+    }
+
+    info = vfio_get_device_info(vbasedev->fd);
+    if (!info) {
+        return -ENODEV;
+    }
+
+    hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_CXL);
+    if (!hdr) {
+        return -ENODEV;
+    }
+
+    /* The capability header is the first member of the CXL capability. */
+    cap = (void *)hdr;
+
+    cxl->hdm_count = cap->hdm_count;
+    cxl->hdm_regs_bar_index = cap->hdm_regs_bar_index;
+    cxl->hdm_regs_size = cap->hdm_regs_size;
+    cxl->hdm_regs_offset = cap->hdm_regs_offset;
+    cxl->dpa_size = cap->dpa_size;
+
+    ret = vfio_get_dev_region_info(vbasedev,
+            VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_CXL,
+            VFIO_REGION_SUBTYPE_CXL, &region_info);
+    if (ret) {
+        error_report("does not support requested CXL feature");
+        return ret;
+    }
+
+    ret = vfio_region_setup(OBJECT(vdev), vbasedev, &cxl->region,
+            region_info->index, "cxl region");
+    if (ret) {
+        error_report("fail to setup CXL region");
+        return ret;
+    }
+
+    if (vfio_region_mmap(&cxl->region)) {
+        error_report("Failed to mmap %s cxl region",
+                     vdev->vbasedev.name);
+        return -EFAULT;
+    }
+    return 0;
+}
+
 static void vfio_realize(PCIDevice *pdev, Error **errp)
 {
     VFIOPCIDevice *vdev = VFIO_PCI(pdev);
@@ -3083,6 +3211,12 @@  static void vfio_realize(PCIDevice *pdev, Error **errp)
         goto error;
     }
 
+    ret = vfio_cxl_setup(vdev);
+    if (ret) {
+        vfio_put_group(group);
+        goto error;
+    }
+
     vfio_populate_device(vdev, &err);
     if (err) {
         error_propagate(errp, err);
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index a2771b9ff3..6c5f5c1ea5 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -118,6 +118,15 @@  typedef struct VFIOMSIXInfo {
 #define TYPE_VFIO_PCI "vfio-pci"
 OBJECT_DECLARE_SIMPLE_TYPE(VFIOPCIDevice, VFIO_PCI)
 
+/*
+ * Per-device CXL state.  The scalar fields are copied verbatim from the
+ * kernel's VFIO_DEVICE_INFO_CAP_CXL capability by vfio_cxl_setup().
+ */
+typedef struct VFIOCXL {
+    /* presumably the number of HDM decoders -- TODO confirm encoding */
+    uint8_t hdm_count;
+    /* index of the PCI BAR whose writes are watched for HDM changes */
+    uint8_t hdm_regs_bar_index;
+    /* presumably the size in bytes of the HDM register block -- confirm */
+    uint64_t hdm_regs_size;
+    /* offset of the HDM register block inside that BAR */
+    uint64_t hdm_regs_offset;
+    /* presumably the device physical address space size -- confirm */
+    uint64_t dpa_size;
+    /* VFIO device region that is (un)mapped when a decoder is (un)committed */
+    VFIORegion region;
+} VFIOCXL;
+
 struct VFIOPCIDevice {
     PCIDevice pdev;
     VFIODevice vbasedev;
@@ -177,6 +186,7 @@  struct VFIOPCIDevice {
     bool clear_parent_atomics_on_exit;
     VFIODisplay *dpy;
     Notifier irqchip_change_notifier;
+    VFIOCXL cxl;
 };
 
 /* Use uin32_t for vendor & device so PCI_ANY_ID expands and cannot match hw */
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index b70a0b95ff..fbf5786d00 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -117,6 +117,8 @@  extern bool pci_available;
 #define PCI_DEVICE_ID_REDHAT_UFS         0x0013
 #define PCI_DEVICE_ID_REDHAT_QXL         0x0100
 
+#define PCI_VENDOR_ID_CXL                0x1e98
+
 #define FMT_PCIBUS                      PRIx64
 
 typedef uint64_t pcibus_t;
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index da43d27352..1c998c3ed6 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -56,6 +56,7 @@  typedef struct VFIORegion {
     uint32_t nr_mmaps;
     VFIOMmap *mmaps;
     uint8_t nr; /* cache the region number for debug */
+    void (*notify_change)(void *, hwaddr, uint64_t, unsigned);
 } VFIORegion;
 
 typedef struct VFIOMigration {
diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index 16db89071e..22fb50ed34 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -214,6 +214,7 @@  struct vfio_device_info {
 #define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6)	/* vfio-fsl-mc device */
 #define VFIO_DEVICE_FLAGS_CAPS	(1 << 7)	/* Info supports caps */
 #define VFIO_DEVICE_FLAGS_CDX	(1 << 8)	/* vfio-cdx device */
+#define VFIO_DEVICE_FLAGS_CXL	(1 << 9)	/* vfio-cxl device */
 	__u32	num_regions;	/* Max region index + 1 */
 	__u32	num_irqs;	/* Max IRQ index + 1 */
 	__u32   cap_offset;	/* Offset within info struct of first cap */
@@ -255,6 +256,16 @@  struct vfio_device_info_cap_pci_atomic_comp {
 	__u32 reserved;
 };
 
+/*
+ * VFIO_DEVICE_INFO_CAP_CXL - CXL information of a device, reported as a
+ * capability in the VFIO_DEVICE_GET_INFO capability chain.
+ *
+ * NOTE(review): the two __u8 members are directly followed by a __u64,
+ * so the compiler inserts padding that is not spelled out here and whose
+ * size depends on the ABI's __u64 alignment.  Consider explicit reserved
+ * bytes (or __aligned_u64) in the kernel header this file is synced from.
+ */
+#define VFIO_DEVICE_INFO_CAP_CXL               6
+struct vfio_device_info_cap_cxl {
+	struct vfio_info_cap_header header;
+	/* presumably the number of HDM decoders -- TODO confirm */
+	__u8 hdm_count;
+	/* presumably the BAR holding the HDM decoder registers -- confirm */
+	__u8 hdm_regs_bar_index;
+	/* presumably size/offset of the HDM register block in that BAR */
+	__u64 hdm_regs_size;
+	__u64 hdm_regs_offset;
+	/* presumably the device physical address space size -- confirm */
+	__u64 dpa_size;
+};
+
 /**
  * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
  *				       struct vfio_region_info)
@@ -371,6 +382,9 @@  struct vfio_region_info_cap_type {
 /* sub-types for VFIO_REGION_TYPE_GFX */
 #define VFIO_REGION_SUBTYPE_GFX_EDID            (1)
 
+/* sub-types for VFIO CXL region */
+#define VFIO_REGION_SUBTYPE_CXL                 (1)
+
 /**
  * struct vfio_region_gfx_edid - EDID region layout.
  *