@@ -237,6 +237,9 @@ void vfio_region_write(void *opaque, hwaddr addr,
break;
}
+ if (region->notify_change)
+ region->notify_change(opaque, addr, data, size);
+
if (pwrite(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) {
error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64
",%d) failed: %m",
@@ -23,6 +23,7 @@
#include <sys/ioctl.h>
#include "hw/hw.h"
+#include "hw/cxl/cxl_component.h"
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
#include "hw/pci/pci_bridge.h"
@@ -2743,6 +2744,72 @@ int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
return 0;
}
+/*
+ * Read one 32-bit register from @region at byte @offset through the VFIO
+ * device file descriptor.
+ *
+ * Returns true with the register value stored in @val on success.  On a
+ * failed or short read the error is reported and false is returned; the
+ * contents of @val must not be relied upon in that case.
+ */
+static bool read_region(VFIORegion *region, uint32_t *val, uint64_t offset)
+{
+    VFIODevice *vbasedev = region->vbasedev;
+
+    if (pread(vbasedev->fd, val, sizeof(*val), region->fd_offset + offset) !=
+        (ssize_t)sizeof(*val)) {
+        /*
+         * Do not print *val here: the read failed, so it still holds
+         * whatever (possibly uninitialised) data the caller passed in.
+         */
+        error_report("%s(%s, 0x%"PRIx64", %zu) failed: %m",
+                     __func__, vbasedev->name, offset, sizeof(*val));
+        return false;
+    }
+    return true;
+}
+
+/*
+ * HDM decoder register layout, relative to the start of the HDM decoder
+ * register block (cxl->hdm_regs_offset within the BAR).
+ */
+#define VFIO_CXL_HDM_DECODER_STRIDE   0x20
+#define VFIO_CXL_HDM_DECODER_BASE_LO  0x10
+#define VFIO_CXL_HDM_DECODER_BASE_HI  0x14
+#define VFIO_CXL_HDM_DECODER_CTRL     0x20
+/* Only bits 31:28 of the base-low register carry address bits. */
+#define VFIO_CXL_HDM_BASE_LO_MASK     0xf0000000u
+#define VFIO_CXL_HDM_CTRL_LOCK        (1u << 8)
+#define VFIO_CXL_HDM_CTRL_COMMIT      (1u << 9)
+#define VFIO_CXL_HDM_CTRL_COMMITTED   (1u << 10)
+
+/*
+ * Write notifier for the BAR that contains the HDM decoder registers.
+ *
+ * It is invoked for every guest write to that BAR, before the write is
+ * forwarded to the device, so the value read back from the device is the
+ * pre-write state.  Only 32-bit writes to a decoder control register are
+ * acted upon:
+ *   - not committed -> commit: map the CXL region at the decoder base;
+ *   - committed -> not commit: unmap the CXL region, unless the decoder
+ *     is locked.
+ *
+ * @opaque: the VFIORegion of the BAR being written
+ * @addr:   byte offset of the write within the BAR
+ * @data:   value being written
+ * @size:   access size in bytes
+ *
+ * NOTE(review): all decoders share the single cxl->region memory region, so
+ * only one mapping can exist at a time; commits while it is already mapped
+ * are ignored rather than tripping the memory API assertions.
+ */
+static void vfio_cxl_hdm_regs_changed(void *opaque, hwaddr addr,
+                                      uint64_t data, unsigned size)
+{
+    VFIORegion *region = opaque;
+    VFIODevice *vbasedev = region->vbasedev;
+    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
+    VFIOCXL *cxl = &vdev->cxl;
+    MemoryRegion *address_space_mem =
+        pci_get_bus(&vdev->pdev)->address_space_mem;
+    uint64_t offset, index, reg_offset, decoder;
+    uint32_t cur_val, write_val;
+
+    /*
+     * Ignore writes that land before the first decoder.  Without this the
+     * unsigned arithmetic below wraps and unrelated registers alias onto a
+     * decoder control register.
+     */
+    if (addr < cxl->hdm_regs_offset) {
+        return;
+    }
+    offset = addr - cxl->hdm_regs_offset;
+    if (offset < VFIO_CXL_HDM_DECODER_BASE_LO) {
+        return;
+    }
+
+    index = (offset - VFIO_CXL_HDM_DECODER_BASE_LO) /
+            VFIO_CXL_HDM_DECODER_STRIDE;
+    if (index >= cxl->hdm_count) {
+        return;
+    }
+    reg_offset = offset - VFIO_CXL_HDM_DECODER_STRIDE * index;
+
+    /* Only accesses that touch this decoder's control register matter. */
+    if (reg_offset >= VFIO_CXL_HDM_DECODER_CTRL + 4 ||
+        reg_offset + size <= VFIO_CXL_HDM_DECODER_CTRL) {
+        return;
+    }
+
+    if (size != 4 || reg_offset != VFIO_CXL_HDM_DECODER_CTRL) {
+        error_report("hdm_regs_changed: unsupported size or unaligned addr!");
+        return;
+    }
+
+    decoder = cxl->hdm_regs_offset + VFIO_CXL_HDM_DECODER_STRIDE * index;
+    write_val = (uint32_t)data;
+
+    /* Current (pre-write) control register value. */
+    if (!read_region(region, &cur_val,
+                     decoder + VFIO_CXL_HDM_DECODER_CTRL)) {
+        return;
+    }
+
+    if (!(cur_val & VFIO_CXL_HDM_CTRL_COMMITTED) &&
+        (write_val & VFIO_CXL_HDM_CTRL_COMMIT)) {
+        /* not commit -> commit */
+        uint32_t base_hi, base_lo;
+        uint64_t base;
+
+        if (memory_region_is_mapped(cxl->region.mem)) {
+            return;
+        }
+
+        if (!read_region(region, &base_lo,
+                         decoder + VFIO_CXL_HDM_DECODER_BASE_LO) ||
+            !read_region(region, &base_hi,
+                         decoder + VFIO_CXL_HDM_DECODER_BASE_HI)) {
+            return;
+        }
+
+        base = ((uint64_t)base_hi << 32) |
+               (base_lo & VFIO_CXL_HDM_BASE_LO_MASK);
+
+        memory_region_transaction_begin();
+        memory_region_add_subregion_overlap(address_space_mem,
+                                            base, cxl->region.mem, 0);
+        memory_region_transaction_commit();
+    } else if ((cur_val & VFIO_CXL_HDM_CTRL_COMMITTED) &&
+               !(write_val & VFIO_CXL_HDM_CTRL_COMMIT)) {
+        /* commit -> not commit */
+
+        /* locked: the decoder cannot be torn down */
+        if (cur_val & VFIO_CXL_HDM_CTRL_LOCK) {
+            return;
+        }
+
+        /* Nothing to unmap if the guest never committed through us. */
+        if (!memory_region_is_mapped(cxl->region.mem)) {
+            return;
+        }
+
+        memory_region_transaction_begin();
+        memory_region_del_subregion(address_space_mem, cxl->region.mem);
+        memory_region_transaction_commit();
+    }
+}
+
static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
{
VFIODevice *vbasedev = &vdev->vbasedev;
@@ -2780,6 +2847,11 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
}
QLIST_INIT(&vdev->bars[i].quirks);
+
+ if (vbasedev->flags & VFIO_DEVICE_FLAGS_CXL &&
+ i == vdev->cxl.hdm_regs_bar_index) {
+ vdev->bars[i].region.notify_change = vfio_cxl_hdm_regs_changed;
+ }
}
ret = vfio_get_region_info(vbasedev,
@@ -2974,6 +3046,62 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
vdev->req_enabled = false;
}
+/*
+ * Set up CXL passthrough state for @vdev.
+ *
+ * Does nothing and returns 0 when the device is not flagged as CXL.
+ * Otherwise the CXL device-info capability is copied into vdev->cxl, and the
+ * device region of CXL vendor type/subtype is set up and mmapped as
+ * vdev->cxl.region.
+ *
+ * Returns 0 on success or a negative errno value on failure.
+ */
+static int vfio_cxl_setup(VFIOPCIDevice *vdev)
+{
+    VFIODevice *vbasedev = &vdev->vbasedev;
+    struct VFIOCXL *cxl = &vdev->cxl;
+    struct vfio_device_info_cap_cxl *cap;
+    g_autofree struct vfio_device_info *info = NULL;
+    /* Freed automatically on every return path, including the error ones. */
+    g_autofree struct vfio_region_info *region_info = NULL;
+    struct vfio_info_cap_header *hdr;
+    int ret;
+
+    if (!(vbasedev->flags & VFIO_DEVICE_FLAGS_CXL)) {
+        return 0;
+    }
+
+    info = vfio_get_device_info(vbasedev->fd);
+    if (!info) {
+        return -ENODEV;
+    }
+
+    hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_CXL);
+    if (!hdr) {
+        return -ENODEV;
+    }
+
+    /* The capability header is the first member of the CXL capability. */
+    cap = container_of(hdr, struct vfio_device_info_cap_cxl, header);
+
+    cxl->hdm_count = cap->hdm_count;
+    cxl->hdm_regs_bar_index = cap->hdm_regs_bar_index;
+    cxl->hdm_regs_size = cap->hdm_regs_size;
+    cxl->hdm_regs_offset = cap->hdm_regs_offset;
+    cxl->dpa_size = cap->dpa_size;
+
+    ret = vfio_get_dev_region_info(vbasedev,
+            VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_CXL,
+            VFIO_REGION_SUBTYPE_CXL, &region_info);
+    if (ret) {
+        error_report("does not support requested CXL feature");
+        return ret;
+    }
+
+    ret = vfio_region_setup(OBJECT(vdev), vbasedev, &cxl->region,
+                            region_info->index, "cxl region");
+    if (ret) {
+        error_report("fail to setup CXL region");
+        return ret;
+    }
+
+    /*
+     * NOTE(review): on mmap failure the region set up above is left in
+     * place; confirm whether the caller's error path tears it down.
+     */
+    if (vfio_region_mmap(&cxl->region)) {
+        error_report("Failed to mmap %s cxl region",
+                     vdev->vbasedev.name);
+        return -EFAULT;
+    }
+    return 0;
+}
+
static void vfio_realize(PCIDevice *pdev, Error **errp)
{
VFIOPCIDevice *vdev = VFIO_PCI(pdev);
@@ -3083,6 +3211,12 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
goto error;
}
+ ret = vfio_cxl_setup(vdev);
+ if (ret) {
+ vfio_put_group(group);
+ goto error;
+ }
+
vfio_populate_device(vdev, &err);
if (err) {
error_propagate(errp, err);
@@ -118,6 +118,15 @@ typedef struct VFIOMSIXInfo {
#define TYPE_VFIO_PCI "vfio-pci"
OBJECT_DECLARE_SIMPLE_TYPE(VFIOPCIDevice, VFIO_PCI)
+typedef struct VFIOCXL { /* per-device CXL state, filled in by vfio_cxl_setup() */
+ uint8_t hdm_count; /* copied from the kernel's CXL device-info capability */
+ uint8_t hdm_regs_bar_index; /* index of the BAR whose region gets the HDM write notifier */
+ uint64_t hdm_regs_size; /* copied from the CXL capability; presumably in bytes -- TODO confirm */
+ uint64_t hdm_regs_offset; /* subtracted from the BAR write address to get the HDM register offset */
+ uint64_t dpa_size; /* copied from the CXL capability; not otherwise used in the visible code */
+ VFIORegion region; /* device region of CXL vendor type/subtype, set up and mmapped by vfio_cxl_setup() */
+} VFIOCXL;
+
struct VFIOPCIDevice {
PCIDevice pdev;
VFIODevice vbasedev;
@@ -177,6 +186,7 @@ struct VFIOPCIDevice {
bool clear_parent_atomics_on_exit;
VFIODisplay *dpy;
Notifier irqchip_change_notifier;
+ VFIOCXL cxl;
};
/* Use uin32_t for vendor & device so PCI_ANY_ID expands and cannot match hw */
@@ -117,6 +117,8 @@ extern bool pci_available;
#define PCI_DEVICE_ID_REDHAT_UFS 0x0013
#define PCI_DEVICE_ID_REDHAT_QXL 0x0100
+#define PCI_VENDOR_ID_CXL 0x1e98
+
#define FMT_PCIBUS PRIx64
typedef uint64_t pcibus_t;
@@ -56,6 +56,7 @@ typedef struct VFIORegion {
uint32_t nr_mmaps;
VFIOMmap *mmaps;
uint8_t nr; /* cache the region number for debug */
+ void (*notify_change)(void *, hwaddr, uint64_t, unsigned);
} VFIORegion;
typedef struct VFIOMigration {
@@ -214,6 +214,7 @@ struct vfio_device_info {
#define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6) /* vfio-fsl-mc device */
#define VFIO_DEVICE_FLAGS_CAPS (1 << 7) /* Info supports caps */
#define VFIO_DEVICE_FLAGS_CDX (1 << 8) /* vfio-cdx device */
+#define VFIO_DEVICE_FLAGS_CXL (1 << 9) /* vfio-cxl device */
__u32 num_regions; /* Max region index + 1 */
__u32 num_irqs; /* Max IRQ index + 1 */
__u32 cap_offset; /* Offset within info struct of first cap */
@@ -255,6 +256,16 @@ struct vfio_device_info_cap_pci_atomic_comp {
__u32 reserved;
};
+#define VFIO_DEVICE_INFO_CAP_CXL 6 /* capability id passed to vfio_get_device_info_cap() */
+struct vfio_device_info_cap_cxl { /* CXL device-info capability as reported by the kernel */
+ struct vfio_info_cap_header header; /* capability chain header; must stay the first member */
+ __u8 hdm_count; /* presumably the number of HDM decoders -- TODO confirm against the kernel side */
+ __u8 hdm_regs_bar_index; /* index of the BAR that holds the HDM decoder registers */
+ __u64 hdm_regs_size; /* NOTE(review): likely preceded by implicit padding after the two __u8 fields; consider explicit reserved bytes */
+ __u64 hdm_regs_offset; /* offset of the HDM decoder registers within that BAR */
+ __u64 dpa_size; /* presumably the device physical address space size -- TODO confirm */
+};
+
/**
* VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
* struct vfio_region_info)
@@ -371,6 +382,9 @@ struct vfio_region_info_cap_type {
/* sub-types for VFIO_REGION_TYPE_GFX */
#define VFIO_REGION_SUBTYPE_GFX_EDID (1)
+/* sub-types for VFIO CXL region */
+#define VFIO_REGION_SUBTYPE_CXL (1)
+
/**
* struct vfio_region_gfx_edid - EDID region layout.
*
To support CXL device passthrough, vfio-cxl-core is introduced. This is the QEMU part. Get the CXL caps from the vfio-cxl-core. Trap and emulate the HDM decoder registers. Map the HDM decoders when the guest commits an HDM decoder. Signed-off-by: Zhi Wang <zhiw@nvidia.com> --- hw/vfio/common.c | 3 + hw/vfio/pci.c | 134 ++++++++++++++++++++++++++++++++++ hw/vfio/pci.h | 10 +++ include/hw/pci/pci.h | 2 + include/hw/vfio/vfio-common.h | 1 + linux-headers/linux/vfio.h | 14 ++++ 6 files changed, 164 insertions(+)