diff mbox series

[RFC,1/1] vfio: support CXL device in VFIO stub

Message ID 20240921071440.1915876-2-zhiw@nvidia.com (mailing list archive)
State New, archived
Headers show
Series Introduce vfio-cxl to support CXL type-2 device passthrough | expand

Commit Message

Zhi Wang Sept. 21, 2024, 7:14 a.m. UTC
To support CXL device passthrough, vfio-cxl-core is introduced. This
is the QEMU part.

Get the CXL caps from the vfio-cxl-core. Trap and emulate the HDM
decoder registers. Map the HDM decoders when the guest commits an HDM
decoder.

Signed-off-by: Zhi Wang <zhiw@nvidia.com>
---
 hw/vfio/common.c              |   3 +
 hw/vfio/pci.c                 | 134 ++++++++++++++++++++++++++++++++++
 hw/vfio/pci.h                 |  10 +++
 include/hw/pci/pci.h          |   2 +
 include/hw/vfio/vfio-common.h |   1 +
 linux-headers/linux/vfio.h    |  14 ++++
 6 files changed, 164 insertions(+)

Comments

Alex Williamson Oct. 11, 2024, 9:47 p.m. UTC | #1
On Sat, 21 Sep 2024 00:14:40 -0700
Zhi Wang <zhiw@nvidia.com> wrote:

> To support CXL device passthrough, vfio-cxl-core is introduced. This
> is the QEMU part.
> 
> Get the CXL caps from the vfio-cxl-core. Trap and emulate the HDM
> decoder registers. Map the HDM decoders when the guest commits an HDM
> decoder.

It seems like this could all essentially be handled as a quirk, setting
things up based on the CXL flag or CXL device info capability, and the
update could be done in the quirk write handler rather than a new
change notifier callback.  Thanks,

Alex

> Signed-off-by: Zhi Wang <zhiw@nvidia.com>
> ---
>  hw/vfio/common.c              |   3 +
>  hw/vfio/pci.c                 | 134 ++++++++++++++++++++++++++++++++++
>  hw/vfio/pci.h                 |  10 +++
>  include/hw/pci/pci.h          |   2 +
>  include/hw/vfio/vfio-common.h |   1 +
>  linux-headers/linux/vfio.h    |  14 ++++
>  6 files changed, 164 insertions(+)
> 
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 9aac21abb7..6dea606f62 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -237,6 +237,9 @@ void vfio_region_write(void *opaque, hwaddr addr,
>          break;
>      }
>  
> +    if (region->notify_change)
> +        region->notify_change(opaque, addr, data, size);
> +
>      if (pwrite(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) {
>          error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64
>                       ",%d) failed: %m",
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index a205c6b113..431a588252 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -23,6 +23,7 @@
>  #include <sys/ioctl.h>
>  
>  #include "hw/hw.h"
> +#include "hw/cxl/cxl_component.h"
>  #include "hw/pci/msi.h"
>  #include "hw/pci/msix.h"
>  #include "hw/pci/pci_bridge.h"
> @@ -2743,6 +2744,72 @@ int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
>      return 0;
>  }
>  
> +static bool read_region(VFIORegion *region, uint32_t *val, uint64_t offset)
> +{
> +    VFIODevice *vbasedev = region->vbasedev;
> +
> +    if (pread(vbasedev->fd, val, 4, region->fd_offset + offset) != 4) {
> +        error_report("%s(%s, 0x%lx, 0x%x, 0x%x) failed: %m",
> +                     __func__,vbasedev->name, offset, *val, 4);
> +        return false;
> +    }
> +    return true;
> +}
> +
> +static void vfio_cxl_hdm_regs_changed(void *opaque, hwaddr addr,
> +                                      uint64_t data, unsigned size)
> +{
> +    VFIORegion *region = opaque;
> +    VFIODevice *vbasedev = region->vbasedev;
> +    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
> +    VFIOCXL *cxl = &vdev->cxl;
> +    MemoryRegion *address_space_mem = pci_get_bus(&vdev->pdev)->address_space_mem;
> +    uint64_t offset, reg_offset, index;
> +    uint32_t cur_val, write_val;
> +
> +    if (size != 4 || (addr & 0x3))
> +        error_report("hdm_regs_changed: unsupported size or unaligned addr!\n");
> +
> +    offset = addr - cxl->hdm_regs_offset;
> +    index = (offset - 0x10) / 0x20;
> +    reg_offset = offset - 0x20 * index;
> +
> +    if (reg_offset != 0x20)
> +        return;
> +
> +#define READ_REGION(val, offset) do { \
> +    if (!read_region(region, val, offset)) \
> +        return; \
> +    } while(0)
> +
> +    write_val = (uint32_t)data;
> +    READ_REGION(&cur_val, cxl->hdm_regs_offset + 0x20 * index + reg_offset);
> +
> +    if (!(cur_val & (1 << 10)) && (write_val & (1 << 9))) {
> +        memory_region_transaction_begin();
> +        memory_region_del_subregion(address_space_mem, cxl->region.mem);
> +        memory_region_transaction_commit();
> +    } else if (cur_val & (1 << 10) && !(write_val & (1 << 9))) {
> +        /* commit -> not commit */
> +        uint32_t base_hi, base_lo;
> +        uint64_t base;
> +
> +        /* locked */
> +        if (cur_val & (1 << 8))
> +            return;
> +
> +        READ_REGION(&base_lo, cxl->hdm_regs_offset +  0x20 * index + 0x10);
> +        READ_REGION(&base_hi, cxl->hdm_regs_offset +  0x20 * index + 0x14);
> +
> +        base = ((uint64_t)base_hi << 32) | (uint64_t)(base_lo >> 28);
> +
> +        memory_region_transaction_begin();
> +        memory_region_add_subregion_overlap(address_space_mem,
> +                                            base, cxl->region.mem, 0);
> +        memory_region_transaction_commit();
> +    }
> +}
> +
>  static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
>  {
>      VFIODevice *vbasedev = &vdev->vbasedev;
> @@ -2780,6 +2847,11 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
>          }
>  
>          QLIST_INIT(&vdev->bars[i].quirks);
> +
> +        if (vbasedev->flags & VFIO_DEVICE_FLAGS_CXL &&
> +            i == vdev->cxl.hdm_regs_bar_index) {
> +            vdev->bars[i].region.notify_change = vfio_cxl_hdm_regs_changed;
> +        }
>      }
>  
>      ret = vfio_get_region_info(vbasedev,
> @@ -2974,6 +3046,62 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
>      vdev->req_enabled = false;
>  }
>  
> +static int vfio_cxl_setup(VFIOPCIDevice *vdev)
> +{
> +    VFIODevice *vbasedev = &vdev->vbasedev;
> +    struct VFIOCXL *cxl = &vdev->cxl;
> +    struct vfio_device_info_cap_cxl *cap;
> +    g_autofree struct vfio_device_info *info = NULL;
> +    struct vfio_info_cap_header *hdr;
> +    struct vfio_region_info *region_info;
> +    int ret;
> +
> +    if (!(vbasedev->flags & VFIO_DEVICE_FLAGS_CXL))
> +        return 0;
> +
> +    info = vfio_get_device_info(vbasedev->fd);
> +    if (!info) {
> +        return -ENODEV;
> +    }
> +
> +    hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_CXL);
> +    if (!hdr) {
> +        return -ENODEV;
> +    }
> +
> +    cap = (void *)hdr;
> +
> +    cxl->hdm_count = cap->hdm_count;
> +    cxl->hdm_regs_bar_index = cap->hdm_regs_bar_index;
> +    cxl->hdm_regs_size = cap->hdm_regs_size;
> +    cxl->hdm_regs_offset = cap->hdm_regs_offset;
> +    cxl->dpa_size = cap->dpa_size;
> +
> +    ret = vfio_get_dev_region_info(vbasedev,
> +            VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_CXL,
> +            VFIO_REGION_SUBTYPE_CXL, &region_info);
> +    if (ret) {
> +        error_report("does not support requested CXL feature");
> +        return ret;
> +    }
> +
> +    ret = vfio_region_setup(OBJECT(vdev), vbasedev, &cxl->region,
> +            region_info->index, "cxl region");
> +    if (ret) {
> +        error_report("fail to setup CXL region");
> +        return ret;
> +    }
> +
> +    g_free(region_info);
> +
> +    if (vfio_region_mmap(&cxl->region)) {
> +        error_report("Failed to mmap %s cxl region",
> +                     vdev->vbasedev.name);
> +        return -EFAULT;
> +    }
> +    return 0;
> +}
> +
>  static void vfio_realize(PCIDevice *pdev, Error **errp)
>  {
>      VFIOPCIDevice *vdev = VFIO_PCI(pdev);
> @@ -3083,6 +3211,12 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>          goto error;
>      }
>  
> +    ret = vfio_cxl_setup(vdev);
> +    if (ret) {
> +        vfio_put_group(group);
> +        goto error;
> +    }
> +
>      vfio_populate_device(vdev, &err);
>      if (err) {
>          error_propagate(errp, err);
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index a2771b9ff3..6c5f5c1ea5 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -118,6 +118,15 @@ typedef struct VFIOMSIXInfo {
>  #define TYPE_VFIO_PCI "vfio-pci"
>  OBJECT_DECLARE_SIMPLE_TYPE(VFIOPCIDevice, VFIO_PCI)
>  
> +typedef struct VFIOCXL {
> +    uint8_t hdm_count;
> +    uint8_t hdm_regs_bar_index;
> +    uint64_t hdm_regs_size;
> +    uint64_t hdm_regs_offset;
> +    uint64_t dpa_size;
> +    VFIORegion region;
> +} VFIOCXL;
> +
>  struct VFIOPCIDevice {
>      PCIDevice pdev;
>      VFIODevice vbasedev;
> @@ -177,6 +186,7 @@ struct VFIOPCIDevice {
>      bool clear_parent_atomics_on_exit;
>      VFIODisplay *dpy;
>      Notifier irqchip_change_notifier;
> +    VFIOCXL cxl;
>  };
>  
>  /* Use uin32_t for vendor & device so PCI_ANY_ID expands and cannot match hw */
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index b70a0b95ff..fbf5786d00 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -117,6 +117,8 @@ extern bool pci_available;
>  #define PCI_DEVICE_ID_REDHAT_UFS         0x0013
>  #define PCI_DEVICE_ID_REDHAT_QXL         0x0100
>  
> +#define PCI_VENDOR_ID_CXL                0x1e98
> +
>  #define FMT_PCIBUS                      PRIx64
>  
>  typedef uint64_t pcibus_t;
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index da43d27352..1c998c3ed6 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -56,6 +56,7 @@ typedef struct VFIORegion {
>      uint32_t nr_mmaps;
>      VFIOMmap *mmaps;
>      uint8_t nr; /* cache the region number for debug */
> +    void (*notify_change)(void *, hwaddr, uint64_t, unsigned);
>  } VFIORegion;
>  
>  typedef struct VFIOMigration {
> diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
> index 16db89071e..22fb50ed34 100644
> --- a/linux-headers/linux/vfio.h
> +++ b/linux-headers/linux/vfio.h
> @@ -214,6 +214,7 @@ struct vfio_device_info {
>  #define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6)	/* vfio-fsl-mc device */
>  #define VFIO_DEVICE_FLAGS_CAPS	(1 << 7)	/* Info supports caps */
>  #define VFIO_DEVICE_FLAGS_CDX	(1 << 8)	/* vfio-cdx device */
> +#define VFIO_DEVICE_FLAGS_CXL	(1 << 9)	/* vfio-cxl device */
>  	__u32	num_regions;	/* Max region index + 1 */
>  	__u32	num_irqs;	/* Max IRQ index + 1 */
>  	__u32   cap_offset;	/* Offset within info struct of first cap */
> @@ -255,6 +256,16 @@ struct vfio_device_info_cap_pci_atomic_comp {
>  	__u32 reserved;
>  };
>  
> +#define VFIO_DEVICE_INFO_CAP_CXL               6
> +struct vfio_device_info_cap_cxl {
> +	struct vfio_info_cap_header header;
> +	__u8 hdm_count;
> +	__u8 hdm_regs_bar_index;
> +	__u64 hdm_regs_size;
> +	__u64 hdm_regs_offset;
> +	__u64 dpa_size;
> +};
> +
>  /**
>   * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
>   *				       struct vfio_region_info)
> @@ -371,6 +382,9 @@ struct vfio_region_info_cap_type {
>  /* sub-types for VFIO_REGION_TYPE_GFX */
>  #define VFIO_REGION_SUBTYPE_GFX_EDID            (1)
>  
> +/* sub-types for VFIO CXL region */
> +#define VFIO_REGION_SUBTYPE_CXL                 (1)
> +
>  /**
>   * struct vfio_region_gfx_edid - EDID region layout.
>   *
diff mbox series

Patch

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 9aac21abb7..6dea606f62 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -237,6 +237,9 @@  void vfio_region_write(void *opaque, hwaddr addr,
         break;
     }
 
+    if (region->notify_change)
+        region->notify_change(opaque, addr, data, size);
+
     if (pwrite(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) {
         error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64
                      ",%d) failed: %m",
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index a205c6b113..431a588252 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -23,6 +23,7 @@ 
 #include <sys/ioctl.h>
 
 #include "hw/hw.h"
+#include "hw/cxl/cxl_component.h"
 #include "hw/pci/msi.h"
 #include "hw/pci/msix.h"
 #include "hw/pci/pci_bridge.h"
@@ -2743,6 +2744,72 @@  int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
     return 0;
 }
 
+static bool read_region(VFIORegion *region, uint32_t *val, uint64_t offset)
+{
+    VFIODevice *vbasedev = region->vbasedev;
+
+    if (pread(vbasedev->fd, val, 4, region->fd_offset + offset) != 4) {
+        error_report("%s(%s, 0x%lx, 0x%x, 0x%x) failed: %m",
+                     __func__,vbasedev->name, offset, *val, 4);
+        return false;
+    }
+    return true;
+}
+
+static void vfio_cxl_hdm_regs_changed(void *opaque, hwaddr addr,
+                                      uint64_t data, unsigned size)
+{
+    VFIORegion *region = opaque;
+    VFIODevice *vbasedev = region->vbasedev;
+    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
+    VFIOCXL *cxl = &vdev->cxl;
+    MemoryRegion *address_space_mem = pci_get_bus(&vdev->pdev)->address_space_mem;
+    uint64_t offset, reg_offset, index;
+    uint32_t cur_val, write_val;
+
+    if (size != 4 || (addr & 0x3))
+        error_report("hdm_regs_changed: unsupported size or unaligned addr!\n");
+
+    offset = addr - cxl->hdm_regs_offset;
+    index = (offset - 0x10) / 0x20;
+    reg_offset = offset - 0x20 * index;
+
+    if (reg_offset != 0x20)
+        return;
+
+#define READ_REGION(val, offset) do { \
+    if (!read_region(region, val, offset)) \
+        return; \
+    } while(0)
+
+    write_val = (uint32_t)data;
+    READ_REGION(&cur_val, cxl->hdm_regs_offset + 0x20 * index + reg_offset);
+
+    if (!(cur_val & (1 << 10)) && (write_val & (1 << 9))) {
+        memory_region_transaction_begin();
+        memory_region_del_subregion(address_space_mem, cxl->region.mem);
+        memory_region_transaction_commit();
+    } else if (cur_val & (1 << 10) && !(write_val & (1 << 9))) {
+        /* commit -> not commit */
+        uint32_t base_hi, base_lo;
+        uint64_t base;
+
+        /* locked */
+        if (cur_val & (1 << 8))
+            return;
+
+        READ_REGION(&base_lo, cxl->hdm_regs_offset +  0x20 * index + 0x10);
+        READ_REGION(&base_hi, cxl->hdm_regs_offset +  0x20 * index + 0x14);
+
+        base = ((uint64_t)base_hi << 32) | (uint64_t)(base_lo >> 28);
+
+        memory_region_transaction_begin();
+        memory_region_add_subregion_overlap(address_space_mem,
+                                            base, cxl->region.mem, 0);
+        memory_region_transaction_commit();
+    }
+}
+
 static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
 {
     VFIODevice *vbasedev = &vdev->vbasedev;
@@ -2780,6 +2847,11 @@  static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
         }
 
         QLIST_INIT(&vdev->bars[i].quirks);
+
+        if (vbasedev->flags & VFIO_DEVICE_FLAGS_CXL &&
+            i == vdev->cxl.hdm_regs_bar_index) {
+            vdev->bars[i].region.notify_change = vfio_cxl_hdm_regs_changed;
+        }
     }
 
     ret = vfio_get_region_info(vbasedev,
@@ -2974,6 +3046,62 @@  static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
     vdev->req_enabled = false;
 }
 
+static int vfio_cxl_setup(VFIOPCIDevice *vdev)
+{
+    VFIODevice *vbasedev = &vdev->vbasedev;
+    struct VFIOCXL *cxl = &vdev->cxl;
+    struct vfio_device_info_cap_cxl *cap;
+    g_autofree struct vfio_device_info *info = NULL;
+    struct vfio_info_cap_header *hdr;
+    struct vfio_region_info *region_info;
+    int ret;
+
+    if (!(vbasedev->flags & VFIO_DEVICE_FLAGS_CXL))
+        return 0;
+
+    info = vfio_get_device_info(vbasedev->fd);
+    if (!info) {
+        return -ENODEV;
+    }
+
+    hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_CXL);
+    if (!hdr) {
+        return -ENODEV;
+    }
+
+    cap = (void *)hdr;
+
+    cxl->hdm_count = cap->hdm_count;
+    cxl->hdm_regs_bar_index = cap->hdm_regs_bar_index;
+    cxl->hdm_regs_size = cap->hdm_regs_size;
+    cxl->hdm_regs_offset = cap->hdm_regs_offset;
+    cxl->dpa_size = cap->dpa_size;
+
+    ret = vfio_get_dev_region_info(vbasedev,
+            VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_CXL,
+            VFIO_REGION_SUBTYPE_CXL, &region_info);
+    if (ret) {
+        error_report("does not support requested CXL feature");
+        return ret;
+    }
+
+    ret = vfio_region_setup(OBJECT(vdev), vbasedev, &cxl->region,
+            region_info->index, "cxl region");
+    if (ret) {
+        error_report("fail to setup CXL region");
+        return ret;
+    }
+
+    g_free(region_info);
+
+    if (vfio_region_mmap(&cxl->region)) {
+        error_report("Failed to mmap %s cxl region",
+                     vdev->vbasedev.name);
+        return -EFAULT;
+    }
+    return 0;
+}
+
 static void vfio_realize(PCIDevice *pdev, Error **errp)
 {
     VFIOPCIDevice *vdev = VFIO_PCI(pdev);
@@ -3083,6 +3211,12 @@  static void vfio_realize(PCIDevice *pdev, Error **errp)
         goto error;
     }
 
+    ret = vfio_cxl_setup(vdev);
+    if (ret) {
+        vfio_put_group(group);
+        goto error;
+    }
+
     vfio_populate_device(vdev, &err);
     if (err) {
         error_propagate(errp, err);
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index a2771b9ff3..6c5f5c1ea5 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -118,6 +118,15 @@  typedef struct VFIOMSIXInfo {
 #define TYPE_VFIO_PCI "vfio-pci"
 OBJECT_DECLARE_SIMPLE_TYPE(VFIOPCIDevice, VFIO_PCI)
 
+typedef struct VFIOCXL {
+    uint8_t hdm_count;
+    uint8_t hdm_regs_bar_index;
+    uint64_t hdm_regs_size;
+    uint64_t hdm_regs_offset;
+    uint64_t dpa_size;
+    VFIORegion region;
+} VFIOCXL;
+
 struct VFIOPCIDevice {
     PCIDevice pdev;
     VFIODevice vbasedev;
@@ -177,6 +186,7 @@  struct VFIOPCIDevice {
     bool clear_parent_atomics_on_exit;
     VFIODisplay *dpy;
     Notifier irqchip_change_notifier;
+    VFIOCXL cxl;
 };
 
 /* Use uin32_t for vendor & device so PCI_ANY_ID expands and cannot match hw */
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index b70a0b95ff..fbf5786d00 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -117,6 +117,8 @@  extern bool pci_available;
 #define PCI_DEVICE_ID_REDHAT_UFS         0x0013
 #define PCI_DEVICE_ID_REDHAT_QXL         0x0100
 
+#define PCI_VENDOR_ID_CXL                0x1e98
+
 #define FMT_PCIBUS                      PRIx64
 
 typedef uint64_t pcibus_t;
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index da43d27352..1c998c3ed6 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -56,6 +56,7 @@  typedef struct VFIORegion {
     uint32_t nr_mmaps;
     VFIOMmap *mmaps;
     uint8_t nr; /* cache the region number for debug */
+    void (*notify_change)(void *, hwaddr, uint64_t, unsigned);
 } VFIORegion;
 
 typedef struct VFIOMigration {
diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index 16db89071e..22fb50ed34 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -214,6 +214,7 @@  struct vfio_device_info {
 #define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6)	/* vfio-fsl-mc device */
 #define VFIO_DEVICE_FLAGS_CAPS	(1 << 7)	/* Info supports caps */
 #define VFIO_DEVICE_FLAGS_CDX	(1 << 8)	/* vfio-cdx device */
+#define VFIO_DEVICE_FLAGS_CXL	(1 << 9)	/* vfio-cxl device */
 	__u32	num_regions;	/* Max region index + 1 */
 	__u32	num_irqs;	/* Max IRQ index + 1 */
 	__u32   cap_offset;	/* Offset within info struct of first cap */
@@ -255,6 +256,16 @@  struct vfio_device_info_cap_pci_atomic_comp {
 	__u32 reserved;
 };
 
+#define VFIO_DEVICE_INFO_CAP_CXL               6
+struct vfio_device_info_cap_cxl {
+	struct vfio_info_cap_header header;
+	__u8 hdm_count;
+	__u8 hdm_regs_bar_index;
+	__u64 hdm_regs_size;
+	__u64 hdm_regs_offset;
+	__u64 dpa_size;
+};
+
 /**
  * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
  *				       struct vfio_region_info)
@@ -371,6 +382,9 @@  struct vfio_region_info_cap_type {
 /* sub-types for VFIO_REGION_TYPE_GFX */
 #define VFIO_REGION_SUBTYPE_GFX_EDID            (1)
 
+/* sub-types for VFIO CXL region */
+#define VFIO_REGION_SUBTYPE_CXL                 (1)
+
 /**
  * struct vfio_region_gfx_edid - EDID region layout.
  *