diff mbox series

[RFC,v4,3/3] hw/cxl: Multi-Region CXL Type-3 Devices (Volatile and Persistent)

Message ID 20221128150157.97724-4-gregory.price@memverge.com
State New, archived
Headers show
Series CXL Type-3 Volatile Memory Support | expand

Commit Message

Gregory Price Nov. 28, 2022, 3:01 p.m. UTC
From: Gregory Price <gourry.memverge@gmail.com>

This commit enables each CXL Type-3 device to contain one volatile
memory region and one persistent region.

Two new properties have been added to cxl-type3 device initialization:
    [volatile-memdev] and [persistent-memdev]

The existing [memdev] property has been deprecated and will default the
memory region to a persistent memory region (although a user may assign
the region to a ram or file backed region). It cannot be used in
combination with the new [persistent-memdev] property.

Partitioning volatile memory from persistent memory is not yet supported.

Volatile memory is mapped at DPA(0x0), while Persistent memory is mapped
at DPA(vmem->size), per CXL Spec 8.2.9.8.2.0 - Get Partition Info.

Signed-off-by: Gregory Price <gregory.price@memverge.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 docs/system/devices/cxl.rst |  49 ++++--
 hw/cxl/cxl-mailbox-utils.c  |  22 +--
 hw/mem/cxl_type3.c          | 292 +++++++++++++++++++++++++++---------
 include/hw/cxl/cxl_device.h |  11 +-
 tests/qtest/cxl-test.c      |  78 ++++++++--
 5 files changed, 347 insertions(+), 105 deletions(-)

Comments

Fan Ni Dec. 8, 2022, 10:55 p.m. UTC | #1
On Mon, Nov 28, 2022 at 10:01:57AM -0500, Gregory Price wrote:

> From: Gregory Price <gourry.memverge@gmail.com>
> 
> This commit enables each CXL Type-3 device to contain one volatile
> memory region and one persistent region.
> 
> Two new properties have been added to cxl-type3 device initialization:
>     [volatile-memdev] and [persistent-memdev]
> 
> The existing [memdev] property has been deprecated and will default the
> memory region to a persistent memory region (although a user may assign
> the region to a ram or file backed region). It cannot be used in
> combination with the new [persistent-memdev] property.
> 
> Partitioning volatile memory from persistent memory is not yet supported.
> 
> Volatile memory is mapped at DPA(0x0), while Persistent memory is mapped
> at DPA(vmem->size), per CXL Spec 8.2.9.8.2.0 - Get Partition Info.
> 
> Signed-off-by: Gregory Price <gregory.price@memverge.com>
> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> ---
>  docs/system/devices/cxl.rst |  49 ++++--
>  hw/cxl/cxl-mailbox-utils.c  |  22 +--
>  hw/mem/cxl_type3.c          | 292 +++++++++++++++++++++++++++---------
>  include/hw/cxl/cxl_device.h |  11 +-
>  tests/qtest/cxl-test.c      |  78 ++++++++--
>  5 files changed, 347 insertions(+), 105 deletions(-)
> 
> diff --git a/docs/system/devices/cxl.rst b/docs/system/devices/cxl.rst
> index f25783a4ec..45639a676a 100644
> --- a/docs/system/devices/cxl.rst
> +++ b/docs/system/devices/cxl.rst
> @@ -300,7 +300,7 @@ Example topology involving a switch::
>  
>  Example command lines
>  ---------------------
> -A very simple setup with just one directly attached CXL Type 3 device::
> +A very simple setup with just one directly attached CXL Type 3 Persistent Memory device::
>  
>    qemu-system-aarch64 -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 -cpu max \
>    ...
> @@ -308,7 +308,28 @@ A very simple setup with just one directly attached CXL Type 3 device::
>    -object memory-backend-file,id=cxl-lsa1,share=on,mem-path=/tmp/lsa.raw,size=256M \
>    -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
>    -device cxl-rp,port=0,bus=cxl.1,id=root_port13,chassis=0,slot=2 \
> -  -device cxl-type3,bus=root_port13,memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem0 \
> +  -device cxl-type3,bus=root_port13,persistent-memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem0 \
> +  -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G
> +
> +A very simple setup with just one directly attached CXL Type 3 Volatile Memory device::
> +
> +  qemu-system-aarch64 -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 -cpu max \
> +  ...
> +  -object memory-backend-ram,id=vmem0,share=on,size=256M \
> +  -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
> +  -device cxl-rp,port=0,bus=cxl.1,id=root_port13,chassis=0,slot=2 \
> +  -device cxl-type3,bus=root_port13,volatile-memdev=vmem0,id=cxl-vmem0 \
> +  -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G
> +
> +The same volatile setup may optionally include an LSA region::
> +
> +  qemu-system-aarch64 -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 -cpu max \
> +  ...
> +  -object memory-backend-ram,id=vmem0,share=on,size=256M \
> +  -object memory-backend-file,id=cxl-lsa0,share=on,mem-path=/tmp/lsa.raw,size=256M \
> +  -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
> +  -device cxl-rp,port=0,bus=cxl.1,id=root_port13,chassis=0,slot=2 \
> +  -device cxl-type3,bus=root_port13,volatile-memdev=vmem0,lsa=cxl-lsa0,id=cxl-vmem0 \
>    -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G
>  
>  A setup suitable for 4 way interleave. Only one fixed window provided, to enable 2 way
> @@ -328,13 +349,13 @@ the CXL Type3 device directly attached (no switches).::
>    -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
>    -device pxb-cxl,bus_nr=222,bus=pcie.0,id=cxl.2 \
>    -device cxl-rp,port=0,bus=cxl.1,id=root_port13,chassis=0,slot=2 \
> -  -device cxl-type3,bus=root_port13,memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem0 \
> +  -device cxl-type3,bus=root_port13,persistent-memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem0 \
>    -device cxl-rp,port=1,bus=cxl.1,id=root_port14,chassis=0,slot=3 \
> -  -device cxl-type3,bus=root_port14,memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem1 \
> +  -device cxl-type3,bus=root_port14,persistent-memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem1 \
>    -device cxl-rp,port=0,bus=cxl.2,id=root_port15,chassis=0,slot=5 \
> -  -device cxl-type3,bus=root_port15,memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem2 \
> +  -device cxl-type3,bus=root_port15,persistent-memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem2 \
>    -device cxl-rp,port=1,bus=cxl.2,id=root_port16,chassis=0,slot=6 \
> -  -device cxl-type3,bus=root_port16,memdev=cxl-mem4,lsa=cxl-lsa4,id=cxl-pmem3 \
> +  -device cxl-type3,bus=root_port16,persistent-memdev=cxl-mem4,lsa=cxl-lsa4,id=cxl-pmem3 \
>    -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.targets.1=cxl.2,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=8k
>  
>  An example of 4 devices below a switch suitable for 1, 2 or 4 way interleave::
> @@ -354,15 +375,23 @@ An example of 4 devices below a switch suitable for 1, 2 or 4 way interleave::
>    -device cxl-rp,port=1,bus=cxl.1,id=root_port1,chassis=0,slot=1 \
>    -device cxl-upstream,bus=root_port0,id=us0 \
>    -device cxl-downstream,port=0,bus=us0,id=swport0,chassis=0,slot=4 \
> -  -device cxl-type3,bus=swport0,memdev=cxl-mem0,lsa=cxl-lsa0,id=cxl-pmem0,size=256M \
> +  -device cxl-type3,bus=swport0,persistent-memdev=cxl-mem0,lsa=cxl-lsa0,id=cxl-pmem0,size=256M \
>    -device cxl-downstream,port=1,bus=us0,id=swport1,chassis=0,slot=5 \
> -  -device cxl-type3,bus=swport1,memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem1,size=256M \
> +  -device cxl-type3,bus=swport1,persistent-memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem1,size=256M \
>    -device cxl-downstream,port=2,bus=us0,id=swport2,chassis=0,slot=6 \
> -  -device cxl-type3,bus=swport2,memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem2,size=256M \
> +  -device cxl-type3,bus=swport2,persistent-memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem2,size=256M \
>    -device cxl-downstream,port=3,bus=us0,id=swport3,chassis=0,slot=7 \
> -  -device cxl-type3,bus=swport3,memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3,size=256M \
> +  -device cxl-type3,bus=swport3,persistent-memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3,size=256M \
>    -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k
>  
> +Deprecations
> +------------
> +
> +The Type 3 device [memdev] attribute has been deprecated in favor
> +of the [persistent-memdev] and [volatile-memdev] attributes. [memdev]
> +will default to a persistent memory device for backward compatibility
> +and is incapable of being used in combination with [persistent-memdev].
> +
>  Kernel Configuration Options
>  ----------------------------
>  
> diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
> index d7543fd5b4..44cebb950a 100644
> --- a/hw/cxl/cxl-mailbox-utils.c
> +++ b/hw/cxl/cxl-mailbox-utils.c
> @@ -269,7 +269,8 @@ static ret_code cmd_firmware_update_get_info(struct cxl_cmd *cmd,
>      } QEMU_PACKED *fw_info;
>      QEMU_BUILD_BUG_ON(sizeof(*fw_info) != 0x50);
>  
> -    if (cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER) {
> +    if ((cxl_dstate->vmem_size < CXL_CAPACITY_MULTIPLIER) ||
> +        (cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER)) {
>          return CXL_MBOX_INTERNAL_ERROR;
>      }
For a cxl configuration with only pmem or vmem, vmem_size or pmem_size
can be 0 and fail the check? 

>  
> @@ -412,20 +413,20 @@ static ret_code cmd_identify_memory_device(struct cxl_cmd *cmd,
>  
>      CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
>      CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
> -    uint64_t size = cxl_dstate->pmem_size;
>  
> -    if (!QEMU_IS_ALIGNED(size, CXL_CAPACITY_MULTIPLIER)) {
> +    if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
> +        (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
>          return CXL_MBOX_INTERNAL_ERROR;
>      }
>  
>      id = (void *)cmd->payload;
>      memset(id, 0, sizeof(*id));
>  
> -    /* PMEM only */
>      snprintf(id->fw_revision, 0x10, "BWFW VERSION %02d", 0);
>  
> -    id->total_capacity = size / CXL_CAPACITY_MULTIPLIER;
> -    id->persistent_capacity = size / CXL_CAPACITY_MULTIPLIER;
> +    id->total_capacity = cxl_dstate->mem_size / CXL_CAPACITY_MULTIPLIER;
> +    id->persistent_capacity = cxl_dstate->pmem_size / CXL_CAPACITY_MULTIPLIER;
> +    id->volatile_capacity = cxl_dstate->vmem_size / CXL_CAPACITY_MULTIPLIER;
>      id->lsa_size = cvc->get_lsa_size(ct3d);
>      id->poison_list_max_mer[1] = 0x1; /* 256 poison records */
>  
> @@ -444,16 +445,15 @@ static ret_code cmd_ccls_get_partition_info(struct cxl_cmd *cmd,
>          uint64_t next_pmem;
>      } QEMU_PACKED *part_info = (void *)cmd->payload;
>      QEMU_BUILD_BUG_ON(sizeof(*part_info) != 0x20);
> -    uint64_t size = cxl_dstate->pmem_size;
>  
> -    if (!QEMU_IS_ALIGNED(size, CXL_CAPACITY_MULTIPLIER)) {
> +    if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
> +        (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
>          return CXL_MBOX_INTERNAL_ERROR;
>      }
>  
> -    /* PMEM only */
> -    part_info->active_vmem = 0;
> +    part_info->active_vmem = cxl_dstate->vmem_size / CXL_CAPACITY_MULTIPLIER;
>      part_info->next_vmem = 0;
> -    part_info->active_pmem = size / CXL_CAPACITY_MULTIPLIER;
> +    part_info->active_pmem = cxl_dstate->pmem_size / CXL_CAPACITY_MULTIPLIER;
>      part_info->next_pmem = 0;
>  
>      *len = sizeof(*part_info);
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 0317bd96a6..81dc3def01 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -32,7 +32,8 @@ enum {
>  };
>  
>  static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
> -                                         int dsmad_handle, MemoryRegion *mr)
> +                                         int dsmad_handle, MemoryRegion *mr,
> +                                         bool is_pmem, uint64_t dpa_base)
>  {
>      g_autofree CDATDsmas *dsmas = NULL;
>      g_autofree CDATDslbis *dslbis0 = NULL;
> @@ -51,8 +52,8 @@ static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
>              .length = sizeof(*dsmas),
>          },
>          .DSMADhandle = dsmad_handle,
> -        .flags = CDAT_DSMAS_FLAG_NV,
> -        .DPA_base = 0,
> +        .flags = is_pmem ? CDAT_DSMAS_FLAG_NV : 0,
> +        .DPA_base = dpa_base,
>          .DPA_length = int128_get64(mr->size),
>      };
>  
> @@ -151,33 +152,67 @@ static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
>  static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
>  {
>      g_autofree CDATSubHeader **table = NULL;
> -    MemoryRegion *nonvolatile_mr;
>      CXLType3Dev *ct3d = priv;
> +    MemoryRegion *volatile_mr = NULL, *nonvolatile_mr = NULL;
>      int dsmad_handle = 0;
> +    int cur_ent = 0;
> +    int len = 0;
>      int rc;
>  
> -    if (!ct3d->hostmem) {
> +    if (!ct3d->hostpmem && !ct3d->hostvmem) {
>          return 0;
>      }
>  
> -    nonvolatile_mr = host_memory_backend_get_memory(ct3d->hostmem);
> -    if (!nonvolatile_mr) {
> -        return -EINVAL;
> +    if (ct3d->hostvmem) {
> +        volatile_mr = host_memory_backend_get_memory(ct3d->hostvmem);
> +        if (!volatile_mr) {
> +            return -EINVAL;
> +        }
> +        len += CT3_CDAT_NUM_ENTRIES;
> +    }
> +
> +    if (ct3d->hostpmem) {
> +        nonvolatile_mr = host_memory_backend_get_memory(ct3d->hostpmem);
> +        if (!nonvolatile_mr) {
> +            return -EINVAL;
> +        }
> +        len += CT3_CDAT_NUM_ENTRIES;
>      }
>  
> -    table = g_malloc0(CT3_CDAT_NUM_ENTRIES * sizeof(*table));
> +    table = g_malloc0(len * sizeof(*table));
>      if (!table) {
>          return -ENOMEM;
>      }
>  
> -    rc = ct3_build_cdat_entries_for_mr(table, dsmad_handle++, nonvolatile_mr);
> -    if (rc < 0) {
> -        return rc;
> +    /* Now fill them in */
> +    if (volatile_mr) {
> +        rc = ct3_build_cdat_entries_for_mr(table, dsmad_handle++, volatile_mr,
> +                                           true, 0);
> +        if (rc < 0) {
> +            return rc;
> +        }
> +        cur_ent = CT3_CDAT_NUM_ENTRIES;
> +    }
> +
> +    if (nonvolatile_mr) {
> +        rc = ct3_build_cdat_entries_for_mr(&(table[cur_ent]), dsmad_handle++,
> +                nonvolatile_mr, true, (volatile_mr ? volatile_mr->size : 0));
> +        if (rc < 0) {
> +            goto error_cleanup;
> +        }
> +        cur_ent += CT3_CDAT_NUM_ENTRIES;
>      }
> +    assert(len == cur_ent);
>  
>      *cdat_table = g_steal_pointer(&table);
>  
> -    return CT3_CDAT_NUM_ENTRIES;
> +    return len;
> +error_cleanup:
> +    int i;
> +    for (i = 0; i < cur_ent; i++) {
> +        g_free(table[i]);
> +    }
> +    return rc;
>  }

I hit an error when compiling with gcc version 9.4.0
(Ubuntu 9.4.0-1ubuntu1~20.04.1), maybe moving the declaration of `i` to
the following loop.


../hw/mem/cxl_type3.c:211:5: error: a label can only be part of a statement
and a declaration is not a statement
  211 |     int i;
      |     ^~~
>  
>  static void ct3_free_cdat_table(CDATSubHeader **cdat_table, int num, void *priv)
> @@ -378,16 +413,48 @@ static void build_dvsecs(CXLType3Dev *ct3d)
>      CXLDVSECRegisterLocator *regloc_dvsec;
>      uint8_t *dvsec;
>      int i;
> +    uint32_t range1_size_hi, range1_size_lo,
> +             range1_base_hi, range1_base_lo,
> +             range2_size_hi = 0, range2_size_lo = 0,
> +             range2_base_hi = 0, range2_base_lo = 0;
> +
> +    /*
> +     * Volatile memory is mapped as (0x0)
> +     * Persistent memory is mapped at (volatile->size)
> +     */
> +    if (ct3d->hostvmem) {
> +        range1_size_hi = ct3d->hostvmem->size >> 32;
> +        range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
> +                         (ct3d->hostvmem->size & 0xF0000000);
> +        range1_base_hi = 0;
> +        range1_base_lo = 0;
> +        if (ct3d->hostpmem) {
> +            range2_size_hi = ct3d->hostpmem->size >> 32;
> +            range2_size_lo = (2 << 5) | (2 << 2) | 0x3 |
> +                             (ct3d->hostpmem->size & 0xF0000000);
> +            range2_base_hi = ct3d->hostvmem->size >> 32;
> +            range2_base_lo = ct3d->hostvmem->size & 0xF0000000;
> +        }
> +    } else {
> +        range1_size_hi = ct3d->hostpmem->size >> 32;
> +        range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
> +                         (ct3d->hostpmem->size & 0xF0000000);
> +        range1_base_hi = 0;
> +        range1_base_lo = 0;
> +    }
>  
>      dvsec = (uint8_t *)&(CXLDVSECDevice){
>          .cap = 0x1e,
>          .ctrl = 0x2,
>          .status2 = 0x2,
> -        .range1_size_hi = ct3d->hostmem->size >> 32,
> -        .range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
> -        (ct3d->hostmem->size & 0xF0000000),
> -        .range1_base_hi = 0,
> -        .range1_base_lo = 0,
> +        .range1_size_hi = range1_size_hi,
> +        .range1_size_lo = range1_size_lo,
> +        .range1_base_hi = range1_base_hi,
> +        .range1_base_lo = range1_base_lo,
> +        .range2_size_hi = range2_size_hi,
> +        .range2_size_lo = range2_size_lo,
> +        .range2_base_hi = range2_base_hi,
> +        .range2_base_lo = range2_base_lo
>      };
>      cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE,
>                                 PCIE_CXL_DEVICE_DVSEC_LENGTH,
> @@ -475,33 +542,62 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
>      MemoryRegion *mr;
>      char *name;
>  
> -    if (!ct3d->hostmem) {
> -        error_setg(errp, "memdev property must be set");
> +    if (!ct3d->hostmem && !ct3d->hostvmem && !ct3d->hostpmem) {
> +        error_setg(errp, "at least one memdev property must be set");
>          return false;
> +    } else if (ct3d->hostmem && ct3d->hostpmem) {
> +        error_setg(errp, "[memdev] cannot be used with new "
> +                         "[persistent-memdev] property");
> +        return false;
> +    } else if (ct3d->hostmem) {
> +        /* Use of hostmem property implies pmem */
> +        ct3d->hostpmem = ct3d->hostmem;
> +        ct3d->hostmem = NULL;
>      }
>  
> -    mr = host_memory_backend_get_memory(ct3d->hostmem);
> -    if (!mr) {
> -        error_setg(errp, "memdev property must be set");
> +    if (ct3d->hostpmem && !ct3d->lsa) {
> +        error_setg(errp, "lsa property must be set for persistent devices");
>          return false;
>      }
> -    memory_region_set_nonvolatile(mr, true);
> -    memory_region_set_enabled(mr, true);
> -    host_memory_backend_set_mapped(ct3d->hostmem, true);
>  
> -    if (ds->id) {
> -        name = g_strdup_printf("cxl-type3-dpa-space:%s", ds->id);
> -    } else {
> -        name = g_strdup("cxl-type3-dpa-space");
> +    if (ct3d->hostvmem) {
> +        mr = host_memory_backend_get_memory(ct3d->hostvmem);
> +        if (!mr) {
> +            error_setg(errp, "volatile memdev must have backing device");
> +            return false;
> +        }
> +        memory_region_set_nonvolatile(mr, false);
> +        memory_region_set_enabled(mr, true);
> +        host_memory_backend_set_mapped(ct3d->hostvmem, true);
> +        if (ds->id) {
> +            name = g_strdup_printf("cxl-type3-dpa-vmem-space:%s", ds->id);
> +        } else {
> +            name = g_strdup("cxl-type3-dpa-vmem-space");
> +        }
> +        address_space_init(&ct3d->hostvmem_as, mr, name);
> +        ct3d->cxl_dstate.vmem_size = mr->size;
> +        ct3d->cxl_dstate.mem_size += mr->size;
> +        g_free(name);
>      }
> -    address_space_init(&ct3d->hostmem_as, mr, name);
> -    g_free(name);
> -
> -    ct3d->cxl_dstate.pmem_size = ct3d->hostmem->size;
>  
> -    if (!ct3d->lsa) {
> -        error_setg(errp, "lsa property must be set");
> -        return false;
> +    if (ct3d->hostpmem) {
> +        mr = host_memory_backend_get_memory(ct3d->hostpmem);
> +        if (!mr) {
> +            error_setg(errp, "persistent memdev must have backing device");
> +            return false;
> +        }
> +        memory_region_set_nonvolatile(mr, true);
> +        memory_region_set_enabled(mr, true);
> +        host_memory_backend_set_mapped(ct3d->hostpmem, true);
> +        if (ds->id) {
> +            name = g_strdup_printf("cxl-type3-dpa-pmem-space:%s", ds->id);
> +        } else {
> +            name = g_strdup("cxl-type3-dpa-pmem-space");
> +        }
> +        address_space_init(&ct3d->hostpmem_as, mr, name);
> +        ct3d->cxl_dstate.pmem_size = mr->size;
> +        ct3d->cxl_dstate.mem_size += mr->size;
> +        g_free(name);
>      }
>  
>      return true;
> @@ -609,7 +705,12 @@ err_free_spdm_socket:
>  err_free_special_ops:
>      g_free(regs->special_ops);
>  err_address_space_free:
> -    address_space_destroy(&ct3d->hostmem_as);
> +    if (ct3d->hostvmem) {
> +        address_space_destroy(&ct3d->hostvmem_as);
> +    }
> +    if (ct3d->hostpmem) {
> +        address_space_destroy(&ct3d->hostpmem_as);
> +    }
>      return;
>  }
>  
> @@ -623,7 +724,12 @@ static void ct3_exit(PCIDevice *pci_dev)
>      cxl_doe_cdat_release(cxl_cstate);
>      spdm_sock_fini(ct3d->doe_spdm.socket);
>      g_free(regs->special_ops);
> -    address_space_destroy(&ct3d->hostmem_as);
> +    if (ct3d->hostvmem) {
> +        address_space_destroy(&ct3d->hostvmem_as);
> +    }
> +    if (ct3d->hostpmem) {
> +        address_space_destroy(&ct3d->hostpmem_as);
> +    }
>  }
>  
>  /* TODO: Support multiple HDM decoders and DPA skip */
> @@ -663,11 +769,17 @@ MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data,
>  {
>      CXLType3Dev *ct3d = CXL_TYPE3(d);
>      uint64_t dpa_offset;
> -    MemoryRegion *mr;
> +    MemoryRegion *vmr = NULL, *pmr = NULL;
> +    AddressSpace *as;
>  
> -    /* TODO support volatile region */
> -    mr = host_memory_backend_get_memory(ct3d->hostmem);
> -    if (!mr) {
> +    if (ct3d->hostvmem) {
> +        vmr = host_memory_backend_get_memory(ct3d->hostvmem);
> +    }
> +    if (ct3d->hostpmem) {
> +        pmr = host_memory_backend_get_memory(ct3d->hostpmem);
> +    }
> +
> +    if (!vmr && !pmr) {
>          return MEMTX_ERROR;
>      }
>  
> @@ -675,11 +787,22 @@ MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data,
>          return MEMTX_ERROR;
>      }
>  
> -    if (dpa_offset > int128_get64(mr->size)) {
> +    if (dpa_offset > int128_get64(ct3d->cxl_dstate.mem_size)) {
>          return MEMTX_ERROR;
>      }
>  
> -    return address_space_read(&ct3d->hostmem_as, dpa_offset, attrs, data, size);
> +    if (vmr) {
> +        if (dpa_offset <= int128_get64(vmr->size)) {
> +            as = &ct3d->hostvmem_as;
> +        } else {
> +            as = &ct3d->hostpmem_as;
> +            dpa_offset -= vmr->size;
> +        }
> +    }
> +    else {
> +        as = &ct3d->hostpmem_as;
> +    }
> +    return address_space_read(as, dpa_offset, attrs, data, size);
>  }
>  
>  MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data,
> @@ -687,10 +810,17 @@ MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data,
>  {
>      CXLType3Dev *ct3d = CXL_TYPE3(d);
>      uint64_t dpa_offset;
> -    MemoryRegion *mr;
> +    MemoryRegion *vmr = NULL, *pmr = NULL;
> +    AddressSpace *as;
>  
> -    mr = host_memory_backend_get_memory(ct3d->hostmem);
> -    if (!mr) {
> +    if (ct3d->hostvmem) {
> +        vmr = host_memory_backend_get_memory(ct3d->hostvmem);
> +    }
> +    if (ct3d->hostpmem) {
> +        pmr = host_memory_backend_get_memory(ct3d->hostpmem);
> +    }
> +
> +    if (!vmr && !pmr) {
>          return MEMTX_OK;
>      }
>  
> @@ -698,11 +828,22 @@ MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data,
>          return MEMTX_OK;
>      }
>  
> -    if (dpa_offset > int128_get64(mr->size)) {
> +    if (dpa_offset > int128_get64(ct3d->cxl_dstate.mem_size)) {
>          return MEMTX_OK;
>      }
> -    return address_space_write(&ct3d->hostmem_as, dpa_offset, attrs,
> -                               &data, size);
> +
> +    if (vmr) {
> +        if (dpa_offset <= int128_get64(vmr->size)) {
> +            as = &ct3d->hostvmem_as;
> +        } else {
> +            as = &ct3d->hostpmem_as;
> +            dpa_offset -= vmr->size;
> +        }
> +    }
> +    else {
> +        as = &ct3d->hostpmem_as;
> +    }
> +    return address_space_write(as, dpa_offset, attrs, &data, size);
>  }
>  
>  static void ct3d_reset(DeviceState *dev)
> @@ -717,7 +858,11 @@ static void ct3d_reset(DeviceState *dev)
>  
>  static Property ct3_props[] = {
>      DEFINE_PROP_LINK("memdev", CXLType3Dev, hostmem, TYPE_MEMORY_BACKEND,
> -                     HostMemoryBackend *),
> +                     HostMemoryBackend *), /* for backward compatibility */
> +    DEFINE_PROP_LINK("persistent-memdev", CXLType3Dev, hostpmem,
> +                     TYPE_MEMORY_BACKEND, HostMemoryBackend *),
> +    DEFINE_PROP_LINK("volatile-memdev", CXLType3Dev, hostvmem,
> +                     TYPE_MEMORY_BACKEND, HostMemoryBackend *),
>      DEFINE_PROP_LINK("lsa", CXLType3Dev, lsa, TYPE_MEMORY_BACKEND,
>                       HostMemoryBackend *),
>      DEFINE_PROP_UINT64("sn", CXLType3Dev, sn, UI64_NULL),
> @@ -728,10 +873,12 @@ static Property ct3_props[] = {
>  
>  static uint64_t get_lsa_size(CXLType3Dev *ct3d)
>  {
> -    MemoryRegion *mr;
> -
> -    mr = host_memory_backend_get_memory(ct3d->lsa);
> -    return memory_region_size(mr);
> +    MemoryRegion *mr = NULL;
> +    if (ct3d->lsa) {
> +        mr = host_memory_backend_get_memory(ct3d->lsa);
> +        return memory_region_size(mr);
> +    }
> +    return 0;
>  }
>  
>  static void validate_lsa_access(MemoryRegion *mr, uint64_t size,
> @@ -744,30 +891,35 @@ static void validate_lsa_access(MemoryRegion *mr, uint64_t size,
>  static uint64_t get_lsa(CXLType3Dev *ct3d, void *buf, uint64_t size,
>                      uint64_t offset)
>  {
> -    MemoryRegion *mr;
> +    MemoryRegion *mr = NULL;
>      void *lsa;
>  
> -    mr = host_memory_backend_get_memory(ct3d->lsa);
> -    validate_lsa_access(mr, size, offset);
> +    if (ct3d->lsa) {
> +        mr = host_memory_backend_get_memory(ct3d->lsa);
> +        validate_lsa_access(mr, size, offset);
>  
> -    lsa = memory_region_get_ram_ptr(mr) + offset;
> -    memcpy(buf, lsa, size);
> +        lsa = memory_region_get_ram_ptr(mr) + offset;
> +        memcpy(buf, lsa, size);
> +        return size;
> +    }
>  
> -    return size;
> +    return 0;
>  }
>  
>  static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size,
>                      uint64_t offset)
>  {
> -    MemoryRegion *mr;
> -    void *lsa;
> +    MemoryRegion *mr = NULL;
> +    void *lsa = NULL;
>  
> -    mr = host_memory_backend_get_memory(ct3d->lsa);
> -    validate_lsa_access(mr, size, offset);
> +    if (ct3d->lsa) {
> +        mr = host_memory_backend_get_memory(ct3d->lsa);
> +        validate_lsa_access(mr, size, offset);
>  
> -    lsa = memory_region_get_ram_ptr(mr) + offset;
> -    memcpy(lsa, buf, size);
> -    memory_region_set_dirty(mr, offset, size);
> +        lsa = memory_region_get_ram_ptr(mr) + offset;
> +        memcpy(lsa, buf, size);
> +        memory_region_set_dirty(mr, offset, size);
> +    }
>  
>      /*
>       * Just like the PMEM, if the guest is not allowed to exit gracefully, label
> @@ -978,7 +1130,7 @@ static void ct3_class_init(ObjectClass *oc, void *data)
>      pc->config_read = ct3d_config_read;
>  
>      set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
> -    dc->desc = "CXL PMEM Device (Type 3)";
> +    dc->desc = "CXL Memory Device (Type 3)";
>      dc->reset = ct3d_reset;
>      device_class_set_props(dc, ct3_props);
>  
> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> index 1cd71afcb6..1b366b739c 100644
> --- a/include/hw/cxl/cxl_device.h
> +++ b/include/hw/cxl/cxl_device.h
> @@ -180,8 +180,10 @@ typedef struct cxl_device_state {
>          uint64_t host_set;
>      } timestamp;
>  
> -    /* memory region for persistent memory, HDM */
> +    /* memory region size, HDM */
> +    uint64_t mem_size;
>      uint64_t pmem_size;
> +    uint64_t vmem_size;
>  
>      struct cxl_cmd (*cxl_cmd_set)[256];
>      /* Move me later */
> @@ -311,12 +313,15 @@ struct CXLType3Dev {
>      PCIDevice parent_obj;
>  
>      /* Properties */
> -    HostMemoryBackend *hostmem;
> +    HostMemoryBackend *hostmem; /* deprecated */
> +    HostMemoryBackend *hostvmem;
> +    HostMemoryBackend *hostpmem;
>      HostMemoryBackend *lsa;
>      uint64_t sn;
>  
>      /* State */
> -    AddressSpace hostmem_as;
> +    AddressSpace hostvmem_as;
> +    AddressSpace hostpmem_as;
>      CXLComponentState cxl_cstate;
>      CXLDeviceState cxl_dstate;
>  
> diff --git a/tests/qtest/cxl-test.c b/tests/qtest/cxl-test.c
> index e59ba22387..6893f54e28 100644
> --- a/tests/qtest/cxl-test.c
> +++ b/tests/qtest/cxl-test.c
> @@ -40,32 +40,46 @@
>    "-device cxl-rp,id=rp2,bus=cxl.1,chassis=0,slot=2 " \
>    "-device cxl-rp,id=rp3,bus=cxl.1,chassis=0,slot=3 "
>  
> -#define QEMU_T3D \
> +#define QEMU_T3D_DEPRECATED \
>    "-object memory-backend-file,id=cxl-mem0,mem-path=%s,size=256M " \
> -  "-object memory-backend-file,id=lsa0,mem-path=%s,size=256M "    \
> +  "-object memory-backend-file,id=lsa0,mem-path=%s,size=256M " \
>    "-device cxl-type3,bus=rp0,memdev=cxl-mem0,lsa=lsa0,id=cxl-pmem0 "
>  
> +#define QEMU_T3D_PMEM \
> +  "-object memory-backend-file,id=m0,mem-path=%s,size=256M " \
> +  "-object memory-backend-file,id=lsa0,mem-path=%s,size=256M " \
> +  "-device cxl-type3,bus=rp0,persistent-memdev=cxl-m0,lsa=lsa0,id=pmem0 "
> +
> +#define QEMU_T3D_VMEM \
> +  "-object memory-backend-ram,id=mem0,size=256M " \
> +  "-device cxl-type3,bus=rp0,volatile-memdev=mem0,id=mem0 "
> +
> +#define QEMU_T3D_VMEM_LSA \
> +  "-object memory-backend-ram,id=mem0,size=256M " \
> +  "-object memory-backend-file,id=lsa0,mem-path=%s,size=256M " \
> +  "-device cxl-type3,bus=rp0,volatile-memdev=mem0,lsa=lsa0,id=mem0 "
> +
>  #define QEMU_2T3D \
>    "-object memory-backend-file,id=cxl-mem0,mem-path=%s,size=256M " \
>    "-object memory-backend-file,id=lsa0,mem-path=%s,size=256M " \
> -  "-device cxl-type3,bus=rp0,memdev=cxl-mem0,lsa=lsa0,id=cxl-pmem0 " \
> +  "-device cxl-type3,bus=rp0,persistent-memdev=cxl-mem0,lsa=lsa0,id=pmem0 " \
>    "-object memory-backend-file,id=cxl-mem1,mem-path=%s,size=256M " \
>    "-object memory-backend-file,id=lsa1,mem-path=%s,size=256M " \
> -  "-device cxl-type3,bus=rp1,memdev=cxl-mem1,lsa=lsa1,id=cxl-pmem1 "
> +  "-device cxl-type3,bus=rp1,persistent-memdev=cxl-mem1,lsa=lsa1,id=pmem1 "
>  
>  #define QEMU_4T3D \
>    "-object memory-backend-file,id=cxl-mem0,mem-path=%s,size=256M " \
>    "-object memory-backend-file,id=lsa0,mem-path=%s,size=256M " \
> -  "-device cxl-type3,bus=rp0,memdev=cxl-mem0,lsa=lsa0,id=cxl-pmem0 " \
> +  "-device cxl-type3,bus=rp0,persistent-memdev=cxl-mem0,lsa=lsa0,id=pmem0 " \
>    "-object memory-backend-file,id=cxl-mem1,mem-path=%s,size=256M " \
>    "-object memory-backend-file,id=lsa1,mem-path=%s,size=256M " \
> -  "-device cxl-type3,bus=rp1,memdev=cxl-mem1,lsa=lsa1,id=cxl-pmem1 " \
> +  "-device cxl-type3,bus=rp1,persistent-memdev=cxl-mem1,lsa=lsa1,id=pmem1 " \
>    "-object memory-backend-file,id=cxl-mem2,mem-path=%s,size=256M " \
>    "-object memory-backend-file,id=lsa2,mem-path=%s,size=256M " \
> -  "-device cxl-type3,bus=rp2,memdev=cxl-mem2,lsa=lsa2,id=cxl-pmem2 " \
> +  "-device cxl-type3,bus=rp2,persistent-memdev=cxl-mem2,lsa=lsa2,id=pmem2 " \
>    "-object memory-backend-file,id=cxl-mem3,mem-path=%s,size=256M " \
>    "-object memory-backend-file,id=lsa3,mem-path=%s,size=256M " \
> -  "-device cxl-type3,bus=rp3,memdev=cxl-mem3,lsa=lsa3,id=cxl-pmem3 "
> +  "-device cxl-type3,bus=rp3,persistent-memdev=cxl-mem3,lsa=lsa3,id=pmem3 "
>  
>  static void cxl_basic_hb(void)
>  {
> @@ -104,14 +118,53 @@ static void cxl_2root_port(void)
>  }
>  
>  #ifdef CONFIG_POSIX
> -static void cxl_t3d(void)
> +static void cxl_t3d_deprecated(void)
> +{
> +    g_autoptr(GString) cmdline = g_string_new(NULL);
> +    g_autofree const char *tmpfs = NULL;
> +
> +    tmpfs = g_dir_make_tmp("cxl-test-XXXXXX", NULL);
> +
> +    g_string_printf(cmdline, QEMU_PXB_CMD QEMU_RP QEMU_T3D_DEPRECATED,
> +                    tmpfs, tmpfs);
> +
> +    qtest_start(cmdline->str);
> +    qtest_end();
> +}
> +
> +static void cxl_t3d_persistent(void)
> +{
> +    g_autoptr(GString) cmdline = g_string_new(NULL);
> +    g_autofree const char *tmpfs = NULL;
> +
> +    tmpfs = g_dir_make_tmp("cxl-test-XXXXXX", NULL);
> +
> +    g_string_printf(cmdline, QEMU_PXB_CMD QEMU_RP QEMU_T3D_PMEM,
> +                    tmpfs, tmpfs);
> +
> +    qtest_start(cmdline->str);
> +    qtest_end();
> +}
> +
> +static void cxl_t3d_volatile(void)
> +{
> +    g_autoptr(GString) cmdline = g_string_new(NULL);
> +
> +    g_string_printf(cmdline, QEMU_PXB_CMD QEMU_RP QEMU_T3D_VMEM);
> +
> +    qtest_start(cmdline->str);
> +    qtest_end();
> +}
> +
> +static void cxl_t3d_volatile_lsa(void)
>  {
>      g_autoptr(GString) cmdline = g_string_new(NULL);
>      g_autofree const char *tmpfs = NULL;
>  
>      tmpfs = g_dir_make_tmp("cxl-test-XXXXXX", NULL);
>  
> -    g_string_printf(cmdline, QEMU_PXB_CMD QEMU_RP QEMU_T3D, tmpfs, tmpfs);
> +    g_string_printf(cmdline, QEMU_PXB_CMD QEMU_RP QEMU_T3D_VMEM_LSA,
> +                    tmpfs);
>  
>      qtest_start(cmdline->str);
>      qtest_end();
> @@ -179,7 +232,10 @@ int main(int argc, char **argv)
>          qtest_add_func("/pci/cxl/rp", cxl_root_port);
>          qtest_add_func("/pci/cxl/rp_x2", cxl_2root_port);
>  #ifdef CONFIG_POSIX
> -        qtest_add_func("/pci/cxl/type3_device", cxl_t3d);
> +        qtest_add_func("/pci/cxl/type3_device", cxl_t3d_deprecated);
> +        qtest_add_func("/pci/cxl/type3_device_pmem", cxl_t3d_persistent);
> +        qtest_add_func("/pci/cxl/type3_device_vmem", cxl_t3d_volatile);
> +        qtest_add_func("/pci/cxl/type3_device_vmem_lsa", cxl_t3d_volatile_lsa);
>          qtest_add_func("/pci/cxl/rp_x2_type3_x2", cxl_1pxb_2rp_2t3d);
>          qtest_add_func("/pci/cxl/pxb_x2_root_port_x4_type3_x4",
>                         cxl_2pxb_4rp_4t3d);
> -- 
> 2.37.3
> 
>
Gregory Price Dec. 8, 2022, 11:06 p.m. UTC | #2
On Thu, Dec 08, 2022 at 10:55:58PM +0000, Fan Ni wrote:
> On Mon, Nov 28, 2022 at 10:01:57AM -0500, Gregory Price wrote:
> >  
> > -    if (cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER) {
> > +    if ((cxl_dstate->vmem_size < CXL_CAPACITY_MULTIPLIER) ||
> > +        (cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER)) {
> >          return CXL_MBOX_INTERNAL_ERROR;
> >      }
> For a cxl configuration with only pmem or vmem, vmem_size or pmem_size
> can be 0 and fail the check? 
> 

While nonsensical, i believe it's technically supported.  The prior
implementation likewise enabled pmem_size to be 0 in these checks.

> >  
> > +error_cleanup:
> > +    int i;
> > +    for (i = 0; i < cur_ent; i++) {
> > +        g_free(table[i]);
> > +    }
> > +    return rc;
> >  }
> 
> I hit an error when compiling with gcc version 9.4.0
> (Ubuntu 9.4.0-1ubuntu1~20.04.1), maybe moving the declaration of `i` to
> the following loop.
> 
> 
> ../hw/mem/cxl_type3.c:211:5: error: a label can only be part of a statement
> and a declaration is not a statement
>   211 |     int i;
>       |     ^~~

Moved the declaration to the top fo the function with the rest of the
declarations. Good catch.
Jonathan Cameron Dec. 19, 2022, 12:42 p.m. UTC | #3
On Mon, 28 Nov 2022 10:01:57 -0500
Gregory Price <gourry.memverge@gmail.com> wrote:

> From: Gregory Price <gourry.memverge@gmail.com>
> 
> This commit enables each CXL Type-3 device to contain one volatile
> memory region and one persistent region.
> 
> Two new properties have been added to cxl-type3 device initialization:
>     [volatile-memdev] and [persistent-memdev]
> 
> The existing [memdev] property has been deprecated and will default the
> memory region to a persistent memory region (although a user may assign
> the region to a ram or file backed region). It cannot be used in
> combination with the new [persistent-memdev] property.
> 
> Partitioning volatile memory from persistent memory is not yet supported.
> 
> Volatile memory is mapped at DPA(0x0), while Persistent memory is mapped
> at DPA(vmem->size), per CXL Spec 8.2.9.8.2.0 - Get Partition Info.
> 
> Signed-off-by: Gregory Price <gregory.price@memverge.com>
> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

As a process thing, when reworking a patch I picked up for the
CXL qemu gitlab tree, drop the SOB that I added as it's not relevant
to the new patch.

Some minor comments inline. I've fixed these up (and the int i from
the other review) whilst applying to my local tree.  That tree is a bit
of a mess at the moment, so probably won't push out to gitlab until
after the holidays.  Still no need to post a new version unless you particularly
want to or there are other changes to make.

My current plan for sending this upstream is to do it after a couple of
other series - the driver being what is needed for the current kernel code
testing:

1) Trivial cleanup (I'll probably pull patch 1 of this series into that)
2) The RAS error injection series - kernel code already upstream.
3) The event injection series (Ira is going to send an updated RFC)
   kernel code on list.

4) This series probably. My main concern here is that we don't really have
   any way of testing this yet. It's small enough that doesn't bother me
   to much though.


Hopefully some other series in parallel that aren't focused on the type 3 device
so can merge in other orders.


> ---
>  docs/system/devices/cxl.rst |  49 ++++--
>  hw/cxl/cxl-mailbox-utils.c  |  22 +--
>  hw/mem/cxl_type3.c          | 292 +++++++++++++++++++++++++++---------
>  include/hw/cxl/cxl_device.h |  11 +-
>  tests/qtest/cxl-test.c      |  78 ++++++++--
>  5 files changed, 347 insertions(+), 105 deletions(-)
> 
> diff --git a/docs/system/devices/cxl.rst b/docs/system/devices/cxl.rst
> index f25783a4ec..45639a676a 100644
> --- a/docs/system/devices/cxl.rst
> +++ b/docs/system/devices/cxl.rst
> @@ -300,7 +300,7 @@ Example topology involving a switch::
>  
>  Example command lines
>  ---------------------
> -A very simple setup with just one directly attached CXL Type 3 device::
> +A very simple setup with just one directly attached CXL Type 3 Persistent Memory device::
>  
>    qemu-system-aarch64 -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 -cpu max \
>    ...
> @@ -308,7 +308,28 @@ A very simple setup with just one directly attached CXL Type 3 device::
>    -object memory-backend-file,id=cxl-lsa1,share=on,mem-path=/tmp/lsa.raw,size=256M \
>    -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
>    -device cxl-rp,port=0,bus=cxl.1,id=root_port13,chassis=0,slot=2 \
> -  -device cxl-type3,bus=root_port13,memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem0 \
> +  -device cxl-type3,bus=root_port13,persistent-memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem0 \
> +  -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G
> +
> +A very simple setup with just one directly attached CXL Type 3 Volatile Memory device::
> +
> +  qemu-system-aarch64 -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 -cpu max \
> +  ...
> +  -object memory-backend-ram,id=vmem0,share=on,size=256M \
> +  -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
> +  -device cxl-rp,port=0,bus=cxl.1,id=root_port13,chassis=0,slot=2 \
> +  -device cxl-type3,bus=root_port13,volatile-memdev=vmem0,id=cxl-vmem0 \
> +  -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G
> +
> +The same volatile setup may optionally include an LSA region::
> +
> +  qemu-system-aarch64 -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 -cpu max \
> +  ...
> +  -object memory-backend-ram,id=vmem0,share=on,size=256M \
> +  -object memory-backend-file,id=cxl-lsa0,share=on,mem-path=/tmp/lsa.raw,size=256M \
> +  -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
> +  -device cxl-rp,port=0,bus=cxl.1,id=root_port13,chassis=0,slot=2 \
> +  -device cxl-type3,bus=root_port13,volatile-memdev=vmem0,lsa=cxl-lsa0,id=cxl-vmem0 \
>    -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G
>  
>  A setup suitable for 4 way interleave. Only one fixed window provided, to enable 2 way
> @@ -328,13 +349,13 @@ the CXL Type3 device directly attached (no switches).::
>    -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
>    -device pxb-cxl,bus_nr=222,bus=pcie.0,id=cxl.2 \
>    -device cxl-rp,port=0,bus=cxl.1,id=root_port13,chassis=0,slot=2 \
> -  -device cxl-type3,bus=root_port13,memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem0 \
> +  -device cxl-type3,bus=root_port13,persistent-memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem0 \
>    -device cxl-rp,port=1,bus=cxl.1,id=root_port14,chassis=0,slot=3 \
> -  -device cxl-type3,bus=root_port14,memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem1 \
> +  -device cxl-type3,bus=root_port14,persistent-memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem1 \
>    -device cxl-rp,port=0,bus=cxl.2,id=root_port15,chassis=0,slot=5 \
> -  -device cxl-type3,bus=root_port15,memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem2 \
> +  -device cxl-type3,bus=root_port15,persistent-memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem2 \
>    -device cxl-rp,port=1,bus=cxl.2,id=root_port16,chassis=0,slot=6 \
> -  -device cxl-type3,bus=root_port16,memdev=cxl-mem4,lsa=cxl-lsa4,id=cxl-pmem3 \
> +  -device cxl-type3,bus=root_port16,persistent-memdev=cxl-mem4,lsa=cxl-lsa4,id=cxl-pmem3 \
>    -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.targets.1=cxl.2,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=8k
>  
>  An example of 4 devices below a switch suitable for 1, 2 or 4 way interleave::
> @@ -354,15 +375,23 @@ An example of 4 devices below a switch suitable for 1, 2 or 4 way interleave::
>    -device cxl-rp,port=1,bus=cxl.1,id=root_port1,chassis=0,slot=1 \
>    -device cxl-upstream,bus=root_port0,id=us0 \
>    -device cxl-downstream,port=0,bus=us0,id=swport0,chassis=0,slot=4 \
> -  -device cxl-type3,bus=swport0,memdev=cxl-mem0,lsa=cxl-lsa0,id=cxl-pmem0,size=256M \
> +  -device cxl-type3,bus=swport0,persistent-memdev=cxl-mem0,lsa=cxl-lsa0,id=cxl-pmem0,size=256M \
>    -device cxl-downstream,port=1,bus=us0,id=swport1,chassis=0,slot=5 \
> -  -device cxl-type3,bus=swport1,memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem1,size=256M \
> +  -device cxl-type3,bus=swport1,persistent-memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem1,size=256M \
>    -device cxl-downstream,port=2,bus=us0,id=swport2,chassis=0,slot=6 \
> -  -device cxl-type3,bus=swport2,memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem2,size=256M \
> +  -device cxl-type3,bus=swport2,persistent-memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem2,size=256M \
>    -device cxl-downstream,port=3,bus=us0,id=swport3,chassis=0,slot=7 \
> -  -device cxl-type3,bus=swport3,memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3,size=256M \
> +  -device cxl-type3,bus=swport3,persistent-memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3,size=256M \
>    -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k
>  
> +Deprecations
> +------------
> +
> +The Type 3 device [memdev] attribute has been deprecated in favor
> +of the [persistent-memdev] and [volatile-memdev] attributes. [memdev]

That's not quite correct as it sort of implies we could previously use
memdev for the volatile case.

Also curiously short line wrap here. I'll tweak this.  Whilst the rest of the file is also
wrapped oddly, lets not make it worse.

> +will default to a persistent memory device for backward compatibility
> +and is incapable of being used in combination with [persistent-memdev].
> +
>  Kernel Configuration Options
>  ----------------------------
>  


>  
> @@ -444,16 +445,15 @@ static ret_code cmd_ccls_get_partition_info(struct cxl_cmd *cmd,
>          uint64_t next_pmem;
>      } QEMU_PACKED *part_info = (void *)cmd->payload;
>      QEMU_BUILD_BUG_ON(sizeof(*part_info) != 0x20);
> -    uint64_t size = cxl_dstate->pmem_size;
>  
> -    if (!QEMU_IS_ALIGNED(size, CXL_CAPACITY_MULTIPLIER)) {
> +    if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
> +        (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
>          return CXL_MBOX_INTERNAL_ERROR;
>      }
>  
> -    /* PMEM only */
> -    part_info->active_vmem = 0;
> +    part_info->active_vmem = cxl_dstate->vmem_size / CXL_CAPACITY_MULTIPLIER;
>      part_info->next_vmem = 0;

Unrelated to this patch, but it might be a nice follow up to add a comment somewhere
near here to say that next_vmem == 0 && next_pmem == 0 corresponds to no partition
change.  I had to check the spec on that :)

> -    part_info->active_pmem = size / CXL_CAPACITY_MULTIPLIER;
> +    part_info->active_pmem = cxl_dstate->pmem_size / CXL_CAPACITY_MULTIPLIER;
>      part_info->next_pmem = 0;
>  
>      *len = sizeof(*part_info);
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 0317bd96a6..81dc3def01 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c


>      dvsec = (uint8_t *)&(CXLDVSECDevice){
>          .cap = 0x1e,
>          .ctrl = 0x2,
>          .status2 = 0x2,
> -        .range1_size_hi = ct3d->hostmem->size >> 32,
> -        .range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
> -        (ct3d->hostmem->size & 0xF0000000),
> -        .range1_base_hi = 0,
> -        .range1_base_lo = 0,
> +        .range1_size_hi = range1_size_hi,
> +        .range1_size_lo = range1_size_lo,
> +        .range1_base_hi = range1_base_hi,
> +        .range1_base_lo = range1_base_lo,
> +        .range2_size_hi = range2_size_hi,
> +        .range2_size_lo = range2_size_lo,
> +        .range2_base_hi = range2_base_hi,
> +        .range2_base_lo = range2_base_lo

Trivial but add a trailing comma.  The structure might well get bigger in future
so we may want to add stuff here and the lack of a comma will make that noisier
than it needs to be.

>      };
>      cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE,
>                                 PCIE_CXL_DEVICE_DVSEC_LENGTH,
> @@ -475,33 +542,62 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
>      MemoryRegion *mr;
>      char *name;
>  
> -    if (!ct3d->hostmem) {
> -        error_setg(errp, "memdev property must be set");
> +    if (!ct3d->hostmem && !ct3d->hostvmem && !ct3d->hostpmem) {
> +        error_setg(errp, "at least one memdev property must be set");
>          return false;
> +    } else if (ct3d->hostmem && ct3d->hostpmem) {
> +        error_setg(errp, "[memdev] cannot be used with new "
> +                         "[persistent-memdev] property");
> +        return false;
> +    } else if (ct3d->hostmem) {
> +        /* Use of hostmem property implies pmem */
> +        ct3d->hostpmem = ct3d->hostmem;
> +        ct3d->hostmem = NULL;
>      }
>  
> -    mr = host_memory_backend_get_memory(ct3d->hostmem);
> -    if (!mr) {
> -        error_setg(errp, "memdev property must be set");
> +    if (ct3d->hostpmem && !ct3d->lsa) {
> +        error_setg(errp, "lsa property must be set for persistent devices");
>          return false;
>      }
> -    memory_region_set_nonvolatile(mr, true);
> -    memory_region_set_enabled(mr, true);
> -    host_memory_backend_set_mapped(ct3d->hostmem, true);
>  
> -    if (ds->id) {
> -        name = g_strdup_printf("cxl-type3-dpa-space:%s", ds->id);
> -    } else {
> -        name = g_strdup("cxl-type3-dpa-space");
> +    if (ct3d->hostvmem) {
> +        mr = host_memory_backend_get_memory(ct3d->hostvmem);
> +        if (!mr) {
> +            error_setg(errp, "volatile memdev must have backing device");
> +            return false;
> +        }
> +        memory_region_set_nonvolatile(mr, false);
> +        memory_region_set_enabled(mr, true);
> +        host_memory_backend_set_mapped(ct3d->hostvmem, true);
> +        if (ds->id) {
> +            name = g_strdup_printf("cxl-type3-dpa-vmem-space:%s", ds->id);
> +        } else {
> +            name = g_strdup("cxl-type3-dpa-vmem-space");
> +        }
> +        address_space_init(&ct3d->hostvmem_as, mr, name);
> +        ct3d->cxl_dstate.vmem_size = mr->size;
> +        ct3d->cxl_dstate.mem_size += mr->size;
> +        g_free(name);
>      }
> -    address_space_init(&ct3d->hostmem_as, mr, name);
> -    g_free(name);
> -
> -    ct3d->cxl_dstate.pmem_size = ct3d->hostmem->size;
>  
> -    if (!ct3d->lsa) {
> -        error_setg(errp, "lsa property must be set");
> -        return false;
> +    if (ct3d->hostpmem) {
> +        mr = host_memory_backend_get_memory(ct3d->hostpmem);
> +        if (!mr) {
> +            error_setg(errp, "persistent memdev must have backing device");
> +            return false;
> +        }
> +        memory_region_set_nonvolatile(mr, true);
> +        memory_region_set_enabled(mr, true);
> +        host_memory_backend_set_mapped(ct3d->hostpmem, true);
> +        if (ds->id) {
> +            name = g_strdup_printf("cxl-type3-dpa-pmem-space:%s", ds->id);
> +        } else {
> +            name = g_strdup("cxl-type3-dpa-pmem-space");
> +        }
> +        address_space_init(&ct3d->hostpmem_as, mr, name);
> +        ct3d->cxl_dstate.pmem_size = mr->size;
> +        ct3d->cxl_dstate.mem_size += mr->size;
> +        g_free(name);
>      }
>  
>      return true;
> @@ -609,7 +705,12 @@ err_free_spdm_socket:
>  err_free_special_ops:
>      g_free(regs->special_ops);
>  err_address_space_free:
> -    address_space_destroy(&ct3d->hostmem_as);
> +    if (ct3d->hostvmem) {
> +        address_space_destroy(&ct3d->hostvmem_as);
> +    }
> +    if (ct3d->hostpmem) {
> +        address_space_destroy(&ct3d->hostpmem_as);
> +    }

These two should be in opposite order (tear down in reverse of setup)

>      return;
>  }
>  
> @@ -623,7 +724,12 @@ static void ct3_exit(PCIDevice *pci_dev)
>      cxl_doe_cdat_release(cxl_cstate);
>      spdm_sock_fini(ct3d->doe_spdm.socket);
>      g_free(regs->special_ops);
> -    address_space_destroy(&ct3d->hostmem_as);
> +    if (ct3d->hostvmem) {
> +        address_space_destroy(&ct3d->hostvmem_as);
> +    }
> +    if (ct3d->hostpmem) {
> +        address_space_destroy(&ct3d->hostpmem_as);
> +    }

Same here.  Almost worth factoring this out but it's undoing setup_memory
and I can't immediately think of what the undo version of that should be called
so maybe we can avoid that for now by leaving these here.

>  }
>  
>  /* TODO: Support multiple HDM decoders and DPA skip */
> @@ -663,11 +769,17 @@ MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data,
>  {
>      CXLType3Dev *ct3d = CXL_TYPE3(d);
>      uint64_t dpa_offset;
> -    MemoryRegion *mr;
> +    MemoryRegion *vmr = NULL, *pmr = NULL;
> +    AddressSpace *as;
>  
> -    /* TODO support volatile region */
> -    mr = host_memory_backend_get_memory(ct3d->hostmem);
> -    if (!mr) {
> +    if (ct3d->hostvmem) {
> +        vmr = host_memory_backend_get_memory(ct3d->hostvmem);
> +    }
> +    if (ct3d->hostpmem) {
> +        pmr = host_memory_backend_get_memory(ct3d->hostpmem);
> +    }
> +
> +    if (!vmr && !pmr) {
>          return MEMTX_ERROR;
>      }
>  
> @@ -675,11 +787,22 @@ MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data,
>          return MEMTX_ERROR;
>      }
>  
> -    if (dpa_offset > int128_get64(mr->size)) {
> +    if (dpa_offset > int128_get64(ct3d->cxl_dstate.mem_size)) {
>          return MEMTX_ERROR;
>      }
>  
> -    return address_space_read(&ct3d->hostmem_as, dpa_offset, attrs, data, size);
> +    if (vmr) {
> +        if (dpa_offset <= int128_get64(vmr->size)) {
> +            as = &ct3d->hostvmem_as;

As a follow up it might be worth exploring if we can combine the two address spaces.
I'm not keen to do it yet, because of no simple way to test it and it's less obviously
correct than just having separate address spaces.

Would involve mapping a the two hostmem regions into a container memory region which
would be the one we use to build the address space.  Advantage would be that we wouldn't
need special handling for which region it was in here or the write path as QEMUs
normal heirarchical memory regions would handle that for us.
I'm not 100% sure it would work though!

> +        } else {
> +            as = &ct3d->hostpmem_as;
> +            dpa_offset -= vmr->size;
> +        }
> +    }
> +    else {
> +        as = &ct3d->hostpmem_as;
> +    }
> +    return address_space_read(as, dpa_offset, attrs, data, size);
>  }



>  
>  static void validate_lsa_access(MemoryRegion *mr, uint64_t size,
> @@ -744,30 +891,35 @@ static void validate_lsa_access(MemoryRegion *mr, uint64_t size,
>  static uint64_t get_lsa(CXLType3Dev *ct3d, void *buf, uint64_t size,
>                      uint64_t offset)
>  {
> -    MemoryRegion *mr;
> +    MemoryRegion *mr = NULL;

No reason to set this to NULL.

>      void *lsa;
>  
> -    mr = host_memory_backend_get_memory(ct3d->lsa);
> -    validate_lsa_access(mr, size, offset);
> +    if (ct3d->lsa) {

Flip logic to return early.  Nicer anyway plus reduces diff
making this slightly easier to review.


> +        mr = host_memory_backend_get_memory(ct3d->lsa);
> +        validate_lsa_access(mr, size, offset);
>  
> -    lsa = memory_region_get_ram_ptr(mr) + offset;
> -    memcpy(buf, lsa, size);
> +        lsa = memory_region_get_ram_ptr(mr) + offset;
> +        memcpy(buf, lsa, size);
> +        return size;
> +    }
>  
> -    return size;
> +    return 0;

Hmm. Maybe this should return an error, but then we can't use the uint64_t as a return
value.  As this function would never be called with !ct3d->lsa let's leave it as it stands.

>  }
>  
>  static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size,
>                      uint64_t offset)
>  {
> -    MemoryRegion *mr;
> -    void *lsa;
> +    MemoryRegion *mr = NULL;
> +    void *lsa = NULL;
As above these NULL values aren't read.
>  
> -    mr = host_memory_backend_get_memory(ct3d->lsa);
> -    validate_lsa_access(mr, size, offset);
> +    if (ct3d->lsa) {

Flip logic here as well

> +        mr = host_memory_backend_get_memory(ct3d->lsa);
> +        validate_lsa_access(mr, size, offset);
>  
> -    lsa = memory_region_get_ram_ptr(mr) + offset;
> -    memcpy(lsa, buf, size);
> -    memory_region_set_dirty(mr, offset, size);
> +        lsa = memory_region_get_ram_ptr(mr) + offset;
> +        memcpy(lsa, buf, size);
> +        memory_region_set_dirty(mr, offset, size);
> +    }
>  

> diff --git a/tests/qtest/cxl-test.c b/tests/qtest/cxl-test.c
> index e59ba22387..6893f54e28 100644
> --- a/tests/qtest/cxl-test.c
> +++ b/tests/qtest/cxl-test.c
> @@ -40,32 +40,46 @@
>    "-device cxl-rp,id=rp2,bus=cxl.1,chassis=0,slot=2 " \
>    "-device cxl-rp,id=rp3,bus=cxl.1,chassis=0,slot=3 "
>  
> -#define QEMU_T3D \
> +#define QEMU_T3D_DEPRECATED \
>    "-object memory-backend-file,id=cxl-mem0,mem-path=%s,size=256M " \
> -  "-object memory-backend-file,id=lsa0,mem-path=%s,size=256M "    \

This belongs in previous patch. 

> +  "-object memory-backend-file,id=lsa0,mem-path=%s,size=256M " \
>    "-device cxl-type3,bus=rp0,memdev=cxl-mem0,lsa=lsa0,id=cxl-pmem0 "
>  

Rest looks good to me.

Jonathan
Gregory Price Dec. 19, 2022, 4:12 p.m. UTC | #4
On Mon, Dec 19, 2022 at 12:42:11PM +0000, Jonathan Cameron wrote:
> As a process thing, when reworking a patch I picked up for the
> CXL qemu gitlab tree, drop the SOB that I added as it's not relevant
> to the new patch.
> 

ack

> Still no need to post a new version unless you particularly
> want to or there are other changes to make.

ack

> > +Deprecations
> > +------------
> > +
> > +The Type 3 device [memdev] attribute has been deprecated in favor
> > +of the [persistent-memdev] and [volatile-memdev] attributes. [memdev]
> 
> That's not quite correct as it sort of implies we could previously use
> memdev for the volatile case.

An early version of the patch explicitly errored out / warned the user
if they attempted to do this, but that got rebuffed.  This could be
added back in.

> > -    return address_space_read(&ct3d->hostmem_as, dpa_offset, attrs, data, size);
> > +    if (vmr) {
> > +        if (dpa_offset <= int128_get64(vmr->size)) {
> > +            as = &ct3d->hostvmem_as;
> 
> As a follow up it might be worth exploring if we can combine the two address spaces.
> I'm not keen to do it yet, because of no simple way to test it and it's less obviously
> correct than just having separate address spaces.
> 
> Would involve mapping a the two hostmem regions into a container memory region which
> would be the one we use to build the address space.  Advantage would be that we wouldn't
> need special handling for which region it was in here or the write path as QEMUs
> normal heirarchical memory regions would handle that for us.
> I'm not 100% sure it would work though!
> 

Originally I had tried putting them both into one, with the thought that
since it's technically one device address space there should only be one
way to access the address space instead of two.

After investigating, the address space has to be initialized with a
memdev, and an address space only supports a single memdev, so i settled
on two address spaces in order to keep the memdevs separate (considering
each region may have different attributes).

This opens the question as to how to actually represent a persistent
memory device that can be partitioned as volatile.

Consider the following setup:

device[pmem 512mb, volatile 512 mb]
produces:
    memdev(512mb, pmem) && memdev(512mb, volatile)

But if I partition the device to 256p/768v, when i access data in range
[256mb,512mb), then i have volatile data going into the persistent memory
store by nature of the fact that the capacity is located in that memdev.

An alternative would be to require a vmem region the same size as the
pmem region (+ whatever additional volatile capacity), but this
means using 2x the resources to represent the real capacity of the
device. That's not good.

Another alternative would be to create a wrapper memdev that encompasses
persistent and volatile operations, and then create the partitioning
logic on top of it, such that it can use a single memdev while handling
whatever sematics only apply to each individual region.

The tl;dr here:  Going down to a single address space would probably
require writing a wrapper memdev that abstracts away all the
partitioning logic.  Maybe that's worth it?

> > @@ -744,30 +891,35 @@ static void validate_lsa_access(MemoryRegion *mr, uint64_t size,
> >  static uint64_t get_lsa(CXLType3Dev *ct3d, void *buf, uint64_t size,
> >                      uint64_t offset)
> >  {
> > -    return size;
> > +    return 0;
> 
> Hmm. Maybe this should return an error, but then we can't use the uint64_t as a return
> value.  As this function would never be called with !ct3d->lsa let's leave it as it stands.
> 
> >  }

I originally wanted to do that, but I chose to keep the current contract
semantics so as to minimize the change set.  I agree though that this
should probably return an error.
Jonathan Cameron Dec. 19, 2022, 5:25 p.m. UTC | #5
On Mon, 19 Dec 2022 11:12:34 -0500
Gregory Price <gregory.price@memverge.com> wrote:

> On Mon, Dec 19, 2022 at 12:42:11PM +0000, Jonathan Cameron wrote:
> > As a process thing, when reworking a patch I picked up for the
> > CXL qemu gitlab tree, drop the SOB that I added as it's not relevant
> > to the new patch.
> >   
> 
> ack
> 
> > Still no need to post a new version unless you particularly
> > want to or there are other changes to make.  
> 
> ack
> 
> > > +Deprecations
> > > +------------
> > > +
> > > +The Type 3 device [memdev] attribute has been deprecated in favor
> > > +of the [persistent-memdev] and [volatile-memdev] attributes. [memdev]  
> > 
> > That's not quite correct as it sort of implies we could previously use
> > memdev for the volatile case.  
> 
> An early version of the patch explicitly errored out / warned the user
> if they attempted to do this, but that got rebuffed.  This could be
> added back in.

Ah. I was unclear. I just mean that the deprecation text here is a little
misleading.  Not commenting on the functionality in this patch.

> 
> > > -    return address_space_read(&ct3d->hostmem_as, dpa_offset, attrs, data, size);
> > > +    if (vmr) {
> > > +        if (dpa_offset <= int128_get64(vmr->size)) {
> > > +            as = &ct3d->hostvmem_as;  
> > 
> > As a follow up it might be worth exploring if we can combine the two address spaces.
> > I'm not keen to do it yet, because of no simple way to test it and it's less obviously
> > correct than just having separate address spaces.
> > 
> > Would involve mapping a the two hostmem regions into a container memory region which
> > would be the one we use to build the address space.  Advantage would be that we wouldn't
> > need special handling for which region it was in here or the write path as QEMUs
> > normal heirarchical memory regions would handle that for us.
> > I'm not 100% sure it would work though!
> >   
> 
> Originally I had tried putting them both into one, with the thought that
> since it's technically one device address space there should only be one
> way to access the address space instead of two.
> 
> After investigating, the address space has to be initialized with a
> memdev, and an address space only supports a single memdev, so i settled
> on two address spaces in order to keep the memdevs separate (considering
> each region may have different attributes).

I think an address space needs a memory region, not a memdev.
Initialize a container region with memory_region_init()
We could then add the two memdev associated regions (with different 
attributes) as subregions using memory_region_add_subregion()

Similar is done for the system memory
https://elixir.bootlin.com/qemu/latest/source/softmmu/physmem.c#L2675
creates it.  Then it's mostly accessed by get_system_memory()

Memory is then added to that at particular base addresses via for example
https://elixir.bootlin.com/qemu/latest/source/hw/arm/virt.c#L2210
and similar.
I think we can do the same here.

> 
> This opens the question as to how to actually represent a persistent
> memory device that can be partitioned as volatile.
> 
> Consider the following setup:
> 
> device[pmem 512mb, volatile 512 mb]
> produces:
>     memdev(512mb, pmem) && memdev(512mb, volatile)
> 
> But if I partition the device to 256p/768v, when i access data in range
> [256mb,512mb), then i have volatile data going into the persistent memory
> store by nature of the fact that the capacity is located in that memdev.
> 
> An alternative would be to require a vmem region the same size as the
> pmem region (+ whatever additional volatile capacity), but this
> means using 2x the resources to represent the real capacity of the
> device. That's not good.

Do we care enough about the overhead, given this is emulation only?
Not that I think this is the way to go

> 
> Another alternative would be to create a wrapper memdev that encompasses
> persistent and volatile operations, and then create the partitioning
> logic on top of it, such that it can use a single memdev while handling
> whatever sematics only apply to each individual region.
> 
> The tl;dr here:  Going down to a single address space would probably
> require writing a wrapper memdev that abstracts away all the
> partitioning logic.  Maybe that's worth it?

For current setup, I think we can just create memory region that handles both of them.

For the partitioning case, lots of options.  I'm not sure what will work best.
Maybe we just decide we don't care if a persistent region (memdev-pmem) is presented
as volatile. I don't think it will do any real harm in the emulation, but maybe
I'm wrong on that?

> 
> > > @@ -744,30 +891,35 @@ static void validate_lsa_access(MemoryRegion *mr, uint64_t size,
> > >  static uint64_t get_lsa(CXLType3Dev *ct3d, void *buf, uint64_t size,
> > >                      uint64_t offset)
> > >  {
> > > -    return size;
> > > +    return 0;  
> > 
> > Hmm. Maybe this should return an error, but then we can't use the uint64_t as a return
> > value.  As this function would never be called with !ct3d->lsa let's leave it as it stands.
> >   
> > >  }  
> 
> I originally wanted to do that, but I chose to keep the current contract
> semantics so as to minimize the change set.  I agree though that this
> should probably return an error.

Lets leave it for now.

Curious question on this lot. How are you testing it?  Some exciting scripts
to bring the hdm decoders up etc for the volatile region? Or some prototype
driver code?

Jonathan
Gregory Price Dec. 19, 2022, 5:55 p.m. UTC | #6
> I think an address space needs a memory region, not a memdev.
> Initialize a container region with memory_region_init()
> We could then add the two memdev associated regions (with different 
> attributes) as subregions using memory_region_add_subregion()
> 
> Similar is done for the system memory
> https://elixir.bootlin.com/qemu/latest/source/softmmu/physmem.c#L2675
> creates it.  Then it's mostly accessed by get_system_memory()
> 
> Memory is then added to that at particular base addresses via for example
> https://elixir.bootlin.com/qemu/latest/source/hw/arm/virt.c#L2210
> and similar.
> I think we can do the same here.
>

Ah, I'm just confused then, this seems reasonable

> Curious question on this lot. How are you testing it?  Some exciting scripts
> to bring the hdm decoders up etc for the volatile region? Or some prototype
> driver code?

Unfortunately I got pulled off this work for a bit to handle something
more pressing.  I have only tested that the existing functionality
(persistent mode) works as intended, and that at least the ndctl/cxl
tools report capacity as expected.

As of right now there is no code in the driver to actually touch these
regions in a way that works to be able to online these volatile regions
- at least so far as I know.

I don't remember where I left off, but some pmem-to-ram tutorials online
suggest you could online pmem regions like this

cxl create-region -d decoder0.0 -m mem0
echo offline > /sys/devices/system/memory/auto_online_blocks
ndctl create-namespace -f --mode devdax --continue
daxctl enable-device [device name]
daxctl reconfigure-device --mode=system-ram all

However I don't think this is successful in creating the dax devices,
and therefore the reconfiguring into ram.
Jonathan Cameron Dec. 20, 2022, 3:34 p.m. UTC | #7
On Mon, 19 Dec 2022 12:55:44 -0500
Gregory Price <gregory.price@memverge.com> wrote:

> > I think an address space needs a memory region, not a memdev.
> > Initialize a container region with memory_region_init()
> > We could then add the two memdev associated regions (with different 
> > attributes) as subregions using memory_region_add_subregion()
> > 
> > Similar is done for the system memory
> > https://elixir.bootlin.com/qemu/latest/source/softmmu/physmem.c#L2675
> > creates it.  Then it's mostly accessed by get_system_memory()
> > 
> > Memory is then added to that at particular base addresses via for example
> > https://elixir.bootlin.com/qemu/latest/source/hw/arm/virt.c#L2210
> > and similar.
> > I think we can do the same here.
> >  
> 
> Ah, I'm just confused then, this seems reasonable
> 
> > Curious question on this lot. How are you testing it?  Some exciting scripts
> > to bring the hdm decoders up etc for the volatile region? Or some prototype
> > driver code?  
> 
> Unfortunately I got pulled off this work for a bit to handle something
> more pressing.  

No problem. It happens to us all!

> I have only tested that the existing functionality
> (persistent mode) works as intended, and that at least the ndctl/cxl
> tools report capacity as expected.
> 
> As of right now there is no code in the driver to actually touch these
> regions in a way that works to be able to online these volatile regions
> - at least so far as I know.
> 
> I don't remember where I left off, but some pmem-to-ram tutorials online
> suggest you could online pmem regions like this
> 
> cxl create-region -d decoder0.0 -m mem0
> echo offline > /sys/devices/system/memory/auto_online_blocks
> ndctl create-namespace -f --mode devdax --continue
> daxctl enable-device [device name]
> daxctl reconfigure-device --mode=system-ram all
> 
> However I don't think this is successful in creating the dax devices,
> and therefore the reconfiguring into ram.

Sure. I only bothered testing the it in some dax modes rather than via kmem.
It 'should' work but more testing needed there.

However as you've noted, that only applies to the pmem regions at the moment.
I wondered if you'd scripted the HDM decoder setup etc for test purposes
(so what the driver will do). Alternative to that would be enabling the driver
support. Not sure if anyone is looking at that yet. Final alternative would
be to port the existing EDK2 based support to work on QEMU.  All non trivial
jobs so may take a while,

Jonathan
Gregory Price Dec. 20, 2022, 7:27 p.m. UTC | #8
On Tue, Dec 20, 2022 at 03:34:53PM +0000, Jonathan Cameron wrote:
> > However I don't think this is successful in creating the dax devices,
> > and therefore the reconfiguring into ram.
> 
> Sure. I only bothered testing the it in some dax modes rather than via kmem.
> It 'should' work but more testing needed there.
> 
> However as you've noted, that only applies to the pmem regions at the moment.
> I wondered if you'd scripted the HDM decoder setup etc for test purposes
> (so what the driver will do). Alternative to that would be enabling the driver
> support. Not sure if anyone is looking at that yet. Final alternative would
> be to port the existing EDK2 based support to work on QEMU.  All non trivial
> jobs so may take a while,
> 
> Jonathan

Also, I'm relatively new to this corner of the kernel (mm, regions, dax,
etc), so i need to spend a week or two with uninterrupted tinkering with
how adding new memory regions from these devices is actually "supposed
to work" in a dynamic-capacity world.

At least in theory, the partitioning of persistent and volatile memory
regions on one of these type-3 devices should end up looking a bit like
dynamic capacity when doing runtime reconfiguring.

For example, considering

Device(512mb PMEM, 512 VMEM), I'd want, at least i think

CMFW-Volatile:    max window size(1024mb) - Numa 2
CMFW-Persistent:  max window size(512mb)  - Numa 3

Then we'd need the kernel support for

1) Online 2x256mb volatile regions in Numa 2
2) Online 2x256mb persistent regions in Numa 3
3) Offline persistent region (256mb:512mb)
4) Reconfigure device to 256Pmem/768Volatile
   a) change decoders in device accordingly
5) Online 1x256mb volatile region in Numa 2

The question is whether you can do this without offlining the other
adjacent regions.  I just don't know enough about the region subsystem
to say what is "correct" behavior here.

On the device side, I need to go look at the mailbox commands to go
about implementing the reconfiguration / decoder reprogramming.

I guess the "decoder" reprogramming is essentially changing the
read/write commands to adjust based on v/pmem_active vs v/pmem_size?

I suppose I can look at this chunk next.
Jonathan Cameron Jan. 3, 2023, 3:56 p.m. UTC | #9
On Tue, 20 Dec 2022 14:27:31 -0500
Gregory Price <gregory.price@memverge.com> wrote:

> On Tue, Dec 20, 2022 at 03:34:53PM +0000, Jonathan Cameron wrote:
> > > However I don't think this is successful in creating the dax devices,
> > > and therefore the reconfiguring into ram.  
> > 
> > Sure. I only bothered testing the it in some dax modes rather than via kmem.
> > It 'should' work but more testing needed there.
> > 
> > However as you've noted, that only applies to the pmem regions at the moment.
> > I wondered if you'd scripted the HDM decoder setup etc for test purposes
> > (so what the driver will do). Alternative to that would be enabling the driver
> > support. Not sure if anyone is looking at that yet. Final alternative would
> > be to port the existing EDK2 based support to work on QEMU.  All non trivial
> > jobs so may take a while,
> > 
> > Jonathan  
> 
> Also, I'm relatively new to this corner of the kernel (mm, regions, dax,
> etc), so i need to spend a week or two with uninterrupted tinkering with
> how adding new memory regions from these devices is actually "supposed
> to work" in a dynamic-capacity world.
> 
> At least in theory, the partitioning of persistent and volatile memory
> regions on one of these type-3 devices should end up looking a bit like
> dynamic capacity when doing runtime reconfiguring.
> 
> For example, considering
> 
> Device(512mb PMEM, 512 VMEM), I'd want, at least i think
> 
> CMFW-Volatile:    max window size(1024mb) - Numa 2
> CMFW-Persistent:  max window size(512mb)  - Numa 3
> 
> Then we'd need the kernel support for
> 
> 1) Online 2x256mb volatile regions in Numa 2
> 2) Online 2x256mb persistent regions in Numa 3
> 3) Offline persistent region (256mb:512mb)
> 4) Reconfigure device to 256Pmem/768Volatile
>    a) change decoders in device accordingly
> 5) Online 1x256mb volatile region in Numa 2
> 
> The question is whether you can do this without offlining the other
> adjacent regions.  I just don't know enough about the region subsystem
> to say what is "correct" behavior here.

Whilst you probably 'can' do fine grained offline / online (to some
degree anyway) I'm not sure if people consider it an important
usecase. If decoder reprogramming is involved things will get very fiddly
so at least in first instance I'd advocate just ripping it all down and
building up again.  Or in the simple case, just block attempts to reconfigure
at the partitioning if either side is in use.

> 
> On the device side, I need to go look at the mailbox commands to go
> about implementing the reconfiguration / decoder reprogramming.
> 
> I guess the "decoder" reprogramming is essentially changing the
> read/write commands to adjust based on v/pmem_active vs v/pmem_size?

Yup.  We also need multiple decoder support in general in QEMU.
It's not that high on my list as my main focus this cycle is going
to be on reducing the out of tree patch set by upstreaming stuff.

> 
> I suppose I can look at this chunk next.

Great.

Jonathan
Jonathan Cameron Jan. 3, 2023, 6:15 p.m. UTC | #10
On Tue, 3 Jan 2023 11:02:31 -0500
Gregory Price <gourry.memverge@gmail.com> wrote:

> The fine grained control would be a precursor to an emulated pooling
> device.  If you can demonstrate it with a singleton attached device, you
> could just implement an exclusivity table in a shared file, and set the
> shared memory to a file backend as well.  Boom, shared memory pool across
> qemu instances.

For Dynamic capacity based pooling I agree, but a lot of work is needed to make that
function correctly.  The partitioning support is a much nearer term target and
there is no real need for them to look similar - on kernel side of things I'm
not yet convinced it's even a sensible route to make them look similar as DCD
is far less constrained than partitioning and expected usecases are probably
entirely different.

Hotplug based pooling (CXL 2.0 approach) is much simpler in OS because in that
case we always get full blown hotplug events.

Whilst I agree that pooling is interesting to emulate, my preference for initial
case would be moving between virtual PCIe heirarchies attached to different
root ports / host bridges on a single host.  That may be simpler to get going / test
than multiple hosts.

Right now I'd just like a static device with mixture of pmem / volatile plus 2+
HDM decoders and kernel support for that.

Jonathan

> 
> On Tue, Jan 3, 2023, 10:56 AM Jonathan Cameron <Jonathan.Cameron@huawei.com>
> wrote:
> 
> > On Tue, 20 Dec 2022 14:27:31 -0500
> > Gregory Price <gregory.price@memverge.com> wrote:
> >  
> > > On Tue, Dec 20, 2022 at 03:34:53PM +0000, Jonathan Cameron wrote:  
> > > > > However I don't think this is successful in creating the dax devices,
> > > > > and therefore the reconfiguring into ram.  
> > > >
> > > > Sure. I only bothered testing the it in some dax modes rather than via  
> > kmem.  
> > > > It 'should' work but more testing needed there.
> > > >
> > > > However as you've noted, that only applies to the pmem regions at the  
> > moment.  
> > > > I wondered if you'd scripted the HDM decoder setup etc for test  
> > purposes  
> > > > (so what the driver will do). Alternative to that would be enabling  
> > the driver  
> > > > support. Not sure if anyone is looking at that yet. Final alternative  
> > would  
> > > > be to port the existing EDK2 based support to work on QEMU.  All non  
> > trivial  
> > > > jobs so may take a while,
> > > >
> > > > Jonathan  
> > >
> > > Also, I'm relatively new to this corner of the kernel (mm, regions, dax,
> > > etc), so i need to spend a week or two with uninterrupted tinkering with
> > > how adding new memory regions from these devices is actually "supposed
> > > to work" in a dynamic-capacity world.
> > >
> > > At least in theory, the partitioning of persistent and volatile memory
> > > regions on one of these type-3 devices should end up looking a bit like
> > > dynamic capacity when doing runtime reconfiguring.
> > >
> > > For example, considering
> > >
> > > Device(512mb PMEM, 512 VMEM), I'd want, at least i think
> > >
> > > CMFW-Volatile:    max window size(1024mb) - Numa 2
> > > CMFW-Persistent:  max window size(512mb)  - Numa 3
> > >
> > > Then we'd need the kernel support for
> > >
> > > 1) Online 2x256mb volatile regions in Numa 2
> > > 2) Online 2x256mb persistent regions in Numa 3
> > > 3) Offline persistent region (256mb:512mb)
> > > 4) Reconfigure device to 256Pmem/768Volatile
> > >    a) change decoders in device accordingly
> > > 5) Online 1x256mb volatile region in Numa 2
> > >
> > > The question is whether you can do this without offlining the other
> > > adjacent regions.  I just don't know enough about the region subsystem
> > > to say what is "correct" behavior here.  
> >
> > Whilst you probably 'can' do fine grained offline / online (to some
> > degree anyway) I'm not sure if people consider it an important
> > usecase. If decoder reprogramming is involved things will get very fiddly
> > so at least in first instance I'd advocate just ripping it all down and
> > building up again.  Or in the simple case, just block attempts to
> > reconfigure
> > at the partitioning if either side is in use.
> >  
> > >
> > > On the device side, I need to go look at the mailbox commands to go
> > > about implementing the reconfiguration / decoder reprogramming.
> > >
> > > I guess the "decoder" reprogramming is essentially changing the
> > > read/write commands to adjust based on v/pmem_active vs v/pmem_size?  
> >
> > Yup.  We also need multiple decoder support in general in QEMU.
> > It's not that high on my list as my main focus this cycle is going
> > to be on reducing the out of tree patch set by upstreaming stuff.
> >  
> > >
> > > I suppose I can look at this chunk next.  
> >
> > Great.
> >
> > Jonathan
> >
> >
> >  
>
Gregory Price Jan. 19, 2023, 5:15 p.m. UTC | #11
Found a bug, not sure how we missed this, probably happed with rebasing
and some fixups. We're presently reporting the volatile region as
non-volatile, 1 line patch.

Jonathan do you want a separate patch shipped or would you rather just
apply a fixup to the commit in your current branch?

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 919cdf141e..1c5168e2e4 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -187,7 +187,7 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
     /* Now fill them in */
     if (volatile_mr) {
         rc = ct3_build_cdat_entries_for_mr(table, dsmad_handle++, volatile_mr,
-                                           true, 0);
+                                           false, 0);
         if (rc < 0) {
             return rc;
         }
Jonathan Cameron Jan. 19, 2023, 5:31 p.m. UTC | #12
On Thu, 19 Jan 2023 12:15:45 -0500
Gregory Price <gregory.price@memverge.com> wrote:

> Found a bug, not sure how we missed this, probably happed with rebasing
> and some fixups. We're presently reporting the volatile region as
> non-volatile, 1 line patch.
> 
> Jonathan do you want a separate patch shipped or would you rather just
> apply a fixup to the commit in your current branch?
I'll fix up as I'd only squash the patch in anyway.

If tomorrow is slightly less crazy busy than today I'll push out a new
tree with this and the pass through decoders support RFC
(I'll post that to the lists as well)

Jonathan

> 
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 919cdf141e..1c5168e2e4 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -187,7 +187,7 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
>      /* Now fill them in */
>      if (volatile_mr) {
>          rc = ct3_build_cdat_entries_for_mr(table, dsmad_handle++, volatile_mr,
> -                                           true, 0);
> +                                           false, 0);
>          if (rc < 0) {
>              return rc;
>          }
Gregory Price Jan. 19, 2023, 10:13 p.m. UTC | #13
On Thu, Jan 19, 2023 at 05:31:12PM +0000, Jonathan Cameron wrote:
> On Thu, 19 Jan 2023 12:15:45 -0500
> Gregory Price <gregory.price@memverge.com> wrote:
> 
> > Found a bug, not sure how we missed this, probably happed with rebasing
> > and some fixups. We're presently reporting the volatile region as
> > non-volatile, 1 line patch.
> > 
> > Jonathan do you want a separate patch shipped or would you rather just
> > apply a fixup to the commit in your current branch?
> I'll fix up as I'd only squash the patch in anyway.
> 
> If tomorrow is slightly less crazy busy than today I'll push out a new
> tree with this and the pass through decoders support RFC
> (I'll post that to the lists as well)
> 
> Jonathan
> 

Aye aye! 

One other change to consider: the .EFI_memory_type_attr right now is set
to RESERVED.  Should this field actually be EFI_MEMORY_SP? Or is there a
reason for explicitly Reserved?

0: EfiConventionalMemory
1: EfiConventionalMemory w/ EFI_MEMORY_SP Attribute
2: EfiReservedMemoryType

I remember reading a while back that the intended marking is
special-purpose rather than reserved, but i'm hitting my wall on
knowledge.  

Dan may know, but I couldn't divine the correct setting from the kernel
(obviously this is EFI level code, so i didn't expect to).



One other thing that I am noticing:  When a CFMW is registered, an
nvdimm_bridge device becomes present in /sys/bus/cxl/devices -
regardless of whether the type3 device is persistent or volatile.

This makes me believe we aren't setting something up correctly in the
CDAT or something, but other than the below changes everything else
looks correct.  This could imply a kernel driver bug, but i've been
validating against real hardware and this behavior is not seen, even
with functional CXL memory expander devices (which the BIOS obviously
has a hand in setting up).

I started validating the DVSECs, but likewise i didn't see any
indication of error either.



diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 919cdf141e..4daa0cf0f6 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -132,8 +132,9 @@ static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
             .length = sizeof(*dsemts),
         },
         .DSMAS_handle = dsmad_handle,
-        /* Reserved - the non volatile from DSMAS matters */
-        .EFI_memory_type_attr = 2,
+        /* Reserved if NV - the non volatile from DSMAS matters
+         * otherwise label this EFI_MEMORY_SP (special purpose) */
+        .EFI_memory_type_attr = is_pmem ? 2 : 1,
         .DPA_offset = 0,
         .DPA_length = int128_get64(mr->size),
     };
@@ -187,7 +188,7 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
     /* Now fill them in */
     if (volatile_mr) {
         rc = ct3_build_cdat_entries_for_mr(table, dsmad_handle++, volatile_mr,
-                                           true, 0);
+                                           false, 0);
         if (rc < 0) {
             return rc;
         }
Jonathan Cameron Jan. 20, 2023, 10:59 a.m. UTC | #14
On Thu, 19 Jan 2023 17:13:40 -0500
Gregory Price <gregory.price@memverge.com> wrote:

> On Thu, Jan 19, 2023 at 05:31:12PM +0000, Jonathan Cameron wrote:
> > On Thu, 19 Jan 2023 12:15:45 -0500
> > Gregory Price <gregory.price@memverge.com> wrote:
> >   
> > > Found a bug, not sure how we missed this, probably happed with rebasing
> > > and some fixups. We're presently reporting the volatile region as
> > > non-volatile, 1 line patch.
> > > 
> > > Jonathan do you want a separate patch shipped or would you rather just
> > > apply a fixup to the commit in your current branch?  
> > I'll fix up as I'd only squash the patch in anyway.
> > 
> > If tomorrow is slightly less crazy busy than today I'll push out a new
> > tree with this and the pass through decoders support RFC
> > (I'll post that to the lists as well)
> > 
> > Jonathan
> >   
> 
> Aye aye! 
> 
> One other change to consider: the .EFI_memory_type_attr right now is set
> to RESERVED.  Should this field actually be EFI_MEMORY_SP? Or is there a
> reason for explicitly Reserved?
> 
> 0: EfiConventionalMemory
> 1: EfiConventionalMemory w/ EFI_MEMORY_SP Attribute
> 2: EfiReservedMemoryType
> 
> I remember reading a while back that the intended marking is
> special-purpose rather than reserved, but i'm hitting my wall on
> knowledge.  
> 
> Dan may know, but I couldn't divine the correct setting from the kernel
> (obviously this is EFI level code, so i didn't expect to).

Yes, would be better to report as EfiConventionalMemory + SP
Shouldn't hugely matter from practical point of view though (I haven't
checked) as both mean driver managed and this is more about
policy than anything else.

> 
> 
> 
> One other thing that I am noticing:  When a CFMW is registered, an
> nvdimm_bridge device becomes present in /sys/bus/cxl/devices -
> regardless of whether the type3 device is persistent or volatile.
> 

That's one for Dan.  Key here is I don't think anyone is claiming the
kernel code yet supports 'hot plug / cold plug' of volatile type 3
devices.  I expect a non trivial amount of work to enable that
simply because it hasn't previously been of interest.

> This makes me believe we aren't setting something up correctly in the
> CDAT or something, but other than the below changes everything else
> looks correct.  This could imply a kernel driver bug, but i've been
> validating against real hardware and this behavior is not seen, even
> with functional CXL memory expander devices (which the BIOS obviously
> has a hand in setting up).
> 
> I started validating the DVSECs, but likewise i didn't see any
> indication of error either.
> 
> 
> 
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 919cdf141e..4daa0cf0f6 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -132,8 +132,9 @@ static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
>              .length = sizeof(*dsemts),
>          },
>          .DSMAS_handle = dsmad_handle,
> -        /* Reserved - the non volatile from DSMAS matters */
> -        .EFI_memory_type_attr = 2,
> +        /* Reserved if NV - the non volatile from DSMAS matters
> +         * otherwise label this EFI_MEMORY_SP (special purpose) */
> +        .EFI_memory_type_attr = is_pmem ? 2 : 1,
>          .DPA_offset = 0,
>          .DPA_length = int128_get64(mr->size),
>      };
> @@ -187,7 +188,7 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
>      /* Now fill them in */
>      if (volatile_mr) {
>          rc = ct3_build_cdat_entries_for_mr(table, dsmad_handle++, volatile_mr,
> -                                           true, 0);
> +                                           false, 0);
>          if (rc < 0) {
>              return rc;
>          }
Jonathan Cameron Jan. 30, 2023, 1:24 p.m. UTC | #15
> diff --git a/tests/qtest/cxl-test.c b/tests/qtest/cxl-test.c
> index e59ba22387..6893f54e28 100644
> --- a/tests/qtest/cxl-test.c
> +++ b/tests/qtest/cxl-test.c
> @@ -40,32 +40,46 @@
>    "-device cxl-rp,id=rp2,bus=cxl.1,chassis=0,slot=2 " \
>    "-device cxl-rp,id=rp3,bus=cxl.1,chassis=0,slot=3 "
>  
> -#define QEMU_T3D \
> +#define QEMU_T3D_DEPRECATED \
>    "-object memory-backend-file,id=cxl-mem0,mem-path=%s,size=256M " \
> -  "-object memory-backend-file,id=lsa0,mem-path=%s,size=256M "    \
> +  "-object memory-backend-file,id=lsa0,mem-path=%s,size=256M " \
>    "-device cxl-type3,bus=rp0,memdev=cxl-mem0,lsa=lsa0,id=cxl-pmem0 "
>  
> +#define QEMU_T3D_PMEM \
> +  "-object memory-backend-file,id=m0,mem-path=%s,size=256M " \
> +  "-object memory-backend-file,id=lsa0,mem-path=%s,size=256M " \
> +  "-device cxl-type3,bus=rp0,persistent-memdev=cxl-m0,lsa=lsa0,id=pmem0 "
naming mismatch. I've fixed up with mem0 as the device name. 
The naming isn't totally consistent so I may tweak this some more before
sending for upstream.
Gregory Price Jan. 30, 2023, 2:37 p.m. UTC | #16
On Mon, Jan 30, 2023 at 01:24:51PM +0000, Jonathan Cameron wrote:
> 
> > diff --git a/tests/qtest/cxl-test.c b/tests/qtest/cxl-test.c
> > index e59ba22387..6893f54e28 100644
> > --- a/tests/qtest/cxl-test.c
> > +++ b/tests/qtest/cxl-test.c
> > @@ -40,32 +40,46 @@
> >    "-device cxl-rp,id=rp2,bus=cxl.1,chassis=0,slot=2 " \
> >    "-device cxl-rp,id=rp3,bus=cxl.1,chassis=0,slot=3 "
> >  
> > -#define QEMU_T3D \
> > +#define QEMU_T3D_DEPRECATED \
> >    "-object memory-backend-file,id=cxl-mem0,mem-path=%s,size=256M " \
> > -  "-object memory-backend-file,id=lsa0,mem-path=%s,size=256M "    \
> > +  "-object memory-backend-file,id=lsa0,mem-path=%s,size=256M " \
> >    "-device cxl-type3,bus=rp0,memdev=cxl-mem0,lsa=lsa0,id=cxl-pmem0 "
> >  
> > +#define QEMU_T3D_PMEM \
> > +  "-object memory-backend-file,id=m0,mem-path=%s,size=256M " \
> > +  "-object memory-backend-file,id=lsa0,mem-path=%s,size=256M " \
> > +  "-device cxl-type3,bus=rp0,persistent-memdev=cxl-m0,lsa=lsa0,id=pmem0 "
> naming mismatch. I've fixed up with mem0 as the device name. 
> The naming isn't totally consistent so I may tweak this some more before
> sending for upstream.
> 
>

ack
Jonathan Cameron Jan. 31, 2023, 11:53 a.m. UTC | #17
On Mon, 28 Nov 2022 10:01:57 -0500
Gregory Price <gourry.memverge@gmail.com> wrote:

> From: Gregory Price <gourry.memverge@gmail.com>
> 
> This commit enables each CXL Type-3 device to contain one volatile
> memory region and one persistent region.
> 
> Two new properties have been added to cxl-type3 device initialization:
>     [volatile-memdev] and [persistent-memdev]
> 
> The existing [memdev] property has been deprecated and will default the
> memory region to a persistent memory region (although a user may assign
> the region to a ram or file backed region). It cannot be used in
> combination with the new [persistent-memdev] property.
> 
> Partitioning volatile memory from persistent memory is not yet supported.
> 
> Volatile memory is mapped at DPA(0x0), while Persistent memory is mapped
> at DPA(vmem->size), per CXL Spec 8.2.9.8.2.0 - Get Partition Info.
> 
> Signed-off-by: Gregory Price <gregory.price@memverge.com>
> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

I'm taking another look at these and tweaking what I'm carrying as I go
with a plan to post them for merging shortly.

Anyhow, a few more changes I wanted to call out here so they don't come
as a surprise / are things to focus on in reviewing that version 
(hopefully post it later today or maybe tomorrow).

Most of this is reducing the diff / duplication of code to make review a
tiny bit easier (hopefully)

...
  
>  /* TODO: Support multiple HDM decoders and DPA skip */
> @@ -663,11 +769,17 @@ MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data,
>  {
>      CXLType3Dev *ct3d = CXL_TYPE3(d);
>      uint64_t dpa_offset;
> -    MemoryRegion *mr;
> +    MemoryRegion *vmr = NULL, *pmr = NULL;
> +    AddressSpace *as;

We end up with a lot of duplication between cxl_type3_read and cxl_type3_write()
so I've factored out a _get_as_and_dpa() function that deals with establish
where the memory we then read or write is. 

>  
> -    /* TODO support volatile region */
> -    mr = host_memory_backend_get_memory(ct3d->hostmem);
> -    if (!mr) {
> +    if (ct3d->hostvmem) {
> +        vmr = host_memory_backend_get_memory(ct3d->hostvmem);
> +    }
> +    if (ct3d->hostpmem) {
> +        pmr = host_memory_backend_get_memory(ct3d->hostpmem);
> +    }
> +
> +    if (!vmr && !pmr) {
>          return MEMTX_ERROR;
>      }
>  
> @@ -675,11 +787,22 @@ MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data,
>          return MEMTX_ERROR;
>      }
>  
> -    if (dpa_offset > int128_get64(mr->size)) {
> +    if (dpa_offset > int128_get64(ct3d->cxl_dstate.mem_size)) {
>          return MEMTX_ERROR;
>      }
>  
> -    return address_space_read(&ct3d->hostmem_as, dpa_offset, attrs, data, size);
> +    if (vmr) {
> +        if (dpa_offset <= int128_get64(vmr->size)) {
> +            as = &ct3d->hostvmem_as;
> +        } else {
> +            as = &ct3d->hostpmem_as;
> +            dpa_offset -= vmr->size;
> +        }
> +    }
> +    else {
> +        as = &ct3d->hostpmem_as;
> +    }
> +    return address_space_read(as, dpa_offset, attrs, data, size);
>  }
>  
>  MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data,
> @@ -687,10 +810,17 @@ MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data,
>  {
>      CXLType3Dev *ct3d = CXL_TYPE3(d);
>      uint64_t dpa_offset;
> -    MemoryRegion *mr;
> +    MemoryRegion *vmr = NULL, *pmr = NULL;
> +    AddressSpace *as;
>  
> -    mr = host_memory_backend_get_memory(ct3d->hostmem);
> -    if (!mr) {
> +    if (ct3d->hostvmem) {
> +        vmr = host_memory_backend_get_memory(ct3d->hostvmem);
> +    }
> +    if (ct3d->hostpmem) {
> +        pmr = host_memory_backend_get_memory(ct3d->hostpmem);
> +    }
> +
> +    if (!vmr && !pmr) {
>          return MEMTX_OK;
>      }
>  
> @@ -698,11 +828,22 @@ MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data,
>          return MEMTX_OK;
>      }
>  
> -    if (dpa_offset > int128_get64(mr->size)) {
> +    if (dpa_offset > int128_get64(ct3d->cxl_dstate.mem_size)) {
>          return MEMTX_OK;
>      }
> -    return address_space_write(&ct3d->hostmem_as, dpa_offset, attrs,
> -                               &data, size);
> +
> +    if (vmr) {
> +        if (dpa_offset <= int128_get64(vmr->size)) {
> +            as = &ct3d->hostvmem_as;
> +        } else {
> +            as = &ct3d->hostpmem_as;
> +            dpa_offset -= vmr->size;
> +        }
> +    }
> +    else {
> +        as = &ct3d->hostpmem_as;
> +    }
> +    return address_space_write(as, dpa_offset, attrs, &data, size);
>  }
>  
>  static void ct3d_reset(DeviceState *dev)
> @@ -717,7 +858,11 @@ static void ct3d_reset(DeviceState *dev)
>  
>  static Property ct3_props[] = {
>      DEFINE_PROP_LINK("memdev", CXLType3Dev, hostmem, TYPE_MEMORY_BACKEND,
> -                     HostMemoryBackend *),
> +                     HostMemoryBackend *), /* for backward compatibility */
> +    DEFINE_PROP_LINK("persistent-memdev", CXLType3Dev, hostpmem,
> +                     TYPE_MEMORY_BACKEND, HostMemoryBackend *),
> +    DEFINE_PROP_LINK("volatile-memdev", CXLType3Dev, hostvmem,
> +                     TYPE_MEMORY_BACKEND, HostMemoryBackend *),
>      DEFINE_PROP_LINK("lsa", CXLType3Dev, lsa, TYPE_MEMORY_BACKEND,
>                       HostMemoryBackend *),
>      DEFINE_PROP_UINT64("sn", CXLType3Dev, sn, UI64_NULL),
> @@ -728,10 +873,12 @@ static Property ct3_props[] = {
>  
>  static uint64_t get_lsa_size(CXLType3Dev *ct3d)
>  {
> -    MemoryRegion *mr;
> -
> -    mr = host_memory_backend_get_memory(ct3d->lsa);
> -    return memory_region_size(mr);
> +    MemoryRegion *mr = NULL;
> +    if (ct3d->lsa) {

I flipped this logic as reduces the resulting diff and makes patch a little
easier to read.

> +        mr = host_memory_backend_get_memory(ct3d->lsa);
> +        return memory_region_size(mr);
> +    }
> +    return 0;
>  }
>  
>  static void validate_lsa_access(MemoryRegion *mr, uint64_t size,
> @@ -744,30 +891,35 @@ static void validate_lsa_access(MemoryRegion *mr, uint64_t size,
>  static uint64_t get_lsa(CXLType3Dev *ct3d, void *buf, uint64_t size,
>                      uint64_t offset)
>  {
> -    MemoryRegion *mr;
> +    MemoryRegion *mr = NULL;
>      void *lsa;
>  
> -    mr = host_memory_backend_get_memory(ct3d->lsa);
> -    validate_lsa_access(mr, size, offset);
> +    if (ct3d->lsa) {

Flipped this one as well.

> +        mr = host_memory_backend_get_memory(ct3d->lsa);
> +        validate_lsa_access(mr, size, offset);
>  
> -    lsa = memory_region_get_ram_ptr(mr) + offset;
> -    memcpy(buf, lsa, size);
> +        lsa = memory_region_get_ram_ptr(mr) + offset;
> +        memcpy(buf, lsa, size);
> +        return size;
> +    }
>  
> -    return size;
> +    return 0;
>  }
>  
>  static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size,
>                      uint64_t offset)
>  {
> -    MemoryRegion *mr;
> -    void *lsa;
> +    MemoryRegion *mr = NULL;
> +    void *lsa = NULL;
>  
> -    mr = host_memory_backend_get_memory(ct3d->lsa);
> -    validate_lsa_access(mr, size, offset);
> +    if (ct3d->lsa) {
> +        mr = host_memory_backend_get_memory(ct3d->lsa);
> +        validate_lsa_access(mr, size, offset);
>  
> -    lsa = memory_region_get_ram_ptr(mr) + offset;
> -    memcpy(lsa, buf, size);
> -    memory_region_set_dirty(mr, offset, size);
> +        lsa = memory_region_get_ram_ptr(mr) + offset;
> +        memcpy(lsa, buf, size);
> +        memory_region_set_dirty(mr, offset, size);
> +    }
>  
>      /*
>       * Just like the PMEM, if the guest is not allowed to exit gracefully, label
> @@ -978,7 +1130,7 @@ static void ct3_class_init(ObjectClass *oc, void *data)
>      pc->config_read = ct3d_config_read;
>  
>      set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
> -    dc->desc = "CXL PMEM Device (Type 3)";
> +    dc->desc = "CXL Memory Device (Type 3)";
>      dc->reset = ct3d_reset;
>      device_class_set_props(dc, ct3_props);
>  
>
diff mbox series

Patch

diff --git a/docs/system/devices/cxl.rst b/docs/system/devices/cxl.rst
index f25783a4ec..45639a676a 100644
--- a/docs/system/devices/cxl.rst
+++ b/docs/system/devices/cxl.rst
@@ -300,7 +300,7 @@  Example topology involving a switch::
 
 Example command lines
 ---------------------
-A very simple setup with just one directly attached CXL Type 3 device::
+A very simple setup with just one directly attached CXL Type 3 Persistent Memory device::
 
   qemu-system-aarch64 -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 -cpu max \
   ...
@@ -308,7 +308,28 @@  A very simple setup with just one directly attached CXL Type 3 device::
   -object memory-backend-file,id=cxl-lsa1,share=on,mem-path=/tmp/lsa.raw,size=256M \
   -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
   -device cxl-rp,port=0,bus=cxl.1,id=root_port13,chassis=0,slot=2 \
-  -device cxl-type3,bus=root_port13,memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem0 \
+  -device cxl-type3,bus=root_port13,persistent-memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem0 \
+  -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G
+
+A very simple setup with just one directly attached CXL Type 3 Volatile Memory device::
+
+  qemu-system-aarch64 -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 -cpu max \
+  ...
+  -object memory-backend-ram,id=vmem0,share=on,size=256M \
+  -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
+  -device cxl-rp,port=0,bus=cxl.1,id=root_port13,chassis=0,slot=2 \
+  -device cxl-type3,bus=root_port13,volatile-memdev=vmem0,id=cxl-vmem0 \
+  -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G
+
+The same volatile setup may optionally include an LSA region::
+
+  qemu-system-aarch64 -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 -cpu max \
+  ...
+  -object memory-backend-ram,id=vmem0,share=on,size=256M \
+  -object memory-backend-file,id=cxl-lsa0,share=on,mem-path=/tmp/lsa.raw,size=256M \
+  -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
+  -device cxl-rp,port=0,bus=cxl.1,id=root_port13,chassis=0,slot=2 \
+  -device cxl-type3,bus=root_port13,volatile-memdev=vmem0,lsa=cxl-lsa0,id=cxl-vmem0 \
   -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G
 
 A setup suitable for 4 way interleave. Only one fixed window provided, to enable 2 way
@@ -328,13 +349,13 @@  the CXL Type3 device directly attached (no switches).::
   -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
   -device pxb-cxl,bus_nr=222,bus=pcie.0,id=cxl.2 \
   -device cxl-rp,port=0,bus=cxl.1,id=root_port13,chassis=0,slot=2 \
-  -device cxl-type3,bus=root_port13,memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem0 \
+  -device cxl-type3,bus=root_port13,persistent-memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem0 \
   -device cxl-rp,port=1,bus=cxl.1,id=root_port14,chassis=0,slot=3 \
-  -device cxl-type3,bus=root_port14,memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem1 \
+  -device cxl-type3,bus=root_port14,persistent-memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem1 \
   -device cxl-rp,port=0,bus=cxl.2,id=root_port15,chassis=0,slot=5 \
-  -device cxl-type3,bus=root_port15,memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem2 \
+  -device cxl-type3,bus=root_port15,persistent-memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem2 \
   -device cxl-rp,port=1,bus=cxl.2,id=root_port16,chassis=0,slot=6 \
-  -device cxl-type3,bus=root_port16,memdev=cxl-mem4,lsa=cxl-lsa4,id=cxl-pmem3 \
+  -device cxl-type3,bus=root_port16,persistent-memdev=cxl-mem4,lsa=cxl-lsa4,id=cxl-pmem3 \
   -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.targets.1=cxl.2,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=8k
 
 An example of 4 devices below a switch suitable for 1, 2 or 4 way interleave::
@@ -354,15 +375,23 @@  An example of 4 devices below a switch suitable for 1, 2 or 4 way interleave::
   -device cxl-rp,port=1,bus=cxl.1,id=root_port1,chassis=0,slot=1 \
   -device cxl-upstream,bus=root_port0,id=us0 \
   -device cxl-downstream,port=0,bus=us0,id=swport0,chassis=0,slot=4 \
-  -device cxl-type3,bus=swport0,memdev=cxl-mem0,lsa=cxl-lsa0,id=cxl-pmem0,size=256M \
+  -device cxl-type3,bus=swport0,persistent-memdev=cxl-mem0,lsa=cxl-lsa0,id=cxl-pmem0,size=256M \
   -device cxl-downstream,port=1,bus=us0,id=swport1,chassis=0,slot=5 \
-  -device cxl-type3,bus=swport1,memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem1,size=256M \
+  -device cxl-type3,bus=swport1,persistent-memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem1,size=256M \
   -device cxl-downstream,port=2,bus=us0,id=swport2,chassis=0,slot=6 \
-  -device cxl-type3,bus=swport2,memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem2,size=256M \
+  -device cxl-type3,bus=swport2,persistent-memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem2,size=256M \
   -device cxl-downstream,port=3,bus=us0,id=swport3,chassis=0,slot=7 \
-  -device cxl-type3,bus=swport3,memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3,size=256M \
+  -device cxl-type3,bus=swport3,persistent-memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3,size=256M \
   -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k
 
+Deprecations
+------------
+
+The Type 3 device [memdev] attribute has been deprecated in favor
+of the [persistent-memdev] and [volatile-memdev] attributes. [memdev]
+will default to a persistent memory device for backward compatibility
+and is incapable of being used in combination with [persistent-memdev].
+
 Kernel Configuration Options
 ----------------------------
 
diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index d7543fd5b4..44cebb950a 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -269,7 +269,8 @@  static ret_code cmd_firmware_update_get_info(struct cxl_cmd *cmd,
     } QEMU_PACKED *fw_info;
     QEMU_BUILD_BUG_ON(sizeof(*fw_info) != 0x50);
 
-    if (cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER) {
+    if ((cxl_dstate->vmem_size < CXL_CAPACITY_MULTIPLIER) ||
+        (cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER)) {
         return CXL_MBOX_INTERNAL_ERROR;
     }
 
@@ -412,20 +413,20 @@  static ret_code cmd_identify_memory_device(struct cxl_cmd *cmd,
 
     CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
     CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
-    uint64_t size = cxl_dstate->pmem_size;
 
-    if (!QEMU_IS_ALIGNED(size, CXL_CAPACITY_MULTIPLIER)) {
+    if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
+        (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
         return CXL_MBOX_INTERNAL_ERROR;
     }
 
     id = (void *)cmd->payload;
     memset(id, 0, sizeof(*id));
 
-    /* PMEM only */
     snprintf(id->fw_revision, 0x10, "BWFW VERSION %02d", 0);
 
-    id->total_capacity = size / CXL_CAPACITY_MULTIPLIER;
-    id->persistent_capacity = size / CXL_CAPACITY_MULTIPLIER;
+    id->total_capacity = cxl_dstate->mem_size / CXL_CAPACITY_MULTIPLIER;
+    id->persistent_capacity = cxl_dstate->pmem_size / CXL_CAPACITY_MULTIPLIER;
+    id->volatile_capacity = cxl_dstate->vmem_size / CXL_CAPACITY_MULTIPLIER;
     id->lsa_size = cvc->get_lsa_size(ct3d);
     id->poison_list_max_mer[1] = 0x1; /* 256 poison records */
 
@@ -444,16 +445,15 @@  static ret_code cmd_ccls_get_partition_info(struct cxl_cmd *cmd,
         uint64_t next_pmem;
     } QEMU_PACKED *part_info = (void *)cmd->payload;
     QEMU_BUILD_BUG_ON(sizeof(*part_info) != 0x20);
-    uint64_t size = cxl_dstate->pmem_size;
 
-    if (!QEMU_IS_ALIGNED(size, CXL_CAPACITY_MULTIPLIER)) {
+    if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
+        (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
         return CXL_MBOX_INTERNAL_ERROR;
     }
 
-    /* PMEM only */
-    part_info->active_vmem = 0;
+    part_info->active_vmem = cxl_dstate->vmem_size / CXL_CAPACITY_MULTIPLIER;
     part_info->next_vmem = 0;
-    part_info->active_pmem = size / CXL_CAPACITY_MULTIPLIER;
+    part_info->active_pmem = cxl_dstate->pmem_size / CXL_CAPACITY_MULTIPLIER;
     part_info->next_pmem = 0;
 
     *len = sizeof(*part_info);
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 0317bd96a6..81dc3def01 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -32,7 +32,8 @@  enum {
 };
 
 static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
-                                         int dsmad_handle, MemoryRegion *mr)
+                                         int dsmad_handle, MemoryRegion *mr,
+                                         bool is_pmem, uint64_t dpa_base)
 {
     g_autofree CDATDsmas *dsmas = NULL;
     g_autofree CDATDslbis *dslbis0 = NULL;
@@ -51,8 +52,8 @@  static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
             .length = sizeof(*dsmas),
         },
         .DSMADhandle = dsmad_handle,
-        .flags = CDAT_DSMAS_FLAG_NV,
-        .DPA_base = 0,
+        .flags = is_pmem ? CDAT_DSMAS_FLAG_NV : 0,
+        .DPA_base = dpa_base,
         .DPA_length = int128_get64(mr->size),
     };
 
@@ -151,33 +152,67 @@  static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
 static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
 {
     g_autofree CDATSubHeader **table = NULL;
-    MemoryRegion *nonvolatile_mr;
     CXLType3Dev *ct3d = priv;
+    MemoryRegion *volatile_mr = NULL, *nonvolatile_mr = NULL;
     int dsmad_handle = 0;
+    int cur_ent = 0;
+    int len = 0;
     int rc;
 
-    if (!ct3d->hostmem) {
+    if (!ct3d->hostpmem && !ct3d->hostvmem) {
         return 0;
     }
 
-    nonvolatile_mr = host_memory_backend_get_memory(ct3d->hostmem);
-    if (!nonvolatile_mr) {
-        return -EINVAL;
+    if (ct3d->hostvmem) {
+        volatile_mr = host_memory_backend_get_memory(ct3d->hostvmem);
+        if (!volatile_mr) {
+            return -EINVAL;
+        }
+        len += CT3_CDAT_NUM_ENTRIES;
+    }
+
+    if (ct3d->hostpmem) {
+        nonvolatile_mr = host_memory_backend_get_memory(ct3d->hostpmem);
+        if (!nonvolatile_mr) {
+            return -EINVAL;
+        }
+        len += CT3_CDAT_NUM_ENTRIES;
     }
 
-    table = g_malloc0(CT3_CDAT_NUM_ENTRIES * sizeof(*table));
+    table = g_malloc0(len * sizeof(*table));
     if (!table) {
         return -ENOMEM;
     }
 
-    rc = ct3_build_cdat_entries_for_mr(table, dsmad_handle++, nonvolatile_mr);
-    if (rc < 0) {
-        return rc;
+    /* Now fill them in */
+    if (volatile_mr) {
+        rc = ct3_build_cdat_entries_for_mr(table, dsmad_handle++, volatile_mr,
+                                           true, 0);
+        if (rc < 0) {
+            return rc;
+        }
+        cur_ent = CT3_CDAT_NUM_ENTRIES;
+    }
+
+    if (nonvolatile_mr) {
+        rc = ct3_build_cdat_entries_for_mr(&(table[cur_ent]), dsmad_handle++,
+                nonvolatile_mr, true, (volatile_mr ? volatile_mr->size : 0));
+        if (rc < 0) {
+            goto error_cleanup;
+        }
+        cur_ent += CT3_CDAT_NUM_ENTRIES;
     }
+    assert(len == cur_ent);
 
     *cdat_table = g_steal_pointer(&table);
 
-    return CT3_CDAT_NUM_ENTRIES;
+    return len;
+error_cleanup:
+    int i;
+    for (i = 0; i < cur_ent; i++) {
+        g_free(table[i]);
+    }
+    return rc;
 }
 
 static void ct3_free_cdat_table(CDATSubHeader **cdat_table, int num, void *priv)
@@ -378,16 +413,48 @@  static void build_dvsecs(CXLType3Dev *ct3d)
     CXLDVSECRegisterLocator *regloc_dvsec;
     uint8_t *dvsec;
     int i;
+    uint32_t range1_size_hi, range1_size_lo,
+             range1_base_hi, range1_base_lo,
+             range2_size_hi = 0, range2_size_lo = 0,
+             range2_base_hi = 0, range2_base_lo = 0;
+
+    /*
+     * Volatile memory is mapped as (0x0)
+     * Persistent memory is mapped at (volatile->size)
+     */
+    if (ct3d->hostvmem) {
+        range1_size_hi = ct3d->hostvmem->size >> 32;
+        range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
+                         (ct3d->hostvmem->size & 0xF0000000);
+        range1_base_hi = 0;
+        range1_base_lo = 0;
+        if (ct3d->hostpmem) {
+            range2_size_hi = ct3d->hostpmem->size >> 32;
+            range2_size_lo = (2 << 5) | (2 << 2) | 0x3 |
+                             (ct3d->hostpmem->size & 0xF0000000);
+            range2_base_hi = ct3d->hostvmem->size >> 32;
+            range2_base_lo = ct3d->hostvmem->size & 0xF0000000;
+        }
+    } else {
+        range1_size_hi = ct3d->hostpmem->size >> 32;
+        range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
+                         (ct3d->hostpmem->size & 0xF0000000);
+        range1_base_hi = 0;
+        range1_base_lo = 0;
+    }
 
     dvsec = (uint8_t *)&(CXLDVSECDevice){
         .cap = 0x1e,
         .ctrl = 0x2,
         .status2 = 0x2,
-        .range1_size_hi = ct3d->hostmem->size >> 32,
-        .range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
-        (ct3d->hostmem->size & 0xF0000000),
-        .range1_base_hi = 0,
-        .range1_base_lo = 0,
+        .range1_size_hi = range1_size_hi,
+        .range1_size_lo = range1_size_lo,
+        .range1_base_hi = range1_base_hi,
+        .range1_base_lo = range1_base_lo,
+        .range2_size_hi = range2_size_hi,
+        .range2_size_lo = range2_size_lo,
+        .range2_base_hi = range2_base_hi,
+        .range2_base_lo = range2_base_lo
     };
     cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE,
                                PCIE_CXL_DEVICE_DVSEC_LENGTH,
@@ -475,33 +542,62 @@  static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
     MemoryRegion *mr;
     char *name;
 
-    if (!ct3d->hostmem) {
-        error_setg(errp, "memdev property must be set");
+    if (!ct3d->hostmem && !ct3d->hostvmem && !ct3d->hostpmem) {
+        error_setg(errp, "at least one memdev property must be set");
         return false;
+    } else if (ct3d->hostmem && ct3d->hostpmem) {
+        error_setg(errp, "[memdev] cannot be used with new "
+                         "[persistent-memdev] property");
+        return false;
+    } else if (ct3d->hostmem) {
+        /* Use of hostmem property implies pmem */
+        ct3d->hostpmem = ct3d->hostmem;
+        ct3d->hostmem = NULL;
     }
 
-    mr = host_memory_backend_get_memory(ct3d->hostmem);
-    if (!mr) {
-        error_setg(errp, "memdev property must be set");
+    if (ct3d->hostpmem && !ct3d->lsa) {
+        error_setg(errp, "lsa property must be set for persistent devices");
         return false;
     }
-    memory_region_set_nonvolatile(mr, true);
-    memory_region_set_enabled(mr, true);
-    host_memory_backend_set_mapped(ct3d->hostmem, true);
 
-    if (ds->id) {
-        name = g_strdup_printf("cxl-type3-dpa-space:%s", ds->id);
-    } else {
-        name = g_strdup("cxl-type3-dpa-space");
+    if (ct3d->hostvmem) {
+        mr = host_memory_backend_get_memory(ct3d->hostvmem);
+        if (!mr) {
+            error_setg(errp, "volatile memdev must have backing device");
+            return false;
+        }
+        memory_region_set_nonvolatile(mr, false);
+        memory_region_set_enabled(mr, true);
+        host_memory_backend_set_mapped(ct3d->hostvmem, true);
+        if (ds->id) {
+            name = g_strdup_printf("cxl-type3-dpa-vmem-space:%s", ds->id);
+        } else {
+            name = g_strdup("cxl-type3-dpa-vmem-space");
+        }
+        address_space_init(&ct3d->hostvmem_as, mr, name);
+        ct3d->cxl_dstate.vmem_size = mr->size;
+        ct3d->cxl_dstate.mem_size += mr->size;
+        g_free(name);
     }
-    address_space_init(&ct3d->hostmem_as, mr, name);
-    g_free(name);
-
-    ct3d->cxl_dstate.pmem_size = ct3d->hostmem->size;
 
-    if (!ct3d->lsa) {
-        error_setg(errp, "lsa property must be set");
-        return false;
+    if (ct3d->hostpmem) {
+        mr = host_memory_backend_get_memory(ct3d->hostpmem);
+        if (!mr) {
+            error_setg(errp, "persistent memdev must have backing device");
+            return false;
+        }
+        memory_region_set_nonvolatile(mr, true);
+        memory_region_set_enabled(mr, true);
+        host_memory_backend_set_mapped(ct3d->hostpmem, true);
+        if (ds->id) {
+            name = g_strdup_printf("cxl-type3-dpa-pmem-space:%s", ds->id);
+        } else {
+            name = g_strdup("cxl-type3-dpa-pmem-space");
+        }
+        address_space_init(&ct3d->hostpmem_as, mr, name);
+        ct3d->cxl_dstate.pmem_size = mr->size;
+        ct3d->cxl_dstate.mem_size += mr->size;
+        g_free(name);
     }
 
     return true;
@@ -609,7 +705,12 @@  err_free_spdm_socket:
 err_free_special_ops:
     g_free(regs->special_ops);
 err_address_space_free:
-    address_space_destroy(&ct3d->hostmem_as);
+    if (ct3d->hostvmem) {
+        address_space_destroy(&ct3d->hostvmem_as);
+    }
+    if (ct3d->hostpmem) {
+        address_space_destroy(&ct3d->hostpmem_as);
+    }
     return;
 }
 
@@ -623,7 +724,12 @@  static void ct3_exit(PCIDevice *pci_dev)
     cxl_doe_cdat_release(cxl_cstate);
     spdm_sock_fini(ct3d->doe_spdm.socket);
     g_free(regs->special_ops);
-    address_space_destroy(&ct3d->hostmem_as);
+    if (ct3d->hostvmem) {
+        address_space_destroy(&ct3d->hostvmem_as);
+    }
+    if (ct3d->hostpmem) {
+        address_space_destroy(&ct3d->hostpmem_as);
+    }
 }
 
 /* TODO: Support multiple HDM decoders and DPA skip */
@@ -663,11 +769,17 @@  MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data,
 {
     CXLType3Dev *ct3d = CXL_TYPE3(d);
     uint64_t dpa_offset;
-    MemoryRegion *mr;
+    MemoryRegion *vmr = NULL, *pmr = NULL;
+    AddressSpace *as;
 
-    /* TODO support volatile region */
-    mr = host_memory_backend_get_memory(ct3d->hostmem);
-    if (!mr) {
+    if (ct3d->hostvmem) {
+        vmr = host_memory_backend_get_memory(ct3d->hostvmem);
+    }
+    if (ct3d->hostpmem) {
+        pmr = host_memory_backend_get_memory(ct3d->hostpmem);
+    }
+
+    if (!vmr && !pmr) {
         return MEMTX_ERROR;
     }
 
@@ -675,11 +787,22 @@  MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data,
         return MEMTX_ERROR;
     }
 
-    if (dpa_offset > int128_get64(mr->size)) {
+    if (dpa_offset > int128_get64(ct3d->cxl_dstate.mem_size)) {
         return MEMTX_ERROR;
     }
 
-    return address_space_read(&ct3d->hostmem_as, dpa_offset, attrs, data, size);
+    if (vmr) {
+        if (dpa_offset <= int128_get64(vmr->size)) {
+            as = &ct3d->hostvmem_as;
+        } else {
+            as = &ct3d->hostpmem_as;
+            dpa_offset -= vmr->size;
+        }
+    }
+    else {
+        as = &ct3d->hostpmem_as;
+    }
+    return address_space_read(as, dpa_offset, attrs, data, size);
 }
 
 MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data,
@@ -687,10 +810,17 @@  MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data,
 {
     CXLType3Dev *ct3d = CXL_TYPE3(d);
     uint64_t dpa_offset;
-    MemoryRegion *mr;
+    MemoryRegion *vmr = NULL, *pmr = NULL;
+    AddressSpace *as;
 
-    mr = host_memory_backend_get_memory(ct3d->hostmem);
-    if (!mr) {
+    if (ct3d->hostvmem) {
+        vmr = host_memory_backend_get_memory(ct3d->hostvmem);
+    }
+    if (ct3d->hostpmem) {
+        pmr = host_memory_backend_get_memory(ct3d->hostpmem);
+    }
+
+    if (!vmr && !pmr) {
         return MEMTX_OK;
     }
 
@@ -698,11 +828,22 @@  MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data,
         return MEMTX_OK;
     }
 
-    if (dpa_offset > int128_get64(mr->size)) {
+    if (dpa_offset > int128_get64(ct3d->cxl_dstate.mem_size)) {
         return MEMTX_OK;
     }
-    return address_space_write(&ct3d->hostmem_as, dpa_offset, attrs,
-                               &data, size);
+
+    if (vmr) {
+        if (dpa_offset <= int128_get64(vmr->size)) {
+            as = &ct3d->hostvmem_as;
+        } else {
+            as = &ct3d->hostpmem_as;
+            dpa_offset -= vmr->size;
+        }
+    }
+    else {
+        as = &ct3d->hostpmem_as;
+    }
+    return address_space_write(as, dpa_offset, attrs, &data, size);
 }
 
 static void ct3d_reset(DeviceState *dev)
@@ -717,7 +858,11 @@  static void ct3d_reset(DeviceState *dev)
 
 static Property ct3_props[] = {
     DEFINE_PROP_LINK("memdev", CXLType3Dev, hostmem, TYPE_MEMORY_BACKEND,
-                     HostMemoryBackend *),
+                     HostMemoryBackend *), /* for backward compatibility */
+    DEFINE_PROP_LINK("persistent-memdev", CXLType3Dev, hostpmem,
+                     TYPE_MEMORY_BACKEND, HostMemoryBackend *),
+    DEFINE_PROP_LINK("volatile-memdev", CXLType3Dev, hostvmem,
+                     TYPE_MEMORY_BACKEND, HostMemoryBackend *),
     DEFINE_PROP_LINK("lsa", CXLType3Dev, lsa, TYPE_MEMORY_BACKEND,
                      HostMemoryBackend *),
     DEFINE_PROP_UINT64("sn", CXLType3Dev, sn, UI64_NULL),
@@ -728,10 +873,12 @@  static Property ct3_props[] = {
 
 static uint64_t get_lsa_size(CXLType3Dev *ct3d)
 {
-    MemoryRegion *mr;
-
-    mr = host_memory_backend_get_memory(ct3d->lsa);
-    return memory_region_size(mr);
+    MemoryRegion *mr = NULL;
+    if (ct3d->lsa) {
+        mr = host_memory_backend_get_memory(ct3d->lsa);
+        return memory_region_size(mr);
+    }
+    return 0;
 }
 
 static void validate_lsa_access(MemoryRegion *mr, uint64_t size,
@@ -744,30 +891,35 @@  static void validate_lsa_access(MemoryRegion *mr, uint64_t size,
 static uint64_t get_lsa(CXLType3Dev *ct3d, void *buf, uint64_t size,
                     uint64_t offset)
 {
-    MemoryRegion *mr;
+    MemoryRegion *mr = NULL;
     void *lsa;
 
-    mr = host_memory_backend_get_memory(ct3d->lsa);
-    validate_lsa_access(mr, size, offset);
+    if (ct3d->lsa) {
+        mr = host_memory_backend_get_memory(ct3d->lsa);
+        validate_lsa_access(mr, size, offset);
 
-    lsa = memory_region_get_ram_ptr(mr) + offset;
-    memcpy(buf, lsa, size);
+        lsa = memory_region_get_ram_ptr(mr) + offset;
+        memcpy(buf, lsa, size);
+        return size;
+    }
 
-    return size;
+    return 0;
 }
 
 static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size,
                     uint64_t offset)
 {
-    MemoryRegion *mr;
-    void *lsa;
+    MemoryRegion *mr = NULL;
+    void *lsa = NULL;
 
-    mr = host_memory_backend_get_memory(ct3d->lsa);
-    validate_lsa_access(mr, size, offset);
+    if (ct3d->lsa) {
+        mr = host_memory_backend_get_memory(ct3d->lsa);
+        validate_lsa_access(mr, size, offset);
 
-    lsa = memory_region_get_ram_ptr(mr) + offset;
-    memcpy(lsa, buf, size);
-    memory_region_set_dirty(mr, offset, size);
+        lsa = memory_region_get_ram_ptr(mr) + offset;
+        memcpy(lsa, buf, size);
+        memory_region_set_dirty(mr, offset, size);
+    }
 
     /*
      * Just like the PMEM, if the guest is not allowed to exit gracefully, label
@@ -978,7 +1130,7 @@  static void ct3_class_init(ObjectClass *oc, void *data)
     pc->config_read = ct3d_config_read;
 
     set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
-    dc->desc = "CXL PMEM Device (Type 3)";
+    dc->desc = "CXL Memory Device (Type 3)";
     dc->reset = ct3d_reset;
     device_class_set_props(dc, ct3_props);
 
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 1cd71afcb6..1b366b739c 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -180,8 +180,10 @@  typedef struct cxl_device_state {
         uint64_t host_set;
     } timestamp;
 
-    /* memory region for persistent memory, HDM */
+    /* memory region size, HDM */
+    uint64_t mem_size;
     uint64_t pmem_size;
+    uint64_t vmem_size;
 
     struct cxl_cmd (*cxl_cmd_set)[256];
     /* Move me later */
@@ -311,12 +313,15 @@  struct CXLType3Dev {
     PCIDevice parent_obj;
 
     /* Properties */
-    HostMemoryBackend *hostmem;
+    HostMemoryBackend *hostmem; /* deprecated */
+    HostMemoryBackend *hostvmem;
+    HostMemoryBackend *hostpmem;
     HostMemoryBackend *lsa;
     uint64_t sn;
 
     /* State */
-    AddressSpace hostmem_as;
+    AddressSpace hostvmem_as;
+    AddressSpace hostpmem_as;
     CXLComponentState cxl_cstate;
     CXLDeviceState cxl_dstate;
 
diff --git a/tests/qtest/cxl-test.c b/tests/qtest/cxl-test.c
index e59ba22387..6893f54e28 100644
--- a/tests/qtest/cxl-test.c
+++ b/tests/qtest/cxl-test.c
@@ -40,32 +40,46 @@ 
   "-device cxl-rp,id=rp2,bus=cxl.1,chassis=0,slot=2 " \
   "-device cxl-rp,id=rp3,bus=cxl.1,chassis=0,slot=3 "
 
-#define QEMU_T3D \
+#define QEMU_T3D_DEPRECATED \
   "-object memory-backend-file,id=cxl-mem0,mem-path=%s,size=256M " \
-  "-object memory-backend-file,id=lsa0,mem-path=%s,size=256M "    \
+  "-object memory-backend-file,id=lsa0,mem-path=%s,size=256M " \
   "-device cxl-type3,bus=rp0,memdev=cxl-mem0,lsa=lsa0,id=cxl-pmem0 "
 
+#define QEMU_T3D_PMEM \
+  "-object memory-backend-file,id=m0,mem-path=%s,size=256M " \
+  "-object memory-backend-file,id=lsa0,mem-path=%s,size=256M " \
+  "-device cxl-type3,bus=rp0,persistent-memdev=cxl-m0,lsa=lsa0,id=pmem0 "
+
+#define QEMU_T3D_VMEM \
+  "-object memory-backend-ram,id=mem0,size=256M " \
+  "-device cxl-type3,bus=rp0,volatile-memdev=mem0,id=mem0 "
+
+#define QEMU_T3D_VMEM_LSA \
+  "-object memory-backend-ram,id=mem0,size=256M " \
+  "-object memory-backend-file,id=lsa0,mem-path=%s,size=256M " \
+  "-device cxl-type3,bus=rp0,volatile-memdev=mem0,lsa=lsa0,id=mem0 "
+
 #define QEMU_2T3D \
   "-object memory-backend-file,id=cxl-mem0,mem-path=%s,size=256M " \
   "-object memory-backend-file,id=lsa0,mem-path=%s,size=256M " \
-  "-device cxl-type3,bus=rp0,memdev=cxl-mem0,lsa=lsa0,id=cxl-pmem0 " \
+  "-device cxl-type3,bus=rp0,persistent-memdev=cxl-mem0,lsa=lsa0,id=pmem0 " \
   "-object memory-backend-file,id=cxl-mem1,mem-path=%s,size=256M " \
   "-object memory-backend-file,id=lsa1,mem-path=%s,size=256M " \
-  "-device cxl-type3,bus=rp1,memdev=cxl-mem1,lsa=lsa1,id=cxl-pmem1 "
+  "-device cxl-type3,bus=rp1,persistent-memdev=cxl-mem1,lsa=lsa1,id=pmem1 "
 
 #define QEMU_4T3D \
   "-object memory-backend-file,id=cxl-mem0,mem-path=%s,size=256M " \
   "-object memory-backend-file,id=lsa0,mem-path=%s,size=256M " \
-  "-device cxl-type3,bus=rp0,memdev=cxl-mem0,lsa=lsa0,id=cxl-pmem0 " \
+  "-device cxl-type3,bus=rp0,persistent-memdev=cxl-mem0,lsa=lsa0,id=pmem0 " \
   "-object memory-backend-file,id=cxl-mem1,mem-path=%s,size=256M " \
   "-object memory-backend-file,id=lsa1,mem-path=%s,size=256M " \
-  "-device cxl-type3,bus=rp1,memdev=cxl-mem1,lsa=lsa1,id=cxl-pmem1 " \
+  "-device cxl-type3,bus=rp1,persistent-memdev=cxl-mem1,lsa=lsa1,id=pmem1 " \
   "-object memory-backend-file,id=cxl-mem2,mem-path=%s,size=256M " \
   "-object memory-backend-file,id=lsa2,mem-path=%s,size=256M " \
-  "-device cxl-type3,bus=rp2,memdev=cxl-mem2,lsa=lsa2,id=cxl-pmem2 " \
+  "-device cxl-type3,bus=rp2,persistent-memdev=cxl-mem2,lsa=lsa2,id=pmem2 " \
   "-object memory-backend-file,id=cxl-mem3,mem-path=%s,size=256M " \
   "-object memory-backend-file,id=lsa3,mem-path=%s,size=256M " \
-  "-device cxl-type3,bus=rp3,memdev=cxl-mem3,lsa=lsa3,id=cxl-pmem3 "
+  "-device cxl-type3,bus=rp3,persistent-memdev=cxl-mem3,lsa=lsa3,id=pmem3 "
 
 static void cxl_basic_hb(void)
 {
@@ -104,14 +118,53 @@  static void cxl_2root_port(void)
 }
 
 #ifdef CONFIG_POSIX
-static void cxl_t3d(void)
+static void cxl_t3d_deprecated(void)
+{
+    g_autoptr(GString) cmdline = g_string_new(NULL);
+    g_autofree const char *tmpfs = NULL;
+
+    tmpfs = g_dir_make_tmp("cxl-test-XXXXXX", NULL);
+
+    g_string_printf(cmdline, QEMU_PXB_CMD QEMU_RP QEMU_T3D_DEPRECATED,
+                    tmpfs, tmpfs);
+
+    qtest_start(cmdline->str);
+    qtest_end();
+}
+
+static void cxl_t3d_persistent(void)
+{
+    g_autoptr(GString) cmdline = g_string_new(NULL);
+    g_autofree const char *tmpfs = NULL;
+
+    tmpfs = g_dir_make_tmp("cxl-test-XXXXXX", NULL);
+
+    g_string_printf(cmdline, QEMU_PXB_CMD QEMU_RP QEMU_T3D_PMEM,
+                    tmpfs, tmpfs);
+
+    qtest_start(cmdline->str);
+    qtest_end();
+}
+
+static void cxl_t3d_volatile(void)
+{
+    g_autoptr(GString) cmdline = g_string_new(NULL);
+
+    g_string_printf(cmdline, QEMU_PXB_CMD QEMU_RP QEMU_T3D_VMEM);
+
+    qtest_start(cmdline->str);
+    qtest_end();
+}
+
+static void cxl_t3d_volatile_lsa(void)
 {
     g_autoptr(GString) cmdline = g_string_new(NULL);
     g_autofree const char *tmpfs = NULL;
 
     tmpfs = g_dir_make_tmp("cxl-test-XXXXXX", NULL);
 
-    g_string_printf(cmdline, QEMU_PXB_CMD QEMU_RP QEMU_T3D, tmpfs, tmpfs);
+    g_string_printf(cmdline, QEMU_PXB_CMD QEMU_RP QEMU_T3D_VMEM_LSA,
+                    tmpfs);
 
     qtest_start(cmdline->str);
     qtest_end();
@@ -179,7 +232,10 @@  int main(int argc, char **argv)
         qtest_add_func("/pci/cxl/rp", cxl_root_port);
         qtest_add_func("/pci/cxl/rp_x2", cxl_2root_port);
 #ifdef CONFIG_POSIX
-        qtest_add_func("/pci/cxl/type3_device", cxl_t3d);
+        qtest_add_func("/pci/cxl/type3_device", cxl_t3d_deprecated);
+        qtest_add_func("/pci/cxl/type3_device_pmem", cxl_t3d_persistent);
+        qtest_add_func("/pci/cxl/type3_device_vmem", cxl_t3d_volatile);
+        qtest_add_func("/pci/cxl/type3_device_vmem_lsa", cxl_t3d_volatile_lsa);
         qtest_add_func("/pci/cxl/rp_x2_type3_x2", cxl_1pxb_2rp_2t3d);
         qtest_add_func("/pci/cxl/pxb_x2_root_port_x4_type3_x4",
                        cxl_2pxb_4rp_4t3d);