diff mbox series

[v5,09/13] hw/cxl/events: Add qmp interfaces to add/release dynamic capacity extents

Message ID 20240304194331.1586191-10-nifan.cxl@gmail.com
State Superseded
Headers show
Series Enabling DCD emulation support in Qemu | expand

Commit Message

fan March 4, 2024, 7:34 p.m. UTC
From: Fan Ni <fan.ni@samsung.com>

Since fabric manager emulation is not supported yet, the change implements
the functions to add/release dynamic capacity extents as QMP interfaces.

Note: we skips any FM issued extent release request if the exact extent
does not exist in the extent list of the device. We will loose the
restriction later once we have partial release support in the kernel.

1. Add dynamic capacity extents:

For example, the command to add two continuous extents (each 128MiB long)
to region 0 (starting at DPA offset 0) looks like below:

{ "execute": "qmp_capabilities" }

{ "execute": "cxl-add-dynamic-capacity",
  "arguments": {
      "path": "/machine/peripheral/cxl-dcd0",
      "region-id": 0,
      "extents": [
      {
          "dpa": 0,
          "len": 134217728
      },
      {
          "dpa": 134217728,
          "len": 134217728
      }
      ]
  }
}

2. Release dynamic capacity extents:

For example, the command to release an extent of size 128MiB from region 0
(DPA offset 128MiB) look like below:

{ "execute": "cxl-release-dynamic-capacity",
  "arguments": {
      "path": "/machine/peripheral/cxl-dcd0",
      "region-id": 0,
      "extents": [
      {
          "dpa": 134217728,
          "len": 134217728
      }
      ]
  }
}

Signed-off-by: Fan Ni <fan.ni@samsung.com>
---
 hw/cxl/cxl-mailbox-utils.c  |  26 ++--
 hw/mem/cxl_type3.c          | 245 +++++++++++++++++++++++++++++++++++-
 hw/mem/cxl_type3_stubs.c    |  14 +++
 include/hw/cxl/cxl_device.h |   6 +
 include/hw/cxl/cxl_events.h |  18 +++
 qapi/cxl.json               |  61 ++++++++-
 6 files changed, 361 insertions(+), 9 deletions(-)

Comments

Jonathan Cameron March 6, 2024, 5:48 p.m. UTC | #1
On Mon,  4 Mar 2024 11:34:04 -0800
nifan.cxl@gmail.com wrote:

> From: Fan Ni <fan.ni@samsung.com>
> 
> Since fabric manager emulation is not supported yet, the change implements
> the functions to add/release dynamic capacity extents as QMP interfaces.

We'll need them anyway, or to implement an fm interface via QMP which is
going to be ugly and complex.

> 
> Note: we skips any FM issued extent release request if the exact extent
> does not exist in the extent list of the device. We will loose the
> restriction later once we have partial release support in the kernel.

Maybe the kernel will treat it as a request to release the extent it
is tracking that contains it.  So we may want to add a way to poke that.
Not today though!

> 
> 1. Add dynamic capacity extents:
> 
> For example, the command to add two continuous extents (each 128MiB long)
> to region 0 (starting at DPA offset 0) looks like below:
> 
> { "execute": "qmp_capabilities" }
> 
> { "execute": "cxl-add-dynamic-capacity",
>   "arguments": {
>       "path": "/machine/peripheral/cxl-dcd0",
>       "region-id": 0,
>       "extents": [
>       {
>           "dpa": 0,
>           "len": 134217728
>       },
>       {
>           "dpa": 134217728,
>           "len": 134217728
>       }
>       ]
>   }
> }
> 
> 2. Release dynamic capacity extents:
> 
> For example, the command to release an extent of size 128MiB from region 0
> (DPA offset 128MiB) look like below:
> 
> { "execute": "cxl-release-dynamic-capacity",
>   "arguments": {
>       "path": "/machine/peripheral/cxl-dcd0",
>       "region-id": 0,
>       "extents": [
>       {
>           "dpa": 134217728,
>           "len": 134217728
>       }
>       ]
>   }
> }
> 
> Signed-off-by: Fan Ni <fan.ni@samsung.com>

...
  
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index dccfaaad3a..e9c8994cdb 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -674,6 +674,7 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error **errp)
>          ct3d->dc.total_capacity += region->len;
>      }
>      QTAILQ_INIT(&ct3d->dc.extents);
> +    QTAILQ_INIT(&ct3d->dc.extents_pending_to_add);
>  
>      return true;
>  }
> @@ -686,6 +687,12 @@ static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
>          ent = QTAILQ_FIRST(&ct3d->dc.extents);
>          cxl_remove_extent_from_extent_list(&ct3d->dc.extents, ent);
>      }
> +
> +    while (!QTAILQ_EMPTY(&ct3d->dc.extents_pending_to_add)) {

QTAILQ_FOR_EACHSAFE

> +        ent = QTAILQ_FIRST(&ct3d->dc.extents_pending_to_add);
> +        cxl_remove_extent_from_extent_list(&ct3d->dc.extents_pending_to_add,
> +                                           ent);
> +    }
>  }

> +/*
> + * The main function to process dynamic capacity event. Currently DC extents
> + * add/release requests are processed.
> + */
> +static void qmp_cxl_process_dynamic_capacity(const char *path, CxlEventLog log,
> +                                             CXLDCEventType type, uint16_t hid,
> +                                             uint8_t rid,
> +                                             CXLDCExtentRecordList *records,
> +                                             Error **errp)
> +{
> +    Object *obj;
> +    CXLEventDynamicCapacity dCap = {};
> +    CXLEventRecordHdr *hdr = &dCap.hdr;
> +    CXLType3Dev *dcd;
> +    uint8_t flags = 1 << CXL_EVENT_TYPE_INFO;
> +    uint32_t num_extents = 0;
> +    CXLDCExtentRecordList *list;
> +    g_autofree CXLDCExtentRaw *extents = NULL;
> +    uint8_t enc_log;
> +    uint64_t offset, len, block_size;
> +    int i;
> +    int rc;

Combine the two lines above.

> +    g_autofree unsigned long *blk_bitmap = NULL;
> +
> +    obj = object_resolve_path(path, NULL);
> +    if (!obj) {
> +        error_setg(errp, "Unable to resolve path");
> +        return;
> +    }

object_resolve_path_type() and skip a step (should do this in various places
in our existing code!)

> +    if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
> +        error_setg(errp, "Path not point to a valid CXL type3 device");
> +        return;
> +    }
> +
> +    dcd = CXL_TYPE3(obj);
> +    if (!dcd->dc.num_regions) {
> +        error_setg(errp, "No dynamic capacity support from the device");
> +        return;
> +    }
> +
> +    rc = ct3d_qmp_cxl_event_log_enc(log);
> +    if (rc < 0) {
> +        error_setg(errp, "Unhandled error log type");
> +        return;
> +    }
> +    enc_log = rc;
> +
> +    if (rid >= dcd->dc.num_regions) {
> +        error_setg(errp, "region id is too large");
> +        return;
> +    }
> +    block_size = dcd->dc.regions[rid].block_size;
> +
> +    /* Sanity check and count the extents */
> +    list = records;
> +    while (list) {
> +        offset = list->value->offset;
> +        len = list->value->len;
> +
> +        if (len == 0) {
> +            error_setg(errp, "extent with 0 length is not allowed");
> +            return;
> +        }
> +
> +        if (offset % block_size || len % block_size) {
> +            error_setg(errp, "dpa or len is not aligned to region block size");
> +            return;
> +        }
> +
> +        if (offset + len > dcd->dc.regions[rid].len) {
> +            error_setg(errp, "extent range is beyond the region end");
> +            return;
> +        }
> +
> +        num_extents++;
> +        list = list->next;
> +    }
> +    if (num_extents == 0) {
> +        error_setg(errp, "No extents found in the command");
> +        return;
> +    }
> +
> +    blk_bitmap = bitmap_new(dcd->dc.regions[rid].len / block_size);
> +
> +    /* Create Extent list for event being passed to host */
> +    i = 0;
> +    list = records;
> +    extents = g_new0(CXLDCExtentRaw, num_extents);
> +    while (list) {
> +        CXLDCExtent *ent;
> +        bool skip_extent = false;
> +
> +        offset = list->value->offset;
> +        len = list->value->len;
> +
> +        extents[i].start_dpa = offset + dcd->dc.regions[rid].base;
> +        extents[i].len = len;
> +        memset(extents[i].tag, 0, 0x10);
> +        extents[i].shared_seq = 0;
> +
> +        if (type == DC_EVENT_RELEASE_CAPACITY ||
> +            type == DC_EVENT_FORCED_RELEASE_CAPACITY) {
> +            /*
> +             *  if the extent is still pending to be added to the host,

Odd spacing.

> +             * remove it from the pending extent list, so later when the add
> +             * response for the extent arrives, the device can reject the
> +             * extent as it is not in the pending list.
> +             */
> +            ent = cxl_dc_extent_exists(&dcd->dc.extents_pending_to_add,
> +                        &extents[i]);
> +            if (ent) {
> +                QTAILQ_REMOVE(&dcd->dc.extents_pending_to_add, ent, node);
> +                g_free(ent);
> +                skip_extent = true;
> +            } else if (!cxl_dc_extent_exists(&dcd->dc.extents, &extents[i])) {
> +                /* If the exact extent is not in the accepted list, skip */
> +                skip_extent = true;
> +            }
I think we need to reject case of some extents skipped and others not.
That's not supported yet so we need to complain if we get it at least. Maybe we need
to do two passes so we know this has happened early (or perhaps this is a later
patch in which case a todo here would help).

> +        
> +
> +        /* No duplicate or overlapped extents are allowed */
> +        if (test_any_bits_set(blk_bitmap, offset / block_size,
> +                              len / block_size)) {
> +            error_setg(errp, "duplicate or overlapped extents are detected");
> +            return;
> +        }
> +        bitmap_set(blk_bitmap, offset / block_size, len / block_size);
> +
> +        list = list->next;
> +        if (!skip_extent) {
> +            i++;
Problem is if we skip one in the middle the records will be wrong below.
> +        }
> +    }
> +    num_extents = i;
> +
> +    /*
> +     * CXL r3.1 section 8.2.9.2.1.6: Dynamic Capacity Event Record
> +     *
> +     * All Dynamic Capacity event records shall set the Event Record Severity
> +     * field in the Common Event Record Format to Informational Event. All
> +     * Dynamic Capacity related events shall be logged in the Dynamic Capacity
> +     * Event Log.
> +     */
> +    cxl_assign_event_header(hdr, &dynamic_capacity_uuid, flags, sizeof(dCap),
> +                            cxl_device_get_timestamp(&dcd->cxl_dstate));
> +
> +    dCap.type = type;
> +    /* FIXME: for now, validity flag is cleared */
> +    dCap.validity_flags = 0;
> +    stw_le_p(&dCap.host_id, hid);
> +    /* only valid for DC_REGION_CONFIG_UPDATED event */
> +    dCap.updated_region_id = 0;
> +    /*
> +     * FIXME: for now, the "More" flag is cleared as there is only one
> +     * extent associating with each record and tag-based release is
> +     * not supported.

Hmm. Seems like tag support would be easy.  Add an optional qmp parameter,
if a tag is set, we set the more flag for all but the last entry in this
loop.  I'm ok with that being a follow up patch though.

> +     */
> +    dCap.flags = 0;
> +    for (i = 0; i < num_extents; i++) {
> +        memcpy(&dCap.dynamic_capacity_extent, &extents[i],
> +               sizeof(CXLDCExtentRaw));
> +
> +        if (type == DC_EVENT_ADD_CAPACITY) {
> +            cxl_insert_extent_to_extent_list(&dcd->dc.extents_pending_to_add,
> +                                             extents[i].start_dpa,
> +                                             extents[i].len,
> +                                             extents[i].tag,
> +                                             extents[i].shared_seq);
> +        }
> +
> +        if (cxl_event_insert(&dcd->cxl_dstate, enc_log,
> +                             (CXLEventRecordRaw *)&dCap)) {
> +            cxl_event_irq_assert(dcd);
> +        }
> +    }
> +}




> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> index 341260e6e4..b524c5e699 100644
> --- a/include/hw/cxl/cxl_device.h
> +++ b/include/hw/cxl/cxl_device.h
> @@ -490,6 +490,7 @@ struct CXLType3Dev {
>          AddressSpace host_dc_as;
>          uint64_t total_capacity; /* 256M aligned */
>          CXLDCExtentList extents;
> +        CXLDCExtentList extents_pending_to_add;

Long name, extents_pending or just pending is plenty I think.

>          uint32_t total_extent_count;
>          uint32_t ext_list_gen_seq;
>  
> @@ -551,4 +552,9 @@ CXLDCRegion *cxl_find_dc_region(CXLType3Dev *ct3d, uint64_t dpa, uint64_t len);
>  
>  void cxl_remove_extent_from_extent_list(CXLDCExtentList *list,
>                                          CXLDCExtent *extent);
> +void cxl_insert_extent_to_extent_list(CXLDCExtentList *list, uint64_t dpa,
> +                                      uint64_t len, uint8_t *tag,
> +                                      uint16_t shared_seq);
> +bool test_any_bits_set(const unsigned long *addr, unsigned long nr,
> +                       unsigned long size);
>  #endif
fan March 6, 2024, 11:15 p.m. UTC | #2
On Wed, Mar 06, 2024 at 05:48:11PM +0000, Jonathan Cameron wrote:
> On Mon,  4 Mar 2024 11:34:04 -0800
> nifan.cxl@gmail.com wrote:
> 
> > From: Fan Ni <fan.ni@samsung.com>
> > 
> > Since fabric manager emulation is not supported yet, the change implements
> > the functions to add/release dynamic capacity extents as QMP interfaces.
> 
> We'll need them anyway, or to implement an fm interface via QMP which is
> going to be ugly and complex.
> 
> > 
> > Note: we skips any FM issued extent release request if the exact extent
> > does not exist in the extent list of the device. We will loose the
> > restriction later once we have partial release support in the kernel.
> 
> Maybe the kernel will treat it as a request to release the extent it
> is tracking that contains it.  So we may want to add a way to poke that.
> Not today though!
> 
> > 
> > 1. Add dynamic capacity extents:
> > 
> > For example, the command to add two continuous extents (each 128MiB long)
> > to region 0 (starting at DPA offset 0) looks like below:
> > 
> > { "execute": "qmp_capabilities" }
> > 
> > { "execute": "cxl-add-dynamic-capacity",
> >   "arguments": {
> >       "path": "/machine/peripheral/cxl-dcd0",
> >       "region-id": 0,
> >       "extents": [
> >       {
> >           "dpa": 0,
> >           "len": 134217728
> >       },
> >       {
> >           "dpa": 134217728,
> >           "len": 134217728
> >       }
> >       ]
> >   }
> > }
> > 
> > 2. Release dynamic capacity extents:
> > 
> > For example, the command to release an extent of size 128MiB from region 0
> > (DPA offset 128MiB) look like below:
> > 
> > { "execute": "cxl-release-dynamic-capacity",
> >   "arguments": {
> >       "path": "/machine/peripheral/cxl-dcd0",
> >       "region-id": 0,
> >       "extents": [
> >       {
> >           "dpa": 134217728,
> >           "len": 134217728
> >       }
> >       ]
> >   }
> > }
> > 
> > Signed-off-by: Fan Ni <fan.ni@samsung.com>
> 
> ...
>   
> > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> > index dccfaaad3a..e9c8994cdb 100644
> > --- a/hw/mem/cxl_type3.c
> > +++ b/hw/mem/cxl_type3.c
> > @@ -674,6 +674,7 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error **errp)
> >          ct3d->dc.total_capacity += region->len;
> >      }
> >      QTAILQ_INIT(&ct3d->dc.extents);
> > +    QTAILQ_INIT(&ct3d->dc.extents_pending_to_add);
> >  
> >      return true;
> >  }
> > @@ -686,6 +687,12 @@ static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
> >          ent = QTAILQ_FIRST(&ct3d->dc.extents);
> >          cxl_remove_extent_from_extent_list(&ct3d->dc.extents, ent);
> >      }
> > +
> > +    while (!QTAILQ_EMPTY(&ct3d->dc.extents_pending_to_add)) {
> 
> QTAILQ_FOR_EACHSAFE
> 
> > +        ent = QTAILQ_FIRST(&ct3d->dc.extents_pending_to_add);
> > +        cxl_remove_extent_from_extent_list(&ct3d->dc.extents_pending_to_add,
> > +                                           ent);
> > +    }
> >  }
> 
> > +/*
> > + * The main function to process dynamic capacity event. Currently DC extents
> > + * add/release requests are processed.
> > + */
> > +static void qmp_cxl_process_dynamic_capacity(const char *path, CxlEventLog log,
> > +                                             CXLDCEventType type, uint16_t hid,
> > +                                             uint8_t rid,
> > +                                             CXLDCExtentRecordList *records,
> > +                                             Error **errp)
> > +{
> > +    Object *obj;
> > +    CXLEventDynamicCapacity dCap = {};
> > +    CXLEventRecordHdr *hdr = &dCap.hdr;
> > +    CXLType3Dev *dcd;
> > +    uint8_t flags = 1 << CXL_EVENT_TYPE_INFO;
> > +    uint32_t num_extents = 0;
> > +    CXLDCExtentRecordList *list;
> > +    g_autofree CXLDCExtentRaw *extents = NULL;
> > +    uint8_t enc_log;
> > +    uint64_t offset, len, block_size;
> > +    int i;
> > +    int rc;
> 
> Combine the two lines above.
> 
> > +    g_autofree unsigned long *blk_bitmap = NULL;
> > +
> > +    obj = object_resolve_path(path, NULL);
> > +    if (!obj) {
> > +        error_setg(errp, "Unable to resolve path");
> > +        return;
> > +    }
> 
> object_resolve_path_type() and skip a step (should do this in various places
> in our existing code!)
> 
> > +    if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
> > +        error_setg(errp, "Path not point to a valid CXL type3 device");
> > +        return;
> > +    }
> > +
> > +    dcd = CXL_TYPE3(obj);
> > +    if (!dcd->dc.num_regions) {
> > +        error_setg(errp, "No dynamic capacity support from the device");
> > +        return;
> > +    }
> > +
> > +    rc = ct3d_qmp_cxl_event_log_enc(log);
> > +    if (rc < 0) {
> > +        error_setg(errp, "Unhandled error log type");
> > +        return;
> > +    }
> > +    enc_log = rc;
> > +
> > +    if (rid >= dcd->dc.num_regions) {
> > +        error_setg(errp, "region id is too large");
> > +        return;
> > +    }
> > +    block_size = dcd->dc.regions[rid].block_size;
> > +
> > +    /* Sanity check and count the extents */
> > +    list = records;
> > +    while (list) {
> > +        offset = list->value->offset;
> > +        len = list->value->len;
> > +
> > +        if (len == 0) {
> > +            error_setg(errp, "extent with 0 length is not allowed");
> > +            return;
> > +        }
> > +
> > +        if (offset % block_size || len % block_size) {
> > +            error_setg(errp, "dpa or len is not aligned to region block size");
> > +            return;
> > +        }
> > +
> > +        if (offset + len > dcd->dc.regions[rid].len) {
> > +            error_setg(errp, "extent range is beyond the region end");
> > +            return;
> > +        }
> > +
> > +        num_extents++;
> > +        list = list->next;
> > +    }
> > +    if (num_extents == 0) {
> > +        error_setg(errp, "No extents found in the command");
> > +        return;
> > +    }
> > +
> > +    blk_bitmap = bitmap_new(dcd->dc.regions[rid].len / block_size);
> > +
> > +    /* Create Extent list for event being passed to host */
> > +    i = 0;
> > +    list = records;
> > +    extents = g_new0(CXLDCExtentRaw, num_extents);
> > +    while (list) {
> > +        CXLDCExtent *ent;
> > +        bool skip_extent = false;
> > +
> > +        offset = list->value->offset;
> > +        len = list->value->len;
> > +
> > +        extents[i].start_dpa = offset + dcd->dc.regions[rid].base;
> > +        extents[i].len = len;
> > +        memset(extents[i].tag, 0, 0x10);
> > +        extents[i].shared_seq = 0;
> > +
> > +        if (type == DC_EVENT_RELEASE_CAPACITY ||
> > +            type == DC_EVENT_FORCED_RELEASE_CAPACITY) {
> > +            /*
> > +             *  if the extent is still pending to be added to the host,
> 
> Odd spacing.
> 
> > +             * remove it from the pending extent list, so later when the add
> > +             * response for the extent arrives, the device can reject the
> > +             * extent as it is not in the pending list.
> > +             */
> > +            ent = cxl_dc_extent_exists(&dcd->dc.extents_pending_to_add,
> > +                        &extents[i]);
> > +            if (ent) {
> > +                QTAILQ_REMOVE(&dcd->dc.extents_pending_to_add, ent, node);
> > +                g_free(ent);
> > +                skip_extent = true;
> > +            } else if (!cxl_dc_extent_exists(&dcd->dc.extents, &extents[i])) {
> > +                /* If the exact extent is not in the accepted list, skip */
> > +                skip_extent = true;
> > +            }
> I think we need to reject case of some extents skipped and others not.
> That's not supported yet so we need to complain if we get it at least. Maybe we need
> to do two passes so we know this has happened early (or perhaps this is a later
> patch in which case a todo here would help).

Skip here does not mean the extent is invalid, it just means the extent
is still pending to add, so remove them from pending list would be
enough to reject the extent, no need to release further. That is based
on your feedback on v4.

The loop here is only to collect the extents to sent to the event log. 
But as you said, we need one pass before updating pending list.
Actually if we do not allow the above case where extents to release is
still in the pending to add list, we can just return here with error, no
extra dry run needed. 

What do you think?

> 
> > +        
> > +
> > +        /* No duplicate or overlapped extents are allowed */
> > +        if (test_any_bits_set(blk_bitmap, offset / block_size,
> > +                              len / block_size)) {
> > +            error_setg(errp, "duplicate or overlapped extents are detected");
> > +            return;
> > +        }
> > +        bitmap_set(blk_bitmap, offset / block_size, len / block_size);
> > +
> > +        list = list->next;
> > +        if (!skip_extent) {
> > +            i++;
> Problem is if we skip one in the middle the records will be wrong below.

Why? Only extents passed the check will be stored in variable extents and
processed further and i be updated. 
For skipped ones, since i is not updated, they will be
overwritten by following valid ones.

Fan

> > +        }
> > +    }
> > +    num_extents = i;
> > +
> > +    /*
> > +     * CXL r3.1 section 8.2.9.2.1.6: Dynamic Capacity Event Record
> > +     *
> > +     * All Dynamic Capacity event records shall set the Event Record Severity
> > +     * field in the Common Event Record Format to Informational Event. All
> > +     * Dynamic Capacity related events shall be logged in the Dynamic Capacity
> > +     * Event Log.
> > +     */
> > +    cxl_assign_event_header(hdr, &dynamic_capacity_uuid, flags, sizeof(dCap),
> > +                            cxl_device_get_timestamp(&dcd->cxl_dstate));
> > +
> > +    dCap.type = type;
> > +    /* FIXME: for now, validity flag is cleared */
> > +    dCap.validity_flags = 0;
> > +    stw_le_p(&dCap.host_id, hid);
> > +    /* only valid for DC_REGION_CONFIG_UPDATED event */
> > +    dCap.updated_region_id = 0;
> > +    /*
> > +     * FIXME: for now, the "More" flag is cleared as there is only one
> > +     * extent associating with each record and tag-based release is
> > +     * not supported.
> 
> Hmm. Seems like tag support would be easy.  Add an optional qmp parameter,
> if a tag is set, we set the more flag for all but the last entry in this
> loop.  I'm ok with that being a follow up patch though.
> 
> > +     */
> > +    dCap.flags = 0;
> > +    for (i = 0; i < num_extents; i++) {
> > +        memcpy(&dCap.dynamic_capacity_extent, &extents[i],
> > +               sizeof(CXLDCExtentRaw));
> > +
> > +        if (type == DC_EVENT_ADD_CAPACITY) {
> > +            cxl_insert_extent_to_extent_list(&dcd->dc.extents_pending_to_add,
> > +                                             extents[i].start_dpa,
> > +                                             extents[i].len,
> > +                                             extents[i].tag,
> > +                                             extents[i].shared_seq);
> > +        }
> > +
> > +        if (cxl_event_insert(&dcd->cxl_dstate, enc_log,
> > +                             (CXLEventRecordRaw *)&dCap)) {
> > +            cxl_event_irq_assert(dcd);
> > +        }
> > +    }
> > +}
> 
> 
> 
> 
> > diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> > index 341260e6e4..b524c5e699 100644
> > --- a/include/hw/cxl/cxl_device.h
> > +++ b/include/hw/cxl/cxl_device.h
> > @@ -490,6 +490,7 @@ struct CXLType3Dev {
> >          AddressSpace host_dc_as;
> >          uint64_t total_capacity; /* 256M aligned */
> >          CXLDCExtentList extents;
> > +        CXLDCExtentList extents_pending_to_add;
> 
> Long name, extents_pending or just pending is plenty I think.
> 
> >          uint32_t total_extent_count;
> >          uint32_t ext_list_gen_seq;
> >  
> > @@ -551,4 +552,9 @@ CXLDCRegion *cxl_find_dc_region(CXLType3Dev *ct3d, uint64_t dpa, uint64_t len);
> >  
> >  void cxl_remove_extent_from_extent_list(CXLDCExtentList *list,
> >                                          CXLDCExtent *extent);
> > +void cxl_insert_extent_to_extent_list(CXLDCExtentList *list, uint64_t dpa,
> > +                                      uint64_t len, uint8_t *tag,
> > +                                      uint16_t shared_seq);
> > +bool test_any_bits_set(const unsigned long *addr, unsigned long nr,
> > +                       unsigned long size);
> >  #endif
> 
>
fan March 6, 2024, 11:36 p.m. UTC | #3
On Wed, Mar 06, 2024 at 05:48:11PM +0000, Jonathan Cameron wrote:
> On Mon,  4 Mar 2024 11:34:04 -0800
> nifan.cxl@gmail.com wrote:
> 
> > From: Fan Ni <fan.ni@samsung.com>
> > 
> > Since fabric manager emulation is not supported yet, the change implements
> > the functions to add/release dynamic capacity extents as QMP interfaces.
> 
> We'll need them anyway, or to implement an fm interface via QMP which is
> going to be ugly and complex.
> 
> > 
> > Note: we skips any FM issued extent release request if the exact extent
> > does not exist in the extent list of the device. We will loose the
> > restriction later once we have partial release support in the kernel.
> 
> Maybe the kernel will treat it as a request to release the extent it
> is tracking that contains it.  So we may want to add a way to poke that.
> Not today though!
> 
> > 
> > 1. Add dynamic capacity extents:
> > 
> > For example, the command to add two continuous extents (each 128MiB long)
> > to region 0 (starting at DPA offset 0) looks like below:
> > 
> > { "execute": "qmp_capabilities" }
> > 
> > { "execute": "cxl-add-dynamic-capacity",
> >   "arguments": {
> >       "path": "/machine/peripheral/cxl-dcd0",
> >       "region-id": 0,
> >       "extents": [
> >       {
> >           "dpa": 0,
> >           "len": 134217728
> >       },
> >       {
> >           "dpa": 134217728,
> >           "len": 134217728
> >       }
> >       ]
> >   }
> > }
> > 
> > 2. Release dynamic capacity extents:
> > 
> > For example, the command to release an extent of size 128MiB from region 0
> > (DPA offset 128MiB) look like below:
> > 
> > { "execute": "cxl-release-dynamic-capacity",
> >   "arguments": {
> >       "path": "/machine/peripheral/cxl-dcd0",
> >       "region-id": 0,
> >       "extents": [
> >       {
> >           "dpa": 134217728,
> >           "len": 134217728
> >       }
> >       ]
> >   }
> > }
> > 
> > Signed-off-by: Fan Ni <fan.ni@samsung.com>
> 
> ...
>   
> > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> > index dccfaaad3a..e9c8994cdb 100644
> > --- a/hw/mem/cxl_type3.c
> > +++ b/hw/mem/cxl_type3.c
> > @@ -674,6 +674,7 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error **errp)
> >          ct3d->dc.total_capacity += region->len;
> >      }
> >      QTAILQ_INIT(&ct3d->dc.extents);
> > +    QTAILQ_INIT(&ct3d->dc.extents_pending_to_add);
> >  
> >      return true;
> >  }
> > @@ -686,6 +687,12 @@ static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
> >          ent = QTAILQ_FIRST(&ct3d->dc.extents);
> >          cxl_remove_extent_from_extent_list(&ct3d->dc.extents, ent);
> >      }
> > +
> > +    while (!QTAILQ_EMPTY(&ct3d->dc.extents_pending_to_add)) {
> 
> QTAILQ_FOR_EACHSAFE
> 
> > +        ent = QTAILQ_FIRST(&ct3d->dc.extents_pending_to_add);
> > +        cxl_remove_extent_from_extent_list(&ct3d->dc.extents_pending_to_add,
> > +                                           ent);
> > +    }
> >  }
> 
> > +/*
> > + * The main function to process dynamic capacity event. Currently DC extents
> > + * add/release requests are processed.
> > + */
> > +static void qmp_cxl_process_dynamic_capacity(const char *path, CxlEventLog log,
> > +                                             CXLDCEventType type, uint16_t hid,
> > +                                             uint8_t rid,
> > +                                             CXLDCExtentRecordList *records,
> > +                                             Error **errp)
> > +{
> > +    Object *obj;
> > +    CXLEventDynamicCapacity dCap = {};
> > +    CXLEventRecordHdr *hdr = &dCap.hdr;
> > +    CXLType3Dev *dcd;
> > +    uint8_t flags = 1 << CXL_EVENT_TYPE_INFO;
> > +    uint32_t num_extents = 0;
> > +    CXLDCExtentRecordList *list;
> > +    g_autofree CXLDCExtentRaw *extents = NULL;
> > +    uint8_t enc_log;
> > +    uint64_t offset, len, block_size;
> > +    int i;
> > +    int rc;
> 
> Combine the two lines above.
> 
> > +    g_autofree unsigned long *blk_bitmap = NULL;
> > +
> > +    obj = object_resolve_path(path, NULL);
> > +    if (!obj) {
> > +        error_setg(errp, "Unable to resolve path");
> > +        return;
> > +    }
> 
> object_resolve_path_type() and skip a step (should do this in various places
> in our existing code!)
> 
> > +    if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
> > +        error_setg(errp, "Path not point to a valid CXL type3 device");
> > +        return;
> > +    }
> > +
> > +    dcd = CXL_TYPE3(obj);
> > +    if (!dcd->dc.num_regions) {
> > +        error_setg(errp, "No dynamic capacity support from the device");
> > +        return;
> > +    }
> > +
> > +    rc = ct3d_qmp_cxl_event_log_enc(log);
> > +    if (rc < 0) {
> > +        error_setg(errp, "Unhandled error log type");
> > +        return;
> > +    }
> > +    enc_log = rc;
> > +
> > +    if (rid >= dcd->dc.num_regions) {
> > +        error_setg(errp, "region id is too large");
> > +        return;
> > +    }
> > +    block_size = dcd->dc.regions[rid].block_size;
> > +
> > +    /* Sanity check and count the extents */
> > +    list = records;
> > +    while (list) {
> > +        offset = list->value->offset;
> > +        len = list->value->len;
> > +
> > +        if (len == 0) {
> > +            error_setg(errp, "extent with 0 length is not allowed");
> > +            return;
> > +        }
> > +
> > +        if (offset % block_size || len % block_size) {
> > +            error_setg(errp, "dpa or len is not aligned to region block size");
> > +            return;
> > +        }
> > +
> > +        if (offset + len > dcd->dc.regions[rid].len) {
> > +            error_setg(errp, "extent range is beyond the region end");
> > +            return;
> > +        }
> > +
> > +        num_extents++;
> > +        list = list->next;
> > +    }
> > +    if (num_extents == 0) {
> > +        error_setg(errp, "No extents found in the command");
> > +        return;
> > +    }
> > +
> > +    blk_bitmap = bitmap_new(dcd->dc.regions[rid].len / block_size);
> > +
> > +    /* Create Extent list for event being passed to host */
> > +    i = 0;
> > +    list = records;
> > +    extents = g_new0(CXLDCExtentRaw, num_extents);
> > +    while (list) {
> > +        CXLDCExtent *ent;
> > +        bool skip_extent = false;
> > +
> > +        offset = list->value->offset;
> > +        len = list->value->len;
> > +
> > +        extents[i].start_dpa = offset + dcd->dc.regions[rid].base;
> > +        extents[i].len = len;
> > +        memset(extents[i].tag, 0, 0x10);
> > +        extents[i].shared_seq = 0;
> > +
> > +        if (type == DC_EVENT_RELEASE_CAPACITY ||
> > +            type == DC_EVENT_FORCED_RELEASE_CAPACITY) {
> > +            /*
> > +             *  if the extent is still pending to be added to the host,
> 
> Odd spacing.
> 
> > +             * remove it from the pending extent list, so later when the add
> > +             * response for the extent arrives, the device can reject the
> > +             * extent as it is not in the pending list.
> > +             */
> > +            ent = cxl_dc_extent_exists(&dcd->dc.extents_pending_to_add,
> > +                        &extents[i]);
> > +            if (ent) {
> > +                QTAILQ_REMOVE(&dcd->dc.extents_pending_to_add, ent, node);
> > +                g_free(ent);
> > +                skip_extent = true;
> > +            } else if (!cxl_dc_extent_exists(&dcd->dc.extents, &extents[i])) {
> > +                /* If the exact extent is not in the accepted list, skip */
> > +                skip_extent = true;
> > +            }
> I think we need to reject case of some extents skipped and others not.
> That's not supported yet so we need to complain if we get it at least. Maybe we need
> to do two passes so we know this has happened early (or perhaps this is a later
> patch in which case a todo here would help).

If the second skip_extent case, I will reject earlier instead of
skipping.

Fan

> 
> > +        
> > +
> > +        /* No duplicate or overlapped extents are allowed */
> > +        if (test_any_bits_set(blk_bitmap, offset / block_size,
> > +                              len / block_size)) {
> > +            error_setg(errp, "duplicate or overlapped extents are detected");
> > +            return;
> > +        }
> > +        bitmap_set(blk_bitmap, offset / block_size, len / block_size);
> > +
> > +        list = list->next;
> > +        if (!skip_extent) {
> > +            i++;
> Problem is if we skip one in the middle the records will be wrong below.
> > +        }
> > +    }
> > +    num_extents = i;
> > +
> > +    /*
> > +     * CXL r3.1 section 8.2.9.2.1.6: Dynamic Capacity Event Record
> > +     *
> > +     * All Dynamic Capacity event records shall set the Event Record Severity
> > +     * field in the Common Event Record Format to Informational Event. All
> > +     * Dynamic Capacity related events shall be logged in the Dynamic Capacity
> > +     * Event Log.
> > +     */
> > +    cxl_assign_event_header(hdr, &dynamic_capacity_uuid, flags, sizeof(dCap),
> > +                            cxl_device_get_timestamp(&dcd->cxl_dstate));
> > +
> > +    dCap.type = type;
> > +    /* FIXME: for now, validity flag is cleared */
> > +    dCap.validity_flags = 0;
> > +    stw_le_p(&dCap.host_id, hid);
> > +    /* only valid for DC_REGION_CONFIG_UPDATED event */
> > +    dCap.updated_region_id = 0;
> > +    /*
> > +     * FIXME: for now, the "More" flag is cleared as there is only one
> > +     * extent associating with each record and tag-based release is
> > +     * not supported.
> 
> Hmm. Seems like tag support would be easy.  Add an optional qmp parameter,
> if a tag is set, we set the more flag for all but the last entry in this
> loop.  I'm ok with that being a follow up patch though.
> 
> > +     */
> > +    dCap.flags = 0;
> > +    for (i = 0; i < num_extents; i++) {
> > +        memcpy(&dCap.dynamic_capacity_extent, &extents[i],
> > +               sizeof(CXLDCExtentRaw));
> > +
> > +        if (type == DC_EVENT_ADD_CAPACITY) {
> > +            cxl_insert_extent_to_extent_list(&dcd->dc.extents_pending_to_add,
> > +                                             extents[i].start_dpa,
> > +                                             extents[i].len,
> > +                                             extents[i].tag,
> > +                                             extents[i].shared_seq);
> > +        }
> > +
> > +        if (cxl_event_insert(&dcd->cxl_dstate, enc_log,
> > +                             (CXLEventRecordRaw *)&dCap)) {
> > +            cxl_event_irq_assert(dcd);
> > +        }
> > +    }
> > +}
> 
> 
> 
> 
> > diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> > index 341260e6e4..b524c5e699 100644
> > --- a/include/hw/cxl/cxl_device.h
> > +++ b/include/hw/cxl/cxl_device.h
> > @@ -490,6 +490,7 @@ struct CXLType3Dev {
> >          AddressSpace host_dc_as;
> >          uint64_t total_capacity; /* 256M aligned */
> >          CXLDCExtentList extents;
> > +        CXLDCExtentList extents_pending_to_add;
> 
> Long name, extents_pending or just pending is plenty I think.
> 
> >          uint32_t total_extent_count;
> >          uint32_t ext_list_gen_seq;
> >  
> > @@ -551,4 +552,9 @@ CXLDCRegion *cxl_find_dc_region(CXLType3Dev *ct3d, uint64_t dpa, uint64_t len);
> >  
> >  void cxl_remove_extent_from_extent_list(CXLDCExtentList *list,
> >                                          CXLDCExtent *extent);
> > +void cxl_insert_extent_to_extent_list(CXLDCExtentList *list, uint64_t dpa,
> > +                                      uint64_t len, uint8_t *tag,
> > +                                      uint16_t shared_seq);
> > +bool test_any_bits_set(const unsigned long *addr, unsigned long nr,
> > +                       unsigned long size);
> >  #endif
> 
>
Jonathan Cameron March 7, 2024, 12:45 p.m. UTC | #4
...

> > > +    list = records;
> > > +    extents = g_new0(CXLDCExtentRaw, num_extents);
> > > +    while (list) {
> > > +        CXLDCExtent *ent;
> > > +        bool skip_extent = false;
> > > +
> > > +        offset = list->value->offset;
> > > +        len = list->value->len;
> > > +
> > > +        extents[i].start_dpa = offset + dcd->dc.regions[rid].base;
> > > +        extents[i].len = len;
> > > +        memset(extents[i].tag, 0, 0x10);
> > > +        extents[i].shared_seq = 0;
> > > +
> > > +        if (type == DC_EVENT_RELEASE_CAPACITY ||
> > > +            type == DC_EVENT_FORCED_RELEASE_CAPACITY) {
> > > +            /*
> > > +             *  if the extent is still pending to be added to the host,  
> > 
> > Odd spacing.
> >   
> > > +             * remove it from the pending extent list, so later when the add
> > > +             * response for the extent arrives, the device can reject the
> > > +             * extent as it is not in the pending list.
> > > +             */
> > > +            ent = cxl_dc_extent_exists(&dcd->dc.extents_pending_to_add,
> > > +                        &extents[i]);
> > > +            if (ent) {
> > > +                QTAILQ_REMOVE(&dcd->dc.extents_pending_to_add, ent, node);
> > > +                g_free(ent);
> > > +                skip_extent = true;
> > > +            } else if (!cxl_dc_extent_exists(&dcd->dc.extents, &extents[i])) {
> > > +                /* If the exact extent is not in the accepted list, skip */
> > > +                skip_extent = true;
> > > +            }  
> > I think we need to reject case of some extents skipped and others not.
> > That's not supported yet so we need to complain if we get it at least. Maybe we need
> > to do two passes so we know this has happened early (or perhaps this is a later
> > patch in which case a todo here would help).  
> 
> Skip here does not mean the extent is invalid, it just means the extent
> is still pending to add, so remove them from pending list would be
> enough to reject the extent, no need to release further. That is based
> on your feedback on v4.

Ah. I'd missunderstood.

> 
> The loop here is only to collect the extents to sent to the event log. 
> But as you said, we need one pass before updating pending list.
> Actually if we do not allow the above case where extents to release is
> still in the pending to add list, we can just return here with error, no
> extra dry run needed. 
> 
> What do you think?

I think we need a way to back out extents from the pending to add list
so we can create the race where they are offered to the OS and it takes
forever to accept and by the time it does we've removed them.

> 
> >   
> > > +        
> > > +
> > > +        /* No duplicate or overlapped extents are allowed */
> > > +        if (test_any_bits_set(blk_bitmap, offset / block_size,
> > > +                              len / block_size)) {
> > > +            error_setg(errp, "duplicate or overlapped extents are detected");
> > > +            return;
> > > +        }
> > > +        bitmap_set(blk_bitmap, offset / block_size, len / block_size);
> > > +
> > > +        list = list->next;
> > > +        if (!skip_extent) {
> > > +            i++;  
> > Problem is if we skip one in the middle the records will be wrong below.  
> 
> Why? Only extents passed the check will be stored in variable extents and
> processed further and i be updated. 
> For skipped ones, since i is not updated, they will be
> overwritten by following valid ones.
Ah. I'd missed the fact you store into the extent without a check on validity
but only move the index on if they were valid. Then rely on not passing a trailing
entry at the end.
If would be more readable I think if local variables were used for the parameters
until we've decided not to skip and the this ended with

        if (!skip_extent) {
            extents[i] = (DCXLDCExtentRaw) {
                .start_dpa = ...
	        ...
            };
            i++
        }
We have local len already so probably just need
uint64_t start_dpa = offset + dcd->dc.regions[rid].base;

Also maybe skip_extent_evlog or something like that to explain we are only
skipping that part. 
Helps people like me who read it completely wrong!

Jonathan
Jonathan Cameron March 7, 2024, 12:47 p.m. UTC | #5
> >   
> > > +             * remove it from the pending extent list, so later when the add
> > > +             * response for the extent arrives, the device can reject the
> > > +             * extent as it is not in the pending list.
> > > +             */
> > > +            ent = cxl_dc_extent_exists(&dcd->dc.extents_pending_to_add,
> > > +                        &extents[i]);
> > > +            if (ent) {
> > > +                QTAILQ_REMOVE(&dcd->dc.extents_pending_to_add, ent, node);
> > > +                g_free(ent);
> > > +                skip_extent = true;
> > > +            } else if (!cxl_dc_extent_exists(&dcd->dc.extents, &extents[i])) {
> > > +                /* If the exact extent is not in the accepted list, skip */
> > > +                skip_extent = true;
> > > +            }  
> > I think we need to reject case of some extents skipped and others not.
> > That's not supported yet so we need to complain if we get it at least. Maybe we need
> > to do two passes so we know this has happened early (or perhaps this is a later
> > patch in which case a todo here would help).  
> 
> If the second skip_extent case, I will reject earlier instead of
> skipping.
That was me misunderstanding the flow. I think this is fine as you have it already.

Jonathan
fan March 9, 2024, 4:35 a.m. UTC | #6
On Thu, Mar 07, 2024 at 12:45:55PM +0000, Jonathan Cameron wrote:
> ...
> 
> > > > +    list = records;
> > > > +    extents = g_new0(CXLDCExtentRaw, num_extents);
> > > > +    while (list) {
> > > > +        CXLDCExtent *ent;
> > > > +        bool skip_extent = false;
> > > > +
> > > > +        offset = list->value->offset;
> > > > +        len = list->value->len;
> > > > +
> > > > +        extents[i].start_dpa = offset + dcd->dc.regions[rid].base;
> > > > +        extents[i].len = len;
> > > > +        memset(extents[i].tag, 0, 0x10);
> > > > +        extents[i].shared_seq = 0;
> > > > +
> > > > +        if (type == DC_EVENT_RELEASE_CAPACITY ||
> > > > +            type == DC_EVENT_FORCED_RELEASE_CAPACITY) {
> > > > +            /*
> > > > +             *  if the extent is still pending to be added to the host,  
> > > 
> > > Odd spacing.
> > >   
> > > > +             * remove it from the pending extent list, so later when the add
> > > > +             * response for the extent arrives, the device can reject the
> > > > +             * extent as it is not in the pending list.
> > > > +             */
> > > > +            ent = cxl_dc_extent_exists(&dcd->dc.extents_pending_to_add,
> > > > +                        &extents[i]);
> > > > +            if (ent) {
> > > > +                QTAILQ_REMOVE(&dcd->dc.extents_pending_to_add, ent, node);
> > > > +                g_free(ent);
> > > > +                skip_extent = true;
> > > > +            } else if (!cxl_dc_extent_exists(&dcd->dc.extents, &extents[i])) {
> > > > +                /* If the exact extent is not in the accepted list, skip */
> > > > +                skip_extent = true;
> > > > +            }  
> > > I think we need to reject case of some extents skipped and others not.
> > > That's not supported yet so we need to complain if we get it at least. Maybe we need
> > > to do two passes so we know this has happened early (or perhaps this is a later
> > > patch in which case a todo here would help).  
> > 
> > Skip here does not mean the extent is invalid, it just means the extent
> > is still pending to add, so remove them from pending list would be
> > enough to reject the extent, no need to release further. That is based
> > on your feedback on v4.
> 
> Ah. I'd missunderstood.

Hi Jonathan,

I think we should not allow to release extents that are still pending to
add. 
If we allow it, there is a case that will not work.
Let's see the following case (time order):
1. Send request to add extent A to host; (A --> pending list)
2. Send request to release A from the host; (Delete A from pending list,
hoping the following add response for A will fail as there is not a matched
extent in the pending list).
3. Host send response to the device for the add request, however, for
some reason, it does not accept any of it, so updated list is empty,
spec allows it. Based on the spec, we need to drop the extent at the
head of the event log. Now we have problem. Since extent A is already
dropped from the list, we either cannot drop as the list is empty, which
is not the worst. If we have more extents in the list, we may drop the
one following A, which is for another request. If this happens, all the
following extents will be acked incorrectly as the order has been
shifted.
 
Does the above reasoning make sense to you?

Fan

> 
> > 
> > The loop here is only to collect the extents to sent to the event log. 
> > But as you said, we need one pass before updating pending list.
> > Actually if we do not allow the above case where extents to release is
> > still in the pending to add list, we can just return here with error, no
> > extra dry run needed. 
> > 
> > What do you think?
> 
> I think we need a way to back out extents from the pending to add list
> so we can create the race where they are offered to the OS and it takes
> forever to accept and by the time it does we've removed them.
> 
> > 
> > >   
> > > > +        
> > > > +
> > > > +        /* No duplicate or overlapped extents are allowed */
> > > > +        if (test_any_bits_set(blk_bitmap, offset / block_size,
> > > > +                              len / block_size)) {
> > > > +            error_setg(errp, "duplicate or overlapped extents are detected");
> > > > +            return;
> > > > +        }
> > > > +        bitmap_set(blk_bitmap, offset / block_size, len / block_size);
> > > > +
> > > > +        list = list->next;
> > > > +        if (!skip_extent) {
> > > > +            i++;  
> > > Problem is if we skip one in the middle the records will be wrong below.  
> > 
> > Why? Only extents passed the check will be stored in variable extents and
> > processed further and i be updated. 
> > For skipped ones, since i is not updated, they will be
> > overwritten by following valid ones.
> Ah. I'd missed the fact you store into the extent without a check on validity
> but only move the index on if they were valid. Then rely on not passing a trailing
> entry at the end.
> If would be more readable I think if local variables were used for the parameters
> until we've decided not to skip and the this ended with
> 
>         if (!skip_extent) {
>             extents[i] = (DCXLDCExtentRaw) {
>                 .start_dpa = ...
> 	        ...
>             };
>             i++
>         }
> We have local len already so probably just need
> uint64_t start_dpa = offset + dcd->dc.regions[rid].base;
> 
> Also maybe skip_extent_evlog or something like that to explain we are only
> skipping that part. 
> Helps people like me who read it completely wrong!
> 
> Jonathan
> 
>  
>
Jonathan Cameron March 12, 2024, 12:37 p.m. UTC | #7
On Fri, 8 Mar 2024 20:35:53 -0800
fan <nifan.cxl@gmail.com> wrote:

> On Thu, Mar 07, 2024 at 12:45:55PM +0000, Jonathan Cameron wrote:
> > ...
> >   
> > > > > +    list = records;
> > > > > +    extents = g_new0(CXLDCExtentRaw, num_extents);
> > > > > +    while (list) {
> > > > > +        CXLDCExtent *ent;
> > > > > +        bool skip_extent = false;
> > > > > +
> > > > > +        offset = list->value->offset;
> > > > > +        len = list->value->len;
> > > > > +
> > > > > +        extents[i].start_dpa = offset + dcd->dc.regions[rid].base;
> > > > > +        extents[i].len = len;
> > > > > +        memset(extents[i].tag, 0, 0x10);
> > > > > +        extents[i].shared_seq = 0;
> > > > > +
> > > > > +        if (type == DC_EVENT_RELEASE_CAPACITY ||
> > > > > +            type == DC_EVENT_FORCED_RELEASE_CAPACITY) {
> > > > > +            /*
> > > > > +             *  if the extent is still pending to be added to the host,    
> > > > 
> > > > Odd spacing.
> > > >     
> > > > > +             * remove it from the pending extent list, so later when the add
> > > > > +             * response for the extent arrives, the device can reject the
> > > > > +             * extent as it is not in the pending list.
> > > > > +             */
> > > > > +            ent = cxl_dc_extent_exists(&dcd->dc.extents_pending_to_add,
> > > > > +                        &extents[i]);
> > > > > +            if (ent) {
> > > > > +                QTAILQ_REMOVE(&dcd->dc.extents_pending_to_add, ent, node);
> > > > > +                g_free(ent);
> > > > > +                skip_extent = true;
> > > > > +            } else if (!cxl_dc_extent_exists(&dcd->dc.extents, &extents[i])) {
> > > > > +                /* If the exact extent is not in the accepted list, skip */
> > > > > +                skip_extent = true;
> > > > > +            }    
> > > > I think we need to reject case of some extents skipped and others not.
> > > > That's not supported yet so we need to complain if we get it at least. Maybe we need
> > > > to do two passes so we know this has happened early (or perhaps this is a later
> > > > patch in which case a todo here would help).    
> > > 
> > > Skip here does not mean the extent is invalid, it just means the extent
> > > is still pending to add, so remove them from pending list would be
> > > enough to reject the extent, no need to release further. That is based
> > > on your feedback on v4.  
> > 
> > Ah. I'd missunderstood.  
> 
> Hi Jonathan,
> 
> I think we should not allow to release extents that are still pending to
> add. 
> If we allow it, there is a case that will not work.
> Let's see the following case (time order):
> 1. Send request to add extent A to host; (A --> pending list)
> 2. Send request to release A from the host; (Delete A from pending list,
> hoping the following add response for A will fail as there is not a matched
> extent in the pending list).

Definitely not allow the host to release something it hasn't accepted.
Should allow QMP to release such entrees though (and same for fmapi when
we get there). Any such requested from host should be treated as whatever
it says to do if you release an extent that you don't have.

> 3. Host send response to the device for the add request, however, for
> some reason, it does not accept any of it, so updated list is empty,
> spec allows it. Based on the spec, we need to drop the extent at the
> head of the event log. Now we have problem. Since extent A is already
> dropped from the list, we either cannot drop as the list is empty, which
> is not the worst. If we have more extents in the list, we may drop the
> one following A, which is for another request. If this happens, all the
> following extents will be acked incorrectly as the order has been
> shifted.
>  
> Does the above reasoning make sense to you?
Absolutely.  I got confused here on who was doing release.
Host definitely can't release stuff it hasn't successfully accepted.

Jonathan

> 
> Fan
> 
> >   
> > > 
> > > The loop here is only to collect the extents to sent to the event log. 
> > > But as you said, we need one pass before updating pending list.
> > > Actually if we do not allow the above case where extents to release is
> > > still in the pending to add list, we can just return here with error, no
> > > extra dry run needed. 
> > > 
> > > What do you think?  
> > 
> > I think we need a way to back out extents from the pending to add list
> > so we can create the race where they are offered to the OS and it takes
> > forever to accept and by the time it does we've removed them.
> >   
> > >   
> > > >     
> > > > > +        
> > > > > +
> > > > > +        /* No duplicate or overlapped extents are allowed */
> > > > > +        if (test_any_bits_set(blk_bitmap, offset / block_size,
> > > > > +                              len / block_size)) {
> > > > > +            error_setg(errp, "duplicate or overlapped extents are detected");
> > > > > +            return;
> > > > > +        }
> > > > > +        bitmap_set(blk_bitmap, offset / block_size, len / block_size);
> > > > > +
> > > > > +        list = list->next;
> > > > > +        if (!skip_extent) {
> > > > > +            i++;    
> > > > Problem is if we skip one in the middle the records will be wrong below.    
> > > 
> > > Why? Only extents passed the check will be stored in variable extents and
> > > processed further and i be updated. 
> > > For skipped ones, since i is not updated, they will be
> > > overwritten by following valid ones.  
> > Ah. I'd missed the fact you store into the extent without a check on validity
> > but only move the index on if they were valid. Then rely on not passing a trailing
> > entry at the end.
> > If would be more readable I think if local variables were used for the parameters
> > until we've decided not to skip and the this ended with
> > 
> >         if (!skip_extent) {
> >             extents[i] = (DCXLDCExtentRaw) {
> >                 .start_dpa = ...
> > 	        ...
> >             };
> >             i++
> >         }
> > We have local len already so probably just need
> > uint64_t start_dpa = offset + dcd->dc.regions[rid].base;
> > 
> > Also maybe skip_extent_evlog or something like that to explain we are only
> > skipping that part. 
> > Helps people like me who read it completely wrong!
> > 
> > Jonathan
> > 
> >  
> >
fan March 12, 2024, 4:27 p.m. UTC | #8
On Tue, Mar 12, 2024 at 12:37:23PM +0000, Jonathan Cameron wrote:
> On Fri, 8 Mar 2024 20:35:53 -0800
> fan <nifan.cxl@gmail.com> wrote:
> 
> > On Thu, Mar 07, 2024 at 12:45:55PM +0000, Jonathan Cameron wrote:
> > > ...
> > >   
> > > > > > +    list = records;
> > > > > > +    extents = g_new0(CXLDCExtentRaw, num_extents);
> > > > > > +    while (list) {
> > > > > > +        CXLDCExtent *ent;
> > > > > > +        bool skip_extent = false;
> > > > > > +
> > > > > > +        offset = list->value->offset;
> > > > > > +        len = list->value->len;
> > > > > > +
> > > > > > +        extents[i].start_dpa = offset + dcd->dc.regions[rid].base;
> > > > > > +        extents[i].len = len;
> > > > > > +        memset(extents[i].tag, 0, 0x10);
> > > > > > +        extents[i].shared_seq = 0;
> > > > > > +
> > > > > > +        if (type == DC_EVENT_RELEASE_CAPACITY ||
> > > > > > +            type == DC_EVENT_FORCED_RELEASE_CAPACITY) {
> > > > > > +            /*
> > > > > > +             *  if the extent is still pending to be added to the host,    
> > > > > 
> > > > > Odd spacing.
> > > > >     
> > > > > > +             * remove it from the pending extent list, so later when the add
> > > > > > +             * response for the extent arrives, the device can reject the
> > > > > > +             * extent as it is not in the pending list.
> > > > > > +             */
> > > > > > +            ent = cxl_dc_extent_exists(&dcd->dc.extents_pending_to_add,
> > > > > > +                        &extents[i]);
> > > > > > +            if (ent) {
> > > > > > +                QTAILQ_REMOVE(&dcd->dc.extents_pending_to_add, ent, node);
> > > > > > +                g_free(ent);
> > > > > > +                skip_extent = true;
> > > > > > +            } else if (!cxl_dc_extent_exists(&dcd->dc.extents, &extents[i])) {
> > > > > > +                /* If the exact extent is not in the accepted list, skip */
> > > > > > +                skip_extent = true;
> > > > > > +            }    
> > > > > I think we need to reject case of some extents skipped and others not.
> > > > > That's not supported yet so we need to complain if we get it at least. Maybe we need
> > > > > to do two passes so we know this has happened early (or perhaps this is a later
> > > > > patch in which case a todo here would help).    
> > > > 
> > > > Skip here does not mean the extent is invalid, it just means the extent
> > > > is still pending to add, so remove them from pending list would be
> > > > enough to reject the extent, no need to release further. That is based
> > > > on your feedback on v4.  
> > > 
> > > Ah. I'd missunderstood.  
> > 
> > Hi Jonathan,
> > 
> > I think we should not allow to release extents that are still pending to
> > add. 
> > If we allow it, there is a case that will not work.
> > Let's see the following case (time order):
> > 1. Send request to add extent A to host; (A --> pending list)
> > 2. Send request to release A from the host; (Delete A from pending list,
> > hoping the following add response for A will fail as there is not a matched
> > extent in the pending list).
> 
> Definitely not allow the host to release something it hasn't accepted.
> Should allow QMP to release such entrees though (and same for fmapi when
> we get there). Any such requested from host should be treated as whatever
> it says to do if you release an extent that you don't have.

Not sure how it works here. If we allow QMP to release such extents and
clear the pending list entrees accordingly, later if the host response with
empty extent list, how can the device figure out which request the response is
for. The spec assumes the response comes in order, so the head of the
pending list should be removed from the pending list, however, if QMP
process already removed it.

The key problem here is for empty updated extent list, we do not have a way to
figure out the corresponding request as there is no DPA info to look
into.

> 
> > 3. Host send response to the device for the add request, however, for
> > some reason, it does not accept any of it, so updated list is empty,
> > spec allows it. Based on the spec, we need to drop the extent at the
> > head of the event log. Now we have problem. Since extent A is already
> > dropped from the list, we either cannot drop as the list is empty, which
> > is not the worst. If we have more extents in the list, we may drop the
> > one following A, which is for another request. If this happens, all the
> > following extents will be acked incorrectly as the order has been
> > shifted.
> >  
> > Does the above reasoning make sense to you?
> Absolutely.  I got confused here on who was doing release.
> Host definitely can't release stuff it hasn't successfully accepted.
> 
> Jonathan
> 

The assumption here is FM first initiates the request to add some
extents to the hosts, and later FM initiates to release the extents
while the extents has not been accepted by the host yet. 

Fan
> > 
> > Fan
> > 
> > >   
> > > > 
> > > > The loop here is only to collect the extents to sent to the event log. 
> > > > But as you said, we need one pass before updating pending list.
> > > > Actually if we do not allow the above case where extents to release is
> > > > still in the pending to add list, we can just return here with error, no
> > > > extra dry run needed. 
> > > > 
> > > > What do you think?  
> > > 
> > > I think we need a way to back out extents from the pending to add list
> > > so we can create the race where they are offered to the OS and it takes
> > > forever to accept and by the time it does we've removed them.
> > >   
> > > >   
> > > > >     
> > > > > > +        
> > > > > > +
> > > > > > +        /* No duplicate or overlapped extents are allowed */
> > > > > > +        if (test_any_bits_set(blk_bitmap, offset / block_size,
> > > > > > +                              len / block_size)) {
> > > > > > +            error_setg(errp, "duplicate or overlapped extents are detected");
> > > > > > +            return;
> > > > > > +        }
> > > > > > +        bitmap_set(blk_bitmap, offset / block_size, len / block_size);
> > > > > > +
> > > > > > +        list = list->next;
> > > > > > +        if (!skip_extent) {
> > > > > > +            i++;    
> > > > > Problem is if we skip one in the middle the records will be wrong below.    
> > > > 
> > > > Why? Only extents passed the check will be stored in variable extents and
> > > > processed further and i be updated. 
> > > > For skipped ones, since i is not updated, they will be
> > > > overwritten by following valid ones.  
> > > Ah. I'd missed the fact you store into the extent without a check on validity
> > > but only move the index on if they were valid. Then rely on not passing a trailing
> > > entry at the end.
> > > If would be more readable I think if local variables were used for the parameters
> > > until we've decided not to skip and the this ended with
> > > 
> > >         if (!skip_extent) {
> > >             extents[i] = (DCXLDCExtentRaw) {
> > >                 .start_dpa = ...
> > > 	        ...
> > >             };
> > >             i++
> > >         }
> > > We have local len already so probably just need
> > > uint64_t start_dpa = offset + dcd->dc.regions[rid].base;
> > > 
> > > Also maybe skip_extent_evlog or something like that to explain we are only
> > > skipping that part. 
> > > Helps people like me who read it completely wrong!
> > > 
> > > Jonathan
> > > 
> > >  
> > >   
>
Markus Armbruster April 24, 2024, 1:09 p.m. UTC | #9
nifan.cxl@gmail.com writes:

> From: Fan Ni <fan.ni@samsung.com>
>
> Since fabric manager emulation is not supported yet, the change implements
> the functions to add/release dynamic capacity extents as QMP interfaces.

Will fabric manager emulation obsolete these commands?

> Note: we skips any FM issued extent release request if the exact extent
> does not exist in the extent list of the device. We will loose the
> restriction later once we have partial release support in the kernel.
>
> 1. Add dynamic capacity extents:
>
> For example, the command to add two continuous extents (each 128MiB long)
> to region 0 (starting at DPA offset 0) looks like below:
>
> { "execute": "qmp_capabilities" }
>
> { "execute": "cxl-add-dynamic-capacity",
>   "arguments": {
>       "path": "/machine/peripheral/cxl-dcd0",
>       "region-id": 0,
>       "extents": [
>       {
>           "dpa": 0,
>           "len": 134217728
>       },
>       {
>           "dpa": 134217728,
>           "len": 134217728
>       }
>       ]
>   }
> }
>
> 2. Release dynamic capacity extents:
>
> For example, the command to release an extent of size 128MiB from region 0
> (DPA offset 128MiB) look like below:
>
> { "execute": "cxl-release-dynamic-capacity",
>   "arguments": {
>       "path": "/machine/peripheral/cxl-dcd0",
>       "region-id": 0,
>       "extents": [
>       {
>           "dpa": 134217728,
>           "len": 134217728
>       }
>       ]
>   }
> }
>
> Signed-off-by: Fan Ni <fan.ni@samsung.com>

[...]

> diff --git a/qapi/cxl.json b/qapi/cxl.json
> index 8cc4c72fa9..2645004666 100644
> --- a/qapi/cxl.json
> +++ b/qapi/cxl.json
> @@ -19,13 +19,16 @@
>  #
>  # @fatal: Fatal Event Log
>  #
> +# @dyncap: Dynamic Capacity Event Log
> +#
>  # Since: 8.1
>  ##
>  { 'enum': 'CxlEventLog',
>    'data': ['informational',
>             'warning',
>             'failure',
> -           'fatal']
> +           'fatal',
> +           'dyncap']

We tend to avoid abbreviations in QMP identifiers: dynamic-capacity.

>   }
>  
>  ##
> @@ -361,3 +364,59 @@
>  ##
>  {'command': 'cxl-inject-correctable-error',
>   'data': {'path': 'str', 'type': 'CxlCorErrorType'}}
> +
> +##
> +# @CXLDCExtentRecord:

Such traffic jams of capital letters are hard to read.

What does DC mean?

> +#
> +# Record of a single extent to add/release
> +#
> +# @offset: offset to the start of the region where the extent to be operated

Blank line here, please

> +# @len: length of the extent
> +#
> +# Since: 9.0
> +##
> +{ 'struct': 'CXLDCExtentRecord',
> +  'data': {
> +      'offset':'uint64',
> +      'len': 'uint64'
> +  }
> +}
> +
> +##
> +# @cxl-add-dynamic-capacity:
> +#
> +# Command to start add dynamic capacity extents flow. The device will

I think we're missing an article here.  Is it "a flow" or "the flow"?

> +# have to acknowledged the acceptance of the extents before they are usable.

to acknowledge

docs/devel/qapi-code-gen.rst:

    For legibility, wrap text paragraphs so every line is at most 70
    characters long.

    Separate sentences with two spaces.

> +#
> +# @path: CXL DCD canonical QOM path

What is a CXL DCD?  Is it a device?

I'd prefer @qom-path, unless you can make a consistency argument for
@path.

> +# @region-id: id of the region where the extent to add

What's a region, and how do they get their IDs?

> +# @extents: Extents to add

Blank lines between argument descriptions, please.

> +#
> +# Since : 9.0

9.1

> +##
> +{ 'command': 'cxl-add-dynamic-capacity',
> +  'data': { 'path': 'str',
> +            'region-id': 'uint8',
> +            'extents': [ 'CXLDCExtentRecord' ]
> +           }
> +}
> +
> +##
> +# @cxl-release-dynamic-capacity:
> +#
> +# Command to start release dynamic capacity extents flow. The host will

Article again.

The host?  In cxl-add-dynamic-capacity's doc comment, it's the device.

> +# need to respond to indicate that it has released the capacity before it
> +# is made unavailable for read and write and can be re-added.

Is "and can be re-added" relevant here?

> +#
> +# @path: CXL DCD canonical QOM path
> +# @region-id: id of the region where the extent to release
> +# @extents: Extents to release
> +#
> +# Since : 9.0

9.1

> +##
> +{ 'command': 'cxl-release-dynamic-capacity',
> +  'data': { 'path': 'str',
> +            'region-id': 'uint8',
> +            'extents': [ 'CXLDCExtentRecord' ]
> +           }
> +}
fan April 24, 2024, 5:10 p.m. UTC | #10
On Wed, Apr 24, 2024 at 03:09:52PM +0200, Markus Armbruster wrote:
> nifan.cxl@gmail.com writes:
> 
> > From: Fan Ni <fan.ni@samsung.com>
> >
> > Since fabric manager emulation is not supported yet, the change implements
> > the functions to add/release dynamic capacity extents as QMP interfaces.
> 
> Will fabric manager emulation obsolete these commands?
> 

Hi Markus,
Thanks for reviewing the patchset. This is v5 and we have sent out v7
recently, there are a lot of changes from v5 to v7.

FYI. v7: https://lore.kernel.org/linux-cxl/ZiaFYUB6FC9NR7W4@memverge.com/T/#t

Fan

> > Note: we skips any FM issued extent release request if the exact extent
> > does not exist in the extent list of the device. We will loose the
> > restriction later once we have partial release support in the kernel.
> >
> > 1. Add dynamic capacity extents:
> >
> > For example, the command to add two continuous extents (each 128MiB long)
> > to region 0 (starting at DPA offset 0) looks like below:
> >
> > { "execute": "qmp_capabilities" }
> >
> > { "execute": "cxl-add-dynamic-capacity",
> >   "arguments": {
> >       "path": "/machine/peripheral/cxl-dcd0",
> >       "region-id": 0,
> >       "extents": [
> >       {
> >           "dpa": 0,
> >           "len": 134217728
> >       },
> >       {
> >           "dpa": 134217728,
> >           "len": 134217728
> >       }
> >       ]
> >   }
> > }
> >
> > 2. Release dynamic capacity extents:
> >
> > For example, the command to release an extent of size 128MiB from region 0
> > (DPA offset 128MiB) look like below:
> >
> > { "execute": "cxl-release-dynamic-capacity",
> >   "arguments": {
> >       "path": "/machine/peripheral/cxl-dcd0",
> >       "region-id": 0,
> >       "extents": [
> >       {
> >           "dpa": 134217728,
> >           "len": 134217728
> >       }
> >       ]
> >   }
> > }
> >
> > Signed-off-by: Fan Ni <fan.ni@samsung.com>
> 
> [...]
> 
> > diff --git a/qapi/cxl.json b/qapi/cxl.json
> > index 8cc4c72fa9..2645004666 100644
> > --- a/qapi/cxl.json
> > +++ b/qapi/cxl.json
> > @@ -19,13 +19,16 @@
> >  #
> >  # @fatal: Fatal Event Log
> >  #
> > +# @dyncap: Dynamic Capacity Event Log
> > +#
> >  # Since: 8.1
> >  ##
> >  { 'enum': 'CxlEventLog',
> >    'data': ['informational',
> >             'warning',
> >             'failure',
> > -           'fatal']
> > +           'fatal',
> > +           'dyncap']
> 
> We tend to avoid abbreviations in QMP identifiers: dynamic-capacity.
> 
> >   }
> >  
> >  ##
> > @@ -361,3 +364,59 @@
> >  ##
> >  {'command': 'cxl-inject-correctable-error',
> >   'data': {'path': 'str', 'type': 'CxlCorErrorType'}}
> > +
> > +##
> > +# @CXLDCExtentRecord:
> 
> Such traffic jams of capital letters are hard to read.
> 
> What does DC mean?
> 
> > +#
> > +# Record of a single extent to add/release
> > +#
> > +# @offset: offset to the start of the region where the extent to be operated
> 
> Blank line here, please
> 
> > +# @len: length of the extent
> > +#
> > +# Since: 9.0
> > +##
> > +{ 'struct': 'CXLDCExtentRecord',
> > +  'data': {
> > +      'offset':'uint64',
> > +      'len': 'uint64'
> > +  }
> > +}
> > +
> > +##
> > +# @cxl-add-dynamic-capacity:
> > +#
> > +# Command to start add dynamic capacity extents flow. The device will
> 
> I think we're missing an article here.  Is it "a flow" or "the flow"?
> 
> > +# have to acknowledged the acceptance of the extents before they are usable.
> 
> to acknowledge
> 
> docs/devel/qapi-code-gen.rst:
> 
>     For legibility, wrap text paragraphs so every line is at most 70
>     characters long.
> 
>     Separate sentences with two spaces.
> 
> > +#
> > +# @path: CXL DCD canonical QOM path
> 
> What is a CXL DCD?  Is it a device?
> 
> I'd prefer @qom-path, unless you can make a consistency argument for
> @path.
> 
> > +# @region-id: id of the region where the extent to add
> 
> What's a region, and how do they get their IDs?
> 
> > +# @extents: Extents to add
> 
> Blank lines between argument descriptions, please.
> 
> > +#
> > +# Since : 9.0
> 
> 9.1
> 
> > +##
> > +{ 'command': 'cxl-add-dynamic-capacity',
> > +  'data': { 'path': 'str',
> > +            'region-id': 'uint8',
> > +            'extents': [ 'CXLDCExtentRecord' ]
> > +           }
> > +}
> > +
> > +##
> > +# @cxl-release-dynamic-capacity:
> > +#
> > +# Command to start release dynamic capacity extents flow. The host will
> 
> Article again.
> 
> The host?  In cxl-add-dynamic-capacity's doc comment, it's the device.
> 
> > +# need to respond to indicate that it has released the capacity before it
> > +# is made unavailable for read and write and can be re-added.
> 
> Is "and can be re-added" relevant here?
> 
> > +#
> > +# @path: CXL DCD canonical QOM path
> > +# @region-id: id of the region where the extent to release
> > +# @extents: Extents to release
> > +#
> > +# Since : 9.0
> 
> 9.1
> 
> > +##
> > +{ 'command': 'cxl-release-dynamic-capacity',
> > +  'data': { 'path': 'str',
> > +            'region-id': 'uint8',
> > +            'extents': [ 'CXLDCExtentRecord' ]
> > +           }
> > +}
>
Markus Armbruster April 24, 2024, 5:26 p.m. UTC | #11
fan <nifan.cxl@gmail.com> writes:

> On Wed, Apr 24, 2024 at 03:09:52PM +0200, Markus Armbruster wrote:
>> nifan.cxl@gmail.com writes:
>> 
>> > From: Fan Ni <fan.ni@samsung.com>
>> >
>> > Since fabric manager emulation is not supported yet, the change implements
>> > the functions to add/release dynamic capacity extents as QMP interfaces.
>> 
>> Will fabric manager emulation obsolete these commands?
>> 
>
> Hi Markus,
> Thanks for reviewing the patchset. This is v5 and we have sent out v7
> recently, there are a lot of changes from v5 to v7.
>
> FYI. v7: https://lore.kernel.org/linux-cxl/ZiaFYUB6FC9NR7W4@memverge.com/T/#t

Missed it because you neglected to cc: me for qapi/cxl.json :)

Thanks!
Ira Weiny April 24, 2024, 5:33 p.m. UTC | #12
Markus Armbruster wrote:
> nifan.cxl@gmail.com writes:
> 
> > From: Fan Ni <fan.ni@samsung.com>
> >
> > Since fabric manager emulation is not supported yet, the change implements
> > the functions to add/release dynamic capacity extents as QMP interfaces.
> 
> Will fabric manager emulation obsolete these commands?

I don't think so.  In the development of the kernel, I see these being
valuable to do CI and regression testing without the complexity of an FM.

Ira

> 
> > Note: we skips any FM issued extent release request if the exact extent
> > does not exist in the extent list of the device. We will loose the
> > restriction later once we have partial release support in the kernel.
> >
> > 1. Add dynamic capacity extents:
> >
> > For example, the command to add two continuous extents (each 128MiB long)
> > to region 0 (starting at DPA offset 0) looks like below:
> >
> > { "execute": "qmp_capabilities" }
> >
> > { "execute": "cxl-add-dynamic-capacity",
> >   "arguments": {
> >       "path": "/machine/peripheral/cxl-dcd0",
> >       "region-id": 0,
> >       "extents": [
> >       {
> >           "dpa": 0,
> >           "len": 134217728
> >       },
> >       {
> >           "dpa": 134217728,
> >           "len": 134217728
> >       }
> >       ]
> >   }
> > }
> >
> > 2. Release dynamic capacity extents:
> >
> > For example, the command to release an extent of size 128MiB from region 0
> > (DPA offset 128MiB) look like below:
> >
> > { "execute": "cxl-release-dynamic-capacity",
> >   "arguments": {
> >       "path": "/machine/peripheral/cxl-dcd0",
> >       "region-id": 0,
> >       "extents": [
> >       {
> >           "dpa": 134217728,
> >           "len": 134217728
> >       }
> >       ]
> >   }
> > }
> >
> > Signed-off-by: Fan Ni <fan.ni@samsung.com>
> 
> [...]
> 
> > diff --git a/qapi/cxl.json b/qapi/cxl.json
> > index 8cc4c72fa9..2645004666 100644
> > --- a/qapi/cxl.json
> > +++ b/qapi/cxl.json
> > @@ -19,13 +19,16 @@
> >  #
> >  # @fatal: Fatal Event Log
> >  #
> > +# @dyncap: Dynamic Capacity Event Log
> > +#
> >  # Since: 8.1
> >  ##
> >  { 'enum': 'CxlEventLog',
> >    'data': ['informational',
> >             'warning',
> >             'failure',
> > -           'fatal']
> > +           'fatal',
> > +           'dyncap']
> 
> We tend to avoid abbreviations in QMP identifiers: dynamic-capacity.
> 
> >   }
> >  
> >  ##
> > @@ -361,3 +364,59 @@
> >  ##
> >  {'command': 'cxl-inject-correctable-error',
> >   'data': {'path': 'str', 'type': 'CxlCorErrorType'}}
> > +
> > +##
> > +# @CXLDCExtentRecord:
> 
> Such traffic jams of capital letters are hard to read.
> 
> What does DC mean?
> 
> > +#
> > +# Record of a single extent to add/release
> > +#
> > +# @offset: offset to the start of the region where the extent to be operated
> 
> Blank line here, please
> 
> > +# @len: length of the extent
> > +#
> > +# Since: 9.0
> > +##
> > +{ 'struct': 'CXLDCExtentRecord',
> > +  'data': {
> > +      'offset':'uint64',
> > +      'len': 'uint64'
> > +  }
> > +}
> > +
> > +##
> > +# @cxl-add-dynamic-capacity:
> > +#
> > +# Command to start add dynamic capacity extents flow. The device will
> 
> I think we're missing an article here.  Is it "a flow" or "the flow"?
> 
> > +# have to acknowledged the acceptance of the extents before they are usable.
> 
> to acknowledge
> 
> docs/devel/qapi-code-gen.rst:
> 
>     For legibility, wrap text paragraphs so every line is at most 70
>     characters long.
> 
>     Separate sentences with two spaces.
> 
> > +#
> > +# @path: CXL DCD canonical QOM path
> 
> What is a CXL DCD?  Is it a device?
> 
> I'd prefer @qom-path, unless you can make a consistency argument for
> @path.
> 
> > +# @region-id: id of the region where the extent to add
> 
> What's a region, and how do they get their IDs?
> 
> > +# @extents: Extents to add
> 
> Blank lines between argument descriptions, please.
> 
> > +#
> > +# Since : 9.0
> 
> 9.1
> 
> > +##
> > +{ 'command': 'cxl-add-dynamic-capacity',
> > +  'data': { 'path': 'str',
> > +            'region-id': 'uint8',
> > +            'extents': [ 'CXLDCExtentRecord' ]
> > +           }
> > +}
> > +
> > +##
> > +# @cxl-release-dynamic-capacity:
> > +#
> > +# Command to start release dynamic capacity extents flow. The host will
> 
> Article again.
> 
> The host?  In cxl-add-dynamic-capacity's doc comment, it's the device.
> 
> > +# need to respond to indicate that it has released the capacity before it
> > +# is made unavailable for read and write and can be re-added.
> 
> Is "and can be re-added" relevant here?
> 
> > +#
> > +# @path: CXL DCD canonical QOM path
> > +# @region-id: id of the region where the extent to release
> > +# @extents: Extents to release
> > +#
> > +# Since : 9.0
> 
> 9.1
> 
> > +##
> > +{ 'command': 'cxl-release-dynamic-capacity',
> > +  'data': { 'path': 'str',
> > +            'region-id': 'uint8',
> > +            'extents': [ 'CXLDCExtentRecord' ]
> > +           }
> > +}
>
fan April 24, 2024, 5:39 p.m. UTC | #13
On Wed, Apr 24, 2024 at 03:09:52PM +0200, Markus Armbruster wrote:
> nifan.cxl@gmail.com writes:
> 
> > From: Fan Ni <fan.ni@samsung.com>
> >
> > Since fabric manager emulation is not supported yet, the change implements
> > the functions to add/release dynamic capacity extents as QMP interfaces.
> 
> Will fabric manager emulation obsolete these commands?

If in the future, fabric manager emulation supports commands for dynamic capacity
extent add/release, it is possible we do not need the commands.
But it seems not to happen soon, we need the qmp commands for the
end-to-end test with kernel DCD support.

> 
> > Note: we skips any FM issued extent release request if the exact extent
> > does not exist in the extent list of the device. We will loose the
> > restriction later once we have partial release support in the kernel.
> >
> > 1. Add dynamic capacity extents:
> >
> > For example, the command to add two continuous extents (each 128MiB long)
> > to region 0 (starting at DPA offset 0) looks like below:
> >
> > { "execute": "qmp_capabilities" }
> >
> > { "execute": "cxl-add-dynamic-capacity",
> >   "arguments": {
> >       "path": "/machine/peripheral/cxl-dcd0",
> >       "region-id": 0,
> >       "extents": [
> >       {
> >           "dpa": 0,
> >           "len": 134217728
> >       },
> >       {
> >           "dpa": 134217728,
> >           "len": 134217728
> >       }
> >       ]
> >   }
> > }
> >
> > 2. Release dynamic capacity extents:
> >
> > For example, the command to release an extent of size 128MiB from region 0
> > (DPA offset 128MiB) look like below:
> >
> > { "execute": "cxl-release-dynamic-capacity",
> >   "arguments": {
> >       "path": "/machine/peripheral/cxl-dcd0",
> >       "region-id": 0,
> >       "extents": [
> >       {
> >           "dpa": 134217728,
> >           "len": 134217728
> >       }
> >       ]
> >   }
> > }
> >
> > Signed-off-by: Fan Ni <fan.ni@samsung.com>
> 
> [...]
> 
> > diff --git a/qapi/cxl.json b/qapi/cxl.json
> > index 8cc4c72fa9..2645004666 100644
> > --- a/qapi/cxl.json
> > +++ b/qapi/cxl.json
> > @@ -19,13 +19,16 @@
> >  #
> >  # @fatal: Fatal Event Log
> >  #
> > +# @dyncap: Dynamic Capacity Event Log
> > +#
> >  # Since: 8.1
> >  ##
> >  { 'enum': 'CxlEventLog',
> >    'data': ['informational',
> >             'warning',
> >             'failure',
> > -           'fatal']
> > +           'fatal',
> > +           'dyncap']
> 
> We tend to avoid abbreviations in QMP identifiers: dynamic-capacity.

FYI. This has been removed to avoid the potential side effect in the
latest post.
v7: https://lore.kernel.org/linux-cxl/ZiaFYUB6FC9NR7W4@memverge.com/T/#t

> 
> >   }
> >  
> >  ##
> > @@ -361,3 +364,59 @@
> >  ##
> >  {'command': 'cxl-inject-correctable-error',
> >   'data': {'path': 'str', 'type': 'CxlCorErrorType'}}
> > +
> > +##
> > +# @CXLDCExtentRecord:
> 
> Such traffic jams of capital letters are hard to read.
> 
> What does DC mean?
Dynamic capacity
> 
> > +#
> > +# Record of a single extent to add/release
> > +#
> > +# @offset: offset to the start of the region where the extent to be operated
> 
> Blank line here, please
> 
> > +# @len: length of the extent
> > +#
> > +# Since: 9.0
> > +##
> > +{ 'struct': 'CXLDCExtentRecord',
> > +  'data': {
> > +      'offset':'uint64',
> > +      'len': 'uint64'
> > +  }
> > +}
> > +
> > +##
> > +# @cxl-add-dynamic-capacity:
> > +#
> > +# Command to start add dynamic capacity extents flow. The device will
> 
> I think we're missing an article here.  Is it "a flow" or "the flow"?
> 
> > +# have to acknowledged the acceptance of the extents before they are usable.
> 
> to acknowledge

It should be "to be acknowledged". 

> 
> docs/devel/qapi-code-gen.rst:
> 
>     For legibility, wrap text paragraphs so every line is at most 70
>     characters long.
> 
>     Separate sentences with two spaces.

Thanks. Will fix.
> 
> > +#
> > +# @path: CXL DCD canonical QOM path
> 
> What is a CXL DCD?  Is it a device?
Dynamic capacity device.
Yes. It is cxl memory device that can change capacity dynamically.

> 
> I'd prefer @qom-path, unless you can make a consistency argument for
> @path.
> 
> > +# @region-id: id of the region where the extent to add
> 
> What's a region, and how do they get their IDs?

Each DCD device can support up to 8 regions (0-7).  

> 
> > +# @extents: Extents to add
> 
> Blank lines between argument descriptions, please.
> 
> > +#
> > +# Since : 9.0
> 
> 9.1

Already fixed in the latest post.

> 
> > +##
> > +{ 'command': 'cxl-add-dynamic-capacity',
> > +  'data': { 'path': 'str',
> > +            'region-id': 'uint8',
> > +            'extents': [ 'CXLDCExtentRecord' ]
> > +           }
> > +}
> > +
> > +##
> > +# @cxl-release-dynamic-capacity:
> > +#
> > +# Command to start release dynamic capacity extents flow. The host will
> 
> Article again.
> 
> The host?  In cxl-add-dynamic-capacity's doc comment, it's the device.

For add command, the host will send a mailbox command to response to the add
request to the device to indicate whether it accepts the add capacity offer
or not.

For release command, the host send a mailbox command (not always a response
since the host can proactively release capacity if it does not need it
any more) to device to ask device release the capacity.

But yes, the text needs to be polished.

> 
> > +# need to respond to indicate that it has released the capacity before it
> > +# is made unavailable for read and write and can be re-added.
> 
> Is "and can be re-added" relevant here?

Not really. Will fix.

> 
> > +#
> > +# @path: CXL DCD canonical QOM path
> > +# @region-id: id of the region where the extent to release
> > +# @extents: Extents to release
> > +#
> > +# Since : 9.0
> 
> 9.1

Already fixed in the latest post.

Thanks again for the review. Will take care of the comments in the next
version.

Fan
> 
> > +##
> > +{ 'command': 'cxl-release-dynamic-capacity',
> > +  'data': { 'path': 'str',
> > +            'region-id': 'uint8',
> > +            'extents': [ 'CXLDCExtentRecord' ]
> > +           }
> > +}
>
fan April 24, 2024, 5:44 p.m. UTC | #14
On Wed, Apr 24, 2024 at 07:26:23PM +0200, Markus Armbruster wrote:
> fan <nifan.cxl@gmail.com> writes:
> 
> > On Wed, Apr 24, 2024 at 03:09:52PM +0200, Markus Armbruster wrote:
> >> nifan.cxl@gmail.com writes:
> >> 
> >> > From: Fan Ni <fan.ni@samsung.com>
> >> >
> >> > Since fabric manager emulation is not supported yet, the change implements
> >> > the functions to add/release dynamic capacity extents as QMP interfaces.
> >> 
> >> Will fabric manager emulation obsolete these commands?
> >> 
> >
> > Hi Markus,
> > Thanks for reviewing the patchset. This is v5 and we have sent out v7
> > recently, there are a lot of changes from v5 to v7.
> >
> > FYI. v7: https://lore.kernel.org/linux-cxl/ZiaFYUB6FC9NR7W4@memverge.com/T/#t
> 
> Missed it because you neglected to cc: me for qapi/cxl.json :)
> 
> Thanks!

Sorry for that. This is the first time I made changes to qapi/cxl.json so
missed that.  I will cc you when I sent out the next version.
Btw, thanks for the review. I have replied to your comments in another reply.

Fan
>
Markus Armbruster April 25, 2024, 5:48 a.m. UTC | #15
fan <nifan.cxl@gmail.com> writes:

> On Wed, Apr 24, 2024 at 03:09:52PM +0200, Markus Armbruster wrote:
>> nifan.cxl@gmail.com writes:
>> 
>> > From: Fan Ni <fan.ni@samsung.com>
>> >
>> > Since fabric manager emulation is not supported yet, the change implements
>> > the functions to add/release dynamic capacity extents as QMP interfaces.
>> 
>> Will fabric manager emulation obsolete these commands?
>
> If in the future, fabric manager emulation supports commands for dynamic capacity
> extent add/release, it is possible we do not need the commands.
> But it seems not to happen soon, we need the qmp commands for the
> end-to-end test with kernel DCD support.

I asked because if the commands are temporary testing aids, they should
probably be declared unstable.  Even if they are permanent testing aids,
unstable might be the right choice.  This is for the CXL maintainers to
decide.

What does "unstable" mean?  docs/devel/qapi-code-gen.rst: "Interfaces so
marked may be withdrawn or changed incompatibly in future releases."

Management applications need stable interfaces.  Libvirt developers
generally refuse to touch anything in QMP that's declared unstable.

Human users and their ad hoc scripts appreciate stability, but they
don't need it nearly as much as management applications do.

A stability promise increases the maintenance burden.  By how much is
unclear.  In other words, by promising stability, the maintainers take
on risk.  Are the CXL maintainers happy to accept the risk here?

>> > Note: we skips any FM issued extent release request if the exact extent
>> > does not exist in the extent list of the device. We will loose the
>> > restriction later once we have partial release support in the kernel.
>> >
>> > 1. Add dynamic capacity extents:
>> >
>> > For example, the command to add two continuous extents (each 128MiB long)
>> > to region 0 (starting at DPA offset 0) looks like below:
>> >
>> > { "execute": "qmp_capabilities" }
>> >
>> > { "execute": "cxl-add-dynamic-capacity",
>> >   "arguments": {
>> >       "path": "/machine/peripheral/cxl-dcd0",
>> >       "region-id": 0,
>> >       "extents": [
>> >       {
>> >           "dpa": 0,
>> >           "len": 134217728
>> >       },
>> >       {
>> >           "dpa": 134217728,
>> >           "len": 134217728
>> >       }
>> >       ]
>> >   }
>> > }
>> >
>> > 2. Release dynamic capacity extents:
>> >
>> > For example, the command to release an extent of size 128MiB from region 0
>> > (DPA offset 128MiB) look like below:
>> >
>> > { "execute": "cxl-release-dynamic-capacity",
>> >   "arguments": {
>> >       "path": "/machine/peripheral/cxl-dcd0",
>> >       "region-id": 0,
>> >       "extents": [
>> >       {
>> >           "dpa": 134217728,
>> >           "len": 134217728
>> >       }
>> >       ]
>> >   }
>> > }
>> >
>> > Signed-off-by: Fan Ni <fan.ni@samsung.com>
>> 
>> [...]
>> 
>> > diff --git a/qapi/cxl.json b/qapi/cxl.json
>> > index 8cc4c72fa9..2645004666 100644
>> > --- a/qapi/cxl.json
>> > +++ b/qapi/cxl.json
>> > @@ -19,13 +19,16 @@
>> >  #
>> >  # @fatal: Fatal Event Log
>> >  #
>> > +# @dyncap: Dynamic Capacity Event Log
>> > +#
>> >  # Since: 8.1
>> >  ##
>> >  { 'enum': 'CxlEventLog',
>> >    'data': ['informational',
>> >             'warning',
>> >             'failure',
>> > -           'fatal']
>> > +           'fatal',
>> > +           'dyncap']
>> 
>> We tend to avoid abbreviations in QMP identifiers: dynamic-capacity.
>
> FYI. This has been removed to avoid the potential side effect in the
> latest post.
> v7: https://lore.kernel.org/linux-cxl/ZiaFYUB6FC9NR7W4@memverge.com/T/#t
>
>> 
>> >   }
>> >  
>> >  ##
>> > @@ -361,3 +364,59 @@
>> >  ##
>> >  {'command': 'cxl-inject-correctable-error',
>> >   'data': {'path': 'str', 'type': 'CxlCorErrorType'}}
>> > +
>> > +##
>> > +# @CXLDCExtentRecord:
>> 
>> Such traffic jams of capital letters are hard to read.
>> 
>> What does DC mean?
>
> Dynamic capacity

Suggest CxlDynamicCapacityExtent.

>> > +#
>> > +# Record of a single extent to add/release
>> > +#
>> > +# @offset: offset to the start of the region where the extent to be operated
>> 
>> Blank line here, please
>> 
>> > +# @len: length of the extent
>> > +#
>> > +# Since: 9.0
>> > +##
>> > +{ 'struct': 'CXLDCExtentRecord',
>> > +  'data': {
>> > +      'offset':'uint64',
>> > +      'len': 'uint64'
>> > +  }
>> > +}
>> > +
>> > +##
>> > +# @cxl-add-dynamic-capacity:
>> > +#
>> > +# Command to start add dynamic capacity extents flow. The device will
>> 
>> I think we're missing an article here.  Is it "a flow" or "the flow"?
>> 
>> > +# have to acknowledged the acceptance of the extents before they are usable.
>> 
>> to acknowledge
>
> It should be "to be acknowledged". 
>
>> 
>> docs/devel/qapi-code-gen.rst:
>> 
>>     For legibility, wrap text paragraphs so every line is at most 70
>>     characters long.
>> 
>>     Separate sentences with two spaces.
>
> Thanks. Will fix.
>> 
>> > +#
>> > +# @path: CXL DCD canonical QOM path
>> 
>> What is a CXL DCD?  Is it a device?
>
> Dynamic capacity device.
> Yes. It is cxl memory device that can change capacity dynamically.

Sure the QOM path needs to be canonical?

If not, what about "path to the CXL dynamic capacity device in the QOM
tree".  Intentionally close to existing descriptions of @qom-path
elsewhere.

>> I'd prefer @qom-path, unless you can make a consistency argument for
>> @path.
>> 
>> > +# @region-id: id of the region where the extent to add
>> 
>> What's a region, and how do they get their IDs?
>
> Each DCD device can support up to 8 regions (0-7).  

Is "region ID" the established terminology in CXL-land?  Or is "region
number" also used?  I'm asking because "ID" in this QEMU device context
suggests a connection to a qdev ID.

If region number is fine, I'd rename to just @region, and rephrase the
description to avoid "ID".  Perhaps "number of the region the extent is
to be added to".  Not entirely happy with the phrasing, doesn't exactly
roll off the tongue, but "where the extent to add" sounds worse to my
ears.  Mind, I'm not a native speaker.

>> > +# @extents: Extents to add
>> 
>> Blank lines between argument descriptions, please.
>> 
>> > +#
>> > +# Since : 9.0
>> 
>> 9.1
>
> Already fixed in the latest post.
>
>> 
>> > +##
>> > +{ 'command': 'cxl-add-dynamic-capacity',
>> > +  'data': { 'path': 'str',
>> > +            'region-id': 'uint8',
>> > +            'extents': [ 'CXLDCExtentRecord' ]
>> > +           }
>> > +}
>> > +
>> > +##
>> > +# @cxl-release-dynamic-capacity:
>> > +#
>> > +# Command to start release dynamic capacity extents flow. The host will
>> 
>> Article again.
>> 
>> The host?  In cxl-add-dynamic-capacity's doc comment, it's the device.
>
> For add command, the host will send a mailbox command to response to the add
> request to the device to indicate whether it accepts the add capacity offer
> or not.
>
> For release command, the host send a mailbox command (not always a response
> since the host can proactively release capacity if it does not need it
> any more) to device to ask device release the capacity.
>
> But yes, the text needs to be polished.

Please do.  You may have to briefly explain which peer initiates what
for this to make sense.

>> > +# need to respond to indicate that it has released the capacity before it
>> > +# is made unavailable for read and write and can be re-added.
>> 
>> Is "and can be re-added" relevant here?
>
> Not really. Will fix.
>
>> 
>> > +#
>> > +# @path: CXL DCD canonical QOM path
>> > +# @region-id: id of the region where the extent to release
>> > +# @extents: Extents to release
>> > +#
>> > +# Since : 9.0
>> 
>> 9.1
>
> Already fixed in the latest post.
>
> Thanks again for the review. Will take care of the comments in the next
> version.

You're welcome!

> Fan
>> 
>> > +##
>> > +{ 'command': 'cxl-release-dynamic-capacity',
>> > +  'data': { 'path': 'str',
>> > +            'region-id': 'uint8',
>> > +            'extents': [ 'CXLDCExtentRecord' ]
>> > +           }
>> > +}
>>
Ira Weiny April 25, 2024, 5:30 p.m. UTC | #16
Markus Armbruster wrote:
> fan <nifan.cxl@gmail.com> writes:
> 
> > On Wed, Apr 24, 2024 at 03:09:52PM +0200, Markus Armbruster wrote:
> >> nifan.cxl@gmail.com writes:
> >> 
> >> > From: Fan Ni <fan.ni@samsung.com>
> >> >
> >> > Since fabric manager emulation is not supported yet, the change implements
> >> > the functions to add/release dynamic capacity extents as QMP interfaces.
> >> 
> >> Will fabric manager emulation obsolete these commands?
> >
> > If in the future, fabric manager emulation supports commands for dynamic capacity
> > extent add/release, it is possible we do not need the commands.
> > But it seems not to happen soon, we need the qmp commands for the
> > end-to-end test with kernel DCD support.
> 
> I asked because if the commands are temporary testing aids, they should
> probably be declared unstable.  Even if they are permanent testing aids,
> unstable might be the right choice.  This is for the CXL maintainers to
> decide.
> 
> What does "unstable" mean?  docs/devel/qapi-code-gen.rst: "Interfaces so
> marked may be withdrawn or changed incompatibly in future releases."
> 
> Management applications need stable interfaces.  Libvirt developers
> generally refuse to touch anything in QMP that's declared unstable.
> 
> Human users and their ad hoc scripts appreciate stability, but they
> don't need it nearly as much as management applications do.
> 
> A stability promise increases the maintenance burden.  By how much is
> unclear.  In other words, by promising stability, the maintainers take
> on risk.  Are the CXL maintainers happy to accept the risk here?
> 

Ah...  All great points.

Outside of CXL development I don't think there is a strong need for them
to be stable.  I would like to see more than ad hoc scripts use them
though.  So I don't think they are going to be changed without some
thought though.

Ira

[snip]
Jonathan Cameron April 26, 2024, 3:55 p.m. UTC | #17
On Wed, 24 Apr 2024 10:33:33 -0700
Ira Weiny <ira.weiny@intel.com> wrote:

> Markus Armbruster wrote:
> > nifan.cxl@gmail.com writes:
> >   
> > > From: Fan Ni <fan.ni@samsung.com>
> > >
> > > Since fabric manager emulation is not supported yet, the change implements
> > > the functions to add/release dynamic capacity extents as QMP interfaces.  
> > 
> > Will fabric manager emulation obsolete these commands?  
> 
> I don't think so.  In the development of the kernel, I see these being
> valuable to do CI and regression testing without the complexity of an FM.

Fully agree - I also long term see these as the drivers for one
possible virtualization stack for DCD devices (whether it turns
out to be the way forwards for that is going to take a while to
resolve!)

It doesn't make much sense to add a fabric manager into that flow
or to expose an appropriate (maybe MCTP) interface from QEMU just
to poke the emulated device.

Jonathan


> 
> Ira
> 
> >   
> > > Note: we skips any FM issued extent release request if the exact extent
> > > does not exist in the extent list of the device. We will loose the
> > > restriction later once we have partial release support in the kernel.
> > >
> > > 1. Add dynamic capacity extents:
> > >
> > > For example, the command to add two continuous extents (each 128MiB long)
> > > to region 0 (starting at DPA offset 0) looks like below:
> > >
> > > { "execute": "qmp_capabilities" }
> > >
> > > { "execute": "cxl-add-dynamic-capacity",
> > >   "arguments": {
> > >       "path": "/machine/peripheral/cxl-dcd0",
> > >       "region-id": 0,
> > >       "extents": [
> > >       {
> > >           "dpa": 0,
> > >           "len": 134217728
> > >       },
> > >       {
> > >           "dpa": 134217728,
> > >           "len": 134217728
> > >       }
> > >       ]
> > >   }
> > > }
> > >
> > > 2. Release dynamic capacity extents:
> > >
> > > For example, the command to release an extent of size 128MiB from region 0
> > > (DPA offset 128MiB) look like below:
> > >
> > > { "execute": "cxl-release-dynamic-capacity",
> > >   "arguments": {
> > >       "path": "/machine/peripheral/cxl-dcd0",
> > >       "region-id": 0,
> > >       "extents": [
> > >       {
> > >           "dpa": 134217728,
> > >           "len": 134217728
> > >       }
> > >       ]
> > >   }
> > > }
> > >
> > > Signed-off-by: Fan Ni <fan.ni@samsung.com>  
> > 
> > [...]
> >   
> > > diff --git a/qapi/cxl.json b/qapi/cxl.json
> > > index 8cc4c72fa9..2645004666 100644
> > > --- a/qapi/cxl.json
> > > +++ b/qapi/cxl.json
> > > @@ -19,13 +19,16 @@
> > >  #
> > >  # @fatal: Fatal Event Log
> > >  #
> > > +# @dyncap: Dynamic Capacity Event Log
> > > +#
> > >  # Since: 8.1
> > >  ##
> > >  { 'enum': 'CxlEventLog',
> > >    'data': ['informational',
> > >             'warning',
> > >             'failure',
> > > -           'fatal']
> > > +           'fatal',
> > > +           'dyncap']  
> > 
> > We tend to avoid abbreviations in QMP identifiers: dynamic-capacity.
> >   
> > >   }
> > >  
> > >  ##
> > > @@ -361,3 +364,59 @@
> > >  ##
> > >  {'command': 'cxl-inject-correctable-error',
> > >   'data': {'path': 'str', 'type': 'CxlCorErrorType'}}
> > > +
> > > +##
> > > +# @CXLDCExtentRecord:  
> > 
> > Such traffic jams of capital letters are hard to read.
> > 
> > What does DC mean?
> >   
> > > +#
> > > +# Record of a single extent to add/release
> > > +#
> > > +# @offset: offset to the start of the region where the extent to be operated  
> > 
> > Blank line here, please
> >   
> > > +# @len: length of the extent
> > > +#
> > > +# Since: 9.0
> > > +##
> > > +{ 'struct': 'CXLDCExtentRecord',
> > > +  'data': {
> > > +      'offset':'uint64',
> > > +      'len': 'uint64'
> > > +  }
> > > +}
> > > +
> > > +##
> > > +# @cxl-add-dynamic-capacity:
> > > +#
> > > +# Command to start add dynamic capacity extents flow. The device will  
> > 
> > I think we're missing an article here.  Is it "a flow" or "the flow"?
> >   
> > > +# have to acknowledged the acceptance of the extents before they are usable.  
> > 
> > to acknowledge
> > 
> > docs/devel/qapi-code-gen.rst:
> > 
> >     For legibility, wrap text paragraphs so every line is at most 70
> >     characters long.
> > 
> >     Separate sentences with two spaces.
> >   
> > > +#
> > > +# @path: CXL DCD canonical QOM path  
> > 
> > What is a CXL DCD?  Is it a device?
> > 
> > I'd prefer @qom-path, unless you can make a consistency argument for
> > @path.
> >   
> > > +# @region-id: id of the region where the extent to add  
> > 
> > What's a region, and how do they get their IDs?
> >   
> > > +# @extents: Extents to add  
> > 
> > Blank lines between argument descriptions, please.
> >   
> > > +#
> > > +# Since : 9.0  
> > 
> > 9.1
> >   
> > > +##
> > > +{ 'command': 'cxl-add-dynamic-capacity',
> > > +  'data': { 'path': 'str',
> > > +            'region-id': 'uint8',
> > > +            'extents': [ 'CXLDCExtentRecord' ]
> > > +           }
> > > +}
> > > +
> > > +##
> > > +# @cxl-release-dynamic-capacity:
> > > +#
> > > +# Command to start release dynamic capacity extents flow. The host will  
> > 
> > Article again.
> > 
> > The host?  In cxl-add-dynamic-capacity's doc comment, it's the device.
> >   
> > > +# need to respond to indicate that it has released the capacity before it
> > > +# is made unavailable for read and write and can be re-added.  
> > 
> > Is "and can be re-added" relevant here?
> >   
> > > +#
> > > +# @path: CXL DCD canonical QOM path
> > > +# @region-id: id of the region where the extent to release
> > > +# @extents: Extents to release
> > > +#
> > > +# Since : 9.0  
> > 
> > 9.1
> >   
> > > +##
> > > +{ 'command': 'cxl-release-dynamic-capacity',
> > > +  'data': { 'path': 'str',
> > > +            'region-id': 'uint8',
> > > +            'extents': [ 'CXLDCExtentRecord' ]
> > > +           }
> > > +}  
> >   
> 
>
Jonathan Cameron April 26, 2024, 4 p.m. UTC | #18
On Thu, 25 Apr 2024 10:30:51 -0700
Ira Weiny <ira.weiny@intel.com> wrote:

> Markus Armbruster wrote:
> > fan <nifan.cxl@gmail.com> writes:
> >   
> > > On Wed, Apr 24, 2024 at 03:09:52PM +0200, Markus Armbruster wrote:  
> > >> nifan.cxl@gmail.com writes:
> > >>   
> > >> > From: Fan Ni <fan.ni@samsung.com>
> > >> >
> > >> > Since fabric manager emulation is not supported yet, the change implements
> > >> > the functions to add/release dynamic capacity extents as QMP interfaces.  
> > >> 
> > >> Will fabric manager emulation obsolete these commands?  
> > >
> > > If in the future, fabric manager emulation supports commands for dynamic capacity
> > > extent add/release, it is possible we do not need the commands.
> > > But it seems not to happen soon, we need the qmp commands for the
> > > end-to-end test with kernel DCD support.  
> > 
> > I asked because if the commands are temporary testing aids, they should
> > probably be declared unstable.  Even if they are permanent testing aids,
> > unstable might be the right choice.  This is for the CXL maintainers to
> > decide.
> > 
> > What does "unstable" mean?  docs/devel/qapi-code-gen.rst: "Interfaces so
> > marked may be withdrawn or changed incompatibly in future releases."
> > 
> > Management applications need stable interfaces.  Libvirt developers
> > generally refuse to touch anything in QMP that's declared unstable.
> > 
> > Human users and their ad hoc scripts appreciate stability, but they
> > don't need it nearly as much as management applications do.
> > 
> > A stability promise increases the maintenance burden.  By how much is
> > unclear.  In other words, by promising stability, the maintainers take
> > on risk.  Are the CXL maintainers happy to accept the risk here?
> >   
> 
> Ah...  All great points.
> 
> Outside of CXL development I don't think there is a strong need for them
> to be stable.  I would like to see more than ad hoc scripts use them
> though.  So I don't think they are going to be changed without some
> thought though.

These align closely with the data that comes from the fabric management
API in the CXL spec.  So I don't see a big maintenance burden problem
in having these as stable interfaces.  Whilst they aren't doing quite
the same job as the FM-API (which will be emulated such that it is
visible to the guest as that aids some other types of testing) that
interface defines the limits on what we can tell the device to do.

So yes, risk for these is minimal and I'm happy to accept that.
It'll be a while before we need libvirt to use them but I do
expect to see that happen. (subject to some guessing on a future
virtualization stack!)

Jonathan



> 
> Ira
> 
> [snip]
Gregory Price April 26, 2024, 4:22 p.m. UTC | #19
On Fri, Apr 26, 2024 at 04:55:55PM +0100, Jonathan Cameron wrote:
> On Wed, 24 Apr 2024 10:33:33 -0700
> Ira Weiny <ira.weiny@intel.com> wrote:
> 
> > Markus Armbruster wrote:
> > > nifan.cxl@gmail.com writes:
> > >   
> > > > From: Fan Ni <fan.ni@samsung.com>
> > > >
> > > > Since fabric manager emulation is not supported yet, the change implements
> > > > the functions to add/release dynamic capacity extents as QMP interfaces.  
> > > 
> > > Will fabric manager emulation obsolete these commands?  
> > 
> > I don't think so.  In the development of the kernel, I see these being
> > valuable to do CI and regression testing without the complexity of an FM.
> 
> Fully agree - I also long term see these as the drivers for one
> possible virtualization stack for DCD devices (whether it turns
> out to be the way forwards for that is going to take a while to
> resolve!)
> 
> It doesn't make much sense to add a fabric manager into that flow
> or to expose an appropriate (maybe MCTP) interface from QEMU just
> to poke the emulated device.
> 
> Jonathan
> 

fwiw it's useful in modeling the Orchestrator/Fabric Manager interaction,
since you can basically build a little emulated MHD FM-LD on top of this.

You basically just put a tiny software layer in front that converts what
would be MCTP or whatever commands into QMP commands forwarded to the
appropriate socket.

When a real device comes around, you just point it at the real thing
instead of that small software layer.

But for the actual fabric manager, less useful. (Also, if you're
confused, it's because fabric manager is such an overloaded term
*laughcry*)

~Gregory
diff mbox series

Patch

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 8c59635a9f..53ebc526ae 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -1405,7 +1405,7 @@  static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(const struct cxl_cmd *cmd,
  * Check whether any bit between addr[nr, nr+size) is set,
  * return true if any bit is set, otherwise return false
  */
-static bool test_any_bits_set(const unsigned long *addr, unsigned long nr,
+bool test_any_bits_set(const unsigned long *addr, unsigned long nr,
                               unsigned long size)
 {
     unsigned long res = find_next_bit(addr, size + nr, nr);
@@ -1444,7 +1444,7 @@  CXLDCRegion *cxl_find_dc_region(CXLType3Dev *ct3d, uint64_t dpa, uint64_t len)
     return NULL;
 }
 
-static void cxl_insert_extent_to_extent_list(CXLDCExtentList *list,
+void cxl_insert_extent_to_extent_list(CXLDCExtentList *list,
                                              uint64_t dpa,
                                              uint64_t len,
                                              uint8_t *tag,
@@ -1591,16 +1591,28 @@  static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct cxl_cmd *cmd,
             }
         }
 
-        /*
-         * TODO: we will add a pending extent list based on event log record
-         * and verify the input response; also, the "More" flag is not
-         * considered at the moment.
-         */
+        QTAILQ_FOREACH(ent, &ct3d->dc.extents_pending_to_add, node) {
+            if (ent->start_dpa <= dpa &&
+                dpa + len <= ent->start_dpa + ent->len) {
+                break;
+            }
+        }
+        if (!ent) {
+            return CXL_MBOX_INVALID_PA;
+        }
+
+        cxl_remove_extent_from_extent_list(&ct3d->dc.extents_pending_to_add,
+                                           ent);
 
         cxl_insert_extent_to_extent_list(extent_list, dpa, len, NULL, 0);
         ct3d->dc.total_extent_count += 1;
     }
 
+    /*
+     * TODO: extents_pending_to_add needs to be cleared so the extents not
+     * accepted can be reclaimed base on spec r3.1: 8.2.9.9.9.3
+     */
+
     return CXL_MBOX_SUCCESS;
 }
 
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index dccfaaad3a..e9c8994cdb 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -674,6 +674,7 @@  static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error **errp)
         ct3d->dc.total_capacity += region->len;
     }
     QTAILQ_INIT(&ct3d->dc.extents);
+    QTAILQ_INIT(&ct3d->dc.extents_pending_to_add);
 
     return true;
 }
@@ -686,6 +687,12 @@  static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
         ent = QTAILQ_FIRST(&ct3d->dc.extents);
         cxl_remove_extent_from_extent_list(&ct3d->dc.extents, ent);
     }
+
+    while (!QTAILQ_EMPTY(&ct3d->dc.extents_pending_to_add)) {
+        ent = QTAILQ_FIRST(&ct3d->dc.extents_pending_to_add);
+        cxl_remove_extent_from_extent_list(&ct3d->dc.extents_pending_to_add,
+                                           ent);
+    }
 }
 
 static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
@@ -1451,7 +1458,8 @@  static int ct3d_qmp_cxl_event_log_enc(CxlEventLog log)
         return CXL_EVENT_TYPE_FAIL;
     case CXL_EVENT_LOG_FATAL:
         return CXL_EVENT_TYPE_FATAL;
-/* DCD not yet supported */
+    case CXL_EVENT_LOG_DYNCAP:
+        return CXL_EVENT_TYPE_DYNAMIC_CAP;
     default:
         return -EINVAL;
     }
@@ -1702,6 +1710,241 @@  void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log,
     }
 }
 
+/* CXL r3.1 Table 8-50: Dynamic Capacity Event Record */
+static const QemuUUID dynamic_capacity_uuid = {
+    .data = UUID(0xca95afa7, 0xf183, 0x4018, 0x8c, 0x2f,
+                 0x95, 0x26, 0x8e, 0x10, 0x1a, 0x2a),
+};
+
+typedef enum CXLDCEventType {
+    DC_EVENT_ADD_CAPACITY = 0x0,
+    DC_EVENT_RELEASE_CAPACITY = 0x1,
+    DC_EVENT_FORCED_RELEASE_CAPACITY = 0x2,
+    DC_EVENT_REGION_CONFIG_UPDATED = 0x3,
+    DC_EVENT_ADD_CAPACITY_RSP = 0x4,
+    DC_EVENT_CAPACITY_RELEASED = 0x5,
+} CXLDCEventType;
+
+/*
+ * Check whether the exact extent exists in the list
+ * Return value: the extent pointer in the list; else null
+ */
+static CXLDCExtent *cxl_dc_extent_exists(CXLDCExtentList *list,
+                                         CXLDCExtentRaw *ext)
+{
+    CXLDCExtent *ent;
+
+    if (!ext || !list) {
+        return NULL;
+    }
+
+    QTAILQ_FOREACH(ent, list, node) {
+        if (ent->start_dpa != ext->start_dpa) {
+            continue;
+        }
+
+        /* Found exact extent */
+        return ent->len == ext->len ? ent : NULL;
+    }
+
+    return NULL;
+}
+
+/*
+ * The main function to process dynamic capacity event. Currently DC extents
+ * add/release requests are processed.
+ */
+static void qmp_cxl_process_dynamic_capacity(const char *path, CxlEventLog log,
+                                             CXLDCEventType type, uint16_t hid,
+                                             uint8_t rid,
+                                             CXLDCExtentRecordList *records,
+                                             Error **errp)
+{
+    Object *obj;
+    CXLEventDynamicCapacity dCap = {};
+    CXLEventRecordHdr *hdr = &dCap.hdr;
+    CXLType3Dev *dcd;
+    uint8_t flags = 1 << CXL_EVENT_TYPE_INFO;
+    uint32_t num_extents = 0;
+    CXLDCExtentRecordList *list;
+    g_autofree CXLDCExtentRaw *extents = NULL;
+    uint8_t enc_log;
+    uint64_t offset, len, block_size;
+    int i;
+    int rc;
+    g_autofree unsigned long *blk_bitmap = NULL;
+
+    obj = object_resolve_path(path, NULL);
+    if (!obj) {
+        error_setg(errp, "Unable to resolve path");
+        return;
+    }
+    if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
+        error_setg(errp, "Path not point to a valid CXL type3 device");
+        return;
+    }
+
+    dcd = CXL_TYPE3(obj);
+    if (!dcd->dc.num_regions) {
+        error_setg(errp, "No dynamic capacity support from the device");
+        return;
+    }
+
+    rc = ct3d_qmp_cxl_event_log_enc(log);
+    if (rc < 0) {
+        error_setg(errp, "Unhandled error log type");
+        return;
+    }
+    enc_log = rc;
+
+    if (rid >= dcd->dc.num_regions) {
+        error_setg(errp, "region id is too large");
+        return;
+    }
+    block_size = dcd->dc.regions[rid].block_size;
+
+    /* Sanity check and count the extents */
+    list = records;
+    while (list) {
+        offset = list->value->offset;
+        len = list->value->len;
+
+        if (len == 0) {
+            error_setg(errp, "extent with 0 length is not allowed");
+            return;
+        }
+
+        if (offset % block_size || len % block_size) {
+            error_setg(errp, "dpa or len is not aligned to region block size");
+            return;
+        }
+
+        if (offset + len > dcd->dc.regions[rid].len) {
+            error_setg(errp, "extent range is beyond the region end");
+            return;
+        }
+
+        num_extents++;
+        list = list->next;
+    }
+    if (num_extents == 0) {
+        error_setg(errp, "No extents found in the command");
+        return;
+    }
+
+    blk_bitmap = bitmap_new(dcd->dc.regions[rid].len / block_size);
+
+    /* Create Extent list for event being passed to host */
+    i = 0;
+    list = records;
+    extents = g_new0(CXLDCExtentRaw, num_extents);
+    while (list) {
+        CXLDCExtent *ent;
+        bool skip_extent = false;
+
+        offset = list->value->offset;
+        len = list->value->len;
+
+        extents[i].start_dpa = offset + dcd->dc.regions[rid].base;
+        extents[i].len = len;
+        memset(extents[i].tag, 0, 0x10);
+        extents[i].shared_seq = 0;
+
+        if (type == DC_EVENT_RELEASE_CAPACITY ||
+            type == DC_EVENT_FORCED_RELEASE_CAPACITY) {
+            /*
+             *  if the extent is still pending to be added to the host,
+             * remove it from the pending extent list, so later when the add
+             * response for the extent arrives, the device can reject the
+             * extent as it is not in the pending list.
+             */
+            ent = cxl_dc_extent_exists(&dcd->dc.extents_pending_to_add,
+                        &extents[i]);
+            if (ent) {
+                QTAILQ_REMOVE(&dcd->dc.extents_pending_to_add, ent, node);
+                g_free(ent);
+                skip_extent = true;
+            } else if (!cxl_dc_extent_exists(&dcd->dc.extents, &extents[i])) {
+                /* If the exact extent is not in the accepted list, skip */
+                skip_extent = true;
+            }
+        }
+
+        /* No duplicate or overlapped extents are allowed */
+        if (test_any_bits_set(blk_bitmap, offset / block_size,
+                              len / block_size)) {
+            error_setg(errp, "duplicate or overlapped extents are detected");
+            return;
+        }
+        bitmap_set(blk_bitmap, offset / block_size, len / block_size);
+
+        list = list->next;
+        if (!skip_extent) {
+            i++;
+        }
+    }
+    num_extents = i;
+
+    /*
+     * CXL r3.1 section 8.2.9.2.1.6: Dynamic Capacity Event Record
+     *
+     * All Dynamic Capacity event records shall set the Event Record Severity
+     * field in the Common Event Record Format to Informational Event. All
+     * Dynamic Capacity related events shall be logged in the Dynamic Capacity
+     * Event Log.
+     */
+    cxl_assign_event_header(hdr, &dynamic_capacity_uuid, flags, sizeof(dCap),
+                            cxl_device_get_timestamp(&dcd->cxl_dstate));
+
+    dCap.type = type;
+    /* FIXME: for now, validity flag is cleared */
+    dCap.validity_flags = 0;
+    stw_le_p(&dCap.host_id, hid);
+    /* only valid for DC_REGION_CONFIG_UPDATED event */
+    dCap.updated_region_id = 0;
+    /*
+     * FIXME: for now, the "More" flag is cleared as there is only one
+     * extent associating with each record and tag-based release is
+     * not supported.
+     */
+    dCap.flags = 0;
+    for (i = 0; i < num_extents; i++) {
+        memcpy(&dCap.dynamic_capacity_extent, &extents[i],
+               sizeof(CXLDCExtentRaw));
+
+        if (type == DC_EVENT_ADD_CAPACITY) {
+            cxl_insert_extent_to_extent_list(&dcd->dc.extents_pending_to_add,
+                                             extents[i].start_dpa,
+                                             extents[i].len,
+                                             extents[i].tag,
+                                             extents[i].shared_seq);
+        }
+
+        if (cxl_event_insert(&dcd->cxl_dstate, enc_log,
+                             (CXLEventRecordRaw *)&dCap)) {
+            cxl_event_irq_assert(dcd);
+        }
+    }
+}
+
+void qmp_cxl_add_dynamic_capacity(const char *path, uint8_t region_id,
+                                  CXLDCExtentRecordList  *records,
+                                  Error **errp)
+{
+   qmp_cxl_process_dynamic_capacity(path, CXL_EVENT_LOG_DYNCAP,
+                                    DC_EVENT_ADD_CAPACITY, 0,
+                                    region_id, records, errp);
+}
+
+void qmp_cxl_release_dynamic_capacity(const char *path, uint8_t region_id,
+                                      CXLDCExtentRecordList  *records,
+                                      Error **errp)
+{
+    qmp_cxl_process_dynamic_capacity(path, CXL_EVENT_LOG_DYNCAP,
+                                     DC_EVENT_RELEASE_CAPACITY, 0,
+                                     region_id, records, errp);
+}
+
 static void ct3_class_init(ObjectClass *oc, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(oc);
diff --git a/hw/mem/cxl_type3_stubs.c b/hw/mem/cxl_type3_stubs.c
index 3e1851e32b..d913b11b4d 100644
--- a/hw/mem/cxl_type3_stubs.c
+++ b/hw/mem/cxl_type3_stubs.c
@@ -67,3 +67,17 @@  void qmp_cxl_inject_correctable_error(const char *path, CxlCorErrorType type,
 {
     error_setg(errp, "CXL Type 3 support is not compiled in");
 }
+
+void qmp_cxl_add_dynamic_capacity(const char *path, uint8_t region_id,
+                                  CXLDCExtentRecordList  *records,
+                                  Error **errp)
+{
+    error_setg(errp, "CXL Type 3 support is not compiled in");
+}
+
+void qmp_cxl_release_dynamic_capacity(const char *path, uint8_t region_id,
+                                      CXLDCExtentRecordList  *records,
+                                      Error **errp)
+{
+    error_setg(errp, "CXL Type 3 support is not compiled in");
+}
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 341260e6e4..b524c5e699 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -490,6 +490,7 @@  struct CXLType3Dev {
         AddressSpace host_dc_as;
         uint64_t total_capacity; /* 256M aligned */
         CXLDCExtentList extents;
+        CXLDCExtentList extents_pending_to_add;
         uint32_t total_extent_count;
         uint32_t ext_list_gen_seq;
 
@@ -551,4 +552,9 @@  CXLDCRegion *cxl_find_dc_region(CXLType3Dev *ct3d, uint64_t dpa, uint64_t len);
 
 void cxl_remove_extent_from_extent_list(CXLDCExtentList *list,
                                         CXLDCExtent *extent);
+void cxl_insert_extent_to_extent_list(CXLDCExtentList *list, uint64_t dpa,
+                                      uint64_t len, uint8_t *tag,
+                                      uint16_t shared_seq);
+bool test_any_bits_set(const unsigned long *addr, unsigned long nr,
+                       unsigned long size);
 #endif
diff --git a/include/hw/cxl/cxl_events.h b/include/hw/cxl/cxl_events.h
index 5170b8dbf8..38cadaa0f3 100644
--- a/include/hw/cxl/cxl_events.h
+++ b/include/hw/cxl/cxl_events.h
@@ -166,4 +166,22 @@  typedef struct CXLEventMemoryModule {
     uint8_t reserved[0x3d];
 } QEMU_PACKED CXLEventMemoryModule;
 
+/*
+ * CXL r3.1 section Table 8-50: Dynamic Capacity Event Record
+ * All fields little endian.
+ */
+typedef struct CXLEventDynamicCapacity {
+    CXLEventRecordHdr hdr;
+    uint8_t type;
+    uint8_t validity_flags;
+    uint16_t host_id;
+    uint8_t updated_region_id;
+    uint8_t flags;
+    uint8_t reserved2[2];
+    uint8_t dynamic_capacity_extent[0x28]; /* defined in cxl_device.h */
+    uint8_t reserved[0x18];
+    uint32_t extents_avail;
+    uint32_t tags_avail;
+} QEMU_PACKED CXLEventDynamicCapacity;
+
 #endif /* CXL_EVENTS_H */
diff --git a/qapi/cxl.json b/qapi/cxl.json
index 8cc4c72fa9..2645004666 100644
--- a/qapi/cxl.json
+++ b/qapi/cxl.json
@@ -19,13 +19,16 @@ 
 #
 # @fatal: Fatal Event Log
 #
+# @dyncap: Dynamic Capacity Event Log
+#
 # Since: 8.1
 ##
 { 'enum': 'CxlEventLog',
   'data': ['informational',
            'warning',
            'failure',
-           'fatal']
+           'fatal',
+           'dyncap']
  }
 
 ##
@@ -361,3 +364,59 @@ 
 ##
 {'command': 'cxl-inject-correctable-error',
  'data': {'path': 'str', 'type': 'CxlCorErrorType'}}
+
+##
+# @CXLDCExtentRecord:
+#
+# Record of a single extent to add/release
+#
+# @offset: offset to the start of the region where the extent to be operated
+# @len: length of the extent
+#
+# Since: 9.0
+##
+{ 'struct': 'CXLDCExtentRecord',
+  'data': {
+      'offset':'uint64',
+      'len': 'uint64'
+  }
+}
+
+##
+# @cxl-add-dynamic-capacity:
+#
+# Command to start add dynamic capacity extents flow. The device will
+# have to acknowledged the acceptance of the extents before they are usable.
+#
+# @path: CXL DCD canonical QOM path
+# @region-id: id of the region where the extent to add
+# @extents: Extents to add
+#
+# Since : 9.0
+##
+{ 'command': 'cxl-add-dynamic-capacity',
+  'data': { 'path': 'str',
+            'region-id': 'uint8',
+            'extents': [ 'CXLDCExtentRecord' ]
+           }
+}
+
+##
+# @cxl-release-dynamic-capacity:
+#
+# Command to start release dynamic capacity extents flow. The host will
+# need to respond to indicate that it has released the capacity before it
+# is made unavailable for read and write and can be re-added.
+#
+# @path: CXL DCD canonical QOM path
+# @region-id: id of the region where the extent to release
+# @extents: Extents to release
+#
+# Since : 9.0
+##
+{ 'command': 'cxl-release-dynamic-capacity',
+  'data': { 'path': 'str',
+            'region-id': 'uint8',
+            'extents': [ 'CXLDCExtentRecord' ]
+           }
+}