diff mbox series

[Qemu,v2,9/9] hw/mem/cxl_type3: Add dpa range validation for accesses to dc regions

Message ID 20230725183939.2741025-10-fan.ni@samsung.com (mailing list archive)
State New, archived
Headers show
Series [Qemu,v2,1/9] hw/cxl/cxl-mailbox-utils: Add dc_event_log_size field to output payload of identify memory device command | expand

Commit Message

Fan Ni July 25, 2023, 6:39 p.m. UTC
From: Fan Ni <nifan@outlook.com>

Not all dpa range in the dc regions is valid to access until an extent
covering the range has been added. Add a bitmap for each region to
record whether a dc block in the region has been backed by dc extent.
For the bitmap, a bit in the bitmap represents a dc block. When a dc
extent is added, all the bits of the blocks in the extent will be set,
which will be cleared when the extent is released.

Signed-off-by: Fan Ni <fan.ni@samsung.com>
---
 hw/mem/cxl_type3.c          | 155 ++++++++++++++++++++++++++++++++++++
 include/hw/cxl/cxl_device.h |   1 +
 2 files changed, 156 insertions(+)

Comments

Jonathan Cameron Aug. 7, 2023, 8:53 a.m. UTC | #1
On Tue, 25 Jul 2023 18:39:56 +0000
Fan Ni <fan.ni@samsung.com> wrote:

> From: Fan Ni <nifan@outlook.com>
> 
> Not all dpa range in the dc regions is valid to access until an extent
> covering the range has been added. Add a bitmap for each region to
> record whether a dc block in the region has been backed by dc extent.
> For the bitmap, a bit in the bitmap represents a dc block. When a dc
> extent is added, all the bits of the blocks in the extent will be set,
> which will be cleared when the extent is released.
> 
> Signed-off-by: Fan Ni <fan.ni@samsung.com>
Hi Fan,

A few of the bits of feedback apply broadly across the series.  Given I'm
rebasing this anyway to give myself something to test I'll tidy things up
(feel free to disagree with and revert any changes !) 
and push a tree out in next day or two.  I'll message when I've done so.

Jonathan

> ---
>  hw/mem/cxl_type3.c          | 155 ++++++++++++++++++++++++++++++++++++
>  include/hw/cxl/cxl_device.h |   1 +
>  2 files changed, 156 insertions(+)
> 
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 41a828598a..51943a36fc 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -787,13 +787,37 @@ static int cxl_create_dc_regions(CXLType3Dev *ct3d)
>          /* dsmad_handle is set when creating cdat table entries */
>          region->flags = 0;
>  
> +        region->blk_bitmap = bitmap_new(region->len / region->block_size);

In common with many allocators in qemu if this fails it calls abort()
internally so no need to handle potential errors.

> +        if (!region->blk_bitmap) {
> +            break;
> +        }
> +
>          region_base += region->len;
>      }
> +
> +    if (i < ct3d->dc.num_regions) {
> +        while (--i >= 0) {
> +            g_free(ct3d->dc.regions[i].blk_bitmap);
> +        }
> +        return -1;
> +    }
> +
>      QTAILQ_INIT(&ct3d->dc.extents);
>  
>      return 0;
>  }
>  
> +static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
> +{
> +    int i;
> +    struct CXLDCD_Region *region;
> +
> +    for (i = 0; i < ct3d->dc.num_regions; i++) {
> +        region = &ct3d->dc.regions[i];
> +        g_free(region->blk_bitmap);
> +    }
> +}
> +
>  static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
>  {
>      DeviceState *ds = DEVICE(ct3d);
> @@ -1021,6 +1045,7 @@ err_free_special_ops:
>      g_free(regs->special_ops);
>  err_address_space_free:
>      if (ct3d->dc.host_dc) {
> +        cxl_destroy_dc_regions(ct3d);
>          address_space_destroy(&ct3d->dc.host_dc_as);
>      }
>      if (ct3d->hostpmem) {
> @@ -1043,6 +1068,7 @@ static void ct3_exit(PCIDevice *pci_dev)
>      spdm_sock_fini(ct3d->doe_spdm.socket);
>      g_free(regs->special_ops);
>      if (ct3d->dc.host_dc) {
> +        cxl_destroy_dc_regions(ct3d);
>          address_space_destroy(&ct3d->dc.host_dc_as);
>      }
>      if (ct3d->hostpmem) {
> @@ -1053,6 +1079,110 @@ static void ct3_exit(PCIDevice *pci_dev)
>      }
>  }
>  
> +/*
> + * This function will marked the dpa range [dpa, dap + len) to be backed and
> + * accessible, this happens when a dc extent is added and accepted by the
> + * host.
> + */
> +static void set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> +        uint64_t len)
> +{
> +    int i;
> +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> +
> +    if (dpa < region->base
> +            || dpa >= region->base + ct3d->dc.total_capacity)
> +        return;
> +
> +    /*
> +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> +     * Region 7 for the highest DPA.
> +     * So we check from the last region to find where the dpa belongs.
> +     * access across multiple regions is not allowed.
> +     **/
> +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> +        region = &ct3d->dc.regions[i];
> +        if (dpa >= region->base) {
> +            break;
> +        }
> +    }
> +
> +    bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size,
> +            len / region->block_size);
> +}
> +
> +/*
> + * This function check whether a dpa range [dpa, dpa + len) has been backed
> + * with dc extents, used when validating read/write to dc regions
> + */
> +static bool test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> +        uint64_t len)
> +{
> +    int i;
> +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> +    uint64_t nbits;
> +    long nr;
> +
> +    if (dpa < region->base
> +            || dpa >= region->base + ct3d->dc.total_capacity)
> +        return false;
> +
> +    /*
> +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> +     * Region 7 for the highest DPA.
> +     * So we check from the last region to find where the dpa belongs.
> +     * access across multiple regions is not allowed.
> +     */
> +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> +        region = &ct3d->dc.regions[i];
> +        if (dpa >= region->base) {
> +            break;
> +        }
> +    }
> +
> +    nr = (dpa - region->base) / region->block_size;
> +    nbits = len / region->block_size;
> +    return find_next_zero_bit(region->blk_bitmap, nbits, nr) >= nr + nbits;
> +}
> +
> +/*
> + * This function will marked the dpa range [dpa, dap + len) to be unbacked and
> + * inaccessible, this happens when a dc extent is added and accepted by the
> + * host.
> + */
> +static void clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> +        uint64_t len)
> +{
> +    int i;
> +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> +    uint64_t nbits;
> +    long nr;
> +
> +    if (dpa < region->base
> +            || dpa >= region->base + ct3d->dc.total_capacity)
> +        return;
> +
> +    /*
> +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> +     * Region 7 for the highest DPA.
> +     * So we check from the last region to find where the dpa belongs.
> +     * access across multiple regions is not allowed.
> +     */
> +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> +        region = &ct3d->dc.regions[i];
> +        if (dpa >= region->base) {
> +            break;
> +        }
> +    }
> +
> +    nr = (dpa - region->base) / region->block_size;
> +    nbits = len / region->block_size;
> +    bitmap_clear(region->blk_bitmap, nr, nbits);
> +}
> +
>  static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
>  {
>      uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
> @@ -1145,6 +1275,10 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
>          *as = &ct3d->hostpmem_as;
>          *dpa_offset -= vmr_size;
>      } else {
> +        if (!test_region_block_backed(ct3d, *dpa_offset, size)) {
> +            return -ENODEV;
> +        }
> +
>          *as = &ct3d->dc.host_dc_as;
>          *dpa_offset -= (vmr_size + pmr_size);
>      }
> @@ -1944,6 +2078,27 @@ static void qmp_cxl_process_dynamic_capacity_event(const char *path,
>      }
>  
>      g_free(extents);
> +
> +    /* Another choice is to do the set/clear after getting mailbox response*/
> +    list = records;
> +    while (list) {
> +        dpa = list->value->dpa * 1024 * 1024;
> +        len = list->value->len * 1024 * 1024;
> +        rid = list->value->region_id;
> +
> +        switch (type) {
> +        case DC_EVENT_ADD_CAPACITY:
> +            set_region_block_backed(dcd, dpa, len);
> +            break;
> +        case DC_EVENT_RELEASE_CAPACITY:
> +            clear_region_block_backed(dcd, dpa, len);
> +            break;
> +        default:
> +            error_setg(errp, "DC event type not handled yet");
> +            break;
> +        }
> +        list = list->next;
> +    }
>  }
>  
>  void qmp_cxl_add_dynamic_capacity_event(const char *path,
> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> index 01a5eaca48..1f85c88017 100644
> --- a/include/hw/cxl/cxl_device.h
> +++ b/include/hw/cxl/cxl_device.h
> @@ -412,6 +412,7 @@ typedef struct CXLDCD_Region {
>      uint64_t block_size;
>      uint32_t dsmadhandle;
>      uint8_t flags;
> +    unsigned long *blk_bitmap;
>  } CXLDCD_Region;
>  
>  struct CXLType3Dev {
Jonathan Cameron Aug. 7, 2023, 9:37 a.m. UTC | #2
On Mon, 7 Aug 2023 09:53:42 +0100
Jonathan Cameron via <qemu-devel@nongnu.org> wrote:

> On Tue, 25 Jul 2023 18:39:56 +0000
> Fan Ni <fan.ni@samsung.com> wrote:
> 
> > From: Fan Ni <nifan@outlook.com>
> > 
> > Not all dpa range in the dc regions is valid to access until an extent
> > covering the range has been added. Add a bitmap for each region to
> > record whether a dc block in the region has been backed by dc extent.
> > For the bitmap, a bit in the bitmap represents a dc block. When a dc
> > extent is added, all the bits of the blocks in the extent will be set,
> > which will be cleared when the extent is released.
> > 
> > Signed-off-by: Fan Ni <fan.ni@samsung.com>  
> Hi Fan,
> 
> A few of the bits of feedback apply broadly across the series.  Given I'm
> rebasing this anyway to give myself something to test I'll tidy things up
> (feel free to disagree with and revert any changes !) 
> and push a tree out in next day or two.  I'll message when I've done so.
> 
> Jonathan
> 

I'll review here but note I've changed all this in my tree anyway 
unless I specifically add questions etc.

> > ---
> >  hw/mem/cxl_type3.c          | 155 ++++++++++++++++++++++++++++++++++++
> >  include/hw/cxl/cxl_device.h |   1 +
> >  2 files changed, 156 insertions(+)
> > 
> > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> > index 41a828598a..51943a36fc 100644
> > --- a/hw/mem/cxl_type3.c
> > +++ b/hw/mem/cxl_type3.c
> > @@ -787,13 +787,37 @@ static int cxl_create_dc_regions(CXLType3Dev *ct3d)
> >          /* dsmad_handle is set when creating cdat table entries */
> >          region->flags = 0;
> >  
> > +        region->blk_bitmap = bitmap_new(region->len / region->block_size);  
> 
> In common with many allocators in qemu if this fails it calls abort()
> internally so no need to handle potential errors.
> 
> > +        if (!region->blk_bitmap) {
> > +            break;
> > +        }
> > +
> >          region_base += region->len;
> >      }
> > +
> > +    if (i < ct3d->dc.num_regions) {
> > +        while (--i >= 0) {
> > +            g_free(ct3d->dc.regions[i].blk_bitmap);
> > +        }
> > +        return -1;
> > +    }
> > +
> >      QTAILQ_INIT(&ct3d->dc.extents);
> >  
> >      return 0;
> >  }
> >  
> > +static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
> > +{
> > +    int i;
> > +    struct CXLDCD_Region *region;
> > +
> > +    for (i = 0; i < ct3d->dc.num_regions; i++) {
> > +        region = &ct3d->dc.regions[i];
> > +        g_free(region->blk_bitmap);
> > +    }
> > +}
> > +
> >  static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
> >  {
> >      DeviceState *ds = DEVICE(ct3d);
> > @@ -1021,6 +1045,7 @@ err_free_special_ops:
> >      g_free(regs->special_ops);
> >  err_address_space_free:
> >      if (ct3d->dc.host_dc) {
> > +        cxl_destroy_dc_regions(ct3d);
> >          address_space_destroy(&ct3d->dc.host_dc_as);
> >      }
> >      if (ct3d->hostpmem) {
> > @@ -1043,6 +1068,7 @@ static void ct3_exit(PCIDevice *pci_dev)
> >      spdm_sock_fini(ct3d->doe_spdm.socket);
> >      g_free(regs->special_ops);
> >      if (ct3d->dc.host_dc) {
> > +        cxl_destroy_dc_regions(ct3d);
> >          address_space_destroy(&ct3d->dc.host_dc_as);
> >      }
> >      if (ct3d->hostpmem) {
> > @@ -1053,6 +1079,110 @@ static void ct3_exit(PCIDevice *pci_dev)
> >      }
> >  }
> >  
> > +/*
> > + * This function will marked the dpa range [dpa, dap + len) to be backed and
> > + * accessible, this happens when a dc extent is added and accepted by the
> > + * host.
> > + */
> > +static void set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,

I'd prefer all functions to be in the ct3 namespace.

> > +        uint64_t len)
> > +{
> > +    int i;

A large chunk of stuff here is repeated as it is just finding the
relevant region.  Pulled out to a ct3_find_dc_region() utility function.

> > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > +
> > +    if (dpa < region->base
> > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > +        return;
> > +
> > +    /*
> > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > +     * Region 7 for the highest DPA.
> > +     * So we check from the last region to find where the dpa belongs.
> > +     * access across multiple regions is not allowed.
> > +     **/
> > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > +        region = &ct3d->dc.regions[i];
> > +        if (dpa >= region->base) {
> > +            break;
> > +        }
> > +    }
> > +
> > +    bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size,
> > +            len / region->block_size);
> > +}
> > +
> > +/*
> > + * This function check whether a dpa range [dpa, dpa + len) has been backed
> > + * with dc extents, used when validating read/write to dc regions
> > + */
> > +static bool test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > +        uint64_t len)
> > +{
> > +    int i;
> > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > +    uint64_t nbits;
> > +    long nr;
> > +
> > +    if (dpa < region->base
> > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > +        return false;
> > +
> > +    /*
> > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > +     * Region 7 for the highest DPA.
> > +     * So we check from the last region to find where the dpa belongs.
> > +     * access across multiple regions is not allowed.
> > +     */
> > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > +        region = &ct3d->dc.regions[i];
> > +        if (dpa >= region->base) {
> > +            break;
> > +        }
> > +    }
> > +
> > +    nr = (dpa - region->base) / region->block_size;
> > +    nbits = len / region->block_size;
> > +    return find_next_zero_bit(region->blk_bitmap, nbits, nr) >= nr + nbits;
> > +}
> > +
> > +/*
> > + * This function will marked the dpa range [dpa, dap + len) to be unbacked and
> > + * inaccessible, this happens when a dc extent is added and accepted by the
> > + * host.
Second part of comment wrong (Cut and paste fun ;)

> > + */
> > +static void clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > +        uint64_t len)
> > +{
> > +    int i;
> > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > +    uint64_t nbits;
> > +    long nr;
> > +
> > +    if (dpa < region->base
> > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > +        return;
> > +
> > +    /*
> > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > +     * Region 7 for the highest DPA.
> > +     * So we check from the last region to find where the dpa belongs.
> > +     * access across multiple regions is not allowed.
> > +     */
> > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > +        region = &ct3d->dc.regions[i];
> > +        if (dpa >= region->base) {
> > +            break;
> > +        }
> > +    }
> > +
> > +    nr = (dpa - region->base) / region->block_size;
> > +    nbits = len / region->block_size;
> > +    bitmap_clear(region->blk_bitmap, nr, nbits);
> > +}
> > +
> >  static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
> >  {
> >      uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
> > @@ -1145,6 +1275,10 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
> >          *as = &ct3d->hostpmem_as;
> >          *dpa_offset -= vmr_size;
> >      } else {
> > +        if (!test_region_block_backed(ct3d, *dpa_offset, size)) {
> > +            return -ENODEV;
> > +        }
> > +
> >          *as = &ct3d->dc.host_dc_as;
> >          *dpa_offset -= (vmr_size + pmr_size);
> >      }
> > @@ -1944,6 +2078,27 @@ static void qmp_cxl_process_dynamic_capacity_event(const char *path,
> >      }
> >  
> >      g_free(extents);
> > +
> > +    /* Another choice is to do the set/clear after getting mailbox response*/

I haven't changed this yet - but it needs to be done on host acceptance, not on
the QMP command. We also need to validate it - so keep a record of what has
been offered and not yet accepted.  Unfortunately that probably doubles the bitmaps :(

I've updated the comment to reflect this.
> > +    list = records;
> > +    while (list) {
> > +        dpa = list->value->dpa * 1024 * 1024;
* MiB
> > +        len = list->value->len * 1024 * 1024;
> > +        rid = list->value->region_id;
> > +
> > +        switch (type) {
> > +        case DC_EVENT_ADD_CAPACITY:
> > +            set_region_block_backed(dcd, dpa, len);
> > +            break;
> > +        case DC_EVENT_RELEASE_CAPACITY:
> > +            clear_region_block_backed(dcd, dpa, len);
> > +            break;
> > +        default:
> > +            error_setg(errp, "DC event type not handled yet");
> > +            break;
> > +        }
> > +        list = list->next;
> > +    }
> >  }
> >  
> >  void qmp_cxl_add_dynamic_capacity_event(const char *path,
> > diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> > index 01a5eaca48..1f85c88017 100644
> > --- a/include/hw/cxl/cxl_device.h
> > +++ b/include/hw/cxl/cxl_device.h
> > @@ -412,6 +412,7 @@ typedef struct CXLDCD_Region {
> >      uint64_t block_size;
> >      uint32_t dsmadhandle;
> >      uint8_t flags;
> > +    unsigned long *blk_bitmap;
> >  } CXLDCD_Region;
> >  
> >  struct CXLType3Dev {  
> 
> 
>
Fan Ni Aug. 24, 2023, 8:49 p.m. UTC | #3
On Mon, Aug 07, 2023 at 09:53:42AM +0100, Jonathan Cameron wrote:
> On Tue, 25 Jul 2023 18:39:56 +0000
> Fan Ni <fan.ni@samsung.com> wrote:
>
> > From: Fan Ni <nifan@outlook.com>
> >
> > Not all dpa range in the dc regions is valid to access until an extent
> > covering the range has been added. Add a bitmap for each region to
> > record whether a dc block in the region has been backed by dc extent.
> > For the bitmap, a bit in the bitmap represents a dc block. When a dc
> > extent is added, all the bits of the blocks in the extent will be set,
> > which will be cleared when the extent is released.
> >
> > Signed-off-by: Fan Ni <fan.ni@samsung.com>
> Hi Fan,
>
> A few of the bits of feedback apply broadly across the series.  Given I'm
> rebasing this anyway to give myself something to test I'll tidy things up
> (feel free to disagree with and revert any changes !)
> and push a tree out in next day or two.  I'll message when I've done so.
>
> Jonathan

Hi Jonathan,
I tried DCD with your branch "cxl-2023-08-07", and noticed the
following,
1. You made some changes to the bitmap functionality, now it is only
used to validate extents when adding/releasing dc extents. My original
thought of adding the bitmap is to 1) validating extents for extent
add/release as you do; 2) Add validating when doing read/write to the dc
regions since some address region may not have valid extent added yet.
Do you think 2) is not necessary?

2. Your change introduced a bug in the code.
https://gitlab.com/jic23/qemu/-/blob/cxl-2023-08-07/hw/cxl/cxl-mailbox-utils.c?ref_type=heads#L1394
ct3d->dc.num_regions should be ct3d->dc.num_regions-1.

Thanks,
Fan

>
> > ---
> >  hw/mem/cxl_type3.c          | 155 ++++++++++++++++++++++++++++++++++++
> >  include/hw/cxl/cxl_device.h |   1 +
> >  2 files changed, 156 insertions(+)
> >
> > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> > index 41a828598a..51943a36fc 100644
> > --- a/hw/mem/cxl_type3.c
> > +++ b/hw/mem/cxl_type3.c
> > @@ -787,13 +787,37 @@ static int cxl_create_dc_regions(CXLType3Dev *ct3d)
> >          /* dsmad_handle is set when creating cdat table entries */
> >          region->flags = 0;
> >
> > +        region->blk_bitmap = bitmap_new(region->len / region->block_size);
>
> In common with many allocators in qemu if this fails it calls abort()
> internally so no need to handle potential errors.
>
> > +        if (!region->blk_bitmap) {
> > +            break;
> > +        }
> > +
> >          region_base += region->len;
> >      }
> > +
> > +    if (i < ct3d->dc.num_regions) {
> > +        while (--i >= 0) {
> > +            g_free(ct3d->dc.regions[i].blk_bitmap);
> > +        }
> > +        return -1;
> > +    }
> > +
> >      QTAILQ_INIT(&ct3d->dc.extents);
> >
> >      return 0;
> >  }
> >
> > +static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
> > +{
> > +    int i;
> > +    struct CXLDCD_Region *region;
> > +
> > +    for (i = 0; i < ct3d->dc.num_regions; i++) {
> > +        region = &ct3d->dc.regions[i];
> > +        g_free(region->blk_bitmap);
> > +    }
> > +}
> > +
> >  static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
> >  {
> >      DeviceState *ds = DEVICE(ct3d);
> > @@ -1021,6 +1045,7 @@ err_free_special_ops:
> >      g_free(regs->special_ops);
> >  err_address_space_free:
> >      if (ct3d->dc.host_dc) {
> > +        cxl_destroy_dc_regions(ct3d);
> >          address_space_destroy(&ct3d->dc.host_dc_as);
> >      }
> >      if (ct3d->hostpmem) {
> > @@ -1043,6 +1068,7 @@ static void ct3_exit(PCIDevice *pci_dev)
> >      spdm_sock_fini(ct3d->doe_spdm.socket);
> >      g_free(regs->special_ops);
> >      if (ct3d->dc.host_dc) {
> > +        cxl_destroy_dc_regions(ct3d);
> >          address_space_destroy(&ct3d->dc.host_dc_as);
> >      }
> >      if (ct3d->hostpmem) {
> > @@ -1053,6 +1079,110 @@ static void ct3_exit(PCIDevice *pci_dev)
> >      }
> >  }
> >
> > +/*
> > + * This function will marked the dpa range [dpa, dap + len) to be backed and
> > + * accessible, this happens when a dc extent is added and accepted by the
> > + * host.
> > + */
> > +static void set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > +        uint64_t len)
> > +{
> > +    int i;
> > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > +
> > +    if (dpa < region->base
> > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > +        return;
> > +
> > +    /*
> > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > +     * Region 7 for the highest DPA.
> > +     * So we check from the last region to find where the dpa belongs.
> > +     * access across multiple regions is not allowed.
> > +     **/
> > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > +        region = &ct3d->dc.regions[i];
> > +        if (dpa >= region->base) {
> > +            break;
> > +        }
> > +    }
> > +
> > +    bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size,
> > +            len / region->block_size);
> > +}
> > +
> > +/*
> > + * This function check whether a dpa range [dpa, dpa + len) has been backed
> > + * with dc extents, used when validating read/write to dc regions
> > + */
> > +static bool test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > +        uint64_t len)
> > +{
> > +    int i;
> > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > +    uint64_t nbits;
> > +    long nr;
> > +
> > +    if (dpa < region->base
> > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > +        return false;
> > +
> > +    /*
> > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > +     * Region 7 for the highest DPA.
> > +     * So we check from the last region to find where the dpa belongs.
> > +     * access across multiple regions is not allowed.
> > +     */
> > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > +        region = &ct3d->dc.regions[i];
> > +        if (dpa >= region->base) {
> > +            break;
> > +        }
> > +    }
> > +
> > +    nr = (dpa - region->base) / region->block_size;
> > +    nbits = len / region->block_size;
> > +    return find_next_zero_bit(region->blk_bitmap, nbits, nr) >= nr + nbits;
> > +}
> > +
> > +/*
> > + * This function will marked the dpa range [dpa, dap + len) to be unbacked and
> > + * inaccessible, this happens when a dc extent is added and accepted by the
> > + * host.
> > + */
> > +static void clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > +        uint64_t len)
> > +{
> > +    int i;
> > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > +    uint64_t nbits;
> > +    long nr;
> > +
> > +    if (dpa < region->base
> > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > +        return;
> > +
> > +    /*
> > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > +     * Region 7 for the highest DPA.
> > +     * So we check from the last region to find where the dpa belongs.
> > +     * access across multiple regions is not allowed.
> > +     */
> > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > +        region = &ct3d->dc.regions[i];
> > +        if (dpa >= region->base) {
> > +            break;
> > +        }
> > +    }
> > +
> > +    nr = (dpa - region->base) / region->block_size;
> > +    nbits = len / region->block_size;
> > +    bitmap_clear(region->blk_bitmap, nr, nbits);
> > +}
> > +
> >  static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
> >  {
> >      uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
> > @@ -1145,6 +1275,10 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
> >          *as = &ct3d->hostpmem_as;
> >          *dpa_offset -= vmr_size;
> >      } else {
> > +        if (!test_region_block_backed(ct3d, *dpa_offset, size)) {
> > +            return -ENODEV;
> > +        }
> > +
> >          *as = &ct3d->dc.host_dc_as;
> >          *dpa_offset -= (vmr_size + pmr_size);
> >      }
> > @@ -1944,6 +2078,27 @@ static void qmp_cxl_process_dynamic_capacity_event(const char *path,
> >      }
> >
> >      g_free(extents);
> > +
> > +    /* Another choice is to do the set/clear after getting mailbox response*/
> > +    list = records;
> > +    while (list) {
> > +        dpa = list->value->dpa * 1024 * 1024;
> > +        len = list->value->len * 1024 * 1024;
> > +        rid = list->value->region_id;
> > +
> > +        switch (type) {
> > +        case DC_EVENT_ADD_CAPACITY:
> > +            set_region_block_backed(dcd, dpa, len);
> > +            break;
> > +        case DC_EVENT_RELEASE_CAPACITY:
> > +            clear_region_block_backed(dcd, dpa, len);
> > +            break;
> > +        default:
> > +            error_setg(errp, "DC event type not handled yet");
> > +            break;
> > +        }
> > +        list = list->next;
> > +    }
> >  }
> >
> >  void qmp_cxl_add_dynamic_capacity_event(const char *path,
> > diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> > index 01a5eaca48..1f85c88017 100644
> > --- a/include/hw/cxl/cxl_device.h
> > +++ b/include/hw/cxl/cxl_device.h
> > @@ -412,6 +412,7 @@ typedef struct CXLDCD_Region {
> >      uint64_t block_size;
> >      uint32_t dsmadhandle;
> >      uint8_t flags;
> > +    unsigned long *blk_bitmap;
> >  } CXLDCD_Region;
> >
> >  struct CXLType3Dev {
>
Jonathan Cameron Aug. 25, 2023, 11:42 a.m. UTC | #4
On Thu, 24 Aug 2023 13:49:00 -0700
Fan Ni <fan.ni@gmx.us> wrote:

> On Mon, Aug 07, 2023 at 09:53:42AM +0100, Jonathan Cameron wrote:
> > On Tue, 25 Jul 2023 18:39:56 +0000
> > Fan Ni <fan.ni@samsung.com> wrote:
> >  
> > > From: Fan Ni <nifan@outlook.com>
> > >
> > > Not all dpa range in the dc regions is valid to access until an extent
> > > covering the range has been added. Add a bitmap for each region to
> > > record whether a dc block in the region has been backed by dc extent.
> > > For the bitmap, a bit in the bitmap represents a dc block. When a dc
> > > extent is added, all the bits of the blocks in the extent will be set,
> > > which will be cleared when the extent is released.
> > >
> > > Signed-off-by: Fan Ni <fan.ni@samsung.com>  
> > Hi Fan,
> >
> > A few of the bits of feedback apply broadly across the series.  Given I'm
> > rebasing this anyway to give myself something to test I'll tidy things up
> > (feel free to disagree with and revert any changes !)
> > and push a tree out in next day or two.  I'll message when I've done so.
> >
> > Jonathan  
> 
> Hi Jonathan,
> I tried DCD with your branch "cxl-2023-08-07", and noticed the
> following,
> 1. You made some changes to the bitmap functionality, now it is only
> used to validate extents when adding/releasing dc extents. My original
> thought of adding the bitmap is to 1) validating extents for extent
> add/release as you do; 2) Add validating when doing read/write to the dc
> regions since some address region may not have valid extent added yet.
> Do you think 2) is not necessary?

Change wasn't intentional. I probably just messed up the rebase!

> 
> 2. Your change introduced a bug in the code.
> https://gitlab.com/jic23/qemu/-/blob/cxl-2023-08-07/hw/cxl/cxl-mailbox-utils.c?ref_type=heads#L1394
> ct3d->dc.num_regions should be ct3d->dc.num_regions-1.
Thanks.  Given I might forget about about it, if you want to incorporate that in
your next version that would be great. I might remember to fix it in the meantime!

Jonathan

> 
> Thanks,
> Fan
> 
> >  
> > > ---
> > >  hw/mem/cxl_type3.c          | 155 ++++++++++++++++++++++++++++++++++++
> > >  include/hw/cxl/cxl_device.h |   1 +
> > >  2 files changed, 156 insertions(+)
> > >
> > > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> > > index 41a828598a..51943a36fc 100644
> > > --- a/hw/mem/cxl_type3.c
> > > +++ b/hw/mem/cxl_type3.c
> > > @@ -787,13 +787,37 @@ static int cxl_create_dc_regions(CXLType3Dev *ct3d)
> > >          /* dsmad_handle is set when creating cdat table entries */
> > >          region->flags = 0;
> > >
> > > +        region->blk_bitmap = bitmap_new(region->len / region->block_size);  
> >
> > In common with many allocators in qemu if this fails it calls abort()
> > internally so no need to handle potential errors.
> >  
> > > +        if (!region->blk_bitmap) {
> > > +            break;
> > > +        }
> > > +
> > >          region_base += region->len;
> > >      }
> > > +
> > > +    if (i < ct3d->dc.num_regions) {
> > > +        while (--i >= 0) {
> > > +            g_free(ct3d->dc.regions[i].blk_bitmap);
> > > +        }
> > > +        return -1;
> > > +    }
> > > +
> > >      QTAILQ_INIT(&ct3d->dc.extents);
> > >
> > >      return 0;
> > >  }
> > >
> > > +static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
> > > +{
> > > +    int i;
> > > +    struct CXLDCD_Region *region;
> > > +
> > > +    for (i = 0; i < ct3d->dc.num_regions; i++) {
> > > +        region = &ct3d->dc.regions[i];
> > > +        g_free(region->blk_bitmap);
> > > +    }
> > > +}
> > > +
> > >  static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
> > >  {
> > >      DeviceState *ds = DEVICE(ct3d);
> > > @@ -1021,6 +1045,7 @@ err_free_special_ops:
> > >      g_free(regs->special_ops);
> > >  err_address_space_free:
> > >      if (ct3d->dc.host_dc) {
> > > +        cxl_destroy_dc_regions(ct3d);
> > >          address_space_destroy(&ct3d->dc.host_dc_as);
> > >      }
> > >      if (ct3d->hostpmem) {
> > > @@ -1043,6 +1068,7 @@ static void ct3_exit(PCIDevice *pci_dev)
> > >      spdm_sock_fini(ct3d->doe_spdm.socket);
> > >      g_free(regs->special_ops);
> > >      if (ct3d->dc.host_dc) {
> > > +        cxl_destroy_dc_regions(ct3d);
> > >          address_space_destroy(&ct3d->dc.host_dc_as);
> > >      }
> > >      if (ct3d->hostpmem) {
> > > @@ -1053,6 +1079,110 @@ static void ct3_exit(PCIDevice *pci_dev)
> > >      }
> > >  }
> > >
> > > +/*
> > > + * This function will marked the dpa range [dpa, dap + len) to be backed and
> > > + * accessible, this happens when a dc extent is added and accepted by the
> > > + * host.
> > > + */
> > > +static void set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > > +        uint64_t len)
> > > +{
> > > +    int i;
> > > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > > +
> > > +    if (dpa < region->base
> > > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > > +        return;
> > > +
> > > +    /*
> > > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > > +     * Region 7 for the highest DPA.
> > > +     * So we check from the last region to find where the dpa belongs.
> > > +     * access across multiple regions is not allowed.
> > > +     **/
> > > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > > +        region = &ct3d->dc.regions[i];
> > > +        if (dpa >= region->base) {
> > > +            break;
> > > +        }
> > > +    }
> > > +
> > > +    bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size,
> > > +            len / region->block_size);
> > > +}
> > > +
> > > +/*
> > > + * This function check whether a dpa range [dpa, dpa + len) has been backed
> > > + * with dc extents, used when validating read/write to dc regions
> > > + */
> > > +static bool test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > > +        uint64_t len)
> > > +{
> > > +    int i;
> > > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > > +    uint64_t nbits;
> > > +    long nr;
> > > +
> > > +    if (dpa < region->base
> > > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > > +        return false;
> > > +
> > > +    /*
> > > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > > +     * Region 7 for the highest DPA.
> > > +     * So we check from the last region to find where the dpa belongs.
> > > +     * access across multiple regions is not allowed.
> > > +     */
> > > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > > +        region = &ct3d->dc.regions[i];
> > > +        if (dpa >= region->base) {
> > > +            break;
> > > +        }
> > > +    }
> > > +
> > > +    nr = (dpa - region->base) / region->block_size;
> > > +    nbits = len / region->block_size;
> > > +    return find_next_zero_bit(region->blk_bitmap, nbits, nr) >= nr + nbits;
> > > +}
> > > +
> > > +/*
> > > + * This function will marked the dpa range [dpa, dap + len) to be unbacked and
> > > + * inaccessible, this happens when a dc extent is added and accepted by the
> > > + * host.
> > > + */
> > > +static void clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > > +        uint64_t len)
> > > +{
> > > +    int i;
> > > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > > +    uint64_t nbits;
> > > +    long nr;
> > > +
> > > +    if (dpa < region->base
> > > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > > +        return;
> > > +
> > > +    /*
> > > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > > +     * Region 7 for the highest DPA.
> > > +     * So we check from the last region to find where the dpa belongs.
> > > +     * access across multiple regions is not allowed.
> > > +     */
> > > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > > +        region = &ct3d->dc.regions[i];
> > > +        if (dpa >= region->base) {
> > > +            break;
> > > +        }
> > > +    }
> > > +
> > > +    nr = (dpa - region->base) / region->block_size;
> > > +    nbits = len / region->block_size;
> > > +    bitmap_clear(region->blk_bitmap, nr, nbits);
> > > +}
> > > +
> > >  static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
> > >  {
> > >      uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
> > > @@ -1145,6 +1275,10 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
> > >          *as = &ct3d->hostpmem_as;
> > >          *dpa_offset -= vmr_size;
> > >      } else {
> > > +        if (!test_region_block_backed(ct3d, *dpa_offset, size)) {
> > > +            return -ENODEV;
> > > +        }
> > > +
> > >          *as = &ct3d->dc.host_dc_as;
> > >          *dpa_offset -= (vmr_size + pmr_size);
> > >      }
> > > @@ -1944,6 +2078,27 @@ static void qmp_cxl_process_dynamic_capacity_event(const char *path,
> > >      }
> > >
> > >      g_free(extents);
> > > +
> > > +    /* Another choice is to do the set/clear after getting mailbox response*/
> > > +    list = records;
> > > +    while (list) {
> > > +        dpa = list->value->dpa * 1024 * 1024;
> > > +        len = list->value->len * 1024 * 1024;
> > > +        rid = list->value->region_id;
> > > +
> > > +        switch (type) {
> > > +        case DC_EVENT_ADD_CAPACITY:
> > > +            set_region_block_backed(dcd, dpa, len);
> > > +            break;
> > > +        case DC_EVENT_RELEASE_CAPACITY:
> > > +            clear_region_block_backed(dcd, dpa, len);
> > > +            break;
> > > +        default:
> > > +            error_setg(errp, "DC event type not handled yet");
> > > +            break;
> > > +        }
> > > +        list = list->next;
> > > +    }
> > >  }
> > >
> > >  void qmp_cxl_add_dynamic_capacity_event(const char *path,
> > > diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> > > index 01a5eaca48..1f85c88017 100644
> > > --- a/include/hw/cxl/cxl_device.h
> > > +++ b/include/hw/cxl/cxl_device.h
> > > @@ -412,6 +412,7 @@ typedef struct CXLDCD_Region {
> > >      uint64_t block_size;
> > >      uint32_t dsmadhandle;
> > >      uint8_t flags;
> > > +    unsigned long *blk_bitmap;
> > >  } CXLDCD_Region;
> > >
> > >  struct CXLType3Dev {  
> >
Fan Ni Aug. 25, 2023, 4:34 p.m. UTC | #5
On Fri, Aug 25, 2023 at 12:42:56PM +0100, Jonathan Cameron wrote:
> On Thu, 24 Aug 2023 13:49:00 -0700
> Fan Ni <fan.ni@gmx.us> wrote:
>
> > On Mon, Aug 07, 2023 at 09:53:42AM +0100, Jonathan Cameron wrote:
> > > On Tue, 25 Jul 2023 18:39:56 +0000
> > > Fan Ni <fan.ni@samsung.com> wrote:
> > >
> > > > From: Fan Ni <nifan@outlook.com>
> > > >
> > > > Not all dpa range in the dc regions is valid to access until an extent
> > > > covering the range has been added. Add a bitmap for each region to
> > > > record whether a dc block in the region has been backed by dc extent.
> > > > For the bitmap, a bit in the bitmap represents a dc block. When a dc
> > > > extent is added, all the bits of the blocks in the extent will be set,
> > > > which will be cleared when the extent is released.
> > > >
> > > > Signed-off-by: Fan Ni <fan.ni@samsung.com>
> > > Hi Fan,
> > >
> > > A few of the bits of feedback apply broadly across the series.  Given I'm
> > > rebasing this anyway to give myself something to test I'll tidy things up
> > > (feel free to disagree with and revert any changes !)
> > > and push a tree out in next day or two.  I'll message when I've done so.
> > >
> > > Jonathan
> >
> > Hi Jonathan,
> > I tried DCD with your branch "cxl-2023-08-07", and noticed the
> > following,
> > 1. You made some changes to the bitmap functionality, now it is only
> > used to validate extents when adding/releasing dc extents. My original
> > thought of adding the bitmap is to 1) validating extents for extent
> > add/release as you do; 2) Add validating when doing read/write to the dc
> > regions since some address region may not have valid extent added yet.
> > Do you think 2) is not necessary?
>
> Change wasn't intentional. I probably just messed up the rebase!

Just double checked the code. The logic is still there, but in another
patch in the series, so no issue and ignore my previous question.
Sorry for the confusion.

>
> >
> > 2. Your change introduced a bug in the code.
> > https://gitlab.com/jic23/qemu/-/blob/cxl-2023-08-07/hw/cxl/cxl-mailbox-utils.c?ref_type=heads#L1394
> > ct3d->dc.num_regions should be ct3d->dc.num_regions-1.
> Thanks.  Given I might forget about about it, if you want to incorporate that in
> your next version that would be great. I might remember to fix it in the meantime!
>
> Jonathan
>

My code does not have this. It seems you added the lastregion variable
to record the last region, while I use the following logic to iterate
the regions and record last region automatically while collecting
min_block_size.

+    for (i = 1; i < dev->dc.num_regions; i++) {
+        region = &dev->dc.regions[i];
+        if (min_block_size > region->block_size) {
+            min_block_size = region->block_size;
+        }
+    }
+
+    blk_bitmap = bitmap_new((region->len + region->base
+                - dev->dc.regions[0].base) / min_block_size);


Fan

> >
> > Thanks,
> > Fan
> >
> > >
> > > > ---
> > > >  hw/mem/cxl_type3.c          | 155 ++++++++++++++++++++++++++++++++++++
> > > >  include/hw/cxl/cxl_device.h |   1 +
> > > >  2 files changed, 156 insertions(+)
> > > >
> > > > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> > > > index 41a828598a..51943a36fc 100644
> > > > --- a/hw/mem/cxl_type3.c
> > > > +++ b/hw/mem/cxl_type3.c
> > > > @@ -787,13 +787,37 @@ static int cxl_create_dc_regions(CXLType3Dev *ct3d)
> > > >          /* dsmad_handle is set when creating cdat table entries */
> > > >          region->flags = 0;
> > > >
> > > > +        region->blk_bitmap = bitmap_new(region->len / region->block_size);
> > >
> > > In common with many allocators in qemu if this fails it calls abort()
> > > internally so no need to handle potential errors.
> > >
> > > > +        if (!region->blk_bitmap) {
> > > > +            break;
> > > > +        }
> > > > +
> > > >          region_base += region->len;
> > > >      }
> > > > +
> > > > +    if (i < ct3d->dc.num_regions) {
> > > > +        while (--i >= 0) {
> > > > +            g_free(ct3d->dc.regions[i].blk_bitmap);
> > > > +        }
> > > > +        return -1;
> > > > +    }
> > > > +
> > > >      QTAILQ_INIT(&ct3d->dc.extents);
> > > >
> > > >      return 0;
> > > >  }
> > > >
> > > > +static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
> > > > +{
> > > > +    int i;
> > > > +    struct CXLDCD_Region *region;
> > > > +
> > > > +    for (i = 0; i < ct3d->dc.num_regions; i++) {
> > > > +        region = &ct3d->dc.regions[i];
> > > > +        g_free(region->blk_bitmap);
> > > > +    }
> > > > +}
> > > > +
> > > >  static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
> > > >  {
> > > >      DeviceState *ds = DEVICE(ct3d);
> > > > @@ -1021,6 +1045,7 @@ err_free_special_ops:
> > > >      g_free(regs->special_ops);
> > > >  err_address_space_free:
> > > >      if (ct3d->dc.host_dc) {
> > > > +        cxl_destroy_dc_regions(ct3d);
> > > >          address_space_destroy(&ct3d->dc.host_dc_as);
> > > >      }
> > > >      if (ct3d->hostpmem) {
> > > > @@ -1043,6 +1068,7 @@ static void ct3_exit(PCIDevice *pci_dev)
> > > >      spdm_sock_fini(ct3d->doe_spdm.socket);
> > > >      g_free(regs->special_ops);
> > > >      if (ct3d->dc.host_dc) {
> > > > +        cxl_destroy_dc_regions(ct3d);
> > > >          address_space_destroy(&ct3d->dc.host_dc_as);
> > > >      }
> > > >      if (ct3d->hostpmem) {
> > > > @@ -1053,6 +1079,110 @@ static void ct3_exit(PCIDevice *pci_dev)
> > > >      }
> > > >  }
> > > >
> > > > +/*
> > > > + * This function will marked the dpa range [dpa, dap + len) to be backed and
> > > > + * accessible, this happens when a dc extent is added and accepted by the
> > > > + * host.
> > > > + */
> > > > +static void set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > > > +        uint64_t len)
> > > > +{
> > > > +    int i;
> > > > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > > > +
> > > > +    if (dpa < region->base
> > > > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > > > +        return;
> > > > +
> > > > +    /*
> > > > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > > > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > > > +     * Region 7 for the highest DPA.
> > > > +     * So we check from the last region to find where the dpa belongs.
> > > > +     * access across multiple regions is not allowed.
> > > > +     **/
> > > > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > > > +        region = &ct3d->dc.regions[i];
> > > > +        if (dpa >= region->base) {
> > > > +            break;
> > > > +        }
> > > > +    }
> > > > +
> > > > +    bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size,
> > > > +            len / region->block_size);
> > > > +}
> > > > +
> > > > +/*
> > > > + * This function check whether a dpa range [dpa, dpa + len) has been backed
> > > > + * with dc extents, used when validating read/write to dc regions
> > > > + */
> > > > +static bool test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > > > +        uint64_t len)
> > > > +{
> > > > +    int i;
> > > > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > > > +    uint64_t nbits;
> > > > +    long nr;
> > > > +
> > > > +    if (dpa < region->base
> > > > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > > > +        return false;
> > > > +
> > > > +    /*
> > > > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > > > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > > > +     * Region 7 for the highest DPA.
> > > > +     * So we check from the last region to find where the dpa belongs.
> > > > +     * access across multiple regions is not allowed.
> > > > +     */
> > > > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > > > +        region = &ct3d->dc.regions[i];
> > > > +        if (dpa >= region->base) {
> > > > +            break;
> > > > +        }
> > > > +    }
> > > > +
> > > > +    nr = (dpa - region->base) / region->block_size;
> > > > +    nbits = len / region->block_size;
> > > > +    return find_next_zero_bit(region->blk_bitmap, nbits, nr) >= nr + nbits;
> > > > +}
> > > > +
> > > > +/*
> > > > + * This function will marked the dpa range [dpa, dap + len) to be unbacked and
> > > > + * inaccessible, this happens when a dc extent is added and accepted by the
> > > > + * host.
> > > > + */
> > > > +static void clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > > > +        uint64_t len)
> > > > +{
> > > > +    int i;
> > > > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > > > +    uint64_t nbits;
> > > > +    long nr;
> > > > +
> > > > +    if (dpa < region->base
> > > > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > > > +        return;
> > > > +
> > > > +    /*
> > > > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > > > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > > > +     * Region 7 for the highest DPA.
> > > > +     * So we check from the last region to find where the dpa belongs.
> > > > +     * access across multiple regions is not allowed.
> > > > +     */
> > > > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > > > +        region = &ct3d->dc.regions[i];
> > > > +        if (dpa >= region->base) {
> > > > +            break;
> > > > +        }
> > > > +    }
> > > > +
> > > > +    nr = (dpa - region->base) / region->block_size;
> > > > +    nbits = len / region->block_size;
> > > > +    bitmap_clear(region->blk_bitmap, nr, nbits);
> > > > +}
> > > > +
> > > >  static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
> > > >  {
> > > >      uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
> > > > @@ -1145,6 +1275,10 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
> > > >          *as = &ct3d->hostpmem_as;
> > > >          *dpa_offset -= vmr_size;
> > > >      } else {
> > > > +        if (!test_region_block_backed(ct3d, *dpa_offset, size)) {
> > > > +            return -ENODEV;
> > > > +        }
> > > > +
> > > >          *as = &ct3d->dc.host_dc_as;
> > > >          *dpa_offset -= (vmr_size + pmr_size);
> > > >      }
> > > > @@ -1944,6 +2078,27 @@ static void qmp_cxl_process_dynamic_capacity_event(const char *path,
> > > >      }
> > > >
> > > >      g_free(extents);
> > > > +
> > > > +    /* Another choice is to do the set/clear after getting mailbox response*/
> > > > +    list = records;
> > > > +    while (list) {
> > > > +        dpa = list->value->dpa * 1024 * 1024;
> > > > +        len = list->value->len * 1024 * 1024;
> > > > +        rid = list->value->region_id;
> > > > +
> > > > +        switch (type) {
> > > > +        case DC_EVENT_ADD_CAPACITY:
> > > > +            set_region_block_backed(dcd, dpa, len);
> > > > +            break;
> > > > +        case DC_EVENT_RELEASE_CAPACITY:
> > > > +            clear_region_block_backed(dcd, dpa, len);
> > > > +            break;
> > > > +        default:
> > > > +            error_setg(errp, "DC event type not handled yet");
> > > > +            break;
> > > > +        }
> > > > +        list = list->next;
> > > > +    }
> > > >  }
> > > >
> > > >  void qmp_cxl_add_dynamic_capacity_event(const char *path,
> > > > diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> > > > index 01a5eaca48..1f85c88017 100644
> > > > --- a/include/hw/cxl/cxl_device.h
> > > > +++ b/include/hw/cxl/cxl_device.h
> > > > @@ -412,6 +412,7 @@ typedef struct CXLDCD_Region {
> > > >      uint64_t block_size;
> > > >      uint32_t dsmadhandle;
> > > >      uint8_t flags;
> > > > +    unsigned long *blk_bitmap;
> > > >  } CXLDCD_Region;
> > > >
> > > >  struct CXLType3Dev {
> > >
>
Jørgen Hansen Aug. 30, 2023, 12:08 p.m. UTC | #6
On 7/25/23 20:39, Fan Ni wrote:
> From: Fan Ni <nifan@outlook.com>
> 
> Not all dpa range in the dc regions is valid to access until an extent
> covering the range has been added. Add a bitmap for each region to
> record whether a dc block in the region has been backed by dc extent.
> For the bitmap, a bit in the bitmap represents a dc block. When a dc
> extent is added, all the bits of the blocks in the extent will be set,
> which will be cleared when the extent is released.
> 
> Signed-off-by: Fan Ni <fan.ni@samsung.com>

Hi Fan,

Great to see this being implemented. I've been playing around with it 
for a bit, and ran into the issue mentioned below.

> ---
>   hw/mem/cxl_type3.c          | 155 ++++++++++++++++++++++++++++++++++++
>   include/hw/cxl/cxl_device.h |   1 +
>   2 files changed, 156 insertions(+)
> 
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 41a828598a..51943a36fc 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -787,13 +787,37 @@ static int cxl_create_dc_regions(CXLType3Dev *ct3d)
>           /* dsmad_handle is set when creating cdat table entries */
>           region->flags = 0;
> 
> +        region->blk_bitmap = bitmap_new(region->len / region->block_size);
> +        if (!region->blk_bitmap) {
> +            break;
> +        }
> +
>           region_base += region->len;
>       }
> +
> +    if (i < ct3d->dc.num_regions) {
> +        while (--i >= 0) {
> +            g_free(ct3d->dc.regions[i].blk_bitmap);
> +        }
> +        return -1;
> +    }
> +
>       QTAILQ_INIT(&ct3d->dc.extents);
> 
>       return 0;
>   }
> 
> +static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
> +{
> +    int i;
> +    struct CXLDCD_Region *region;
> +
> +    for (i = 0; i < ct3d->dc.num_regions; i++) {
> +        region = &ct3d->dc.regions[i];
> +        g_free(region->blk_bitmap);
> +    }
> +}
> +
>   static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
>   {
>       DeviceState *ds = DEVICE(ct3d);
> @@ -1021,6 +1045,7 @@ err_free_special_ops:
>       g_free(regs->special_ops);
>   err_address_space_free:
>       if (ct3d->dc.host_dc) {
> +        cxl_destroy_dc_regions(ct3d);
>           address_space_destroy(&ct3d->dc.host_dc_as);
>       }
>       if (ct3d->hostpmem) {
> @@ -1043,6 +1068,7 @@ static void ct3_exit(PCIDevice *pci_dev)
>       spdm_sock_fini(ct3d->doe_spdm.socket);
>       g_free(regs->special_ops);
>       if (ct3d->dc.host_dc) {
> +        cxl_destroy_dc_regions(ct3d);
>           address_space_destroy(&ct3d->dc.host_dc_as);
>       }
>       if (ct3d->hostpmem) {
> @@ -1053,6 +1079,110 @@ static void ct3_exit(PCIDevice *pci_dev)
>       }
>   }
> 
> +/*
> + * This function will marked the dpa range [dpa, dap + len) to be backed and
> + * accessible, this happens when a dc extent is added and accepted by the
> + * host.
> + */
> +static void set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> +        uint64_t len)
> +{
> +    int i;
> +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> +
> +    if (dpa < region->base
> +            || dpa >= region->base + ct3d->dc.total_capacity)
> +        return;
> +
> +    /*
> +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> +     * Region 7 for the highest DPA.
> +     * So we check from the last region to find where the dpa belongs.
> +     * access across multiple regions is not allowed.
> +     **/
> +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> +        region = &ct3d->dc.regions[i];
> +        if (dpa >= region->base) {
> +            break;
> +        }
> +    }
> +
> +    bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size,
> +            len / region->block_size);
> +}
> +
> +/*
> + * This function check whether a dpa range [dpa, dpa + len) has been backed
> + * with dc extents, used when validating read/write to dc regions
> + */
> +static bool test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> +        uint64_t len)
> +{
> +    int i;
> +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> +    uint64_t nbits;
> +    long nr;
> +
> +    if (dpa < region->base
> +            || dpa >= region->base + ct3d->dc.total_capacity)
> +        return false;
> +
> +    /*
> +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> +     * Region 7 for the highest DPA.
> +     * So we check from the last region to find where the dpa belongs.
> +     * access across multiple regions is not allowed.
> +     */
> +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> +        region = &ct3d->dc.regions[i];
> +        if (dpa >= region->base) {
> +            break;
> +        }
> +    }
> +
> +    nr = (dpa - region->base) / region->block_size;

> +    nbits = len / region->block_size;
> +    return find_next_zero_bit(region->blk_bitmap, nbits, nr) >= nr + nbits;

The 2nd parameter to find_next_zero_bit is the length of the bitmap, so 
shouldn't this be something like (also considering that len is the 
read/write len, and will be smaller than the region block size):

   nbits = DIV_ROUND_UP(len, region->block_size); 
 

   return find_next_zero_bit(region->blk_bitmap, nbits + nr, nr) ==
          nbits + nr;

> +}
> +
> +/*
> + * This function will marked the dpa range [dpa, dap + len) to be unbacked and
> + * inaccessible, this happens when a dc extent is added and accepted by the
> + * host.
> + */
> +static void clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> +        uint64_t len)
> +{
> +    int i;
> +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> +    uint64_t nbits;
> +    long nr;
> +
> +    if (dpa < region->base
> +            || dpa >= region->base + ct3d->dc.total_capacity)
> +        return;
> +
> +    /*
> +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> +     * Region 7 for the highest DPA.
> +     * So we check from the last region to find where the dpa belongs.
> +     * access across multiple regions is not allowed.
> +     */
> +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> +        region = &ct3d->dc.regions[i];
> +        if (dpa >= region->base) {
> +            break;
> +        }
> +    }
> +
> +    nr = (dpa - region->base) / region->block_size;
> +    nbits = len / region->block_size;
> +    bitmap_clear(region->blk_bitmap, nr, nbits);
> +}
> +
>   static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
>   {
>       uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
> @@ -1145,6 +1275,10 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
>           *as = &ct3d->hostpmem_as;
>           *dpa_offset -= vmr_size;
>       } else {
> +        if (!test_region_block_backed(ct3d, *dpa_offset, size)) {
> +            return -ENODEV;
> +        }
> +
>           *as = &ct3d->dc.host_dc_as;
>           *dpa_offset -= (vmr_size + pmr_size);
>       }
> @@ -1944,6 +2078,27 @@ static void qmp_cxl_process_dynamic_capacity_event(const char *path,
>       }
> 
>       g_free(extents);
> +
> +    /* Another choice is to do the set/clear after getting mailbox response*/
> +    list = records;
> +    while (list) {
> +        dpa = list->value->dpa * 1024 * 1024;
> +        len = list->value->len * 1024 * 1024;
> +        rid = list->value->region_id;
> +
> +        switch (type) {
> +        case DC_EVENT_ADD_CAPACITY:
> +            set_region_block_backed(dcd, dpa, len);
> +            break;
> +        case DC_EVENT_RELEASE_CAPACITY:
> +            clear_region_block_backed(dcd, dpa, len);
> +            break;
> +        default:
> +            error_setg(errp, "DC event type not handled yet");
> +            break;
> +        }
> +        list = list->next;
> +    }
>   }
> 
>   void qmp_cxl_add_dynamic_capacity_event(const char *path,
> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> index 01a5eaca48..1f85c88017 100644
> --- a/include/hw/cxl/cxl_device.h
> +++ b/include/hw/cxl/cxl_device.h
> @@ -412,6 +412,7 @@ typedef struct CXLDCD_Region {
>       uint64_t block_size;
>       uint32_t dsmadhandle;
>       uint8_t flags;
> +    unsigned long *blk_bitmap;
>   } CXLDCD_Region;
> 
>   struct CXLType3Dev {
> --
> 2.25.1

Thanks,
Jorgen
Jonathan Cameron Aug. 30, 2023, 3:04 p.m. UTC | #7
On Fri, 25 Aug 2023 09:34:50 -0700
Fan Ni <fan.ni@gmx.us> wrote:

> On Fri, Aug 25, 2023 at 12:42:56PM +0100, Jonathan Cameron wrote:
> > On Thu, 24 Aug 2023 13:49:00 -0700
> > Fan Ni <fan.ni@gmx.us> wrote:
> >  
> > > On Mon, Aug 07, 2023 at 09:53:42AM +0100, Jonathan Cameron wrote:  
> > > > On Tue, 25 Jul 2023 18:39:56 +0000
> > > > Fan Ni <fan.ni@samsung.com> wrote:
> > > >  
> > > > > From: Fan Ni <nifan@outlook.com>
> > > > >
> > > > > Not all dpa range in the dc regions is valid to access until an extent
> > > > > covering the range has been added. Add a bitmap for each region to
> > > > > record whether a dc block in the region has been backed by dc extent.
> > > > > For the bitmap, a bit in the bitmap represents a dc block. When a dc
> > > > > extent is added, all the bits of the blocks in the extent will be set,
> > > > > which will be cleared when the extent is released.
> > > > >
> > > > > Signed-off-by: Fan Ni <fan.ni@samsung.com>  
> > > > Hi Fan,
> > > >
> > > > A few of the bits of feedback apply broadly across the series.  Given I'm
> > > > rebasing this anyway to give myself something to test I'll tidy things up
> > > > (feel free to disagree with and revert any changes !)
> > > > and push a tree out in next day or two.  I'll message when I've done so.
> > > >
> > > > Jonathan  
> > >
> > > Hi Jonathan,
> > > I tried DCD with your branch "cxl-2023-08-07", and noticed the
> > > following,
> > > 1. You made some changes to the bitmap functionality, now it is only
> > > used to validate extents when adding/releasing dc extents. My original
> > > thought of adding the bitmap is to 1) validating extents for extent
> > > add/release as you do; 2) Add validating when doing read/write to the dc
> > > regions since some address region may not have valid extent added yet.
> > > Do you think 2) is not necessary?  
> >
> > Change wasn't intentional. I probably just messed up the rebase!  
> 
> Just double checked the code. The logic is still there, but in another
> patch in the series, so no issue and ignore my previous question.
> Sorry for the confusion.
> 
> >  
> > >
> > > 2. Your change introduced a bug in the code.
> > > https://gitlab.com/jic23/qemu/-/blob/cxl-2023-08-07/hw/cxl/cxl-mailbox-utils.c?ref_type=heads#L1394
> > > ct3d->dc.num_regions should be ct3d->dc.num_regions-1.  
> > Thanks.  Given I might forget about about it, if you want to incorporate that in
> > your next version that would be great. I might remember to fix it in the meantime!
Oops. I'll tiddy that up in my tree. 
> >
> > Jonathan
> >  
> 
> My code does not have this. It seems you added the lastregion variable
> to record the last region, while I use the following logic to iterate
> the regions and record last region automatically while collecting
> min_block_size.
> 
> +    for (i = 1; i < dev->dc.num_regions; i++) {
> +        region = &dev->dc.regions[i];
> +        if (min_block_size > region->block_size) {
> +            min_block_size = region->block_size;
> +        }
> +    }
> +
> +    blk_bitmap = bitmap_new((region->len + region->base
> +                - dev->dc.regions[0].base) / min_block_size);
Understood.  I found that hard to read (see review of patch 7).
I then messed up the cleanup as you've noted.

Jonathan
> 
> 
> Fan
> 
> > >
> > > Thanks,
> > > Fan
> > >  
> > > >  
> > > > > ---
> > > > >  hw/mem/cxl_type3.c          | 155 ++++++++++++++++++++++++++++++++++++
> > > > >  include/hw/cxl/cxl_device.h |   1 +
> > > > >  2 files changed, 156 insertions(+)
> > > > >
> > > > > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> > > > > index 41a828598a..51943a36fc 100644
> > > > > --- a/hw/mem/cxl_type3.c
> > > > > +++ b/hw/mem/cxl_type3.c
> > > > > @@ -787,13 +787,37 @@ static int cxl_create_dc_regions(CXLType3Dev *ct3d)
> > > > >          /* dsmad_handle is set when creating cdat table entries */
> > > > >          region->flags = 0;
> > > > >
> > > > > +        region->blk_bitmap = bitmap_new(region->len / region->block_size);  
> > > >
> > > > In common with many allocators in qemu if this fails it calls abort()
> > > > internally so no need to handle potential errors.
> > > >  
> > > > > +        if (!region->blk_bitmap) {
> > > > > +            break;
> > > > > +        }
> > > > > +
> > > > >          region_base += region->len;
> > > > >      }
> > > > > +
> > > > > +    if (i < ct3d->dc.num_regions) {
> > > > > +        while (--i >= 0) {
> > > > > +            g_free(ct3d->dc.regions[i].blk_bitmap);
> > > > > +        }
> > > > > +        return -1;
> > > > > +    }
> > > > > +
> > > > >      QTAILQ_INIT(&ct3d->dc.extents);
> > > > >
> > > > >      return 0;
> > > > >  }
> > > > >
> > > > > +static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
> > > > > +{
> > > > > +    int i;
> > > > > +    struct CXLDCD_Region *region;
> > > > > +
> > > > > +    for (i = 0; i < ct3d->dc.num_regions; i++) {
> > > > > +        region = &ct3d->dc.regions[i];
> > > > > +        g_free(region->blk_bitmap);
> > > > > +    }
> > > > > +}
> > > > > +
> > > > >  static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
> > > > >  {
> > > > >      DeviceState *ds = DEVICE(ct3d);
> > > > > @@ -1021,6 +1045,7 @@ err_free_special_ops:
> > > > >      g_free(regs->special_ops);
> > > > >  err_address_space_free:
> > > > >      if (ct3d->dc.host_dc) {
> > > > > +        cxl_destroy_dc_regions(ct3d);
> > > > >          address_space_destroy(&ct3d->dc.host_dc_as);
> > > > >      }
> > > > >      if (ct3d->hostpmem) {
> > > > > @@ -1043,6 +1068,7 @@ static void ct3_exit(PCIDevice *pci_dev)
> > > > >      spdm_sock_fini(ct3d->doe_spdm.socket);
> > > > >      g_free(regs->special_ops);
> > > > >      if (ct3d->dc.host_dc) {
> > > > > +        cxl_destroy_dc_regions(ct3d);
> > > > >          address_space_destroy(&ct3d->dc.host_dc_as);
> > > > >      }
> > > > >      if (ct3d->hostpmem) {
> > > > > @@ -1053,6 +1079,110 @@ static void ct3_exit(PCIDevice *pci_dev)
> > > > >      }
> > > > >  }
> > > > >
> > > > > +/*
> > > > > + * This function will marked the dpa range [dpa, dap + len) to be backed and
> > > > > + * accessible, this happens when a dc extent is added and accepted by the
> > > > > + * host.
> > > > > + */
> > > > > +static void set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > > > > +        uint64_t len)
> > > > > +{
> > > > > +    int i;
> > > > > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > > > > +
> > > > > +    if (dpa < region->base
> > > > > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > > > > +        return;
> > > > > +
> > > > > +    /*
> > > > > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > > > > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > > > > +     * Region 7 for the highest DPA.
> > > > > +     * So we check from the last region to find where the dpa belongs.
> > > > > +     * access across multiple regions is not allowed.
> > > > > +     **/
> > > > > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > > > > +        region = &ct3d->dc.regions[i];
> > > > > +        if (dpa >= region->base) {
> > > > > +            break;
> > > > > +        }
> > > > > +    }
> > > > > +
> > > > > +    bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size,
> > > > > +            len / region->block_size);
> > > > > +}
> > > > > +
> > > > > +/*
> > > > > + * This function check whether a dpa range [dpa, dpa + len) has been backed
> > > > > + * with dc extents, used when validating read/write to dc regions
> > > > > + */
> > > > > +static bool test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > > > > +        uint64_t len)
> > > > > +{
> > > > > +    int i;
> > > > > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > > > > +    uint64_t nbits;
> > > > > +    long nr;
> > > > > +
> > > > > +    if (dpa < region->base
> > > > > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > > > > +        return false;
> > > > > +
> > > > > +    /*
> > > > > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > > > > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > > > > +     * Region 7 for the highest DPA.
> > > > > +     * So we check from the last region to find where the dpa belongs.
> > > > > +     * access across multiple regions is not allowed.
> > > > > +     */
> > > > > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > > > > +        region = &ct3d->dc.regions[i];
> > > > > +        if (dpa >= region->base) {
> > > > > +            break;
> > > > > +        }
> > > > > +    }
> > > > > +
> > > > > +    nr = (dpa - region->base) / region->block_size;
> > > > > +    nbits = len / region->block_size;
> > > > > +    return find_next_zero_bit(region->blk_bitmap, nbits, nr) >= nr + nbits;
> > > > > +}
> > > > > +
> > > > > +/*
> > > > > + * This function will marked the dpa range [dpa, dap + len) to be unbacked and
> > > > > + * inaccessible, this happens when a dc extent is added and accepted by the
> > > > > + * host.
> > > > > + */
> > > > > +static void clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > > > > +        uint64_t len)
> > > > > +{
> > > > > +    int i;
> > > > > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > > > > +    uint64_t nbits;
> > > > > +    long nr;
> > > > > +
> > > > > +    if (dpa < region->base
> > > > > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > > > > +        return;
> > > > > +
> > > > > +    /*
> > > > > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > > > > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > > > > +     * Region 7 for the highest DPA.
> > > > > +     * So we check from the last region to find where the dpa belongs.
> > > > > +     * access across multiple regions is not allowed.
> > > > > +     */
> > > > > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > > > > +        region = &ct3d->dc.regions[i];
> > > > > +        if (dpa >= region->base) {
> > > > > +            break;
> > > > > +        }
> > > > > +    }
> > > > > +
> > > > > +    nr = (dpa - region->base) / region->block_size;
> > > > > +    nbits = len / region->block_size;
> > > > > +    bitmap_clear(region->blk_bitmap, nr, nbits);
> > > > > +}
> > > > > +
> > > > >  static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
> > > > >  {
> > > > >      uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
> > > > > @@ -1145,6 +1275,10 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
> > > > >          *as = &ct3d->hostpmem_as;
> > > > >          *dpa_offset -= vmr_size;
> > > > >      } else {
> > > > > +        if (!test_region_block_backed(ct3d, *dpa_offset, size)) {
> > > > > +            return -ENODEV;
> > > > > +        }
> > > > > +
> > > > >          *as = &ct3d->dc.host_dc_as;
> > > > >          *dpa_offset -= (vmr_size + pmr_size);
> > > > >      }
> > > > > @@ -1944,6 +2078,27 @@ static void qmp_cxl_process_dynamic_capacity_event(const char *path,
> > > > >      }
> > > > >
> > > > >      g_free(extents);
> > > > > +
> > > > > +    /* Another choice is to do the set/clear after getting mailbox response*/
> > > > > +    list = records;
> > > > > +    while (list) {
> > > > > +        dpa = list->value->dpa * 1024 * 1024;
> > > > > +        len = list->value->len * 1024 * 1024;
> > > > > +        rid = list->value->region_id;
> > > > > +
> > > > > +        switch (type) {
> > > > > +        case DC_EVENT_ADD_CAPACITY:
> > > > > +            set_region_block_backed(dcd, dpa, len);
> > > > > +            break;
> > > > > +        case DC_EVENT_RELEASE_CAPACITY:
> > > > > +            clear_region_block_backed(dcd, dpa, len);
> > > > > +            break;
> > > > > +        default:
> > > > > +            error_setg(errp, "DC event type not handled yet");
> > > > > +            break;
> > > > > +        }
> > > > > +        list = list->next;
> > > > > +    }
> > > > >  }
> > > > >
> > > > >  void qmp_cxl_add_dynamic_capacity_event(const char *path,
> > > > > diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> > > > > index 01a5eaca48..1f85c88017 100644
> > > > > --- a/include/hw/cxl/cxl_device.h
> > > > > +++ b/include/hw/cxl/cxl_device.h
> > > > > @@ -412,6 +412,7 @@ typedef struct CXLDCD_Region {
> > > > >      uint64_t block_size;
> > > > >      uint32_t dsmadhandle;
> > > > >      uint8_t flags;
> > > > > +    unsigned long *blk_bitmap;
> > > > >  } CXLDCD_Region;
> > > > >
> > > > >  struct CXLType3Dev {  
> > > >  
> >
Jonathan Cameron Aug. 30, 2023, 3:37 p.m. UTC | #8
> > +
> > +/*
> > + * This function check whether a dpa range [dpa, dpa + len) has been backed
> > + * with dc extents, used when validating read/write to dc regions
> > + */
> > +static bool test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
> > +        uint64_t len)
> > +{
> > +    int i;
> > +    CXLDCD_Region *region = &ct3d->dc.regions[0];
> > +    uint64_t nbits;
> > +    long nr;
> > +
> > +    if (dpa < region->base
> > +            || dpa >= region->base + ct3d->dc.total_capacity)
> > +        return false;
> > +
> > +    /*
> > +     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
> > +     * Region 0 being used for the lowest DPA of Dynamic Capacity and
> > +     * Region 7 for the highest DPA.
> > +     * So we check from the last region to find where the dpa belongs.
> > +     * access across multiple regions is not allowed.
> > +     */
> > +    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
> > +        region = &ct3d->dc.regions[i];
> > +        if (dpa >= region->base) {
> > +            break;
> > +        }
> > +    }
> > +
> > +    nr = (dpa - region->base) / region->block_size;  
> 
> > +    nbits = len / region->block_size;
oops. Len is probably always smaller than block_size (typically 8 or less)
so nbits always 0.  Should be 1 in those cases.

> > +    return find_next_zero_bit(region->blk_bitmap, nbits, nr) >= nr + nbits;  
> 
> The 2nd parameter to find_next_zero_bit is the length of the bitmap, so 
> shouldn't this be something like (also considering that len is the 
> read/write len, and will be smaller than the region block size):
> 
>    nbits = DIV_ROUND_UP(len, region->block_size);

>  
> 
>    return find_next_zero_bit(region->blk_bitmap, nbits + nr, nr) ==
>           nbits + nr;

Agreed with your suggestion. I'll carry that in my forward port of this
series for now and update my tree at
gitlab.com/jic23/qemu branch will probably be cxl-2023-08-30
a bit later today.

Jonathan
diff mbox series

Patch

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 41a828598a..51943a36fc 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -787,13 +787,37 @@  static int cxl_create_dc_regions(CXLType3Dev *ct3d)
         /* dsmad_handle is set when creating cdat table entries */
         region->flags = 0;
 
+        region->blk_bitmap = bitmap_new(region->len / region->block_size);
+        if (!region->blk_bitmap) {
+            break;
+        }
+
         region_base += region->len;
     }
+
+    if (i < ct3d->dc.num_regions) {
+        while (--i >= 0) {
+            g_free(ct3d->dc.regions[i].blk_bitmap);
+        }
+        return -1;
+    }
+
     QTAILQ_INIT(&ct3d->dc.extents);
 
     return 0;
 }
 
+static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
+{
+    int i;
+    struct CXLDCD_Region *region;
+
+    for (i = 0; i < ct3d->dc.num_regions; i++) {
+        region = &ct3d->dc.regions[i];
+        g_free(region->blk_bitmap);
+    }
+}
+
 static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
 {
     DeviceState *ds = DEVICE(ct3d);
@@ -1021,6 +1045,7 @@  err_free_special_ops:
     g_free(regs->special_ops);
 err_address_space_free:
     if (ct3d->dc.host_dc) {
+        cxl_destroy_dc_regions(ct3d);
         address_space_destroy(&ct3d->dc.host_dc_as);
     }
     if (ct3d->hostpmem) {
@@ -1043,6 +1068,7 @@  static void ct3_exit(PCIDevice *pci_dev)
     spdm_sock_fini(ct3d->doe_spdm.socket);
     g_free(regs->special_ops);
     if (ct3d->dc.host_dc) {
+        cxl_destroy_dc_regions(ct3d);
         address_space_destroy(&ct3d->dc.host_dc_as);
     }
     if (ct3d->hostpmem) {
@@ -1053,6 +1079,110 @@  static void ct3_exit(PCIDevice *pci_dev)
     }
 }
 
+/*
+ * This function will marked the dpa range [dpa, dap + len) to be backed and
+ * accessible, this happens when a dc extent is added and accepted by the
+ * host.
+ */
+static void set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
+        uint64_t len)
+{
+    int i;
+    CXLDCD_Region *region = &ct3d->dc.regions[0];
+
+    if (dpa < region->base
+            || dpa >= region->base + ct3d->dc.total_capacity)
+        return;
+
+    /*
+     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
+     * Region 0 being used for the lowest DPA of Dynamic Capacity and
+     * Region 7 for the highest DPA.
+     * So we check from the last region to find where the dpa belongs.
+     * access across multiple regions is not allowed.
+     **/
+    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
+        region = &ct3d->dc.regions[i];
+        if (dpa >= region->base) {
+            break;
+        }
+    }
+
+    bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size,
+            len / region->block_size);
+}
+
+/*
+ * This function check whether a dpa range [dpa, dpa + len) has been backed
+ * with dc extents, used when validating read/write to dc regions
+ */
+static bool test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
+        uint64_t len)
+{
+    int i;
+    CXLDCD_Region *region = &ct3d->dc.regions[0];
+    uint64_t nbits;
+    long nr;
+
+    if (dpa < region->base
+            || dpa >= region->base + ct3d->dc.total_capacity)
+        return false;
+
+    /*
+     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
+     * Region 0 being used for the lowest DPA of Dynamic Capacity and
+     * Region 7 for the highest DPA.
+     * So we check from the last region to find where the dpa belongs.
+     * access across multiple regions is not allowed.
+     */
+    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
+        region = &ct3d->dc.regions[i];
+        if (dpa >= region->base) {
+            break;
+        }
+    }
+
+    nr = (dpa - region->base) / region->block_size;
+    nbits = len / region->block_size;
+    return find_next_zero_bit(region->blk_bitmap, nbits, nr) >= nr + nbits;
+}
+
+/*
+ * This function will marked the dpa range [dpa, dap + len) to be unbacked and
+ * inaccessible, this happens when a dc extent is added and accepted by the
+ * host.
+ */
+static void clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
+        uint64_t len)
+{
+    int i;
+    CXLDCD_Region *region = &ct3d->dc.regions[0];
+    uint64_t nbits;
+    long nr;
+
+    if (dpa < region->base
+            || dpa >= region->base + ct3d->dc.total_capacity)
+        return;
+
+    /*
+     * spec 3.0 9.13.3: Regions are used in increasing-DPA order, with
+     * Region 0 being used for the lowest DPA of Dynamic Capacity and
+     * Region 7 for the highest DPA.
+     * So we check from the last region to find where the dpa belongs.
+     * access across multiple regions is not allowed.
+     */
+    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
+        region = &ct3d->dc.regions[i];
+        if (dpa >= region->base) {
+            break;
+        }
+    }
+
+    nr = (dpa - region->base) / region->block_size;
+    nbits = len / region->block_size;
+    bitmap_clear(region->blk_bitmap, nr, nbits);
+}
+
 static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
 {
     uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
@@ -1145,6 +1275,10 @@  static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
         *as = &ct3d->hostpmem_as;
         *dpa_offset -= vmr_size;
     } else {
+        if (!test_region_block_backed(ct3d, *dpa_offset, size)) {
+            return -ENODEV;
+        }
+
         *as = &ct3d->dc.host_dc_as;
         *dpa_offset -= (vmr_size + pmr_size);
     }
@@ -1944,6 +2078,27 @@  static void qmp_cxl_process_dynamic_capacity_event(const char *path,
     }
 
     g_free(extents);
+
+    /* Another choice is to do the set/clear after getting mailbox response*/
+    list = records;
+    while (list) {
+        dpa = list->value->dpa * 1024 * 1024;
+        len = list->value->len * 1024 * 1024;
+        rid = list->value->region_id;
+
+        switch (type) {
+        case DC_EVENT_ADD_CAPACITY:
+            set_region_block_backed(dcd, dpa, len);
+            break;
+        case DC_EVENT_RELEASE_CAPACITY:
+            clear_region_block_backed(dcd, dpa, len);
+            break;
+        default:
+            error_setg(errp, "DC event type not handled yet");
+            break;
+        }
+        list = list->next;
+    }
 }
 
 void qmp_cxl_add_dynamic_capacity_event(const char *path,
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 01a5eaca48..1f85c88017 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -412,6 +412,7 @@  typedef struct CXLDCD_Region {
     uint64_t block_size;
     uint32_t dsmadhandle;
     uint8_t flags;
+    unsigned long *blk_bitmap;
 } CXLDCD_Region;
 
 struct CXLType3Dev {