diff mbox series

[14/26] cxl/region: Read existing extents on region creation

Message ID 20240324-dcd-type2-upstream-v1-14-b7b00d623625@intel.com (mailing list archive)
State New, archived
Headers show
Series DCD: Add support for Dynamic Capacity Devices (DCD) | expand

Commit Message

Ira Weiny March 24, 2024, 11:18 p.m. UTC
From: Navneet Singh <navneet.singh@intel.com>

Dynamic capacity device extents may be left in an accepted state on a
device due to an unexpected host crash.  In this case creation of a new
region on top of the DC partition (region) is expected to expose those
extents for continued use.

Once all endpoint decoders are part of a region and the region is being
realized read the device extent list.  For ease of review, this patch
stops after reading the extent list and leaves realization of the region
extents to a future patch.

Signed-off-by: Navneet Singh <navneet.singh@intel.com>
Co-developed-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>

---
Changes for v1:
[iweiny: remove extent list xarray]
[iweiny: Update spec references to 3.1]
[iweiny: use struct range in extents]
[iweiny: remove all reference tracking and let regions track extents
	 through the extent devices.]
[djbw/Jonathan/Fan: move extent tracking to endpoint decoders]
---
 drivers/cxl/core/core.h   |   9 +++
 drivers/cxl/core/mbox.c   | 192 ++++++++++++++++++++++++++++++++++++++++++++++
 drivers/cxl/core/region.c |  29 +++++++
 drivers/cxl/cxlmem.h      |  49 ++++++++++++
 4 files changed, 279 insertions(+)

Comments

fan March 26, 2024, 11:27 p.m. UTC | #1
On Sun, Mar 24, 2024 at 04:18:17PM -0700, ira.weiny@intel.com wrote:
> From: Navneet Singh <navneet.singh@intel.com>
> 
> Dynamic capacity device extents may be left in an accepted state on a
> device due to an unexpected host crash.  In this case creation of a new
> region on top of the DC partition (region) is expected to expose those
> extents for continued use.
> 
> Once all endpoint decoders are part of a region and the region is being
> realized read the device extent list.  For ease of review, this patch
> stops after reading the extent list and leaves realization of the region
> extents to a future patch.
> 
> Signed-off-by: Navneet Singh <navneet.singh@intel.com>
> Co-developed-by: Ira Weiny <ira.weiny@intel.com>
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> 
> ---
> Changes for v1:
> [iweiny: remove extent list xarray]
> [iweiny: Update spec references to 3.1]
> [iweiny: use struct range in extents]
> [iweiny: remove all reference tracking and let regions track extents
> 	 through the extent devices.]
> [djbw/Jonathan/Fan: move extent tracking to endpoint decoders]
> ---
>  drivers/cxl/core/core.h   |   9 +++
>  drivers/cxl/core/mbox.c   | 192 ++++++++++++++++++++++++++++++++++++++++++++++
>  drivers/cxl/core/region.c |  29 +++++++
>  drivers/cxl/cxlmem.h      |  49 ++++++++++++
>  4 files changed, 279 insertions(+)
> 
> diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
> index 91abeffbe985..119b12362977 100644
> --- a/drivers/cxl/core/core.h
> +++ b/drivers/cxl/core/core.h
> @@ -4,6 +4,8 @@
>  #ifndef __CXL_CORE_H__
>  #define __CXL_CORE_H__
>  
> +#include <cxlmem.h>
> +
>  extern const struct device_type cxl_nvdimm_bridge_type;
>  extern const struct device_type cxl_nvdimm_type;
>  extern const struct device_type cxl_pmu_type;
> @@ -28,6 +30,8 @@ void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled);
>  int cxl_region_init(void);
>  void cxl_region_exit(void);
>  int cxl_get_poison_by_endpoint(struct cxl_port *port);
> +int cxl_ed_add_one_extent(struct cxl_endpoint_decoder *cxled,
> +			  struct cxl_dc_extent *dc_extent);
>  #else
>  static inline int cxl_get_poison_by_endpoint(struct cxl_port *port)
>  {
> @@ -43,6 +47,11 @@ static inline int cxl_region_init(void)
>  static inline void cxl_region_exit(void)
>  {
>  }
> +static inline int cxl_ed_add_one_extent(struct cxl_endpoint_decoder *cxled,
> +					struct cxl_dc_extent *dc_extent)
> +{
> +	return 0;
> +}
>  #define CXL_REGION_ATTR(x) NULL
>  #define CXL_REGION_TYPE(x) NULL
>  #define SET_CXL_REGION_ATTR(x)
> diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> index 58b31fa47b93..9e33a0976828 100644
> --- a/drivers/cxl/core/mbox.c
> +++ b/drivers/cxl/core/mbox.c
> @@ -870,6 +870,53 @@ int cxl_enumerate_cmds(struct cxl_memdev_state *mds)
>  }
>  EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL);
>  
> +static int cxl_validate_extent(struct cxl_memdev_state *mds,
> +			       struct cxl_dc_extent *dc_extent)
> +{
> +	struct device *dev = mds->cxlds.dev;
> +	uint64_t start, len;
> +
> +	start = le64_to_cpu(dc_extent->start_dpa);
> +	len = le64_to_cpu(dc_extent->length);
> +
> +	/* Extents must not cross region boundary's */
> +	for (int i = 0; i < mds->nr_dc_region; i++) {
> +		struct cxl_dc_region_info *dcr = &mds->dc_region[i];
> +
> +		if (dcr->base <= start &&
> +		    (start + len) <= (dcr->base + dcr->decode_len)) {

Why not use range_contains here as below?

Fan
> +			dev_dbg(dev, "DC extent DPA %#llx - %#llx (DCR:%d:%#llx)\n",
> +				start, start + len - 1, i, start - dcr->base);
> +			return 0;
> +		}
> +	}
> +
> +	dev_err_ratelimited(dev,
> +			    "DC extent DPA %#llx - %#llx is not in any DC region\n",
> +			    start, start + len - 1);
> +	return -EINVAL;
> +}
> +
> +static bool cxl_dc_extent_in_ed(struct cxl_endpoint_decoder *cxled,
> +				struct cxl_dc_extent *extent)
> +{
> +	uint64_t start = le64_to_cpu(extent->start_dpa);
> +	uint64_t length = le64_to_cpu(extent->length);
> +	struct range ext_range = (struct range){
> +		.start = start,
> +		.end = start + length - 1,
> +	};
> +	struct range ed_range = (struct range) {
> +		.start = cxled->dpa_res->start,
> +		.end = cxled->dpa_res->end,
> +	};
> +
> +	dev_dbg(&cxled->cxld.dev, "Checking ED (%pr) for extent DPA:%#llx LEN:%#llx\n",
> +		cxled->dpa_res, start, length);
> +
> +	return range_contains(&ed_range, &ext_range);
> +}
> +
>  void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
>  			    enum cxl_event_log_type type,
>  			    enum cxl_event_type event_type,
> @@ -973,6 +1020,15 @@ static int cxl_clear_event_record(struct cxl_memdev_state *mds,
>  	return rc;
>  }
>  
> +static struct cxl_memdev_state *
> +cxled_to_mds(struct cxl_endpoint_decoder *cxled)
> +{
> +	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
> +	struct cxl_dev_state *cxlds = cxlmd->cxlds;
> +
> +	return container_of(cxlds, struct cxl_memdev_state, cxlds);
> +}
> +
>  static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
>  				    enum cxl_event_log_type type)
>  {
> @@ -1406,6 +1462,142 @@ int cxl_dev_dynamic_capacity_identify(struct cxl_memdev_state *mds)
>  }
>  EXPORT_SYMBOL_NS_GPL(cxl_dev_dynamic_capacity_identify, CXL);
>  
> +static int cxl_dev_get_dc_extent_cnt(struct cxl_memdev_state *mds,
> +				     unsigned int *extent_gen_num)
> +{
> +	struct cxl_mbox_get_dc_extent_in get_dc_extent;
> +	struct cxl_mbox_get_dc_extent_out dc_extents;
> +	struct cxl_mbox_cmd mbox_cmd;
> +	unsigned int count;
> +	int rc;
> +
> +	get_dc_extent = (struct cxl_mbox_get_dc_extent_in) {
> +		.extent_cnt = cpu_to_le32(0),
> +		.start_extent_index = cpu_to_le32(0),
> +	};
> +
> +	mbox_cmd = (struct cxl_mbox_cmd) {
> +		.opcode = CXL_MBOX_OP_GET_DC_EXTENT_LIST,
> +		.payload_in = &get_dc_extent,
> +		.size_in = sizeof(get_dc_extent),
> +		.size_out = sizeof(dc_extents),
> +		.payload_out = &dc_extents,
> +		.min_out = 1,
> +	};
> +
> +	rc = cxl_internal_send_cmd(mds, &mbox_cmd);
> +	if (rc < 0)
> +		return rc;
> +
> +	count = le32_to_cpu(dc_extents.total_extent_cnt);
> +	*extent_gen_num = le32_to_cpu(dc_extents.extent_list_num);
> +
> +	return count;
> +}
> +
> +static int cxl_dev_get_dc_extents(struct cxl_endpoint_decoder *cxled,
> +				  unsigned int start_gen_num,
> +				  unsigned int exp_cnt)
> +{
> +	struct cxl_memdev_state *mds = cxled_to_mds(cxled);
> +	unsigned int start_index, total_read;
> +	struct device *dev = mds->cxlds.dev;
> +	struct cxl_mbox_cmd mbox_cmd;
> +
> +	struct cxl_mbox_get_dc_extent_out *dc_extents __free(kfree) =
> +				kvmalloc(mds->payload_size, GFP_KERNEL);
> +	if (!dc_extents)
> +		return -ENOMEM;
> +
> +	total_read = 0;
> +	start_index = 0;
> +	do {
> +		unsigned int nr_ext, total_extent_cnt, gen_num;
> +		struct cxl_mbox_get_dc_extent_in get_dc_extent;
> +		int rc;
> +
> +		get_dc_extent = (struct cxl_mbox_get_dc_extent_in) {
> +			.extent_cnt = cpu_to_le32(exp_cnt - start_index),
> +			.start_extent_index = cpu_to_le32(start_index),
> +		};
> +
> +		mbox_cmd = (struct cxl_mbox_cmd) {
> +			.opcode = CXL_MBOX_OP_GET_DC_EXTENT_LIST,
> +			.payload_in = &get_dc_extent,
> +			.size_in = sizeof(get_dc_extent),
> +			.size_out = mds->payload_size,
> +			.payload_out = dc_extents,
> +			.min_out = 1,
> +		};
> +
> +		rc = cxl_internal_send_cmd(mds, &mbox_cmd);
> +		if (rc < 0)
> +			return rc;
> +
> +		nr_ext = le32_to_cpu(dc_extents->ret_extent_cnt);
> +		total_read += nr_ext;
> +		total_extent_cnt = le32_to_cpu(dc_extents->total_extent_cnt);
> +		gen_num = le32_to_cpu(dc_extents->extent_list_num);
> +
> +		dev_dbg(dev, "Get extent list count:%d generation Num:%d\n",
> +			total_extent_cnt, gen_num);
> +
> +		if (gen_num != start_gen_num || exp_cnt != total_extent_cnt) {
> +			dev_err(dev, "Possible incomplete extent list; gen %u != %u : cnt %u != %u\n",
> +				gen_num, start_gen_num, exp_cnt, total_extent_cnt);
> +			return -EIO;
> +		}
> +
> +		for (int i = 0; i < nr_ext ; i++) {
> +			dev_dbg(dev, "Processing extent %d/%d\n",
> +				start_index + i, exp_cnt);
> +			rc = cxl_validate_extent(mds, &dc_extents->extent[i]);
> +			if (rc)
> +				continue;
> +			if (!cxl_dc_extent_in_ed(cxled, &dc_extents->extent[i]))
> +				continue;
> +			rc = cxl_ed_add_one_extent(cxled, &dc_extents->extent[i]);
> +			if (rc)
> +				return rc;
> +		}
> +
> +		start_index += nr_ext;
> +	} while (exp_cnt > total_read);
> +
> +	return 0;
> +}
> +
> +/**
> + * cxl_read_dc_extents() - Read any existing extents
> + * @cxled: Endpoint decoder which is part of a region
> + *
> + * Issue the Get Dynamic Capacity Extent List command to the device
> + * and add any existing extents found which belong to this decoder.
> + *
> + * Return: 0 if command was executed successfully, -ERRNO on error.
> + */
> +int cxl_read_dc_extents(struct cxl_endpoint_decoder *cxled)
> +{
> +	struct cxl_memdev_state *mds = cxled_to_mds(cxled);
> +	struct device *dev = mds->cxlds.dev;
> +	unsigned int extent_gen_num;
> +	int rc;
> +
> +	if (!cxl_dcd_supported(mds)) {
> +		dev_dbg(dev, "DCD unsupported\n");
> +		return 0;
> +	}
> +
> +	rc = cxl_dev_get_dc_extent_cnt(mds, &extent_gen_num);
> +	dev_dbg(mds->cxlds.dev, "Extent count: %d Generation Num: %d\n",
> +		rc, extent_gen_num);
> +	if (rc <= 0) /* 0 == no records found */
> +		return rc;
> +
> +	return cxl_dev_get_dc_extents(cxled, extent_gen_num, rc);
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_read_dc_extents, CXL);
> +
>  static int add_dpa_res(struct device *dev, struct resource *parent,
>  		       struct resource *res, resource_size_t start,
>  		       resource_size_t size, const char *type)
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index 0d7b09a49dcf..3e563ab29afe 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -1450,6 +1450,13 @@ static int cxl_region_validate_position(struct cxl_region *cxlr,
>  	return 0;
>  }
>  
> +/* Callers are expected to ensure cxled has been attached to a region */
> +int cxl_ed_add_one_extent(struct cxl_endpoint_decoder *cxled,
> +			  struct cxl_dc_extent *dc_extent)
> +{
> +	return 0;
> +}
> +
>  static int cxl_region_attach_position(struct cxl_region *cxlr,
>  				      struct cxl_root_decoder *cxlrd,
>  				      struct cxl_endpoint_decoder *cxled,
> @@ -2773,6 +2780,22 @@ static int devm_cxl_add_pmem_region(struct cxl_region *cxlr)
>  	return rc;
>  }
>  
> +static int cxl_region_read_extents(struct cxl_region *cxlr)
> +{
> +	struct cxl_region_params *p = &cxlr->params;
> +	int i;
> +
> +	for (i = 0; i < p->nr_targets; i++) {
> +		int rc;
> +
> +		rc = cxl_read_dc_extents(p->targets[i]);
> +		if (rc)
> +			return rc;
> +	}
> +
> +	return 0;
> +}
> +
>  static void cxlr_dax_unregister(void *_cxlr_dax)
>  {
>  	struct cxl_dax_region *cxlr_dax = _cxlr_dax;
> @@ -2807,6 +2830,12 @@ static int devm_cxl_add_dax_region(struct cxl_region *cxlr)
>  	dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
>  		dev_name(dev));
>  
> +	if (cxlr->mode == CXL_REGION_DC) {
> +		rc = cxl_region_read_extents(cxlr);
> +		if (rc)
> +			goto err;
> +	}
> +
>  	return devm_add_action_or_reset(&cxlr->dev, cxlr_dax_unregister,
>  					cxlr_dax);
>  err:
> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> index 01bee6eedff3..8f2d8944d334 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h
> @@ -604,6 +604,54 @@ enum cxl_opcode {
>  	UUID_INIT(0xe1819d9, 0x11a9, 0x400c, 0x81, 0x1f, 0xd6, 0x07, 0x19,     \
>  		  0x40, 0x3d, 0x86)
>  
> +/*
> + * Add Dynamic Capacity Response
> + * CXL rev 3.1 section 8.2.9.9.9.3; Table 8-168 & Table 8-169
> + */
> +struct cxl_mbox_dc_response {
> +	__le32 extent_list_size;
> +	u8 flags;
> +	u8 reserved[3];
> +	struct updated_extent_list {
> +		__le64 dpa_start;
> +		__le64 length;
> +		u8 reserved[8];
> +	} __packed extent_list[];
> +} __packed;
> +
> +/*
> + * CXL rev 3.1 section 8.2.9.2.1.6; Table 8-51
> + */
> +#define CXL_DC_EXTENT_TAG_LEN 0x10
> +struct cxl_dc_extent {
> +	__le64 start_dpa;
> +	__le64 length;
> +	u8 tag[CXL_DC_EXTENT_TAG_LEN];
> +	__le16 shared_extn_seq;
> +	u8 reserved[6];
> +} __packed;
> +
> +/*
> + * Get Dynamic Capacity Extent List; Input Payload
> + * CXL rev 3.1 section 8.2.9.9.9.2; Table 8-166
> + */
> +struct cxl_mbox_get_dc_extent_in {
> +	__le32 extent_cnt;
> +	__le32 start_extent_index;
> +} __packed;
> +
> +/*
> + * Get Dynamic Capacity Extent List; Output Payload
> + * CXL rev 3.1 section 8.2.9.9.9.2; Table 8-167
> + */
> +struct cxl_mbox_get_dc_extent_out {
> +	__le32 ret_extent_cnt;
> +	__le32 total_extent_cnt;
> +	__le32 extent_list_num;
> +	u8 rsvd[4];
> +	struct cxl_dc_extent extent[];
> +} __packed;
> +
>  struct cxl_mbox_get_supported_logs {
>  	__le16 entries;
>  	u8 rsvd[6];
> @@ -879,6 +927,7 @@ int cxl_internal_send_cmd(struct cxl_memdev_state *mds,
>  			  struct cxl_mbox_cmd *cmd);
>  int cxl_dev_state_identify(struct cxl_memdev_state *mds);
>  int cxl_dev_dynamic_capacity_identify(struct cxl_memdev_state *mds);
> +int cxl_read_dc_extents(struct cxl_endpoint_decoder *cxled);
>  int cxl_await_media_ready(struct cxl_dev_state *cxlds);
>  int cxl_enumerate_cmds(struct cxl_memdev_state *mds);
>  int cxl_mem_create_range_info(struct cxl_memdev_state *mds);
> 
> -- 
> 2.44.0
>
fan March 27, 2024, 5:45 p.m. UTC | #2
On Sun, Mar 24, 2024 at 04:18:17PM -0700, ira.weiny@intel.com wrote:
> From: Navneet Singh <navneet.singh@intel.com>
> 
> Dynamic capacity device extents may be left in an accepted state on a
> device due to an unexpected host crash.  In this case creation of a new
> region on top of the DC partition (region) is expected to expose those
> extents for continued use.
> 
> Once all endpoint decoders are part of a region and the region is being
> realized read the device extent list.  For ease of review, this patch
> stops after reading the extent list and leaves realization of the region
> extents to a future patch.
> 
> Signed-off-by: Navneet Singh <navneet.singh@intel.com>
> Co-developed-by: Ira Weiny <ira.weiny@intel.com>
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> 
> ---
> Changes for v1:
> [iweiny: remove extent list xarray]
> [iweiny: Update spec references to 3.1]
> [iweiny: use struct range in extents]
> [iweiny: remove all reference tracking and let regions track extents
> 	 through the extent devices.]
> [djbw/Jonathan/Fan: move extent tracking to endpoint decoders]
> ---
>  drivers/cxl/core/core.h   |   9 +++
>  drivers/cxl/core/mbox.c   | 192 ++++++++++++++++++++++++++++++++++++++++++++++
>  drivers/cxl/core/region.c |  29 +++++++
>  drivers/cxl/cxlmem.h      |  49 ++++++++++++
>  4 files changed, 279 insertions(+)
> 
> diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
> index 91abeffbe985..119b12362977 100644
> --- a/drivers/cxl/core/core.h
> +++ b/drivers/cxl/core/core.h
> @@ -4,6 +4,8 @@
>  #ifndef __CXL_CORE_H__
>  #define __CXL_CORE_H__
>  
> +#include <cxlmem.h>
> +
>  extern const struct device_type cxl_nvdimm_bridge_type;
>  extern const struct device_type cxl_nvdimm_type;
>  extern const struct device_type cxl_pmu_type;
> @@ -28,6 +30,8 @@ void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled);
>  int cxl_region_init(void);
>  void cxl_region_exit(void);
>  int cxl_get_poison_by_endpoint(struct cxl_port *port);
> +int cxl_ed_add_one_extent(struct cxl_endpoint_decoder *cxled,
> +			  struct cxl_dc_extent *dc_extent);
>  #else
>  static inline int cxl_get_poison_by_endpoint(struct cxl_port *port)
>  {
> @@ -43,6 +47,11 @@ static inline int cxl_region_init(void)
>  static inline void cxl_region_exit(void)
>  {
>  }
> +static inline int cxl_ed_add_one_extent(struct cxl_endpoint_decoder *cxled,
> +					struct cxl_dc_extent *dc_extent)
> +{
> +	return 0;
> +}
>  #define CXL_REGION_ATTR(x) NULL
>  #define CXL_REGION_TYPE(x) NULL
>  #define SET_CXL_REGION_ATTR(x)
> diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> index 58b31fa47b93..9e33a0976828 100644
> --- a/drivers/cxl/core/mbox.c
> +++ b/drivers/cxl/core/mbox.c
> @@ -870,6 +870,53 @@ int cxl_enumerate_cmds(struct cxl_memdev_state *mds)
>  }
>  EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL);
>  
> +static int cxl_validate_extent(struct cxl_memdev_state *mds,
> +			       struct cxl_dc_extent *dc_extent)
> +{
> +	struct device *dev = mds->cxlds.dev;
> +	uint64_t start, len;
> +
> +	start = le64_to_cpu(dc_extent->start_dpa);
> +	len = le64_to_cpu(dc_extent->length);
> +
> +	/* Extents must not cross region boundary's */
> +	for (int i = 0; i < mds->nr_dc_region; i++) {
> +		struct cxl_dc_region_info *dcr = &mds->dc_region[i];
> +
> +		if (dcr->base <= start &&
> +		    (start + len) <= (dcr->base + dcr->decode_len)) {
> +			dev_dbg(dev, "DC extent DPA %#llx - %#llx (DCR:%d:%#llx)\n",
> +				start, start + len - 1, i, start - dcr->base);
> +			return 0;
> +		}
> +	}
> +
> +	dev_err_ratelimited(dev,
> +			    "DC extent DPA %#llx - %#llx is not in any DC region\n",
> +			    start, start + len - 1);
> +	return -EINVAL;
> +}
> +
> +static bool cxl_dc_extent_in_ed(struct cxl_endpoint_decoder *cxled,
> +				struct cxl_dc_extent *extent)
> +{
> +	uint64_t start = le64_to_cpu(extent->start_dpa);
> +	uint64_t length = le64_to_cpu(extent->length);
> +	struct range ext_range = (struct range){
> +		.start = start,
> +		.end = start + length - 1,
> +	};
> +	struct range ed_range = (struct range) {
> +		.start = cxled->dpa_res->start,
> +		.end = cxled->dpa_res->end,
> +	};
> +
> +	dev_dbg(&cxled->cxld.dev, "Checking ED (%pr) for extent DPA:%#llx LEN:%#llx\n",
> +		cxled->dpa_res, start, length);
> +
> +	return range_contains(&ed_range, &ext_range);
> +}
> +
>  void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
>  			    enum cxl_event_log_type type,
>  			    enum cxl_event_type event_type,
> @@ -973,6 +1020,15 @@ static int cxl_clear_event_record(struct cxl_memdev_state *mds,
>  	return rc;
>  }
>  
> +static struct cxl_memdev_state *
> +cxled_to_mds(struct cxl_endpoint_decoder *cxled)
> +{
> +	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
> +	struct cxl_dev_state *cxlds = cxlmd->cxlds;
> +
> +	return container_of(cxlds, struct cxl_memdev_state, cxlds);
> +}
> +
>  static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
>  				    enum cxl_event_log_type type)
>  {
> @@ -1406,6 +1462,142 @@ int cxl_dev_dynamic_capacity_identify(struct cxl_memdev_state *mds)
>  }
>  EXPORT_SYMBOL_NS_GPL(cxl_dev_dynamic_capacity_identify, CXL);
>  
> +static int cxl_dev_get_dc_extent_cnt(struct cxl_memdev_state *mds,
> +				     unsigned int *extent_gen_num)
> +{
> +	struct cxl_mbox_get_dc_extent_in get_dc_extent;
> +	struct cxl_mbox_get_dc_extent_out dc_extents;
> +	struct cxl_mbox_cmd mbox_cmd;
> +	unsigned int count;
> +	int rc;
> +
> +	get_dc_extent = (struct cxl_mbox_get_dc_extent_in) {
> +		.extent_cnt = cpu_to_le32(0),
> +		.start_extent_index = cpu_to_le32(0),
> +	};
> +
> +	mbox_cmd = (struct cxl_mbox_cmd) {
> +		.opcode = CXL_MBOX_OP_GET_DC_EXTENT_LIST,
> +		.payload_in = &get_dc_extent,
> +		.size_in = sizeof(get_dc_extent),
> +		.size_out = sizeof(dc_extents),
> +		.payload_out = &dc_extents,
> +		.min_out = 1,
> +	};
> +
> +	rc = cxl_internal_send_cmd(mds, &mbox_cmd);
> +	if (rc < 0)
> +		return rc;
> +
> +	count = le32_to_cpu(dc_extents.total_extent_cnt);
> +	*extent_gen_num = le32_to_cpu(dc_extents.extent_list_num);
> +
> +	return count;
> +}
> +
> +static int cxl_dev_get_dc_extents(struct cxl_endpoint_decoder *cxled,
> +				  unsigned int start_gen_num,
> +				  unsigned int exp_cnt)
> +{
> +	struct cxl_memdev_state *mds = cxled_to_mds(cxled);
> +	unsigned int start_index, total_read;
> +	struct device *dev = mds->cxlds.dev;
> +	struct cxl_mbox_cmd mbox_cmd;
> +
> +	struct cxl_mbox_get_dc_extent_out *dc_extents __free(kfree) =
> +				kvmalloc(mds->payload_size, GFP_KERNEL);
> +	if (!dc_extents)
> +		return -ENOMEM;
> +
> +	total_read = 0;
> +	start_index = 0;
> +	do {
> +		unsigned int nr_ext, total_extent_cnt, gen_num;
> +		struct cxl_mbox_get_dc_extent_in get_dc_extent;
> +		int rc;
> +
> +		get_dc_extent = (struct cxl_mbox_get_dc_extent_in) {
> +			.extent_cnt = cpu_to_le32(exp_cnt - start_index),
> +			.start_extent_index = cpu_to_le32(start_index),
> +		};
> +
> +		mbox_cmd = (struct cxl_mbox_cmd) {
> +			.opcode = CXL_MBOX_OP_GET_DC_EXTENT_LIST,
> +			.payload_in = &get_dc_extent,
> +			.size_in = sizeof(get_dc_extent),
> +			.size_out = mds->payload_size,
> +			.payload_out = dc_extents,
> +			.min_out = 1,
> +		};
> +
> +		rc = cxl_internal_send_cmd(mds, &mbox_cmd);
> +		if (rc < 0)
> +			return rc;
> +
> +		nr_ext = le32_to_cpu(dc_extents->ret_extent_cnt);
> +		total_read += nr_ext;
> +		total_extent_cnt = le32_to_cpu(dc_extents->total_extent_cnt);
> +		gen_num = le32_to_cpu(dc_extents->extent_list_num);
> +
> +		dev_dbg(dev, "Get extent list count:%d generation Num:%d\n",
> +			total_extent_cnt, gen_num);
> +
> +		if (gen_num != start_gen_num || exp_cnt != total_extent_cnt) {
> +			dev_err(dev, "Possible incomplete extent list; gen %u != %u : cnt %u != %u\n",
> +				gen_num, start_gen_num, exp_cnt, total_extent_cnt);
> +			return -EIO;
> +		}
> +
> +		for (int i = 0; i < nr_ext ; i++) {
> +			dev_dbg(dev, "Processing extent %d/%d\n",
> +				start_index + i, exp_cnt);
> +			rc = cxl_validate_extent(mds, &dc_extents->extent[i]);
> +			if (rc)
> +				continue;
> +			if (!cxl_dc_extent_in_ed(cxled, &dc_extents->extent[i]))
> +				continue;
> +			rc = cxl_ed_add_one_extent(cxled, &dc_extents->extent[i]);
> +			if (rc)
> +				return rc;
> +		}
> +
> +		start_index += nr_ext;
> +	} while (exp_cnt > total_read);
> +
> +	return 0;
> +}
> +
> +/**
> + * cxl_read_dc_extents() - Read any existing extents
> + * @cxled: Endpoint decoder which is part of a region
> + *
> + * Issue the Get Dynamic Capacity Extent List command to the device
> + * and add any existing extents found which belong to this decoder.
> + *
> + * Return: 0 if command was executed successfully, -ERRNO on error.
> + */
> +int cxl_read_dc_extents(struct cxl_endpoint_decoder *cxled)
> +{
> +	struct cxl_memdev_state *mds = cxled_to_mds(cxled);
> +	struct device *dev = mds->cxlds.dev;
> +	unsigned int extent_gen_num;
> +	int rc;
> +
> +	if (!cxl_dcd_supported(mds)) {
> +		dev_dbg(dev, "DCD unsupported\n");
> +		return 0;
> +	}
> +
> +	rc = cxl_dev_get_dc_extent_cnt(mds, &extent_gen_num);
> +	dev_dbg(mds->cxlds.dev, "Extent count: %d Generation Num: %d\n",
> +		rc, extent_gen_num);
> +	if (rc <= 0) /* 0 == no records found */
> +		return rc;
> +
> +	return cxl_dev_get_dc_extents(cxled, extent_gen_num, rc);

Not sure about the behaviour here. From the cxl_dev_get_dc_extents
implementation below, if gen_num changed or the expected extent count
changed, it will return error. 
If I understand it correctly, if the above two values change, it means
the extent list has been updated due to extent add/release since last
time we read the extent list info (cxl_dev_get_dc_extent_cnt), do we
need to fail the operation or try again?

Fan

> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_read_dc_extents, CXL);
> +
>  static int add_dpa_res(struct device *dev, struct resource *parent,
>  		       struct resource *res, resource_size_t start,
>  		       resource_size_t size, const char *type)
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index 0d7b09a49dcf..3e563ab29afe 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -1450,6 +1450,13 @@ static int cxl_region_validate_position(struct cxl_region *cxlr,
>  	return 0;
>  }
>  
> +/* Callers are expected to ensure cxled has been attached to a region */
> +int cxl_ed_add_one_extent(struct cxl_endpoint_decoder *cxled,
> +			  struct cxl_dc_extent *dc_extent)
> +{
> +	return 0;
> +}
> +
>  static int cxl_region_attach_position(struct cxl_region *cxlr,
>  				      struct cxl_root_decoder *cxlrd,
>  				      struct cxl_endpoint_decoder *cxled,
> @@ -2773,6 +2780,22 @@ static int devm_cxl_add_pmem_region(struct cxl_region *cxlr)
>  	return rc;
>  }
>  
> +static int cxl_region_read_extents(struct cxl_region *cxlr)
> +{
> +	struct cxl_region_params *p = &cxlr->params;
> +	int i;
> +
> +	for (i = 0; i < p->nr_targets; i++) {
> +		int rc;
> +
> +		rc = cxl_read_dc_extents(p->targets[i]);
> +		if (rc)
> +			return rc;
> +	}
> +
> +	return 0;
> +}
> +
>  static void cxlr_dax_unregister(void *_cxlr_dax)
>  {
>  	struct cxl_dax_region *cxlr_dax = _cxlr_dax;
> @@ -2807,6 +2830,12 @@ static int devm_cxl_add_dax_region(struct cxl_region *cxlr)
>  	dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
>  		dev_name(dev));
>  
> +	if (cxlr->mode == CXL_REGION_DC) {
> +		rc = cxl_region_read_extents(cxlr);
> +		if (rc)
> +			goto err;
> +	}
> +
>  	return devm_add_action_or_reset(&cxlr->dev, cxlr_dax_unregister,
>  					cxlr_dax);
>  err:
> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> index 01bee6eedff3..8f2d8944d334 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h
> @@ -604,6 +604,54 @@ enum cxl_opcode {
>  	UUID_INIT(0xe1819d9, 0x11a9, 0x400c, 0x81, 0x1f, 0xd6, 0x07, 0x19,     \
>  		  0x40, 0x3d, 0x86)
>  
> +/*
> + * Add Dynamic Capacity Response
> + * CXL rev 3.1 section 8.2.9.9.9.3; Table 8-168 & Table 8-169
> + */
> +struct cxl_mbox_dc_response {
> +	__le32 extent_list_size;
> +	u8 flags;
> +	u8 reserved[3];
> +	struct updated_extent_list {
> +		__le64 dpa_start;
> +		__le64 length;
> +		u8 reserved[8];
> +	} __packed extent_list[];
> +} __packed;
> +
> +/*
> + * CXL rev 3.1 section 8.2.9.2.1.6; Table 8-51
> + */
> +#define CXL_DC_EXTENT_TAG_LEN 0x10
> +struct cxl_dc_extent {
> +	__le64 start_dpa;
> +	__le64 length;
> +	u8 tag[CXL_DC_EXTENT_TAG_LEN];
> +	__le16 shared_extn_seq;
> +	u8 reserved[6];
> +} __packed;
> +
> +/*
> + * Get Dynamic Capacity Extent List; Input Payload
> + * CXL rev 3.1 section 8.2.9.9.9.2; Table 8-166
> + */
> +struct cxl_mbox_get_dc_extent_in {
> +	__le32 extent_cnt;
> +	__le32 start_extent_index;
> +} __packed;
> +
> +/*
> + * Get Dynamic Capacity Extent List; Output Payload
> + * CXL rev 3.1 section 8.2.9.9.9.2; Table 8-167
> + */
> +struct cxl_mbox_get_dc_extent_out {
> +	__le32 ret_extent_cnt;
> +	__le32 total_extent_cnt;
> +	__le32 extent_list_num;
> +	u8 rsvd[4];
> +	struct cxl_dc_extent extent[];
> +} __packed;
> +
>  struct cxl_mbox_get_supported_logs {
>  	__le16 entries;
>  	u8 rsvd[6];
> @@ -879,6 +927,7 @@ int cxl_internal_send_cmd(struct cxl_memdev_state *mds,
>  			  struct cxl_mbox_cmd *cmd);
>  int cxl_dev_state_identify(struct cxl_memdev_state *mds);
>  int cxl_dev_dynamic_capacity_identify(struct cxl_memdev_state *mds);
> +int cxl_read_dc_extents(struct cxl_endpoint_decoder *cxled);
>  int cxl_await_media_ready(struct cxl_dev_state *cxlds);
>  int cxl_enumerate_cmds(struct cxl_memdev_state *mds);
>  int cxl_mem_create_range_info(struct cxl_memdev_state *mds);
> 
> -- 
> 2.44.0
>
Dave Jiang March 27, 2024, 6:31 p.m. UTC | #3
On 3/24/24 4:18 PM, ira.weiny@intel.com wrote:
> From: Navneet Singh <navneet.singh@intel.com>
> 
> Dynamic capacity device extents may be left in an accepted state on a
> device due to an unexpected host crash.  In this case creation of a new
> region on top of the DC partition (region) is expected to expose those
> extents for continued use.
> 
> Once all endpoint decoders are part of a region and the region is being
> realized read the device extent list.  For ease of review, this patch
> stops after reading the extent list and leaves realization of the region
> extents to a future patch.
> 
> Signed-off-by: Navneet Singh <navneet.singh@intel.com>
> Co-developed-by: Ira Weiny <ira.weiny@intel.com>
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> 
> ---
> Changes for v1:
> [iweiny: remove extent list xarray]
> [iweiny: Update spec references to 3.1]
> [iweiny: use struct range in extents]
> [iweiny: remove all reference tracking and let regions track extents
> 	 through the extent devices.]
> [djbw/Jonathan/Fan: move extent tracking to endpoint decoders]
> ---
>  drivers/cxl/core/core.h   |   9 +++
>  drivers/cxl/core/mbox.c   | 192 ++++++++++++++++++++++++++++++++++++++++++++++
>  drivers/cxl/core/region.c |  29 +++++++
>  drivers/cxl/cxlmem.h      |  49 ++++++++++++
>  4 files changed, 279 insertions(+)
> 
> diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
> index 91abeffbe985..119b12362977 100644
> --- a/drivers/cxl/core/core.h
> +++ b/drivers/cxl/core/core.h
> @@ -4,6 +4,8 @@
>  #ifndef __CXL_CORE_H__
>  #define __CXL_CORE_H__
>  
> +#include <cxlmem.h>
> +
>  extern const struct device_type cxl_nvdimm_bridge_type;
>  extern const struct device_type cxl_nvdimm_type;
>  extern const struct device_type cxl_pmu_type;
> @@ -28,6 +30,8 @@ void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled);
>  int cxl_region_init(void);
>  void cxl_region_exit(void);
>  int cxl_get_poison_by_endpoint(struct cxl_port *port);
> +int cxl_ed_add_one_extent(struct cxl_endpoint_decoder *cxled,
> +			  struct cxl_dc_extent *dc_extent);
>  #else
>  static inline int cxl_get_poison_by_endpoint(struct cxl_port *port)
>  {
> @@ -43,6 +47,11 @@ static inline int cxl_region_init(void)
>  static inline void cxl_region_exit(void)
>  {
>  }
> +static inline int cxl_ed_add_one_extent(struct cxl_endpoint_decoder *cxled,
> +					struct cxl_dc_extent *dc_extent)
> +{
> +	return 0;
> +}
>  #define CXL_REGION_ATTR(x) NULL
>  #define CXL_REGION_TYPE(x) NULL
>  #define SET_CXL_REGION_ATTR(x)
> diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> index 58b31fa47b93..9e33a0976828 100644
> --- a/drivers/cxl/core/mbox.c
> +++ b/drivers/cxl/core/mbox.c
> @@ -870,6 +870,53 @@ int cxl_enumerate_cmds(struct cxl_memdev_state *mds)
>  }
>  EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL);
>  
> +static int cxl_validate_extent(struct cxl_memdev_state *mds,
> +			       struct cxl_dc_extent *dc_extent)
> +{
> +	struct device *dev = mds->cxlds.dev;
> +	uint64_t start, len;
u64


> +
> +	start = le64_to_cpu(dc_extent->start_dpa);
> +	len = le64_to_cpu(dc_extent->length);
> +
> +	/* Extents must not cross region boundary's */
> +	for (int i = 0; i < mds->nr_dc_region; i++) {
> +		struct cxl_dc_region_info *dcr = &mds->dc_region[i];
> +
> +		if (dcr->base <= start &&
> +		    (start + len) <= (dcr->base + dcr->decode_len)) {

Can range_contains() be used here as well?

> +			dev_dbg(dev, "DC extent DPA %#llx - %#llx (DCR:%d:%#llx)\n",
> +				start, start + len - 1, i, start - dcr->base);
> +			return 0;
> +		}
> +	}
> +
> +	dev_err_ratelimited(dev,
> +			    "DC extent DPA %#llx - %#llx is not in any DC region\n",
> +			    start, start + len - 1);
> +	return -EINVAL;
> +}
> +
> +static bool cxl_dc_extent_in_ed(struct cxl_endpoint_decoder *cxled,

cxl_dc_extent_in_endpoint_decoder() is more readable

> +				struct cxl_dc_extent *extent)
> +{
> +	uint64_t start = le64_to_cpu(extent->start_dpa);
> +	uint64_t length = le64_to_cpu(extent->length);
u64


> +	struct range ext_range = (struct range){
> +		.start = start,
> +		.end = start + length - 1,
> +	};
> +	struct range ed_range = (struct range) {
> +		.start = cxled->dpa_res->start,
> +		.end = cxled->dpa_res->end,
> +	};
> +
> +	dev_dbg(&cxled->cxld.dev, "Checking ED (%pr) for extent DPA:%#llx LEN:%#llx\n",
> +		cxled->dpa_res, start, length);
> +
> +	return range_contains(&ed_range, &ext_range);
> +}
> +
>  void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
>  			    enum cxl_event_log_type type,
>  			    enum cxl_event_type event_type,
> @@ -973,6 +1020,15 @@ static int cxl_clear_event_record(struct cxl_memdev_state *mds,
>  	return rc;
>  }
>  
> +static struct cxl_memdev_state *
> +cxled_to_mds(struct cxl_endpoint_decoder *cxled)
> +{
> +	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
> +	struct cxl_dev_state *cxlds = cxlmd->cxlds;
> +
> +	return container_of(cxlds, struct cxl_memdev_state, cxlds);
> +}
> +
>  static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
>  				    enum cxl_event_log_type type)
>  {
> @@ -1406,6 +1462,142 @@ int cxl_dev_dynamic_capacity_identify(struct cxl_memdev_state *mds)
>  }
>  EXPORT_SYMBOL_NS_GPL(cxl_dev_dynamic_capacity_identify, CXL);
>  
> +static int cxl_dev_get_dc_extent_cnt(struct cxl_memdev_state *mds,

cxl_dev_get_dc_extent_generation()? or spell out count

DJ

> +				     unsigned int *extent_gen_num)
> +{
> +	struct cxl_mbox_get_dc_extent_in get_dc_extent;
> +	struct cxl_mbox_get_dc_extent_out dc_extents;
> +	struct cxl_mbox_cmd mbox_cmd;
> +	unsigned int count;
> +	int rc;
> +
> +	get_dc_extent = (struct cxl_mbox_get_dc_extent_in) {
> +		.extent_cnt = cpu_to_le32(0),
> +		.start_extent_index = cpu_to_le32(0),
> +	};
> +
> +	mbox_cmd = (struct cxl_mbox_cmd) {
> +		.opcode = CXL_MBOX_OP_GET_DC_EXTENT_LIST,
> +		.payload_in = &get_dc_extent,
> +		.size_in = sizeof(get_dc_extent),
> +		.size_out = sizeof(dc_extents),
> +		.payload_out = &dc_extents,
> +		.min_out = 1,
> +	};
> +
> +	rc = cxl_internal_send_cmd(mds, &mbox_cmd);
> +	if (rc < 0)
> +		return rc;
> +
> +	count = le32_to_cpu(dc_extents.total_extent_cnt);
> +	*extent_gen_num = le32_to_cpu(dc_extents.extent_list_num);
> +
> +	return count;
> +}
> +
> +static int cxl_dev_get_dc_extents(struct cxl_endpoint_decoder *cxled,
> +				  unsigned int start_gen_num,
> +				  unsigned int exp_cnt)
> +{
> +	struct cxl_memdev_state *mds = cxled_to_mds(cxled);
> +	unsigned int start_index, total_read;
> +	struct device *dev = mds->cxlds.dev;
> +	struct cxl_mbox_cmd mbox_cmd;
> +
> +	struct cxl_mbox_get_dc_extent_out *dc_extents __free(kfree) =
> +				kvmalloc(mds->payload_size, GFP_KERNEL);
> +	if (!dc_extents)
> +		return -ENOMEM;
> +
> +	total_read = 0;
> +	start_index = 0;
> +	do {
> +		unsigned int nr_ext, total_extent_cnt, gen_num;
> +		struct cxl_mbox_get_dc_extent_in get_dc_extent;
> +		int rc;
> +
> +		get_dc_extent = (struct cxl_mbox_get_dc_extent_in) {
> +			.extent_cnt = cpu_to_le32(exp_cnt - start_index),
> +			.start_extent_index = cpu_to_le32(start_index),
> +		};
> +
> +		mbox_cmd = (struct cxl_mbox_cmd) {
> +			.opcode = CXL_MBOX_OP_GET_DC_EXTENT_LIST,
> +			.payload_in = &get_dc_extent,
> +			.size_in = sizeof(get_dc_extent),
> +			.size_out = mds->payload_size,
> +			.payload_out = dc_extents,
> +			.min_out = 1,
> +		};
> +
> +		rc = cxl_internal_send_cmd(mds, &mbox_cmd);
> +		if (rc < 0)
> +			return rc;
> +
> +		nr_ext = le32_to_cpu(dc_extents->ret_extent_cnt);
> +		total_read += nr_ext;
> +		total_extent_cnt = le32_to_cpu(dc_extents->total_extent_cnt);
> +		gen_num = le32_to_cpu(dc_extents->extent_list_num);
> +
> +		dev_dbg(dev, "Get extent list count:%d generation Num:%d\n",
> +			total_extent_cnt, gen_num);
> +
> +		if (gen_num != start_gen_num || exp_cnt != total_extent_cnt) {
> +			dev_err(dev, "Possible incomplete extent list; gen %u != %u : cnt %u != %u\n",
> +				gen_num, start_gen_num, exp_cnt, total_extent_cnt);
> +			return -EIO;
> +		}
> +
> +		for (int i = 0; i < nr_ext ; i++) {
> +			dev_dbg(dev, "Processing extent %d/%d\n",
> +				start_index + i, exp_cnt);
> +			rc = cxl_validate_extent(mds, &dc_extents->extent[i]);
> +			if (rc)
> +				continue;
> +			if (!cxl_dc_extent_in_ed(cxled, &dc_extents->extent[i]))
> +				continue;
> +			rc = cxl_ed_add_one_extent(cxled, &dc_extents->extent[i]);
> +			if (rc)
> +				return rc;
> +		}
> +
> +		start_index += nr_ext;
> +	} while (exp_cnt > total_read);
> +
> +	return 0;
> +}
> +
> +/**
> + * cxl_read_dc_extents() - Read any existing extents
> + * @cxled: Endpoint decoder which is part of a region
> + *
> + * Issue the Get Dynamic Capacity Extent List command to the device
> + * and add any existing extents found which belong to this decoder.
> + *
> + * Return: 0 if command was executed successfully, -ERRNO on error.
> + */
> +int cxl_read_dc_extents(struct cxl_endpoint_decoder *cxled)
> +{
> +	struct cxl_memdev_state *mds = cxled_to_mds(cxled);
> +	struct device *dev = mds->cxlds.dev;
> +	unsigned int extent_gen_num;
> +	int rc;
> +
> +	if (!cxl_dcd_supported(mds)) {
> +		dev_dbg(dev, "DCD unsupported\n");
> +		return 0;
> +	}
> +
> +	rc = cxl_dev_get_dc_extent_cnt(mds, &extent_gen_num);
> +	dev_dbg(mds->cxlds.dev, "Extent count: %d Generation Num: %d\n",
> +		rc, extent_gen_num);
> +	if (rc <= 0) /* 0 == no records found */
> +		return rc;
> +
> +	return cxl_dev_get_dc_extents(cxled, extent_gen_num, rc);
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_read_dc_extents, CXL);
> +
>  static int add_dpa_res(struct device *dev, struct resource *parent,
>  		       struct resource *res, resource_size_t start,
>  		       resource_size_t size, const char *type)
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index 0d7b09a49dcf..3e563ab29afe 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -1450,6 +1450,13 @@ static int cxl_region_validate_position(struct cxl_region *cxlr,
>  	return 0;
>  }
>  
> +/* Callers are expected to ensure cxled has been attached to a region */
> +int cxl_ed_add_one_extent(struct cxl_endpoint_decoder *cxled,
> +			  struct cxl_dc_extent *dc_extent)
> +{
> +	return 0;
> +}
> +
>  static int cxl_region_attach_position(struct cxl_region *cxlr,
>  				      struct cxl_root_decoder *cxlrd,
>  				      struct cxl_endpoint_decoder *cxled,
> @@ -2773,6 +2780,22 @@ static int devm_cxl_add_pmem_region(struct cxl_region *cxlr)
>  	return rc;
>  }
>  
> +static int cxl_region_read_extents(struct cxl_region *cxlr)
> +{
> +	struct cxl_region_params *p = &cxlr->params;
> +	int i;
> +
> +	for (i = 0; i < p->nr_targets; i++) {
> +		int rc;
> +
> +		rc = cxl_read_dc_extents(p->targets[i]);
> +		if (rc)
> +			return rc;
> +	}
> +
> +	return 0;
> +}
> +
>  static void cxlr_dax_unregister(void *_cxlr_dax)
>  {
>  	struct cxl_dax_region *cxlr_dax = _cxlr_dax;
> @@ -2807,6 +2830,12 @@ static int devm_cxl_add_dax_region(struct cxl_region *cxlr)
>  	dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
>  		dev_name(dev));
>  
> +	if (cxlr->mode == CXL_REGION_DC) {
> +		rc = cxl_region_read_extents(cxlr);
> +		if (rc)
> +			goto err;
> +	}
> +
>  	return devm_add_action_or_reset(&cxlr->dev, cxlr_dax_unregister,
>  					cxlr_dax);
>  err:
> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> index 01bee6eedff3..8f2d8944d334 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h
> @@ -604,6 +604,54 @@ enum cxl_opcode {
>  	UUID_INIT(0xe1819d9, 0x11a9, 0x400c, 0x81, 0x1f, 0xd6, 0x07, 0x19,     \
>  		  0x40, 0x3d, 0x86)
>  
> +/*
> + * Add Dynamic Capacity Response
> + * CXL rev 3.1 section 8.2.9.9.9.3; Table 8-168 & Table 8-169
> + */
> +struct cxl_mbox_dc_response {
> +	__le32 extent_list_size;
> +	u8 flags;
> +	u8 reserved[3];
> +	struct updated_extent_list {
> +		__le64 dpa_start;
> +		__le64 length;
> +		u8 reserved[8];
> +	} __packed extent_list[];
> +} __packed;
> +
> +/*
> + * CXL rev 3.1 section 8.2.9.2.1.6; Table 8-51
> + */
> +#define CXL_DC_EXTENT_TAG_LEN 0x10
> +struct cxl_dc_extent {
> +	__le64 start_dpa;
> +	__le64 length;
> +	u8 tag[CXL_DC_EXTENT_TAG_LEN];
> +	__le16 shared_extn_seq;
> +	u8 reserved[6];
> +} __packed;
> +
> +/*
> + * Get Dynamic Capacity Extent List; Input Payload
> + * CXL rev 3.1 section 8.2.9.9.9.2; Table 8-166
> + */
> +struct cxl_mbox_get_dc_extent_in {
> +	__le32 extent_cnt;
> +	__le32 start_extent_index;
> +} __packed;
> +
> +/*
> + * Get Dynamic Capacity Extent List; Output Payload
> + * CXL rev 3.1 section 8.2.9.9.9.2; Table 8-167
> + */
> +struct cxl_mbox_get_dc_extent_out {
> +	__le32 ret_extent_cnt;
> +	__le32 total_extent_cnt;
> +	__le32 extent_list_num;
> +	u8 rsvd[4];
> +	struct cxl_dc_extent extent[];
> +} __packed;
> +
>  struct cxl_mbox_get_supported_logs {
>  	__le16 entries;
>  	u8 rsvd[6];
> @@ -879,6 +927,7 @@ int cxl_internal_send_cmd(struct cxl_memdev_state *mds,
>  			  struct cxl_mbox_cmd *cmd);
>  int cxl_dev_state_identify(struct cxl_memdev_state *mds);
>  int cxl_dev_dynamic_capacity_identify(struct cxl_memdev_state *mds);
> +int cxl_read_dc_extents(struct cxl_endpoint_decoder *cxled);
>  int cxl_await_media_ready(struct cxl_dev_state *cxlds);
>  int cxl_enumerate_cmds(struct cxl_memdev_state *mds);
>  int cxl_mem_create_range_info(struct cxl_memdev_state *mds);
>
Jørgen Hansen April 2, 2024, 1:57 p.m. UTC | #4
On 3/25/24 00:18, ira.weiny@intel.com wrote:
> From: Navneet Singh <navneet.singh@intel.com>
> 
> Dynamic capacity device extents may be left in an accepted state on a
> device due to an unexpected host crash.  In this case creation of a new
> region on top of the DC partition (region) is expected to expose those
> extents for continued use.
> 
> Once all endpoint decoders are part of a region and the region is being
> realized read the device extent list.  For ease of review, this patch
> stops after reading the extent list and leaves realization of the region
> extents to a future patch.
> 
> Signed-off-by: Navneet Singh <navneet.singh@intel.com>
> Co-developed-by: Ira Weiny <ira.weiny@intel.com>
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> 
> ---
> Changes for v1:
> [iweiny: remove extent list xarray]
> [iweiny: Update spec references to 3.1]
> [iweiny: use struct range in extents]
> [iweiny: remove all reference tracking and let regions track extents
>           through the extent devices.]
> [djbw/Jonathan/Fan: move extent tracking to endpoint decoders]
> ---
>   drivers/cxl/core/core.h   |   9 +++
>   drivers/cxl/core/mbox.c   | 192 ++++++++++++++++++++++++++++++++++++++++++++++
>   drivers/cxl/core/region.c |  29 +++++++
>   drivers/cxl/cxlmem.h      |  49 ++++++++++++
>   4 files changed, 279 insertions(+)
> 
> diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
> index 91abeffbe985..119b12362977 100644
> --- a/drivers/cxl/core/core.h
> +++ b/drivers/cxl/core/core.h
> @@ -4,6 +4,8 @@
>   #ifndef __CXL_CORE_H__
>   #define __CXL_CORE_H__
> 
> +#include <cxlmem.h>
> +
>   extern const struct device_type cxl_nvdimm_bridge_type;
>   extern const struct device_type cxl_nvdimm_type;
>   extern const struct device_type cxl_pmu_type;
> @@ -28,6 +30,8 @@ void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled);
>   int cxl_region_init(void);
>   void cxl_region_exit(void);
>   int cxl_get_poison_by_endpoint(struct cxl_port *port);
> +int cxl_ed_add_one_extent(struct cxl_endpoint_decoder *cxled,
> +                         struct cxl_dc_extent *dc_extent);
>   #else
>   static inline int cxl_get_poison_by_endpoint(struct cxl_port *port)
>   {
> @@ -43,6 +47,11 @@ static inline int cxl_region_init(void)
>   static inline void cxl_region_exit(void)
>   {
>   }
> +static inline int cxl_ed_add_one_extent(struct cxl_endpoint_decoder *cxled,
> +                                       struct cxl_dc_extent *dc_extent)
> +{
> +       return 0;
> +}
>   #define CXL_REGION_ATTR(x) NULL
>   #define CXL_REGION_TYPE(x) NULL
>   #define SET_CXL_REGION_ATTR(x)
> diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> index 58b31fa47b93..9e33a0976828 100644
> --- a/drivers/cxl/core/mbox.c
> +++ b/drivers/cxl/core/mbox.c
> @@ -870,6 +870,53 @@ int cxl_enumerate_cmds(struct cxl_memdev_state *mds)
>   }
>   EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL);
> 
> +static int cxl_validate_extent(struct cxl_memdev_state *mds,
> +                              struct cxl_dc_extent *dc_extent)
> +{
> +       struct device *dev = mds->cxlds.dev;
> +       uint64_t start, len;
> +
> +       start = le64_to_cpu(dc_extent->start_dpa);
> +       len = le64_to_cpu(dc_extent->length);
> +
> +       /* Extents must not cross region boundary's */
> +       for (int i = 0; i < mds->nr_dc_region; i++) {
> +               struct cxl_dc_region_info *dcr = &mds->dc_region[i];
> +
> +               if (dcr->base <= start &&
> +                   (start + len) <= (dcr->base + dcr->decode_len)) {
> +                       dev_dbg(dev, "DC extent DPA %#llx - %#llx (DCR:%d:%#llx)\n",
> +                               start, start + len - 1, i, start - dcr->base);
> +                       return 0;
> +               }
> +       }
> +
> +       dev_err_ratelimited(dev,
> +                           "DC extent DPA %#llx - %#llx is not in any DC region\n",
> +                           start, start + len - 1);
> +       return -EINVAL;
> +}
> +
> +static bool cxl_dc_extent_in_ed(struct cxl_endpoint_decoder *cxled,
> +                               struct cxl_dc_extent *extent)
> +{
> +       uint64_t start = le64_to_cpu(extent->start_dpa);
> +       uint64_t length = le64_to_cpu(extent->length);
> +       struct range ext_range = (struct range){
> +               .start = start,
> +               .end = start + length - 1,
> +       };
> +       struct range ed_range = (struct range) {
> +               .start = cxled->dpa_res->start,
> +               .end = cxled->dpa_res->end,
> +       };
> +
> +       dev_dbg(&cxled->cxld.dev, "Checking ED (%pr) for extent DPA:%#llx LEN:%#llx\n",
> +               cxled->dpa_res, start, length);
> +
> +       return range_contains(&ed_range, &ext_range);
> +}
> +
>   void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
>                              enum cxl_event_log_type type,
>                              enum cxl_event_type event_type,
> @@ -973,6 +1020,15 @@ static int cxl_clear_event_record(struct cxl_memdev_state *mds,
>          return rc;
>   }
> 
> +static struct cxl_memdev_state *
> +cxled_to_mds(struct cxl_endpoint_decoder *cxled)
> +{
> +       struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
> +       struct cxl_dev_state *cxlds = cxlmd->cxlds;
> +
> +       return container_of(cxlds, struct cxl_memdev_state, cxlds);
> +}
> +
>   static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
>                                      enum cxl_event_log_type type)
>   {
> @@ -1406,6 +1462,142 @@ int cxl_dev_dynamic_capacity_identify(struct cxl_memdev_state *mds)
>   }
>   EXPORT_SYMBOL_NS_GPL(cxl_dev_dynamic_capacity_identify, CXL);
> 
> +static int cxl_dev_get_dc_extent_cnt(struct cxl_memdev_state *mds,
> +                                    unsigned int *extent_gen_num)
> +{
> +       struct cxl_mbox_get_dc_extent_in get_dc_extent;
> +       struct cxl_mbox_get_dc_extent_out dc_extents;
> +       struct cxl_mbox_cmd mbox_cmd;
> +       unsigned int count;
> +       int rc;
> +
> +       get_dc_extent = (struct cxl_mbox_get_dc_extent_in) {
> +               .extent_cnt = cpu_to_le32(0),
> +               .start_extent_index = cpu_to_le32(0),
> +       };
> +
> +       mbox_cmd = (struct cxl_mbox_cmd) {
> +               .opcode = CXL_MBOX_OP_GET_DC_EXTENT_LIST,
> +               .payload_in = &get_dc_extent,
> +               .size_in = sizeof(get_dc_extent),
> +               .size_out = sizeof(dc_extents),
> +               .payload_out = &dc_extents,
> +               .min_out = 1,
> +       };
> +
> +       rc = cxl_internal_send_cmd(mds, &mbox_cmd);
> +       if (rc < 0)
> +               return rc;
> +
> +       count = le32_to_cpu(dc_extents.total_extent_cnt);
> +       *extent_gen_num = le32_to_cpu(dc_extents.extent_list_num);
> +
> +       return count;
> +}
> +
> +static int cxl_dev_get_dc_extents(struct cxl_endpoint_decoder *cxled,
> +                                 unsigned int start_gen_num,
> +                                 unsigned int exp_cnt)
> +{
> +       struct cxl_memdev_state *mds = cxled_to_mds(cxled);
> +       unsigned int start_index, total_read;
> +       struct device *dev = mds->cxlds.dev;
> +       struct cxl_mbox_cmd mbox_cmd;
> +
> +       struct cxl_mbox_get_dc_extent_out *dc_extents __free(kfree) =
> +                               kvmalloc(mds->payload_size, GFP_KERNEL);
> +       if (!dc_extents)
> +               return -ENOMEM;
> +
> +       total_read = 0;
> +       start_index = 0;
> +       do {
> +               unsigned int nr_ext, total_extent_cnt, gen_num;
> +               struct cxl_mbox_get_dc_extent_in get_dc_extent;
> +               int rc;
> +
> +               get_dc_extent = (struct cxl_mbox_get_dc_extent_in) {
> +                       .extent_cnt = cpu_to_le32(exp_cnt - start_index),
> +                       .start_extent_index = cpu_to_le32(start_index),
> +               };
> +
> +               mbox_cmd = (struct cxl_mbox_cmd) {
> +                       .opcode = CXL_MBOX_OP_GET_DC_EXTENT_LIST,
> +                       .payload_in = &get_dc_extent,
> +                       .size_in = sizeof(get_dc_extent),
> +                       .size_out = mds->payload_size,
> +                       .payload_out = dc_extents,
> +                       .min_out = 1,
> +               };
> +
> +               rc = cxl_internal_send_cmd(mds, &mbox_cmd);
> +               if (rc < 0)
> +                       return rc;
> +
> +               nr_ext = le32_to_cpu(dc_extents->ret_extent_cnt);
> +               total_read += nr_ext;
> +               total_extent_cnt = le32_to_cpu(dc_extents->total_extent_cnt);
> +               gen_num = le32_to_cpu(dc_extents->extent_list_num);
> +
> +               dev_dbg(dev, "Get extent list count:%d generation Num:%d\n",
> +                       total_extent_cnt, gen_num);
> +
> +               if (gen_num != start_gen_num || exp_cnt != total_extent_cnt) {
> +                       dev_err(dev, "Possible incomplete extent list; gen %u != %u : cnt %u != %u\n",
> +                               gen_num, start_gen_num, exp_cnt, total_extent_cnt);
> +                       return -EIO;
> +               }
> +
> +               for (int i = 0; i < nr_ext ; i++) {
> +                       dev_dbg(dev, "Processing extent %d/%d\n",
> +                               start_index + i, exp_cnt);
> +                       rc = cxl_validate_extent(mds, &dc_extents->extent[i]);
> +                       if (rc)
> +                               continue;
> +                       if (!cxl_dc_extent_in_ed(cxled, &dc_extents->extent[i]))
> +                               continue;
> +                       rc = cxl_ed_add_one_extent(cxled, &dc_extents->extent[i]);
> +                       if (rc)
> +                               return rc;
> +               }
> +
> +               start_index += nr_ext;
> +       } while (exp_cnt > total_read);
> +
> +       return 0;
> +}
> +
> +/**
> + * cxl_read_dc_extents() - Read any existing extents
> + * @cxled: Endpoint decoder which is part of a region
> + *
> + * Issue the Get Dynamic Capacity Extent List command to the device
> + * and add any existing extents found which belong to this decoder.
> + *
> + * Return: 0 if command was executed successfully, -ERRNO on error.
> + */
> +int cxl_read_dc_extents(struct cxl_endpoint_decoder *cxled)
> +{
> +       struct cxl_memdev_state *mds = cxled_to_mds(cxled);
> +       struct device *dev = mds->cxlds.dev;
> +       unsigned int extent_gen_num;
> +       int rc;
> +
> +       if (!cxl_dcd_supported(mds)) {
> +               dev_dbg(dev, "DCD unsupported\n");
> +               return 0;
> +       }
> +
> +       rc = cxl_dev_get_dc_extent_cnt(mds, &extent_gen_num);
> +       dev_dbg(mds->cxlds.dev, "Extent count: %d Generation Num: %d\n",
> +               rc, extent_gen_num);
> +       if (rc <= 0) /* 0 == no records found */
> +               return rc;
> +
> +       return cxl_dev_get_dc_extents(cxled, extent_gen_num, rc);

Is it necessary to spend a device interaction to get the generation 
number? Couldn't cxl_dev_get_dc_extents obtain that as part of the first 
call to the device, and then use it to ensure the consistency of any 
remaining calls, if any are necessary?

Thanks,
Jørgen

> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_read_dc_extents, CXL);
> +
>   static int add_dpa_res(struct device *dev, struct resource *parent,
>                         struct resource *res, resource_size_t start,
>                         resource_size_t size, const char *type)
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index 0d7b09a49dcf..3e563ab29afe 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -1450,6 +1450,13 @@ static int cxl_region_validate_position(struct cxl_region *cxlr,
>          return 0;
>   }
> 
> +/* Callers are expected to ensure cxled has been attached to a region */
> +int cxl_ed_add_one_extent(struct cxl_endpoint_decoder *cxled,
> +                         struct cxl_dc_extent *dc_extent)
> +{
> +       return 0;
> +}
> +
>   static int cxl_region_attach_position(struct cxl_region *cxlr,
>                                        struct cxl_root_decoder *cxlrd,
>                                        struct cxl_endpoint_decoder *cxled,
> @@ -2773,6 +2780,22 @@ static int devm_cxl_add_pmem_region(struct cxl_region *cxlr)
>          return rc;
>   }
> 
> +static int cxl_region_read_extents(struct cxl_region *cxlr)
> +{
> +       struct cxl_region_params *p = &cxlr->params;
> +       int i;
> +
> +       for (i = 0; i < p->nr_targets; i++) {
> +               int rc;
> +
> +               rc = cxl_read_dc_extents(p->targets[i]);
> +               if (rc)
> +                       return rc;
> +       }
> +
> +       return 0;
> +}
> +
>   static void cxlr_dax_unregister(void *_cxlr_dax)
>   {
>          struct cxl_dax_region *cxlr_dax = _cxlr_dax;
> @@ -2807,6 +2830,12 @@ static int devm_cxl_add_dax_region(struct cxl_region *cxlr)
>          dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
>                  dev_name(dev));
> 
> +       if (cxlr->mode == CXL_REGION_DC) {
> +               rc = cxl_region_read_extents(cxlr);
> +               if (rc)
> +                       goto err;
> +       }
> +
>          return devm_add_action_or_reset(&cxlr->dev, cxlr_dax_unregister,
>                                          cxlr_dax);
>   err:
> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> index 01bee6eedff3..8f2d8944d334 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h
> @@ -604,6 +604,54 @@ enum cxl_opcode {
>          UUID_INIT(0xe1819d9, 0x11a9, 0x400c, 0x81, 0x1f, 0xd6, 0x07, 0x19,     \
>                    0x40, 0x3d, 0x86)
> 
> +/*
> + * Add Dynamic Capacity Response
> + * CXL rev 3.1 section 8.2.9.9.9.3; Table 8-168 & Table 8-169
> + */
> +struct cxl_mbox_dc_response {
> +       __le32 extent_list_size;
> +       u8 flags;
> +       u8 reserved[3];
> +       struct updated_extent_list {
> +               __le64 dpa_start;
> +               __le64 length;
> +               u8 reserved[8];
> +       } __packed extent_list[];
> +} __packed;
> +
> +/*
> + * CXL rev 3.1 section 8.2.9.2.1.6; Table 8-51
> + */
> +#define CXL_DC_EXTENT_TAG_LEN 0x10
> +struct cxl_dc_extent {
> +       __le64 start_dpa;
> +       __le64 length;
> +       u8 tag[CXL_DC_EXTENT_TAG_LEN];
> +       __le16 shared_extn_seq;
> +       u8 reserved[6];
> +} __packed;
> +
> +/*
> + * Get Dynamic Capacity Extent List; Input Payload
> + * CXL rev 3.1 section 8.2.9.9.9.2; Table 8-166
> + */
> +struct cxl_mbox_get_dc_extent_in {
> +       __le32 extent_cnt;
> +       __le32 start_extent_index;
> +} __packed;
> +
> +/*
> + * Get Dynamic Capacity Extent List; Output Payload
> + * CXL rev 3.1 section 8.2.9.9.9.2; Table 8-167
> + */
> +struct cxl_mbox_get_dc_extent_out {
> +       __le32 ret_extent_cnt;
> +       __le32 total_extent_cnt;
> +       __le32 extent_list_num;
> +       u8 rsvd[4];
> +       struct cxl_dc_extent extent[];
> +} __packed;
> +
>   struct cxl_mbox_get_supported_logs {
>          __le16 entries;
>          u8 rsvd[6];
> @@ -879,6 +927,7 @@ int cxl_internal_send_cmd(struct cxl_memdev_state *mds,
>                            struct cxl_mbox_cmd *cmd);
>   int cxl_dev_state_identify(struct cxl_memdev_state *mds);
>   int cxl_dev_dynamic_capacity_identify(struct cxl_memdev_state *mds);
> +int cxl_read_dc_extents(struct cxl_endpoint_decoder *cxled);
>   int cxl_await_media_ready(struct cxl_dev_state *cxlds);
>   int cxl_enumerate_cmds(struct cxl_memdev_state *mds);
>   int cxl_mem_create_range_info(struct cxl_memdev_state *mds);
> 
> --
> 2.44.0
> 
>
Jonathan Cameron April 4, 2024, 4:04 p.m. UTC | #5
On Sun, 24 Mar 2024 16:18:17 -0700
ira.weiny@intel.com wrote:

> From: Navneet Singh <navneet.singh@intel.com>
> 
> Dynamic capacity device extents may be left in an accepted state on a
> device due to an unexpected host crash.  In this case creation of a new
> region on top of the DC partition (region) is expected to expose those
> extents for continued use.
> 
> Once all endpoint decoders are part of a region and the region is being
> realized read the device extent list.  For ease of review, this patch
> stops after reading the extent list and leaves realization of the region
> extents to a future patch.
> 
> Signed-off-by: Navneet Singh <navneet.singh@intel.com>
> Co-developed-by: Ira Weiny <ira.weiny@intel.com>
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>

A few things inline.

J
>  static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
>  				    enum cxl_event_log_type type)
>  {
> @@ -1406,6 +1462,142 @@ int cxl_dev_dynamic_capacity_identify(struct cxl_memdev_state *mds)
>  }
>  EXPORT_SYMBOL_NS_GPL(cxl_dev_dynamic_capacity_identify, CXL);
>  
> +static int cxl_dev_get_dc_extent_cnt(struct cxl_memdev_state *mds,
> +				     unsigned int *extent_gen_num)
> +{
> +	struct cxl_mbox_get_dc_extent_in get_dc_extent;
> +	struct cxl_mbox_get_dc_extent_out dc_extents;
> +	struct cxl_mbox_cmd mbox_cmd;
> +	unsigned int count;
> +	int rc;
> +
> +	get_dc_extent = (struct cxl_mbox_get_dc_extent_in) {
> +		.extent_cnt = cpu_to_le32(0),
> +		.start_extent_index = cpu_to_le32(0),
> +	};
> +
> +	mbox_cmd = (struct cxl_mbox_cmd) {
> +		.opcode = CXL_MBOX_OP_GET_DC_EXTENT_LIST,
> +		.payload_in = &get_dc_extent,
> +		.size_in = sizeof(get_dc_extent),
> +		.size_out = sizeof(dc_extents),
> +		.payload_out = &dc_extents,
> +		.min_out = 1,

Why 1?

> +	};
> +
> +	rc = cxl_internal_send_cmd(mds, &mbox_cmd);
> +	if (rc < 0)
> +		return rc;
> +
> +	count = le32_to_cpu(dc_extents.total_extent_cnt);
> +	*extent_gen_num = le32_to_cpu(dc_extents.extent_list_num);
> +
> +	return count;
> +}
> +
> +static int cxl_dev_get_dc_extents(struct cxl_endpoint_decoder *cxled,
> +				  unsigned int start_gen_num,
> +				  unsigned int exp_cnt)
> +{
> +	struct cxl_memdev_state *mds = cxled_to_mds(cxled);
> +	unsigned int start_index, total_read;
> +	struct device *dev = mds->cxlds.dev;
> +	struct cxl_mbox_cmd mbox_cmd;
> +
> +	struct cxl_mbox_get_dc_extent_out *dc_extents __free(kfree) =
> +				kvmalloc(mds->payload_size, GFP_KERNEL);
> +	if (!dc_extents)
> +		return -ENOMEM;
> +
> +	total_read = 0;
> +	start_index = 0;
> +	do {
> +		unsigned int nr_ext, total_extent_cnt, gen_num;
> +		struct cxl_mbox_get_dc_extent_in get_dc_extent;
> +		int rc;
> +
> +		get_dc_extent = (struct cxl_mbox_get_dc_extent_in) {
> +			.extent_cnt = cpu_to_le32(exp_cnt - start_index),
> +			.start_extent_index = cpu_to_le32(start_index),
> +		};
> +
> +		mbox_cmd = (struct cxl_mbox_cmd) {
> +			.opcode = CXL_MBOX_OP_GET_DC_EXTENT_LIST,
> +			.payload_in = &get_dc_extent,
> +			.size_in = sizeof(get_dc_extent),
> +			.size_out = mds->payload_size,
> +			.payload_out = dc_extents,
> +			.min_out = 1,

Why 1?

> +		};
> +
> +		rc = cxl_internal_send_cmd(mds, &mbox_cmd);
> +		if (rc < 0)
> +			return rc;
> +
> +		nr_ext = le32_to_cpu(dc_extents->ret_extent_cnt);
> +		total_read += nr_ext;
> +		total_extent_cnt = le32_to_cpu(dc_extents->total_extent_cnt);
> +		gen_num = le32_to_cpu(dc_extents->extent_list_num);
> +
> +		dev_dbg(dev, "Get extent list count:%d generation Num:%d\n",
> +			total_extent_cnt, gen_num);
> +
> +		if (gen_num != start_gen_num || exp_cnt != total_extent_cnt) {
> +			dev_err(dev, "Possible incomplete extent list; gen %u != %u : cnt %u != %u\n",
> +				gen_num, start_gen_num, exp_cnt, total_extent_cnt);
> +			return -EIO;
> +		}
> +
> +		for (int i = 0; i < nr_ext ; i++) {
> +			dev_dbg(dev, "Processing extent %d/%d\n",
> +				start_index + i, exp_cnt);
> +			rc = cxl_validate_extent(mds, &dc_extents->extent[i]);
> +			if (rc)
> +				continue;

A blank line here

> +			if (!cxl_dc_extent_in_ed(cxled, &dc_extents->extent[i]))
> +				continue;
and here would make this more readable I think.

> +			rc = cxl_ed_add_one_extent(cxled, &dc_extents->extent[i]);
> +			if (rc)
> +				return rc;
> +		}
> +
> +		start_index += nr_ext;
> +	} while (exp_cnt > total_read);
> +
> +	return 0;
> +}

> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index 0d7b09a49dcf..3e563ab29afe 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c

>  
> +static int cxl_region_read_extents(struct cxl_region *cxlr)
> +{
> +	struct cxl_region_params *p = &cxlr->params;
> +	int i;
> +
> +	for (i = 0; i < p->nr_targets; i++) {
> +		int rc;
Maybe worth giving up early if we see nr_targets > 1?

If nothing else it saves people trying to figure out what happens if we
reboot into an older kernel that doesn't support interleave (from one
that does)

> +
> +		rc = cxl_read_dc_extents(p->targets[i]);
> +		if (rc)
> +			return rc;
> +	}
> +
> +	return 0;
> +}
> +

> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> index 01bee6eedff3..8f2d8944d334 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h

> +/*
> + * CXL rev 3.1 section 8.2.9.2.1.6; Table 8-51

Throw a table name or section name into the reference so people
can find it in CXL rNext.

> + */
> +#define CXL_DC_EXTENT_TAG_LEN 0x10
> +struct cxl_dc_extent {
> +	__le64 start_dpa;
> +	__le64 length;
> +	u8 tag[CXL_DC_EXTENT_TAG_LEN];
> +	__le16 shared_extn_seq;
> +	u8 reserved[6];
> +} __packed;
> +

> +/*
> + * Get Dynamic Capacity Extent List; Output Payload
> + * CXL rev 3.1 section 8.2.9.9.9.2; Table 8-167
> + */
> +struct cxl_mbox_get_dc_extent_out {
> +	__le32 ret_extent_cnt;
> +	__le32 total_extent_cnt;
> +	__le32 extent_list_num;

Naming isn't that clear given generation bit missing.

> +	u8 rsvd[4];
> +	struct cxl_dc_extent extent[];
> +} __packed;
> +
Jonathan Cameron April 4, 2024, 4:13 p.m. UTC | #6
> +static bool cxl_dc_extent_in_ed(struct cxl_endpoint_decoder *cxled,
> +				struct cxl_dc_extent *extent)
> +{
> +	uint64_t start = le64_to_cpu(extent->start_dpa);
> +	uint64_t length = le64_to_cpu(extent->length);
> +	struct range ext_range = (struct range){
space ) {

> +		.start = start,
> +		.end = start + length - 1,
> +	};
> +	struct range ed_range = (struct range) {
> +		.start = cxled->dpa_res->start,
> +		.end = cxled->dpa_res->end,
> +	};
Ira Weiny April 10, 2024, 5:46 a.m. UTC | #7
fan wrote:
> On Sun, Mar 24, 2024 at 04:18:17PM -0700, ira.weiny@intel.com wrote:
> > From: Navneet Singh <navneet.singh@intel.com>
> > 

[snip]

> > diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> > index 58b31fa47b93..9e33a0976828 100644
> > --- a/drivers/cxl/core/mbox.c
> > +++ b/drivers/cxl/core/mbox.c
> > @@ -870,6 +870,53 @@ int cxl_enumerate_cmds(struct cxl_memdev_state *mds)
> >  }
> >  EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL);
> >  
> > +static int cxl_validate_extent(struct cxl_memdev_state *mds,
> > +			       struct cxl_dc_extent *dc_extent)
> > +{
> > +	struct device *dev = mds->cxlds.dev;
> > +	uint64_t start, len;
> > +
> > +	start = le64_to_cpu(dc_extent->start_dpa);
> > +	len = le64_to_cpu(dc_extent->length);
> > +
> > +	/* Extents must not cross region boundary's */
> > +	for (int i = 0; i < mds->nr_dc_region; i++) {
> > +		struct cxl_dc_region_info *dcr = &mds->dc_region[i];
> > +
> > +		if (dcr->base <= start &&
> > +		    (start + len) <= (dcr->base + dcr->decode_len)) {
> 
> Why not use range_contains here as below?

Because when I initially wrote this I (or perhaps Navneet, I can't remember) we
were not using ranges.  This version I tried to convert to ranges and I missed
this one.

Good catch!

Ira
Ira Weiny April 10, 2024, 6:09 a.m. UTC | #8
Dave Jiang wrote:
> 
> 
> On 3/24/24 4:18 PM, ira.weiny@intel.com wrote:
> > From: Navneet Singh <navneet.singh@intel.com>
> > 

[snip]

> > diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> > index 58b31fa47b93..9e33a0976828 100644
> > --- a/drivers/cxl/core/mbox.c
> > +++ b/drivers/cxl/core/mbox.c
> > @@ -870,6 +870,53 @@ int cxl_enumerate_cmds(struct cxl_memdev_state *mds)
> >  }
> >  EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL);
> >  
> > +static int cxl_validate_extent(struct cxl_memdev_state *mds,
> > +			       struct cxl_dc_extent *dc_extent)
> > +{
> > +	struct device *dev = mds->cxlds.dev;
> > +	uint64_t start, len;
> u64
> 

Yep

> 
> > +
> > +	start = le64_to_cpu(dc_extent->start_dpa);
> > +	len = le64_to_cpu(dc_extent->length);
> > +
> > +	/* Extents must not cross region boundary's */
> > +	for (int i = 0; i < mds->nr_dc_region; i++) {
> > +		struct cxl_dc_region_info *dcr = &mds->dc_region[i];
> > +
> > +		if (dcr->base <= start &&
> > +		    (start + len) <= (dcr->base + dcr->decode_len)) {
> 
> Can range_contains() be used here as well?

Yep and done.

> 
> > +			dev_dbg(dev, "DC extent DPA %#llx - %#llx (DCR:%d:%#llx)\n",
> > +				start, start + len - 1, i, start - dcr->base);
> > +			return 0;
> > +		}
> > +	}
> > +
> > +	dev_err_ratelimited(dev,
> > +			    "DC extent DPA %#llx - %#llx is not in any DC region\n",
> > +			    start, start + len - 1);
> > +	return -EINVAL;
> > +}
> > +
> > +static bool cxl_dc_extent_in_ed(struct cxl_endpoint_decoder *cxled,
> 
> cxl_dc_extent_in_endpoint_decoder() is more readable

Sure but we are getting awful close to Java naming there...  j/k  ;-)  Changed.

> 
> > +				struct cxl_dc_extent *extent)
> > +{
> > +	uint64_t start = le64_to_cpu(extent->start_dpa);
> > +	uint64_t length = le64_to_cpu(extent->length);
> u64
> 

Yep


[snip]

> >  
> > +static struct cxl_memdev_state *
> > +cxled_to_mds(struct cxl_endpoint_decoder *cxled)
> > +{
> > +	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
> > +	struct cxl_dev_state *cxlds = cxlmd->cxlds;
> > +
> > +	return container_of(cxlds, struct cxl_memdev_state, cxlds);
> > +}
> > +
> >  static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
> >  				    enum cxl_event_log_type type)
> >  {
> > @@ -1406,6 +1462,142 @@ int cxl_dev_dynamic_capacity_identify(struct cxl_memdev_state *mds)
> >  }
> >  EXPORT_SYMBOL_NS_GPL(cxl_dev_dynamic_capacity_identify, CXL);
> >  
> > +static int cxl_dev_get_dc_extent_cnt(struct cxl_memdev_state *mds,
> 
> cxl_dev_get_dc_extent_generation()? or spell out count

I'll spell out count because that is the primary goal.  The generation number
is just to be able to check each query to ensure the list does not change
whilst reading.

Ira
Ira Weiny April 10, 2024, 6:19 a.m. UTC | #9
fan wrote:
> On Sun, Mar 24, 2024 at 04:18:17PM -0700, ira.weiny@intel.com wrote:
> > From: Navneet Singh <navneet.singh@intel.com>
> > 

[snip]

> > +
> > +/**
> > + * cxl_read_dc_extents() - Read any existing extents
> > + * @cxled: Endpoint decoder which is part of a region
> > + *
> > + * Issue the Get Dynamic Capacity Extent List command to the device
> > + * and add any existing extents found which belong to this decoder.
> > + *
> > + * Return: 0 if command was executed successfully, -ERRNO on error.
> > + */
> > +int cxl_read_dc_extents(struct cxl_endpoint_decoder *cxled)
> > +{
> > +	struct cxl_memdev_state *mds = cxled_to_mds(cxled);
> > +	struct device *dev = mds->cxlds.dev;
> > +	unsigned int extent_gen_num;
> > +	int rc;
> > +
> > +	if (!cxl_dcd_supported(mds)) {
> > +		dev_dbg(dev, "DCD unsupported\n");
> > +		return 0;
> > +	}
> > +
> > +	rc = cxl_dev_get_dc_extent_cnt(mds, &extent_gen_num);
> > +	dev_dbg(mds->cxlds.dev, "Extent count: %d Generation Num: %d\n",
> > +		rc, extent_gen_num);
> > +	if (rc <= 0) /* 0 == no records found */
> > +		return rc;
> > +
> > +	return cxl_dev_get_dc_extents(cxled, extent_gen_num, rc);
> 
> Not sure about the behaviour here. From the cxl_dev_get_dc_extents
> implementation below, if gen_num changed or the expected extent count
> changed, it will return error. 

yep.

> If I understand it correctly, if the above two values change, it means
> the extent list has been updated due to extent add/release since last
> time we read the extent list info (cxl_dev_get_dc_extent_cnt), do we
> need to fail the operation or try again?

The original series was safe to fail the operation because the list was read on
memory device driver load and not when the regions were created.  This is an
oversight with the new architecture.  Now that regions query for the list
independent of other regions being active the list could indeed change during
this operation.  :-/  So a retry is necessary.

Let me work on the retry because some of the extents may have been surfaced
during the list processing which means a re-read of the list will need to
properly ignore those already found.  Or some other tracking needs to be put in
place.

Ira
Ira Weiny April 10, 2024, 6:29 a.m. UTC | #10
Jørgen Hansen wrote:
> On 3/25/24 00:18, ira.weiny@intel.com wrote:
> > From: Navneet Singh <navneet.singh@intel.com>
> > 
> > Dynamic capacity device extents may be left in an accepted state on a
> > device due to an unexpected host crash.  In this case creation of a new
> > region on top of the DC partition (region) is expected to expose those
> > extents for continued use.
> > 
> > Once all endpoint decoders are part of a region and the region is being
> > realized read the device extent list.  For ease of review, this patch
> > stops after reading the extent list and leaves realization of the region
> > extents to a future patch.
> > 
> > Signed-off-by: Navneet Singh <navneet.singh@intel.com>
> > Co-developed-by: Ira Weiny <ira.weiny@intel.com>
> > Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> > 
> > ---
> > Changes for v1:
> > [iweiny: remove extent list xarray]
> > [iweiny: Update spec references to 3.1]
> > [iweiny: use struct range in extents]
> > [iweiny: remove all reference tracking and let regions track extents
> >           through the extent devices.]
> > [djbw/Jonathan/Fan: move extent tracking to endpoint decoders]
> > ---
> >   drivers/cxl/core/core.h   |   9 +++
> >   drivers/cxl/core/mbox.c   | 192 ++++++++++++++++++++++++++++++++++++++++++++++
> >   drivers/cxl/core/region.c |  29 +++++++
> >   drivers/cxl/cxlmem.h      |  49 ++++++++++++
> >   4 files changed, 279 insertions(+)
> > 
> > diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
> > index 91abeffbe985..119b12362977 100644
> > --- a/drivers/cxl/core/core.h
> > +++ b/drivers/cxl/core/core.h
> > @@ -4,6 +4,8 @@
> >   #ifndef __CXL_CORE_H__
> >   #define __CXL_CORE_H__
> > 
> > +#include <cxlmem.h>
> > +
> >   extern const struct device_type cxl_nvdimm_bridge_type;
> >   extern const struct device_type cxl_nvdimm_type;
> >   extern const struct device_type cxl_pmu_type;
> > @@ -28,6 +30,8 @@ void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled);
> >   int cxl_region_init(void);
> >   void cxl_region_exit(void);
> >   int cxl_get_poison_by_endpoint(struct cxl_port *port);
> > +int cxl_ed_add_one_extent(struct cxl_endpoint_decoder *cxled,
> > +                         struct cxl_dc_extent *dc_extent);
> >   #else
> >   static inline int cxl_get_poison_by_endpoint(struct cxl_port *port)
> >   {
> > @@ -43,6 +47,11 @@ static inline int cxl_region_init(void)
> >   static inline void cxl_region_exit(void)
> >   {
> >   }
> > +static inline int cxl_ed_add_one_extent(struct cxl_endpoint_decoder *cxled,
> > +                                       struct cxl_dc_extent *dc_extent)
> > +{
> > +       return 0;
> > +}
> >   #define CXL_REGION_ATTR(x) NULL
> >   #define CXL_REGION_TYPE(x) NULL
> >   #define SET_CXL_REGION_ATTR(x)
> > diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> > index 58b31fa47b93..9e33a0976828 100644
> > --- a/drivers/cxl/core/mbox.c
> > +++ b/drivers/cxl/core/mbox.c
> > @@ -870,6 +870,53 @@ int cxl_enumerate_cmds(struct cxl_memdev_state *mds)
> >   }
> >   EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL);
> > 
> > +static int cxl_validate_extent(struct cxl_memdev_state *mds,
> > +                              struct cxl_dc_extent *dc_extent)
> > +{
> > +       struct device *dev = mds->cxlds.dev;
> > +       uint64_t start, len;
> > +
> > +       start = le64_to_cpu(dc_extent->start_dpa);
> > +       len = le64_to_cpu(dc_extent->length);
> > +
> > +       /* Extents must not cross region boundary's */
> > +       for (int i = 0; i < mds->nr_dc_region; i++) {
> > +               struct cxl_dc_region_info *dcr = &mds->dc_region[i];
> > +
> > +               if (dcr->base <= start &&
> > +                   (start + len) <= (dcr->base + dcr->decode_len)) {
> > +                       dev_dbg(dev, "DC extent DPA %#llx - %#llx (DCR:%d:%#llx)\n",
> > +                               start, start + len - 1, i, start - dcr->base);
> > +                       return 0;
> > +               }
> > +       }
> > +
> > +       dev_err_ratelimited(dev,
> > +                           "DC extent DPA %#llx - %#llx is not in any DC region\n",
> > +                           start, start + len - 1);
> > +       return -EINVAL;
> > +}
> > +
> > +static bool cxl_dc_extent_in_ed(struct cxl_endpoint_decoder *cxled,
> > +                               struct cxl_dc_extent *extent)
> > +{
> > +       uint64_t start = le64_to_cpu(extent->start_dpa);
> > +       uint64_t length = le64_to_cpu(extent->length);
> > +       struct range ext_range = (struct range){
> > +               .start = start,
> > +               .end = start + length - 1,
> > +       };
> > +       struct range ed_range = (struct range) {
> > +               .start = cxled->dpa_res->start,
> > +               .end = cxled->dpa_res->end,
> > +       };
> > +
> > +       dev_dbg(&cxled->cxld.dev, "Checking ED (%pr) for extent DPA:%#llx LEN:%#llx\n",
> > +               cxled->dpa_res, start, length);
> > +
> > +       return range_contains(&ed_range, &ext_range);
> > +}
> > +
> >   void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
> >                              enum cxl_event_log_type type,
> >                              enum cxl_event_type event_type,
> > @@ -973,6 +1020,15 @@ static int cxl_clear_event_record(struct cxl_memdev_state *mds,
> >          return rc;
> >   }
> > 
> > +static struct cxl_memdev_state *
> > +cxled_to_mds(struct cxl_endpoint_decoder *cxled)
> > +{
> > +       struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
> > +       struct cxl_dev_state *cxlds = cxlmd->cxlds;
> > +
> > +       return container_of(cxlds, struct cxl_memdev_state, cxlds);
> > +}
> > +
> >   static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
> >                                      enum cxl_event_log_type type)
> >   {
> > @@ -1406,6 +1462,142 @@ int cxl_dev_dynamic_capacity_identify(struct cxl_memdev_state *mds)
> >   }
> >   EXPORT_SYMBOL_NS_GPL(cxl_dev_dynamic_capacity_identify, CXL);
> > 
> > +static int cxl_dev_get_dc_extent_cnt(struct cxl_memdev_state *mds,
> > +                                    unsigned int *extent_gen_num)
> > +{
> > +       struct cxl_mbox_get_dc_extent_in get_dc_extent;
> > +       struct cxl_mbox_get_dc_extent_out dc_extents;
> > +       struct cxl_mbox_cmd mbox_cmd;
> > +       unsigned int count;
> > +       int rc;
> > +
> > +       get_dc_extent = (struct cxl_mbox_get_dc_extent_in) {
> > +               .extent_cnt = cpu_to_le32(0),
> > +               .start_extent_index = cpu_to_le32(0),
> > +       };
> > +
> > +       mbox_cmd = (struct cxl_mbox_cmd) {
> > +               .opcode = CXL_MBOX_OP_GET_DC_EXTENT_LIST,
> > +               .payload_in = &get_dc_extent,
> > +               .size_in = sizeof(get_dc_extent),
> > +               .size_out = sizeof(dc_extents),
> > +               .payload_out = &dc_extents,
> > +               .min_out = 1,
> > +       };
> > +
> > +       rc = cxl_internal_send_cmd(mds, &mbox_cmd);
> > +       if (rc < 0)
> > +               return rc;
> > +
> > +       count = le32_to_cpu(dc_extents.total_extent_cnt);
> > +       *extent_gen_num = le32_to_cpu(dc_extents.extent_list_num);
> > +
> > +       return count;
> > +}
> > +
> > +static int cxl_dev_get_dc_extents(struct cxl_endpoint_decoder *cxled,
> > +                                 unsigned int start_gen_num,
> > +                                 unsigned int exp_cnt)
> > +{
> > +       struct cxl_memdev_state *mds = cxled_to_mds(cxled);
> > +       unsigned int start_index, total_read;
> > +       struct device *dev = mds->cxlds.dev;
> > +       struct cxl_mbox_cmd mbox_cmd;
> > +
> > +       struct cxl_mbox_get_dc_extent_out *dc_extents __free(kfree) =
> > +                               kvmalloc(mds->payload_size, GFP_KERNEL);
> > +       if (!dc_extents)
> > +               return -ENOMEM;
> > +
> > +       total_read = 0;
> > +       start_index = 0;
> > +       do {
> > +               unsigned int nr_ext, total_extent_cnt, gen_num;
> > +               struct cxl_mbox_get_dc_extent_in get_dc_extent;
> > +               int rc;
> > +
> > +               get_dc_extent = (struct cxl_mbox_get_dc_extent_in) {
> > +                       .extent_cnt = cpu_to_le32(exp_cnt - start_index),
> > +                       .start_extent_index = cpu_to_le32(start_index),
> > +               };
> > +
> > +               mbox_cmd = (struct cxl_mbox_cmd) {
> > +                       .opcode = CXL_MBOX_OP_GET_DC_EXTENT_LIST,
> > +                       .payload_in = &get_dc_extent,
> > +                       .size_in = sizeof(get_dc_extent),
> > +                       .size_out = mds->payload_size,
> > +                       .payload_out = dc_extents,
> > +                       .min_out = 1,
> > +               };
> > +
> > +               rc = cxl_internal_send_cmd(mds, &mbox_cmd);
> > +               if (rc < 0)
> > +                       return rc;
> > +
> > +               nr_ext = le32_to_cpu(dc_extents->ret_extent_cnt);
> > +               total_read += nr_ext;
> > +               total_extent_cnt = le32_to_cpu(dc_extents->total_extent_cnt);
> > +               gen_num = le32_to_cpu(dc_extents->extent_list_num);
> > +
> > +               dev_dbg(dev, "Get extent list count:%d generation Num:%d\n",
> > +                       total_extent_cnt, gen_num);
> > +
> > +               if (gen_num != start_gen_num || exp_cnt != total_extent_cnt) {
> > +                       dev_err(dev, "Possible incomplete extent list; gen %u != %u : cnt %u != %u\n",
> > +                               gen_num, start_gen_num, exp_cnt, total_extent_cnt);
> > +                       return -EIO;
> > +               }
> > +
> > +               for (int i = 0; i < nr_ext ; i++) {
> > +                       dev_dbg(dev, "Processing extent %d/%d\n",
> > +                               start_index + i, exp_cnt);
> > +                       rc = cxl_validate_extent(mds, &dc_extents->extent[i]);
> > +                       if (rc)
> > +                               continue;
> > +                       if (!cxl_dc_extent_in_ed(cxled, &dc_extents->extent[i]))
> > +                               continue;
> > +                       rc = cxl_ed_add_one_extent(cxled, &dc_extents->extent[i]);
> > +                       if (rc)
> > +                               return rc;
> > +               }
> > +
> > +               start_index += nr_ext;
> > +       } while (exp_cnt > total_read);
> > +
> > +       return 0;
> > +}
> > +
> > +/**
> > + * cxl_read_dc_extents() - Read any existing extents
> > + * @cxled: Endpoint decoder which is part of a region
> > + *
> > + * Issue the Get Dynamic Capacity Extent List command to the device
> > + * and add any existing extents found which belong to this decoder.
> > + *
> > + * Return: 0 if command was executed successfully, -ERRNO on error.
> > + */
> > +int cxl_read_dc_extents(struct cxl_endpoint_decoder *cxled)
> > +{
> > +       struct cxl_memdev_state *mds = cxled_to_mds(cxled);
> > +       struct device *dev = mds->cxlds.dev;
> > +       unsigned int extent_gen_num;
> > +       int rc;
> > +
> > +       if (!cxl_dcd_supported(mds)) {
> > +               dev_dbg(dev, "DCD unsupported\n");
> > +               return 0;
> > +       }
> > +
> > +       rc = cxl_dev_get_dc_extent_cnt(mds, &extent_gen_num);
> > +       dev_dbg(mds->cxlds.dev, "Extent count: %d Generation Num: %d\n",
> > +               rc, extent_gen_num);
> > +       if (rc <= 0) /* 0 == no records found */
> > +               return rc;
> > +
> > +       return cxl_dev_get_dc_extents(cxled, extent_gen_num, rc);
> 
> Is it necessary to spend a device interaction to get the generation 
> number?

Not completely necessary no.

> Couldn't cxl_dev_get_dc_extents obtain that as part of the first 
> call to the device, and then use it to ensure the consistency of any 
> remaining calls, if any are necessary?

... However, this is not a critical path and the extra query to hardware makes
the code a bit easier to follow IMO.  There are 2 distinct steps.

	1) get expected number of extents and the current generation number
	2) query for that number whilst checking that the gen number is stable

Doing what you suggest results in special casing the first query within the
loop which is kind of ugly IMO.

That said, with the new retry requirement Fan pointed out I'll consider this in
that new algorithm context.

Ira
Alison Schofield April 10, 2024, 5:44 p.m. UTC | #11
On Sun, Mar 24, 2024 at 04:18:17PM -0700, Ira Weiny wrote:
> From: Navneet Singh <navneet.singh@intel.com>
> 
> Dynamic capacity device extents may be left in an accepted state on a
> device due to an unexpected host crash.  In this case creation of a new
> region on top of the DC partition (region) is expected to expose those
> extents for continued use.
> 
> Once all endpoint decoders are part of a region and the region is being
> realized read the device extent list.  For ease of review, this patch
> stops after reading the extent list and leaves realization of the region
> extents to a future patch.
> 
> Signed-off-by: Navneet Singh <navneet.singh@intel.com>
> Co-developed-by: Ira Weiny <ira.weiny@intel.com>
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> 
> ---
> Changes for v1:
> [iweiny: remove extent list xarray]
> [iweiny: Update spec references to 3.1]
> [iweiny: use struct range in extents]
> [iweiny: remove all reference tracking and let regions track extents
> 	 through the extent devices.]
> [djbw/Jonathan/Fan: move extent tracking to endpoint decoders]
> ---
>  drivers/cxl/core/core.h   |   9 +++
>  drivers/cxl/core/mbox.c   | 192 ++++++++++++++++++++++++++++++++++++++++++++++
>  drivers/cxl/core/region.c |  29 +++++++
>  drivers/cxl/cxlmem.h      |  49 ++++++++++++
>  4 files changed, 279 insertions(+)

snip

> 
> +static int cxl_validate_extent(struct cxl_memdev_state *mds,
> +			       struct cxl_dc_extent *dc_extent)
> +{
> +	struct device *dev = mds->cxlds.dev;
> +	uint64_t start, len;
> +
> +	start = le64_to_cpu(dc_extent->start_dpa);
> +	len = le64_to_cpu(dc_extent->length);
> +
> +	/* Extents must not cross region boundary's */
> +	for (int i = 0; i < mds->nr_dc_region; i++) {
> +		struct cxl_dc_region_info *dcr = &mds->dc_region[i];
> +

I think you already got range_contains suggestion

> +		if (dcr->base <= start &&
> +		    (start + len) <= (dcr->base + dcr->decode_len)) {
> +			dev_dbg(dev, "DC extent DPA %#llx - %#llx (DCR:%d:%#llx)\n",
> +				start, start + len - 1, i, start - dcr->base);
> +			return 0;
> +		}
> +	}
> +
> +	dev_err_ratelimited(dev,
> +			    "DC extent DPA %#llx - %#llx is not in any DC region\n",
> +			    start, start + len - 1);

Need some clarification.
Isn't this checking that the extent is fully contained within a region?
And then, it dev_err's if not fully contained. There is not actually
a check and an error message about crossing region boundary's as the
comment suggests.  Maybe update the comment to reflect the work.. like:

/* Extent must be fully contained in a region */


> +	return -EINVAL;
> +}
> +
> +static bool cxl_dc_extent_in_ed(struct cxl_endpoint_decoder *cxled,
> +				struct cxl_dc_extent *extent)
> +{
> +	uint64_t start = le64_to_cpu(extent->start_dpa);
> +	uint64_t length = le64_to_cpu(extent->length);

u64 here (and in other places too)


> +	struct range ext_range = (struct range){
> +		.start = start,
> +		.end = start + length - 1,
> +	};
> +	struct range ed_range = (struct range) {
> +		.start = cxled->dpa_res->start,
> +		.end = cxled->dpa_res->end,
> +	};
> +
> +	dev_dbg(&cxled->cxld.dev, "Checking ED (%pr) for extent DPA:%#llx LEN:%#llx\n",
> +		cxled->dpa_res, start, length);
> +
> +	return range_contains(&ed_range, &ext_range);
> +}
> +
>  void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
>  			    enum cxl_event_log_type type,
>  			    enum cxl_event_type event_type,
> @@ -973,6 +1020,15 @@ static int cxl_clear_event_record(struct cxl_memdev_state *mds,
>  	return rc;
>  }
>  
> +static struct cxl_memdev_state *
> +cxled_to_mds(struct cxl_endpoint_decoder *cxled)
> +{
> +	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
> +	struct cxl_dev_state *cxlds = cxlmd->cxlds;
> +
> +	return container_of(cxlds, struct cxl_memdev_state, cxlds);
> +}
> +

That's nice!


>  static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
>  				    enum cxl_event_log_type type)
>  {
> @@ -1406,6 +1462,142 @@ int cxl_dev_dynamic_capacity_identify(struct cxl_memdev_state *mds)
>  }
>  EXPORT_SYMBOL_NS_GPL(cxl_dev_dynamic_capacity_identify, CXL);
>  
> +static int cxl_dev_get_dc_extent_cnt(struct cxl_memdev_state *mds,

Perhaps drop the _dev_ from this (and other, like below) function names.

> +static int cxl_dev_get_dc_extents(struct cxl_endpoint_decoder *cxled,


snip

> +/**
> + * cxl_read_dc_extents() - Read any existing extents
> + * @cxled: Endpoint decoder which is part of a region
> + *
> + * Issue the Get Dynamic Capacity Extent List command to the device
> + * and add any existing extents found which belong to this decoder.
> + *
> + * Return: 0 if command was executed successfully, -ERRNO on error.
> + */
> +int cxl_read_dc_extents(struct cxl_endpoint_decoder *cxled)
> +{
> +	struct cxl_memdev_state *mds = cxled_to_mds(cxled);
> +	struct device *dev = mds->cxlds.dev;
> +	unsigned int extent_gen_num;
> +	int rc;
> +
> +	if (!cxl_dcd_supported(mds)) {
> +		dev_dbg(dev, "DCD unsupported\n");
> +		return 0;
> +	}
> +
> +	rc = cxl_dev_get_dc_extent_cnt(mds, &extent_gen_num);
> +	dev_dbg(mds->cxlds.dev, "Extent count: %d Generation Num: %d\n",

Either use the *dev defined in both dev_dbg()'s or get rid of it use mds->cxlds.dev.

> +		rc, extent_gen_num);
> +	if (rc <= 0) /* 0 == no records found */
> +		return rc;
> +
> +	return cxl_dev_get_dc_extents(cxled, extent_gen_num, rc);
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_read_dc_extents, CXL);
> +

snip

>  
> +static int cxl_region_read_extents(struct cxl_region *cxlr)
> +{

How about:
static int cxl_region_read_extents(struct cxl_region_params *p)


> +	struct cxl_region_params *p = &cxlr->params;
> +	int i;
> +
> +	for (i = 0; i < p->nr_targets; i++) {
> +		int rc;
> +
> +		rc = cxl_read_dc_extents(p->targets[i]);
> +		if (rc)
> +			return rc;
> +	}
> +
> +	return 0;
> +}

snip to end
diff mbox series

Patch

diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 91abeffbe985..119b12362977 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -4,6 +4,8 @@ 
 #ifndef __CXL_CORE_H__
 #define __CXL_CORE_H__
 
+#include <cxlmem.h>
+
 extern const struct device_type cxl_nvdimm_bridge_type;
 extern const struct device_type cxl_nvdimm_type;
 extern const struct device_type cxl_pmu_type;
@@ -28,6 +30,8 @@  void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled);
 int cxl_region_init(void);
 void cxl_region_exit(void);
 int cxl_get_poison_by_endpoint(struct cxl_port *port);
+int cxl_ed_add_one_extent(struct cxl_endpoint_decoder *cxled,
+			  struct cxl_dc_extent *dc_extent);
 #else
 static inline int cxl_get_poison_by_endpoint(struct cxl_port *port)
 {
@@ -43,6 +47,11 @@  static inline int cxl_region_init(void)
 static inline void cxl_region_exit(void)
 {
 }
+static inline int cxl_ed_add_one_extent(struct cxl_endpoint_decoder *cxled,
+					struct cxl_dc_extent *dc_extent)
+{
+	return 0;
+}
 #define CXL_REGION_ATTR(x) NULL
 #define CXL_REGION_TYPE(x) NULL
 #define SET_CXL_REGION_ATTR(x)
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index 58b31fa47b93..9e33a0976828 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -870,6 +870,53 @@  int cxl_enumerate_cmds(struct cxl_memdev_state *mds)
 }
 EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL);
 
+static int cxl_validate_extent(struct cxl_memdev_state *mds,
+			       struct cxl_dc_extent *dc_extent)
+{
+	struct device *dev = mds->cxlds.dev;
+	uint64_t start, len;
+
+	start = le64_to_cpu(dc_extent->start_dpa);
+	len = le64_to_cpu(dc_extent->length);
+
+	/* Extents must not cross region boundary's */
+	for (int i = 0; i < mds->nr_dc_region; i++) {
+		struct cxl_dc_region_info *dcr = &mds->dc_region[i];
+
+		if (dcr->base <= start &&
+		    (start + len) <= (dcr->base + dcr->decode_len)) {
+			dev_dbg(dev, "DC extent DPA %#llx - %#llx (DCR:%d:%#llx)\n",
+				start, start + len - 1, i, start - dcr->base);
+			return 0;
+		}
+	}
+
+	dev_err_ratelimited(dev,
+			    "DC extent DPA %#llx - %#llx is not in any DC region\n",
+			    start, start + len - 1);
+	return -EINVAL;
+}
+
+static bool cxl_dc_extent_in_ed(struct cxl_endpoint_decoder *cxled,
+				struct cxl_dc_extent *extent)
+{
+	uint64_t start = le64_to_cpu(extent->start_dpa);
+	uint64_t length = le64_to_cpu(extent->length);
+	struct range ext_range = (struct range){
+		.start = start,
+		.end = start + length - 1,
+	};
+	struct range ed_range = (struct range) {
+		.start = cxled->dpa_res->start,
+		.end = cxled->dpa_res->end,
+	};
+
+	dev_dbg(&cxled->cxld.dev, "Checking ED (%pr) for extent DPA:%#llx LEN:%#llx\n",
+		cxled->dpa_res, start, length);
+
+	return range_contains(&ed_range, &ext_range);
+}
+
 void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
 			    enum cxl_event_log_type type,
 			    enum cxl_event_type event_type,
@@ -973,6 +1020,15 @@  static int cxl_clear_event_record(struct cxl_memdev_state *mds,
 	return rc;
 }
 
+static struct cxl_memdev_state *
+cxled_to_mds(struct cxl_endpoint_decoder *cxled)
+{
+	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+	struct cxl_dev_state *cxlds = cxlmd->cxlds;
+
+	return container_of(cxlds, struct cxl_memdev_state, cxlds);
+}
+
 static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
 				    enum cxl_event_log_type type)
 {
@@ -1406,6 +1462,142 @@  int cxl_dev_dynamic_capacity_identify(struct cxl_memdev_state *mds)
 }
 EXPORT_SYMBOL_NS_GPL(cxl_dev_dynamic_capacity_identify, CXL);
 
+static int cxl_dev_get_dc_extent_cnt(struct cxl_memdev_state *mds,
+				     unsigned int *extent_gen_num)
+{
+	struct cxl_mbox_get_dc_extent_in get_dc_extent;
+	struct cxl_mbox_get_dc_extent_out dc_extents;
+	struct cxl_mbox_cmd mbox_cmd;
+	unsigned int count;
+	int rc;
+
+	get_dc_extent = (struct cxl_mbox_get_dc_extent_in) {
+		.extent_cnt = cpu_to_le32(0),
+		.start_extent_index = cpu_to_le32(0),
+	};
+
+	mbox_cmd = (struct cxl_mbox_cmd) {
+		.opcode = CXL_MBOX_OP_GET_DC_EXTENT_LIST,
+		.payload_in = &get_dc_extent,
+		.size_in = sizeof(get_dc_extent),
+		.size_out = sizeof(dc_extents),
+		.payload_out = &dc_extents,
+		.min_out = 1,
+	};
+
+	rc = cxl_internal_send_cmd(mds, &mbox_cmd);
+	if (rc < 0)
+		return rc;
+
+	count = le32_to_cpu(dc_extents.total_extent_cnt);
+	*extent_gen_num = le32_to_cpu(dc_extents.extent_list_num);
+
+	return count;
+}
+
+static int cxl_dev_get_dc_extents(struct cxl_endpoint_decoder *cxled,
+				  unsigned int start_gen_num,
+				  unsigned int exp_cnt)
+{
+	struct cxl_memdev_state *mds = cxled_to_mds(cxled);
+	unsigned int start_index, total_read;
+	struct device *dev = mds->cxlds.dev;
+	struct cxl_mbox_cmd mbox_cmd;
+
+	struct cxl_mbox_get_dc_extent_out *dc_extents __free(kfree) =
+				kvmalloc(mds->payload_size, GFP_KERNEL);
+	if (!dc_extents)
+		return -ENOMEM;
+
+	total_read = 0;
+	start_index = 0;
+	do {
+		unsigned int nr_ext, total_extent_cnt, gen_num;
+		struct cxl_mbox_get_dc_extent_in get_dc_extent;
+		int rc;
+
+		get_dc_extent = (struct cxl_mbox_get_dc_extent_in) {
+			.extent_cnt = cpu_to_le32(exp_cnt - start_index),
+			.start_extent_index = cpu_to_le32(start_index),
+		};
+
+		mbox_cmd = (struct cxl_mbox_cmd) {
+			.opcode = CXL_MBOX_OP_GET_DC_EXTENT_LIST,
+			.payload_in = &get_dc_extent,
+			.size_in = sizeof(get_dc_extent),
+			.size_out = mds->payload_size,
+			.payload_out = dc_extents,
+			.min_out = 1,
+		};
+
+		rc = cxl_internal_send_cmd(mds, &mbox_cmd);
+		if (rc < 0)
+			return rc;
+
+		nr_ext = le32_to_cpu(dc_extents->ret_extent_cnt);
+		total_read += nr_ext;
+		total_extent_cnt = le32_to_cpu(dc_extents->total_extent_cnt);
+		gen_num = le32_to_cpu(dc_extents->extent_list_num);
+
+		dev_dbg(dev, "Get extent list count:%d generation Num:%d\n",
+			total_extent_cnt, gen_num);
+
+		if (gen_num != start_gen_num || exp_cnt != total_extent_cnt) {
+			dev_err(dev, "Possible incomplete extent list; gen %u != %u : cnt %u != %u\n",
+				gen_num, start_gen_num, exp_cnt, total_extent_cnt);
+			return -EIO;
+		}
+
+		for (int i = 0; i < nr_ext ; i++) {
+			dev_dbg(dev, "Processing extent %d/%d\n",
+				start_index + i, exp_cnt);
+			rc = cxl_validate_extent(mds, &dc_extents->extent[i]);
+			if (rc)
+				continue;
+			if (!cxl_dc_extent_in_ed(cxled, &dc_extents->extent[i]))
+				continue;
+			rc = cxl_ed_add_one_extent(cxled, &dc_extents->extent[i]);
+			if (rc)
+				return rc;
+		}
+
+		start_index += nr_ext;
+	} while (exp_cnt > total_read);
+
+	return 0;
+}
+
+/**
+ * cxl_read_dc_extents() - Read any existing extents
+ * @cxled: Endpoint decoder which is part of a region
+ *
+ * Issue the Get Dynamic Capacity Extent List command to the device
+ * and add any existing extents found which belong to this decoder.
+ *
+ * Return: 0 if command was executed successfully, -ERRNO on error.
+ */
+int cxl_read_dc_extents(struct cxl_endpoint_decoder *cxled)
+{
+	struct cxl_memdev_state *mds = cxled_to_mds(cxled);
+	struct device *dev = mds->cxlds.dev;
+	unsigned int extent_gen_num;
+	int rc;
+
+	if (!cxl_dcd_supported(mds)) {
+		dev_dbg(dev, "DCD unsupported\n");
+		return 0;
+	}
+
+	rc = cxl_dev_get_dc_extent_cnt(mds, &extent_gen_num);
+	dev_dbg(mds->cxlds.dev, "Extent count: %d Generation Num: %d\n",
+		rc, extent_gen_num);
+	if (rc <= 0) /* 0 == no records found */
+		return rc;
+
+	return cxl_dev_get_dc_extents(cxled, extent_gen_num, rc);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_read_dc_extents, CXL);
+
 static int add_dpa_res(struct device *dev, struct resource *parent,
 		       struct resource *res, resource_size_t start,
 		       resource_size_t size, const char *type)
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 0d7b09a49dcf..3e563ab29afe 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -1450,6 +1450,13 @@  static int cxl_region_validate_position(struct cxl_region *cxlr,
 	return 0;
 }
 
+/* Callers are expected to ensure cxled has been attached to a region */
+int cxl_ed_add_one_extent(struct cxl_endpoint_decoder *cxled,
+			  struct cxl_dc_extent *dc_extent)
+{
+	return 0;
+}
+
 static int cxl_region_attach_position(struct cxl_region *cxlr,
 				      struct cxl_root_decoder *cxlrd,
 				      struct cxl_endpoint_decoder *cxled,
@@ -2773,6 +2780,22 @@  static int devm_cxl_add_pmem_region(struct cxl_region *cxlr)
 	return rc;
 }
 
+static int cxl_region_read_extents(struct cxl_region *cxlr)
+{
+	struct cxl_region_params *p = &cxlr->params;
+	int i;
+
+	for (i = 0; i < p->nr_targets; i++) {
+		int rc;
+
+		rc = cxl_read_dc_extents(p->targets[i]);
+		if (rc)
+			return rc;
+	}
+
+	return 0;
+}
+
 static void cxlr_dax_unregister(void *_cxlr_dax)
 {
 	struct cxl_dax_region *cxlr_dax = _cxlr_dax;
@@ -2807,6 +2830,12 @@  static int devm_cxl_add_dax_region(struct cxl_region *cxlr)
 	dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
 		dev_name(dev));
 
+	if (cxlr->mode == CXL_REGION_DC) {
+		rc = cxl_region_read_extents(cxlr);
+		if (rc)
+			goto err;
+	}
+
 	return devm_add_action_or_reset(&cxlr->dev, cxlr_dax_unregister,
 					cxlr_dax);
 err:
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 01bee6eedff3..8f2d8944d334 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -604,6 +604,54 @@  enum cxl_opcode {
 	UUID_INIT(0xe1819d9, 0x11a9, 0x400c, 0x81, 0x1f, 0xd6, 0x07, 0x19,     \
 		  0x40, 0x3d, 0x86)
 
+/*
+ * Add Dynamic Capacity Response
+ * CXL rev 3.1 section 8.2.9.9.9.3; Table 8-168 & Table 8-169
+ */
+struct cxl_mbox_dc_response {
+	__le32 extent_list_size;
+	u8 flags;
+	u8 reserved[3];
+	struct updated_extent_list {
+		__le64 dpa_start;
+		__le64 length;
+		u8 reserved[8];
+	} __packed extent_list[];
+} __packed;
+
+/*
+ * CXL rev 3.1 section 8.2.9.2.1.6; Table 8-51
+ */
+#define CXL_DC_EXTENT_TAG_LEN 0x10
+struct cxl_dc_extent {
+	__le64 start_dpa;
+	__le64 length;
+	u8 tag[CXL_DC_EXTENT_TAG_LEN];
+	__le16 shared_extn_seq;
+	u8 reserved[6];
+} __packed;
+
+/*
+ * Get Dynamic Capacity Extent List; Input Payload
+ * CXL rev 3.1 section 8.2.9.9.9.2; Table 8-166
+ */
+struct cxl_mbox_get_dc_extent_in {
+	__le32 extent_cnt;
+	__le32 start_extent_index;
+} __packed;
+
+/*
+ * Get Dynamic Capacity Extent List; Output Payload
+ * CXL rev 3.1 section 8.2.9.9.9.2; Table 8-167
+ */
+struct cxl_mbox_get_dc_extent_out {
+	__le32 ret_extent_cnt;
+	__le32 total_extent_cnt;
+	__le32 extent_list_num;
+	u8 rsvd[4];
+	struct cxl_dc_extent extent[];
+} __packed;
+
 struct cxl_mbox_get_supported_logs {
 	__le16 entries;
 	u8 rsvd[6];
@@ -879,6 +927,7 @@  int cxl_internal_send_cmd(struct cxl_memdev_state *mds,
 			  struct cxl_mbox_cmd *cmd);
 int cxl_dev_state_identify(struct cxl_memdev_state *mds);
 int cxl_dev_dynamic_capacity_identify(struct cxl_memdev_state *mds);
+int cxl_read_dc_extents(struct cxl_endpoint_decoder *cxled);
 int cxl_await_media_ready(struct cxl_dev_state *cxlds);
 int cxl_enumerate_cmds(struct cxl_memdev_state *mds);
 int cxl_mem_create_range_info(struct cxl_memdev_state *mds);