diff mbox series

[06/26] cxl/port: Add Dynamic Capacity mode support to endpoint decoders

Message ID 20240324-dcd-type2-upstream-v1-6-b7b00d623625@intel.com
State Changes Requested
Headers show
Series DCD: Add support for Dynamic Capacity Devices (DCD) | expand

Commit Message

Ira Weiny March 24, 2024, 11:18 p.m. UTC
From: Navneet Singh <navneet.singh@intel.com>

Endpoint decoders which are used to map Dynamic Capacity must be
configured to point to the correct Dynamic Capacity (DC) Region.  The
decoder mode currently represents the partition the decoder points to
such as ram or pmem.

Expand the mode to include DC regions [partitions].

Signed-off-by: Navneet Singh <navneet.singh@intel.com>
Co-developed-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>

---
Changes for v1:
[iweiny: eliminate added gotos]
[iweiny: Mark DC support for 6.10 kernel]
---
 Documentation/ABI/testing/sysfs-bus-cxl | 21 +++++++++++----------
 drivers/cxl/core/hdm.c                  | 19 +++++++++++++++++++
 drivers/cxl/core/port.c                 | 16 ++++++++++++++++
 3 files changed, 46 insertions(+), 10 deletions(-)

Comments

Fan Ni March 26, 2024, 4:35 p.m. UTC | #1
On Sun, Mar 24, 2024 at 04:18:09PM -0700, ira.weiny@intel.com wrote:
> From: Navneet Singh <navneet.singh@intel.com>
> 
> Endpoint decoders which are used to map Dynamic Capacity must be
> configured to point to the correct Dynamic Capacity (DC) Region.  The
> decoder mode currently represents the partition the decoder points to
> such as ram or pmem.
> 
> Expand the mode to include DC regions [partitions].
> 
> Signed-off-by: Navneet Singh <navneet.singh@intel.com>
> Co-developed-by: Ira Weiny <ira.weiny@intel.com>
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>

Minor comments below.

> 
> ---
> Changes for v1:
> [iweiny: eliminate added gotos]
> [iweiny: Mark DC support for 6.10 kernel]
> ---
>  Documentation/ABI/testing/sysfs-bus-cxl | 21 +++++++++++----------
>  drivers/cxl/core/hdm.c                  | 19 +++++++++++++++++++
>  drivers/cxl/core/port.c                 | 16 ++++++++++++++++
>  3 files changed, 46 insertions(+), 10 deletions(-)
> 
> diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl
> index fff2581b8033..8b3efaf6563c 100644
> --- a/Documentation/ABI/testing/sysfs-bus-cxl
> +++ b/Documentation/ABI/testing/sysfs-bus-cxl
> @@ -316,23 +316,24 @@ Description:
>  
>  
>  What:		/sys/bus/cxl/devices/decoderX.Y/mode
> -Date:		May, 2022
> -KernelVersion:	v6.0
> +Date:		May, 2022, June 2024
> +KernelVersion:	v6.0, v6.10 (dcY)
>  Contact:	linux-cxl@vger.kernel.org
>  Description:
>  		(RW) When a CXL decoder is of devtype "cxl_decoder_endpoint" it
>  		translates from a host physical address range, to a device local
>  		address range. Device-local address ranges are further split
> -		into a 'ram' (volatile memory) range and 'pmem' (persistent
> -		memory) range. The 'mode' attribute emits one of 'ram', 'pmem',
> -		'mixed', or 'none'. The 'mixed' indication is for error cases
> -		when a decoder straddles the volatile/persistent partition
> -		boundary, and 'none' indicates the decoder is not actively
> -		decoding, or no DPA allocation policy has been set.
> +		into a 'ram' (volatile memory) range, 'pmem' (persistent
> +		memory) range, or Dynamic Capacity (DC) range. The 'mode'
> +		attribute emits one of 'ram', 'pmem', 'dcY', 'mixed', or
> +		'none'. The 'mixed' indication is for error cases when a
> +		decoder straddles the volatile/persistent partition boundary,
> +		and 'none' indicates the decoder is not actively decoding, or
> +		no DPA allocation policy has been set.
>  
>  		'mode' can be written, when the decoder is in the 'disabled'
> -		state, with either 'ram' or 'pmem' to set the boundaries for the
> -		next allocation.
> +		state, with 'ram', 'pmem', or 'dcY' to set the boundaries for
> +		the next allocation.
>  
>  
>  What:		/sys/bus/cxl/devices/decoderX.Y/dpa_resource
> diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
> index 66b8419fd0c3..e22b6f4f7145 100644
> --- a/drivers/cxl/core/hdm.c
> +++ b/drivers/cxl/core/hdm.c
> @@ -255,6 +255,14 @@ static void devm_cxl_dpa_release(struct cxl_endpoint_decoder *cxled)
>  	__cxl_dpa_release(cxled);
>  }
>  
> +static int dc_mode_to_region_index(enum cxl_decoder_mode mode)
> +{
> +	if (mode < CXL_DECODER_DC0 || CXL_DECODER_DC7 < mode)

To me, it is more natural to have something like x<a || x>b other than
x<a || b < x for out of range check.

> +		return -EINVAL;
> +
> +	return mode - CXL_DECODER_DC0;
> +}
> +
>  static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
>  			     resource_size_t base, resource_size_t len,
>  			     resource_size_t skipped)
> @@ -411,6 +419,7 @@ int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled,
>  	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
>  	struct cxl_dev_state *cxlds = cxlmd->cxlds;
>  	struct device *dev = &cxled->cxld.dev;
> +	int rc;
>  
>  	guard(rwsem_write)(&cxl_dpa_rwsem);
>  	if (cxled->cxld.flags & CXL_DECODER_F_ENABLE)
> @@ -433,6 +442,16 @@ int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled,
>  			return -ENXIO;
>  		}
>  		break;
> +	case CXL_DECODER_DC0 ... CXL_DECODER_DC7:
> +		rc = dc_mode_to_region_index(mode);
> +		if (rc < 0)
> +			return rc;
> +
> +		if (resource_size(&cxlds->dc_res[rc]) == 0) {

The other similar checks above use "!resource_size(...)".

Fan

> +			dev_dbg(dev, "no available dynamic capacity\n");
> +			return -ENXIO;
> +		}
> +		break;
>  	default:
>  		dev_dbg(dev, "unsupported mode: %d\n", mode);
>  		return -EINVAL;
> diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
> index e59d9d37aa65..80c0651794eb 100644
> --- a/drivers/cxl/core/port.c
> +++ b/drivers/cxl/core/port.c
> @@ -208,6 +208,22 @@ static ssize_t mode_store(struct device *dev, struct device_attribute *attr,
>  		mode = CXL_DECODER_PMEM;
>  	else if (sysfs_streq(buf, "ram"))
>  		mode = CXL_DECODER_RAM;
> +	else if (sysfs_streq(buf, "dc0"))
> +		mode = CXL_DECODER_DC0;
> +	else if (sysfs_streq(buf, "dc1"))
> +		mode = CXL_DECODER_DC1;
> +	else if (sysfs_streq(buf, "dc2"))
> +		mode = CXL_DECODER_DC2;
> +	else if (sysfs_streq(buf, "dc3"))
> +		mode = CXL_DECODER_DC3;
> +	else if (sysfs_streq(buf, "dc4"))
> +		mode = CXL_DECODER_DC4;
> +	else if (sysfs_streq(buf, "dc5"))
> +		mode = CXL_DECODER_DC5;
> +	else if (sysfs_streq(buf, "dc6"))
> +		mode = CXL_DECODER_DC6;
> +	else if (sysfs_streq(buf, "dc7"))
> +		mode = CXL_DECODER_DC7;
>  	else
>  		return -EINVAL;
>  
> 
> -- 
> 2.44.0
>
Dave Jiang March 26, 2024, 5:58 p.m. UTC | #2
On 3/24/24 4:18 PM, ira.weiny@intel.com wrote:
> From: Navneet Singh <navneet.singh@intel.com>
> 
> Endpoint decoders which are used to map Dynamic Capacity must be
> configured to point to the correct Dynamic Capacity (DC) Region.  The
> decoder mode currently represents the partition the decoder points to
> such as ram or pmem.
> 
> Expand the mode to include DC regions [partitions].
> 
> Signed-off-by: Navneet Singh <navneet.singh@intel.com>
> Co-developed-by: Ira Weiny <ira.weiny@intel.com>
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> 
> ---
> Changes for v1:
> [iweiny: eliminate added gotos]
> [iweiny: Mark DC support for 6.10 kernel]
> ---
>  Documentation/ABI/testing/sysfs-bus-cxl | 21 +++++++++++----------
>  drivers/cxl/core/hdm.c                  | 19 +++++++++++++++++++
>  drivers/cxl/core/port.c                 | 16 ++++++++++++++++
>  3 files changed, 46 insertions(+), 10 deletions(-)
> 
> diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl
> index fff2581b8033..8b3efaf6563c 100644
> --- a/Documentation/ABI/testing/sysfs-bus-cxl
> +++ b/Documentation/ABI/testing/sysfs-bus-cxl
> @@ -316,23 +316,24 @@ Description:
>  
>  
>  What:		/sys/bus/cxl/devices/decoderX.Y/mode
> -Date:		May, 2022
> -KernelVersion:	v6.0
> +Date:		May, 2022, June 2024
> +KernelVersion:	v6.0, v6.10 (dcY)
>  Contact:	linux-cxl@vger.kernel.org
>  Description:
>  		(RW) When a CXL decoder is of devtype "cxl_decoder_endpoint" it
>  		translates from a host physical address range, to a device local
>  		address range. Device-local address ranges are further split
> -		into a 'ram' (volatile memory) range and 'pmem' (persistent
> -		memory) range. The 'mode' attribute emits one of 'ram', 'pmem',
> -		'mixed', or 'none'. The 'mixed' indication is for error cases
> -		when a decoder straddles the volatile/persistent partition
> -		boundary, and 'none' indicates the decoder is not actively
> -		decoding, or no DPA allocation policy has been set.
> +		into a 'ram' (volatile memory) range, 'pmem' (persistent
> +		memory) range, or Dynamic Capacity (DC) range. The 'mode'
> +		attribute emits one of 'ram', 'pmem', 'dcY', 'mixed', or
> +		'none'. The 'mixed' indication is for error cases when a
> +		decoder straddles the volatile/persistent partition boundary,
> +		and 'none' indicates the decoder is not actively decoding, or
> +		no DPA allocation policy has been set.
>  
>  		'mode' can be written, when the decoder is in the 'disabled'
> -		state, with either 'ram' or 'pmem' to set the boundaries for the
> -		next allocation.
> +		state, with 'ram', 'pmem', or 'dcY' to set the boundaries for
> +		the next allocation.
>  
>  
>  What:		/sys/bus/cxl/devices/decoderX.Y/dpa_resource
> diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
> index 66b8419fd0c3..e22b6f4f7145 100644
> --- a/drivers/cxl/core/hdm.c
> +++ b/drivers/cxl/core/hdm.c
> @@ -255,6 +255,14 @@ static void devm_cxl_dpa_release(struct cxl_endpoint_decoder *cxled)
>  	__cxl_dpa_release(cxled);
>  }
>  
> +static int dc_mode_to_region_index(enum cxl_decoder_mode mode)
> +{
> +	if (mode < CXL_DECODER_DC0 || CXL_DECODER_DC7 < mode)

I second what Fan said about readability here if you do (mode > CXL_DECODER_DC7) for upper bound check instead.

> +		return -EINVAL;
> +
> +	return mode - CXL_DECODER_DC0;
> +}
> +
>  static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
>  			     resource_size_t base, resource_size_t len,
>  			     resource_size_t skipped)
> @@ -411,6 +419,7 @@ int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled,
>  	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
>  	struct cxl_dev_state *cxlds = cxlmd->cxlds;
>  	struct device *dev = &cxled->cxld.dev;
> +	int rc;
>  
>  	guard(rwsem_write)(&cxl_dpa_rwsem);
>  	if (cxled->cxld.flags & CXL_DECODER_F_ENABLE)
> @@ -433,6 +442,16 @@ int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled,
>  			return -ENXIO;
>  		}
>  		break;
> +	case CXL_DECODER_DC0 ... CXL_DECODER_DC7:
> +		rc = dc_mode_to_region_index(mode);
> +		if (rc < 0)
> +			return rc;
> +
> +		if (resource_size(&cxlds->dc_res[rc]) == 0) {
> +			dev_dbg(dev, "no available dynamic capacity\n");
> +			return -ENXIO;
> +		}
> +		break;
>  	default:
>  		dev_dbg(dev, "unsupported mode: %d\n", mode);
>  		return -EINVAL;
> diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
> index e59d9d37aa65..80c0651794eb 100644
> --- a/drivers/cxl/core/port.c
> +++ b/drivers/cxl/core/port.c
> @@ -208,6 +208,22 @@ static ssize_t mode_store(struct device *dev, struct device_attribute *attr,
>  		mode = CXL_DECODER_PMEM;
>  	else if (sysfs_streq(buf, "ram"))
>  		mode = CXL_DECODER_RAM;
> +	else if (sysfs_streq(buf, "dc0"))
> +		mode = CXL_DECODER_DC0;
> +	else if (sysfs_streq(buf, "dc1"))
> +		mode = CXL_DECODER_DC1;
> +	else if (sysfs_streq(buf, "dc2"))
> +		mode = CXL_DECODER_DC2;
> +	else if (sysfs_streq(buf, "dc3"))
> +		mode = CXL_DECODER_DC3;
> +	else if (sysfs_streq(buf, "dc4"))
> +		mode = CXL_DECODER_DC4;
> +	else if (sysfs_streq(buf, "dc5"))
> +		mode = CXL_DECODER_DC5;
> +	else if (sysfs_streq(buf, "dc6"))
> +		mode = CXL_DECODER_DC6;
> +	else if (sysfs_streq(buf, "dc7"))
> +		mode = CXL_DECODER_DC7;

I think maybe create a static string table that correlates cxl_decoder_mode to string. Then you can simplify cxl_decoder_mode_name() and as well as here. And here I think you can just do a for loop and go through the entire static table. 

>  	else
>  		return -EINVAL;
>  
>
Jonathan Cameron April 4, 2024, 8:32 a.m. UTC | #3
On Sun, 24 Mar 2024 16:18:09 -0700
ira.weiny@intel.com wrote:

> From: Navneet Singh <navneet.singh@intel.com>
> 
> Endpoint decoders which are used to map Dynamic Capacity must be
> configured to point to the correct Dynamic Capacity (DC) Region.  The
> decoder mode currently represents the partition the decoder points to
> such as ram or pmem.
> 
> Expand the mode to include DC regions [partitions].
> 
> Signed-off-by: Navneet Singh <navneet.singh@intel.com>
> Co-developed-by: Ira Weiny <ira.weiny@intel.com>
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> 
> ---
> Changes for v1:
> [iweiny: eliminate added gotos]
> [iweiny: Mark DC support for 6.10 kernel]
> ---
>  Documentation/ABI/testing/sysfs-bus-cxl | 21 +++++++++++----------
>  drivers/cxl/core/hdm.c                  | 19 +++++++++++++++++++
>  drivers/cxl/core/port.c                 | 16 ++++++++++++++++
>  3 files changed, 46 insertions(+), 10 deletions(-)
> 
> diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl
> index fff2581b8033..8b3efaf6563c 100644
> --- a/Documentation/ABI/testing/sysfs-bus-cxl
> +++ b/Documentation/ABI/testing/sysfs-bus-cxl
> @@ -316,23 +316,24 @@ Description:
>  
>  
>  What:		/sys/bus/cxl/devices/decoderX.Y/mode
> -Date:		May, 2022
> -KernelVersion:	v6.0
> +Date:		May, 2022, June 2024
> +KernelVersion:	v6.0, v6.10 (dcY)
>  Contact:	linux-cxl@vger.kernel.org
>  Description:
>  		(RW) When a CXL decoder is of devtype "cxl_decoder_endpoint" it
>  		translates from a host physical address range, to a device local
>  		address range. Device-local address ranges are further split
> -		into a 'ram' (volatile memory) range and 'pmem' (persistent
> -		memory) range. The 'mode' attribute emits one of 'ram', 'pmem',
> -		'mixed', or 'none'. The 'mixed' indication is for error cases
> -		when a decoder straddles the volatile/persistent partition
> -		boundary, and 'none' indicates the decoder is not actively
> -		decoding, or no DPA allocation policy has been set.
> +		into a 'ram' (volatile memory) range, 'pmem' (persistent
> +		memory) range, or Dynamic Capacity (DC) range. The 'mode'
> +		attribute emits one of 'ram', 'pmem', 'dcY', 'mixed', or
> +		'none'. The 'mixed' indication is for error cases when a
> +		decoder straddles the volatile/persistent partition boundary,

I love corners.  What happen if no persistent and decoder straddles
volatile / dc0?  Would only happen if the bios was having fun I think...

> +		and 'none' indicates the decoder is not actively decoding, or
> +		no DPA allocation policy has been set.
>  
>  		'mode' can be written, when the decoder is in the 'disabled'
> -		state, with either 'ram' or 'pmem' to set the boundaries for the
> -		next allocation.
> +		state, with 'ram', 'pmem', or 'dcY' to set the boundaries for
> +		the next allocation.
>  
>  
>  What:		/sys/bus/cxl/devices/decoderX.Y/dpa_resource
> diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
> index 66b8419fd0c3..e22b6f4f7145 100644
> --- a/drivers/cxl/core/hdm.c
> +++ b/drivers/cxl/core/hdm.c
> @@ -255,6 +255,14 @@ static void devm_cxl_dpa_release(struct cxl_endpoint_decoder *cxled)
>  	__cxl_dpa_release(cxled);
>  }
>  
> +static int dc_mode_to_region_index(enum cxl_decoder_mode mode)
> +{
> +	if (mode < CXL_DECODER_DC0 || CXL_DECODER_DC7 < mode)
> +		return -EINVAL;
> +
> +	return mode - CXL_DECODER_DC0;
> +}
> +
>  static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
>  			     resource_size_t base, resource_size_t len,
>  			     resource_size_t skipped)
> @@ -411,6 +419,7 @@ int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled,
>  	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
>  	struct cxl_dev_state *cxlds = cxlmd->cxlds;
>  	struct device *dev = &cxled->cxld.dev;
> +	int rc;
>  
>  	guard(rwsem_write)(&cxl_dpa_rwsem);
>  	if (cxled->cxld.flags & CXL_DECODER_F_ENABLE)
> @@ -433,6 +442,16 @@ int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled,
>  			return -ENXIO;
>  		}
>  		break;
> +	case CXL_DECODER_DC0 ... CXL_DECODER_DC7:
> +		rc = dc_mode_to_region_index(mode);
> +		if (rc < 0)
> +			return rc;

Can't fail, so you could not bother checking..  Seems very unlikely
that function will gain other error cases in the future.

> +
> +		if (resource_size(&cxlds->dc_res[rc]) == 0) {
> +			dev_dbg(dev, "no available dynamic capacity\n");
> +			return -ENXIO;
> +		}
> +		break;
>  	default:
>  		dev_dbg(dev, "unsupported mode: %d\n", mode);
>  		return -EINVAL;
> diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
> index e59d9d37aa65..80c0651794eb 100644
> --- a/drivers/cxl/core/port.c
> +++ b/drivers/cxl/core/port.c
> @@ -208,6 +208,22 @@ static ssize_t mode_store(struct device *dev, struct device_attribute *attr,
>  		mode = CXL_DECODER_PMEM;
>  	else if (sysfs_streq(buf, "ram"))
>  		mode = CXL_DECODER_RAM;
> +	else if (sysfs_streq(buf, "dc0"))
> +		mode = CXL_DECODER_DC0;
> +	else if (sysfs_streq(buf, "dc1"))
> +		mode = CXL_DECODER_DC1;
> +	else if (sysfs_streq(buf, "dc2"))
> +		mode = CXL_DECODER_DC2;
> +	else if (sysfs_streq(buf, "dc3"))
> +		mode = CXL_DECODER_DC3;
> +	else if (sysfs_streq(buf, "dc4"))
> +		mode = CXL_DECODER_DC4;
> +	else if (sysfs_streq(buf, "dc5"))
> +		mode = CXL_DECODER_DC5;
> +	else if (sysfs_streq(buf, "dc6"))
> +		mode = CXL_DECODER_DC6;
> +	else if (sysfs_streq(buf, "dc7"))
> +		mode = CXL_DECODER_DC7;

Fully agree with the comment that a string + enum table and search
is probably appropriate here.

>  	else
>  		return -EINVAL;
>  
>
Ira Weiny April 5, 2024, 7:50 p.m. UTC | #4
fan wrote:
> On Sun, Mar 24, 2024 at 04:18:09PM -0700, ira.weiny@intel.com wrote:
> > From: Navneet Singh <navneet.singh@intel.com>
> > 

[snip]

> > diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
> > index 66b8419fd0c3..e22b6f4f7145 100644
> > --- a/drivers/cxl/core/hdm.c
> > +++ b/drivers/cxl/core/hdm.c
> > @@ -255,6 +255,14 @@ static void devm_cxl_dpa_release(struct cxl_endpoint_decoder *cxled)
> >  	__cxl_dpa_release(cxled);
> >  }
> >  
> > +static int dc_mode_to_region_index(enum cxl_decoder_mode mode)
> > +{
> > +	if (mode < CXL_DECODER_DC0 || CXL_DECODER_DC7 < mode)
> 
> To me, it is more natural to have something like x<a || x>b other than
> x<a || b < x for out of range check.

Ok.


[snip]

> >  
> >  	guard(rwsem_write)(&cxl_dpa_rwsem);
> >  	if (cxled->cxld.flags & CXL_DECODER_F_ENABLE)
> > @@ -433,6 +442,16 @@ int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled,
> >  			return -ENXIO;
> >  		}
> >  		break;
> > +	case CXL_DECODER_DC0 ... CXL_DECODER_DC7:
> > +		rc = dc_mode_to_region_index(mode);
> > +		if (rc < 0)
> > +			return rc;
> > +
> > +		if (resource_size(&cxlds->dc_res[rc]) == 0) {
> 
> The other similar checks above use "!resource_size(...)".
> 

Yea good catch.
Ira
Ira Weiny April 5, 2024, 8:34 p.m. UTC | #5
Dave Jiang wrote:
> 
> 
> On 3/24/24 4:18 PM, ira.weiny@intel.com wrote:
> > From: Navneet Singh <navneet.singh@intel.com>
> > 

[snip]

> > diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
> > index 66b8419fd0c3..e22b6f4f7145 100644
> > --- a/drivers/cxl/core/hdm.c
> > +++ b/drivers/cxl/core/hdm.c
> > @@ -255,6 +255,14 @@ static void devm_cxl_dpa_release(struct cxl_endpoint_decoder *cxled)
> >  	__cxl_dpa_release(cxled);
> >  }
> >  
> > +static int dc_mode_to_region_index(enum cxl_decoder_mode mode)
> > +{
> > +	if (mode < CXL_DECODER_DC0 || CXL_DECODER_DC7 < mode)
> 
> I second what Fan said about readability here if you do (mode > CXL_DECODER_DC7) for upper bound check instead.
> 

Sure...


[snip]

> > diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
> > index e59d9d37aa65..80c0651794eb 100644
> > --- a/drivers/cxl/core/port.c
> > +++ b/drivers/cxl/core/port.c
> > @@ -208,6 +208,22 @@ static ssize_t mode_store(struct device *dev, struct device_attribute *attr,
> >  		mode = CXL_DECODER_PMEM;
> >  	else if (sysfs_streq(buf, "ram"))
> >  		mode = CXL_DECODER_RAM;
> > +	else if (sysfs_streq(buf, "dc0"))
> > +		mode = CXL_DECODER_DC0;
> > +	else if (sysfs_streq(buf, "dc1"))
> > +		mode = CXL_DECODER_DC1;
> > +	else if (sysfs_streq(buf, "dc2"))
> > +		mode = CXL_DECODER_DC2;
> > +	else if (sysfs_streq(buf, "dc3"))
> > +		mode = CXL_DECODER_DC3;
> > +	else if (sysfs_streq(buf, "dc4"))
> > +		mode = CXL_DECODER_DC4;
> > +	else if (sysfs_streq(buf, "dc5"))
> > +		mode = CXL_DECODER_DC5;
> > +	else if (sysfs_streq(buf, "dc6"))
> > +		mode = CXL_DECODER_DC6;
> > +	else if (sysfs_streq(buf, "dc7"))
> > +		mode = CXL_DECODER_DC7;
> 
> I think maybe create a static string table that correlates cxl_decoder_mode
> to string. Then you can simplify cxl_decoder_mode_name() and as well as here.
> And here I think you can just do a for loop and go through the entire static
> table. 

Ok...  Sure.

Ira
Ira Weiny April 5, 2024, 8:56 p.m. UTC | #6
Jonathan Cameron wrote:
> On Sun, 24 Mar 2024 16:18:09 -0700
> ira.weiny@intel.com wrote:
> 
> > From: Navneet Singh <navneet.singh@intel.com>
> > 

[snip]

> > diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl
> > index fff2581b8033..8b3efaf6563c 100644
> > --- a/Documentation/ABI/testing/sysfs-bus-cxl
> > +++ b/Documentation/ABI/testing/sysfs-bus-cxl
> > @@ -316,23 +316,24 @@ Description:
> >  
> >  
> >  What:		/sys/bus/cxl/devices/decoderX.Y/mode
> > -Date:		May, 2022
> > -KernelVersion:	v6.0
> > +Date:		May, 2022, June 2024
> > +KernelVersion:	v6.0, v6.10 (dcY)
> >  Contact:	linux-cxl@vger.kernel.org
> >  Description:
> >  		(RW) When a CXL decoder is of devtype "cxl_decoder_endpoint" it
> >  		translates from a host physical address range, to a device local
> >  		address range. Device-local address ranges are further split
> > -		into a 'ram' (volatile memory) range and 'pmem' (persistent
> > -		memory) range. The 'mode' attribute emits one of 'ram', 'pmem',
> > -		'mixed', or 'none'. The 'mixed' indication is for error cases
> > -		when a decoder straddles the volatile/persistent partition
> > -		boundary, and 'none' indicates the decoder is not actively
> > -		decoding, or no DPA allocation policy has been set.
> > +		into a 'ram' (volatile memory) range, 'pmem' (persistent
> > +		memory) range, or Dynamic Capacity (DC) range. The 'mode'
> > +		attribute emits one of 'ram', 'pmem', 'dcY', 'mixed', or
> > +		'none'. The 'mixed' indication is for error cases when a
> > +		decoder straddles the volatile/persistent partition boundary,
> 
> I love corners.  What happen if no persistent and decoder straddles
> volatile / dc0?  Would only happen if the bios was having fun I think..

Yep same issue with a wonky bios...  I did not change this text.  Only got reformatted.  But it is worth changing to:


... The 'mixed' indication is for error cases when a decoder
    straddles partition boundaries, ...

> 
> > +		and 'none' indicates the decoder is not actively decoding, or
> > +		no DPA allocation policy has been set.
> >  
> >  		'mode' can be written, when the decoder is in the 'disabled'
> > -		state, with either 'ram' or 'pmem' to set the boundaries for the
> > -		next allocation.
> > +		state, with 'ram', 'pmem', or 'dcY' to set the boundaries for
> > +		the next allocation.
> >  
> >  
> >  What:		/sys/bus/cxl/devices/decoderX.Y/dpa_resource
> > diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
> > index 66b8419fd0c3..e22b6f4f7145 100644
> > --- a/drivers/cxl/core/hdm.c
> > +++ b/drivers/cxl/core/hdm.c
> > @@ -255,6 +255,14 @@ static void devm_cxl_dpa_release(struct cxl_endpoint_decoder *cxled)
> >  	__cxl_dpa_release(cxled);
> >  }
> >  
> > +static int dc_mode_to_region_index(enum cxl_decoder_mode mode)
> > +{
> > +	if (mode < CXL_DECODER_DC0 || CXL_DECODER_DC7 < mode)
> > +		return -EINVAL;
> > +
> > +	return mode - CXL_DECODER_DC0;
> > +}
> > +
> >  static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
> >  			     resource_size_t base, resource_size_t len,
> >  			     resource_size_t skipped)
> > @@ -411,6 +419,7 @@ int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled,
> >  	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
> >  	struct cxl_dev_state *cxlds = cxlmd->cxlds;
> >  	struct device *dev = &cxled->cxld.dev;
> > +	int rc;
> >  
> >  	guard(rwsem_write)(&cxl_dpa_rwsem);
> >  	if (cxled->cxld.flags & CXL_DECODER_F_ENABLE)
> > @@ -433,6 +442,16 @@ int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled,
> >  			return -ENXIO;
> >  		}
> >  		break;
> > +	case CXL_DECODER_DC0 ... CXL_DECODER_DC7:
> > +		rc = dc_mode_to_region_index(mode);
> > +		if (rc < 0)
> > +			return rc;
> 
> Can't fail, so you could not bother checking..  Seems very unlikely
> that function will gain other error cases in the future.

Sure, done.

> 
> > +
> > +		if (resource_size(&cxlds->dc_res[rc]) == 0) {
> > +			dev_dbg(dev, "no available dynamic capacity\n");
> > +			return -ENXIO;
> > +		}
> > +		break;
> >  	default:
> >  		dev_dbg(dev, "unsupported mode: %d\n", mode);
> >  		return -EINVAL;
> > diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
> > index e59d9d37aa65..80c0651794eb 100644
> > --- a/drivers/cxl/core/port.c
> > +++ b/drivers/cxl/core/port.c
> > @@ -208,6 +208,22 @@ static ssize_t mode_store(struct device *dev, struct device_attribute *attr,
> >  		mode = CXL_DECODER_PMEM;
> >  	else if (sysfs_streq(buf, "ram"))
> >  		mode = CXL_DECODER_RAM;
> > +	else if (sysfs_streq(buf, "dc0"))
> > +		mode = CXL_DECODER_DC0;
> > +	else if (sysfs_streq(buf, "dc1"))
> > +		mode = CXL_DECODER_DC1;
> > +	else if (sysfs_streq(buf, "dc2"))
> > +		mode = CXL_DECODER_DC2;
> > +	else if (sysfs_streq(buf, "dc3"))
> > +		mode = CXL_DECODER_DC3;
> > +	else if (sysfs_streq(buf, "dc4"))
> > +		mode = CXL_DECODER_DC4;
> > +	else if (sysfs_streq(buf, "dc5"))
> > +		mode = CXL_DECODER_DC5;
> > +	else if (sysfs_streq(buf, "dc6"))
> > +		mode = CXL_DECODER_DC6;
> > +	else if (sysfs_streq(buf, "dc7"))
> > +		mode = CXL_DECODER_DC7;
> 
> Fully agree with the comment that a string + enum table and search
> is probably appropriate here.
> 

Ok, done.

Ira
Alison Schofield April 10, 2024, 8:33 p.m. UTC | #7
On Sun, Mar 24, 2024 at 04:18:09PM -0700, Ira Weiny wrote:
> From: Navneet Singh <navneet.singh@intel.com>
> 
> Endpoint decoders which are used to map Dynamic Capacity must be
> configured to point to the correct Dynamic Capacity (DC) Region.  The
> decoder mode currently represents the partition the decoder points to
> such as ram or pmem.
> 
> Expand the mode to include DC regions [partitions].
> 
> Signed-off-by: Navneet Singh <navneet.singh@intel.com>
> Co-developed-by: Ira Weiny <ira.weiny@intel.com>
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> 
> ---
> Changes for v1:
> [iweiny: eliminate added gotos]
> [iweiny: Mark DC support for 6.10 kernel]
> ---
>  Documentation/ABI/testing/sysfs-bus-cxl | 21 +++++++++++----------
>  drivers/cxl/core/hdm.c                  | 19 +++++++++++++++++++
>  drivers/cxl/core/port.c                 | 16 ++++++++++++++++
>  3 files changed, 46 insertions(+), 10 deletions(-)
> 
> diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl
> index fff2581b8033..8b3efaf6563c 100644
> --- a/Documentation/ABI/testing/sysfs-bus-cxl
> +++ b/Documentation/ABI/testing/sysfs-bus-cxl
> @@ -316,23 +316,24 @@ Description:
>  
>  
>  What:		/sys/bus/cxl/devices/decoderX.Y/mode
> -Date:		May, 2022
> -KernelVersion:	v6.0
> +Date:		May, 2022, June 2024
> +KernelVersion:	v6.0, v6.10 (dcY)
>  Contact:	linux-cxl@vger.kernel.org
>  Description:
>  		(RW) When a CXL decoder is of devtype "cxl_decoder_endpoint" it
>  		translates from a host physical address range, to a device local
>  		address range. Device-local address ranges are further split
> -		into a 'ram' (volatile memory) range and 'pmem' (persistent
> -		memory) range. The 'mode' attribute emits one of 'ram', 'pmem',
> -		'mixed', or 'none'. The 'mixed' indication is for error cases
> -		when a decoder straddles the volatile/persistent partition
> -		boundary, and 'none' indicates the decoder is not actively
> -		decoding, or no DPA allocation policy has been set.
> +		into a 'ram' (volatile memory) range, 'pmem' (persistent
> +		memory) range, or Dynamic Capacity (DC) range. The 'mode'
> +		attribute emits one of 'ram', 'pmem', 'dcY', 'mixed', or
> +		'none'. The 'mixed' indication is for error cases when a
> +		decoder straddles the volatile/persistent partition boundary,
> +		and 'none' indicates the decoder is not actively decoding, or
> +		no DPA allocation policy has been set.
>  
>  		'mode' can be written, when the decoder is in the 'disabled'
> -		state, with either 'ram' or 'pmem' to set the boundaries for the
> -		next allocation.
> +		state, with 'ram', 'pmem', or 'dcY' to set the boundaries for
> +		the next allocation.
>  
>  
>  What:		/sys/bus/cxl/devices/decoderX.Y/dpa_resource
> diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
> index 66b8419fd0c3..e22b6f4f7145 100644
> --- a/drivers/cxl/core/hdm.c
> +++ b/drivers/cxl/core/hdm.c
> @@ -255,6 +255,14 @@ static void devm_cxl_dpa_release(struct cxl_endpoint_decoder *cxled)
>  	__cxl_dpa_release(cxled);
>  }
>  
> +static int dc_mode_to_region_index(enum cxl_decoder_mode mode)
> +{
> +	if (mode < CXL_DECODER_DC0 || CXL_DECODER_DC7 < mode)
> +		return -EINVAL;
> +
> +	return mode - CXL_DECODER_DC0;
> +}

Curious why this works?


> +
>  static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
>  			     resource_size_t base, resource_size_t len,
>  			     resource_size_t skipped)
> @@ -411,6 +419,7 @@ int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled,
>  	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
>  	struct cxl_dev_state *cxlds = cxlmd->cxlds;
>  	struct device *dev = &cxled->cxld.dev;
> +	int rc;
>  
>  	guard(rwsem_write)(&cxl_dpa_rwsem);
>  	if (cxled->cxld.flags & CXL_DECODER_F_ENABLE)
> @@ -433,6 +442,16 @@ int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled,
>  			return -ENXIO;
>  		}
>  		break;
> +	case CXL_DECODER_DC0 ... CXL_DECODER_DC7:
> +		rc = dc_mode_to_region_index(mode);
> +		if (rc < 0)
> +			return rc;
> +
> +		if (resource_size(&cxlds->dc_res[rc]) == 0) {
> +			dev_dbg(dev, "no available dynamic capacity\n");
> +			return -ENXIO;
> +		}


return rc breaks the pattern in this function.

Feels like 'rc' can be named 'index' since we index into dc_res[].
Explicitly show the return -EINVAL here as other cases do.

like:
	index = dc_mode_to_region_index(mode);
	if (index < 0)
		return -EINVAL;

and then: 
	dc_mode_to_region_index() can just return -1 or still return
	-EINVAL.

> +		break;
>  	default:
>  		dev_dbg(dev, "unsupported mode: %d\n", mode);
>  		return -EINVAL;
> diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
> index e59d9d37aa65..80c0651794eb 100644
> --- a/drivers/cxl/core/port.c
> +++ b/drivers/cxl/core/port.c
> @@ -208,6 +208,22 @@ static ssize_t mode_store(struct device *dev, struct device_attribute *attr,
>  		mode = CXL_DECODER_PMEM;
>  	else if (sysfs_streq(buf, "ram"))
>  		mode = CXL_DECODER_RAM;
> +	else if (sysfs_streq(buf, "dc0"))
> +		mode = CXL_DECODER_DC0;
> +	else if (sysfs_streq(buf, "dc1"))
> +		mode = CXL_DECODER_DC1;
> +	else if (sysfs_streq(buf, "dc2"))
> +		mode = CXL_DECODER_DC2;
> +	else if (sysfs_streq(buf, "dc3"))
> +		mode = CXL_DECODER_DC3;
> +	else if (sysfs_streq(buf, "dc4"))
> +		mode = CXL_DECODER_DC4;
> +	else if (sysfs_streq(buf, "dc5"))
> +		mode = CXL_DECODER_DC5;
> +	else if (sysfs_streq(buf, "dc6"))
> +		mode = CXL_DECODER_DC6;
> +	else if (sysfs_streq(buf, "dc7"))
> +		mode = CXL_DECODER_DC7;
>  	else
>  		return -EINVAL;
>  
> 
> -- 
> 2.44.0
>
Dan Williams May 6, 2024, 4:22 p.m. UTC | #8
Ira Weiny wrote:
[..]
> > > +	case CXL_DECODER_DC0 ... CXL_DECODER_DC7:
> > > +		rc = dc_mode_to_region_index(mode);
> > > +		if (rc < 0)
> > > +			return rc;
> > 
> > Can't fail, so you could not bother checking..  Seems very unlikely
> > that function will gain other error cases in the future.
> 
> Sure, done.

Can dc_mode_to_region_index() be dropped altogether? Is there any
scenario where dc_mode_to_region_index() is really handling an anonymous
@mode argument? I.e. just replace all dc_mode_to_region_index() with
"mode - CXL_DECODER_DC0"?
Ira Weiny May 10, 2024, 5:31 a.m. UTC | #9
Dan Williams wrote:
> Ira Weiny wrote:
> [..]
> > > > +	case CXL_DECODER_DC0 ... CXL_DECODER_DC7:
> > > > +		rc = dc_mode_to_region_index(mode);
> > > > +		if (rc < 0)
> > > > +			return rc;
> > > 
> > > Can't fail, so you could not bother checking..  Seems very unlikely
> > > that function will gain other error cases in the future.
> > 
> > Sure, done.
> 
> Can dc_mode_to_region_index() be dropped altogether? Is there any
> scenario where dc_mode_to_region_index() is really handling an anonymous
> @mode argument? I.e. just replace all dc_mode_to_region_index() with
> "mode - CXL_DECODER_DC0"?

Once we open up DC partitions to support multiple regions, no.

How about we remove the check in dc_mode_to_region_index() but keep the
function?  That makes it clear that we are converting from an enum to int
index?

Ira
diff mbox series

Patch

diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl
index fff2581b8033..8b3efaf6563c 100644
--- a/Documentation/ABI/testing/sysfs-bus-cxl
+++ b/Documentation/ABI/testing/sysfs-bus-cxl
@@ -316,23 +316,24 @@  Description:
 
 
 What:		/sys/bus/cxl/devices/decoderX.Y/mode
-Date:		May, 2022
-KernelVersion:	v6.0
+Date:		May, 2022, June 2024
+KernelVersion:	v6.0, v6.10 (dcY)
 Contact:	linux-cxl@vger.kernel.org
 Description:
 		(RW) When a CXL decoder is of devtype "cxl_decoder_endpoint" it
 		translates from a host physical address range, to a device local
 		address range. Device-local address ranges are further split
-		into a 'ram' (volatile memory) range and 'pmem' (persistent
-		memory) range. The 'mode' attribute emits one of 'ram', 'pmem',
-		'mixed', or 'none'. The 'mixed' indication is for error cases
-		when a decoder straddles the volatile/persistent partition
-		boundary, and 'none' indicates the decoder is not actively
-		decoding, or no DPA allocation policy has been set.
+		into a 'ram' (volatile memory) range, 'pmem' (persistent
+		memory) range, or Dynamic Capacity (DC) range. The 'mode'
+		attribute emits one of 'ram', 'pmem', 'dcY', 'mixed', or
+		'none'. The 'mixed' indication is for error cases when a
+		decoder straddles the volatile/persistent partition boundary,
+		and 'none' indicates the decoder is not actively decoding, or
+		no DPA allocation policy has been set.
 
 		'mode' can be written, when the decoder is in the 'disabled'
-		state, with either 'ram' or 'pmem' to set the boundaries for the
-		next allocation.
+		state, with 'ram', 'pmem', or 'dcY' to set the boundaries for
+		the next allocation.
 
 
 What:		/sys/bus/cxl/devices/decoderX.Y/dpa_resource
diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index 66b8419fd0c3..e22b6f4f7145 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -255,6 +255,14 @@  static void devm_cxl_dpa_release(struct cxl_endpoint_decoder *cxled)
 	__cxl_dpa_release(cxled);
 }
 
+static int dc_mode_to_region_index(enum cxl_decoder_mode mode)
+{
+	if (mode < CXL_DECODER_DC0 || CXL_DECODER_DC7 < mode)
+		return -EINVAL;
+
+	return mode - CXL_DECODER_DC0;
+}
+
 static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
 			     resource_size_t base, resource_size_t len,
 			     resource_size_t skipped)
@@ -411,6 +419,7 @@  int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled,
 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
 	struct device *dev = &cxled->cxld.dev;
+	int rc;
 
 	guard(rwsem_write)(&cxl_dpa_rwsem);
 	if (cxled->cxld.flags & CXL_DECODER_F_ENABLE)
@@ -433,6 +442,16 @@  int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled,
 			return -ENXIO;
 		}
 		break;
+	case CXL_DECODER_DC0 ... CXL_DECODER_DC7:
+		rc = dc_mode_to_region_index(mode);
+		if (rc < 0)
+			return rc;
+
+		if (resource_size(&cxlds->dc_res[rc]) == 0) {
+			dev_dbg(dev, "no available dynamic capacity\n");
+			return -ENXIO;
+		}
+		break;
 	default:
 		dev_dbg(dev, "unsupported mode: %d\n", mode);
 		return -EINVAL;
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index e59d9d37aa65..80c0651794eb 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -208,6 +208,22 @@  static ssize_t mode_store(struct device *dev, struct device_attribute *attr,
 		mode = CXL_DECODER_PMEM;
 	else if (sysfs_streq(buf, "ram"))
 		mode = CXL_DECODER_RAM;
+	else if (sysfs_streq(buf, "dc0"))
+		mode = CXL_DECODER_DC0;
+	else if (sysfs_streq(buf, "dc1"))
+		mode = CXL_DECODER_DC1;
+	else if (sysfs_streq(buf, "dc2"))
+		mode = CXL_DECODER_DC2;
+	else if (sysfs_streq(buf, "dc3"))
+		mode = CXL_DECODER_DC3;
+	else if (sysfs_streq(buf, "dc4"))
+		mode = CXL_DECODER_DC4;
+	else if (sysfs_streq(buf, "dc5"))
+		mode = CXL_DECODER_DC5;
+	else if (sysfs_streq(buf, "dc6"))
+		mode = CXL_DECODER_DC6;
+	else if (sysfs_streq(buf, "dc7"))
+		mode = CXL_DECODER_DC7;
 	else
 		return -EINVAL;