diff mbox series

[v2,12/15] cxl: allow region creation by type2 drivers

Message ID 20240715172835.24757-13-alejandro.lucero-palau@amd.com
State Superseded
Headers show
Series cxl: add Type2 device support | expand

Commit Message

Lucero Palau, Alejandro July 15, 2024, 5:28 p.m. UTC
From: Alejandro Lucero <alucerop@amd.com>

Creating a CXL region requires userspace intervention through the cxl
sysfs files. Type2 support should allow accelerator drivers to create
such cxl region from kernel code.

Adding that functionality and integrating it with current support for
memory expanders.

Based on https://lore.kernel.org/linux-cxl/168592149709.1948938.8663425987110396027.stgit@dwillia2-xfh.jf.intel.com/T/#m84598b534cc5664f5bb31521ba6e41c7bc213758
Signed-off-by: Alejandro Lucero <alucerop@amd.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/core/region.c          | 265 ++++++++++++++++++++++-------
 drivers/cxl/cxl.h                  |   1 +
 drivers/cxl/cxlmem.h               |   4 +-
 drivers/net/ethernet/sfc/efx_cxl.c |  15 +-
 include/linux/cxl_accel_mem.h      |   5 +
 5 files changed, 231 insertions(+), 59 deletions(-)

Comments

Jonathan Cameron Aug. 4, 2024, 6:29 p.m. UTC | #1
On Mon, 15 Jul 2024 18:28:32 +0100
alejandro.lucero-palau@amd.com wrote:

> From: Alejandro Lucero <alucerop@amd.com>
> 
> Creating a CXL region requires userspace intervention through the cxl
> sysfs files. Type2 support should allow accelerator drivers to create
> such cxl region from kernel code.
> 
> Adding that functionality and integrating it with current support for
> memory expanders.
> 
> Based on https://lore.kernel.org/linux-cxl/168592149709.1948938.8663425987110396027.stgit@dwillia2-xfh.jf.intel.com/T/#m84598b534cc5664f5bb31521ba6e41c7bc213758
> Signed-off-by: Alejandro Lucero <alucerop@amd.com>
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Needs a co-developed or similar given Dan didn't email this patch
(which this sign off list suggests he did).

I'll take another look at the locking, but my main comment is
that it is really confusing so I have no idea if it's right.
Consider different ways of breaking up the code you need
to try and keep the locking obvious.

Jonathan

> +
> +static ssize_t interleave_ways_store(struct device *dev,
> +				     struct device_attribute *attr,
> +				     const char *buf, size_t len)
> +{
> +	struct cxl_region *cxlr = to_cxl_region(dev);
> +	unsigned int val;
> +	int rc;
> +
> +	rc = kstrtouint(buf, 0, &val);
> +	if (rc)
> +		return rc;
> +
> +	rc = down_write_killable(&cxl_region_rwsem);
> +	if (rc)
> +		return rc;
> +
> +	rc = set_interleave_ways(cxlr, val);
>  	up_write(&cxl_region_rwsem);
>  	if (rc)
>  		return rc;
>  	return len;
>  }
> +
This was probably intentional. Common to group a macro like this
with the function it is using by not having a blank line.
>  static DEVICE_ATTR_RW(interleave_ways);
>  
>  static ssize_t interleave_granularity_show(struct device *dev,
> @@ -547,21 +556,14 @@ static ssize_t interleave_granularity_show(struct device *dev,
>  	return rc;
>  }

> +static ssize_t interleave_granularity_store(struct device *dev,
> +					    struct device_attribute *attr,
> +					    const char *buf, size_t len)
> +{
> +	struct cxl_region *cxlr = to_cxl_region(dev);
> +	int rc, val;
> +
> +	rc = kstrtoint(buf, 0, &val);
> +	if (rc)
> +		return rc;
> +
>  	rc = down_write_killable(&cxl_region_rwsem);
>  	if (rc)
>  		return rc;
> -	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
> -		rc = -EBUSY;
> -		goto out;
> -	}
>  
> -	p->interleave_granularity = val;
> -out:
> +	rc = set_interleave_granularity(cxlr, val);
>  	up_write(&cxl_region_rwsem);
>  	if (rc)
>  		return rc;
>  	return len;
>  }
> +

grump.

>  static DEVICE_ATTR_RW(interleave_granularity);

> +/* Establish an empty region covering the given HPA range */
> +static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
> +					   struct cxl_endpoint_decoder *cxled)
> +{
> +	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
> +	struct range *hpa = &cxled->cxld.hpa_range;
> +	struct cxl_region_params *p;
> +	struct cxl_region *cxlr;
> +	struct resource *res;
> +	int rc;
> +
> +	cxlr = construct_region_begin(cxlrd, cxled);
> +	if (IS_ERR(cxlr))
> +		return cxlr;
>  
>  	set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
>  
>  	res = kmalloc(sizeof(*res), GFP_KERNEL);
>  	if (!res) {
>  		rc = -ENOMEM;
> -		goto err;
> +		goto out;
>  	}
>  
>  	*res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa),
>  				    dev_name(&cxlr->dev));
> +
>  	rc = insert_resource(cxlrd->res, res);
>  	if (rc) {
>  		/*
> @@ -3412,6 +3462,7 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>  			 __func__, dev_name(&cxlr->dev));
>  	}
>  
> +	p = &cxlr->params;
>  	p->res = res;
>  	p->interleave_ways = cxled->cxld.interleave_ways;
>  	p->interleave_granularity = cxled->cxld.interleave_granularity;
> @@ -3419,24 +3470,124 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>  
>  	rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
>  	if (rc)
> -		goto err;
> +		goto out;
>  
>  	dev_dbg(cxlmd->dev.parent, "%s:%s: %s %s res: %pr iw: %d ig: %d\n",
> -		dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), __func__,
> -		dev_name(&cxlr->dev), p->res, p->interleave_ways,
> -		p->interleave_granularity);
> +				   dev_name(&cxlmd->dev),
> +				   dev_name(&cxled->cxld.dev), __func__,
> +				   dev_name(&cxlr->dev), p->res,
> +				   p->interleave_ways,
> +				   p->interleave_granularity);
>  
>  	/* ...to match put_device() in cxl_add_to_region() */
>  	get_device(&cxlr->dev);
>  	up_write(&cxl_region_rwsem);
> +out:
> +	construct_region_end();

two calls to up_write(&cxl_region_rwsem) next to each other?

> +	if (rc) {
> +		drop_region(cxlr);
> +		return ERR_PTR(rc);
> +	}
> +	return cxlr;
> +}
> +
> +static struct cxl_region *
> +__construct_new_region(struct cxl_root_decoder *cxlrd,
> +		       struct cxl_endpoint_decoder **cxled, int ways)
> +{
> +	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
> +	struct cxl_region_params *p;
> +	resource_size_t size = 0;
> +	struct cxl_region *cxlr;
> +	int rc, i;
> +
> +	/* If interleaving is not supported, why does ways need to be at least 1? */

I think 1 means no interleave. It's simpler to do this than have 0 and 1 both 
mean no interleave because 1 works for programmable decoders.

> +	if (ways < 1)
> +		return ERR_PTR(-EINVAL);
> +
> +	cxlr = construct_region_begin(cxlrd, cxled[0]);

rethink how this broken up.  Taking the cxl_dpa_rwsem
inside this function and is really hard to follow.  Ideally
manage it with scoped_guard()


> +	if (IS_ERR(cxlr))
> +		return cxlr;
> +
> +	rc = set_interleave_ways(cxlr, ways);
> +	if (rc)
> +		goto out;
> +
> +	rc = set_interleave_granularity(cxlr, cxld->interleave_granularity);
> +	if (rc)
here I think cxl_dpa_rwsem is held.
> +		goto out;
> +
> +	down_read(&cxl_dpa_rwsem);
> +	for (i = 0; i < ways; i++) {
> +		if (!cxled[i]->dpa_res)
> +			break;
> +		size += resource_size(cxled[i]->dpa_res);
> +	}
> +	up_read(&cxl_dpa_rwsem);
> +
> +	if (i < ways)

but not here and they go to the same place.

> +		goto out;
> +
> +	rc = alloc_hpa(cxlr, size);
> +	if (rc)
> +		goto out;
> +
> +	down_read(&cxl_dpa_rwsem);
> +	for (i = 0; i < ways; i++) {
> +		rc = cxl_region_attach(cxlr, cxled[i], i);
> +		if (rc)
> +			break;
> +	}
> +	up_read(&cxl_dpa_rwsem);
> +
> +	if (rc)
> +		goto out;
> +
> +	rc = cxl_region_decode_commit(cxlr);
> +	if (rc)
> +		goto out;
>  
> +	p = &cxlr->params;
> +	p->state = CXL_CONFIG_COMMIT;
> +out:
> +	construct_region_end();
> +	if (rc) {
> +		drop_region(cxlr);
> +		return ERR_PTR(rc);
> +	}
>  	return cxlr;
> +}

> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> index a0e0795ec064..377bb3cd2d47 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h
> @@ -881,5 +881,7 @@ struct cxl_root_decoder *cxl_get_hpa_freespace(struct cxl_port *endpoint,
>  					       int interleave_ways,
>  					       unsigned long flags,
>  					       resource_size_t *max);
> -
Avoid whitespace noise.

> +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
> +				     struct cxl_endpoint_decoder **cxled,
> +				     int ways);
>  #endif /* __CXL_MEM_H__ */
Alejandro Lucero Palau Aug. 19, 2024, 4:11 p.m. UTC | #2
On 8/4/24 19:29, Jonathan Cameron wrote:
> On Mon, 15 Jul 2024 18:28:32 +0100
> alejandro.lucero-palau@amd.com wrote:
>
>> From: Alejandro Lucero <alucerop@amd.com>
>>
>> Creating a CXL region requires userspace intervention through the cxl
>> sysfs files. Type2 support should allow accelerator drivers to create
>> such cxl region from kernel code.
>>
>> Adding that functionality and integrating it with current support for
>> memory expanders.
>>
>> Based on https://lore.kernel.org/linux-cxl/168592149709.1948938.8663425987110396027.stgit@dwillia2-xfh.jf.intel.com/T/#m84598b534cc5664f5bb31521ba6e41c7bc213758
>> Signed-off-by: Alejandro Lucero <alucerop@amd.com>
>> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> Needs a co-developed or similar given Dan didn't email this patch
> (which this sign off list suggests he did).


Yes, I'll fix it.


>
> I'll take another look at the locking, but my main comment is
> that it is really confusing so I have no idea if it's right.
> Consider different ways of breaking up the code you need
> to try and keep the locking obvious.


I have to agree and this means I need to work on it. I know it works for 
my case, what was my main focus for the RFC, but not looked at it with 
the right mindset.

I take your next comments as valuable inputs for the required work.

Thanks!


> Jonathan
>
>> +
>> +static ssize_t interleave_ways_store(struct device *dev,
>> +				     struct device_attribute *attr,
>> +				     const char *buf, size_t len)
>> +{
>> +	struct cxl_region *cxlr = to_cxl_region(dev);
>> +	unsigned int val;
>> +	int rc;
>> +
>> +	rc = kstrtouint(buf, 0, &val);
>> +	if (rc)
>> +		return rc;
>> +
>> +	rc = down_write_killable(&cxl_region_rwsem);
>> +	if (rc)
>> +		return rc;
>> +
>> +	rc = set_interleave_ways(cxlr, val);
>>   	up_write(&cxl_region_rwsem);
>>   	if (rc)
>>   		return rc;
>>   	return len;
>>   }
>> +
> This was probably intentional. Common to group a macro like this
> with the function it is using by not having a blank line.
>>   static DEVICE_ATTR_RW(interleave_ways);
>>   
>>   static ssize_t interleave_granularity_show(struct device *dev,
>> @@ -547,21 +556,14 @@ static ssize_t interleave_granularity_show(struct device *dev,
>>   	return rc;
>>   }
>> +static ssize_t interleave_granularity_store(struct device *dev,
>> +					    struct device_attribute *attr,
>> +					    const char *buf, size_t len)
>> +{
>> +	struct cxl_region *cxlr = to_cxl_region(dev);
>> +	int rc, val;
>> +
>> +	rc = kstrtoint(buf, 0, &val);
>> +	if (rc)
>> +		return rc;
>> +
>>   	rc = down_write_killable(&cxl_region_rwsem);
>>   	if (rc)
>>   		return rc;
>> -	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
>> -		rc = -EBUSY;
>> -		goto out;
>> -	}
>>   
>> -	p->interleave_granularity = val;
>> -out:
>> +	rc = set_interleave_granularity(cxlr, val);
>>   	up_write(&cxl_region_rwsem);
>>   	if (rc)
>>   		return rc;
>>   	return len;
>>   }
>> +
> grump.
>
>>   static DEVICE_ATTR_RW(interleave_granularity);
>> +/* Establish an empty region covering the given HPA range */
>> +static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>> +					   struct cxl_endpoint_decoder *cxled)
>> +{
>> +	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
>> +	struct range *hpa = &cxled->cxld.hpa_range;
>> +	struct cxl_region_params *p;
>> +	struct cxl_region *cxlr;
>> +	struct resource *res;
>> +	int rc;
>> +
>> +	cxlr = construct_region_begin(cxlrd, cxled);
>> +	if (IS_ERR(cxlr))
>> +		return cxlr;
>>   
>>   	set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
>>   
>>   	res = kmalloc(sizeof(*res), GFP_KERNEL);
>>   	if (!res) {
>>   		rc = -ENOMEM;
>> -		goto err;
>> +		goto out;
>>   	}
>>   
>>   	*res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa),
>>   				    dev_name(&cxlr->dev));
>> +
>>   	rc = insert_resource(cxlrd->res, res);
>>   	if (rc) {
>>   		/*
>> @@ -3412,6 +3462,7 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>>   			 __func__, dev_name(&cxlr->dev));
>>   	}
>>   
>> +	p = &cxlr->params;
>>   	p->res = res;
>>   	p->interleave_ways = cxled->cxld.interleave_ways;
>>   	p->interleave_granularity = cxled->cxld.interleave_granularity;
>> @@ -3419,24 +3470,124 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>>   
>>   	rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
>>   	if (rc)
>> -		goto err;
>> +		goto out;
>>   
>>   	dev_dbg(cxlmd->dev.parent, "%s:%s: %s %s res: %pr iw: %d ig: %d\n",
>> -		dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), __func__,
>> -		dev_name(&cxlr->dev), p->res, p->interleave_ways,
>> -		p->interleave_granularity);
>> +				   dev_name(&cxlmd->dev),
>> +				   dev_name(&cxled->cxld.dev), __func__,
>> +				   dev_name(&cxlr->dev), p->res,
>> +				   p->interleave_ways,
>> +				   p->interleave_granularity);
>>   
>>   	/* ...to match put_device() in cxl_add_to_region() */
>>   	get_device(&cxlr->dev);
>>   	up_write(&cxl_region_rwsem);
>> +out:
>> +	construct_region_end();
> two calls to up_write(&cxl_region_rwsem) next to each other?
>
>> +	if (rc) {
>> +		drop_region(cxlr);
>> +		return ERR_PTR(rc);
>> +	}
>> +	return cxlr;
>> +}
>> +
>> +static struct cxl_region *
>> +__construct_new_region(struct cxl_root_decoder *cxlrd,
>> +		       struct cxl_endpoint_decoder **cxled, int ways)
>> +{
>> +	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
>> +	struct cxl_region_params *p;
>> +	resource_size_t size = 0;
>> +	struct cxl_region *cxlr;
>> +	int rc, i;
>> +
>> +	/* If interleaving is not supported, why does ways need to be at least 1? */
> I think 1 means no interleave. It's simpler to do this than have 0 and 1 both
> mean no interleave because 1 works for programmable decoders.
>
>> +	if (ways < 1)
>> +		return ERR_PTR(-EINVAL);
>> +
>> +	cxlr = construct_region_begin(cxlrd, cxled[0]);
> rethink how this broken up.  Taking the cxl_dpa_rwsem
> inside this function and is really hard to follow.  Ideally
> manage it with scoped_guard()
>
>
>> +	if (IS_ERR(cxlr))
>> +		return cxlr;
>> +
>> +	rc = set_interleave_ways(cxlr, ways);
>> +	if (rc)
>> +		goto out;
>> +
>> +	rc = set_interleave_granularity(cxlr, cxld->interleave_granularity);
>> +	if (rc)
> here I think cxl_dpa_rwsem is held.
>> +		goto out;
>> +
>> +	down_read(&cxl_dpa_rwsem);
>> +	for (i = 0; i < ways; i++) {
>> +		if (!cxled[i]->dpa_res)
>> +			break;
>> +		size += resource_size(cxled[i]->dpa_res);
>> +	}
>> +	up_read(&cxl_dpa_rwsem);
>> +
>> +	if (i < ways)
> but not here and they go to the same place.
>
>> +		goto out;
>> +
>> +	rc = alloc_hpa(cxlr, size);
>> +	if (rc)
>> +		goto out;
>> +
>> +	down_read(&cxl_dpa_rwsem);
>> +	for (i = 0; i < ways; i++) {
>> +		rc = cxl_region_attach(cxlr, cxled[i], i);
>> +		if (rc)
>> +			break;
>> +	}
>> +	up_read(&cxl_dpa_rwsem);
>> +
>> +	if (rc)
>> +		goto out;
>> +
>> +	rc = cxl_region_decode_commit(cxlr);
>> +	if (rc)
>> +		goto out;
>>   
>> +	p = &cxlr->params;
>> +	p->state = CXL_CONFIG_COMMIT;
>> +out:
>> +	construct_region_end();
>> +	if (rc) {
>> +		drop_region(cxlr);
>> +		return ERR_PTR(rc);
>> +	}
>>   	return cxlr;
>> +}
>> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
>> index a0e0795ec064..377bb3cd2d47 100644
>> --- a/drivers/cxl/cxlmem.h
>> +++ b/drivers/cxl/cxlmem.h
>> @@ -881,5 +881,7 @@ struct cxl_root_decoder *cxl_get_hpa_freespace(struct cxl_port *endpoint,
>>   					       int interleave_ways,
>>   					       unsigned long flags,
>>   					       resource_size_t *max);
>> -
> Avoid whitespace noise.
>
>> +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
>> +				     struct cxl_endpoint_decoder **cxled,
>> +				     int ways);
>>   #endif /* __CXL_MEM_H__ */
Zhi Wang Aug. 22, 2024, 1:12 p.m. UTC | #3
On Mon, 15 Jul 2024 18:28:32 +0100
<alejandro.lucero-palau@amd.com> wrote:

> From: Alejandro Lucero <alucerop@amd.com>
> 
> Creating a CXL region requires userspace intervention through the cxl
> sysfs files. Type2 support should allow accelerator drivers to create
> such cxl region from kernel code.
> 
> Adding that functionality and integrating it with current support for
> memory expanders.
> 
> Based on
> https://lore.kernel.org/linux-cxl/168592149709.1948938.8663425987110396027.stgit@dwillia2-xfh.jf.intel.com/T/#m84598b534cc5664f5bb31521ba6e41c7bc213758
> Signed-off-by: Alejandro Lucero <alucerop@amd.com> Signed-off-by: Dan
> Williams <dan.j.williams@intel.com> ---
>  drivers/cxl/core/region.c          | 265
> ++++++++++++++++++++++------- drivers/cxl/cxl.h                  |
> 1 + drivers/cxl/cxlmem.h               |   4 +-
>  drivers/net/ethernet/sfc/efx_cxl.c |  15 +-
>  include/linux/cxl_accel_mem.h      |   5 +
>  5 files changed, 231 insertions(+), 59 deletions(-)
> 
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index 5cc71b8868bc..697c8df83a4b 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -479,22 +479,14 @@ static ssize_t interleave_ways_show(struct
> device *dev, 
>  static const struct attribute_group
> *get_cxl_region_target_group(void); 
> -static ssize_t interleave_ways_store(struct device *dev,
> -				     struct device_attribute *attr,
> -				     const char *buf, size_t len)
> +static int set_interleave_ways(struct cxl_region *cxlr, int val)
>  {
> -	struct cxl_root_decoder *cxlrd =
> to_cxl_root_decoder(dev->parent);
> +	struct cxl_root_decoder *cxlrd =
> to_cxl_root_decoder(cxlr->dev.parent); struct cxl_decoder *cxld =
> &cxlrd->cxlsd.cxld;
> -	struct cxl_region *cxlr = to_cxl_region(dev);
>  	struct cxl_region_params *p = &cxlr->params;
> -	unsigned int val, save;
> -	int rc;
> +	int save, rc;
>  	u8 iw;
>  
> -	rc = kstrtouint(buf, 0, &val);
> -	if (rc)
> -		return rc;
> -
>  	rc = ways_to_eiw(val, &iw);
>  	if (rc)
>  		return rc;
> @@ -509,25 +501,42 @@ static ssize_t interleave_ways_store(struct
> device *dev, return -EINVAL;
>  	}
>  
> -	rc = down_write_killable(&cxl_region_rwsem);
> -	if (rc)
> -		return rc;
> -	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
> -		rc = -EBUSY;
> -		goto out;
> -	}
> +	lockdep_assert_held_write(&cxl_region_rwsem);
> +	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
> +		return -EBUSY;
>  
>  	save = p->interleave_ways;
>  	p->interleave_ways = val;
>  	rc = sysfs_update_group(&cxlr->dev.kobj,
> get_cxl_region_target_group()); if (rc)
>  		p->interleave_ways = save;
> -out:
> +
> +	return rc;
> +}
> +
> +static ssize_t interleave_ways_store(struct device *dev,
> +				     struct device_attribute *attr,
> +				     const char *buf, size_t len)
> +{
> +	struct cxl_region *cxlr = to_cxl_region(dev);
> +	unsigned int val;
> +	int rc;
> +
> +	rc = kstrtouint(buf, 0, &val);
> +	if (rc)
> +		return rc;
> +
> +	rc = down_write_killable(&cxl_region_rwsem);
> +	if (rc)
> +		return rc;
> +
> +	rc = set_interleave_ways(cxlr, val);
>  	up_write(&cxl_region_rwsem);
>  	if (rc)
>  		return rc;
>  	return len;
>  }
> +
>  static DEVICE_ATTR_RW(interleave_ways);
>  
>  static ssize_t interleave_granularity_show(struct device *dev,
> @@ -547,21 +556,14 @@ static ssize_t
> interleave_granularity_show(struct device *dev, return rc;
>  }
>  
> -static ssize_t interleave_granularity_store(struct device *dev,
> -					    struct device_attribute
> *attr,
> -					    const char *buf, size_t
> len) +static int set_interleave_granularity(struct cxl_region *cxlr,
> int val) {
> -	struct cxl_root_decoder *cxlrd =
> to_cxl_root_decoder(dev->parent);
> +	struct cxl_root_decoder *cxlrd =
> to_cxl_root_decoder(cxlr->dev.parent); struct cxl_decoder *cxld =
> &cxlrd->cxlsd.cxld;
> -	struct cxl_region *cxlr = to_cxl_region(dev);
>  	struct cxl_region_params *p = &cxlr->params;
> -	int rc, val;
> +	int rc;
>  	u16 ig;
>  
> -	rc = kstrtoint(buf, 0, &val);
> -	if (rc)
> -		return rc;
> -
>  	rc = granularity_to_eig(val, &ig);
>  	if (rc)
>  		return rc;
> @@ -577,21 +579,36 @@ static ssize_t
> interleave_granularity_store(struct device *dev, if
> (cxld->interleave_ways > 1 && val != cxld->interleave_granularity)
> return -EINVAL; 
> +	lockdep_assert_held_write(&cxl_region_rwsem);
> +	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
> +		return -EBUSY;
> +
> +	p->interleave_granularity = val;
> +	return 0;
> +}
> +
> +static ssize_t interleave_granularity_store(struct device *dev,
> +					    struct device_attribute
> *attr,
> +					    const char *buf, size_t
> len) +{
> +	struct cxl_region *cxlr = to_cxl_region(dev);
> +	int rc, val;
> +
> +	rc = kstrtoint(buf, 0, &val);
> +	if (rc)
> +		return rc;
> +
>  	rc = down_write_killable(&cxl_region_rwsem);
>  	if (rc)
>  		return rc;
> -	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
> -		rc = -EBUSY;
> -		goto out;
> -	}
>  
> -	p->interleave_granularity = val;
> -out:
> +	rc = set_interleave_granularity(cxlr, val);
>  	up_write(&cxl_region_rwsem);
>  	if (rc)
>  		return rc;
>  	return len;
>  }
> +
>  static DEVICE_ATTR_RW(interleave_granularity);
>  
>  static ssize_t resource_show(struct device *dev, struct
> device_attribute *attr, @@ -2193,7 +2210,7 @@ static int
> cxl_region_attach(struct cxl_region *cxlr, return 0;
>  }
>  
> -static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
> +int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
>  {
>  	struct cxl_port *iter, *ep_port = cxled_to_port(cxled);
>  	struct cxl_region *cxlr = cxled->cxld.region;
> @@ -2252,6 +2269,7 @@ static int cxl_region_detach(struct
> cxl_endpoint_decoder *cxled) put_device(&cxlr->dev);
>  	return rc;
>  }
> +EXPORT_SYMBOL_NS_GPL(cxl_region_detach, CXL);
>  
>  void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
>  {
> @@ -2746,6 +2764,14 @@ cxl_find_region_by_name(struct
> cxl_root_decoder *cxlrd, const char *name) return
> to_cxl_region(region_dev); }
>  
> +static void drop_region(struct cxl_region *cxlr)
> +{
> +	struct cxl_root_decoder *cxlrd =
> to_cxl_root_decoder(cxlr->dev.parent);
> +	struct cxl_port *port = cxlrd_to_port(cxlrd);
> +
> +	devm_release_action(port->uport_dev, unregister_region,
> cxlr); +}
> +
>  static ssize_t delete_region_store(struct device *dev,
>  				   struct device_attribute *attr,
>  				   const char *buf, size_t len)
> @@ -3353,17 +3379,18 @@ static int match_region_by_range(struct
> device *dev, void *data) return rc;
>  }
>  
> -/* Establish an empty region covering the given HPA range */
> -static struct cxl_region *construct_region(struct cxl_root_decoder
> *cxlrd,
> -					   struct
> cxl_endpoint_decoder *cxled) +static void construct_region_end(void)
> +{
> +	up_write(&cxl_region_rwsem);
> +}
> +
> +static struct cxl_region *construct_region_begin(struct
> cxl_root_decoder *cxlrd,
> +						 struct
> cxl_endpoint_decoder *cxled) {
>  	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
> -	struct cxl_port *port = cxlrd_to_port(cxlrd);
> -	struct range *hpa = &cxled->cxld.hpa_range;
>  	struct cxl_region_params *p;
>  	struct cxl_region *cxlr;
> -	struct resource *res;
> -	int rc;
> +	int err = 0;
>  
>  	do {
>  		cxlr = __create_region(cxlrd, cxled->mode,
> @@ -3372,8 +3399,7 @@ static struct cxl_region
> *construct_region(struct cxl_root_decoder *cxlrd, } while
> (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY); 
>  	if (IS_ERR(cxlr)) {
> -		dev_err(cxlmd->dev.parent,
> -			"%s:%s: %s failed assign region: %ld\n",
> +		dev_err(cxlmd->dev.parent,"%s:%s: %s failed assign
> region: %ld\n", dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
>  			__func__, PTR_ERR(cxlr));
>  		return cxlr;
> @@ -3383,23 +3409,47 @@ static struct cxl_region
> *construct_region(struct cxl_root_decoder *cxlrd, p = &cxlr->params;
>  	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
>  		dev_err(cxlmd->dev.parent,
> -			"%s:%s: %s autodiscovery interrupted\n",
> +			"%s:%s: %s region setup interrupted\n",
>  			dev_name(&cxlmd->dev),
> dev_name(&cxled->cxld.dev), __func__);
> -		rc = -EBUSY;
> -		goto err;
> +		err = -EBUSY;
> +	}
> +
> +	if (err) {
> +		construct_region_end();
> +		drop_region(cxlr);
> +		return ERR_PTR(err);
>  	}
> +	return cxlr;
> +}
> +
> +
> +/* Establish an empty region covering the given HPA range */
> +static struct cxl_region *construct_region(struct cxl_root_decoder
> *cxlrd,
> +					   struct
> cxl_endpoint_decoder *cxled) +{
> +	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
> +	struct range *hpa = &cxled->cxld.hpa_range;
> +	struct cxl_region_params *p;
> +	struct cxl_region *cxlr;
> +	struct resource *res;
> +	int rc;
> +
> +	cxlr = construct_region_begin(cxlrd, cxled);
> +	if (IS_ERR(cxlr))
> +		return cxlr;
>  
>  	set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
>  
>  	res = kmalloc(sizeof(*res), GFP_KERNEL);
>  	if (!res) {
>  		rc = -ENOMEM;
> -		goto err;
> +		goto out;
>  	}
>  
>  	*res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa),
>  				    dev_name(&cxlr->dev));
> +
>  	rc = insert_resource(cxlrd->res, res);
>  	if (rc) {
>  		/*
> @@ -3412,6 +3462,7 @@ static struct cxl_region
> *construct_region(struct cxl_root_decoder *cxlrd, __func__,
> dev_name(&cxlr->dev)); }
>  
> +	p = &cxlr->params;
>  	p->res = res;
>  	p->interleave_ways = cxled->cxld.interleave_ways;
>  	p->interleave_granularity =
> cxled->cxld.interleave_granularity; @@ -3419,24 +3470,124 @@ static
> struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, 
>  	rc = sysfs_update_group(&cxlr->dev.kobj,
> get_cxl_region_target_group()); if (rc)
> -		goto err;
> +		goto out;
>  
>  	dev_dbg(cxlmd->dev.parent, "%s:%s: %s %s res: %pr iw: %d ig:
> %d\n",
> -		dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
> __func__,
> -		dev_name(&cxlr->dev), p->res, p->interleave_ways,
> -		p->interleave_granularity);
> +				   dev_name(&cxlmd->dev),
> +				   dev_name(&cxled->cxld.dev),
> __func__,
> +				   dev_name(&cxlr->dev), p->res,
> +				   p->interleave_ways,
> +				   p->interleave_granularity);
>  
>  	/* ...to match put_device() in cxl_add_to_region() */
>  	get_device(&cxlr->dev);
>  	up_write(&cxl_region_rwsem);
> +out:
> +	construct_region_end();
> +	if (rc) {
> +		drop_region(cxlr);
> +		return ERR_PTR(rc);
> +	}
> +	return cxlr;
> +}
> +
> +static struct cxl_region *
> +__construct_new_region(struct cxl_root_decoder *cxlrd,
> +		       struct cxl_endpoint_decoder **cxled, int ways)
> +{
> +	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
> +	struct cxl_region_params *p;
> +	resource_size_t size = 0;
> +	struct cxl_region *cxlr;
> +	int rc, i;
> +
> +	/* If interleaving is not supported, why does ways need to
> be at least 1? */
> +	if (ways < 1)
> +		return ERR_PTR(-EINVAL);
> +
> +	cxlr = construct_region_begin(cxlrd, cxled[0]);
> +	if (IS_ERR(cxlr))
> +		return cxlr;
> +
> +	rc = set_interleave_ways(cxlr, ways);
> +	if (rc)
> +		goto out;
> +
> +	rc = set_interleave_granularity(cxlr,
> cxld->interleave_granularity);
> +	if (rc)
> +		goto out;
> +
> +	down_read(&cxl_dpa_rwsem);
> +	for (i = 0; i < ways; i++) {
> +		if (!cxled[i]->dpa_res)
> +			break;
> +		size += resource_size(cxled[i]->dpa_res);
> +	}
> +	up_read(&cxl_dpa_rwsem);
> +
> +	if (i < ways)
> +		goto out;
> +
> +	rc = alloc_hpa(cxlr, size);
> +	if (rc)
> +		goto out;
> +
> +	down_read(&cxl_dpa_rwsem);
> +	for (i = 0; i < ways; i++) {
> +		rc = cxl_region_attach(cxlr, cxled[i], i);
> +		if (rc)
> +			break;
> +	}
> +	up_read(&cxl_dpa_rwsem);
> +
> +	if (rc)
> +		goto out;
> +
> +	rc = cxl_region_decode_commit(cxlr);
> +	if (rc)
> +		goto out;
>  
> +	p = &cxlr->params;
> +	p->state = CXL_CONFIG_COMMIT;
> +out:
> +	construct_region_end();
> +	if (rc) {
> +		drop_region(cxlr);
> +		return ERR_PTR(rc);
> +	}
>  	return cxlr;
> +}
>  
> -err:
> -	up_write(&cxl_region_rwsem);
> -	devm_release_action(port->uport_dev, unregister_region,
> cxlr);
> -	return ERR_PTR(rc);
> +/**
> + * cxl_create_region - Establish a region given an array of endpoint
> decoders
> + * @cxlrd: root decoder to allocate HPA
> + * @cxled: array of endpoint decoders with reserved DPA capacity
> + * @ways: size of @cxled array
> + *
> + * Returns a fully formed region in the commit state and attached to
> the
> + * cxl_region driver.
> + */
> +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
> +				     struct cxl_endpoint_decoder
> **cxled,
> +				     int ways)
> +{
> +	struct cxl_region *cxlr;
> +
> +	mutex_lock(&cxlrd->range_lock);
> +	cxlr = __construct_new_region(cxlrd, cxled, ways);
> +	mutex_unlock(&cxlrd->range_lock);
> +
> +	if (IS_ERR(cxlr))
> +		return cxlr;
> +
> +	if (device_attach(&cxlr->dev) <= 0) {
> +		dev_err(&cxlr->dev, "failed to create region\n");
> +		drop_region(cxlr);
> +		return ERR_PTR(-ENODEV);
> +	}
> +	return cxlr;
>  }
> +EXPORT_SYMBOL_NS_GPL(cxl_create_region, CXL);
>  
>  int cxl_add_to_region(struct cxl_port *root, struct
> cxl_endpoint_decoder *cxled) {
> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index d3fdd2c1e066..1bf3b74ff959 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -905,6 +905,7 @@ void cxl_coordinates_combine(struct
> access_coordinate *out, 
>  bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port);
>  
> +int cxl_region_detach(struct cxl_endpoint_decoder *cxled);
>  /*
>   * Unit test builds overrides this to __weak, find the 'strong'
> version
>   * of these symbols in tools/testing/cxl/.
> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> index a0e0795ec064..377bb3cd2d47 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h
> @@ -881,5 +881,7 @@ struct cxl_root_decoder
> *cxl_get_hpa_freespace(struct cxl_port *endpoint, int interleave_ways,
>  					       unsigned long flags,
>  					       resource_size_t *max);
> -
> +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
> +				     struct cxl_endpoint_decoder
> **cxled,
> +				     int ways);
>  #endif /* __CXL_MEM_H__ */
> diff --git a/drivers/net/ethernet/sfc/efx_cxl.c
> b/drivers/net/ethernet/sfc/efx_cxl.c index b5626d724b52..4012e3faa298
> 100644 --- a/drivers/net/ethernet/sfc/efx_cxl.c
> +++ b/drivers/net/ethernet/sfc/efx_cxl.c
> @@ -92,8 +92,18 @@ void efx_cxl_init(struct efx_nic *efx)
>  
>  	cxl->cxled = cxl_request_dpa(cxl->endpoint, true,
> EFX_CTPIO_BUFFER_SIZE, EFX_CTPIO_BUFFER_SIZE);
> -	if (IS_ERR(cxl->cxled))
> +	if (IS_ERR(cxl->cxled)) {
>  		pci_info(pci_dev, "CXL accel request DPA failed");
> +		return;
> +	}
> +
> +	cxl->efx_region = cxl_create_region(cxl->cxlrd, &cxl->cxled,
> 1);
> +	if (!cxl->efx_region) {

if (IS_ERR(cxl->efx_region))

> +		pci_info(pci_dev, "CXL accel create region failed");
> +		cxl_dpa_free(cxl->cxled);
> +		return;
> +	}
> +
>  out:
>  	cxl_release_endpoint(cxl->cxlmd, cxl->endpoint);
>  }
> @@ -102,6 +112,9 @@ void efx_cxl_exit(struct efx_nic *efx)
>  {
>  	struct efx_cxl *cxl = efx->cxl;
>  
> +	if (cxl->efx_region)
> +		cxl_region_detach(cxl->cxled);
> +
>  	if (cxl->cxled)
>  		cxl_dpa_free(cxl->cxled);
>   
> diff --git a/include/linux/cxl_accel_mem.h
> b/include/linux/cxl_accel_mem.h index d4ecb5bb4fc8..a5f9ffc24509
> 100644 --- a/include/linux/cxl_accel_mem.h
> +++ b/include/linux/cxl_accel_mem.h
> @@ -48,4 +48,9 @@ struct cxl_endpoint_decoder *cxl_request_dpa(struct
> cxl_port *endpoint, resource_size_t min,
>  					     resource_size_t max);
>  int cxl_dpa_free(struct cxl_endpoint_decoder *cxled);
> +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
> +				     struct cxl_endpoint_decoder
> **cxled,
> +				     int ways);
> +
> +int cxl_region_detach(struct cxl_endpoint_decoder *cxled);
>  #endif
Alejandro Lucero Palau Aug. 23, 2024, 9:31 a.m. UTC | #4
On 8/22/24 14:12, Zhi Wang wrote:
> On Mon, 15 Jul 2024 18:28:32 +0100
> <alejandro.lucero-palau@amd.com> wrote:
>
>> From: Alejandro Lucero <alucerop@amd.com>
>>
>> Creating a CXL region requires userspace intervention through the cxl
>> sysfs files. Type2 support should allow accelerator drivers to create
>> such cxl region from kernel code.
>>
>> Adding that functionality and integrating it with current support for
>> memory expanders.
>>
>> Based on
>> https://lore.kernel.org/linux-cxl/168592149709.1948938.8663425987110396027.stgit@dwillia2-xfh.jf.intel.com/T/#m84598b534cc5664f5bb31521ba6e41c7bc213758
>> Signed-off-by: Alejandro Lucero <alucerop@amd.com> Signed-off-by: Dan
>> Williams <dan.j.williams@intel.com> ---
>>   drivers/cxl/core/region.c          | 265
>> ++++++++++++++++++++++------- drivers/cxl/cxl.h                  |
>> 1 + drivers/cxl/cxlmem.h               |   4 +-
>>   drivers/net/ethernet/sfc/efx_cxl.c |  15 +-
>>   include/linux/cxl_accel_mem.h      |   5 +
>>   5 files changed, 231 insertions(+), 59 deletions(-)
>>
>> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
>> index 5cc71b8868bc..697c8df83a4b 100644
>> --- a/drivers/cxl/core/region.c
>> +++ b/drivers/cxl/core/region.c
>> @@ -479,22 +479,14 @@ static ssize_t interleave_ways_show(struct
>> device *dev,
>>   static const struct attribute_group
>> *get_cxl_region_target_group(void);
>> -static ssize_t interleave_ways_store(struct device *dev,
>> -				     struct device_attribute *attr,
>> -				     const char *buf, size_t len)
>> +static int set_interleave_ways(struct cxl_region *cxlr, int val)
>>   {
>> -	struct cxl_root_decoder *cxlrd =
>> to_cxl_root_decoder(dev->parent);
>> +	struct cxl_root_decoder *cxlrd =
>> to_cxl_root_decoder(cxlr->dev.parent); struct cxl_decoder *cxld =
>> &cxlrd->cxlsd.cxld;
>> -	struct cxl_region *cxlr = to_cxl_region(dev);
>>   	struct cxl_region_params *p = &cxlr->params;
>> -	unsigned int val, save;
>> -	int rc;
>> +	int save, rc;
>>   	u8 iw;
>>   
>> -	rc = kstrtouint(buf, 0, &val);
>> -	if (rc)
>> -		return rc;
>> -
>>   	rc = ways_to_eiw(val, &iw);
>>   	if (rc)
>>   		return rc;
>> @@ -509,25 +501,42 @@ static ssize_t interleave_ways_store(struct
>> device *dev, return -EINVAL;
>>   	}
>>   
>> -	rc = down_write_killable(&cxl_region_rwsem);
>> -	if (rc)
>> -		return rc;
>> -	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
>> -		rc = -EBUSY;
>> -		goto out;
>> -	}
>> +	lockdep_assert_held_write(&cxl_region_rwsem);
>> +	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
>> +		return -EBUSY;
>>   
>>   	save = p->interleave_ways;
>>   	p->interleave_ways = val;
>>   	rc = sysfs_update_group(&cxlr->dev.kobj,
>> get_cxl_region_target_group()); if (rc)
>>   		p->interleave_ways = save;
>> -out:
>> +
>> +	return rc;
>> +}
>> +
>> +static ssize_t interleave_ways_store(struct device *dev,
>> +				     struct device_attribute *attr,
>> +				     const char *buf, size_t len)
>> +{
>> +	struct cxl_region *cxlr = to_cxl_region(dev);
>> +	unsigned int val;
>> +	int rc;
>> +
>> +	rc = kstrtouint(buf, 0, &val);
>> +	if (rc)
>> +		return rc;
>> +
>> +	rc = down_write_killable(&cxl_region_rwsem);
>> +	if (rc)
>> +		return rc;
>> +
>> +	rc = set_interleave_ways(cxlr, val);
>>   	up_write(&cxl_region_rwsem);
>>   	if (rc)
>>   		return rc;
>>   	return len;
>>   }
>> +
>>   static DEVICE_ATTR_RW(interleave_ways);
>>   
>>   static ssize_t interleave_granularity_show(struct device *dev,
>> @@ -547,21 +556,14 @@ static ssize_t
>> interleave_granularity_show(struct device *dev, return rc;
>>   }
>>   
>> -static ssize_t interleave_granularity_store(struct device *dev,
>> -					    struct device_attribute
>> *attr,
>> -					    const char *buf, size_t
>> len) +static int set_interleave_granularity(struct cxl_region *cxlr,
>> int val) {
>> -	struct cxl_root_decoder *cxlrd =
>> to_cxl_root_decoder(dev->parent);
>> +	struct cxl_root_decoder *cxlrd =
>> to_cxl_root_decoder(cxlr->dev.parent); struct cxl_decoder *cxld =
>> &cxlrd->cxlsd.cxld;
>> -	struct cxl_region *cxlr = to_cxl_region(dev);
>>   	struct cxl_region_params *p = &cxlr->params;
>> -	int rc, val;
>> +	int rc;
>>   	u16 ig;
>>   
>> -	rc = kstrtoint(buf, 0, &val);
>> -	if (rc)
>> -		return rc;
>> -
>>   	rc = granularity_to_eig(val, &ig);
>>   	if (rc)
>>   		return rc;
>> @@ -577,21 +579,36 @@ static ssize_t
>> interleave_granularity_store(struct device *dev, if
>> (cxld->interleave_ways > 1 && val != cxld->interleave_granularity)
>> return -EINVAL;
>> +	lockdep_assert_held_write(&cxl_region_rwsem);
>> +	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
>> +		return -EBUSY;
>> +
>> +	p->interleave_granularity = val;
>> +	return 0;
>> +}
>> +
>> +static ssize_t interleave_granularity_store(struct device *dev,
>> +					    struct device_attribute
>> *attr,
>> +					    const char *buf, size_t
>> len) +{
>> +	struct cxl_region *cxlr = to_cxl_region(dev);
>> +	int rc, val;
>> +
>> +	rc = kstrtoint(buf, 0, &val);
>> +	if (rc)
>> +		return rc;
>> +
>>   	rc = down_write_killable(&cxl_region_rwsem);
>>   	if (rc)
>>   		return rc;
>> -	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
>> -		rc = -EBUSY;
>> -		goto out;
>> -	}
>>   
>> -	p->interleave_granularity = val;
>> -out:
>> +	rc = set_interleave_granularity(cxlr, val);
>>   	up_write(&cxl_region_rwsem);
>>   	if (rc)
>>   		return rc;
>>   	return len;
>>   }
>> +
>>   static DEVICE_ATTR_RW(interleave_granularity);
>>   
>>   static ssize_t resource_show(struct device *dev, struct
>> device_attribute *attr, @@ -2193,7 +2210,7 @@ static int
>> cxl_region_attach(struct cxl_region *cxlr, return 0;
>>   }
>>   
>> -static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
>> +int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
>>   {
>>   	struct cxl_port *iter, *ep_port = cxled_to_port(cxled);
>>   	struct cxl_region *cxlr = cxled->cxld.region;
>> @@ -2252,6 +2269,7 @@ static int cxl_region_detach(struct
>> cxl_endpoint_decoder *cxled) put_device(&cxlr->dev);
>>   	return rc;
>>   }
>> +EXPORT_SYMBOL_NS_GPL(cxl_region_detach, CXL);
>>   
>>   void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
>>   {
>> @@ -2746,6 +2764,14 @@ cxl_find_region_by_name(struct
>> cxl_root_decoder *cxlrd, const char *name) return
>> to_cxl_region(region_dev); }
>>   
>> +static void drop_region(struct cxl_region *cxlr)
>> +{
>> +	struct cxl_root_decoder *cxlrd =
>> to_cxl_root_decoder(cxlr->dev.parent);
>> +	struct cxl_port *port = cxlrd_to_port(cxlrd);
>> +
>> +	devm_release_action(port->uport_dev, unregister_region,
>> cxlr); +}
>> +
>>   static ssize_t delete_region_store(struct device *dev,
>>   				   struct device_attribute *attr,
>>   				   const char *buf, size_t len)
>> @@ -3353,17 +3379,18 @@ static int match_region_by_range(struct
>> device *dev, void *data) return rc;
>>   }
>>   
>> -/* Establish an empty region covering the given HPA range */
>> -static struct cxl_region *construct_region(struct cxl_root_decoder
>> *cxlrd,
>> -					   struct
>> cxl_endpoint_decoder *cxled) +static void construct_region_end(void)
>> +{
>> +	up_write(&cxl_region_rwsem);
>> +}
>> +
>> +static struct cxl_region *construct_region_begin(struct
>> cxl_root_decoder *cxlrd,
>> +						 struct
>> cxl_endpoint_decoder *cxled) {
>>   	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
>> -	struct cxl_port *port = cxlrd_to_port(cxlrd);
>> -	struct range *hpa = &cxled->cxld.hpa_range;
>>   	struct cxl_region_params *p;
>>   	struct cxl_region *cxlr;
>> -	struct resource *res;
>> -	int rc;
>> +	int err = 0;
>>   
>>   	do {
>>   		cxlr = __create_region(cxlrd, cxled->mode,
>> @@ -3372,8 +3399,7 @@ static struct cxl_region
>> *construct_region(struct cxl_root_decoder *cxlrd, } while
>> (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
>>   	if (IS_ERR(cxlr)) {
>> -		dev_err(cxlmd->dev.parent,
>> -			"%s:%s: %s failed assign region: %ld\n",
>> +		dev_err(cxlmd->dev.parent,"%s:%s: %s failed assign
>> region: %ld\n", dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
>>   			__func__, PTR_ERR(cxlr));
>>   		return cxlr;
>> @@ -3383,23 +3409,47 @@ static struct cxl_region
>> *construct_region(struct cxl_root_decoder *cxlrd, p = &cxlr->params;
>>   	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
>>   		dev_err(cxlmd->dev.parent,
>> -			"%s:%s: %s autodiscovery interrupted\n",
>> +			"%s:%s: %s region setup interrupted\n",
>>   			dev_name(&cxlmd->dev),
>> dev_name(&cxled->cxld.dev), __func__);
>> -		rc = -EBUSY;
>> -		goto err;
>> +		err = -EBUSY;
>> +	}
>> +
>> +	if (err) {
>> +		construct_region_end();
>> +		drop_region(cxlr);
>> +		return ERR_PTR(err);
>>   	}
>> +	return cxlr;
>> +}
>> +
>> +
>> +/* Establish an empty region covering the given HPA range */
>> +static struct cxl_region *construct_region(struct cxl_root_decoder
>> *cxlrd,
>> +					   struct
>> cxl_endpoint_decoder *cxled) +{
>> +	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
>> +	struct range *hpa = &cxled->cxld.hpa_range;
>> +	struct cxl_region_params *p;
>> +	struct cxl_region *cxlr;
>> +	struct resource *res;
>> +	int rc;
>> +
>> +	cxlr = construct_region_begin(cxlrd, cxled);
>> +	if (IS_ERR(cxlr))
>> +		return cxlr;
>>   
>>   	set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
>>   
>>   	res = kmalloc(sizeof(*res), GFP_KERNEL);
>>   	if (!res) {
>>   		rc = -ENOMEM;
>> -		goto err;
>> +		goto out;
>>   	}
>>   
>>   	*res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa),
>>   				    dev_name(&cxlr->dev));
>> +
>>   	rc = insert_resource(cxlrd->res, res);
>>   	if (rc) {
>>   		/*
>> @@ -3412,6 +3462,7 @@ static struct cxl_region
>> *construct_region(struct cxl_root_decoder *cxlrd, __func__,
>> dev_name(&cxlr->dev)); }
>>   
>> +	p = &cxlr->params;
>>   	p->res = res;
>>   	p->interleave_ways = cxled->cxld.interleave_ways;
>>   	p->interleave_granularity =
>> cxled->cxld.interleave_granularity; @@ -3419,24 +3470,124 @@ static
>> struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>>   	rc = sysfs_update_group(&cxlr->dev.kobj,
>> get_cxl_region_target_group()); if (rc)
>> -		goto err;
>> +		goto out;
>>   
>>   	dev_dbg(cxlmd->dev.parent, "%s:%s: %s %s res: %pr iw: %d ig:
>> %d\n",
>> -		dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
>> __func__,
>> -		dev_name(&cxlr->dev), p->res, p->interleave_ways,
>> -		p->interleave_granularity);
>> +				   dev_name(&cxlmd->dev),
>> +				   dev_name(&cxled->cxld.dev),
>> __func__,
>> +				   dev_name(&cxlr->dev), p->res,
>> +				   p->interleave_ways,
>> +				   p->interleave_granularity);
>>   
>>   	/* ...to match put_device() in cxl_add_to_region() */
>>   	get_device(&cxlr->dev);
>>   	up_write(&cxl_region_rwsem);
>> +out:
>> +	construct_region_end();
>> +	if (rc) {
>> +		drop_region(cxlr);
>> +		return ERR_PTR(rc);
>> +	}
>> +	return cxlr;
>> +}
>> +
>> +static struct cxl_region *
>> +__construct_new_region(struct cxl_root_decoder *cxlrd,
>> +		       struct cxl_endpoint_decoder **cxled, int ways)
>> +{
>> +	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
>> +	struct cxl_region_params *p;
>> +	resource_size_t size = 0;
>> +	struct cxl_region *cxlr;
>> +	int rc, i;
>> +
>> +	/* If interleaving is not supported, why does ways need to
>> be at least 1? */
>> +	if (ways < 1)
>> +		return ERR_PTR(-EINVAL);
>> +
>> +	cxlr = construct_region_begin(cxlrd, cxled[0]);
>> +	if (IS_ERR(cxlr))
>> +		return cxlr;
>> +
>> +	rc = set_interleave_ways(cxlr, ways);
>> +	if (rc)
>> +		goto out;
>> +
>> +	rc = set_interleave_granularity(cxlr,
>> cxld->interleave_granularity);
>> +	if (rc)
>> +		goto out;
>> +
>> +	down_read(&cxl_dpa_rwsem);
>> +	for (i = 0; i < ways; i++) {
>> +		if (!cxled[i]->dpa_res)
>> +			break;
>> +		size += resource_size(cxled[i]->dpa_res);
>> +	}
>> +	up_read(&cxl_dpa_rwsem);
>> +
>> +	if (i < ways)
>> +		goto out;
>> +
>> +	rc = alloc_hpa(cxlr, size);
>> +	if (rc)
>> +		goto out;
>> +
>> +	down_read(&cxl_dpa_rwsem);
>> +	for (i = 0; i < ways; i++) {
>> +		rc = cxl_region_attach(cxlr, cxled[i], i);
>> +		if (rc)
>> +			break;
>> +	}
>> +	up_read(&cxl_dpa_rwsem);
>> +
>> +	if (rc)
>> +		goto out;
>> +
>> +	rc = cxl_region_decode_commit(cxlr);
>> +	if (rc)
>> +		goto out;
>>   
>> +	p = &cxlr->params;
>> +	p->state = CXL_CONFIG_COMMIT;
>> +out:
>> +	construct_region_end();
>> +	if (rc) {
>> +		drop_region(cxlr);
>> +		return ERR_PTR(rc);
>> +	}
>>   	return cxlr;
>> +}
>>   
>> -err:
>> -	up_write(&cxl_region_rwsem);
>> -	devm_release_action(port->uport_dev, unregister_region,
>> cxlr);
>> -	return ERR_PTR(rc);
>> +/**
>> + * cxl_create_region - Establish a region given an array of endpoint
>> decoders
>> + * @cxlrd: root decoder to allocate HPA
>> + * @cxled: array of endpoint decoders with reserved DPA capacity
>> + * @ways: size of @cxled array
>> + *
>> + * Returns a fully formed region in the commit state and attached to
>> the
>> + * cxl_region driver.
>> + */
>> +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
>> +				     struct cxl_endpoint_decoder
>> **cxled,
>> +				     int ways)
>> +{
>> +	struct cxl_region *cxlr;
>> +
>> +	mutex_lock(&cxlrd->range_lock);
>> +	cxlr = __construct_new_region(cxlrd, cxled, ways);
>> +	mutex_unlock(&cxlrd->range_lock);
>> +
>> +	if (IS_ERR(cxlr))
>> +		return cxlr;
>> +
>> +	if (device_attach(&cxlr->dev) <= 0) {
>> +		dev_err(&cxlr->dev, "failed to create region\n");
>> +		drop_region(cxlr);
>> +		return ERR_PTR(-ENODEV);
>> +	}
>> +	return cxlr;
>>   }
>> +EXPORT_SYMBOL_NS_GPL(cxl_create_region, CXL);
>>   
>>   int cxl_add_to_region(struct cxl_port *root, struct
>> cxl_endpoint_decoder *cxled) {
>> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
>> index d3fdd2c1e066..1bf3b74ff959 100644
>> --- a/drivers/cxl/cxl.h
>> +++ b/drivers/cxl/cxl.h
>> @@ -905,6 +905,7 @@ void cxl_coordinates_combine(struct
>> access_coordinate *out,
>>   bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port);
>>   
>> +int cxl_region_detach(struct cxl_endpoint_decoder *cxled);
>>   /*
>>    * Unit test builds overrides this to __weak, find the 'strong'
>> version
>>    * of these symbols in tools/testing/cxl/.
>> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
>> index a0e0795ec064..377bb3cd2d47 100644
>> --- a/drivers/cxl/cxlmem.h
>> +++ b/drivers/cxl/cxlmem.h
>> @@ -881,5 +881,7 @@ struct cxl_root_decoder
>> *cxl_get_hpa_freespace(struct cxl_port *endpoint, int interleave_ways,
>>   					       unsigned long flags,
>>   					       resource_size_t *max);
>> -
>> +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
>> +				     struct cxl_endpoint_decoder
>> **cxled,
>> +				     int ways);
>>   #endif /* __CXL_MEM_H__ */
>> diff --git a/drivers/net/ethernet/sfc/efx_cxl.c
>> b/drivers/net/ethernet/sfc/efx_cxl.c index b5626d724b52..4012e3faa298
>> 100644 --- a/drivers/net/ethernet/sfc/efx_cxl.c
>> +++ b/drivers/net/ethernet/sfc/efx_cxl.c
>> @@ -92,8 +92,18 @@ void efx_cxl_init(struct efx_nic *efx)
>>   
>>   	cxl->cxled = cxl_request_dpa(cxl->endpoint, true,
>> EFX_CTPIO_BUFFER_SIZE, EFX_CTPIO_BUFFER_SIZE);
>> -	if (IS_ERR(cxl->cxled))
>> +	if (IS_ERR(cxl->cxled)) {
>>   		pci_info(pci_dev, "CXL accel request DPA failed");
>> +		return;
>> +	}
>> +
>> +	cxl->efx_region = cxl_create_region(cxl->cxlrd, &cxl->cxled,
>> 1);
>> +	if (!cxl->efx_region) {
> if (IS_ERR(cxl->efx_region))
>

I'll fix it.

Thanks
Jonathan Cameron Aug. 27, 2024, 3:20 p.m. UTC | #5
On Fri, 23 Aug 2024 10:31:20 +0100
Alejandro Lucero Palau <alucerop@amd.com> wrote:

> On 8/22/24 14:12, Zhi Wang wrote:
> > On Mon, 15 Jul 2024 18:28:32 +0100
> > <alejandro.lucero-palau@amd.com> wrote:
> >  
> >> From: Alejandro Lucero <alucerop@amd.com>
> >>
> >> Creating a CXL region requires userspace intervention through the cxl
> >> sysfs files. Type2 support should allow accelerator drivers to create
> >> such cxl region from kernel code.
> >>
> >> Adding that functionality and integrating it with current support for
> >> memory expanders.
> >>
> >> diff --git a/drivers/net/ethernet/sfc/efx_cxl.c
> >> b/drivers/net/ethernet/sfc/efx_cxl.c index b5626d724b52..4012e3faa298
> >> 100644 --- a/drivers/net/ethernet/sfc/efx_cxl.c
> >> +++ b/drivers/net/ethernet/sfc/efx_cxl.c
> >> @@ -92,8 +92,18 @@ void efx_cxl_init(struct efx_nic *efx)
> >>   
> >>   	cxl->cxled = cxl_request_dpa(cxl->endpoint, true,
> >> EFX_CTPIO_BUFFER_SIZE, EFX_CTPIO_BUFFER_SIZE);
> >> -	if (IS_ERR(cxl->cxled))
> >> +	if (IS_ERR(cxl->cxled)) {
> >>   		pci_info(pci_dev, "CXL accel request DPA failed");
> >> +		return;
> >> +	}
> >> +
> >> +	cxl->efx_region = cxl_create_region(cxl->cxlrd, &cxl->cxled,
> >> 1);
> >> +	if (!cxl->efx_region) {  
> > if (IS_ERR(cxl->efx_region))
> >  
> 
> I'll fix it.
> 
> Thanks
Please crop replies. It's really easy to miss the important stuff
otherwise!

Jonathan
diff mbox series

Patch

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 5cc71b8868bc..697c8df83a4b 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -479,22 +479,14 @@  static ssize_t interleave_ways_show(struct device *dev,
 
 static const struct attribute_group *get_cxl_region_target_group(void);
 
-static ssize_t interleave_ways_store(struct device *dev,
-				     struct device_attribute *attr,
-				     const char *buf, size_t len)
+static int set_interleave_ways(struct cxl_region *cxlr, int val)
 {
-	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
+	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
 	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
-	struct cxl_region *cxlr = to_cxl_region(dev);
 	struct cxl_region_params *p = &cxlr->params;
-	unsigned int val, save;
-	int rc;
+	int save, rc;
 	u8 iw;
 
-	rc = kstrtouint(buf, 0, &val);
-	if (rc)
-		return rc;
-
 	rc = ways_to_eiw(val, &iw);
 	if (rc)
 		return rc;
@@ -509,25 +501,42 @@  static ssize_t interleave_ways_store(struct device *dev,
 		return -EINVAL;
 	}
 
-	rc = down_write_killable(&cxl_region_rwsem);
-	if (rc)
-		return rc;
-	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
-		rc = -EBUSY;
-		goto out;
-	}
+	lockdep_assert_held_write(&cxl_region_rwsem);
+	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
+		return -EBUSY;
 
 	save = p->interleave_ways;
 	p->interleave_ways = val;
 	rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
 	if (rc)
 		p->interleave_ways = save;
-out:
+
+	return rc;
+}
+
+static ssize_t interleave_ways_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t len)
+{
+	struct cxl_region *cxlr = to_cxl_region(dev);
+	unsigned int val;
+	int rc;
+
+	rc = kstrtouint(buf, 0, &val);
+	if (rc)
+		return rc;
+
+	rc = down_write_killable(&cxl_region_rwsem);
+	if (rc)
+		return rc;
+
+	rc = set_interleave_ways(cxlr, val);
 	up_write(&cxl_region_rwsem);
 	if (rc)
 		return rc;
 	return len;
 }
+
 static DEVICE_ATTR_RW(interleave_ways);
 
 static ssize_t interleave_granularity_show(struct device *dev,
@@ -547,21 +556,14 @@  static ssize_t interleave_granularity_show(struct device *dev,
 	return rc;
 }
 
-static ssize_t interleave_granularity_store(struct device *dev,
-					    struct device_attribute *attr,
-					    const char *buf, size_t len)
+static int set_interleave_granularity(struct cxl_region *cxlr, int val)
 {
-	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
+	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
 	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
-	struct cxl_region *cxlr = to_cxl_region(dev);
 	struct cxl_region_params *p = &cxlr->params;
-	int rc, val;
+	int rc;
 	u16 ig;
 
-	rc = kstrtoint(buf, 0, &val);
-	if (rc)
-		return rc;
-
 	rc = granularity_to_eig(val, &ig);
 	if (rc)
 		return rc;
@@ -577,21 +579,36 @@  static ssize_t interleave_granularity_store(struct device *dev,
 	if (cxld->interleave_ways > 1 && val != cxld->interleave_granularity)
 		return -EINVAL;
 
+	lockdep_assert_held_write(&cxl_region_rwsem);
+	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
+		return -EBUSY;
+
+	p->interleave_granularity = val;
+	return 0;
+}
+
+static ssize_t interleave_granularity_store(struct device *dev,
+					    struct device_attribute *attr,
+					    const char *buf, size_t len)
+{
+	struct cxl_region *cxlr = to_cxl_region(dev);
+	int rc, val;
+
+	rc = kstrtoint(buf, 0, &val);
+	if (rc)
+		return rc;
+
 	rc = down_write_killable(&cxl_region_rwsem);
 	if (rc)
 		return rc;
-	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
-		rc = -EBUSY;
-		goto out;
-	}
 
-	p->interleave_granularity = val;
-out:
+	rc = set_interleave_granularity(cxlr, val);
 	up_write(&cxl_region_rwsem);
 	if (rc)
 		return rc;
 	return len;
 }
+
 static DEVICE_ATTR_RW(interleave_granularity);
 
 static ssize_t resource_show(struct device *dev, struct device_attribute *attr,
@@ -2193,7 +2210,7 @@  static int cxl_region_attach(struct cxl_region *cxlr,
 	return 0;
 }
 
-static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
+int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
 {
 	struct cxl_port *iter, *ep_port = cxled_to_port(cxled);
 	struct cxl_region *cxlr = cxled->cxld.region;
@@ -2252,6 +2269,7 @@  static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
 	put_device(&cxlr->dev);
 	return rc;
 }
+EXPORT_SYMBOL_NS_GPL(cxl_region_detach, CXL);
 
 void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
 {
@@ -2746,6 +2764,14 @@  cxl_find_region_by_name(struct cxl_root_decoder *cxlrd, const char *name)
 	return to_cxl_region(region_dev);
 }
 
+static void drop_region(struct cxl_region *cxlr)
+{
+	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
+	struct cxl_port *port = cxlrd_to_port(cxlrd);
+
+	devm_release_action(port->uport_dev, unregister_region, cxlr);
+}
+
 static ssize_t delete_region_store(struct device *dev,
 				   struct device_attribute *attr,
 				   const char *buf, size_t len)
@@ -3353,17 +3379,18 @@  static int match_region_by_range(struct device *dev, void *data)
 	return rc;
 }
 
-/* Establish an empty region covering the given HPA range */
-static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
-					   struct cxl_endpoint_decoder *cxled)
+static void construct_region_end(void)
+{
+	up_write(&cxl_region_rwsem);
+}
+
+static struct cxl_region *construct_region_begin(struct cxl_root_decoder *cxlrd,
+						 struct cxl_endpoint_decoder *cxled)
 {
 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
-	struct cxl_port *port = cxlrd_to_port(cxlrd);
-	struct range *hpa = &cxled->cxld.hpa_range;
 	struct cxl_region_params *p;
 	struct cxl_region *cxlr;
-	struct resource *res;
-	int rc;
+	int err = 0;
 
 	do {
 		cxlr = __create_region(cxlrd, cxled->mode,
@@ -3372,8 +3399,7 @@  static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
 	} while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
 
 	if (IS_ERR(cxlr)) {
-		dev_err(cxlmd->dev.parent,
-			"%s:%s: %s failed assign region: %ld\n",
+		dev_err(cxlmd->dev.parent,"%s:%s: %s failed assign region: %ld\n",
 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
 			__func__, PTR_ERR(cxlr));
 		return cxlr;
@@ -3383,23 +3409,47 @@  static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
 	p = &cxlr->params;
 	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
 		dev_err(cxlmd->dev.parent,
-			"%s:%s: %s autodiscovery interrupted\n",
+			"%s:%s: %s region setup interrupted\n",
 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
 			__func__);
-		rc = -EBUSY;
-		goto err;
+		err = -EBUSY;
+	}
+
+	if (err) {
+		construct_region_end();
+		drop_region(cxlr);
+		return ERR_PTR(err);
 	}
+	return cxlr;
+}
+
+
+/* Establish an empty region covering the given HPA range */
+static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
+					   struct cxl_endpoint_decoder *cxled)
+{
+	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+	struct range *hpa = &cxled->cxld.hpa_range;
+	struct cxl_region_params *p;
+	struct cxl_region *cxlr;
+	struct resource *res;
+	int rc;
+
+	cxlr = construct_region_begin(cxlrd, cxled);
+	if (IS_ERR(cxlr))
+		return cxlr;
 
 	set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
 
 	res = kmalloc(sizeof(*res), GFP_KERNEL);
 	if (!res) {
 		rc = -ENOMEM;
-		goto err;
+		goto out;
 	}
 
 	*res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa),
 				    dev_name(&cxlr->dev));
+
 	rc = insert_resource(cxlrd->res, res);
 	if (rc) {
 		/*
@@ -3412,6 +3462,7 @@  static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
 			 __func__, dev_name(&cxlr->dev));
 	}
 
+	p = &cxlr->params;
 	p->res = res;
 	p->interleave_ways = cxled->cxld.interleave_ways;
 	p->interleave_granularity = cxled->cxld.interleave_granularity;
@@ -3419,24 +3470,124 @@  static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
 
 	rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
 	if (rc)
-		goto err;
+		goto out;
 
 	dev_dbg(cxlmd->dev.parent, "%s:%s: %s %s res: %pr iw: %d ig: %d\n",
-		dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), __func__,
-		dev_name(&cxlr->dev), p->res, p->interleave_ways,
-		p->interleave_granularity);
+				   dev_name(&cxlmd->dev),
+				   dev_name(&cxled->cxld.dev), __func__,
+				   dev_name(&cxlr->dev), p->res,
+				   p->interleave_ways,
+				   p->interleave_granularity);
 
 	/* ...to match put_device() in cxl_add_to_region() */
 	get_device(&cxlr->dev);
 	up_write(&cxl_region_rwsem);
+out:
+	construct_region_end();
+	if (rc) {
+		drop_region(cxlr);
+		return ERR_PTR(rc);
+	}
+	return cxlr;
+}
+
+static struct cxl_region *
+__construct_new_region(struct cxl_root_decoder *cxlrd,
+		       struct cxl_endpoint_decoder **cxled, int ways)
+{
+	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
+	struct cxl_region_params *p;
+	resource_size_t size = 0;
+	struct cxl_region *cxlr;
+	int rc, i;
+
+	/* If interleaving is not supported, why does ways need to be at least 1? */
+	if (ways < 1)
+		return ERR_PTR(-EINVAL);
+
+	cxlr = construct_region_begin(cxlrd, cxled[0]);
+	if (IS_ERR(cxlr))
+		return cxlr;
+
+	rc = set_interleave_ways(cxlr, ways);
+	if (rc)
+		goto out;
+
+	rc = set_interleave_granularity(cxlr, cxld->interleave_granularity);
+	if (rc)
+		goto out;
+
+	down_read(&cxl_dpa_rwsem);
+	for (i = 0; i < ways; i++) {
+		if (!cxled[i]->dpa_res)
+			break;
+		size += resource_size(cxled[i]->dpa_res);
+	}
+	up_read(&cxl_dpa_rwsem);
+
+	if (i < ways)
+		goto out;
+
+	rc = alloc_hpa(cxlr, size);
+	if (rc)
+		goto out;
+
+	down_read(&cxl_dpa_rwsem);
+	for (i = 0; i < ways; i++) {
+		rc = cxl_region_attach(cxlr, cxled[i], i);
+		if (rc)
+			break;
+	}
+	up_read(&cxl_dpa_rwsem);
+
+	if (rc)
+		goto out;
+
+	rc = cxl_region_decode_commit(cxlr);
+	if (rc)
+		goto out;
 
+	p = &cxlr->params;
+	p->state = CXL_CONFIG_COMMIT;
+out:
+	construct_region_end();
+	if (rc) {
+		drop_region(cxlr);
+		return ERR_PTR(rc);
+	}
 	return cxlr;
+}
 
-err:
-	up_write(&cxl_region_rwsem);
-	devm_release_action(port->uport_dev, unregister_region, cxlr);
-	return ERR_PTR(rc);
+/**
+ * cxl_create_region - Establish a region given an array of endpoint decoders
+ * @cxlrd: root decoder to allocate HPA
+ * @cxled: array of endpoint decoders with reserved DPA capacity
+ * @ways: size of @cxled array
+ *
+ * Returns a fully formed region in the commit state and attached to the
+ * cxl_region driver.
+ */
+struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
+				     struct cxl_endpoint_decoder **cxled,
+				     int ways)
+{
+	struct cxl_region *cxlr;
+
+	mutex_lock(&cxlrd->range_lock);
+	cxlr = __construct_new_region(cxlrd, cxled, ways);
+	mutex_unlock(&cxlrd->range_lock);
+
+	if (IS_ERR(cxlr))
+		return cxlr;
+
+	if (device_attach(&cxlr->dev) <= 0) {
+		dev_err(&cxlr->dev, "failed to create region\n");
+		drop_region(cxlr);
+		return ERR_PTR(-ENODEV);
+	}
+	return cxlr;
 }
+EXPORT_SYMBOL_NS_GPL(cxl_create_region, CXL);
 
 int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled)
 {
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index d3fdd2c1e066..1bf3b74ff959 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -905,6 +905,7 @@  void cxl_coordinates_combine(struct access_coordinate *out,
 
 bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port);
 
+int cxl_region_detach(struct cxl_endpoint_decoder *cxled);
 /*
  * Unit test builds overrides this to __weak, find the 'strong' version
  * of these symbols in tools/testing/cxl/.
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index a0e0795ec064..377bb3cd2d47 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -881,5 +881,7 @@  struct cxl_root_decoder *cxl_get_hpa_freespace(struct cxl_port *endpoint,
 					       int interleave_ways,
 					       unsigned long flags,
 					       resource_size_t *max);
-
+struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
+				     struct cxl_endpoint_decoder **cxled,
+				     int ways);
 #endif /* __CXL_MEM_H__ */
diff --git a/drivers/net/ethernet/sfc/efx_cxl.c b/drivers/net/ethernet/sfc/efx_cxl.c
index b5626d724b52..4012e3faa298 100644
--- a/drivers/net/ethernet/sfc/efx_cxl.c
+++ b/drivers/net/ethernet/sfc/efx_cxl.c
@@ -92,8 +92,18 @@  void efx_cxl_init(struct efx_nic *efx)
 
 	cxl->cxled = cxl_request_dpa(cxl->endpoint, true, EFX_CTPIO_BUFFER_SIZE,
 				     EFX_CTPIO_BUFFER_SIZE);
-	if (IS_ERR(cxl->cxled))
+	if (IS_ERR(cxl->cxled)) {
 		pci_info(pci_dev, "CXL accel request DPA failed");
+		return;
+	}
+
+	cxl->efx_region = cxl_create_region(cxl->cxlrd, &cxl->cxled, 1);
+	if (!cxl->efx_region) {
+		pci_info(pci_dev, "CXL accel create region failed");
+		cxl_dpa_free(cxl->cxled);
+		return;
+	}
+
 out:
 	cxl_release_endpoint(cxl->cxlmd, cxl->endpoint);
 }
@@ -102,6 +112,9 @@  void efx_cxl_exit(struct efx_nic *efx)
 {
 	struct efx_cxl *cxl = efx->cxl;
 
+	if (cxl->efx_region)
+		cxl_region_detach(cxl->cxled);
+
 	if (cxl->cxled)
 		cxl_dpa_free(cxl->cxled);
  
diff --git a/include/linux/cxl_accel_mem.h b/include/linux/cxl_accel_mem.h
index d4ecb5bb4fc8..a5f9ffc24509 100644
--- a/include/linux/cxl_accel_mem.h
+++ b/include/linux/cxl_accel_mem.h
@@ -48,4 +48,9 @@  struct cxl_endpoint_decoder *cxl_request_dpa(struct cxl_port *endpoint,
 					     resource_size_t min,
 					     resource_size_t max);
 int cxl_dpa_free(struct cxl_endpoint_decoder *cxled);
+struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
+				     struct cxl_endpoint_decoder **cxled,
+				     int ways);
+
+int cxl_region_detach(struct cxl_endpoint_decoder *cxled);
 #endif