diff mbox series

[09/46] cxl/acpi: Track CXL resources in iomem_resource

Message ID 165603876550.551046.11015869763159096807.stgit@dwillia2-xfh (mailing list archive)
State Superseded
Headers show
Series CXL PMEM Region Provisioning | expand

Commit Message

Dan Williams June 24, 2022, 2:46 a.m. UTC
Recall that CXL capable address ranges, on ACPI platforms, are published
in the CEDT.CFMWS (CXL Early Discovery Table - CXL Fixed Memory Window
Structures). These windows represent both the actively mapped capacity
and the potential address space that can be dynamically assigned to a
new CXL decode configuration.

CXL endpoints like DDR DIMMs can be mapped at any physical address
including 0 and legacy ranges.

There is an expectation and requirement that the /proc/iomem interface
and the iomem_resource in the kernel reflect the full set of platform
address ranges. I.e. that every address range that platform firmware and
bus drivers enumerate be reflected as an iomem_resource entry. The hard
requirement to do this for CXL arises from the fact that capabilities
like CONFIG_DEVICE_PRIVATE expect to be able to treat empty
iomem_resource ranges as free for software to use as proxy address
space. Without CXL publishing its potential address ranges in
iomem_resource, the CONFIG_DEVICE_PRIVATE mechanism may inadvertently
steal capacity reserved for runtime provisioning of new CXL regions.

The approach taken supports dynamically publishing the CXL window map on
demand when a CXL platform driver like cxl_acpi loads. The windows are
then forced into the first level of iomem_resource tree via the
insert_resource_expand_to_fit() API. This forcing sacrifices some
resource boundary accuracy in order to better reflect the decode
hierarchy of a CXL window hosting "System RAM" and other resources.

Walkers of the iomem_resource tree will also need to have access to the
related 'struct cxl_decoder' instances to disambiguate which portions of
a CXL memory resource are present vs expanded to enforce the expected
resource topology.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/acpi.c |  110 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 kernel/resource.c  |    7 +++
 2 files changed, 114 insertions(+), 3 deletions(-)

Comments

Jonathan Cameron June 28, 2022, 4:43 p.m. UTC | #1
On Thu, 23 Jun 2022 19:46:05 -0700
Dan Williams <dan.j.williams@intel.com> wrote:

> Recall that CXL capable address ranges, on ACPI platforms, are published
> in the CEDT.CFMWS (CXL Early Discovery Table - CXL Fixed Memory Window
> Structures). These windows represent both the actively mapped capacity
> and the potential address space that can be dynamically assigned to a
> new CXL decode configuration.
> 
> CXL endpoints like DDR DIMMs can be mapped at any physical address
> including 0 and legacy ranges.
> 
> There is an expectation and requirement that the /proc/iomem interface
> and the iomem_resource in the kernel reflect the full set of platform
> address ranges. I.e. that every address range that platform firmware and
> bus drivers enumerate be reflected as an iomem_resource entry. The hard
> requirement to do this for CXL arises from the fact that capabilities
> like CONFIG_DEVICE_PRIVATE expect to be able to treat empty
> iomem_resource ranges as free for software to use as proxy address
> space. Without CXL publishing its potential address ranges in
> iomem_resource, the CONFIG_DEVICE_PRIVATE mechanism may inadvertently
> steal capacity reserved for runtime provisioning of new CXL regions.
> 
> The approach taken supports dynamically publishing the CXL window map on
> demand when a CXL platform driver like cxl_acpi loads. The windows are
> then forced into the first level of iomem_resource tree via the
> insert_resource_expand_to_fit() API. This forcing sacrifices some
> resource boundary accuracy in order to better reflect the decode
> hierarchy of a CXL window hosting "System RAM" and other resources.

I don't fully understand this and in particular what assumptions it
is making.  How do we end up with overlapping resources via just parsing
the CFMWS for instance...

I would shout a lot louder in this description about using the CXL NS
for that export.  That's liable to be controversial.

> 
> Walkers of the iomem_resource tree will also need to have access to the
> related 'struct cxl_decoder' instances to disambiguate which portions of
> a CXL memory resource are present vs expanded to enforce the expected
> resource topology.
> 
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> ---
>  drivers/cxl/acpi.c |  110 +++++++++++++++++++++++++++++++++++++++++++++++++++-
>  kernel/resource.c  |    7 +++
>  2 files changed, 114 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
> index d1b914dfa36c..003fa4fde357 100644
> --- a/drivers/cxl/acpi.c
> +++ b/drivers/cxl/acpi.c
> @@ -73,6 +73,7 @@ static int cxl_acpi_cfmws_verify(struct device *dev,
>  struct cxl_cfmws_context {
>  	struct device *dev;
>  	struct cxl_port *root_port;
> +	int id;
>  };
>  
>  static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
> @@ -84,8 +85,10 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
>  	struct cxl_switch_decoder *cxlsd;
>  	struct device *dev = ctx->dev;
>  	struct acpi_cedt_cfmws *cfmws;
> +	struct resource *cxl_res;
>  	struct cxl_decoder *cxld;
>  	unsigned int ways, i, ig;
> +	struct resource *res;
>  	int rc;
>  
>  	cfmws = (struct acpi_cedt_cfmws *) header;
> @@ -107,6 +110,24 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
>  	for (i = 0; i < ways; i++)
>  		target_map[i] = cfmws->interleave_targets[i];
>  
> +	res = kzalloc(sizeof(*res), GFP_KERNEL);
> +	if (!res)
> +		return -ENOMEM;
> +
> +	res->name = kasprintf(GFP_KERNEL, "CXL Window %d", ctx->id++);
> +	if (!res->name)
> +		goto err_name;
> +
> +	res->start = cfmws->base_hpa;
> +	res->end = cfmws->base_hpa + cfmws->window_size - 1;
> +	res->flags = IORESOURCE_MEM;
> +
> +	/* add to the local resource tracking to establish a sort order */
> +	cxl_res = dev_get_drvdata(&root_port->dev);

As mentioned below, why not add cxl_res to the ctx?

> +	rc = insert_resource(cxl_res, res);
> +	if (rc)
> +		goto err_insert;
> +
>  	cxlsd = cxl_root_decoder_alloc(root_port, ways);
>  	if (IS_ERR(cxld))
>  		return 0;
> @@ -115,8 +136,8 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
>  	cxld->flags = cfmws_to_decoder_flags(cfmws->restrictions);
>  	cxld->target_type = CXL_DECODER_EXPANDER;
>  	cxld->hpa_range = (struct range) {
> -		.start = cfmws->base_hpa,
> -		.end = cfmws->base_hpa + cfmws->window_size - 1,
> +		.start = res->start,
> +		.end = res->end,
>  	};
>  	cxld->interleave_ways = ways;
>  	cxld->interleave_granularity = ig;
> @@ -131,12 +152,19 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
>  			cxld->hpa_range.start, cxld->hpa_range.end);
>  		return 0;
>  	}
> +
Another whitespace tweak that shouldn't be in a patch like this...

>  	dev_dbg(dev, "add: %s node: %d range [%#llx - %#llx]\n",
>  		dev_name(&cxld->dev),
>  		phys_to_target_node(cxld->hpa_range.start),
>  		cxld->hpa_range.start, cxld->hpa_range.end);
>  
>  	return 0;
> +
> +err_insert:
> +	kfree(res->name);
> +err_name:
> +	kfree(res);
> +	return -ENOMEM;
>  }
>  
>  __mock struct acpi_device *to_cxl_host_bridge(struct device *host,
> @@ -291,9 +319,66 @@ static void cxl_acpi_lock_reset_class(void *dev)
>  	device_lock_reset_class(dev);
>  }
>  
> +static void del_cxl_resource(struct resource *res)
> +{
> +	kfree(res->name);
> +	kfree(res);
> +}
> +
> +static void remove_cxl_resources(void *data)
> +{
> +	struct resource *res, *next, *cxl = data;
> +
> +	for (res = cxl->child; res; res = next) {
> +		struct resource *victim = (struct resource *) res->desc;
> +
> +		next = res->sibling;
> +		remove_resource(res);
> +
> +		if (victim) {
> +			remove_resource(victim);
> +			kfree(victim);
> +		}
> +
> +		del_cxl_resource(res);
> +	}
> +}
> +
> +static int add_cxl_resources(struct resource *cxl)

I'd like to see some documentation of what this is doing...

> +{
> +	struct resource *res, *new, *next;
> +
> +	for (res = cxl->child; res; res = next) {
> +		new = kzalloc(sizeof(*new), GFP_KERNEL);
> +		if (!new)
> +			return -ENOMEM;
> +		new->name = res->name;
> +		new->start = res->start;
> +		new->end = res->end;
> +		new->flags = IORESOURCE_MEM;
> +		res->desc = (unsigned long) new;
> +
> +		insert_resource_expand_to_fit(&iomem_resource, new);

Given you've called out limitations of this call in the patch description
it would be good to have some of that info in the code.

> +
> +		next = res->sibling;
> +		while (next && resource_overlaps(new, next)) {

I'm struggling to grasp why we'd have overlaps, comments would probably help.

> +			if (resource_contains(new, next)) {
> +				struct resource *_next = next->sibling;
> +
> +				remove_resource(next);
> +				del_cxl_resource(next);
> +				next = _next;
> +			} else
> +				next->start = new->end + 1;
> +		}
> +	}
> +	return 0;
> +}
> +
>  static int cxl_acpi_probe(struct platform_device *pdev)
>  {
>  	int rc;
> +	struct resource *cxl_res;
>  	struct cxl_port *root_port;
>  	struct device *host = &pdev->dev;
>  	struct acpi_device *adev = ACPI_COMPANION(host);
> @@ -305,21 +390,40 @@ static int cxl_acpi_probe(struct platform_device *pdev)
>  	if (rc)
>  		return rc;
>  
> +	cxl_res = devm_kzalloc(host, sizeof(*cxl_res), GFP_KERNEL);
> +	if (!cxl_res)
> +		return -ENOMEM;
> +	cxl_res->name = "CXL mem";
> +	cxl_res->start = 0;
> +	cxl_res->end = -1;
> +	cxl_res->flags = IORESOURCE_MEM;
> +
>  	root_port = devm_cxl_add_port(host, host, CXL_RESOURCE_NONE, NULL);
>  	if (IS_ERR(root_port))
>  		return PTR_ERR(root_port);
>  	dev_dbg(host, "add: %s\n", dev_name(&root_port->dev));
> +	dev_set_drvdata(&root_port->dev, cxl_res);

Rather ugly way of sneaking it into the callback. If that is the only
purpose, perhaps better to just add to the cxl_cfmws_context.

>  
>  	rc = bus_for_each_dev(adev->dev.bus, NULL, root_port,
>  			      add_host_bridge_dport);
>  	if (rc < 0)
>  		return rc;
>  
> +	rc = devm_add_action_or_reset(host, remove_cxl_resources, cxl_res);
> +	if (rc)
> +		return rc;
> +
>  	ctx = (struct cxl_cfmws_context) {
>  		.dev = host,
>  		.root_port = root_port,
>  	};
> -	acpi_table_parse_cedt(ACPI_CEDT_TYPE_CFMWS, cxl_parse_cfmws, &ctx);
> +	rc = acpi_table_parse_cedt(ACPI_CEDT_TYPE_CFMWS, cxl_parse_cfmws, &ctx);
> +	if (rc < 0)
> +		return -ENXIO;
> +
> +	rc = add_cxl_resources(cxl_res);
> +	if (rc)
> +		return rc;
>  
>  	/*
>  	 * Root level scanned with host-bridge as dports, now scan host-bridges
Dan Williams July 10, 2022, 2:12 a.m. UTC | #2
Jonathan Cameron wrote:
> On Thu, 23 Jun 2022 19:46:05 -0700
> Dan Williams <dan.j.williams@intel.com> wrote:
> 
> > Recall that CXL capable address ranges, on ACPI platforms, are published
> > in the CEDT.CFMWS (CXL Early Discovery Table - CXL Fixed Memory Window
> > Structures). These windows represent both the actively mapped capacity
> > and the potential address space that can be dynamically assigned to a
> > new CXL decode configuration.
> > 
> > CXL endpoints like DDR DIMMs can be mapped at any physical address
> > including 0 and legacy ranges.
> > 
> > There is an expectation and requirement that the /proc/iomem interface
> > and the iomem_resource in the kernel reflect the full set of platform
> > address ranges. I.e. that every address range that platform firmware and
> > bus drivers enumerate be reflected as an iomem_resource entry. The hard
> > requirement to do this for CXL arises from the fact that capabilities
> > like CONFIG_DEVICE_PRIVATE expect to be able to treat empty
> > iomem_resource ranges as free for software to use as proxy address
> > space. Without CXL publishing its potential address ranges in
> > iomem_resource, the CONFIG_DEVICE_PRIVATE mechanism may inadvertently
> > steal capacity reserved for runtime provisioning of new CXL regions.
> > 
> > The approach taken supports dynamically publishing the CXL window map on
> > demand when a CXL platform driver like cxl_acpi loads. The windows are
> > then forced into the first level of iomem_resource tree via the
> > insert_resource_expand_to_fit() API. This forcing sacrifices some
> > resource boundary accuracy in order to better reflect the decode
> > hierarchy of a CXL window hosting "System RAM" and other resources.
> 
> I don't fully understand this and in particular what assumptions it
> is making.  How do we end up with overlapping resources via just parsing
> the CFMWS for instance...

Consider the case of platform firmware placing CXL memory in the EFI
memory map. In that case the CXL address range will already exist in
iomem_resource as a "System RAM" resource. The goal of this patch is to
reflect the true hierarchy of the resource tree, but late in the boot
cycle when the CXL driver stack loads.

I will add a clarification along these lines to the changelog.


> I would shout a lot louder in this description about using the CXL NS
> for that export.  That's liable to be controversial.

Added some folks to this reply and will cc them on the resend (Greg,
David, Jason), but I will remind anyone following along that the proposed
solution here is the one discussed at LSF/MM:

https://lwn.net/Articles/894626/

...and suggested by Jason:

https://lore.kernel.org/all/20220420143406.GY2120790@nvidia.com/

This also builds on David's work to remove "top level resource" special
casing in various kernel paths.

Otherwise, if your concern is the export itself, I think this is a
straightforward example of why namespaces were created in the first
place to limit exports to a specific scope when there is no intent to
make the export available more generally.

> 
> > 
> > Walkers of the iomem_resource tree will also need to have access to the
> > related 'struct cxl_decoder' instances to disambiguate which portions of
> > a CXL memory resource are present vs expanded to enforce the expected
> > resource topology.
> > 
> > Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> > ---
> >  drivers/cxl/acpi.c |  110 +++++++++++++++++++++++++++++++++++++++++++++++++++-
> >  kernel/resource.c  |    7 +++
> >  2 files changed, 114 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
> > index d1b914dfa36c..003fa4fde357 100644
> > --- a/drivers/cxl/acpi.c
> > +++ b/drivers/cxl/acpi.c
> > @@ -73,6 +73,7 @@ static int cxl_acpi_cfmws_verify(struct device *dev,
> >  struct cxl_cfmws_context {
> >  	struct device *dev;
> >  	struct cxl_port *root_port;
> > +	int id;
> >  };
> >  
> >  static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
> > @@ -84,8 +85,10 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
> >  	struct cxl_switch_decoder *cxlsd;
> >  	struct device *dev = ctx->dev;
> >  	struct acpi_cedt_cfmws *cfmws;
> > +	struct resource *cxl_res;
> >  	struct cxl_decoder *cxld;
> >  	unsigned int ways, i, ig;
> > +	struct resource *res;
> >  	int rc;
> >  
> >  	cfmws = (struct acpi_cedt_cfmws *) header;
> > @@ -107,6 +110,24 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
> >  	for (i = 0; i < ways; i++)
> >  		target_map[i] = cfmws->interleave_targets[i];
> >  
> > +	res = kzalloc(sizeof(*res), GFP_KERNEL);
> > +	if (!res)
> > +		return -ENOMEM;
> > +
> > +	res->name = kasprintf(GFP_KERNEL, "CXL Window %d", ctx->id++);
> > +	if (!res->name)
> > +		goto err_name;
> > +
> > +	res->start = cfmws->base_hpa;
> > +	res->end = cfmws->base_hpa + cfmws->window_size - 1;
> > +	res->flags = IORESOURCE_MEM;
> > +
> > +	/* add to the local resource tracking to establish a sort order */
> > +	cxl_res = dev_get_drvdata(&root_port->dev);
> 
> As mentioned below, why not add cxl_res to the ctx?

Good idea.

> 
> > +	rc = insert_resource(cxl_res, res);
> > +	if (rc)
> > +		goto err_insert;
> > +
> >  	cxlsd = cxl_root_decoder_alloc(root_port, ways);
> >  	if (IS_ERR(cxld))
> >  		return 0;
> > @@ -115,8 +136,8 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
> >  	cxld->flags = cfmws_to_decoder_flags(cfmws->restrictions);
> >  	cxld->target_type = CXL_DECODER_EXPANDER;
> >  	cxld->hpa_range = (struct range) {
> > -		.start = cfmws->base_hpa,
> > -		.end = cfmws->base_hpa + cfmws->window_size - 1,
> > +		.start = res->start,
> > +		.end = res->end,
> >  	};
> >  	cxld->interleave_ways = ways;
> >  	cxld->interleave_granularity = ig;
> > @@ -131,12 +152,19 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
> >  			cxld->hpa_range.start, cxld->hpa_range.end);
> >  		return 0;
> >  	}
> > +
> Another whitespace tweak that shouldn't be in a patch like this...

sure.

> 
> >  	dev_dbg(dev, "add: %s node: %d range [%#llx - %#llx]\n",
> >  		dev_name(&cxld->dev),
> >  		phys_to_target_node(cxld->hpa_range.start),
> >  		cxld->hpa_range.start, cxld->hpa_range.end);
> >  
> >  	return 0;
> > +
> > +err_insert:
> > +	kfree(res->name);
> > +err_name:
> > +	kfree(res);
> > +	return -ENOMEM;
> >  }
> >  
> >  __mock struct acpi_device *to_cxl_host_bridge(struct device *host,
> > @@ -291,9 +319,66 @@ static void cxl_acpi_lock_reset_class(void *dev)
> >  	device_lock_reset_class(dev);
> >  }
> >  
> > +static void del_cxl_resource(struct resource *res)
> > +{
> > +	kfree(res->name);
> > +	kfree(res);
> > +}
> > +
> > +static void remove_cxl_resources(void *data)
> > +{
> > +	struct resource *res, *next, *cxl = data;
> > +
> > +	for (res = cxl->child; res; res = next) {
> > +		struct resource *victim = (struct resource *) res->desc;
> > +
> > +		next = res->sibling;
> > +		remove_resource(res);
> > +
> > +		if (victim) {
> > +			remove_resource(victim);
> > +			kfree(victim);
> > +		}
> > +
> > +		del_cxl_resource(res);
> > +	}
> > +}
> > +
> > +static int add_cxl_resources(struct resource *cxl)
> 
> I'd like to see some documentation of what this is doing...
> 
> > +{
> > +	struct resource *res, *new, *next;
> > +
> > +	for (res = cxl->child; res; res = next) {
> > +		new = kzalloc(sizeof(*new), GFP_KERNEL);
> > +		if (!new)
> > +			return -ENOMEM;
> > +		new->name = res->name;
> > +		new->start = res->start;
> > +		new->end = res->end;
> > +		new->flags = IORESOURCE_MEM;
> > +		res->desc = (unsigned long) new;
> > +
> > +		insert_resource_expand_to_fit(&iomem_resource, new);
> 
> Given you've called out limitations of this call in the patch description
> it would be good to have some of that info in the code.
> 
> > +
> > +		next = res->sibling;
> > +		while (next && resource_overlaps(new, next)) {
> 
> I'm struggling to grasp why we'd have overlaps, comments would probably help.

Added the following...

/**
 * add_cxl_resources() - reflect CXL fixed memory windows in iomem_resource
 * @cxl_res: A standalone resource tree where each CXL window is a sibling
 *
 * Walk each CXL window in @cxl_res and add it to iomem_resource potentially
 * expanding its boundaries to ensure that any conflicting resources become
 * children. If a window is expanded it may then conflict with another window
 * entry and require the window to be truncated or trimmed. Consider this
 * situation:
 *
 * |-- "CXL Window 0" --||----- "CXL Window 1" -----|
 * |--------------- "System RAM" -------------|
 *
 * ...where platform firmware has established a System RAM resource across 2
 * windows, but has left some portion of window 1 for dynamic CXL region
 * provisioning. In this case "Window 0" will span the entirety of the "System
 * RAM" span, and "CXL Window 1" is truncated to the remaining tail past the end
 * of that "System RAM" resource.
 */


Also, if you're wondering, the mismatch of iomem_resource entries to the
CXL windows does not matter in practice as dynamic region provisioning
only cares about the portions of the CXL windows that do not intersect
with any other resource. All that matters is that all intersections are
accounted for when it comes time to scan for free address space.

> 
> > +			if (resource_contains(new, next)) {
> > +				struct resource *_next = next->sibling;
> > +
> > +				remove_resource(next);
> > +				del_cxl_resource(next);
> > +				next = _next;
> > +			} else
> > +				next->start = new->end + 1;
> > +		}
> > +	}
> > +	return 0;
> > +}
> > +
> >  static int cxl_acpi_probe(struct platform_device *pdev)
> >  {
> >  	int rc;
> > +	struct resource *cxl_res;
> >  	struct cxl_port *root_port;
> >  	struct device *host = &pdev->dev;
> >  	struct acpi_device *adev = ACPI_COMPANION(host);
> > @@ -305,21 +390,40 @@ static int cxl_acpi_probe(struct platform_device *pdev)
> >  	if (rc)
> >  		return rc;
> >  
> > +	cxl_res = devm_kzalloc(host, sizeof(*cxl_res), GFP_KERNEL);
> > +	if (!cxl_res)
> > +		return -ENOMEM;
> > +	cxl_res->name = "CXL mem";
> > +	cxl_res->start = 0;
> > +	cxl_res->end = -1;
> > +	cxl_res->flags = IORESOURCE_MEM;
> > +
> >  	root_port = devm_cxl_add_port(host, host, CXL_RESOURCE_NONE, NULL);
> >  	if (IS_ERR(root_port))
> >  		return PTR_ERR(root_port);
> >  	dev_dbg(host, "add: %s\n", dev_name(&root_port->dev));
> > +	dev_set_drvdata(&root_port->dev, cxl_res);
> 
> Rather ugly way of sneaking it into the callback. If that is the only
> purpose, perhaps better to just add to the cxl_cfmws_context.

yup.
Jonathan Cameron July 19, 2022, 2:24 p.m. UTC | #3
> Added the following...
> 
> /**
>  * add_cxl_resources() - reflect CXL fixed memory windows in iomem_resource
>  * @cxl_res: A standalone resource tree where each CXL window is a sibling
>  *
>  * Walk each CXL window in @cxl_res and add it to iomem_resource potentially
>  * expanding its boundaries to ensure that any conflicting resources become
>  * children. If a window is expanded it may then conflict with another window
>  * entry and require the window to be truncated or trimmed. Consider this
>  * situation:
>  *
>  * |-- "CXL Window 0" --||----- "CXL Window 1" -----|
>  * |--------------- "System RAM" -------------|
>  *
>  * ...where platform firmware has established a System RAM resource across 2
>  * windows, but has left some portion of window 1 for dynamic CXL region
>  * provisioning. In this case "Window 0" will span the entirety of the "System
>  * RAM" span, and "CXL Window 1" is truncated to the remaining tail past the end
>  * of that "System RAM" resource.
>  */

Very nice.  Thanks!

J
diff mbox series

Patch

diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index d1b914dfa36c..003fa4fde357 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -73,6 +73,7 @@  static int cxl_acpi_cfmws_verify(struct device *dev,
 struct cxl_cfmws_context {
 	struct device *dev;
 	struct cxl_port *root_port;
+	int id;
 };
 
 static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
@@ -84,8 +85,10 @@  static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
 	struct cxl_switch_decoder *cxlsd;
 	struct device *dev = ctx->dev;
 	struct acpi_cedt_cfmws *cfmws;
+	struct resource *cxl_res;
 	struct cxl_decoder *cxld;
 	unsigned int ways, i, ig;
+	struct resource *res;
 	int rc;
 
 	cfmws = (struct acpi_cedt_cfmws *) header;
@@ -107,6 +110,24 @@  static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
 	for (i = 0; i < ways; i++)
 		target_map[i] = cfmws->interleave_targets[i];
 
+	res = kzalloc(sizeof(*res), GFP_KERNEL);
+	if (!res)
+		return -ENOMEM;
+
+	res->name = kasprintf(GFP_KERNEL, "CXL Window %d", ctx->id++);
+	if (!res->name)
+		goto err_name;
+
+	res->start = cfmws->base_hpa;
+	res->end = cfmws->base_hpa + cfmws->window_size - 1;
+	res->flags = IORESOURCE_MEM;
+
+	/* add to the local resource tracking to establish a sort order */
+	cxl_res = dev_get_drvdata(&root_port->dev);
+	rc = insert_resource(cxl_res, res);
+	if (rc)
+		goto err_insert;
+
 	cxlsd = cxl_root_decoder_alloc(root_port, ways);
 	if (IS_ERR(cxld))
 		return 0;
@@ -115,8 +136,8 @@  static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
 	cxld->flags = cfmws_to_decoder_flags(cfmws->restrictions);
 	cxld->target_type = CXL_DECODER_EXPANDER;
 	cxld->hpa_range = (struct range) {
-		.start = cfmws->base_hpa,
-		.end = cfmws->base_hpa + cfmws->window_size - 1,
+		.start = res->start,
+		.end = res->end,
 	};
 	cxld->interleave_ways = ways;
 	cxld->interleave_granularity = ig;
@@ -131,12 +152,19 @@  static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
 			cxld->hpa_range.start, cxld->hpa_range.end);
 		return 0;
 	}
+
 	dev_dbg(dev, "add: %s node: %d range [%#llx - %#llx]\n",
 		dev_name(&cxld->dev),
 		phys_to_target_node(cxld->hpa_range.start),
 		cxld->hpa_range.start, cxld->hpa_range.end);
 
 	return 0;
+
+err_insert:
+	kfree(res->name);
+err_name:
+	kfree(res);
+	return -ENOMEM;
 }
 
 __mock struct acpi_device *to_cxl_host_bridge(struct device *host,
@@ -291,9 +319,66 @@  static void cxl_acpi_lock_reset_class(void *dev)
 	device_lock_reset_class(dev);
 }
 
+static void del_cxl_resource(struct resource *res)
+{
+	kfree(res->name);
+	kfree(res);
+}
+
+static void remove_cxl_resources(void *data)
+{
+	struct resource *res, *next, *cxl = data;
+
+	for (res = cxl->child; res; res = next) {
+		struct resource *victim = (struct resource *) res->desc;
+
+		next = res->sibling;
+		remove_resource(res);
+
+		if (victim) {
+			remove_resource(victim);
+			kfree(victim);
+		}
+
+		del_cxl_resource(res);
+	}
+}
+
+static int add_cxl_resources(struct resource *cxl)
+{
+	struct resource *res, *new, *next;
+
+	for (res = cxl->child; res; res = next) {
+		new = kzalloc(sizeof(*new), GFP_KERNEL);
+		if (!new)
+			return -ENOMEM;
+		new->name = res->name;
+		new->start = res->start;
+		new->end = res->end;
+		new->flags = IORESOURCE_MEM;
+		res->desc = (unsigned long) new;
+
+		insert_resource_expand_to_fit(&iomem_resource, new);
+
+		next = res->sibling;
+		while (next && resource_overlaps(new, next)) {
+			if (resource_contains(new, next)) {
+				struct resource *_next = next->sibling;
+
+				remove_resource(next);
+				del_cxl_resource(next);
+				next = _next;
+			} else
+				next->start = new->end + 1;
+		}
+	}
+	return 0;
+}
+
 static int cxl_acpi_probe(struct platform_device *pdev)
 {
 	int rc;
+	struct resource *cxl_res;
 	struct cxl_port *root_port;
 	struct device *host = &pdev->dev;
 	struct acpi_device *adev = ACPI_COMPANION(host);
@@ -305,21 +390,40 @@  static int cxl_acpi_probe(struct platform_device *pdev)
 	if (rc)
 		return rc;
 
+	cxl_res = devm_kzalloc(host, sizeof(*cxl_res), GFP_KERNEL);
+	if (!cxl_res)
+		return -ENOMEM;
+	cxl_res->name = "CXL mem";
+	cxl_res->start = 0;
+	cxl_res->end = -1;
+	cxl_res->flags = IORESOURCE_MEM;
+
 	root_port = devm_cxl_add_port(host, host, CXL_RESOURCE_NONE, NULL);
 	if (IS_ERR(root_port))
 		return PTR_ERR(root_port);
 	dev_dbg(host, "add: %s\n", dev_name(&root_port->dev));
+	dev_set_drvdata(&root_port->dev, cxl_res);
 
 	rc = bus_for_each_dev(adev->dev.bus, NULL, root_port,
 			      add_host_bridge_dport);
 	if (rc < 0)
 		return rc;
 
+	rc = devm_add_action_or_reset(host, remove_cxl_resources, cxl_res);
+	if (rc)
+		return rc;
+
 	ctx = (struct cxl_cfmws_context) {
 		.dev = host,
 		.root_port = root_port,
 	};
-	acpi_table_parse_cedt(ACPI_CEDT_TYPE_CFMWS, cxl_parse_cfmws, &ctx);
+	rc = acpi_table_parse_cedt(ACPI_CEDT_TYPE_CFMWS, cxl_parse_cfmws, &ctx);
+	if (rc < 0)
+		return -ENXIO;
+
+	rc = add_cxl_resources(cxl_res);
+	if (rc)
+		return rc;
 
 	/*
 	 * Root level scanned with host-bridge as dports, now scan host-bridges
diff --git a/kernel/resource.c b/kernel/resource.c
index 34eaee179689..53a534db350e 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -891,6 +891,13 @@  void insert_resource_expand_to_fit(struct resource *root, struct resource *new)
 	}
 	write_unlock(&resource_lock);
 }
+/*
+ * Not for general consumption, only early boot memory map parsing, PCI
+ * resource discovery, and late discovery of CXL resources are expected
+ * to use this interface. The former are built-in and only the latter,
+ * CXL, is a module.
+ */
+EXPORT_SYMBOL_NS_GPL(insert_resource_expand_to_fit, CXL);
 
 /**
  * remove_resource - Remove a resource in the resource tree