diff mbox series

[RFC,v2] cxl: Update Soft Reserved resources upon region creation

Message ID 20241030172751.81392-1-nathan.fontenot@amd.com
State New
Headers show
Series [RFC,v2] cxl: Update Soft Reserved resources upon region creation | expand

Commit Message

Nathan Fontenot Oct. 30, 2024, 5:27 p.m. UTC
Update handling of SOFT RESERVE iomem resources that intersect with
CXL region resources to remove the intersections from the SOFT RESERVE
resources. The current approach of leaving the SOFT RESERVE
resource as is can cause failures during hotplug replace of CXL
devices because the resource is not available for reuse after
teardown.

The failure occurs after the hotplug replace operation when trying to
recreate the CXL region. One of the cxl command's steps is a write
to sysfs to set the region size, resulting in a call to
alloc_free_mem_region(). This fails due to the presence of the Soft
Reserve resource which covers the same address range as the CFMWS.
 
The approach sought is to trim out any pieces of SOFT RESERVE
resources that intersect with CXL regions. To do this, first set
aside any SOFT RESERVE resources that intersect with a CFMWS
into a separate resource tree during e820__reserve_resources_late()
that would have been otherwise added to the iomem resource tree.

As CXL regions are created the cxl resource created for the new
region is used to trim intersections from the SOFT RESERVE
resources that were previously set aside.

The next steps are to add any SOFT RESERVE resources remaining to the
iomem resource tree after CXL device probe completes and to notify
the dax driver so it may consume the added SOFT RESERVE resources.

This patch includes the use of a delayed work queue to wait
for CXL device probe completion and then have a worker thread merge
any remaining SOFT RESERVE resources into the iomem resource tree.

Not in this patch is notification of the dax driver so it may consume
the SOFT RESERVE resources that have been merged into the iomem
resource tree.

The goal of presenting this RFC is to drive discussion of the
current approach for trimming SOFT RESERVE resources, the use of
a delayed work queue to add remaining SOFT RESERVE resources to
the iomem resource tree, and methods for notifying the dax driver
of any newly added SOFT RESERVE resources.

NOTE: As this is an RFC, the temporary pr_err("CXL DEBUG...") messages
have been left in to aid in testing and validation. These would all
be removed for the final version.

Co-developed-by: Alison Schofield <alison.schofield@intel.com>
Signed-off-by: Alison Schofield <alison.schofield@intel.com>
Signed-off-by: Nathan Fontenot <nathan.fontenot@amd.com>
---

V2 Updates:
- Move the handling of the SOFT RESERVE resource tree from the e820
  code to kernel/resource.c to make this generic.
- Multiple general code cleanups as suggested by Jonathan
- Remove cxl_sr_rwsem, no longer needed
- Update delayed worker thread handling to just use mod_delayed_work()
---
 arch/x86/kernel/e820.c    |  20 ++++--
 drivers/cxl/core/region.c |   8 ++-
 drivers/cxl/port.c        |  17 +++++
 include/linux/ioport.h    |   3 +
 kernel/resource.c         | 138 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 181 insertions(+), 5 deletions(-)
diff mbox series

Patch

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 4893d30ce438..942597f104a6 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1210,14 +1210,26 @@  static unsigned long __init ram_alignment(resource_size_t pos)
 
 void __init e820__reserve_resources_late(void)
 {
-	int i;
 	struct resource *res;
+	int i;
 
+	/*
+	 * Prior to inserting SOFT_RESERVED resources we want to check for an
+	 * intersection with potential CXL resources. Any SOFT_RESERVED resources
+	 * that do intersect a potential CXL resource are set aside so they
+	 * can be trimmed to accommodate CXL resource intersections and added to
+	 * the iomem resource tree after the CXL drivers have completed their
+	 * device probe.
+	 */
 	res = e820_res;
-	for (i = 0; i < e820_table->nr_entries; i++) {
-		if (!res->parent && res->end)
+	for (i = 0; i < e820_table->nr_entries; i++, res++) {
+		if (res->desc == IORES_DESC_SOFT_RESERVED) {
+			pr_err("CXL DEBUG Inserting Soft Reserve %llx - %llx\n",
+			       res->start, res->end);
+			insert_soft_reserve_resource(res);
+		} else if (!res->parent && res->end) {
 			insert_resource_expand_to_fit(&iomem_resource, res);
-		res++;
+		}
 	}
 
 	/*
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 21ad5f242875..c458a6313b31 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -3226,6 +3226,12 @@  static int match_region_by_range(struct device *dev, void *data)
 	return rc;
 }
 
+/*
+ * Trim @res out of the set-aside Soft Reserved resources, then insert it
+ * under @parent. Returns the result of insert_resource().
+ *
+ * The trim happens before the insert and is not undone here if the insert
+ * fails.
+ */
+static int insert_region_resource(struct resource *parent, struct resource *res)
+{
+	trim_soft_reserve_resources(res);
+	return insert_resource(parent, res);
+}
+
 /* Establish an empty region covering the given HPA range */
 static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
 					   struct cxl_endpoint_decoder *cxled)
@@ -3272,7 +3278,7 @@  static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
 
 	*res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa),
 				    dev_name(&cxlr->dev));
-	rc = insert_resource(cxlrd->res, res);
+	rc = insert_region_resource(cxlrd->res, res);
 	if (rc) {
 		/*
 		 * Platform-firmware may not have split resources like "System
diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
index d7d5d982ce69..83fe794e887a 100644
--- a/drivers/cxl/port.c
+++ b/drivers/cxl/port.c
@@ -89,6 +89,22 @@  static int cxl_switch_port_probe(struct cxl_port *port)
 	return -ENXIO;
 }
 
+/*
+ * Delayed-work handler: hand whatever is still parked in the set-aside
+ * Soft Reserved tree over to merge_soft_reserve_resources().
+ */
+static void cxl_sr_update(struct work_struct *w)
+{
+	pr_err("CXL DEBUG Updating soft reserves\n");
+	merge_soft_reserve_resources();
+}
+
+/*
+ * Work item for the handler above. It is only referenced from this file, so
+ * keep it static rather than adding an undeclared global symbol.
+ */
+static DECLARE_DELAYED_WORK(cxl_sr_work, cxl_sr_update);
+
+/*
+ * Queue cxl_sr_work on system_wq with a delay of 5 * HZ jiffies, or update
+ * the delay if the work is already queued.
+ */
+static void schedule_soft_reserve_update(void)
+{
+	pr_err("CXL DEBUG Adding/modifying delayed work timeout\n");
+	mod_delayed_work(system_wq, &cxl_sr_work, 5 * HZ);
+}
+
 static int cxl_endpoint_port_probe(struct cxl_port *port)
 {
 	struct cxl_endpoint_dvsec_info info = { .port = port };
@@ -140,6 +156,7 @@  static int cxl_endpoint_port_probe(struct cxl_port *port)
 	 */
 	device_for_each_child(&port->dev, root, discover_region);
 
+	schedule_soft_reserve_update();
 	return 0;
 }
 
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 6e9fb667a1c5..0260e993ebbd 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -249,6 +249,9 @@  struct resource *lookup_resource(struct resource *root, resource_size_t start);
 int adjust_resource(struct resource *res, resource_size_t start,
 		    resource_size_t size);
 resource_size_t resource_alignment(struct resource *res);
+extern void trim_soft_reserve_resources(const struct resource *res);
+extern void merge_soft_reserve_resources(void);
+extern int insert_soft_reserve_resource(struct resource *res);
 static inline resource_size_t resource_size(const struct resource *res)
 {
 	return res->end - res->start + 1;
diff --git a/kernel/resource.c b/kernel/resource.c
index a83040fde236..e7ed99cf4243 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -31,6 +31,7 @@ 
 #include <linux/vmalloc.h>
 #include <asm/io.h>
 
+#include <linux/acpi.h>
 
 struct resource ioport_resource = {
 	.name	= "PCI IO",
@@ -48,6 +49,13 @@  struct resource iomem_resource = {
 };
 EXPORT_SYMBOL(iomem_resource);
 
+/*
+ * Holding tree for Soft Reserved ranges that overlap a CXL fixed memory
+ * window. Resources are parked here by insert_soft_reserve_resource(),
+ * trimmed by trim_soft_reserve_resources() and finally moved into
+ * iomem_resource by merge_soft_reserve_resources().
+ *
+ * Only referenced from this file and not declared in any header, so it is
+ * static.
+ */
+static struct resource srmem_resource = {
+	.name	= "Soft Reserved mem",
+	.start	= 0,
+	.end	= -1,
+	.flags	= IORESOURCE_MEM,
+};
+
 static DEFINE_RWLOCK(resource_lock);
 
 static struct resource *next_resource(struct resource *p, bool skip_children)
@@ -1034,6 +1042,136 @@  int adjust_resource(struct resource *res, resource_size_t start,
 }
 EXPORT_SYMBOL(adjust_resource);
 
+/*
+ * Remove the range described by @res from the Soft Reserved resource @sr_res.
+ *
+ * Called with resource_lock held for writing, after the caller has checked
+ * that @sr_res contains @res. Depending on where @res sits inside @sr_res
+ * the Soft Reserved resource is:
+ *   - released and freed (exact match),
+ *   - shrunk from the front (@res at the start),
+ *   - shrunk from the back (@res at the end), or
+ *   - split: @sr_res keeps the head and a newly allocated resource is
+ *     inserted into srmem_resource for the tail.
+ *
+ * @sr_res may have been freed on return; the caller must not touch it again.
+ */
+static void trim_soft_reserve(struct resource *sr_res,
+			      const struct resource *res)
+{
+	resource_size_t tail_start, tail_size;
+	struct resource *new_res;
+
+	pr_err("CXL DEBUG Trimming Soft Reserve %pr\n", res);
+
+	if (sr_res->start == res->start && sr_res->end == res->end) {
+		pr_err("CXL DEBUG Releasing resource %pr\n", res);
+		__release_resource(sr_res, false);
+		free_resource(sr_res);
+	} else if (sr_res->start == res->start) {
+		pr_err("CXL DEBUG Adjusting resource %pr (%llx - %llx)\n",
+		       sr_res, res->end + 1, sr_res->end - res->end);
+		WARN_ON(__adjust_resource(sr_res, res->end + 1,
+					  sr_res->end - res->end));
+	} else if (sr_res->end == res->end) {
+		pr_err("CXL DEBUG Adjusting resource %pr (%llx - %llx)\n",
+		       sr_res, sr_res->start, res->start - sr_res->start);
+		WARN_ON(__adjust_resource(sr_res, sr_res->start,
+					  res->start - sr_res->start));
+	} else {
+		/*
+		 * Work out the tail piece before touching sr_res: the
+		 * __adjust_resource() call below updates sr_res->end, so it
+		 * cannot be used to size the tail afterwards.
+		 */
+		tail_start = res->end + 1;
+		tail_size = sr_res->end - res->end;
+
+		/* Adjust existing to beginning resource */
+		pr_err("CXL DEBUG Adjusting resource %pr (%llx - %llx)\n",
+		       sr_res, sr_res->start, res->start);
+		if (WARN_ON(__adjust_resource(sr_res, sr_res->start,
+					      res->start - sr_res->start)))
+			return;
+
+		/*
+		 * Add new resource for end piece. resource_lock is held by
+		 * the caller, so the allocation must not sleep.
+		 */
+		new_res = alloc_resource(GFP_ATOMIC);
+		if (!new_res)
+			return;
+
+		*new_res = DEFINE_RES_NAMED(tail_start, tail_size,
+					    "Soft Reserved", sr_res->flags);
+		new_res->desc = IORES_DESC_SOFT_RESERVED;
+		pr_err("CXL DEBUG Adding resource %pr\n", new_res);
+
+		/* Do not leak the tail if it cannot be linked into the tree */
+		if (WARN_ON(__insert_resource(&srmem_resource, new_res)))
+			free_resource(new_res);
+	}
+}
+
+/*
+ * Remove the range @res from the set-aside Soft Reserved tree.
+ *
+ * Walks the direct children of srmem_resource under resource_lock (write)
+ * and trims the first child for which resource_contains(child, @res) is
+ * true. If no child matches, nothing is changed.
+ */
+void trim_soft_reserve_resources(const struct resource *res)
+{
+	struct resource *sr_res;
+
+	write_lock(&resource_lock);
+	for (sr_res = srmem_resource.child; sr_res; sr_res = sr_res->sibling) {
+		if (resource_contains(sr_res, res)) {
+			trim_soft_reserve(sr_res, res);
+			/* sr_res may have been freed by the trim; stop here */
+			break;
+		}
+	}
+	write_unlock(&resource_lock);
+}
+EXPORT_SYMBOL(trim_soft_reserve_resources);
+
+/*
+ * Move every resource still parked under srmem_resource into
+ * iomem_resource. Runs under resource_lock (write). A non-zero return from
+ * __insert_resource() only triggers a WARN.
+ */
+void merge_soft_reserve_resources(void)
+{
+	struct resource *sr_res, *next;
+
+	write_lock(&resource_lock);
+	for (sr_res = srmem_resource.child; sr_res; sr_res = next) {
+		/* Pick up the sibling first; sr_res is released from this tree below */
+		next = sr_res->sibling;
+
+		pr_err("CXL DEBUG Merging Soft Reserve %pr\n", sr_res);
+		__release_resource(sr_res, false);
+		WARN_ON(__insert_resource(&iomem_resource, sr_res));
+	}
+	write_unlock(&resource_lock);
+}
+EXPORT_SYMBOL(merge_soft_reserve_resources);
+
+/* Argument passed to srmem_parse_cfmws() via acpi_table_parse_cedt() */
+struct srmem_arg {
+	struct resource *res;	/* resource being checked against each CFMWS */
+	int overlaps;		/* incremented for each overlapping CFMWS */
+};
+
+/*
+ * acpi_table_parse_cedt() handler: check whether the CFMWS entry @hdr
+ * overlaps the resource carried in @arg (a struct srmem_arg) and, if so,
+ * bump its overlap counter.
+ *
+ * Returns 1 when an overlap was found, 0 otherwise.
+ */
+static int srmem_parse_cfmws(union acpi_subtable_headers *hdr,
+			     void *arg, const unsigned long unused)
+{
+	struct acpi_cedt_cfmws *cfmws;
+	struct srmem_arg *args = arg;
+	struct resource cfmws_res;
+	struct resource *res;
+
+	res = args->res;
+
+	pr_err("CXL DEBUG Checking Soft Reserve for CFMWS overlap %pr\n", res);
+
+	cfmws = (struct acpi_cedt_cfmws *)hdr;
+	/*
+	 * DEFINE_RES_MEM() takes (start, size), not (start, end). Passing
+	 * base_hpa + window_size as the second argument would describe a
+	 * window far larger than the real CFMWS and report overlaps that do
+	 * not exist.
+	 */
+	cfmws_res = DEFINE_RES_MEM(cfmws->base_hpa, cfmws->window_size);
+	pr_err("CXL DEBUG Found CFMWS: %pr\n", &cfmws_res);
+
+	if (resource_overlaps(&cfmws_res, res)) {
+		pr_err("CXL DEBUG Found SOFT RESERVE intersection %llx - %llx : %llx - %llx\n",
+		       res->start, res->end, cfmws_res.start, cfmws_res.end);
+		args->overlaps += 1;
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Run srmem_parse_cfmws() over the CFMWS entries of the ACPI CEDT and
+ * return true if it recorded at least one overlap with @res.
+ */
+static bool resource_overlaps_cfmws(struct resource *res)
+{
+	struct srmem_arg cookie = { .res = res, .overlaps = 0 };
+
+	acpi_table_parse_cedt(ACPI_CEDT_TYPE_CFMWS, srmem_parse_cfmws, &cookie);
+
+	return cookie.overlaps != 0;
+}
+
+/*
+ * Insert the Soft Reserved resource @res into the appropriate tree: the
+ * set-aside srmem_resource tree when it overlaps a CFMWS, iomem_resource
+ * otherwise. Returns the result of insert_resource().
+ */
+int insert_soft_reserve_resource(struct resource *res)
+{
+	struct resource *root = &iomem_resource;
+
+	if (resource_overlaps_cfmws(res)) {
+		pr_err("CXL DEBUG Reserving Soft Reserve %pr\n", res);
+		root = &srmem_resource;
+	}
+
+	return insert_resource(root, res);
+}
+EXPORT_SYMBOL(insert_soft_reserve_resource);
+
 static void __init
 __reserve_region_with_split(struct resource *root, resource_size_t start,
 			    resource_size_t end, const char *name)