diff mbox series

[v4,11/11] cxl: Deal with numa nodes not enumerated by SRAT

Message ID 170568505106.1008395.11049680010287891282.stgit@djiang5-mobl3 (mailing list archive)
State Handled Elsewhere, archived
Headers show
Series cxl: Add support to report region access coordinates to numa nodes | expand

Commit Message

Dave Jiang Jan. 19, 2024, 5:24 p.m. UTC
For NUMA nodes that are not created by SRAT, no memory_target is
allocated and the node is not managed by the HMAT_REPORTING code. Therefore
the hmat_callback() memory hotplug notifier will exit early on those NUMA
nodes. The CXL memory hotplug notifier will need to call
node_set_perf_attrs() directly in order to set up the access sysfs
attributes.

In acpi_numa_init(), the last proximity domain (pxm) id created by SRAT is
stored. Add a helper function acpi_node_backed_by_real_pxm() in order to
check if a NUMA node id is defined by SRAT or created by CFMWS or some
other methods.

The node_set_perf_attrs() symbol is exported to allow updating the perf
attributes for a node. The sysfs path
/sys/devices/system/node/nodeX/access0/initiators/* is created by
node_set_perf_attrs() for the various attributes, where nodeX is matched
to the NUMA node of the CXL region.

Cc: Rafael J. Wysocki <rafael@kernel.org>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 drivers/acpi/numa/srat.c  |   11 +++++++++++
 drivers/base/node.c       |    1 +
 drivers/cxl/core/cdat.c   |    5 +++++
 drivers/cxl/core/core.h   |    1 +
 drivers/cxl/core/region.c |    7 ++++++-
 include/linux/acpi.h      |    1 +
 6 files changed, 25 insertions(+), 1 deletion(-)

Comments

Alison Schofield Jan. 20, 2024, 3:55 a.m. UTC | #1
On Fri, Jan 19, 2024 at 10:24:11AM -0700, Dave Jiang wrote:
> For the numa nodes that are not created by SRAT, no memory_target is
> allocated and is not managed by the HMAT_REPORTING code. Therefore
> hmat_callback() memory hotplug notifier will exit early on those NUMA
> nodes. The CXL memory hotplug notifier will need to call
> node_set_perf_attrs() directly in order to setup the access sysfs
> attributes.
> 
> In acpi_numa_init(), the last proximity domain (pxm) id created by SRAT is
> stored. Add a helper function acpi_node_backed_by_real_pxm() in order to
> check if a NUMA node id is defined by SRAT or created by CFMWS or some
> other methods.

I'm thinking the 'or some other methods' can be dropped. In chat,
we mentioned emulated nodes, but they don't make PXM assignments.
Maybe I misunderstand, but I thought NUMA emulation can only be
enabled when there is no physical NUMA architecture.

Aside from clearing up the emulated or other nodes story...LGTM.

Reviewed-by: Alison Schofield <alison.schofield@intel.com>

> 
> node_set_perf_attrs() symbol is exported to allow update of perf attribs
> for a node. The sysfs path of
> /sys/devices/system/node/nodeX/access0/initiators/* is created by
> ndoe_set_perf_attrs() for the various attributes where nodeX is matched
> to the NUMA node of the CXL region.
> 
> Cc: Rafael J. Wysocki <rafael@kernel.org>
> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
> ---
>  drivers/acpi/numa/srat.c  |   11 +++++++++++
>  drivers/base/node.c       |    1 +
>  drivers/cxl/core/cdat.c   |    5 +++++
>  drivers/cxl/core/core.h   |    1 +
>  drivers/cxl/core/region.c |    7 ++++++-
>  include/linux/acpi.h      |    1 +
>  6 files changed, 25 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/acpi/numa/srat.c b/drivers/acpi/numa/srat.c
> index 12f330b0eac0..2f6f15b3891d 100644
> --- a/drivers/acpi/numa/srat.c
> +++ b/drivers/acpi/numa/srat.c
> @@ -29,6 +29,8 @@ static int node_to_pxm_map[MAX_NUMNODES]
>  unsigned char acpi_srat_revision __initdata;
>  static int acpi_numa __initdata;
>  
> +static int last_real_pxm;
> +
>  void __init disable_srat(void)
>  {
>  	acpi_numa = -1;
> @@ -536,6 +538,7 @@ int __init acpi_numa_init(void)
>  		if (node_to_pxm_map[i] > fake_pxm)
>  			fake_pxm = node_to_pxm_map[i];
>  	}
> +	last_real_pxm = fake_pxm;
>  	fake_pxm++;
>  	acpi_table_parse_cedt(ACPI_CEDT_TYPE_CFMWS, acpi_parse_cfmws,
>  			      &fake_pxm);
> @@ -547,6 +550,14 @@ int __init acpi_numa_init(void)
>  	return 0;
>  }
>  
> +bool acpi_node_backed_by_real_pxm(int nid)
> +{
> +	int pxm = node_to_pxm(nid);
> +
> +	return pxm <= last_real_pxm;
> +}
> +EXPORT_SYMBOL_GPL(acpi_node_backed_by_real_pxm);
> +
>  static int acpi_get_pxm(acpi_handle h)
>  {
>  	unsigned long long pxm;
> diff --git a/drivers/base/node.c b/drivers/base/node.c
> index b4a449f07f2a..8d0b09769b77 100644
> --- a/drivers/base/node.c
> +++ b/drivers/base/node.c
> @@ -215,6 +215,7 @@ void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord,
>  		}
>  	}
>  }
> +EXPORT_SYMBOL_GPL(node_set_perf_attrs);
>  
>  /**
>   * struct node_cache_info - Internal tracking for memory node caches
> diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c
> index 3556c897ece4..7d7163f999e8 100644
> --- a/drivers/cxl/core/cdat.c
> +++ b/drivers/cxl/core/cdat.c
> @@ -626,3 +626,8 @@ int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr,
>  {
>  	return hmat_update_target_coordinates(nid, &cxlr->coord[access], access);
>  }
> +
> +bool cxl_need_node_perf_attrs_update(int nid)
> +{
> +	return !acpi_node_backed_by_real_pxm(nid);
> +}
> diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
> index e19800a7ce06..bc5a95665aa0 100644
> --- a/drivers/cxl/core/core.h
> +++ b/drivers/cxl/core/core.h
> @@ -92,5 +92,6 @@ long cxl_pci_get_latency(struct pci_dev *pdev);
>  
>  int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr,
>  				       enum access_coordinate_class access);
> +bool cxl_need_node_perf_attrs_update(int nid);
>  
>  #endif /* __CXL_CORE_H__ */
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index ae1f34e1cd05..66f126067bda 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -3084,7 +3084,12 @@ static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid)
>  
>  	for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
>  		if (cxlr->coord[i].read_bandwidth) {
> -			rc = cxl_update_hmat_access_coordinates(nid, cxlr, i);
> +			rc = 0;
> +			if (cxl_need_node_perf_attrs_update(nid))
> +				node_set_perf_attrs(nid, &cxlr->coord[i], i);
> +			else
> +				rc = cxl_update_hmat_access_coordinates(nid, cxlr, i);
> +
>  			if (rc == 0)
>  				cset++;
>  		}
> diff --git a/include/linux/acpi.h b/include/linux/acpi.h
> index 1c664948b2ae..3067c6aad431 100644
> --- a/include/linux/acpi.h
> +++ b/include/linux/acpi.h
> @@ -447,6 +447,7 @@ static inline int hmat_update_target_coordinates(int nid,
>  #ifdef CONFIG_ACPI_NUMA
>  int acpi_map_pxm_to_node(int pxm);
>  int acpi_get_node(acpi_handle handle);
> +bool acpi_node_backed_by_real_pxm(int nid);
>  
>  /**
>   * pxm_to_online_node - Map proximity ID to online node
> 
>
diff mbox series

Patch

diff --git a/drivers/acpi/numa/srat.c b/drivers/acpi/numa/srat.c
index 12f330b0eac0..2f6f15b3891d 100644
--- a/drivers/acpi/numa/srat.c
+++ b/drivers/acpi/numa/srat.c
@@ -29,6 +29,8 @@  static int node_to_pxm_map[MAX_NUMNODES]
 unsigned char acpi_srat_revision __initdata;
 static int acpi_numa __initdata;
 
+static int last_real_pxm;
+
 void __init disable_srat(void)
 {
 	acpi_numa = -1;
@@ -536,6 +538,7 @@  int __init acpi_numa_init(void)
 		if (node_to_pxm_map[i] > fake_pxm)
 			fake_pxm = node_to_pxm_map[i];
 	}
+	last_real_pxm = fake_pxm;
 	fake_pxm++;
 	acpi_table_parse_cedt(ACPI_CEDT_TYPE_CFMWS, acpi_parse_cfmws,
 			      &fake_pxm);
@@ -547,6 +550,14 @@  int __init acpi_numa_init(void)
 	return 0;
 }
 
+bool acpi_node_backed_by_real_pxm(int nid)
+{
+	int pxm = node_to_pxm(nid);
+
+	return pxm <= last_real_pxm;
+}
+EXPORT_SYMBOL_GPL(acpi_node_backed_by_real_pxm);
+
 static int acpi_get_pxm(acpi_handle h)
 {
 	unsigned long long pxm;
diff --git a/drivers/base/node.c b/drivers/base/node.c
index b4a449f07f2a..8d0b09769b77 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -215,6 +215,7 @@  void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord,
 		}
 	}
 }
+EXPORT_SYMBOL_GPL(node_set_perf_attrs);
 
 /**
  * struct node_cache_info - Internal tracking for memory node caches
diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c
index 3556c897ece4..7d7163f999e8 100644
--- a/drivers/cxl/core/cdat.c
+++ b/drivers/cxl/core/cdat.c
@@ -626,3 +626,8 @@  int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr,
 {
 	return hmat_update_target_coordinates(nid, &cxlr->coord[access], access);
 }
+
+bool cxl_need_node_perf_attrs_update(int nid)
+{
+	return !acpi_node_backed_by_real_pxm(nid);
+}
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index e19800a7ce06..bc5a95665aa0 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -92,5 +92,6 @@  long cxl_pci_get_latency(struct pci_dev *pdev);
 
 int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr,
 				       enum access_coordinate_class access);
+bool cxl_need_node_perf_attrs_update(int nid);
 
 #endif /* __CXL_CORE_H__ */
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index ae1f34e1cd05..66f126067bda 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -3084,7 +3084,12 @@  static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid)
 
 	for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
 		if (cxlr->coord[i].read_bandwidth) {
-			rc = cxl_update_hmat_access_coordinates(nid, cxlr, i);
+			rc = 0;
+			if (cxl_need_node_perf_attrs_update(nid))
+				node_set_perf_attrs(nid, &cxlr->coord[i], i);
+			else
+				rc = cxl_update_hmat_access_coordinates(nid, cxlr, i);
+
 			if (rc == 0)
 				cset++;
 		}
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 1c664948b2ae..3067c6aad431 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -447,6 +447,7 @@  static inline int hmat_update_target_coordinates(int nid,
 #ifdef CONFIG_ACPI_NUMA
 int acpi_map_pxm_to_node(int pxm);
 int acpi_get_node(acpi_handle handle);
+bool acpi_node_backed_by_real_pxm(int nid);
 
 /**
  * pxm_to_online_node - Map proximity ID to online node