diff mbox series

[3/3] cxl: Add memory hotplug notifier for cxl region

Message ID 170199192262.3543815.6979022920061286874.stgit@djiang5-mobl3
State Superseded
Headers show
Series cxl: Add support to report region access coordinates to numa nodes | expand

Commit Message

Dave Jiang Dec. 7, 2023, 11:32 p.m. UTC
When the CXL region is formed, the driver would computed the performance
data for the region. However this data is not available at the node data
collection that has been populated by the HMAT during kernel
initialization. Add a memory hotplug notifier to update the performance
data to the node hmem_attrs to expose the newly calculated region
performance data. The CXL region is created under specific CFMWS. The
node for the CFMWS is created during SRAT parsing by acpi_parse_cfmws().
The notifier will run once only and turn itself off after the initial
run. Additional regions may overwrite the initial data, but since this is
for the same poximity domain it's a don't care for now.

node_set_perf_attrs() is exported to allow update of perf attribs for a
node. Given that only CXL is using this, export only to CXL namespace.

Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Rafael J. Wysocki <rafael@kernel.org>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 drivers/base/node.c       |    1 +
 drivers/cxl/core/region.c |   44 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/cxl/cxl.h         |    2 ++
 3 files changed, 47 insertions(+)

Comments

Huang, Ying Dec. 8, 2023, 3:35 a.m. UTC | #1
Dave Jiang <dave.jiang@intel.com> writes:

> When the CXL region is formed, the driver would computed the performance
> data for the region. However this data is not available at the node data
> collection that has been populated by the HMAT during kernel
> initialization. Add a memory hotplug notifier to update the performance
> data to the node hmem_attrs to expose the newly calculated region
> performance data. The CXL region is created under specific CFMWS. The
> node for the CFMWS is created during SRAT parsing by acpi_parse_cfmws().
> The notifier will run once only and turn itself off after the initial
> run. Additional regions may overwrite the initial data, but since this is
> for the same poximity domain it's a don't care for now.
>
> node_set_perf_attrs() is exported to allow update of perf attribs for a
> node. Given that only CXL is using this, export only to CXL namespace.
>
> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> Cc: Rafael J. Wysocki <rafael@kernel.org>
> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
> ---
>  drivers/base/node.c       |    1 +
>  drivers/cxl/core/region.c |   44 ++++++++++++++++++++++++++++++++++++++++++++
>  drivers/cxl/cxl.h         |    2 ++
>  3 files changed, 47 insertions(+)
>
> diff --git a/drivers/base/node.c b/drivers/base/node.c
> index cb2b6cc7f6e6..f5b5a3f11894 100644
> --- a/drivers/base/node.c
> +++ b/drivers/base/node.c
> @@ -215,6 +215,7 @@ void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord,
>  		}
>  	}
>  }
> +EXPORT_SYMBOL_NS_GPL(node_set_perf_attrs, CXL);
>  
>  /**
>   * struct node_cache_info - Internal tracking for memory node caches
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index 72c47f624d63..3794e91e12b1 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -4,6 +4,7 @@
>  #include <linux/genalloc.h>
>  #include <linux/device.h>
>  #include <linux/module.h>
> +#include <linux/memory.h>
>  #include <linux/slab.h>
>  #include <linux/uuid.h>
>  #include <linux/sort.h>
> @@ -2958,6 +2959,37 @@ static int is_system_ram(struct resource *res, void *arg)
>  	return 1;
>  }
>  
> +static int cxl_region_perf_attrs_callback(struct notifier_block *nb,
> +					  unsigned long action, void *arg)
> +{
> +	struct cxl_region *cxlr = container_of(nb, struct cxl_region,
> +					       memory_notifier);
> +	struct cxl_region_params *p = &cxlr->params;
> +	struct cxl_endpoint_decoder *cxled = p->targets[0];
> +	struct cxl_decoder *cxld = &cxled->cxld;
> +	struct memory_notify *mnb = arg;
> +	int nid = mnb->status_change_nid;
> +	struct access_coordinate coord;
> +	int region_nid;
> +
> +	if (nid == NUMA_NO_NODE || action != MEM_ONLINE || !cxlr->coord)
> +		return NOTIFY_STOP;
> +
> +	region_nid = phys_to_target_node(cxld->hpa_range.start);
> +	if (nid != region_nid)
> +		return NOTIFY_STOP;
> +
> +	/* Adjust latencies from psec to nsec to be consistent with HMAT targets */
> +	coord = *cxlr->coord;
> +	coord.read_latency = DIV_ROUND_UP(coord.read_latency, 1000);
> +	coord.write_latency = DIV_ROUND_UP(coord.write_latency, 1000);
> +
> +	node_set_perf_attrs(nid, &coord, 0);
> +	node_set_perf_attrs(nid, &coord, 1);
> +
> +	return NOTIFY_STOP;
> +}

It unfortunate that we still need to add another callback for abstract
distance calculation.  Because the abstract distance needs to be
calculated before hot-add the node.  But, this provides all information
to do that, and we can add another callback for that.

The patch itself looks good to me.  Feel free to add,

Reviewed-by: "Huang, Ying" <ying.huang@intel.com>

>  static int cxl_region_perf_data_calculate(struct cxl_region *cxlr)
>  {
>  	struct cxl_region_params *p = &cxlr->params;
> @@ -3077,6 +3109,10 @@ static int cxl_region_probe(struct device *dev)
>  
>  	cxl_region_perf_data_calculate(cxlr);
>  
> +	cxlr->memory_notifier.notifier_call = cxl_region_perf_attrs_callback;
> +	cxlr->memory_notifier.priority = HMAT_CALLBACK_PRI;
> +	register_memory_notifier(&cxlr->memory_notifier);
> +
>  	/*
>  	 * From this point on any path that changes the region's state away from
>  	 * CXL_CONFIG_COMMIT is also responsible for releasing the driver.
> @@ -3108,9 +3144,17 @@ static int cxl_region_probe(struct device *dev)
>  	}
>  }
>  
> +static void cxl_region_remove(struct device *dev)
> +{
> +	struct cxl_region *cxlr = to_cxl_region(dev);
> +
> +	unregister_memory_notifier(&cxlr->memory_notifier);
> +}
> +
>  static struct cxl_driver cxl_region_driver = {
>  	.name = "cxl_region",
>  	.probe = cxl_region_probe,
> +	.remove = cxl_region_remove,
>  	.id = CXL_DEVICE_REGION,
>  };
>  
> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index 265da412c5bd..c326ee8956ec 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -6,6 +6,7 @@
>  
>  #include <linux/libnvdimm.h>
>  #include <linux/bitfield.h>
> +#include <linux/notifier.h>
>  #include <linux/bitops.h>
>  #include <linux/node.h>
>  #include <linux/log2.h>
> @@ -530,6 +531,7 @@ struct cxl_region {
>  	unsigned long flags;
>  	struct cxl_region_params params;
>  	struct access_coordinate *coord;
> +	struct notifier_block memory_notifier;
>  };
>  
>  struct cxl_nvdimm_bridge {

--
Best Regards,
Huang, Ying
Dan Williams Dec. 12, 2023, 12:30 a.m. UTC | #2
Dave Jiang wrote:
> When the CXL region is formed, the driver would computed the performance
> data for the region. However this data is not available at the node data
> collection that has been populated by the HMAT during kernel
> initialization. Add a memory hotplug notifier to update the performance
> data to the node hmem_attrs to expose the newly calculated region
> performance data. The CXL region is created under specific CFMWS. The
> node for the CFMWS is created during SRAT parsing by acpi_parse_cfmws().
> The notifier will run once only and turn itself off after the initial
> run. Additional regions may overwrite the initial data, but since this is
> for the same poximity domain it's a don't care for now.
> 
> node_set_perf_attrs() is exported to allow update of perf attribs for a
> node. Given that only CXL is using this, export only to CXL namespace.
> 
> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> Cc: Rafael J. Wysocki <rafael@kernel.org>
> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
> ---
>  drivers/base/node.c       |    1 +
>  drivers/cxl/core/region.c |   44 ++++++++++++++++++++++++++++++++++++++++++++
>  drivers/cxl/cxl.h         |    2 ++
>  3 files changed, 47 insertions(+)
> 
> diff --git a/drivers/base/node.c b/drivers/base/node.c
> index cb2b6cc7f6e6..f5b5a3f11894 100644
> --- a/drivers/base/node.c
> +++ b/drivers/base/node.c
> @@ -215,6 +215,7 @@ void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord,
>  		}
>  	}
>  }
> +EXPORT_SYMBOL_NS_GPL(node_set_perf_attrs, CXL);
>  
>  /**
>   * struct node_cache_info - Internal tracking for memory node caches
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index 72c47f624d63..3794e91e12b1 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -4,6 +4,7 @@
>  #include <linux/genalloc.h>
>  #include <linux/device.h>
>  #include <linux/module.h>
> +#include <linux/memory.h>
>  #include <linux/slab.h>
>  #include <linux/uuid.h>
>  #include <linux/sort.h>
> @@ -2958,6 +2959,37 @@ static int is_system_ram(struct resource *res, void *arg)
>  	return 1;
>  }
>  
> +static int cxl_region_perf_attrs_callback(struct notifier_block *nb,
> +					  unsigned long action, void *arg)
> +{
> +	struct cxl_region *cxlr = container_of(nb, struct cxl_region,
> +					       memory_notifier);
> +	struct cxl_region_params *p = &cxlr->params;
> +	struct cxl_endpoint_decoder *cxled = p->targets[0];
> +	struct cxl_decoder *cxld = &cxled->cxld;
> +	struct memory_notify *mnb = arg;
> +	int nid = mnb->status_change_nid;
> +	struct access_coordinate coord;
> +	int region_nid;
> +
> +	if (nid == NUMA_NO_NODE || action != MEM_ONLINE || !cxlr->coord)
> +		return NOTIFY_STOP;

Shouldn't this be NOTIFY_DONE? NOTIFY_STOP prevents other callbacks on
the same chain from getting notified.

> +
> +	region_nid = phys_to_target_node(cxld->hpa_range.start);
> +	if (nid != region_nid)
> +		return NOTIFY_STOP;

NOTIFY_DONE?

> +
> +	/* Adjust latencies from psec to nsec to be consistent with HMAT targets */
> +	coord = *cxlr->coord;
> +	coord.read_latency = DIV_ROUND_UP(coord.read_latency, 1000);
> +	coord.write_latency = DIV_ROUND_UP(coord.write_latency, 1000);
> +
> +	node_set_perf_attrs(nid, &coord, 0);
> +	node_set_perf_attrs(nid, &coord, 1);
> +
> +	return NOTIFY_STOP;

This should be NOTIFY_OK, right?

> +}
> +
>  static int cxl_region_perf_data_calculate(struct cxl_region *cxlr)
>  {
>  	struct cxl_region_params *p = &cxlr->params;
> @@ -3077,6 +3109,10 @@ static int cxl_region_probe(struct device *dev)
>  
>  	cxl_region_perf_data_calculate(cxlr);
>  
> +	cxlr->memory_notifier.notifier_call = cxl_region_perf_attrs_callback;
> +	cxlr->memory_notifier.priority = HMAT_CALLBACK_PRI;
> +	register_memory_notifier(&cxlr->memory_notifier);
> +
>  	/*
>  	 * From this point on any path that changes the region's state away from
>  	 * CXL_CONFIG_COMMIT is also responsible for releasing the driver.
> @@ -3108,9 +3144,17 @@ static int cxl_region_probe(struct device *dev)
>  	}
>  }
>  
> +static void cxl_region_remove(struct device *dev)
> +{
> +	struct cxl_region *cxlr = to_cxl_region(dev);
> +
> +	unregister_memory_notifier(&cxlr->memory_notifier);
> +}
> +
>  static struct cxl_driver cxl_region_driver = {
>  	.name = "cxl_region",
>  	.probe = cxl_region_probe,
> +	.remove = cxl_region_remove,

Keep the region driver from needing a remove() callback by using a
devm_add_action_or_reset() after the notifier registration.
diff mbox series

Patch

diff --git a/drivers/base/node.c b/drivers/base/node.c
index cb2b6cc7f6e6..f5b5a3f11894 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -215,6 +215,7 @@  void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord,
 		}
 	}
 }
+EXPORT_SYMBOL_NS_GPL(node_set_perf_attrs, CXL);
 
 /**
  * struct node_cache_info - Internal tracking for memory node caches
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 72c47f624d63..3794e91e12b1 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -4,6 +4,7 @@ 
 #include <linux/genalloc.h>
 #include <linux/device.h>
 #include <linux/module.h>
+#include <linux/memory.h>
 #include <linux/slab.h>
 #include <linux/uuid.h>
 #include <linux/sort.h>
@@ -2958,6 +2959,37 @@  static int is_system_ram(struct resource *res, void *arg)
 	return 1;
 }
 
+static int cxl_region_perf_attrs_callback(struct notifier_block *nb,
+					  unsigned long action, void *arg)
+{
+	struct cxl_region *cxlr = container_of(nb, struct cxl_region,
+					       memory_notifier);
+	struct cxl_region_params *p = &cxlr->params;
+	struct cxl_endpoint_decoder *cxled = p->targets[0];
+	struct cxl_decoder *cxld = &cxled->cxld;
+	struct memory_notify *mnb = arg;
+	int nid = mnb->status_change_nid;
+	struct access_coordinate coord;
+	int region_nid;
+
+	if (nid == NUMA_NO_NODE || action != MEM_ONLINE || !cxlr->coord)
+		return NOTIFY_STOP;
+
+	region_nid = phys_to_target_node(cxld->hpa_range.start);
+	if (nid != region_nid)
+		return NOTIFY_STOP;
+
+	/* Adjust latencies from psec to nsec to be consistent with HMAT targets */
+	coord = *cxlr->coord;
+	coord.read_latency = DIV_ROUND_UP(coord.read_latency, 1000);
+	coord.write_latency = DIV_ROUND_UP(coord.write_latency, 1000);
+
+	node_set_perf_attrs(nid, &coord, 0);
+	node_set_perf_attrs(nid, &coord, 1);
+
+	return NOTIFY_STOP;
+}
+
 static int cxl_region_perf_data_calculate(struct cxl_region *cxlr)
 {
 	struct cxl_region_params *p = &cxlr->params;
@@ -3077,6 +3109,10 @@  static int cxl_region_probe(struct device *dev)
 
 	cxl_region_perf_data_calculate(cxlr);
 
+	cxlr->memory_notifier.notifier_call = cxl_region_perf_attrs_callback;
+	cxlr->memory_notifier.priority = HMAT_CALLBACK_PRI;
+	register_memory_notifier(&cxlr->memory_notifier);
+
 	/*
 	 * From this point on any path that changes the region's state away from
 	 * CXL_CONFIG_COMMIT is also responsible for releasing the driver.
@@ -3108,9 +3144,17 @@  static int cxl_region_probe(struct device *dev)
 	}
 }
 
+static void cxl_region_remove(struct device *dev)
+{
+	struct cxl_region *cxlr = to_cxl_region(dev);
+
+	unregister_memory_notifier(&cxlr->memory_notifier);
+}
+
 static struct cxl_driver cxl_region_driver = {
 	.name = "cxl_region",
 	.probe = cxl_region_probe,
+	.remove = cxl_region_remove,
 	.id = CXL_DEVICE_REGION,
 };
 
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 265da412c5bd..c326ee8956ec 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -6,6 +6,7 @@ 
 
 #include <linux/libnvdimm.h>
 #include <linux/bitfield.h>
+#include <linux/notifier.h>
 #include <linux/bitops.h>
 #include <linux/node.h>
 #include <linux/log2.h>
@@ -530,6 +531,7 @@  struct cxl_region {
 	unsigned long flags;
 	struct cxl_region_params params;
 	struct access_coordinate *coord;
+	struct notifier_block memory_notifier;
 };
 
 struct cxl_nvdimm_bridge {