diff mbox series

[v3] cxl/region: Remove lock from memory notifier callback

Message ID 20240904-fix-notifiers-v3-1-576b4e950266@intel.com
State Accepted
Commit d9a476c837fab38856c6b6ff9f794c33907a9f81
Headers show
Series [v3] cxl/region: Remove lock from memory notifier callback | expand

Commit Message

Ira Weiny Sept. 4, 2024, 2:47 p.m. UTC
In testing Dynamic Capacity Device (DCD) support, a lockdep splat
revealed an ABBA issue between the memory notifiers and the DCD extent
processing code.[0]  Changing the lock ordering within DCD proved
difficult because regions must be stable while searching for the proper
region and then the device lock must be held to properly notify the DAX
region driver of memory changes.

Dan points out in the thread that notifiers should be able to trust that
it is safe to access static data.  Region data is static once the device
is realized and until its destruction.  Thus it is better to manage the
notifiers within the region driver.

Remove the need for a lock by ensuring the notifiers are active only
during the region's lifetime.

Furthermore, remove cxl_region_nid() because resource can't be NULL
while the region is stable.

Link: https://lore.kernel.org/all/66b4cf539a79b_a36e829416@iweiny-mobl.notmuch/ [0]
Cc: Ying Huang <ying.huang@intel.com>
Suggested-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Ying Huang <ying.huang@intel.com>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
---
Changes in v3:
- [Ying: Add comment regarding cxl_region_rwsem]
- [iweiny: pickup tags]
- Link to v2: https://patch.msgid.link/20240814-fix-notifiers-v2-1-6bab38192c7c@intel.com

Changes in v2:
- [djbw: remove cxl_region_nid()]
- Link to v1: https://patch.msgid.link/20240813-fix-notifiers-v1-1-efd23a18688d@intel.com
---
 drivers/cxl/core/region.c | 54 ++++++++++++++++++++++++++---------------------
 1 file changed, 30 insertions(+), 24 deletions(-)


---
base-commit: 67784a74e258a467225f0e68335df77acd67b7ab
change-id: 20240813-fix-notifiers-99c350b044a2

Best regards,

Comments

Jonathan Cameron Sept. 13, 2024, 1:13 p.m. UTC | #1
On Wed, 04 Sep 2024 09:47:54 -0500
Ira Weiny <ira.weiny@intel.com> wrote:

> In testing Dynamic Capacity Device (DCD) support, a lockdep splat
> revealed an ABBA issue between the memory notifiers and the DCD extent
> processing code.[0]  Changing the lock ordering within DCD proved
> difficult because regions must be stable while searching for the proper
> region and then the device lock must be held to properly notify the DAX
> region driver of memory changes.
> 
> Dan points out in the thread that notifiers should be able to trust that
> it is safe to access static data.  Region data is static once the device
> is realized and until its destruction.  Thus it is better to manage the
> notifiers within the region driver.
> 
> Remove the need for a lock by ensuring the notifiers are active only
> during the region's lifetime.
> 
> Furthermore, remove cxl_region_nid() because resource can't be NULL
> while the region is stable.
> 
> Link: https://lore.kernel.org/all/66b4cf539a79b_a36e829416@iweiny-mobl.notmuch/ [0]
> Cc: Ying Huang <ying.huang@intel.com>
> Suggested-by: Dan Williams <dan.j.williams@intel.com>
> Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Reviewed-by: Ying Huang <ying.huang@intel.com>
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
A few comments on looking at this again, but all things that apply equally to old
code so maybe things for another day.

Jonathan

> ---
> Changes in v3:
> - [Ying: Add comment regarding cxl_region_rwsem]
> - [iweiny: pickup tags]
> - Link to v2: https://patch.msgid.link/20240814-fix-notifiers-v2-1-6bab38192c7c@intel.com
> 
> Changes in v2:
> - [djbw: remove cxl_region_nid()]
> - Link to v1: https://patch.msgid.link/20240813-fix-notifiers-v1-1-efd23a18688d@intel.com
> ---
>  drivers/cxl/core/region.c | 54 ++++++++++++++++++++++++++---------------------
>  1 file changed, 30 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index 21ad5f242875..dbc9f8a4f603 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -2313,8 +2313,6 @@ static void unregister_region(void *_cxlr)
>  	struct cxl_region_params *p = &cxlr->params;
>  	int i;
>  
> -	unregister_memory_notifier(&cxlr->memory_notifier);
> -	unregister_mt_adistance_algorithm(&cxlr->adist_notifier);
>  	device_del(&cxlr->dev);
>  
>  	/*
> @@ -2391,18 +2389,6 @@ static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid)
>  	return true;
>  }
>  
> -static int cxl_region_nid(struct cxl_region *cxlr)
> -{
> -	struct cxl_region_params *p = &cxlr->params;
> -	struct resource *res;
> -
> -	guard(rwsem_read)(&cxl_region_rwsem);
> -	res = p->res;
> -	if (!res)
> -		return NUMA_NO_NODE;
> -	return phys_to_target_node(res->start);
> -}
> -
>  static int cxl_region_perf_attrs_callback(struct notifier_block *nb,
>  					  unsigned long action, void *arg)
>  {
> @@ -2415,7 +2401,11 @@ static int cxl_region_perf_attrs_callback(struct notifier_block *nb,
>  	if (nid == NUMA_NO_NODE || action != MEM_ONLINE)
>  		return NOTIFY_DONE;
>  
> -	region_nid = cxl_region_nid(cxlr);
> +	/*
> +	 * No need to hold cxl_region_rwsem; region parameters are stable
> +	 * within the cxl_region driver.
> +	 */
> +	region_nid = phys_to_target_node(cxlr->params.res->start);
>  	if (nid != region_nid)
>  		return NOTIFY_DONE;
>  
> @@ -2434,7 +2424,11 @@ static int cxl_region_calculate_adistance(struct notifier_block *nb,
>  	int *adist = data;
>  	int region_nid;
>  
> -	region_nid = cxl_region_nid(cxlr);
> +	/*
> +	 * No need to hold cxl_region_rwsem; region parameters are stable
> +	 * within the cxl_region driver.
> +	 */
> +	region_nid = phys_to_target_node(cxlr->params.res->start);
>  	if (nid != region_nid)
>  		return NOTIFY_OK;
>  
> @@ -2484,14 +2478,6 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
>  	if (rc)
>  		goto err;
>  
> -	cxlr->memory_notifier.notifier_call = cxl_region_perf_attrs_callback;
> -	cxlr->memory_notifier.priority = CXL_CALLBACK_PRI;
> -	register_memory_notifier(&cxlr->memory_notifier);
> -
> -	cxlr->adist_notifier.notifier_call = cxl_region_calculate_adistance;
> -	cxlr->adist_notifier.priority = 100;
> -	register_mt_adistance_algorithm(&cxlr->adist_notifier);
> -
>  	rc = devm_add_action_or_reset(port->uport_dev, unregister_region, cxlr);
>  	if (rc)
>  		return ERR_PTR(rc);
> @@ -3386,6 +3372,14 @@ static int is_system_ram(struct resource *res, void *arg)
>  	return 1;
>  }
>  
> +static void shutdown_notifiers(void *_cxlr)
> +{
> +	struct cxl_region *cxlr = _cxlr;
> +
> +	unregister_memory_notifier(&cxlr->memory_notifier);
> +	unregister_mt_adistance_algorithm(&cxlr->adist_notifier);
Flip order.

Makes zero real difference, but if we later end up with more to do
here for some reason there may be ordering requirements that will
care that this doesn't tear down in reverse of setup.

Mind you, see below.

> +}
> +
>  static int cxl_region_probe(struct device *dev)
>  {
>  	struct cxl_region *cxlr = to_cxl_region(dev);
> @@ -3418,6 +3412,18 @@ static int cxl_region_probe(struct device *dev)
>  out:
>  	up_read(&cxl_region_rwsem);
>  
> +	if (rc)
> +		return rc;
> +
> +	cxlr->memory_notifier.notifier_call = cxl_region_perf_attrs_callback;
> +	cxlr->memory_notifier.priority = CXL_CALLBACK_PRI;
> +	register_memory_notifier(&cxlr->memory_notifier);
Can in theory fail.  Today that is EEXIST only but who knows in future.
I think we should handle that and do two devm_add_action_or_reset() perhaps?


> +
> +	cxlr->adist_notifier.notifier_call = cxl_region_calculate_adistance;
> +	cxlr->adist_notifier.priority = 100;
> +	register_mt_adistance_algorithm(&cxlr->adist_notifier);
> +
> +	rc = devm_add_action_or_reset(&cxlr->dev, shutdown_notifiers, cxlr);
>  	if (rc)
>  		return rc;
>  
> 
> ---
> base-commit: 67784a74e258a467225f0e68335df77acd67b7ab
> change-id: 20240813-fix-notifiers-99c350b044a2
> 
> Best regards,
Ira Weiny Sept. 13, 2024, 6:41 p.m. UTC | #2
Jonathan Cameron wrote:
> On Wed, 04 Sep 2024 09:47:54 -0500
> Ira Weiny <ira.weiny@intel.com> wrote:
> 

[snip]

> > 
> > Link: https://lore.kernel.org/all/66b4cf539a79b_a36e829416@iweiny-mobl.notmuch/ [0]
> > Cc: Ying Huang <ying.huang@intel.com>
> > Suggested-by: Dan Williams <dan.j.williams@intel.com>
> > Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>
> > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> > Reviewed-by: Ying Huang <ying.huang@intel.com>
> > Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> A few comments on looking at this again, but all things that apply equally to old
> code so maybe things for another day.

Yea this was solely a move of existing code to fix the locking issue.  I
did not evaluate the original code.  However...

[snip]

> >  }
> >  
> > +static void shutdown_notifiers(void *_cxlr)
> > +{
> > +	struct cxl_region *cxlr = _cxlr;
> > +
> > +	unregister_memory_notifier(&cxlr->memory_notifier);
> > +	unregister_mt_adistance_algorithm(&cxlr->adist_notifier);
> Flip order.
> 
> Makes zero real difference, but if we later end up with more to do
> here for some reason there may be ordering requirements that will
> care that this doesn't tear down in reverse of setup.

Generally I agree with you; however, the memory and adist notifiers are
unrelated.  So unwinding in reverse order is a matter of taste and is not
required.  Even if some other logic were introduced between the
registrations, I don't see how this backwards order would be an issue.

> 
> Mind you, see below.
> 
> > +}
> > +
> >  static int cxl_region_probe(struct device *dev)
> >  {
> >  	struct cxl_region *cxlr = to_cxl_region(dev);
> > @@ -3418,6 +3412,18 @@ static int cxl_region_probe(struct device *dev)
> >  out:
> >  	up_read(&cxl_region_rwsem);
> >  
> > +	if (rc)
> > +		return rc;
> > +
> > +	cxlr->memory_notifier.notifier_call = cxl_region_perf_attrs_callback;
> > +	cxlr->memory_notifier.priority = CXL_CALLBACK_PRI;
> > +	register_memory_notifier(&cxlr->memory_notifier);
> Can in theory fail.  Today that is EEXIST only but who knows in future.
> I think we should handle that and do two devm_add_action_or_reset() perhaps?
> 

First we should not fail the probe if this fails.

Second, nothing bad happens in unregister if the registration failed.
Therefore, register failing is benign and I don't see a need for the extra
action callback.

Ira

[snip]
Jonathan Cameron Sept. 16, 2024, 9:36 a.m. UTC | #3
On Fri, 13 Sep 2024 13:41:06 -0500
Ira Weiny <ira.weiny@intel.com> wrote:

> Jonathan Cameron wrote:
> > On Wed, 04 Sep 2024 09:47:54 -0500
> > Ira Weiny <ira.weiny@intel.com> wrote:
> >   
> 
> [snip]
> 
> > > 
> > > Link: https://lore.kernel.org/all/66b4cf539a79b_a36e829416@iweiny-mobl.notmuch/ [0]
> > > Cc: Ying Huang <ying.huang@intel.com>
> > > Suggested-by: Dan Williams <dan.j.williams@intel.com>
> > > Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>
> > > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> > > Reviewed-by: Ying Huang <ying.huang@intel.com>
> > > Signed-off-by: Ira Weiny <ira.weiny@intel.com>  
> > A few comments on looking at this again, but all things that apply equally to old
> > code so maybe things for another day.  
> 
> Yea this was solely a move of existing code to fix the locking issue.  I
> did not evaluate the original code.  However...
> 
> [snip]
> 
> > >  }
> > >  
> > > +static void shutdown_notifiers(void *_cxlr)
> > > +{
> > > +	struct cxl_region *cxlr = _cxlr;
> > > +
> > > +	unregister_memory_notifier(&cxlr->memory_notifier);
> > > +	unregister_mt_adistance_algorithm(&cxlr->adist_notifier);  
> > Flip order.
> > 
> > Makes zero real difference, but if we later end up with more to do
> > here for some reason there may be ordering requirements that will
> > care that this doesn't tear down in reverse of setup.  
> 
> Generally I agree with you; however, the memory and adist notifiers are
> unrelated.  So unwinding in reverse order is a matter of taste and is not
> required.  Even if some other logic were introduced between the
> registrations, I don't see how this backwards order would be an issue.

Not an issue as such, but if it requires even the tiny bit of thinking
that we've engaged in here, then the obviously correct ordering would
have been worth it. Not worth the churn of a patch though.

> 
> > 
> > Mind you, see below.
> >   
> > > +}
> > > +
> > >  static int cxl_region_probe(struct device *dev)
> > >  {
> > >  	struct cxl_region *cxlr = to_cxl_region(dev);
> > > @@ -3418,6 +3412,18 @@ static int cxl_region_probe(struct device *dev)
> > >  out:
> > >  	up_read(&cxl_region_rwsem);
> > >  
> > > +	if (rc)
> > > +		return rc;
> > > +
> > > +	cxlr->memory_notifier.notifier_call = cxl_region_perf_attrs_callback;
> > > +	cxlr->memory_notifier.priority = CXL_CALLBACK_PRI;
> > > +	register_memory_notifier(&cxlr->memory_notifier);  
> > Can in theory fail.  Today that is EEXIST only but who knows in future.
> > I think we should handle that and do two devm_add_action_or_reset() perhaps?
> >   
> 
> First we should not fail the probe if this fails.
> 
> Second, nothing bad happens in unregister if the registration failed.
> Therefore, register failing is benign and I don't see a need for the extra
> action callback.

It's probably a case of only printing a warning, I guess.
It might be useful to know it's going wrong even though the impact is not huge.
Personally I'd argue in favor of always failing to probe on a 'software issue'
like this where we don't expect to fail.  Things get less clear if we
fail on an optional hardware related part where carrying on may be
the reasonable thing to do.

Jonathan

> 
> Ira
> 
> [snip]
>
diff mbox series

Patch

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 21ad5f242875..dbc9f8a4f603 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -2313,8 +2313,6 @@  static void unregister_region(void *_cxlr)
 	struct cxl_region_params *p = &cxlr->params;
 	int i;
 
-	unregister_memory_notifier(&cxlr->memory_notifier);
-	unregister_mt_adistance_algorithm(&cxlr->adist_notifier);
 	device_del(&cxlr->dev);
 
 	/*
@@ -2391,18 +2389,6 @@  static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid)
 	return true;
 }
 
-static int cxl_region_nid(struct cxl_region *cxlr)
-{
-	struct cxl_region_params *p = &cxlr->params;
-	struct resource *res;
-
-	guard(rwsem_read)(&cxl_region_rwsem);
-	res = p->res;
-	if (!res)
-		return NUMA_NO_NODE;
-	return phys_to_target_node(res->start);
-}
-
 static int cxl_region_perf_attrs_callback(struct notifier_block *nb,
 					  unsigned long action, void *arg)
 {
@@ -2415,7 +2401,11 @@  static int cxl_region_perf_attrs_callback(struct notifier_block *nb,
 	if (nid == NUMA_NO_NODE || action != MEM_ONLINE)
 		return NOTIFY_DONE;
 
-	region_nid = cxl_region_nid(cxlr);
+	/*
+	 * No need to hold cxl_region_rwsem; region parameters are stable
+	 * within the cxl_region driver.
+	 */
+	region_nid = phys_to_target_node(cxlr->params.res->start);
 	if (nid != region_nid)
 		return NOTIFY_DONE;
 
@@ -2434,7 +2424,11 @@  static int cxl_region_calculate_adistance(struct notifier_block *nb,
 	int *adist = data;
 	int region_nid;
 
-	region_nid = cxl_region_nid(cxlr);
+	/*
+	 * No need to hold cxl_region_rwsem; region parameters are stable
+	 * within the cxl_region driver.
+	 */
+	region_nid = phys_to_target_node(cxlr->params.res->start);
 	if (nid != region_nid)
 		return NOTIFY_OK;
 
@@ -2484,14 +2478,6 @@  static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
 	if (rc)
 		goto err;
 
-	cxlr->memory_notifier.notifier_call = cxl_region_perf_attrs_callback;
-	cxlr->memory_notifier.priority = CXL_CALLBACK_PRI;
-	register_memory_notifier(&cxlr->memory_notifier);
-
-	cxlr->adist_notifier.notifier_call = cxl_region_calculate_adistance;
-	cxlr->adist_notifier.priority = 100;
-	register_mt_adistance_algorithm(&cxlr->adist_notifier);
-
 	rc = devm_add_action_or_reset(port->uport_dev, unregister_region, cxlr);
 	if (rc)
 		return ERR_PTR(rc);
@@ -3386,6 +3372,14 @@  static int is_system_ram(struct resource *res, void *arg)
 	return 1;
 }
 
+static void shutdown_notifiers(void *_cxlr)
+{
+	struct cxl_region *cxlr = _cxlr;
+
+	unregister_memory_notifier(&cxlr->memory_notifier);
+	unregister_mt_adistance_algorithm(&cxlr->adist_notifier);
+}
+
 static int cxl_region_probe(struct device *dev)
 {
 	struct cxl_region *cxlr = to_cxl_region(dev);
@@ -3418,6 +3412,18 @@  static int cxl_region_probe(struct device *dev)
 out:
 	up_read(&cxl_region_rwsem);
 
+	if (rc)
+		return rc;
+
+	cxlr->memory_notifier.notifier_call = cxl_region_perf_attrs_callback;
+	cxlr->memory_notifier.priority = CXL_CALLBACK_PRI;
+	register_memory_notifier(&cxlr->memory_notifier);
+
+	cxlr->adist_notifier.notifier_call = cxl_region_calculate_adistance;
+	cxlr->adist_notifier.priority = 100;
+	register_mt_adistance_algorithm(&cxlr->adist_notifier);
+
+	rc = devm_add_action_or_reset(&cxlr->dev, shutdown_notifiers, cxlr);
 	if (rc)
 		return rc;