diff mbox series

cxl/core: Hold the region rwsem during poison ops

Message ID 20231114025342.1123681-1-alison.schofield@intel.com
State New, archived
Headers show
Series cxl/core: Hold the region rwsem during poison ops | expand

Commit Message

Alison Schofield Nov. 14, 2023, 2:53 a.m. UTC
From: Alison Schofield <alison.schofield@intel.com>

Commit 458ba8189cb4 ("cxl: Add cxl_decoders_committed() helper")
added a lockdep_assert_held() to make sure all callers hold the
region state stable while doing work that depends on the number
of committed decoders.

That lockdep assert triggered in poison list, inject, and clear
functions highlighting a gap between region attach and decoder
commit where holding the dpa_rwsem is not enough to assure that
a DPA is not added to a region. In such a case, if poison is
found in at that DPA, the trace event omits the region info
that users expect.

Close the gap by snapshotting an unchangeable region state during
all poison ops. Hold the region_rwsem in all the places that hold
the dpa_rwsem rather than in the region specific function only.

Fixes: 7ff6ad107588 ("cxl/memdev: Add trigger_poison_list sysfs attribute")
Fixes: f0832a586396 ("cxl/region: Provide region info to the cxl_poison trace event")
Fixes: d2fbc4865802 ("cxl/memdev: Add support for the Inject Poison mailbox command")
Fixes: 9690b07748d1 ("cxl/memdev: Add support for the Clear Poison mailbox command")
Signed-off-by: Alison Schofield <alison.schofield@intel.com>
---
 drivers/cxl/core/memdev.c | 18 +++++++++---------
 drivers/cxl/core/region.c |  5 -----
 2 files changed, 9 insertions(+), 14 deletions(-)


base-commit: b85ea95d086471afb4ad062012a4d73cd328fa86

Comments

Dave Jiang Nov. 14, 2023, 6:20 p.m. UTC | #1
On 11/13/23 19:53, alison.schofield@intel.com wrote:
> From: Alison Schofield <alison.schofield@intel.com>
> 
> Commit 458ba8189cb4 ("cxl: Add cxl_decoders_committed() helper")
> added a lockdep_assert_held() to make sure all callers hold the
> region state stable while doing work that depends on the number
> of committed decoders.
> 
> That lockdep assert triggered in poison list, inject, and clear
> functions highlighting a gap between region attach and decoder
> commit where holding the dpa_rwsem is not enough to assure that
> a DPA is not added to a region. In such a case, if poison is
> found in at that DPA, the trace event omits the region info
> that users expect.
> 
> Close the gap by snapshotting an unchangeable region state during
> all poison ops. Hold the region_rwsem in all the places that hold
> the dpa_rwsem rather than in the region specific function only.
> 
> Fixes: 7ff6ad107588 ("cxl/memdev: Add trigger_poison_list sysfs attribute")
> Fixes: f0832a586396 ("cxl/region: Provide region info to the cxl_poison trace event")
> Fixes: d2fbc4865802 ("cxl/memdev: Add support for the Inject Poison mailbox command")
> Fixes: 9690b07748d1 ("cxl/memdev: Add support for the Clear Poison mailbox command")
> Signed-off-by: Alison Schofield <alison.schofield@intel.com>

Reviewed-by: Dave Jiang <dave.jiang@intel.com>

> ---
>  drivers/cxl/core/memdev.c | 18 +++++++++---------
>  drivers/cxl/core/region.c |  5 -----
>  2 files changed, 9 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
> index fc5c2b414793..961da365b097 100644
> --- a/drivers/cxl/core/memdev.c
> +++ b/drivers/cxl/core/memdev.c
> @@ -227,9 +227,8 @@ int cxl_trigger_poison_list(struct cxl_memdev *cxlmd)
>  	if (!port || !is_cxl_endpoint(port))
>  		return -EINVAL;
>  
> -	rc = down_read_interruptible(&cxl_dpa_rwsem);
> -	if (rc)
> -		return rc;
> +	down_read(&cxl_region_rwsem);
> +	down_read(&cxl_dpa_rwsem);
>  
>  	if (cxl_num_decoders_committed(port) == 0) {
>  		/* No regions mapped to this memdev */
> @@ -239,6 +238,7 @@ int cxl_trigger_poison_list(struct cxl_memdev *cxlmd)
>  		rc =  cxl_get_poison_by_endpoint(port);
>  	}
>  	up_read(&cxl_dpa_rwsem);
> +	up_read(&cxl_region_rwsem);
>  
>  	return rc;
>  }
> @@ -324,9 +324,8 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
>  	if (!IS_ENABLED(CONFIG_DEBUG_FS))
>  		return 0;
>  
> -	rc = down_read_interruptible(&cxl_dpa_rwsem);
> -	if (rc)
> -		return rc;
> +	down_read(&cxl_region_rwsem);
> +	down_read(&cxl_dpa_rwsem);
>  
>  	rc = cxl_validate_poison_dpa(cxlmd, dpa);
>  	if (rc)
> @@ -355,6 +354,7 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
>  	trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_INJECT);
>  out:
>  	up_read(&cxl_dpa_rwsem);
> +	up_read(&cxl_region_rwsem);
>  
>  	return rc;
>  }
> @@ -372,9 +372,8 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
>  	if (!IS_ENABLED(CONFIG_DEBUG_FS))
>  		return 0;
>  
> -	rc = down_read_interruptible(&cxl_dpa_rwsem);
> -	if (rc)
> -		return rc;
> +	down_read(&cxl_region_rwsem);
> +	down_read(&cxl_dpa_rwsem);
>  
>  	rc = cxl_validate_poison_dpa(cxlmd, dpa);
>  	if (rc)
> @@ -412,6 +411,7 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
>  	trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_CLEAR);
>  out:
>  	up_read(&cxl_dpa_rwsem);
> +	up_read(&cxl_region_rwsem);
>  
>  	return rc;
>  }
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index 56e575c79bb4..3e817a6f94c6 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -2467,10 +2467,6 @@ int cxl_get_poison_by_endpoint(struct cxl_port *port)
>  	struct cxl_poison_context ctx;
>  	int rc = 0;
>  
> -	rc = down_read_interruptible(&cxl_region_rwsem);
> -	if (rc)
> -		return rc;
> -
>  	ctx = (struct cxl_poison_context) {
>  		.port = port
>  	};
> @@ -2480,7 +2476,6 @@ int cxl_get_poison_by_endpoint(struct cxl_port *port)
>  		rc = cxl_get_poison_unmapped(to_cxl_memdev(port->uport_dev),
>  					     &ctx);
>  
> -	up_read(&cxl_region_rwsem);
>  	return rc;
>  }
>  
> 
> base-commit: b85ea95d086471afb4ad062012a4d73cd328fa86
Davidlohr Bueso Nov. 16, 2023, 10:14 p.m. UTC | #2
On Mon, 13 Nov 2023, alison.schofield@intel.com wrote:

>From: Alison Schofield <alison.schofield@intel.com>
>
>Commit 458ba8189cb4 ("cxl: Add cxl_decoders_committed() helper")
>added a lockdep_assert_held() to make sure all callers hold the
>region state stable while doing work that depends on the number
>of committed decoders.

Unrelated, but that kind of helper would be better as a static
inline in a header file.

>
>That lockdep assert triggered in poison list, inject, and clear
>functions highlighting a gap between region attach and decoder
>commit where holding the dpa_rwsem is not enough to assure that
>a DPA is not added to a region. In such a case, if poison is
>found in at that DPA, the trace event omits the region info
>that users expect.
>
>Close the gap by snapshotting an unchangeable region state during
>all poison ops. Hold the region_rwsem in all the places that hold
>the dpa_rwsem rather than in the region specific function only.

Makes sense and lock oder is consistent with that of attach_target().
I do think that the interruptible semantics should be kept considering
this is from sysfs/debugfs.

Thanks,
Davidlohr
Alison Schofield Nov. 20, 2023, 7:07 p.m. UTC | #3
On Thu, Nov 16, 2023 at 02:14:37PM -0800, Davidlohr Bueso wrote:
> On Mon, 13 Nov 2023, alison.schofield@intel.com wrote:
> 
> > From: Alison Schofield <alison.schofield@intel.com>
> > 
> > Commit 458ba8189cb4 ("cxl: Add cxl_decoders_committed() helper")
> > added a lockdep_assert_held() to make sure all callers hold the
> > region state stable while doing work that depends on the number
> > of committed decoders.
> 
> Unrelated, but that kind of helper would be better as a static
> inline in a header file.
> 
> > 
> > That lockdep assert triggered in poison list, inject, and clear
> > functions highlighting a gap between region attach and decoder
> > commit where holding the dpa_rwsem is not enough to assure that
> > a DPA is not added to a region. In such a case, if poison is
> > found in at that DPA, the trace event omits the region info
> > that users expect.
> > 
> > Close the gap by snapshotting an unchangeable region state during
> > all poison ops. Hold the region_rwsem in all the places that hold
> > the dpa_rwsem rather than in the region specific function only.
> 
> Makes sense and lock oder is consistent with that of attach_target().
> I do think that the interruptible semantics should be kept considering
> this is from sysfs/debugfs.

Good eye, bad me!  It was intentional to remove the interruptible,
but I should have noted it in the commit log.

At the point the lock is taken, the driver is committed to completing
the action. It is not interruptible once begun. This notion of a poison
state was first introduced here:
d0abf5787adc ("cxl/mbox: Initialize the poison state")

The basic premise is that the driver will synchronize reads of poison
so as not to return incomplete lists.

> 
> Thanks,
> Davidlohr
Dan Williams Nov. 23, 2023, 1:48 a.m. UTC | #4
alison.schofield@ wrote:
> From: Alison Schofield <alison.schofield@intel.com>
> 
> Commit 458ba8189cb4 ("cxl: Add cxl_decoders_committed() helper")
> added a lockdep_assert_held() to make sure all callers hold the
> region state stable while doing work that depends on the number
> of committed decoders.
> 
> That lockdep assert triggered in poison list, inject, and clear
> functions highlighting a gap between region attach and decoder
> commit where holding the dpa_rwsem is not enough to assure that
> a DPA is not added to a region. In such a case, if poison is
> found in at that DPA, the trace event omits the region info
> that users expect.
> 
> Close the gap by snapshotting an unchangeable region state during
> all poison ops. Hold the region_rwsem in all the places that hold
> the dpa_rwsem rather than in the region specific function only.
> 
> Fixes: 7ff6ad107588 ("cxl/memdev: Add trigger_poison_list sysfs attribute")
> Fixes: f0832a586396 ("cxl/region: Provide region info to the cxl_poison trace event")
> Fixes: d2fbc4865802 ("cxl/memdev: Add support for the Inject Poison mailbox command")
> Fixes: 9690b07748d1 ("cxl/memdev: Add support for the Clear Poison mailbox command")

I think this is an indication that the fixes would benefit from being
broken up into at least 2 commits so that the specific side effect of
each can be commented upon.

For example:

- Fix walking committed decoders in cxl_trigger_poison_list()

- Fix walking dpa to region lookups in cxl_{inject,clear}_poison()

...look like 2 separate topics in this combined patch.

> Signed-off-by: Alison Schofield <alison.schofield@intel.com>
> ---
>  drivers/cxl/core/memdev.c | 18 +++++++++---------
>  drivers/cxl/core/region.c |  5 -----
>  2 files changed, 9 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
> index fc5c2b414793..961da365b097 100644
> --- a/drivers/cxl/core/memdev.c
> +++ b/drivers/cxl/core/memdev.c
> @@ -227,9 +227,8 @@ int cxl_trigger_poison_list(struct cxl_memdev *cxlmd)
>  	if (!port || !is_cxl_endpoint(port))
>  		return -EINVAL;
>  
> -	rc = down_read_interruptible(&cxl_dpa_rwsem);
> -	if (rc)
> -		return rc;

What the rationale for dropping interruptible, it seems appropriate here
since this function is directly servicing a debugfs trigger and maybe
someone gets tired of waiting.

> +	down_read(&cxl_region_rwsem);
> +	down_read(&cxl_dpa_rwsem);
>  
>  	if (cxl_num_decoders_committed(port) == 0) {
>  		/* No regions mapped to this memdev */
> @@ -239,6 +238,7 @@ int cxl_trigger_poison_list(struct cxl_memdev *cxlmd)
>  		rc =  cxl_get_poison_by_endpoint(port);
>  	}
>  	up_read(&cxl_dpa_rwsem);
> +	up_read(&cxl_region_rwsem);
>  
>  	return rc;
>  }
> @@ -324,9 +324,8 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
>  	if (!IS_ENABLED(CONFIG_DEBUG_FS))
>  		return 0;
>  
> -	rc = down_read_interruptible(&cxl_dpa_rwsem);
> -	if (rc)
> -		return rc;
> +	down_read(&cxl_region_rwsem);
> +	down_read(&cxl_dpa_rwsem);
>  
>  	rc = cxl_validate_poison_dpa(cxlmd, dpa);
>  	if (rc)
> @@ -355,6 +354,7 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
>  	trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_INJECT);
>  out:
>  	up_read(&cxl_dpa_rwsem);
> +	up_read(&cxl_region_rwsem);
>  
>  	return rc;
>  }
> @@ -372,9 +372,8 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
>  	if (!IS_ENABLED(CONFIG_DEBUG_FS))
>  		return 0;
>  
> -	rc = down_read_interruptible(&cxl_dpa_rwsem);
> -	if (rc)
> -		return rc;
> +	down_read(&cxl_region_rwsem);
> +	down_read(&cxl_dpa_rwsem);
>  
>  	rc = cxl_validate_poison_dpa(cxlmd, dpa);
>  	if (rc)
> @@ -412,6 +411,7 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
>  	trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_CLEAR);
>  out:
>  	up_read(&cxl_dpa_rwsem);
> +	up_read(&cxl_region_rwsem);
>  
>  	return rc;
>  }
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index 56e575c79bb4..3e817a6f94c6 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -2467,10 +2467,6 @@ int cxl_get_poison_by_endpoint(struct cxl_port *port)
>  	struct cxl_poison_context ctx;
>  	int rc = 0;
>  
> -	rc = down_read_interruptible(&cxl_region_rwsem);
> -	if (rc)
> -		return rc;
> -
>  	ctx = (struct cxl_poison_context) {
>  		.port = port
>  	};
> @@ -2480,7 +2476,6 @@ int cxl_get_poison_by_endpoint(struct cxl_port *port)
>  		rc = cxl_get_poison_unmapped(to_cxl_memdev(port->uport_dev),
>  					     &ctx);
>  
> -	up_read(&cxl_region_rwsem);
>  	return rc;

This hunk deserves to be called out that region locking is being
upleveled as part of topic 1, and this reinforces splitting the 2 topics
into 2 patches.

Keep the _interruptible versions throughout, if you want to drop
interruptible that should be a separate follow-on behavior change patch.
The need to keep _interruptible also obviates conversion to cleanup.h
helpers for now.
Alison Schofield Nov. 27, 2023, 12:03 a.m. UTC | #5
On Wed, Nov 22, 2023 at 05:48:45PM -0800, Dan Williams wrote:
> alison.schofield@ wrote:
> > From: Alison Schofield <alison.schofield@intel.com>

snip

> > 
> > 
> > Fixes: 7ff6ad107588 ("cxl/memdev: Add trigger_poison_list sysfs attribute")
> > Fixes: f0832a586396 ("cxl/region: Provide region info to the cxl_poison trace event")
> > Fixes: d2fbc4865802 ("cxl/memdev: Add support for the Inject Poison mailbox command")
> > Fixes: 9690b07748d1 ("cxl/memdev: Add support for the Clear Poison mailbox command")
> 
> I think this is an indication that the fixes would benefit from being
> broken up into at least 2 commits so that the specific side effect of
> each can be commented upon.
> 

Agree. I've split into 2 patches and expanded the impact in each commit
message.

> For example:
> 
> - Fix walking committed decoders in cxl_trigger_poison_list()

That's not where I found the lock issue. When walking committed
decoders, the region_rwsem was held. It is when we think there
are no regions mapped, and collect by memdev only, that I found
a gap.

> 
> - Fix walking dpa to region lookups in cxl_{inject,clear}_poison()

We're probably in sync here, albeit different words.

> 
> ...look like 2 separate topics in this combined patch.
> 
> > Signed-off-by: Alison Schofield <alison.schofield@intel.com>
> > ---
> >  drivers/cxl/core/memdev.c | 18 +++++++++---------
> >  drivers/cxl/core/region.c |  5 -----
> >  2 files changed, 9 insertions(+), 14 deletions(-)
> > 
> > diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
> > index fc5c2b414793..961da365b097 100644
> > --- a/drivers/cxl/core/memdev.c
> > +++ b/drivers/cxl/core/memdev.c
> > @@ -227,9 +227,8 @@ int cxl_trigger_poison_list(struct cxl_memdev *cxlmd)
> >  	if (!port || !is_cxl_endpoint(port))
> >  		return -EINVAL;
> >  
> > -	rc = down_read_interruptible(&cxl_dpa_rwsem);
> > -	if (rc)
> > -		return rc;
> 
> What the rationale for dropping interruptible, it seems appropriate here
> since this function is directly servicing a debugfs trigger and maybe
> someone gets tired of waiting.
> 

This one is the sysfs, not debugfs, trigger. They shouldn't get tired of
waiting as it is only reading static info - the existing poison list of 
the device.

However, I did put back the _interruptible...for now.

I am not clear on how it all trickles down if this gets interrupted and
need to understand that further. We are trying to maintain a poison
state, and prevent partial reads of poison lists. That is not part of 
this patch, so I will study that some more and come back at it if I 
still think it's an issue.


> > +	down_read(&cxl_region_rwsem);
> > +	down_read(&cxl_dpa_rwsem);
> >  

snip

> > --- a/drivers/cxl/core/region.c
> > +++ b/drivers/cxl/core/region.c
> > @@ -2467,10 +2467,6 @@ int cxl_get_poison_by_endpoint(struct cxl_port *port)
> >  	struct cxl_poison_context ctx;
> >  	int rc = 0;
> >  
> > -	rc = down_read_interruptible(&cxl_region_rwsem);
> > -	if (rc)
> > -		return rc;
> > -
> >  	ctx = (struct cxl_poison_context) {
> >  		.port = port
> >  	};
> > @@ -2480,7 +2476,6 @@ int cxl_get_poison_by_endpoint(struct cxl_port *port)
> >  		rc = cxl_get_poison_unmapped(to_cxl_memdev(port->uport_dev),
> >  					     &ctx);
> >  
> > -	up_read(&cxl_region_rwsem);
> >  	return rc;
> 
> This hunk deserves to be called out that region locking is being
> upleveled as part of topic 1, and this reinforces splitting the 2 topics
> into 2 patches.

done.

> 
> Keep the _interruptible versions throughout, if you want to drop
> interruptible that should be a separate follow-on behavior change patch.
> The need to keep _interruptible also obviates conversion to cleanup.h
> helpers for now.

done.
diff mbox series

Patch

diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index fc5c2b414793..961da365b097 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -227,9 +227,8 @@  int cxl_trigger_poison_list(struct cxl_memdev *cxlmd)
 	if (!port || !is_cxl_endpoint(port))
 		return -EINVAL;
 
-	rc = down_read_interruptible(&cxl_dpa_rwsem);
-	if (rc)
-		return rc;
+	down_read(&cxl_region_rwsem);
+	down_read(&cxl_dpa_rwsem);
 
 	if (cxl_num_decoders_committed(port) == 0) {
 		/* No regions mapped to this memdev */
@@ -239,6 +238,7 @@  int cxl_trigger_poison_list(struct cxl_memdev *cxlmd)
 		rc =  cxl_get_poison_by_endpoint(port);
 	}
 	up_read(&cxl_dpa_rwsem);
+	up_read(&cxl_region_rwsem);
 
 	return rc;
 }
@@ -324,9 +324,8 @@  int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
 	if (!IS_ENABLED(CONFIG_DEBUG_FS))
 		return 0;
 
-	rc = down_read_interruptible(&cxl_dpa_rwsem);
-	if (rc)
-		return rc;
+	down_read(&cxl_region_rwsem);
+	down_read(&cxl_dpa_rwsem);
 
 	rc = cxl_validate_poison_dpa(cxlmd, dpa);
 	if (rc)
@@ -355,6 +354,7 @@  int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
 	trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_INJECT);
 out:
 	up_read(&cxl_dpa_rwsem);
+	up_read(&cxl_region_rwsem);
 
 	return rc;
 }
@@ -372,9 +372,8 @@  int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
 	if (!IS_ENABLED(CONFIG_DEBUG_FS))
 		return 0;
 
-	rc = down_read_interruptible(&cxl_dpa_rwsem);
-	if (rc)
-		return rc;
+	down_read(&cxl_region_rwsem);
+	down_read(&cxl_dpa_rwsem);
 
 	rc = cxl_validate_poison_dpa(cxlmd, dpa);
 	if (rc)
@@ -412,6 +411,7 @@  int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
 	trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_CLEAR);
 out:
 	up_read(&cxl_dpa_rwsem);
+	up_read(&cxl_region_rwsem);
 
 	return rc;
 }
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 56e575c79bb4..3e817a6f94c6 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -2467,10 +2467,6 @@  int cxl_get_poison_by_endpoint(struct cxl_port *port)
 	struct cxl_poison_context ctx;
 	int rc = 0;
 
-	rc = down_read_interruptible(&cxl_region_rwsem);
-	if (rc)
-		return rc;
-
 	ctx = (struct cxl_poison_context) {
 		.port = port
 	};
@@ -2480,7 +2476,6 @@  int cxl_get_poison_by_endpoint(struct cxl_port *port)
 		rc = cxl_get_poison_unmapped(to_cxl_memdev(port->uport_dev),
 					     &ctx);
 
-	up_read(&cxl_region_rwsem);
 	return rc;
 }