diff mbox series

[2/2] cxl/memdev: Hold region_rwsem during inject and clear poison ops

Message ID 08721dc1df0a51e4e38fecd02425c3475912dfd5.1701041440.git.alison.schofield@intel.com
State Accepted
Commit 0e33ac9c3ffe5e4f55c68345f44cea7fec2fe750
Headers show
Series cxl/core: Hold region_rwsem during poison ops | expand

Commit Message

Alison Schofield Nov. 27, 2023, 12:09 a.m. UTC
From: Alison Schofield <alison.schofield@intel.com>

Poison inject and clear are supported via debugfs where a privileged
user can inject and clear poison to a device physical address.

Commit 458ba8189cb4 ("cxl: Add cxl_decoders_committed() helper")
added a lockdep assert that highlighted a gap in poison inject and
clear functions where holding the dpa_rwsem does not assure that a
a DPA is not added to a region.

The impact for inject and clear is that if the DPA address being
injected or cleared has been attached to a region, but not yet
committed, the dev_dbg() message intended to alert the debug user
that they are acting on a mapped address is not emitted. Also, the
cxl_poison trace event that serves as a log of the inject and clear
activity will not include region info.

Close this gap by snapshotting an unchangeable region state during
poison inject and clear operations. That means holding both the
region_rwsem and the dpa_rwsem during the inject and clear ops.

Fixes: d2fbc4865802 ("cxl/memdev: Add support for the Inject Poison mailbox command")
Fixes: 9690b07748d1 ("cxl/memdev: Add support for the Clear Poison mailbox command")
Signed-off-by: Alison Schofield <alison.schofield@intel.com>
---
 drivers/cxl/core/memdev.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

Comments

Dave Jiang Nov. 27, 2023, 5:58 p.m. UTC | #1
On 11/26/23 17:09, alison.schofield@intel.com wrote:
> From: Alison Schofield <alison.schofield@intel.com>
> 
> Poison inject and clear are supported via debugfs where a privileged
> user can inject and clear poison to a device physical address.
> 
> Commit 458ba8189cb4 ("cxl: Add cxl_decoders_committed() helper")
> added a lockdep assert that highlighted a gap in poison inject and
> clear functions where holding the dpa_rwsem does not assure that a
> a DPA is not added to a region.
> 
> The impact for inject and clear is that if the DPA address being
> injected or cleared has been attached to a region, but not yet
> committed, the dev_dbg() message intended to alert the debug user
> that they are acting on a mapped address is not emitted. Also, the
> cxl_poison trace event that serves as a log of the inject and clear
> activity will not include region info.
> 
> Close this gap by snapshotting an unchangeable region state during
> poison inject and clear operations. That means holding both the
> region_rwsem and the dpa_rwsem during the inject and clear ops.
> 
> Fixes: d2fbc4865802 ("cxl/memdev: Add support for the Inject Poison mailbox command")
> Fixes: 9690b07748d1 ("cxl/memdev: Add support for the Clear Poison mailbox command")
> Signed-off-by: Alison Schofield <alison.schofield@intel.com>

Reviewed-by: Dave Jiang <dave.jiang@intel.com>

> ---
>  drivers/cxl/core/memdev.c | 18 ++++++++++++++++--
>  1 file changed, 16 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
> index 5ad1b13e780a..2f43d368ba07 100644
> --- a/drivers/cxl/core/memdev.c
> +++ b/drivers/cxl/core/memdev.c
> @@ -331,10 +331,16 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
>  	if (!IS_ENABLED(CONFIG_DEBUG_FS))
>  		return 0;
>  
> -	rc = down_read_interruptible(&cxl_dpa_rwsem);
> +	rc = down_read_interruptible(&cxl_region_rwsem);
>  	if (rc)
>  		return rc;
>  
> +	rc = down_read_interruptible(&cxl_dpa_rwsem);
> +	if (rc) {
> +		up_read(&cxl_region_rwsem);
> +		return rc;
> +	}
> +
>  	rc = cxl_validate_poison_dpa(cxlmd, dpa);
>  	if (rc)
>  		goto out;
> @@ -362,6 +368,7 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
>  	trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_INJECT);
>  out:
>  	up_read(&cxl_dpa_rwsem);
> +	up_read(&cxl_region_rwsem);
>  
>  	return rc;
>  }
> @@ -379,10 +386,16 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
>  	if (!IS_ENABLED(CONFIG_DEBUG_FS))
>  		return 0;
>  
> -	rc = down_read_interruptible(&cxl_dpa_rwsem);
> +	rc = down_read_interruptible(&cxl_region_rwsem);
>  	if (rc)
>  		return rc;
>  
> +	rc = down_read_interruptible(&cxl_dpa_rwsem);
> +	if (rc) {
> +		up_read(&cxl_region_rwsem);
> +		return rc;
> +	}
> +
>  	rc = cxl_validate_poison_dpa(cxlmd, dpa);
>  	if (rc)
>  		goto out;
> @@ -419,6 +432,7 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
>  	trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_CLEAR);
>  out:
>  	up_read(&cxl_dpa_rwsem);
> +	up_read(&cxl_region_rwsem);
>  
>  	return rc;
>  }
Davidlohr Bueso Nov. 28, 2023, 12:58 a.m. UTC | #2
On Sun, 26 Nov 2023, alison.schofield@intel.com wrote:

>From: Alison Schofield <alison.schofield@intel.com>
>
>Poison inject and clear are supported via debugfs where a privileged
>user can inject and clear poison to a device physical address.
>
>Commit 458ba8189cb4 ("cxl: Add cxl_decoders_committed() helper")
>added a lockdep assert that highlighted a gap in poison inject and
>clear functions where holding the dpa_rwsem does not assure that a
>a DPA is not added to a region.
>
>The impact for inject and clear is that if the DPA address being
>injected or cleared has been attached to a region, but not yet
>committed, the dev_dbg() message intended to alert the debug user
>that they are acting on a mapped address is not emitted. Also, the
>cxl_poison trace event that serves as a log of the inject and clear
>activity will not include region info.
>
>Close this gap by snapshotting an unchangeable region state during
>poison inject and clear operations. That means holding both the
>region_rwsem and the dpa_rwsem during the inject and clear ops.
>
>Fixes: d2fbc4865802 ("cxl/memdev: Add support for the Inject Poison mailbox command")
>Fixes: 9690b07748d1 ("cxl/memdev: Add support for the Clear Poison mailbox command")
>Signed-off-by: Alison Schofield <alison.schofield@intel.com>

Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>
diff mbox series

Patch

diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index 5ad1b13e780a..2f43d368ba07 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -331,10 +331,16 @@  int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
 	if (!IS_ENABLED(CONFIG_DEBUG_FS))
 		return 0;
 
-	rc = down_read_interruptible(&cxl_dpa_rwsem);
+	rc = down_read_interruptible(&cxl_region_rwsem);
 	if (rc)
 		return rc;
 
+	rc = down_read_interruptible(&cxl_dpa_rwsem);
+	if (rc) {
+		up_read(&cxl_region_rwsem);
+		return rc;
+	}
+
 	rc = cxl_validate_poison_dpa(cxlmd, dpa);
 	if (rc)
 		goto out;
@@ -362,6 +368,7 @@  int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
 	trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_INJECT);
 out:
 	up_read(&cxl_dpa_rwsem);
+	up_read(&cxl_region_rwsem);
 
 	return rc;
 }
@@ -379,10 +386,16 @@  int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
 	if (!IS_ENABLED(CONFIG_DEBUG_FS))
 		return 0;
 
-	rc = down_read_interruptible(&cxl_dpa_rwsem);
+	rc = down_read_interruptible(&cxl_region_rwsem);
 	if (rc)
 		return rc;
 
+	rc = down_read_interruptible(&cxl_dpa_rwsem);
+	if (rc) {
+		up_read(&cxl_region_rwsem);
+		return rc;
+	}
+
 	rc = cxl_validate_poison_dpa(cxlmd, dpa);
 	if (rc)
 		goto out;
@@ -419,6 +432,7 @@  int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
 	trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_CLEAR);
 out:
 	up_read(&cxl_dpa_rwsem);
+	up_read(&cxl_region_rwsem);
 
 	return rc;
 }