diff mbox series

[v5,01/12] cxl/memdev: Add support for the Inject Poison mailbox command

Message ID 976c3fcb43f31c0b303709a14a044652bc267978.1679892337.git.alison.schofield@intel.com
State Superseded
Headers show
Series cxl: CXL Inject & Clear Poison | expand

Commit Message

Alison Schofield March 27, 2023, 5:03 a.m. UTC
From: Alison Schofield <alison.schofield@intel.com>

CXL devices optionally support the INJECT POISON mailbox command. Add
memdev driver support for the mailbox command.

Per the CXL Specification (3.0 8.2.9.8.4.2), after receiving a valid
inject poison request, the device will return poison when the address
is accessed through the CXL.mem bus. Injecting poison adds the address
to the device's Poison List and the error source is set to Injected.
In addition, the device adds a poison creation event to its internal
Informational Event log, updates the Event Status register, and if
configured, interrupts the host.

Also, per the CXL Specification, it is not an error to inject poison
into an address that already has poison present and no error is
returned from the device.

If the address is not contained in the device's dpa resource, or is
not 64 byte aligned, return -EINVAL without issuing the mbox command.

Poison injection is intended for debug only and will be exposed to
userspace through debugfs. Restrict compilation to CONFIG_DEBUG_FS.

Signed-off-by: Alison Schofield <alison.schofield@intel.com>
---
 drivers/cxl/core/memdev.c | 55 +++++++++++++++++++++++++++++++++++++++
 drivers/cxl/cxlmem.h      |  6 +++++
 2 files changed, 61 insertions(+)

Comments

Jonathan Cameron March 30, 2023, 6:47 p.m. UTC | #1
On Sun, 26 Mar 2023 22:03:07 -0700
alison.schofield@intel.com wrote:

> From: Alison Schofield <alison.schofield@intel.com>
> 
> CXL devices optionally support the INJECT POISON mailbox command. Add
> memdev driver support for the mailbox command.
> 
> Per the CXL Specification (3.0 8.2.9.8.4.2), after receiving a valid
> inject poison request, the device will return poison when the address
> is accessed through the CXL.mem bus. Injecting poison adds the address
> to the device's Poison List and the error source is set to Injected.
> In addition, the device adds a poison creation event to its internal
> Informational Event log, updates the Event Status register, and if
> configured, interrupts the host.
> 
> Also, per the CXL Specification, it is not an error to inject poison
> into an address that already has poison present and no error is
> returned from the device.
> 
> If the address is not contained in the device's dpa resource, or is
> not 64 byte aligned, return -EINVAL without issuing the mbox command.
> 
> Poison injection is intended for debug only and will be exposed to
> userspace through debugfs. Restrict compilation to CONFIG_DEBUG_FS.
> 
> Signed-off-by: Alison Schofield <alison.schofield@intel.com>
LGTM
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

> ---
>  drivers/cxl/core/memdev.c | 55 +++++++++++++++++++++++++++++++++++++++
>  drivers/cxl/cxlmem.h      |  6 +++++
>  2 files changed, 61 insertions(+)
> 
> diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
> index f26b5b6cda10..3b3ac2868848 100644
> --- a/drivers/cxl/core/memdev.c
> +++ b/drivers/cxl/core/memdev.c
> @@ -213,6 +213,61 @@ ssize_t cxl_trigger_poison_list(struct device *dev,
>  }
>  EXPORT_SYMBOL_NS_GPL(cxl_trigger_poison_list, CXL);
>  
> +static int cxl_validate_poison_dpa(struct cxl_memdev *cxlmd, u64 dpa)
> +{
> +	struct cxl_dev_state *cxlds = cxlmd->cxlds;
> +
> +	if (!IS_ENABLED(CONFIG_DEBUG_FS))
> +		return 0;
> +
> +	if (!resource_size(&cxlds->dpa_res)) {
> +		dev_dbg(cxlds->dev, "device has no dpa resource\n");
> +		return -EINVAL;
> +	}
> +	if (dpa < cxlds->dpa_res.start || dpa > cxlds->dpa_res.end) {
> +		dev_dbg(cxlds->dev, "dpa:0x%llx not in resource:%pR\n",
> +			dpa, &cxlds->dpa_res);
> +		return -EINVAL;
> +	}
> +	if (!IS_ALIGNED(dpa, 64)) {
> +		dev_dbg(cxlds->dev, "dpa:0x%llx is not 64-byte aligned\n", dpa);
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +int cxl_inject_poison(struct device *dev, u64 dpa)
> +{
> +	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
> +	struct cxl_mbox_inject_poison inject;
> +	struct cxl_mbox_cmd mbox_cmd;
> +	int rc;
> +
> +	if (!IS_ENABLED(CONFIG_DEBUG_FS))
> +		return 0;
> +
> +	down_read(&cxl_dpa_rwsem);
> +	rc = cxl_validate_poison_dpa(cxlmd, dpa);
> +	if (rc)
> +		goto out;
> +
> +	inject = (struct cxl_mbox_inject_poison) {
> +		.address = cpu_to_le64(dpa)
> +	};
> +	mbox_cmd = (struct cxl_mbox_cmd) {
> +		.opcode = CXL_MBOX_OP_INJECT_POISON,
> +		.size_in = sizeof(inject),
> +		.payload_in = &inject,
> +	};
> +	rc = cxl_internal_send_cmd(cxlmd->cxlds, &mbox_cmd);
> +out:
> +	up_read(&cxl_dpa_rwsem);
> +
> +	return rc;
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_inject_poison, CXL);
> +
>  static struct attribute *cxl_memdev_attributes[] = {
>  	&dev_attr_serial.attr,
>  	&dev_attr_firmware_version.attr,
> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> index 5febaa3f9b04..527efef2d700 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h
> @@ -602,6 +602,11 @@ struct cxl_mbox_poison_payload_out {
>  #define CXL_POISON_SOURCE_INJECTED	3
>  #define CXL_POISON_SOURCE_VENDOR	7
>  
> +/* Inject & Clear Poison  CXL 3.0 Spec 8.2.9.8.4.2/3 */
> +struct cxl_mbox_inject_poison {
> +	__le64 address;
> +};
> +
>  /**
>   * struct cxl_mem_command - Driver representation of a memory device command
>   * @info: Command information as it exists for the UAPI
> @@ -678,6 +683,7 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
>  ssize_t cxl_trigger_poison_list(struct device *dev,
>  				struct device_attribute *attr, const char *buf,
>  				size_t len);
> +int cxl_inject_poison(struct device *dev, u64 dpa);
>  
>  #ifdef CONFIG_CXL_SUSPEND
>  void cxl_mem_active_inc(void);
Dave Jiang March 31, 2023, 6:11 p.m. UTC | #2
On 3/26/23 10:03 PM, alison.schofield@intel.com wrote:
> From: Alison Schofield <alison.schofield@intel.com>
> 
> CXL devices optionally support the INJECT POISON mailbox command. Add
> memdev driver support for the mailbox command.
> 
> Per the CXL Specification (3.0 8.2.9.8.4.2), after receiving a valid
> inject poison request, the device will return poison when the address
> is accessed through the CXL.mem bus. Injecting poison adds the address

s/bus/protocol/?

> to the device's Poison List and the error source is set to Injected.
> In addition, the device adds a poison creation event to its internal
> Informational Event log, updates the Event Status register, and if
> configured, interrupts the host.
> 
> Also, per the CXL Specification, it is not an error to inject poison
> into an address that already has poison present and no error is
> returned from the device.
> 
> If the address is not contained in the device's dpa resource, or is
> not 64 byte aligned, return -EINVAL without issuing the mbox command.
> 
> Poison injection is intended for debug only and will be exposed to
> userspace through debugfs. Restrict compilation to CONFIG_DEBUG_FS.
> 
> Signed-off-by: Alison Schofield <alison.schofield@intel.com>

Just a NIT below. Otherwise,
Reviewed-by: Dave Jiang <dave.jiang@intel.com>

> ---
>   drivers/cxl/core/memdev.c | 55 +++++++++++++++++++++++++++++++++++++++
>   drivers/cxl/cxlmem.h      |  6 +++++
>   2 files changed, 61 insertions(+)
> 
> diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
> index f26b5b6cda10..3b3ac2868848 100644
> --- a/drivers/cxl/core/memdev.c
> +++ b/drivers/cxl/core/memdev.c
> @@ -213,6 +213,61 @@ ssize_t cxl_trigger_poison_list(struct device *dev,
>   }
>   EXPORT_SYMBOL_NS_GPL(cxl_trigger_poison_list, CXL);
>   
> +static int cxl_validate_poison_dpa(struct cxl_memdev *cxlmd, u64 dpa)
> +{
> +	struct cxl_dev_state *cxlds = cxlmd->cxlds;
> +
> +	if (!IS_ENABLED(CONFIG_DEBUG_FS))
> +		return 0;
> +
> +	if (!resource_size(&cxlds->dpa_res)) {
> +		dev_dbg(cxlds->dev, "device has no dpa resource\n");
> +		return -EINVAL;
> +	}
> +	if (dpa < cxlds->dpa_res.start || dpa > cxlds->dpa_res.end) {
> +		dev_dbg(cxlds->dev, "dpa:0x%llx not in resource:%pR\n",
> +			dpa, &cxlds->dpa_res);
> +		return -EINVAL;
> +	}
> +	if (!IS_ALIGNED(dpa, 64)) {
> +		dev_dbg(cxlds->dev, "dpa:0x%llx is not 64-byte aligned\n", dpa);
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +int cxl_inject_poison(struct device *dev, u64 dpa)
> +{
> +	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
> +	struct cxl_mbox_inject_poison inject;
> +	struct cxl_mbox_cmd mbox_cmd;
> +	int rc;
> +
> +	if (!IS_ENABLED(CONFIG_DEBUG_FS))
> +		return 0;
> +
> +	down_read(&cxl_dpa_rwsem);
> +	rc = cxl_validate_poison_dpa(cxlmd, dpa);
> +	if (rc)
> +		goto out;
> +
> +	inject = (struct cxl_mbox_inject_poison) {
> +		.address = cpu_to_le64(dpa)
> +	};

Why not inject.address = cpu_to_le64(dpa);? Uneless there are more 
assignments coming in later patches?

DJ

> +	mbox_cmd = (struct cxl_mbox_cmd) {
> +		.opcode = CXL_MBOX_OP_INJECT_POISON,
> +		.size_in = sizeof(inject),
> +		.payload_in = &inject,
> +	};
> +	rc = cxl_internal_send_cmd(cxlmd->cxlds, &mbox_cmd);
> +out:
> +	up_read(&cxl_dpa_rwsem);
> +
> +	return rc;
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_inject_poison, CXL);
> +
>   static struct attribute *cxl_memdev_attributes[] = {
>   	&dev_attr_serial.attr,
>   	&dev_attr_firmware_version.attr,
> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> index 5febaa3f9b04..527efef2d700 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h
> @@ -602,6 +602,11 @@ struct cxl_mbox_poison_payload_out {
>   #define CXL_POISON_SOURCE_INJECTED	3
>   #define CXL_POISON_SOURCE_VENDOR	7
>   
> +/* Inject & Clear Poison  CXL 3.0 Spec 8.2.9.8.4.2/3 */
> +struct cxl_mbox_inject_poison {
> +	__le64 address;
> +};
> +
>   /**
>    * struct cxl_mem_command - Driver representation of a memory device command
>    * @info: Command information as it exists for the UAPI
> @@ -678,6 +683,7 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
>   ssize_t cxl_trigger_poison_list(struct device *dev,
>   				struct device_attribute *attr, const char *buf,
>   				size_t len);
> +int cxl_inject_poison(struct device *dev, u64 dpa);
>   
>   #ifdef CONFIG_CXL_SUSPEND
>   void cxl_mem_active_inc(void);
Alison Schofield March 31, 2023, 6:52 p.m. UTC | #3
On Fri, Mar 31, 2023 at 11:11:11AM -0700, Dave Jiang wrote:
> 
> 
> On 3/26/23 10:03 PM, alison.schofield@intel.com wrote:
> > From: Alison Schofield <alison.schofield@intel.com>
> > 
> > CXL devices optionally support the INJECT POISON mailbox command. Add
> > memdev driver support for the mailbox command.
> > 
> > Per the CXL Specification (3.0 8.2.9.8.4.2), after receiving a valid
> > inject poison request, the device will return poison when the address
> > is accessed through the CXL.mem bus. Injecting poison adds the address
> 
> s/bus/protocol/?

Quoting the spec there, but probably better to translate into kernel
driver language and say: ..is accessed through the CXL.mem driver.

> 
> > to the device's Poison List and the error source is set to Injected.
> > In addition, the device adds a poison creation event to its internal
> > Informational Event log, updates the Event Status register, and if
> > configured, interrupts the host.
> > 
> > Also, per the CXL Specification, it is not an error to inject poison
> > into an address that already has poison present and no error is
> > returned from the device.
> > 
> > If the address is not contained in the device's dpa resource, or is
> > not 64 byte aligned, return -EINVAL without issuing the mbox command.
> > 
> > Poison injection is intended for debug only and will be exposed to
> > userspace through debugfs. Restrict compilation to CONFIG_DEBUG_FS.
> > 
> > Signed-off-by: Alison Schofield <alison.schofield@intel.com>
> 
> Just a NIT below. Otherwise,
> Reviewed-by: Dave Jiang <dave.jiang@intel.com>
> 
Thanks!
> > ---
snip
> > +
> > +	inject = (struct cxl_mbox_inject_poison) {
> > +		.address = cpu_to_le64(dpa)
> > +	};
> 
> Why not inject.address = cpu_to_le64(dpa);? Uneless there are more
> assignments coming in later patches?

Actually nothing else in that struct. It's just a pattern repeated needlessly.
I'll clean up.

Thanks,
Alison

> 
> DJ
> 

snip to end.
diff mbox series

Patch

diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index f26b5b6cda10..3b3ac2868848 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -213,6 +213,61 @@  ssize_t cxl_trigger_poison_list(struct device *dev,
 }
 EXPORT_SYMBOL_NS_GPL(cxl_trigger_poison_list, CXL);
 
+static int cxl_validate_poison_dpa(struct cxl_memdev *cxlmd, u64 dpa)
+{
+	struct cxl_dev_state *cxlds = cxlmd->cxlds;
+
+	if (!IS_ENABLED(CONFIG_DEBUG_FS))
+		return 0;
+
+	if (!resource_size(&cxlds->dpa_res)) {
+		dev_dbg(cxlds->dev, "device has no dpa resource\n");
+		return -EINVAL;
+	}
+	if (dpa < cxlds->dpa_res.start || dpa > cxlds->dpa_res.end) {
+		dev_dbg(cxlds->dev, "dpa:0x%llx not in resource:%pR\n",
+			dpa, &cxlds->dpa_res);
+		return -EINVAL;
+	}
+	if (!IS_ALIGNED(dpa, 64)) {
+		dev_dbg(cxlds->dev, "dpa:0x%llx is not 64-byte aligned\n", dpa);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int cxl_inject_poison(struct device *dev, u64 dpa)
+{
+	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+	struct cxl_mbox_inject_poison inject;
+	struct cxl_mbox_cmd mbox_cmd;
+	int rc;
+
+	if (!IS_ENABLED(CONFIG_DEBUG_FS))
+		return 0;
+
+	down_read(&cxl_dpa_rwsem);
+	rc = cxl_validate_poison_dpa(cxlmd, dpa);
+	if (rc)
+		goto out;
+
+	inject = (struct cxl_mbox_inject_poison) {
+		.address = cpu_to_le64(dpa)
+	};
+	mbox_cmd = (struct cxl_mbox_cmd) {
+		.opcode = CXL_MBOX_OP_INJECT_POISON,
+		.size_in = sizeof(inject),
+		.payload_in = &inject,
+	};
+	rc = cxl_internal_send_cmd(cxlmd->cxlds, &mbox_cmd);
+out:
+	up_read(&cxl_dpa_rwsem);
+
+	return rc;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_inject_poison, CXL);
+
 static struct attribute *cxl_memdev_attributes[] = {
 	&dev_attr_serial.attr,
 	&dev_attr_firmware_version.attr,
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 5febaa3f9b04..527efef2d700 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -602,6 +602,11 @@  struct cxl_mbox_poison_payload_out {
 #define CXL_POISON_SOURCE_INJECTED	3
 #define CXL_POISON_SOURCE_VENDOR	7
 
+/* Inject & Clear Poison  CXL 3.0 Spec 8.2.9.8.4.2/3 */
+struct cxl_mbox_inject_poison {
+	__le64 address;
+};
+
 /**
  * struct cxl_mem_command - Driver representation of a memory device command
  * @info: Command information as it exists for the UAPI
@@ -678,6 +683,7 @@  int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
 ssize_t cxl_trigger_poison_list(struct device *dev,
 				struct device_attribute *attr, const char *buf,
 				size_t len);
+int cxl_inject_poison(struct device *dev, u64 dpa);
 
 #ifdef CONFIG_CXL_SUSPEND
 void cxl_mem_active_inc(void);