diff mbox series

[v2,4/4] cxl/region: Add trigger_poison_list sysfs attribute

Message ID b5e7787816326854b736c922f7fcf195fba71338.1665606782.git.alison.schofield@intel.com
State Superseded
Headers show
Series CXL Poison List Retrieval & Tracing | expand

Commit Message

Alison Schofield Oct. 12, 2022, 9:28 p.m. UTC
From: Alison Schofield <alison.schofield@intel.com>

When a boolean 'true' is written to this attribute the region driver
retrieves the poison list for the capacity each device contributes
to this region. The list includes addresses that are poisoned, or
would result in poison if accessed, and the source of the poison.
The retrieved errors are logged as kernel trace events with the
label 'cxl_poison'.

Devices not supporting the poison list capability are ignored.

Signed-off-by: Alison Schofield <alison.schofield@intel.com>
---
 Documentation/ABI/testing/sysfs-bus-cxl | 14 ++++++++++
 drivers/cxl/core/region.c               | 34 +++++++++++++++++++++++++
 2 files changed, 48 insertions(+)

Comments

Jonathan Cameron Oct. 17, 2022, 1:43 p.m. UTC | #1
On Wed, 12 Oct 2022 14:28:20 -0700
alison.schofield@intel.com wrote:

> From: Alison Schofield <alison.schofield@intel.com>
> 
> When a boolean 'true' is written to this attribute the region driver
> retrieves the poison list for the capacity each device contributes
> to this region. The list includes addresses that are poisoned, or
> would result in poison if accessed, and the source of the poison.
> The retrieved errors are logged as kernel trace events with the
> label 'cxl_poison'.
> 
> Devices not supporting the poison list capability are ignored.
> 
> Signed-off-by: Alison Schofield <alison.schofield@intel.com>

Hi Alison,

For some reason I don't have cxl_dpa_resource().
Should that be cxl_dpa_resource_start()?

Looks like it got renamed in
cxl/hdm: Add support for allocating DPA to an endpoint decoder
cf880423b6a0599499c1f83542cab0b75daa29ba

Jonathan

> +static ssize_t trigger_poison_list_store(struct device *dev,
> +					 struct device_attribute *attr,
> +					 const char *buf, size_t len)
> +{
> +	struct cxl_region *cxlr = to_cxl_region(dev);
> +	struct cxl_region_params *p = &cxlr->params;
> +	struct cxl_endpoint_decoder *cxled;
> +	struct cxl_memdev *cxlmd;
> +	u64 offset, length;
> +	int rc, i;
> +	bool tmp;
> +
> +	if (kstrtobool(buf, &tmp))
> +		return -EINVAL;
> +
> +	for (i = 0; i <  p->nr_targets; i++) {
> +		cxled = p->targets[i];
> +		cxlmd = cxled_to_memdev(cxled);
> +		if (!test_bit(CXL_MEM_COMMAND_ID_GET_POISON,
> +			      cxlmd->cxlds->enabled_cmds))
> +			continue;
> +		offset = cxl_dpa_resource(cxled);
> +		length = cxl_dpa_size(cxled);
> +		rc = cxl_mem_get_poison(cxlmd, offset, length,
> +					dev_name(&cxlr->dev));
> +		if (rc)
> +			return rc;
> +	}
> +	return len;
> +}
> +static DEVICE_ATTR_WO(trigger_poison_list);
Alison Schofield Oct. 17, 2022, 6:01 p.m. UTC | #2
On Mon, Oct 17, 2022 at 02:43:02PM +0100, Jonathan Cameron wrote:
> On Wed, 12 Oct 2022 14:28:20 -0700
> alison.schofield@intel.com wrote:
> 
> > From: Alison Schofield <alison.schofield@intel.com>
> > 
> > When a boolean 'true' is written to this attribute the region driver
> > retrieves the poison list for the capacity each device contributes
> > to this region. The list includes addresses that are poisoned, or
> > would result in poison if accessed, and the source of the poison.
> > The retrieved errors are logged as kernel trace events with the
> > label 'cxl_poison'.
> > 
> > Devices not supporting the poison list capability are ignored.
> > 
> > Signed-off-by: Alison Schofield <alison.schofield@intel.com>
> 
> Hi Alison,
> 
> For some reason I don't have cxl_dpa_resource().
> Should that be cxl_dpa_resource_start()?

Yes.
> 
> Looks like it got renamed in
> cxl/hdm: Add support for allocating DPA to an endpoint decoder
> cf880423b6a0599499c1f83542cab0b75daa29ba

Looks like it got renamed during review of that patch series. Not worth
unravelling now. I will rebase in the next version.

Sorry about that and thanks!
Alison

> 
> Jonathan
> 
> > +static ssize_t trigger_poison_list_store(struct device *dev,
> > +					 struct device_attribute *attr,
> > +					 const char *buf, size_t len)
> > +{
> > +	struct cxl_region *cxlr = to_cxl_region(dev);
> > +	struct cxl_region_params *p = &cxlr->params;
> > +	struct cxl_endpoint_decoder *cxled;
> > +	struct cxl_memdev *cxlmd;
> > +	u64 offset, length;
> > +	int rc, i;
> > +	bool tmp;
> > +
> > +	if (kstrtobool(buf, &tmp))
> > +		return -EINVAL;
> > +
> > +	for (i = 0; i <  p->nr_targets; i++) {
> > +		cxled = p->targets[i];
> > +		cxlmd = cxled_to_memdev(cxled);
> > +		if (!test_bit(CXL_MEM_COMMAND_ID_GET_POISON,
> > +			      cxlmd->cxlds->enabled_cmds))
> > +			continue;
> > +		offset = cxl_dpa_resource(cxled);
> > +		length = cxl_dpa_size(cxled);
> > +		rc = cxl_mem_get_poison(cxlmd, offset, length,
> > +					dev_name(&cxlr->dev));
> > +		if (rc)
> > +			return rc;
> > +	}
> > +	return len;
> > +}
> > +static DEVICE_ATTR_WO(trigger_poison_list);
diff mbox series

Patch

diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl
index ab3665f8738e..7e33f6ee4992 100644
--- a/Documentation/ABI/testing/sysfs-bus-cxl
+++ b/Documentation/ABI/testing/sysfs-bus-cxl
@@ -368,3 +368,17 @@  Description:
 		attribute is only visible for devices supporting the
 		capability. The retrieved errors are logged as kernel
 		trace events with the label 'cxl_poison'.
+
+
+What:		/sys/bus/cxl/devices/regionZ/trigger_poison_list
+Date:		October, 2022
+KernelVersion:	v6.2
+Contact:	linux-cxl@vger.kernel.org
+Description:
+		(WO) When a boolean 'true' is written to this attribute the
+		region driver retrieves the poison list for the capacity
+		each device contributes to this region. The list includes
+		addresses that are poisoned, or would result in poison if
+		accessed, and the source of the poison. The retrieved
+		errors are logged as kernel trace events with the label
+		'cxl_poison'.
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index ad21b2aa3b0a..e20207934336 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -72,6 +72,38 @@  static int is_dup(struct device *match, void *data)
 	return 0;
 }
 
+static ssize_t trigger_poison_list_store(struct device *dev,
+					 struct device_attribute *attr,
+					 const char *buf, size_t len)
+{
+	struct cxl_region *cxlr = to_cxl_region(dev);
+	struct cxl_region_params *p = &cxlr->params;
+	struct cxl_endpoint_decoder *cxled;
+	struct cxl_memdev *cxlmd;
+	u64 offset, length;
+	int rc, i;
+	bool tmp;
+
+	if (kstrtobool(buf, &tmp))
+		return -EINVAL;
+
+	for (i = 0; i <  p->nr_targets; i++) {
+		cxled = p->targets[i];
+		cxlmd = cxled_to_memdev(cxled);
+		if (!test_bit(CXL_MEM_COMMAND_ID_GET_POISON,
+			      cxlmd->cxlds->enabled_cmds))
+			continue;
+		offset = cxl_dpa_resource(cxled);
+		length = cxl_dpa_size(cxled);
+		rc = cxl_mem_get_poison(cxlmd, offset, length,
+					dev_name(&cxlr->dev));
+		if (rc)
+			return rc;
+	}
+	return len;
+}
+static DEVICE_ATTR_WO(trigger_poison_list);
+
 static ssize_t uuid_store(struct device *dev, struct device_attribute *attr,
 			  const char *buf, size_t len)
 {
@@ -282,6 +314,7 @@  static umode_t cxl_region_visible(struct kobject *kobj, struct attribute *a,
 
 	if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_DECODER_PMEM)
 		return 0;
+
 	return a->mode;
 }
 
@@ -555,6 +588,7 @@  static struct attribute *cxl_region_attrs[] = {
 	&dev_attr_interleave_granularity.attr,
 	&dev_attr_resource.attr,
 	&dev_attr_size.attr,
+	&dev_attr_trigger_poison_list.attr,
 	NULL,
 };