new file mode 100644
@@ -0,0 +1,85 @@
+What: /sys/bus/edac/devices/<dev-name>/ecs_fru*
+Date: Oct 2024
+KernelVersion: 6.12
+Contact: linux-edac@vger.kernel.org
+Description:
+ The sysfs EDAC bus devices /<dev-name>/ecs_fru* subdirectories
+ belong to the memory media ECS (Error Check Scrub) control
+ feature, where the <dev-name> directory corresponds to a device
+ registered with the EDAC ECS driver and thus also registered
+ with the generic EDAC RAS driver.
+ Each /ecs_fru* subdirectory corresponds to one media FRU
+ (Field Replaceable Unit) under the memory device.
+ A sysfs ECS attribute node is present only if the client
+ driver has implemented the corresponding attribute callback
+ function and passed it in ops to the EDAC RAS feature driver
+ during registration.
+
+What: /sys/bus/edac/devices/<dev-name>/ecs_fru*/log_entry_type
+Date: Oct 2024
+KernelVersion: 6.12
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RW) The log entry type, i.e. how the DDR5 ECS log is reported.
+ 00b - per DRAM.
+ 01b - per memory media FRU.
+
+What: /sys/bus/edac/devices/<dev-name>/ecs_fru*/log_entry_type_per_dram
+Date: Oct 2024
+KernelVersion: 6.12
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RO) True if current log entry type is per DRAM.
+
+What: /sys/bus/edac/devices/<dev-name>/ecs_fru*/log_entry_type_per_memory_media
+Date: Oct 2024
+KernelVersion: 6.12
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RO) True if current log entry type is per memory media FRU.
+
+What: /sys/bus/edac/devices/<dev-name>/ecs_fru*/mode
+Date: Oct 2024
+KernelVersion: 6.12
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RW) The mode in which the DDR5 ECS counts errors.
+ 0 - ECS counts rows with errors.
+ 1 - ECS counts codewords with errors.
+
+What: /sys/bus/edac/devices/<dev-name>/ecs_fru*/mode_counts_rows
+Date: Oct 2024
+KernelVersion: 6.12
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RO) True if the current mode is "ECS counts rows with errors".
+
+What: /sys/bus/edac/devices/<dev-name>/ecs_fru*/mode_counts_codewords
+Date: Oct 2024
+KernelVersion: 6.12
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RO) True if the current mode is "ECS counts codewords with errors".
+
+What: /sys/bus/edac/devices/<dev-name>/ecs_fru*/name
+Date: Oct 2024
+KernelVersion: 6.12
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RO) Name of the memory media FRU's ECS.
+
+What: /sys/bus/edac/devices/<dev-name>/ecs_fru*/reset
+Date: Oct 2024
+KernelVersion: 6.12
+Contact: linux-edac@vger.kernel.org
+Description:
+ (WO) Reset the ECS ECC counter.
+ 0 - normal, ECC counter running actively.
+ 1 - reset ECC counter to the default value.
+
+What: /sys/bus/edac/devices/<dev-name>/ecs_fru*/threshold
+Date: Oct 2024
+KernelVersion: 6.12
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RW) ECS threshold count per GB of memory cells.
@@ -10,7 +10,7 @@ obj-$(CONFIG_EDAC) := edac_core.o
edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o
edac_core-y += edac_module.o edac_device_sysfs.o wq.o
-edac_core-y += edac_ras_feature.o edac_scrub.o
+edac_core-y += edac_ras_feature.o edac_scrub.o edac_ecs.o
edac_core-$(CONFIG_EDAC_DEBUG) += debugfs.o
new file mode 100755
@@ -0,0 +1,396 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ECS driver supporting control of on-die error check scrub
+ * (e.g. DDR5 ECS). The common sysfs ECS interface promotes
+ * unambiguous access from userspace.
+ *
+ * Copyright (c) 2024 HiSilicon Limited.
+ */
+
+#define pr_fmt(fmt) "EDAC ECS: " fmt
+
+#include <linux/edac_ras_feature.h>
+
+#define EDAC_ECS_FRU_NAME "ecs_fru"
+
+enum edac_ecs_attributes { /* slot of each per-FRU sysfs attribute; matched against attr_id in ecs_attr_visible() */
+ ECS_LOG_ENTRY_TYPE,
+ ECS_LOG_ENTRY_TYPE_PER_DRAM,
+ ECS_LOG_ENTRY_TYPE_PER_MEMORY_MEDIA,
+ ECS_MODE,
+ ECS_MODE_COUNTS_ROWS,
+ ECS_MODE_COUNTS_CODEWORDS,
+ ECS_RESET,
+ ECS_NAME,
+ ECS_THRESHOLD,
+ ECS_MAX_ATTRS /* number of attributes; sizes the per-FRU attribute arrays */
+};
+
+struct edac_ecs_dev_attr { /* device attribute tagged with the media FRU it serves */
+ struct device_attribute dev_attr; /* embedded attribute; to_ecs_dev_attr() recovers the wrapper from it */
+ int fru_id; /* FRU index forwarded to the edac_ecs_ops callbacks */
+};
+
+struct edac_ecs_fru_context { /* sysfs state kept for one media FRU */
+ char name[EDAC_RAS_NAME_LEN]; /* name of the FRU's attribute group */
+ struct edac_ecs_dev_attr ecs_dev_attr[ECS_MAX_ATTRS]; /* one wrapper per edac_ecs_attributes slot */
+ struct attribute *ecs_attrs[ECS_MAX_ATTRS + 1]; /* pointers into ecs_dev_attr[]; the extra last slot is left NULL */
+ struct attribute_group group; /* group published through attr_groups[] */
+};
+
+struct edac_ecs_context { /* ECS bookkeeping for one device */
+ u16 num_media_frus; /* number of entries in fru_ctxs */
+ struct edac_ecs_fru_context *fru_ctxs; /* per-FRU contexts, allocated in ecs_create_desc() */
+};
+
+/* Map an embedded struct device_attribute back to its edac_ecs_dev_attr. */
+#define to_ecs_dev_attr(_da)	\
+	container_of(_da, struct edac_ecs_dev_attr, dev_attr)
+
+/*
+ * Show handler for "log_entry_type": asks the client driver, through
+ * ops->get_log_entry_type(), for the value of the FRU bound to @attr and
+ * prints it as an unsigned decimal followed by a newline.
+ *
+ * Returns the number of bytes emitted, or the callback's non-zero result.
+ */
+static ssize_t log_entry_type_show(struct device *ras_feat_dev,
+				   struct device_attribute *attr,
+				   char *buf)
+{
+	struct edac_ras_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev);
+	int fru_id = to_ecs_dev_attr(attr)->fru_id;
+	u32 type;
+	int err;
+
+	err = ctx->ecs.ops->get_log_entry_type(ras_feat_dev->parent,
+					       ctx->ecs.private,
+					       fru_id, &type);
+	if (err)
+		return err;
+
+	return sysfs_emit(buf, "%u\n", type);
+}
+
+/*
+ * Store handler for "log_entry_type".
+ *
+ * Parses the user string (base auto-detected) and forwards the value to
+ * ops->set_log_entry_type() for the FRU bound to @attr. The callback takes
+ * a u32, so the input is parsed with kstrtou32(): negative or out-of-range
+ * input is rejected by the parser instead of being silently truncated on
+ * the way into the callback.
+ *
+ * Returns @len on success, a negative error if parsing fails, or the
+ * callback's non-zero result.
+ */
+static ssize_t log_entry_type_store(struct device *ras_feat_dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t len)
+{
+	struct edac_ecs_dev_attr *ecs_dev_attr = to_ecs_dev_attr(attr);
+	struct edac_ras_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev);
+	const struct edac_ecs_ops *ops = ctx->ecs.ops;
+	u32 val;
+	int ret;
+
+	ret = kstrtou32(buf, 0, &val);
+	if (ret < 0)
+		return ret;
+
+	ret = ops->set_log_entry_type(ras_feat_dev->parent, ctx->ecs.private,
+				      ecs_dev_attr->fru_id, val);
+	if (ret)
+		return ret;
+
+	return len;
+}
+
+/*
+ * Show handler for "log_entry_type_per_dram": prints the value reported by
+ * ops->get_log_entry_type_per_dram() for the FRU bound to @attr as an
+ * unsigned decimal followed by a newline.
+ *
+ * Returns the number of bytes emitted, or the callback's non-zero result.
+ */
+static ssize_t log_entry_type_per_dram_show(struct device *ras_feat_dev,
+					    struct device_attribute *attr,
+					    char *buf)
+{
+	struct edac_ras_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev);
+	int fru_id = to_ecs_dev_attr(attr)->fru_id;
+	u32 per_dram;
+	int err;
+
+	err = ctx->ecs.ops->get_log_entry_type_per_dram(ras_feat_dev->parent,
+							ctx->ecs.private,
+							fru_id, &per_dram);
+	if (err)
+		return err;
+
+	return sysfs_emit(buf, "%u\n", per_dram);
+}
+
+/*
+ * Show handler for "log_entry_type_per_memory_media": prints the value
+ * reported by ops->get_log_entry_type_per_memory_media() for the FRU bound
+ * to @attr as an unsigned decimal followed by a newline.
+ *
+ * Returns the number of bytes emitted, or the callback's non-zero result.
+ */
+static ssize_t log_entry_type_per_memory_media_show(struct device *ras_feat_dev,
+						    struct device_attribute *attr,
+						    char *buf)
+{
+	struct edac_ras_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev);
+	int fru_id = to_ecs_dev_attr(attr)->fru_id;
+	u32 per_media;
+	int err;
+
+	err = ctx->ecs.ops->get_log_entry_type_per_memory_media(ras_feat_dev->parent,
+								ctx->ecs.private,
+								fru_id, &per_media);
+	if (err)
+		return err;
+
+	return sysfs_emit(buf, "%u\n", per_media);
+}
+
+/*
+ * Show handler for "mode": prints the value reported by ops->get_mode()
+ * for the FRU bound to @attr as an unsigned decimal followed by a newline.
+ *
+ * Returns the number of bytes emitted, or the callback's non-zero result.
+ */
+static ssize_t mode_show(struct device *ras_feat_dev,
+			 struct device_attribute *attr,
+			 char *buf)
+{
+	struct edac_ras_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev);
+	int fru_id = to_ecs_dev_attr(attr)->fru_id;
+	u32 mode;
+	int err;
+
+	err = ctx->ecs.ops->get_mode(ras_feat_dev->parent, ctx->ecs.private,
+				     fru_id, &mode);
+	if (err)
+		return err;
+
+	return sysfs_emit(buf, "%u\n", mode);
+}
+
+/*
+ * Store handler for "mode".
+ *
+ * Parses the user string (base auto-detected) and forwards the value to
+ * ops->set_mode() for the FRU bound to @attr. The callback takes a u32, so
+ * the input is parsed with kstrtou32(): negative or out-of-range input is
+ * rejected by the parser instead of being silently truncated on the way
+ * into the callback.
+ *
+ * Returns @len on success, a negative error if parsing fails, or the
+ * callback's non-zero result.
+ */
+static ssize_t mode_store(struct device *ras_feat_dev,
+			  struct device_attribute *attr,
+			  const char *buf, size_t len)
+{
+	struct edac_ecs_dev_attr *ecs_dev_attr = to_ecs_dev_attr(attr);
+	struct edac_ras_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev);
+	const struct edac_ecs_ops *ops = ctx->ecs.ops;
+	u32 val;
+	int ret;
+
+	ret = kstrtou32(buf, 0, &val);
+	if (ret < 0)
+		return ret;
+
+	ret = ops->set_mode(ras_feat_dev->parent, ctx->ecs.private,
+			    ecs_dev_attr->fru_id, val);
+	if (ret)
+		return ret;
+
+	return len;
+}
+
+/*
+ * Show handler for "mode_counts_rows": prints the value reported by
+ * ops->get_mode_counts_rows() for the FRU bound to @attr as an unsigned
+ * decimal followed by a newline.
+ *
+ * Returns the number of bytes emitted, or the callback's non-zero result.
+ */
+static ssize_t mode_counts_rows_show(struct device *ras_feat_dev,
+				     struct device_attribute *attr,
+				     char *buf)
+{
+	struct edac_ras_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev);
+	int fru_id = to_ecs_dev_attr(attr)->fru_id;
+	u32 counts_rows;
+	int err;
+
+	err = ctx->ecs.ops->get_mode_counts_rows(ras_feat_dev->parent,
+						 ctx->ecs.private,
+						 fru_id, &counts_rows);
+	if (err)
+		return err;
+
+	return sysfs_emit(buf, "%u\n", counts_rows);
+}
+
+/*
+ * Show handler for "mode_counts_codewords": prints the value reported by
+ * ops->get_mode_counts_codewords() for the FRU bound to @attr as an
+ * unsigned decimal followed by a newline.
+ *
+ * Returns the number of bytes emitted, or the callback's non-zero result.
+ */
+static ssize_t mode_counts_codewords_show(struct device *ras_feat_dev,
+					  struct device_attribute *attr,
+					  char *buf)
+{
+	struct edac_ras_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev);
+	int fru_id = to_ecs_dev_attr(attr)->fru_id;
+	u32 counts_codewords;
+	int err;
+
+	err = ctx->ecs.ops->get_mode_counts_codewords(ras_feat_dev->parent,
+						      ctx->ecs.private,
+						      fru_id, &counts_codewords);
+	if (err)
+		return err;
+
+	return sysfs_emit(buf, "%u\n", counts_codewords);
+}
+
+/*
+ * Store handler for "reset".
+ *
+ * Parses the user string (base auto-detected) and forwards the value to
+ * ops->reset() for the FRU bound to @attr. The callback takes a u32, so the
+ * input is parsed with kstrtou32(): negative or out-of-range input is
+ * rejected by the parser instead of being silently truncated on the way
+ * into the callback.
+ *
+ * Returns @len on success, a negative error if parsing fails, or the
+ * callback's non-zero result.
+ */
+static ssize_t reset_store(struct device *ras_feat_dev,
+			   struct device_attribute *attr,
+			   const char *buf, size_t len)
+{
+	struct edac_ecs_dev_attr *ecs_dev_attr = to_ecs_dev_attr(attr);
+	struct edac_ras_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev);
+	const struct edac_ecs_ops *ops = ctx->ecs.ops;
+	u32 val;
+	int ret;
+
+	ret = kstrtou32(buf, 0, &val);
+	if (ret < 0)
+		return ret;
+
+	ret = ops->reset(ras_feat_dev->parent, ctx->ecs.private,
+			 ecs_dev_attr->fru_id, val);
+	if (ret)
+		return ret;
+
+	return len;
+}
+
+/*
+ * Show handler for "name": hands the sysfs buffer to ops->get_name() for
+ * the FRU bound to @attr and reports the length of the string found in
+ * @buf afterwards.
+ *
+ * Returns strlen(@buf) on success, or the callback's non-zero result.
+ */
+static ssize_t name_show(struct device *ras_feat_dev,
+			 struct device_attribute *attr, char *buf)
+{
+	struct edac_ras_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev);
+	int fru_id = to_ecs_dev_attr(attr)->fru_id;
+	int err;
+
+	err = ctx->ecs.ops->get_name(ras_feat_dev->parent, ctx->ecs.private,
+				     fru_id, buf);
+	if (err)
+		return err;
+
+	return strlen(buf);
+}
+
+/*
+ * Show handler for "threshold": prints the value reported by
+ * ops->get_threshold() for the FRU bound to @attr as an unsigned decimal
+ * followed by a newline.
+ *
+ * Returns the number of bytes emitted, or the callback's non-zero result.
+ */
+static ssize_t threshold_show(struct device *ras_feat_dev,
+			      struct device_attribute *attr, char *buf)
+{
+	struct edac_ras_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev);
+	int fru_id = to_ecs_dev_attr(attr)->fru_id;
+	u32 threshold;
+	int err;
+
+	err = ctx->ecs.ops->get_threshold(ras_feat_dev->parent,
+					  ctx->ecs.private,
+					  fru_id, &threshold);
+	if (err)
+		return err;
+
+	return sysfs_emit(buf, "%u\n", threshold);
+}
+
+/*
+ * Store handler for "threshold".
+ *
+ * Parses the user string (base auto-detected) and forwards the value to
+ * ops->set_threshold() for the FRU bound to @attr. The callback takes a
+ * u32, so the input is parsed with kstrtou32(): negative or out-of-range
+ * input is rejected by the parser instead of being silently truncated on
+ * the way into the callback.
+ *
+ * Returns @len on success, a negative error if parsing fails, or the
+ * callback's non-zero result.
+ */
+static ssize_t threshold_store(struct device *ras_feat_dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t len)
+{
+	struct edac_ecs_dev_attr *ecs_dev_attr = to_ecs_dev_attr(attr);
+	struct edac_ras_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev);
+	const struct edac_ecs_ops *ops = ctx->ecs.ops;
+	u32 val;
+	int ret;
+
+	ret = kstrtou32(buf, 0, &val);
+	if (ret < 0)
+		return ret;
+
+	ret = ops->set_threshold(ras_feat_dev->parent, ctx->ecs.private,
+				 ecs_dev_attr->fru_id, val);
+	if (ret)
+		return ret;
+
+	return len;
+}
+
+/*
+ * Mode of an attribute backed by a getter and an optional setter: hidden
+ * (0) without a getter, read-only (0444) with a getter but no setter,
+ * otherwise the mode the attribute was declared with.
+ */
+static umode_t ecs_rw_attr_mode(bool has_get, bool has_set, umode_t mode)
+{
+	if (!has_get)
+		return 0;
+
+	return has_set ? mode : 0444;
+}
+
+/*
+ * is_visible callback of the per-FRU attribute groups.
+ *
+ * Decides, from the callbacks the client driver supplied in its ops, with
+ * which mode the attribute in slot @attr_id is exposed; 0 hides it.
+ * Unknown slots are hidden.
+ */
+static umode_t ecs_attr_visible(struct kobject *kobj,
+				struct attribute *a, int attr_id)
+{
+	struct device *ras_feat_dev = kobj_to_dev(kobj);
+	struct edac_ras_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev);
+	const struct edac_ecs_ops *ops = ctx->ecs.ops;
+	bool present;
+
+	switch (attr_id) {
+	/* attributes with a getter and an optional setter */
+	case ECS_LOG_ENTRY_TYPE:
+		return ecs_rw_attr_mode(!!ops->get_log_entry_type,
+					!!ops->set_log_entry_type, a->mode);
+	case ECS_MODE:
+		return ecs_rw_attr_mode(!!ops->get_mode,
+					!!ops->set_mode, a->mode);
+	case ECS_THRESHOLD:
+		return ecs_rw_attr_mode(!!ops->get_threshold,
+					!!ops->set_threshold, a->mode);
+
+	/* attributes backed by a single callback */
+	case ECS_LOG_ENTRY_TYPE_PER_DRAM:
+		present = !!ops->get_log_entry_type_per_dram;
+		break;
+	case ECS_LOG_ENTRY_TYPE_PER_MEMORY_MEDIA:
+		present = !!ops->get_log_entry_type_per_memory_media;
+		break;
+	case ECS_MODE_COUNTS_ROWS:
+		present = !!ops->get_mode_counts_rows;
+		break;
+	case ECS_MODE_COUNTS_CODEWORDS:
+		present = !!ops->get_mode_counts_codewords;
+		break;
+	case ECS_RESET:
+		present = !!ops->reset;
+		break;
+	case ECS_NAME:
+		present = !!ops->get_name;
+		break;
+	default:
+		present = false;
+		break;
+	}
+
+	return present ? a->mode : 0;
+}
+
+/*
+ * Compound-literal constructors for struct edac_ecs_dev_attr: each pairs
+ * the read-only, write-only or read-write device attribute for @_name
+ * with the media FRU index @_fru_id.
+ */
+#define EDAC_ECS_ATTR_RO(_name, _fru_id)		\
+	((struct edac_ecs_dev_attr) {			\
+		.dev_attr = __ATTR_RO(_name),		\
+		.fru_id = _fru_id			\
+	})
+
+#define EDAC_ECS_ATTR_WO(_name, _fru_id)		\
+	((struct edac_ecs_dev_attr) {			\
+		.dev_attr = __ATTR_WO(_name),		\
+		.fru_id = _fru_id			\
+	})
+
+#define EDAC_ECS_ATTR_RW(_name, _fru_id)		\
+	((struct edac_ecs_dev_attr) {			\
+		.dev_attr = __ATTR_RW(_name),		\
+		.fru_id = _fru_id			\
+	})
+
+/*
+ * ecs_create_desc - build one sysfs attribute group per media FRU
+ * @ecs_dev: device the device-managed allocations are tied to
+ * @attr_groups: output array; entries [0, @num_media_frus) are filled in
+ * @num_media_frus: number of media FRUs
+ *
+ * Allocates a context holding one edac_ecs_fru_context per FRU, populates
+ * the attributes of each FRU, names its group "ecs_fru<N>" and stores a
+ * pointer to the group in @attr_groups.
+ *
+ * Returns 0 on success, -ENOMEM if an allocation fails.
+ */
+static int ecs_create_desc(struct device *ecs_dev,
+			   const struct attribute_group **attr_groups,
+			   u16 num_media_frus)
+{
+	struct edac_ecs_context *ecs_ctx;
+	u32 fru;
+
+	ecs_ctx = devm_kzalloc(ecs_dev, sizeof(*ecs_ctx), GFP_KERNEL);
+	if (!ecs_ctx)
+		return -ENOMEM;
+
+	ecs_ctx->num_media_frus = num_media_frus;
+	ecs_ctx->fru_ctxs = devm_kcalloc(ecs_dev, num_media_frus,
+					 sizeof(*ecs_ctx->fru_ctxs),
+					 GFP_KERNEL);
+	if (!ecs_ctx->fru_ctxs)
+		return -ENOMEM;
+
+	for (fru = 0; fru < num_media_frus; fru++) {
+		struct edac_ecs_fru_context *fru_ctx = &ecs_ctx->fru_ctxs[fru];
+		struct edac_ecs_dev_attr *dev_attrs = fru_ctx->ecs_dev_attr;
+		struct attribute_group *group = &fru_ctx->group;
+		int i;
+
+		/*
+		 * Index by enum so each slot provably matches the attr_id
+		 * that ecs_attr_visible() switches on.
+		 */
+		dev_attrs[ECS_LOG_ENTRY_TYPE] =
+			EDAC_ECS_ATTR_RW(log_entry_type, fru);
+		dev_attrs[ECS_LOG_ENTRY_TYPE_PER_DRAM] =
+			EDAC_ECS_ATTR_RO(log_entry_type_per_dram, fru);
+		dev_attrs[ECS_LOG_ENTRY_TYPE_PER_MEMORY_MEDIA] =
+			EDAC_ECS_ATTR_RO(log_entry_type_per_memory_media, fru);
+		dev_attrs[ECS_MODE] =
+			EDAC_ECS_ATTR_RW(mode, fru);
+		dev_attrs[ECS_MODE_COUNTS_ROWS] =
+			EDAC_ECS_ATTR_RO(mode_counts_rows, fru);
+		dev_attrs[ECS_MODE_COUNTS_CODEWORDS] =
+			EDAC_ECS_ATTR_RO(mode_counts_codewords, fru);
+		dev_attrs[ECS_RESET] =
+			EDAC_ECS_ATTR_WO(reset, fru);
+		dev_attrs[ECS_NAME] =
+			EDAC_ECS_ATTR_RO(name, fru);
+		dev_attrs[ECS_THRESHOLD] =
+			EDAC_ECS_ATTR_RW(threshold, fru);
+
+		for (i = 0; i < ECS_MAX_ATTRS; i++) {
+			/*
+			 * These attributes live in allocated memory, so they
+			 * must be initialised with sysfs_attr_init() to get a
+			 * lockdep key before being registered.
+			 */
+			sysfs_attr_init(&dev_attrs[i].dev_attr.attr);
+			fru_ctx->ecs_attrs[i] = &dev_attrs[i].dev_attr.attr;
+		}
+		/* ecs_attrs[ECS_MAX_ATTRS] stays NULL from the zeroed allocation */
+
+		/* bounded write; %u matches the unsigned FRU index */
+		snprintf(fru_ctx->name, sizeof(fru_ctx->name), "%s%u",
+			 EDAC_ECS_FRU_NAME, fru);
+		group->name = fru_ctx->name;
+		group->attrs = fru_ctx->ecs_attrs;
+		group->is_visible = ecs_attr_visible;
+
+		attr_groups[fru] = group;
+	}
+
+	return 0;
+}
+
+/**
+ * edac_ecs_get_desc - get edac ecs descriptors
+ * @ecs_dev: client ecs device
+ * @attr_groups: pointer to attribute group container
+ * @num_media_frus: number of media FRUs in the device
+ *
+ * Rejects a NULL @ecs_dev, a NULL @attr_groups or a zero @num_media_frus
+ * with -EINVAL; otherwise hands the request to ecs_create_desc().
+ *
+ * Returns 0 on success, error otherwise.
+ */
+int edac_ecs_get_desc(struct device *ecs_dev,
+		      const struct attribute_group **attr_groups,
+		      u16 num_media_frus)
+{
+	if (ecs_dev && attr_groups && num_media_frus)
+		return ecs_create_desc(ecs_dev, attr_groups, num_media_frus);
+
+	return -EINVAL;
+}
@@ -68,10 +68,15 @@ static int edac_ras_feat_ecs_init(struct device *parent,
const struct attribute_group **attr_groups)
{
int num = efeat->ecs_info.num_media_frus;
+ int ret;
edata->ops = efeat->ecs_ops;
edata->private = efeat->ecs_ctx;
+ ret = edac_ecs_get_desc(parent, attr_groups, num);
+ if (ret)
+ return ret;
+
return num;
}
@@ -49,10 +49,46 @@ struct edac_scrub_ops {
const struct attribute_group *edac_scrub_get_desc(void);
+/**
+ * struct edac_ecs_ops - ECS device operations (all elements optional)
+ * @get_log_entry_type: read the log entry type value.
+ * @set_log_entry_type: set the log entry type value.
+ * @get_log_entry_type_per_dram: read the log entry type per dram value.
+ * @get_log_entry_type_per_memory_media: read the log entry type per memory media value.
+ * @get_mode: read the mode value.
+ * @set_mode: set the mode value.
+ * @get_mode_counts_rows: read the mode counts rows value.
+ * @get_mode_counts_codewords: read the mode counts codewords value.
+ * @reset: reset the ECS counter.
+ * @get_threshold: read the threshold value.
+ * @set_threshold: set the threshold value.
+ * @get_name: get the ECS's name.
+ */
+struct edac_ecs_ops {
+ int (*get_log_entry_type)(struct device *dev, void *drv_data, int fru_id, u32 *val);
+ int (*set_log_entry_type)(struct device *dev, void *drv_data, int fru_id, u32 val);
+ int (*get_log_entry_type_per_dram)(struct device *dev, void *drv_data,
+ int fru_id, u32 *val);
+ int (*get_log_entry_type_per_memory_media)(struct device *dev, void *drv_data,
+ int fru_id, u32 *val);
+ int (*get_mode)(struct device *dev, void *drv_data, int fru_id, u32 *val);
+ int (*set_mode)(struct device *dev, void *drv_data, int fru_id, u32 val);
+ int (*get_mode_counts_rows)(struct device *dev, void *drv_data, int fru_id, u32 *val);
+ int (*get_mode_counts_codewords)(struct device *dev, void *drv_data, int fru_id, u32 *val);
+ int (*reset)(struct device *dev, void *drv_data, int fru_id, u32 val);
+ int (*get_threshold)(struct device *dev, void *drv_data, int fru_id, u32 *threshold);
+ int (*set_threshold)(struct device *dev, void *drv_data, int fru_id, u32 threshold);
+ int (*get_name)(struct device *dev, void *drv_data, int fru_id, char *buf);
+};
+
struct edac_ecs_ex_info {
u16 num_media_frus;
};
+int edac_ecs_get_desc(struct device *ecs_dev,
+ const struct attribute_group **attr_groups,
+ u16 num_media_frus); /* fills attr_groups[0 .. num_media_frus - 1]; returns 0 on success, negative errno otherwise */
+
/*
* EDAC RAS feature information structure
*/