@@ -99,3 +99,61 @@ sysfs
Sysfs files are documented in
`Documentation/ABI/testing/sysfs-edac-memory-repair`.
+
+Example
+-------
+
+The usage takes the form shown in this example:
+
+1. CXL memory device Soft Post Package Repair (Soft PPR)
+
+# read capabilities
+
+root@localhost:~# cat /sys/bus/edac/devices/cxl_mem0/mem_repair0/dpa_support
+
+1
+
+root@localhost:~# cat /sys/bus/edac/devices/cxl_mem0/mem_repair0/nibble_mask
+
+0x0
+
+root@localhost:~# cat /sys/bus/edac/devices/cxl_mem0/mem_repair0/persist_mode
+
+0
+
+root@localhost:~# cat /sys/bus/edac/devices/cxl_mem0/mem_repair0/repair_function
+
+0
+
+root@localhost:~# cat /sys/bus/edac/devices/cxl_mem0/mem_repair0/repair_safe_when_in_use
+
+1
+
+# set and readback attributes
+
+root@localhost:~# echo 0x8a2d > /sys/bus/edac/devices/cxl_mem0/mem_repair0/nibble_mask
+
+root@localhost:~# cat /sys/bus/edac/devices/cxl_mem0/mem_repair0/min_dpa
+
+0x0
+
+root@localhost:~# cat /sys/bus/edac/devices/cxl_mem0/mem_repair0/max_dpa
+
+0xfffffff
+
+root@localhost:~# echo 0x300000 > /sys/bus/edac/devices/cxl_mem0/mem_repair0/dpa
+
+root@localhost:~# cat /sys/bus/edac/devices/cxl_mem0/mem_repair0/dpa
+
+0x300000
+
+root@localhost:~# cat /sys/bus/edac/devices/cxl_mem0/mem_repair0/nibble_mask
+
+0x8a2d
+
+# issue repair operations
+
+# reapir returns error if unsupported/resources are not available
+# for the repair operation.
+
+root@localhost:~# echo 1 > /sys/bus/edac/devices/cxl_mem0/mem_repair0/repair
@@ -14,11 +14,13 @@
#include <linux/cleanup.h>
#include <linux/edac.h>
#include <linux/limits.h>
+#include <linux/unaligned.h>
#include <cxl/features.h>
#include <cxl.h>
#include <cxlmem.h>
+#include "core.h"
-#define CXL_DEV_NUM_RAS_FEATURES 2
+#define CXL_DEV_NUM_RAS_FEATURES 3
#define CXL_DEV_HOUR_IN_SECS 3600
#define CXL_DEV_NAME_LEN 128
@@ -610,6 +612,336 @@ static const struct edac_ecs_ops cxl_ecs_ops = {
.set_threshold = cxl_ecs_set_threshold,
};
+/* CXL memory soft PPR & hard PPR control definitions */
+struct cxl_ppr_context {
+ uuid_t repair_uuid;
+ u8 instance;
+ u16 get_feat_size;
+ u16 set_feat_size;
+ u8 get_version;
+ u8 set_version;
+ u16 effects;
+ struct cxl_memdev *cxlmd;
+ enum edac_mem_repair_function repair_function;
+ enum edac_mem_repair_persist_mode persist_mode;
+ u64 dpa;
+ u32 nibble_mask;
+};
+
+/**
+ * struct cxl_memdev_ppr_params - CXL memory PPR parameter data structure.
+ * @op_class: PPR operation class.
+ * @op_subclass: PPR operation subclass.
+ * @dpa_support: device physical address for PPR support.
+ * @media_accessible: memory media is accessible or not during PPR operation.
+ * @data_retained: data is retained or not during PPR operation.
+ * @dpa: device physical address.
+ */
+struct cxl_memdev_ppr_params {
+ u8 op_class;
+ u8 op_subclass;
+ bool dpa_support;
+ bool media_accessible;
+ bool data_retained;
+ u64 dpa;
+};
+
+enum cxl_ppr_param {
+ CXL_PPR_PARAM_DO_QUERY,
+ CXL_PPR_PARAM_DO_PPR,
+};
+
+/* See CXL rev 3.1 @8.2.9.7.2.1 Table 8-113 sPPR Feature Readable Attributes */
+/* See CXL rev 3.1 @8.2.9.7.2.2 Table 8-116 hPPR Feature Readable Attributes */
+#define CXL_MEMDEV_PPR_QUERY_RESOURCE_FLAG BIT(0)
+
+#define CXL_MEMDEV_PPR_DEVICE_INITIATED_MASK BIT(0)
+#define CXL_MEMDEV_PPR_FLAG_DPA_SUPPORT_MASK BIT(0)
+#define CXL_MEMDEV_PPR_FLAG_NIBBLE_SUPPORT_MASK BIT(1)
+#define CXL_MEMDEV_PPR_FLAG_MEM_SPARING_EV_REC_SUPPORT_MASK BIT(2)
+
+#define CXL_MEMDEV_PPR_RESTRICTION_FLAG_MEDIA_ACCESSIBLE_MASK BIT(0)
+#define CXL_MEMDEV_PPR_RESTRICTION_FLAG_DATA_RETAINED_MASK BIT(2)
+
+#define CXL_MEMDEV_PPR_SPARING_EV_REC_EN_MASK BIT(0)
+
+struct cxl_memdev_repair_rd_attrs_hdr {
+ u8 max_op_latency;
+ __le16 op_cap;
+ __le16 op_mode;
+ u8 op_class;
+ u8 op_subclass;
+ u8 rsvd[9];
+} __packed;
+
+struct cxl_memdev_ppr_rd_attrs {
+ struct cxl_memdev_repair_rd_attrs_hdr hdr;
+ u8 ppr_flags;
+ __le16 restriction_flags;
+ u8 ppr_op_mode;
+} __packed;
+
+/* See CXL rev 3.1 @8.2.9.7.1.2 Table 8-103 sPPR Maintenance Input Payload */
+/* See CXL rev 3.1 @8.2.9.7.1.3 Table 8-104 hPPR Maintenance Input Payload */
+struct cxl_memdev_ppr_maintenance_attrs {
+ u8 flags;
+ __le64 dpa;
+ u8 nibble_mask[3];
+} __packed;
+
+static int cxl_mem_ppr_get_attrs(struct device *dev,
+ struct cxl_ppr_context *cxl_ppr_ctx,
+ struct cxl_memdev_ppr_params *params)
+{
+ struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
+ size_t rd_data_size = sizeof(struct cxl_memdev_ppr_rd_attrs);
+ u16 restriction_flags;
+ size_t data_size;
+
+ struct cxl_memdev_ppr_rd_attrs *rd_attrs __free(kfree) =
+ kmalloc(rd_data_size, GFP_KERNEL);
+ if (!rd_attrs)
+ return -ENOMEM;
+
+ data_size = cxl_get_feature(&mds->cxlds, cxl_ppr_ctx->repair_uuid,
+ CXL_GET_FEAT_SEL_CURRENT_VALUE,
+ rd_attrs, rd_data_size);
+ if (!data_size)
+ return -EIO;
+
+ params->op_class = rd_attrs->hdr.op_class;
+ params->op_subclass = rd_attrs->hdr.op_subclass;
+ params->dpa_support = FIELD_GET(CXL_MEMDEV_PPR_FLAG_DPA_SUPPORT_MASK,
+ rd_attrs->ppr_flags);
+ restriction_flags = le16_to_cpu(rd_attrs->restriction_flags);
+ params->media_accessible = FIELD_GET(CXL_MEMDEV_PPR_RESTRICTION_FLAG_MEDIA_ACCESSIBLE_MASK,
+ restriction_flags) ^ 1;
+ params->data_retained = FIELD_GET(CXL_MEMDEV_PPR_RESTRICTION_FLAG_DATA_RETAINED_MASK,
+ restriction_flags) ^ 1;
+
+ return 0;
+}
+
+static int cxl_mem_do_ppr_op(struct device *dev,
+ struct cxl_ppr_context *cxl_ppr_ctx,
+ struct cxl_memdev_ppr_params *rd_params,
+ enum cxl_ppr_param param_type)
+{
+ struct cxl_memdev_ppr_maintenance_attrs maintenance_attrs;
+ struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
+ int ret;
+
+ if (!rd_params->media_accessible || !rd_params->data_retained) {
+ /* Check if DPA is mapped */
+ if (cxl_dpa_to_region(cxlmd, cxl_ppr_ctx->dpa)) {
+ dev_err(dev, "CXL can't do PPR as DPA is mapped\n");
+ return -EBUSY;
+ }
+ }
+ memset(&maintenance_attrs, 0, sizeof(maintenance_attrs));
+ if (param_type == CXL_PPR_PARAM_DO_QUERY)
+ maintenance_attrs.flags = CXL_MEMDEV_PPR_QUERY_RESOURCE_FLAG;
+ else
+ maintenance_attrs.flags = 0;
+ maintenance_attrs.dpa = cpu_to_le64(cxl_ppr_ctx->dpa);
+ put_unaligned_le24(cxl_ppr_ctx->nibble_mask, maintenance_attrs.nibble_mask);
+ ret = cxl_do_maintenance(mds, rd_params->op_class, rd_params->op_subclass,
+ &maintenance_attrs, sizeof(maintenance_attrs));
+ if (ret) {
+ dev_err(dev, "CXL do PPR failed ret=%d\n", ret);
+ up_read(&cxl_region_rwsem);
+ cxl_ppr_ctx->nibble_mask = 0;
+ cxl_ppr_ctx->dpa = 0;
+ return ret;
+ }
+
+ return 0;
+}
+
+static int cxl_mem_ppr_set_attrs(struct device *dev,
+ struct cxl_ppr_context *cxl_ppr_ctx,
+ enum cxl_ppr_param param_type)
+{
+ struct cxl_memdev_ppr_params rd_params;
+ int ret;
+
+ ret = cxl_mem_ppr_get_attrs(dev, cxl_ppr_ctx, &rd_params);
+ if (ret) {
+ dev_err(dev, "Get cxlmemdev PPR params failed ret=%d\n",
+ ret);
+ return ret;
+ }
+
+ switch (param_type) {
+ case CXL_PPR_PARAM_DO_QUERY:
+ case CXL_PPR_PARAM_DO_PPR:
+ ret = down_read_interruptible(&cxl_region_rwsem);
+ if (ret)
+ return ret;
+ ret = down_read_interruptible(&cxl_dpa_rwsem);
+ if (ret) {
+ up_read(&cxl_region_rwsem);
+ return ret;
+ }
+ ret = cxl_mem_do_ppr_op(dev, cxl_ppr_ctx, &rd_params, param_type);
+ up_read(&cxl_dpa_rwsem);
+ up_read(&cxl_region_rwsem);
+ return ret;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int cxl_ppr_get_repair_function(struct device *dev, void *drv_data,
+ u32 *repair_function)
+{
+ struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
+
+ *repair_function = cxl_ppr_ctx->repair_function;
+
+ return 0;
+}
+
+static int cxl_ppr_get_persist_mode(struct device *dev, void *drv_data,
+ u32 *persist_mode)
+{
+ struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
+
+ *persist_mode = cxl_ppr_ctx->persist_mode;
+
+ return 0;
+}
+
+static int cxl_ppr_get_dpa_support(struct device *dev, void *drv_data,
+ u32 *dpa_support)
+{
+ struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
+ struct cxl_memdev_ppr_params params;
+ int ret;
+
+ ret = cxl_mem_ppr_get_attrs(dev, cxl_ppr_ctx, ¶ms);
+ if (ret)
+ return ret;
+
+ *dpa_support = params.dpa_support;
+
+ return 0;
+}
+
+static int cxl_get_ppr_safe_when_in_use(struct device *dev, void *drv_data,
+ u32 *safe)
+{
+ struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
+ struct cxl_memdev_ppr_params params;
+ int ret;
+
+ ret = cxl_mem_ppr_get_attrs(dev, cxl_ppr_ctx, ¶ms);
+ if (ret)
+ return ret;
+
+ *safe = params.media_accessible & params.data_retained;
+
+ return 0;
+}
+
+static int cxl_ppr_get_min_dpa(struct device *dev, void *drv_data,
+ u64 *min_dpa)
+{
+ struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
+ struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+
+ *min_dpa = cxlds->dpa_res.start;
+
+ return 0;
+}
+
+static int cxl_ppr_get_max_dpa(struct device *dev, void *drv_data,
+ u64 *max_dpa)
+{
+ struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
+ struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+
+ *max_dpa = cxlds->dpa_res.end;
+
+ return 0;
+}
+
+static int cxl_ppr_get_dpa(struct device *dev, void *drv_data,
+ u64 *dpa)
+{
+ struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
+
+ *dpa = cxl_ppr_ctx->dpa;
+
+ return 0;
+}
+
+static int cxl_ppr_set_dpa(struct device *dev, void *drv_data, u64 dpa)
+{
+ struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
+ struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+
+ if (!dpa || dpa < cxlds->dpa_res.start || dpa > cxlds->dpa_res.end)
+ return -EINVAL;
+
+ cxl_ppr_ctx->dpa = dpa;
+
+ return 0;
+}
+
+static int cxl_ppr_get_nibble_mask(struct device *dev, void *drv_data,
+ u64 *nibble_mask)
+{
+ struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
+
+ *nibble_mask = cxl_ppr_ctx->nibble_mask;
+
+ return 0;
+}
+
+static int cxl_ppr_set_nibble_mask(struct device *dev, void *drv_data, u64 nibble_mask)
+{
+ struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
+
+ cxl_ppr_ctx->nibble_mask = nibble_mask;
+
+ return 0;
+}
+
+static int cxl_do_ppr(struct device *dev, void *drv_data, u32 val)
+{
+ struct cxl_ppr_context *cxl_ppr_ctx = drv_data;
+ int ret;
+
+ if (!cxl_ppr_ctx->dpa || val != EDAC_DO_MEM_REPAIR)
+ return -EINVAL;
+
+ ret = cxl_mem_ppr_set_attrs(dev, cxl_ppr_ctx, CXL_PPR_PARAM_DO_PPR);
+
+ return ret;
+}
+
+static const struct edac_mem_repair_ops cxl_sppr_ops = {
+ .get_repair_function = cxl_ppr_get_repair_function,
+ .get_persist_mode = cxl_ppr_get_persist_mode,
+ .get_dpa_support = cxl_ppr_get_dpa_support,
+ .get_repair_safe_when_in_use = cxl_get_ppr_safe_when_in_use,
+ .get_min_dpa = cxl_ppr_get_min_dpa,
+ .get_max_dpa = cxl_ppr_get_max_dpa,
+ .get_dpa = cxl_ppr_get_dpa,
+ .set_dpa = cxl_ppr_set_dpa,
+ .get_nibble_mask = cxl_ppr_get_nibble_mask,
+ .set_nibble_mask = cxl_ppr_set_nibble_mask,
+ .do_repair = cxl_do_ppr,
+};
+
static int cxl_memdev_scrub_init(struct cxl_memdev *cxlmd, struct cxl_region *cxlr,
struct edac_dev_feature *ras_feature, u8 scrub_inst)
{
@@ -725,11 +1057,59 @@ static int cxl_memdev_ecs_init(struct cxl_memdev *cxlmd,
return -EOPNOTSUPP;
}
+static int cxl_memdev_soft_ppr_init(struct cxl_memdev *cxlmd,
+ struct edac_dev_feature *ras_feature,
+ u8 repair_inst)
+{
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
+ struct cxl_ppr_context *cxl_sppr_ctx;
+ struct cxl_feat_entry feat_entry;
+ int rc;
+
+ rc = cxl_get_supported_feature_entry(&mds->cxlds, &CXL_FEAT_SPPR_UUID,
+ &feat_entry);
+ if (rc < 0)
+ goto feat_unsupported;
+
+ if (!(le32_to_cpu(feat_entry.flags) & CXL_FEAT_ENTRY_FLAG_CHANGABLE))
+ goto feat_unsupported;
+
+ cxl_sppr_ctx = devm_kzalloc(&cxlmd->dev, sizeof(*cxl_sppr_ctx),
+ GFP_KERNEL);
+ if (!cxl_sppr_ctx)
+ return -ENOMEM;
+
+ *cxl_sppr_ctx = (struct cxl_ppr_context) {
+ .repair_uuid = CXL_FEAT_SPPR_UUID,
+ .get_feat_size = le16_to_cpu(feat_entry.get_feat_size),
+ .set_feat_size = le16_to_cpu(feat_entry.set_feat_size),
+ .get_version = feat_entry.get_feat_ver,
+ .set_version = feat_entry.set_feat_ver,
+ .effects = le16_to_cpu(feat_entry.effects),
+ .cxlmd = cxlmd,
+ .repair_function = EDAC_SOFT_PPR,
+ .persist_mode = EDAC_MEM_REPAIR_SOFT,
+ .instance = repair_inst,
+ };
+
+ ras_feature->ft_type = RAS_FEAT_MEM_REPAIR;
+ ras_feature->instance = cxl_sppr_ctx->instance;
+ ras_feature->mem_repair_ops = &cxl_sppr_ops;
+ ras_feature->ctx = cxl_sppr_ctx;
+
+ return 0;
+
+feat_unsupported:
+ return -EOPNOTSUPP;
+}
+
int cxl_mem_ras_features_init(struct cxl_memdev *cxlmd, struct cxl_region *cxlr)
{
struct edac_dev_feature ras_features[CXL_DEV_NUM_RAS_FEATURES];
char cxl_dev_name[CXL_DEV_NAME_LEN];
int num_ras_features = 0;
+ u8 repair_inst = 0;
u8 scrub_inst = 0;
int rc;
@@ -762,6 +1142,17 @@ int cxl_mem_ras_features_init(struct cxl_memdev *cxlmd, struct cxl_region *cxlr)
num_ras_features++;
feat_ecs_done:
+ rc = cxl_memdev_soft_ppr_init(cxlmd, &ras_features[num_ras_features],
+ repair_inst);
+ if (rc == -EOPNOTSUPP)
+ goto feat_soft_ppr_done;
+ if (rc < 0)
+ return rc;
+
+ repair_inst++;
+ num_ras_features++;
+
+feat_soft_ppr_done:
feat_register:
return edac_dev_register(&cxlmd->dev, cxl_dev_name, NULL,
num_ras_features, ras_features);