@@ -772,9 +772,32 @@ static const struct cxl_event_flags cxl_dpa_flags[] = {
{ .bit = CXL_DPA_NOT_REPAIRABLE, .flag = "NOT_REPAIRABLE" },
};
+/* Memory event sub_type names by array index; CXL rev 3.1 Section 8.2.9.2.1.1; Table 8-45 */
+static const char * const cxl_mem_event_sub_type[] = {
+ "Not Reported", /* 0 */
+ "Internal Datapath Error", /* 1 */
+ "Media Link Command Training Error", /* 2 */
+ "Media Link Control Training Error", /* 3 */
+ "Media Link Data Training Error", /* 4 */
+ "Media Link CRC Error", /* 5 */
+};
+
+#define CXL_CME_EV_FLAG_CME_MULTIPLE_MEDIA BIT(0) /* CMEs in multiple media components */
+#define CXL_CME_EV_FLAG_THRESHOLD_EXCEEDED BIT(1) /* programmable threshold exceeded */
+static const struct cxl_event_flags cxl_cme_threshold_ev_flags[] = {
+ {
+ .bit = CXL_CME_EV_FLAG_CME_MULTIPLE_MEDIA,
+ .flag = "Corrected Memory Errors in Multiple Media Components"
+ },
+ {
+ .bit = CXL_CME_EV_FLAG_THRESHOLD_EXCEEDED,
+ .flag = "Exceeded Programmable Threshold"
+ },
+}; /* bit/label pairs handed to decode_cxl_event_flags() for cme_threshold_ev_flags */
+
/*
* General Media Event Record - GMER
- * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
+ * CXL rev 3.1 Section 8.2.9.2.1.1; Table 8-45
*/
#define CXL_GMER_EVT_DESC_UNCORRECTABLE_EVENT BIT(0)
#define CXL_GMER_EVT_DESC_THRESHOLD_EVENT BIT(1)
@@ -790,11 +813,16 @@ static const struct cxl_event_flags cxl_gmer_event_desc_flags[] = {
#define CXL_GMER_VALID_RANK BIT(1)
#define CXL_GMER_VALID_DEVICE BIT(2)
#define CXL_GMER_VALID_COMPONENT BIT(3)
+#define CXL_GMER_VALID_COMPONENT_ID_FORMAT BIT(4)
static const char * const cxl_gmer_mem_event_type[] = {
"ECC Error",
"Invalid Address",
"Data Path Error",
+ "TE State Violation", /* array index 3 */
+ "Scrub Media ECC Error", /* array index 4 */
+ "Advanced Programmable CME Counter Expiration", /* array index 5 */
+ "CKID Violation", /* array index 6 */
};
static const char * const cxl_gmer_trans_type[] = {
@@ -805,13 +833,15 @@ static const char * const cxl_gmer_trans_type[] = {
"Host Inject Poison",
"Internal Media Scrub",
"Internal Media Management",
+ "Internal Media Error Check Scrub",
+ "Media Initialization",
};
int ras_cxl_general_media_event_handler(struct trace_seq *s,
struct tep_record *record,
struct tep_event *event, void *context)
{
- int len, i;
+ int len, i, rc;
unsigned long long val;
struct ras_events *ras = context;
struct ras_cxl_general_media_event ev;
@@ -846,11 +876,20 @@ int ras_cxl_general_media_event_handler(struct trace_seq *s,
if (tep_get_field_val(s, event, "type", record, &val, 1) < 0)
return -1;
ev.type = val;
- if (trace_seq_printf(s, "type:%s ",
+ if (trace_seq_printf(s, "memory_event_type:%s ",
get_cxl_type_str(cxl_gmer_mem_event_type,
ARRAY_SIZE(cxl_gmer_mem_event_type), ev.type)) <= 0)
return -1;
+ if (tep_get_field_val(s, event, "sub_type", record, &val, 1) < 0)
+ return -1;
+ ev.sub_type = val;
+ if (trace_seq_printf(s, "memory_event_sub_type:%s ",
+ get_cxl_type_str(cxl_mem_event_sub_type,
+ ARRAY_SIZE(cxl_mem_event_sub_type),
+ ev.sub_type)) <= 0)
+ return -1;
+
if (tep_get_field_val(s, event, "transaction_type", record, &val, 1) < 0)
return -1;
ev.transaction_type = val;
@@ -918,8 +957,36 @@ int ras_cxl_general_media_event_handler(struct trace_seq *s,
if (trace_seq_printf(s, "%02x ", ev.comp_id[i]) <= 0)
break;
}
+
+ if (ev.validity_flags & CXL_GMER_VALID_COMPONENT_ID_FORMAT) {
+ if (trace_seq_printf(s, "comp_id_pldm_valid_flags:") <= 0)
+ return -1;
+ if (decode_cxl_event_flags(s, ev.comp_id[0], cxl_pldm_comp_id_flags,
+ ARRAY_SIZE(cxl_pldm_comp_id_flags)) < 0)
+ return -1;
+
+ rc = ras_cxl_print_component_id(s, ev.comp_id, ev.entity_id, ev.res_id);
+ if (rc)
+ return rc;
+ }
}
+ if (tep_get_field_val(s, event, "cme_threshold_ev_flags", record, &val, 1) < 0)
+ return -1;
+ ev.cme_threshold_ev_flags = val;
+ if (trace_seq_printf(s, "Advanced Programmable CME threshold Event Flags:") <= 0)
+ return -1;
+ if (decode_cxl_event_flags(s, ev.cme_threshold_ev_flags,
+ cxl_cme_threshold_ev_flags,
+ ARRAY_SIZE(cxl_cme_threshold_ev_flags)) < 0)
+ return -1;
+
+ if (tep_get_field_val(s, event, "cme_count", record, &val, 1) < 0)
+ return -1;
+ ev.cme_count = val;
+ if (trace_seq_printf(s, "Corrected Memory Error Count:%u ", ev.cme_count) <= 0)
+ return -1;
+
/* Insert data into the SGBD */
#ifdef HAVE_SQLITE3
ras_store_cxl_general_media_event(ras, &ev);
@@ -889,6 +889,11 @@ static const struct db_fields cxl_general_media_event_fields[] = {
{ .name = "hpa", .type = "INTEGER" },
{ .name = "region", .type = "TEXT" },
{ .name = "region_uuid", .type = "TEXT" },
+ { .name = "pldm_entity_id", .type = "BLOB" },
+ { .name = "pldm_resource_id", .type = "BLOB" },
+ { .name = "sub_type", .type = "INTEGER" },
+ { .name = "cme_threshold_ev_flags", .type = "INTEGER" },
+ { .name = "cme_count", .type = "INTEGER" },
};
static const struct db_table_descriptor cxl_general_media_event_tab = {
@@ -925,6 +930,14 @@ int ras_store_cxl_general_media_event(struct ras_events *ras,
sqlite3_bind_int64(priv->stmt_cxl_general_media_event, idx++, ev->hpa);
sqlite3_bind_text(priv->stmt_cxl_general_media_event, idx++, ev->region, -1, NULL);
sqlite3_bind_text(priv->stmt_cxl_general_media_event, idx++, ev->region_uuid, -1, NULL);
+ sqlite3_bind_blob(priv->stmt_cxl_general_media_event, idx++, ev->entity_id,
+ CXL_PLDM_ENTITY_ID_LEN, NULL);
+ sqlite3_bind_blob(priv->stmt_cxl_general_media_event, idx++, ev->res_id,
+ CXL_PLDM_RES_ID_LEN, NULL);
+ sqlite3_bind_int(priv->stmt_cxl_general_media_event, idx++, ev->sub_type);
+ sqlite3_bind_int(priv->stmt_cxl_general_media_event, idx++,
+ ev->cme_threshold_ev_flags);
+ sqlite3_bind_int(priv->stmt_cxl_general_media_event, idx++, ev->cme_count);
rc = sqlite3_step(priv->stmt_cxl_general_media_event);
if (rc != SQLITE_OK && rc != SQLITE_DONE)
@@ -196,15 +196,20 @@ struct ras_cxl_general_media_event {
uint8_t dpa_flags;
uint8_t descriptor;
uint8_t type;
+ uint8_t sub_type;
uint8_t transaction_type;
uint8_t channel;
uint8_t rank;
uint32_t device;
uint8_t *comp_id;
+ uint8_t entity_id[CXL_PLDM_ENTITY_ID_LEN];
+ uint8_t res_id[CXL_PLDM_RES_ID_LEN];
uint16_t validity_flags;
uint64_t hpa;
const char *region;
const char *region_uuid;
+ uint8_t cme_threshold_ev_flags;
+ uint32_t cme_count;
};
struct ras_cxl_dram_event {
@@ -555,13 +555,16 @@ static int set_cxl_general_media_event_backtrace(char *buf, struct ras_cxl_gener
"dpa_flags=%u\n"
"descriptor=%u\n"
"type=%u\n"
+ "sub_type=0x%x\n"
"transaction_type=%u\n"
"hpa=0x%lx\n"
"region=%s\n"
"region_uuid=%s\n"
"channel=%u\n"
"rank=%u\n"
- "device=0x%x\n",
+ "device=0x%x\n"
+ "cme_threshold_ev_flags=0x%x\n"
+ "cme_count=0x%x\n",
ev->hdr.timestamp,
ev->hdr.memdev,
ev->hdr.host,
@@ -578,13 +581,16 @@ static int set_cxl_general_media_event_backtrace(char *buf, struct ras_cxl_gener
ev->dpa_flags,
ev->descriptor,
ev->type,
+ ev->sub_type,
ev->transaction_type,
ev->hpa,
ev->region,
ev->region_uuid,
ev->channel,
ev->rank,
- ev->device);
+ ev->device,
+ ev->cme_threshold_ev_flags,
+ ev->cme_count);
return 0;
}