Message ID | 20241016163349.1210-4-shiju.jose@huawei.com |
---|---|
State | New |
Headers | show |
Series | Updates for CXL Event Records | expand |
On Wed, 16 Oct 2024 17:33:48 +0100 <shiju.jose@huawei.com> wrote: > From: Shiju Jose <shiju.jose@huawei.com> > > CXL spec 3.1 section 8.2.9.2.1.2 Table 8-46, DRAM Event Record has been updated > with the following new fields and new types for Memory Event Type, Transaction > Type and Validity Flags fields. > 1. Component Identifier > 2. Sub-channel > 3. Advanced Programmable Corrected Memory Error Threshold Event Flags > 4. Corrected Memory Error Count at Event > 5. Memory Event Sub-Type > > Add updates for the above spec changes in the CXL events record and CXL > DRAM trace event implementations. > > Signed-off-by: Shiju Jose <shiju.jose@huawei.com> Passing comments on two things inline. 1) There are a couple of whitespace consistency changes in here (spaces to tabs for alignment). That's fine but maybe needs a brief mention in the patch description. 2) Really odd that the spec didn't have a component ID field for DRAM errors. They weren't all that useful before the PLDM format was added, but it was still a curiosity that made me open up the 3.0 spec. Indeed, no such field. With that one line added to the patch description this looks good to me. 
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> > --- > drivers/cxl/core/trace.h | 44 ++++++++++++++++++++++++++++++++-------- > include/cxl/event.h | 7 ++++++- > 2 files changed, 42 insertions(+), 9 deletions(-) > > diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h > index e638e82429bc..20790dffa2b4 100644 > --- a/drivers/cxl/core/trace.h > +++ b/drivers/cxl/core/trace.h > @@ -468,7 +468,7 @@ TRACE_EVENT(cxl_general_media, > /* > * DRAM Event Record - DER > * > - * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44 > + * CXL rev 3.1 section 8.2.9.2.1.2; Table 8-46 > */ > /* > * DRAM Event Record defines many fields the same as the General Media Event > @@ -478,11 +478,17 @@ TRACE_EVENT(cxl_general_media, > #define CXL_DER_MEM_EVT_TYPE_SCRUB_MEDIA_ECC_ERROR 0x01 > #define CXL_DER_MEM_EVT_TYPE_INV_ADDR 0x02 > #define CXL_DER_MEM_EVT_TYPE_DATA_PATH_ERROR 0x03 > -#define show_dram_mem_event_type(type) __print_symbolic(type, \ > +#define CXL_DER_MEM_EVT_TYPE_TE_STATE_VIOLATION 0x04 > +#define CXL_DER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE 0x05 > +#define CXL_DER_MEM_EVT_TYPE_CKID_VIOLATION 0x06 > +#define show_dram_mem_event_type(type) __print_symbolic(type, \ This change looks odd here but does bring the line above into the same formatting style as the other similar cases in the file. Maybe worth a line in the patch description to say "Includes trivial white space consistency improvements" just to flag up that it was intentional. 
> { CXL_DER_MEM_EVT_TYPE_ECC_ERROR, "ECC Error" }, \ > { CXL_DER_MEM_EVT_TYPE_SCRUB_MEDIA_ECC_ERROR, "Scrub Media ECC Error" }, \ > { CXL_DER_MEM_EVT_TYPE_INV_ADDR, "Invalid Address" }, \ > - { CXL_DER_MEM_EVT_TYPE_DATA_PATH_ERROR, "Data Path Error" } \ > + { CXL_DER_MEM_EVT_TYPE_DATA_PATH_ERROR, "Data Path Error" }, \ > + { CXL_DER_MEM_EVT_TYPE_TE_STATE_VIOLATION, "TE State Violation" }, \ > + { CXL_DER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE, "Adv Prog CME Counter Expiration" }, \ > + { CXL_DER_MEM_EVT_TYPE_CKID_VIOLATION, "CKID Violation" } \ > ) > > #define CXL_DER_VALID_CHANNEL BIT(0) > @@ -493,7 +499,10 @@ TRACE_EVENT(cxl_general_media, > #define CXL_DER_VALID_ROW BIT(5) > #define CXL_DER_VALID_COLUMN BIT(6) > #define CXL_DER_VALID_CORRECTION_MASK BIT(7) > -#define show_dram_valid_flags(flags) __print_flags(flags, "|", \ > +#define CXL_DER_VALID_COMPONENT BIT(8) > +#define CXL_DER_VALID_COMPONENT_ID_FORMAT BIT(9) > +#define CXL_DER_VALID_SUB_CHANNEL BIT(10) > +#define show_dram_valid_flags(flags) __print_flags(flags, "|", \ As above this is a minor white space consistency change. 
> { CXL_DER_VALID_CHANNEL, "CHANNEL" }, \ > { CXL_DER_VALID_RANK, "RANK" }, \ > { CXL_DER_VALID_NIBBLE, "NIBBLE" }, \ > @@ -501,7 +510,9 @@ TRACE_EVENT(cxl_general_media, > { CXL_DER_VALID_BANK, "BANK" }, \ > { CXL_DER_VALID_ROW, "ROW" }, \ > { CXL_DER_VALID_COLUMN, "COLUMN" }, \ > - { CXL_DER_VALID_CORRECTION_MASK, "CORRECTION MASK" } \ > + { CXL_DER_VALID_CORRECTION_MASK, "CORRECTION MASK" }, \ > + { CXL_DER_VALID_COMPONENT, "COMPONENT" }, \ > + { CXL_DER_VALID_SUB_CHANNEL, "SUB CHANNEL" } \ > ) > diff --git a/include/cxl/event.h b/include/cxl/event.h > index ea8cd44a52e9..7e98492c85df 100644 > --- a/include/cxl/event.h > +++ b/include/cxl/event.h > @@ -71,7 +71,12 @@ struct cxl_event_dram { > u8 row[3]; > u8 column[2]; > u8 correction_mask[CXL_EVENT_DER_CORRECTION_MASK_SIZE]; > - u8 reserved[0x17]; > + u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE]; Odd that the General Media record had this field in 3.0 but DRAM didn't. I checked, though, and that is indeed the case! > + u8 sub_channel; > + u8 cme_threshold_ev_flags; > + u8 cvme_count[3]; > + u8 sub_type; > + u8 reserved; > } __packed; > > /*
diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index e638e82429bc..20790dffa2b4 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -468,7 +468,7 @@ TRACE_EVENT(cxl_general_media, /* * DRAM Event Record - DER * - * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44 + * CXL rev 3.1 section 8.2.9.2.1.2; Table 8-46 */ /* * DRAM Event Record defines many fields the same as the General Media Event @@ -478,11 +478,17 @@ TRACE_EVENT(cxl_general_media, #define CXL_DER_MEM_EVT_TYPE_SCRUB_MEDIA_ECC_ERROR 0x01 #define CXL_DER_MEM_EVT_TYPE_INV_ADDR 0x02 #define CXL_DER_MEM_EVT_TYPE_DATA_PATH_ERROR 0x03 -#define show_dram_mem_event_type(type) __print_symbolic(type, \ +#define CXL_DER_MEM_EVT_TYPE_TE_STATE_VIOLATION 0x04 +#define CXL_DER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE 0x05 +#define CXL_DER_MEM_EVT_TYPE_CKID_VIOLATION 0x06 +#define show_dram_mem_event_type(type) __print_symbolic(type, \ { CXL_DER_MEM_EVT_TYPE_ECC_ERROR, "ECC Error" }, \ { CXL_DER_MEM_EVT_TYPE_SCRUB_MEDIA_ECC_ERROR, "Scrub Media ECC Error" }, \ { CXL_DER_MEM_EVT_TYPE_INV_ADDR, "Invalid Address" }, \ - { CXL_DER_MEM_EVT_TYPE_DATA_PATH_ERROR, "Data Path Error" } \ + { CXL_DER_MEM_EVT_TYPE_DATA_PATH_ERROR, "Data Path Error" }, \ + { CXL_DER_MEM_EVT_TYPE_TE_STATE_VIOLATION, "TE State Violation" }, \ + { CXL_DER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE, "Adv Prog CME Counter Expiration" }, \ + { CXL_DER_MEM_EVT_TYPE_CKID_VIOLATION, "CKID Violation" } \ ) #define CXL_DER_VALID_CHANNEL BIT(0) @@ -493,7 +499,10 @@ TRACE_EVENT(cxl_general_media, #define CXL_DER_VALID_ROW BIT(5) #define CXL_DER_VALID_COLUMN BIT(6) #define CXL_DER_VALID_CORRECTION_MASK BIT(7) -#define show_dram_valid_flags(flags) __print_flags(flags, "|", \ +#define CXL_DER_VALID_COMPONENT BIT(8) +#define CXL_DER_VALID_COMPONENT_ID_FORMAT BIT(9) +#define CXL_DER_VALID_SUB_CHANNEL BIT(10) +#define show_dram_valid_flags(flags) __print_flags(flags, "|", \ { CXL_DER_VALID_CHANNEL, "CHANNEL" }, \ { CXL_DER_VALID_RANK, "RANK" }, \ { 
CXL_DER_VALID_NIBBLE, "NIBBLE" }, \ @@ -501,7 +510,9 @@ TRACE_EVENT(cxl_general_media, { CXL_DER_VALID_BANK, "BANK" }, \ { CXL_DER_VALID_ROW, "ROW" }, \ { CXL_DER_VALID_COLUMN, "COLUMN" }, \ - { CXL_DER_VALID_CORRECTION_MASK, "CORRECTION MASK" } \ + { CXL_DER_VALID_CORRECTION_MASK, "CORRECTION MASK" }, \ + { CXL_DER_VALID_COMPONENT, "COMPONENT" }, \ + { CXL_DER_VALID_SUB_CHANNEL, "SUB CHANNEL" } \ ) TRACE_EVENT(cxl_dram, @@ -530,6 +541,11 @@ TRACE_EVENT(cxl_dram, __field(u8, bank_group) /* Out of order to pack trace record */ __field(u8, bank) /* Out of order to pack trace record */ __field(u8, dpa_flags) /* Out of order to pack trace record */ + __array(u8, comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE) + __field(u32, cvme_count) + __field(u8, sub_channel) + __field(u8, cme_threshold_ev_flags) + __field(u8, sub_type) __string(region_name, cxlr ? dev_name(&cxlr->dev) : "") ), @@ -554,7 +570,13 @@ TRACE_EVENT(cxl_dram, __entry->column = get_unaligned_le16(rec->column); memcpy(__entry->cor_mask, &rec->correction_mask, CXL_EVENT_DER_CORRECTION_MASK_SIZE); + memcpy(__entry->comp_id, &rec->component_id, + CXL_EVENT_GEN_MED_COMP_ID_SIZE); __entry->hpa = hpa; + __entry->sub_channel = rec->sub_channel; + __entry->cme_threshold_ev_flags = rec->cme_threshold_ev_flags; + __entry->cvme_count = get_unaligned_le24(rec->cvme_count); + __entry->sub_type = rec->sub_type; if (cxlr) { __assign_str(region_name); uuid_copy(&__entry->region_uuid, &cxlr->params.uuid); @@ -567,8 +589,9 @@ TRACE_EVENT(cxl_dram, CXL_EVT_TP_printk("dpa=%llx dpa_flags='%s' descriptor='%s' type='%s' " \ "transaction_type='%s' channel=%u rank=%u nibble_mask=%x " \ "bank_group=%u bank=%u row=%u column=%u cor_mask=%s " \ - "validity_flags='%s' " \ - "hpa=%llx region=%s region_uuid=%pUb", + "comp_id=%s validity_flags='%s' " \ + "hpa=%llx sub_channel=%u cme_threshold_ev_flags='%s' " \ + "cvme_count=%x sub_type='%s' region=%s region_uuid=%pUb", __entry->dpa, show_dpa_flags(__entry->dpa_flags), 
show_event_desc_flags(__entry->descriptor), show_dram_mem_event_type(__entry->type), @@ -577,8 +600,13 @@ TRACE_EVENT(cxl_dram, __entry->bank_group, __entry->bank, __entry->row, __entry->column, __print_hex(__entry->cor_mask, CXL_EVENT_DER_CORRECTION_MASK_SIZE), + cxl_print_component_id(__entry->validity_flags, CXL_DER_VALID_COMPONENT, + CXL_DER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id), show_dram_valid_flags(__entry->validity_flags), - __entry->hpa, __get_str(region_name), &__entry->region_uuid + __entry->hpa, __entry->sub_channel, + show_cme_threshold_ev_flags(__entry->cme_threshold_ev_flags), + __entry->cvme_count, show_mem_event_sub_type(__entry->sub_type), + __get_str(region_name), &__entry->region_uuid ) ); diff --git a/include/cxl/event.h b/include/cxl/event.h index ea8cd44a52e9..7e98492c85df 100644 --- a/include/cxl/event.h +++ b/include/cxl/event.h @@ -71,7 +71,12 @@ struct cxl_event_dram { u8 row[3]; u8 column[2]; u8 correction_mask[CXL_EVENT_DER_CORRECTION_MASK_SIZE]; - u8 reserved[0x17]; + u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE]; + u8 sub_channel; + u8 cme_threshold_ev_flags; + u8 cvme_count[3]; + u8 sub_type; + u8 reserved; } __packed; /*