Message ID | 1393924997-8992-3-git-send-email-gong.chen@linux.intel.com (mailing list archive) |
---|---|
State | Not Applicable, archived |
Headers | show |
On Tue, Mar 04, 2014 at 04:23:17AM -0500, Chen, Gong wrote: > Add trace interface to elaborate all H/W error related information. > > Signed-off-by: Chen, Gong <gong.chen@linux.intel.com> > --- > drivers/acpi/Kconfig | 3 +- > drivers/acpi/Makefile | 1 + > drivers/acpi/acpi_extlog.c | 131 +++++++++++++++++++++++++++++++++++++++++++- > drivers/firmware/efi/cper.c | 13 ++++- > include/linux/cper.h | 2 + > include/ras/ras_event.h | 62 +++++++++++++++++++++ > kernel/trace/ras-traces.c | 1 + > 7 files changed, 208 insertions(+), 5 deletions(-) > > diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig > index 4770de5..3e569d4 100644 > --- a/drivers/acpi/Kconfig > +++ b/drivers/acpi/Kconfig > @@ -363,6 +363,7 @@ config ACPI_EXTLOG > > Enhanced MCA Logging allows firmware to provide additional error > information to system software, synchronous with MCE or CMCI. This > - driver adds support for that functionality. > + driver adds support for that functionality with corresponding > + tracepoint which carries that information to userspace. > > endif # ACPI > diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile > index 0331f91..f6abc4a 100644 > --- a/drivers/acpi/Makefile > +++ b/drivers/acpi/Makefile > @@ -82,4 +82,5 @@ obj-$(CONFIG_ACPI_PROCESSOR_AGGREGATOR) += acpi_pad.o > > obj-$(CONFIG_ACPI_APEI) += apei/ > > +CFLAGS_acpi_extlog.o := -I$(src) > obj-$(CONFIG_ACPI_EXTLOG) += acpi_extlog.o > diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c > index c4a5d87..fbdebad 100644 > --- a/drivers/acpi/acpi_extlog.c > +++ b/drivers/acpi/acpi_extlog.c > @@ -14,8 +14,10 @@ > #include <linux/edac.h> > #include <asm/cpu.h> > #include <asm/mce.h> > +#include <linux/dmi.h> > > #include "apei/apei-internal.h" > +#include <ras/ras_event.h> > > #define EXT_ELOG_ENTRY_MASK GENMASK_ULL(51, 0) /* elog entry address mask */ > > @@ -44,6 +46,11 @@ struct extlog_l1_head { > static int old_edac_report_status; > > static u8 extlog_dsm_uuid[] __initdata = "663E35AF-CC10-41A4-88EA-5470AF055295"; > +static const uuid_le invalid_uuid = NULL_UUID_LE; > + > +static DEFINE_RAW_SPINLOCK(trace_lock); > +static char mem_location[LOC_LEN]; > +static char dimm_location[LOC_LEN]; > > /* L1 table related physical address */ > static u64 elog_base; > @@ -69,6 +76,106 @@ static u32 l1_percpu_entry; > #define ELOG_ENTRY_ADDR(phyaddr) \ > (phyaddr - elog_base + (u8 *)elog_addr) > > +static void mem_err_location(struct cper_sec_mem_err *mem) > +{ > + char *p; > + u32 n = 0; > + > + memset(mem_location, 0, LOC_LEN); > + p = mem_location; > + if (mem->validation_bits & CPER_MEM_VALID_NODE) > + n += sprintf(p + n, " node: %d", mem->node); > + if (n >= LOC_LEN) > + goto end; > + if (mem->validation_bits & CPER_MEM_VALID_CARD) > + n += sprintf(p + n, " card: %d", mem->card); > + if (n >= LOC_LEN) > + goto end; > + if (mem->validation_bits & CPER_MEM_VALID_MODULE) > + n += sprintf(p + n, " module: %d", mem->module); > + if (n >= LOC_LEN) > + goto end; > + if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER) > + n += sprintf(p + n, " rank: %d", mem->rank); > + if (n >= LOC_LEN) > + goto end; > + if (mem->validation_bits & CPER_MEM_VALID_BANK) > + n += sprintf(p + n, " bank: %d", mem->bank); > + if (n >= LOC_LEN) > + goto end; > + if (mem->validation_bits & CPER_MEM_VALID_DEVICE) > + n += sprintf(p + n, " device: %d", mem->device); > + if (n >= LOC_LEN) > + goto end; > + if (mem->validation_bits & CPER_MEM_VALID_ROW) > + n += sprintf(p + n, " row: %d", mem->row); > + if (n >= LOC_LEN) > + goto end; > + if (mem->validation_bits & CPER_MEM_VALID_COLUMN) > + n += sprintf(p + n, " column: %d", mem->column); > + if (n >= LOC_LEN) > + goto end; > + if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION) > + n += sprintf(p + n, " bit_position: %d", mem->bit_pos); > + if (n >= LOC_LEN) > + goto end; > + if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID) > + n += sprintf(p + n, " requestor_id: 0x%016llx", > + mem->requestor_id); > + if (n >= LOC_LEN) > + goto end; > + if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID) > + n += sprintf(p + n, " responder_id: 0x%016llx", > + mem->responder_id); > + if (n >= LOC_LEN) > + goto end; > + if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID) > + n += sprintf(p + n, " target_id: 0x%016llx", mem->target_id); > +end: > + return; > +} Looks like this wants to share with cper_print_mem() - definitely a lot of duplication there. > + > +static void dimm_err_location(struct cper_sec_mem_err *mem) > +{ > + const char *bank = NULL, *device = NULL; > + > + memset(dimm_location, 0, LOC_LEN); > + if (!(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE)) > + return; > + > + dmi_memdev_name(mem->mem_dev_handle, &bank, &device); > + if (bank != NULL && device != NULL) > + snprintf(dimm_location, LOC_LEN - 1, "%s %s", bank, device); > + else > + snprintf(dimm_location, LOC_LEN - 1, "DMI handle: 0x%.4x", > + mem->mem_dev_handle); > +} This one too. > + > +static void trace_mem_error(const uuid_le *fru_id, char *fru_text, > + u64 err_count, u32 severity, > + struct cper_sec_mem_err *mem) > +{ > + u32 etype = ~0U; > + u64 phy_addr = ~0ull; I'm assuming userspace knows that all 1s means field value is invalid? > + unsigned long flags; > + > + if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) > + etype = mem->error_type; newline. > + if (mem->validation_bits & CPER_MEM_VALID_PA) { > + phy_addr = mem->physical_addr; > + if (mem->validation_bits & CPER_MEM_VALID_PA_MASK) > + phy_addr &= mem->physical_addr_mask; > + } > + > + raw_spin_lock_irqsave(&trace_lock, flags); > + mem_err_location(mem); > + dimm_err_location(mem); > + > + trace_extlog_mem_event(etype, dimm_location, fru_id, fru_text, > + err_count, severity, phy_addr, mem_location); > + raw_spin_unlock_irqrestore(&trace_lock, flags); > +} > + > static struct acpi_generic_status *extlog_elog_entry_check(int cpu, int bank) > { > int idx; > @@ -137,7 +244,12 @@ static int extlog_print(struct notifier_block *nb, unsigned long val, > struct mce *mce = (struct mce *)data; > int bank = mce->bank; > int cpu = mce->extcpu; > - struct acpi_generic_status *estatus; > + struct acpi_generic_status *estatus, *tmp; > + struct acpi_generic_data *gdata; > + const uuid_le *fru_id = &invalid_uuid; > + char *fru_text = ""; > + uuid_le *sec_type; > + static u64 err_count; > int rc; > > estatus = extlog_elog_entry_check(cpu, bank); > @@ -149,6 +261,23 @@ static int extlog_print(struct notifier_block *nb, unsigned long val, > estatus->block_status = 0; > > rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu); > + tmp = (struct acpi_generic_status *)elog_buf; > + gdata = (struct acpi_generic_data *)(tmp + 1); > + rc = print_extlog_rcd(NULL, tmp, cpu); We probably need a mechanism to disable printking to dmesg once userspace has opened the tracepoint. > + /* trace extended error log */ > + err_count++; > + if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID) > + fru_id = (uuid_le *)gdata->fru_id; > + if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT) > + fru_text = gdata->fru_text; > + sec_type = (uuid_le *)gdata->section_type; > + if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) { > + struct cper_sec_mem_err *mem_err = (void *)(gdata + 1); > + if (gdata->error_data_length >= sizeof(*mem_err)) > + trace_mem_error(fru_id, fru_text, err_count, > + gdata->error_severity, mem_err); > + } > > return NOTIFY_STOP; > } > diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c > index 1491dd4..9d3e2c4 100644 > --- a/drivers/firmware/efi/cper.c > +++ b/drivers/firmware/efi/cper.c > @@ -57,11 +57,12 @@ static const char *cper_severity_strs[] = { > "info", > }; > > -static const char *cper_severity_str(unsigned int severity) > +const char *cper_severity_str(unsigned int severity) > { > return severity < ARRAY_SIZE(cper_severity_strs) ? > cper_severity_strs[severity] : "unknown"; > } > +EXPORT_SYMBOL_GPL(cper_severity_str); Yes, this calls for a common file sharin cper and extlog functionality. > /* > * cper_print_bits - print strings for set bits > @@ -196,6 +197,13 @@ static const char *cper_mem_err_type_strs[] = { > "physical memory map-out event", > }; > > +const char *cper_mem_err_type_str(unsigned int etype) > +{ > + return etype < ARRAY_SIZE(cper_mem_err_type_strs) ? > + cper_mem_err_type_strs[etype] : "unknown"; > +} > +EXPORT_SYMBOL_GPL(cper_mem_err_type_str); > + > static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem) > { > if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS) > @@ -233,8 +241,7 @@ static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem) > if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) { > u8 etype = mem->error_type; > printk("%s""error_type: %d, %s\n", pfx, etype, > - etype < ARRAY_SIZE(cper_mem_err_type_strs) ? > - cper_mem_err_type_strs[etype] : "unknown"); > + cper_mem_err_type_str(etype)); > } > if (mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) { > const char *bank = NULL, *device = NULL; Ditto. > diff --git a/include/linux/cper.h b/include/linux/cper.h > index 2fc0ec3..c6d87fc 100644 > --- a/include/linux/cper.h > +++ b/include/linux/cper.h > @@ -395,6 +395,8 @@ struct cper_sec_pcie { > #pragma pack() > > u64 cper_next_record_id(void); > +const char *cper_severity_str(unsigned int); > +const char *cper_mem_err_type_str(unsigned int); > void cper_print_bits(const char *prefix, unsigned int bits, > const char * const strs[], unsigned int strs_size); > > diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h > index 21cdb0b..97f2192 100644 > --- a/include/ras/ras_event.h > +++ b/include/ras/ras_event.h > @@ -8,6 +8,68 @@ > #include <linux/tracepoint.h> > #include <linux/edac.h> > #include <linux/ktime.h> > +#include <linux/cper.h> > + > +/* > + * MCE Extended Error Log trace event > + * > + * These events are generated when hardware detects a corrected or > + * uncorrected event. > + * > + */ > + > +/* memory trace event */ > + > +#define LOC_LEN 512 > + > +TRACE_EVENT(extlog_mem_event, So this is a mem thing so we're defining a tracepoint for memory events, specifically. However, if extlog carries all kinds of errors outside, not only DRAM errors, we should do a TRACE_EVENT_CLASS which contains the shared args to every error type and then make a mem event ontop of it. > + TP_PROTO(u32 etype, > + char *dimm_info, > + const uuid_le *fru_id, > + char *fru_text, > + u64 error_count, > + u32 severity, > + u64 phy_addr, > + char *mem_loc), > + > + TP_ARGS(etype, dimm_info, fru_id, fru_text, error_count, severity, > + phy_addr, mem_loc), > + > + TP_STRUCT__entry( > + __field(u32, etype) > + __dynamic_array(char, dimm_info, LOC_LEN) > + __field(u64, error_count) > + __field(u32, severity) > + __field(u64, paddr) > + __string(mem_loc, mem_loc) > + __dynamic_array(char, fru, LOC_LEN) > + ), > + > + TP_fast_assign( > + __entry->error_count = error_count; > + __entry->severity = severity; > + __entry->etype = etype; > + if (dimm_info[0] != '\0') > + snprintf(__get_dynamic_array(dimm_info), LOC_LEN - 1, > + "%s", dimm_info); > + else > + __assign_str(dimm_info, ""); > + __entry->paddr = phy_addr; > + __assign_str(mem_loc, mem_loc); > + snprintf(__get_dynamic_array(fru), LOC_LEN - 1, > + "FRU: %pUl %.20s", fru_id, fru_text); > + ), > + > + TP_printk("%llu %s error%s: %s %s physical addr: 0x%016llx%s %s", > + __entry->error_count, > + cper_severity_str(__entry->severity), > + __entry->error_count > 1 ? "s" : "", > + cper_mem_err_type_str(__entry->etype), > + __get_str(dimm_info), > + __entry->paddr, > + __get_str(mem_loc), > + __get_str(fru)) > +); > > /* > * Hardware Events Report > diff --git a/kernel/trace/ras-traces.c b/kernel/trace/ras-traces.c > index b0c6ed1..197b1ea 100644 > --- a/kernel/trace/ras-traces.c > +++ b/kernel/trace/ras-traces.c > @@ -9,4 +9,5 @@ > #define TRACE_INCLUDE_PATH ../../include/ras > #include <ras/ras_event.h> > > +EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event); > EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event); > -- > 1.8.4.3 > >
On Fri, Mar 07, 2014 at 12:44:16PM +0100, Borislav Petkov wrote: [...] > > +static void mem_err_location(struct cper_sec_mem_err *mem) > > +{ > > + char *p; > > + u32 n = 0; > > + > > + memset(mem_location, 0, LOC_LEN); > > + p = mem_location; > > + if (mem->validation_bits & CPER_MEM_VALID_NODE) > > + n += sprintf(p + n, " node: %d", mem->node); > > + if (n >= LOC_LEN) > > + goto end; > > + if (mem->validation_bits & CPER_MEM_VALID_CARD) > > + n += sprintf(p + n, " card: %d", mem->card); > > + if (n >= LOC_LEN) > > + goto end; > > + if (mem->validation_bits & CPER_MEM_VALID_MODULE) > > + n += sprintf(p + n, " module: %d", mem->module); > > + if (n >= LOC_LEN) > > + goto end; > > + if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER) > > + n += sprintf(p + n, " rank: %d", mem->rank); > > + if (n >= LOC_LEN) > > + goto end; > > + if (mem->validation_bits & CPER_MEM_VALID_BANK) > > + n += sprintf(p + n, " bank: %d", mem->bank); > > + if (n >= LOC_LEN) > > + goto end; > > + if (mem->validation_bits & CPER_MEM_VALID_DEVICE) > > + n += sprintf(p + n, " device: %d", mem->device); > > + if (n >= LOC_LEN) > > + goto end; > > + if (mem->validation_bits & CPER_MEM_VALID_ROW) > > + n += sprintf(p + n, " row: %d", mem->row); > > + if (n >= LOC_LEN) > > + goto end; > > + if (mem->validation_bits & CPER_MEM_VALID_COLUMN) > > + n += sprintf(p + n, " column: %d", mem->column); > > + if (n >= LOC_LEN) > > + goto end; > > + if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION) > > + n += sprintf(p + n, " bit_position: %d", mem->bit_pos); > > + if (n >= LOC_LEN) > > + goto end; > > + if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID) > > + n += sprintf(p + n, " requestor_id: 0x%016llx", > > + mem->requestor_id); > > + if (n >= LOC_LEN) > > + goto end; > > + if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID) > > + n += sprintf(p + n, " responder_id: 0x%016llx", > > + mem->responder_id); > > + if (n >= LOC_LEN) > > + goto end; > > + if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID) > > + n += sprintf(p + n, " target_id: 0x%016llx", mem->target_id); > > +end: > > + return; > > +} > > Looks like this wants to share with cper_print_mem() - definitely a lot > of duplication there. > > > + > > +static void dimm_err_location(struct cper_sec_mem_err *mem) > > +{ > > + const char *bank = NULL, *device = NULL; > > + > > + memset(dimm_location, 0, LOC_LEN); > > + if (!(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE)) > > + return; > > + > > + dmi_memdev_name(mem->mem_dev_handle, &bank, &device); > > + if (bank != NULL && device != NULL) > > + snprintf(dimm_location, LOC_LEN - 1, "%s %s", bank, device); > > + else > > + snprintf(dimm_location, LOC_LEN - 1, "DMI handle: 0x%.4x", > > + mem->mem_dev_handle); > > +} > > This one too. > Not really. Firstly they service for different purpose. Secondly the format here can be changed/updated depending on further requirment. I can't assume they always keep the same format. > > + > > +static void trace_mem_error(const uuid_le *fru_id, char *fru_text, > > + u64 err_count, u32 severity, > > + struct cper_sec_mem_err *mem) > > +{ > > + u32 etype = ~0U; > > + u64 phy_addr = ~0ull; > > I'm assuming userspace knows that all 1s means field value is invalid? Yep, I suppose so. > > > + unsigned long flags; > > + > > + if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) > > + etype = mem->error_type; > > newline. Sure. [...] > We probably need a mechanism to disable printking to dmesg once > userspace has opened the tracepoint. Do we really need to do that? IMHO, I think they are used for two different usages, just like dmesg & mcelog. [...] > > static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem) > > { > > if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS) > > @@ -233,8 +241,7 @@ static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem) > > if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) { > > u8 etype = mem->error_type; > > printk("%s""error_type: %d, %s\n", pfx, etype, > > - etype < ARRAY_SIZE(cper_mem_err_type_strs) ? > > - cper_mem_err_type_strs[etype] : "unknown"); > > + cper_mem_err_type_str(etype)); > > } > > if (mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) { > > const char *bank = NULL, *device = NULL; > > Ditto. I know you hope the print function in CPER & trace for cpi_extlog can be merged into one. I just have one concern about it. Can we ensure these two functions keeping align all the time? IOW, merge them for now until change happens one day? [...] > > +#define LOC_LEN 512 > > + > > +TRACE_EVENT(extlog_mem_event, > > So this is a mem thing so we're defining a tracepoint for memory events, > specifically. > > However, if extlog carries all kinds of errors outside, not only DRAM > errors, we should do a TRACE_EVENT_CLASS which contains the shared args > to every error type and then make a mem event ontop of it. I agree.
Em Mon, 10 Mar 2014 04:22:42 -0400 "Chen, Gong" <gong.chen@linux.intel.com> escreveu: > On Fri, Mar 07, 2014 at 12:44:16PM +0100, Borislav Petkov wrote: > [...] > > > +static void mem_err_location(struct cper_sec_mem_err *mem) > > > +{ > > > + char *p; > > > + u32 n = 0; > > > + > > > + memset(mem_location, 0, LOC_LEN); > > > + p = mem_location; > > > + if (mem->validation_bits & CPER_MEM_VALID_NODE) > > > + n += sprintf(p + n, " node: %d", mem->node); > > > + if (n >= LOC_LEN) > > > + goto end; > > > + if (mem->validation_bits & CPER_MEM_VALID_CARD) > > > + n += sprintf(p + n, " card: %d", mem->card); > > > + if (n >= LOC_LEN) > > > + goto end; > > > + if (mem->validation_bits & CPER_MEM_VALID_MODULE) > > > + n += sprintf(p + n, " module: %d", mem->module); > > > + if (n >= LOC_LEN) > > > + goto end; > > > + if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER) > > > + n += sprintf(p + n, " rank: %d", mem->rank); > > > + if (n >= LOC_LEN) > > > + goto end; > > > + if (mem->validation_bits & CPER_MEM_VALID_BANK) > > > + n += sprintf(p + n, " bank: %d", mem->bank); > > > + if (n >= LOC_LEN) > > > + goto end; > > > + if (mem->validation_bits & CPER_MEM_VALID_DEVICE) > > > + n += sprintf(p + n, " device: %d", mem->device); > > > + if (n >= LOC_LEN) > > > + goto end; > > > + if (mem->validation_bits & CPER_MEM_VALID_ROW) > > > + n += sprintf(p + n, " row: %d", mem->row); > > > + if (n >= LOC_LEN) > > > + goto end; > > > + if (mem->validation_bits & CPER_MEM_VALID_COLUMN) > > > + n += sprintf(p + n, " column: %d", mem->column); > > > + if (n >= LOC_LEN) > > > + goto end; > > > + if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION) > > > + n += sprintf(p + n, " bit_position: %d", mem->bit_pos); > > > + if (n >= LOC_LEN) > > > + goto end; > > > + if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID) > > > + n += sprintf(p + n, " requestor_id: 0x%016llx", > > > + mem->requestor_id); > > > + if (n >= LOC_LEN) > > > + goto end; > > > + if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID) > > > + n += sprintf(p + n, " responder_id: 0x%016llx", > > > + mem->responder_id); > > > + if (n >= LOC_LEN) > > > + goto end; > > > + if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID) > > > + n += sprintf(p + n, " target_id: 0x%016llx", mem->target_id); > > > +end: > > > + return; > > > +} > > > > Looks like this wants to share with cper_print_mem() - definitely a lot > > of duplication there. > > > > > + > > > +static void dimm_err_location(struct cper_sec_mem_err *mem) > > > +{ > > > + const char *bank = NULL, *device = NULL; > > > + > > > + memset(dimm_location, 0, LOC_LEN); > > > + if (!(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE)) > > > + return; > > > + > > > + dmi_memdev_name(mem->mem_dev_handle, &bank, &device); > > > + if (bank != NULL && device != NULL) > > > + snprintf(dimm_location, LOC_LEN - 1, "%s %s", bank, device); > > > + else > > > + snprintf(dimm_location, LOC_LEN - 1, "DMI handle: 0x%.4x", > > > + mem->mem_dev_handle); > > > +} > > > > This one too. > > > Not really. Firstly they service for different purpose. Secondly the > format here can be changed/updated depending on further requirment. > I can't assume they always keep the same format. Changing the format breaks any userspace application that relies on parsing them. That's an API breakage. Adding more data could be fine, if we take enough care when doing it, and properly document how userspace is supposed to parse it. > > > + > > > +static void trace_mem_error(const uuid_le *fru_id, char *fru_text, > > > + u64 err_count, u32 severity, > > > + struct cper_sec_mem_err *mem) > > > +{ > > > + u32 etype = ~0U; > > > + u64 phy_addr = ~0ull; > > > > I'm assuming userspace knows that all 1s means field value is invalid? > Yep, I suppose so. Well, actually, EDAC drivers use 0 to indicate an unknown physical address. The better is to use the same standard used there. See the code at ghes_edac.c: /* Cleans the error report buffer */ memset(e, 0, sizeof (*e)); e->error_count = 1; strcpy(e->label, "unknown label"); e->msg = pvt->msg; e->other_detail = pvt->other_detail; e->top_layer = -1; e->mid_layer = -1; e->low_layer = -1; *pvt->other_detail = '\0'; *pvt->msg = '\0'; > > > > > > + unsigned long flags; > > > + > > > + if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) > > > + etype = mem->error_type; > > > > newline. > Sure. > > [...] > > We probably need a mechanism to disable printking to dmesg once > > userspace has opened the tracepoint. > Do we really need to do that? IMHO, I think they are used for two different > usages, just like dmesg & mcelog. > > [...] > > > static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem) > > > { > > > if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS) > > > @@ -233,8 +241,7 @@ static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem) > > > if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) { > > > u8 etype = mem->error_type; > > > printk("%s""error_type: %d, %s\n", pfx, etype, > > > - etype < ARRAY_SIZE(cper_mem_err_type_strs) ? > > > - cper_mem_err_type_strs[etype] : "unknown"); > > > + cper_mem_err_type_str(etype)); > > > } > > > if (mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) { > > > const char *bank = NULL, *device = NULL; > > > > Ditto. > I know you hope the print function in CPER & trace for cpi_extlog can be > merged into one. I just have one concern about it. Can we ensure these > two functions keeping align all the time? IOW, merge them for now until > change happens one day? IMHO, that's the best. > [...] > > > +#define LOC_LEN 512 > > > + > > > +TRACE_EVENT(extlog_mem_event, > > > > So this is a mem thing so we're defining a tracepoint for memory events, > > specifically. > > > > However, if extlog carries all kinds of errors outside, not only DRAM > > errors, we should do a TRACE_EVENT_CLASS which contains the shared args > > to every error type and then make a mem event ontop of it. > I agree.
On Mon, Mar 10, 2014 at 07:04:35AM -0300, Mauro Carvalho Chehab wrote: > Changing the format breaks any userspace application that relies on > parsing them. That's an API breakage. Adding more data could be > fine, if we take enough care when doing it, and properly document > how userspace is supposed to parse it. Yes, we don't want code duplication if it can be helped. Besides, having one function doing the error format issual keeps us from the case when having two or more diverge from one another. > Well, actually, EDAC drivers use 0 to indicate an unknown physical address. > The better is to use the same standard used there. However, physical address 0 is a valid address... all FFF...Fs is hardly valid.
On Mon, Mar 10, 2014 at 04:22:42AM -0400, Chen, Gong wrote: > Do we really need to do that? IMHO, I think they are used for two > different usages, just like dmesg & mcelog. Yes, we do. We want to be able to disable issuing errors into dmesg so that people don't have to parse it. This is the main reason why we're adding tracepoints - so that we have structured error format going out of the kernel.
Em Mon, 10 Mar 2014 11:31:29 +0100 Borislav Petkov <bp@alien8.de> escreveu: > On Mon, Mar 10, 2014 at 07:04:35AM -0300, Mauro Carvalho Chehab wrote: > > Changing the format breaks any userspace application that relies on > > parsing them. That's an API breakage. Adding more data could be > > fine, if we take enough care when doing it, and properly document > > how userspace is supposed to parse it. > > Yes, we don't want code duplication if it can be helped. Besides, having > one function doing the error format issual keeps us from the case when > having two or more diverge from one another. > > > Well, actually, EDAC drivers use 0 to indicate an unknown physical address. > > The better is to use the same standard used there. > > However, physical address 0 is a valid address... all FFF...Fs is hardly valid. True, but if we decide to go on that direction, a change like that should then be done on all EDAC drivers, and that's an API change. We also need to be sure that userspace will handle this change properly. Regards, Mauro
On Mon, Mar 10, 2014 at 08:41:13AM -0300, Mauro Carvalho Chehab wrote: > True, but if we decide to go on that direction, a change like that > should then be done on all EDAC drivers, and that's an API change. > > We also need to be sure that userspace will handle this change > properly. We don't have a choice: [ 0.000000] e820: BIOS-provided physical RAM map: [ 0.000000] BIOS-e820: [mem 0x0000000000000000-0x000000000009e7ff] usable
Pj4gVHJ1ZSwgYnV0IGlmIHdlIGRlY2lkZSB0byBnbyBvbiB0aGF0IGRpcmVjdGlvbiwgYSBjaGFu Z2UgbGlrZSB0aGF0DQo+PiBzaG91bGQgdGhlbiBiZSBkb25lIG9uIGFsbCBFREFDIGRyaXZlcnMs IGFuZCB0aGF0J3MgYW4gQVBJIGNoYW5nZS4NCj4+DQo+PiBXZSBhbHNvIG5lZWQgdG8gYmUgc3Vy ZSB0aGF0IHVzZXJzcGFjZSB3aWxsIGhhbmRsZSB0aGlzIGNoYW5nZQ0KPj4gcHJvcGVybHkuDQo+ DQo+IFdlIGRvbid0IGhhdmUgYSBjaG9pY2U6DQo+DQo+IFsgICAgMC4wMDAwMDBdIGU4MjA6IEJJ T1MtcHJvdmlkZWQgcGh5c2ljYWwgUkFNIG1hcDoNCj4gWyAgICAwLjAwMDAwMF0gQklPUy1lODIw OiBbbWVtIDB4MDAwMDAwMDAwMDAwMDAwMC0weDAwMDAwMDAwMDAwOWU3ZmZdIHVzYWJsZQ0KDQpT aW5jZSB3ZSBoYXZlIHNlcGFyYXRlIHRyYWNlIHBvaW50cyBmb3IgRURDQSBhbmQgZXh0bG9nIC0g d2UgY2FuIHVzZSBkaWZmZXJlbnQNCmNvbnZlbnRpb25zIGZvciAiaW52YWxpZCIuICBJZiB5b3Ug dGhpbmsgdGhhdCBjaGFuZ2luZyB0aGUgQUJJIGluIEVEQUMgd291bGQgYmUgYQ0KcHJvYmxlbSAt IHRoZW4geW91IGNhbiBrZWVwIHJ1bm5pbmcgdGhlIGFsbW9zdCBpbmZpbml0ZXNpbWFsIGNoYW5j ZSB0aGF0IHlvdSBoYXZlDQphIHJlYWwgZXJyb3IgYXQgMHgwMDAwMDAwMDAwMDAwMCB0aGF0IHlv dSB0cmVhdCBhcyB1bmtub3duIGFkZHJlc3MuDQoNCkRvZXMgWDg2IGFjdHVhbGx5IGFsbG9jYXRl IHRoYXQgcGFnZSBmb3IgdXNlPyAtIEkga25vdyBvbiBJdGFuaXVtIGl0IGdvdCB0aHJvd24gb3V0 DQp3aGVuIHJvdW5kaW5nIHVzYWJsZSBibG9ja3Mgb2YgbWVtb3J5IHRvIDE2TUIgYm91bmRhcmll cyBmb3IgY2FjaGUgY29oZXJlbmN5DQpyZWFzb25zLg0KDQotVG9ueQ0K -- To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> Not really. Firstly they service for different purpose. Secondly the > format here can be changed/updated depending on further requirment. > I can't assume they always keep the same format. I can't imagine a reason why we'd want them to be different though. If a reason does come up in the future, then we can clone & edit the function to do different things - but until then we should share code. Note: there will be changes periodically when UEFI changes the error record format (they have added new fields in the past - I expect they will add more in the future). We should be able to add more stuff to the end of the "mem_loc[]" string as long as we write user-mode parsers that won't be surprised by extra fields there. -Tony
On Mon, Mar 10, 2014 at 05:42:45PM +0000, Luck, Tony wrote: > I can't imagine a reason why we'd want them to be different though. > If a reason does come up in the future, then we can clone & edit > the function to do different things - but until then we should share code. > > Note: there will be changes periodically when UEFI changes the error > record format (they have added new fields in the past - I expect they > will add more in the future). We should be able to add more stuff to > the end of the "mem_loc[]" string as long as we write user-mode > parsers that won't be surprised by extra fields there. > > -Tony OK, I will follow you guys suggestion for next version, like code sharing, trace/printk switching
On Mon, Mar 10, 2014 at 05:37:19PM +0000, Luck, Tony wrote: > Since we have separate trace points for EDCA and extlog - we can use > different conventions for "invalid". If you think that changing the > ABI in EDAC would be a problem - then you can keep running the almost > infinitesimal chance that you have a real error at 0x00000000000000 > that you treat as unknown address. I hardly believe changing that would be noticed by anyone. > Does X86 actually allocate that page for use? - I know on Itanium > it got thrown out when rounding usable blocks of memory to 16MB > boundaries for cache coherency reasons. Yeah, it turns out we reserve at least the first page - 64K by default though - because BIOS likes to make love to that range. Detailed explanation from Kconfig text below. Still, for correctness sake at least, I think we should use a value which denotes an invalid memory address, i.e. ~0x0, for example, and not a valid one. config X86_RESERVE_LOW int "Amount of low memory, in kilobytes, to reserve for the BIOS" default 64 range 4 640 ---help--- Specify the amount of low memory to reserve for the BIOS. The first page contains BIOS data structures that the kernel must not use, so that page must always be reserved. By default we reserve the first 64K of physical RAM, as a number of BIOSes are known to corrupt that memory range during events such as suspend/resume or monitor cable insertion, so it must not be used by the kernel. You can set this to 4 if you are absolutely sure that you trust the BIOS to get all its memory reservations and usages right. If you know your BIOS have problems beyond the default 64K area, you can set this to 640 to avoid using the entire low memory range. If you have doubts about the BIOS (e.g. suspend/resume does not work or there's kernel crashes after certain hardware hotplug events) then you might want to enable X86_CHECK_BIOS_CORRUPTION=y to allow the kernel to check typical corruption patterns. Leave this to the default value of 64 if you are unsure.
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 4770de5..3e569d4 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -363,6 +363,7 @@ config ACPI_EXTLOG Enhanced MCA Logging allows firmware to provide additional error information to system software, synchronous with MCE or CMCI. This - driver adds support for that functionality. + driver adds support for that functionality with corresponding + tracepoint which carries that information to userspace. endif # ACPI diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 0331f91..f6abc4a 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -82,4 +82,5 @@ obj-$(CONFIG_ACPI_PROCESSOR_AGGREGATOR) += acpi_pad.o obj-$(CONFIG_ACPI_APEI) += apei/ +CFLAGS_acpi_extlog.o := -I$(src) obj-$(CONFIG_ACPI_EXTLOG) += acpi_extlog.o diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index c4a5d87..fbdebad 100644 --- a/drivers/acpi/acpi_extlog.c +++ b/drivers/acpi/acpi_extlog.c @@ -14,8 +14,10 @@ #include <linux/edac.h> #include <asm/cpu.h> #include <asm/mce.h> +#include <linux/dmi.h> #include "apei/apei-internal.h" +#include <ras/ras_event.h> #define EXT_ELOG_ENTRY_MASK GENMASK_ULL(51, 0) /* elog entry address mask */ @@ -44,6 +46,11 @@ struct extlog_l1_head { static int old_edac_report_status; static u8 extlog_dsm_uuid[] __initdata = "663E35AF-CC10-41A4-88EA-5470AF055295"; +static const uuid_le invalid_uuid = NULL_UUID_LE; + +static DEFINE_RAW_SPINLOCK(trace_lock); +static char mem_location[LOC_LEN]; +static char dimm_location[LOC_LEN]; /* L1 table related physical address */ static u64 elog_base; @@ -69,6 +76,106 @@ static u32 l1_percpu_entry; #define ELOG_ENTRY_ADDR(phyaddr) \ (phyaddr - elog_base + (u8 *)elog_addr) +static void mem_err_location(struct cper_sec_mem_err *mem) +{ + char *p; + u32 n = 0; + + memset(mem_location, 0, LOC_LEN); + p = mem_location; + if (mem->validation_bits & CPER_MEM_VALID_NODE) + n += sprintf(p + n, " node: %d", mem->node); + if (n >= LOC_LEN) + goto end; + if (mem->validation_bits & CPER_MEM_VALID_CARD) + n += sprintf(p + n, " card: %d", mem->card); + if (n >= LOC_LEN) + goto end; + if (mem->validation_bits & CPER_MEM_VALID_MODULE) + n += sprintf(p + n, " module: %d", mem->module); + if (n >= LOC_LEN) + goto end; + if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER) + n += sprintf(p + n, " rank: %d", mem->rank); + if (n >= LOC_LEN) + goto end; + if (mem->validation_bits & CPER_MEM_VALID_BANK) + n += sprintf(p + n, " bank: %d", mem->bank); + if (n >= LOC_LEN) + goto end; + if (mem->validation_bits & CPER_MEM_VALID_DEVICE) + n += sprintf(p + n, " device: %d", mem->device); + if (n >= LOC_LEN) + goto end; + if (mem->validation_bits & CPER_MEM_VALID_ROW) + n += sprintf(p + n, " row: %d", mem->row); + if (n >= LOC_LEN) + goto end; + if (mem->validation_bits & CPER_MEM_VALID_COLUMN) + n += sprintf(p + n, " column: %d", mem->column); + if (n >= LOC_LEN) + goto end; + if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION) + n += sprintf(p + n, " bit_position: %d", mem->bit_pos); + if (n >= LOC_LEN) + goto end; + if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID) + n += sprintf(p + n, " requestor_id: 0x%016llx", + mem->requestor_id); + if (n >= LOC_LEN) + goto end; + if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID) + n += sprintf(p + n, " responder_id: 0x%016llx", + mem->responder_id); + if (n >= LOC_LEN) + goto end; + if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID) + n += sprintf(p + n, " target_id: 0x%016llx", mem->target_id); +end: + return; +} + +static void dimm_err_location(struct cper_sec_mem_err *mem) +{ + const char *bank = NULL, *device = NULL; + + memset(dimm_location, 0, LOC_LEN); + if (!(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE)) + return; + + dmi_memdev_name(mem->mem_dev_handle, &bank, &device); + if (bank != NULL && device != NULL) + snprintf(dimm_location, LOC_LEN - 1, "%s %s", bank, device); + else + snprintf(dimm_location, LOC_LEN - 1, "DMI handle: 0x%.4x", + mem->mem_dev_handle); +} + +static void trace_mem_error(const uuid_le *fru_id, char *fru_text, + u64 err_count, u32 severity, + struct cper_sec_mem_err *mem) +{ + u32 etype = ~0U; + u64 phy_addr = ~0ull; + unsigned long flags; + + if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) + etype = mem->error_type; + if (mem->validation_bits & CPER_MEM_VALID_PA) { + phy_addr = mem->physical_addr; + if (mem->validation_bits & CPER_MEM_VALID_PA_MASK) + phy_addr &= mem->physical_addr_mask; + } + + raw_spin_lock_irqsave(&trace_lock, flags); + mem_err_location(mem); + dimm_err_location(mem); + + trace_extlog_mem_event(etype, dimm_location, fru_id, fru_text, + err_count, severity, phy_addr, mem_location); + raw_spin_unlock_irqrestore(&trace_lock, flags); +} + static struct acpi_generic_status *extlog_elog_entry_check(int cpu, int bank) { int idx; @@ -137,7 +244,12 @@ static int extlog_print(struct notifier_block *nb, unsigned long val, struct mce *mce = (struct mce *)data; int bank = mce->bank; int cpu = mce->extcpu; - struct acpi_generic_status *estatus; + struct acpi_generic_status *estatus, *tmp; + struct acpi_generic_data *gdata; + const uuid_le *fru_id = &invalid_uuid; + char *fru_text = ""; + uuid_le *sec_type; + static u64 err_count; int rc; estatus = extlog_elog_entry_check(cpu, bank); @@ -149,6 +261,23 @@ static int extlog_print(struct notifier_block *nb, unsigned long val, estatus->block_status = 0; rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu); + tmp = (struct acpi_generic_status *)elog_buf; + gdata = (struct acpi_generic_data *)(tmp + 1); + rc = print_extlog_rcd(NULL, tmp, cpu); + + /* trace extended error log */ + err_count++; + if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID) + fru_id = (uuid_le *)gdata->fru_id; + if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT) + fru_text = gdata->fru_text; + sec_type = (uuid_le *)gdata->section_type; + if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) { + struct cper_sec_mem_err *mem_err = (void *)(gdata + 1); + if (gdata->error_data_length >= sizeof(*mem_err)) + trace_mem_error(fru_id, fru_text, err_count, + gdata->error_severity, mem_err); + } return NOTIFY_STOP; } diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index 1491dd4..9d3e2c4 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -57,11 +57,12 @@ static const char *cper_severity_strs[] = { "info", }; -static const char *cper_severity_str(unsigned int severity) +const char *cper_severity_str(unsigned int severity) { return severity < ARRAY_SIZE(cper_severity_strs) ? cper_severity_strs[severity] : "unknown"; } +EXPORT_SYMBOL_GPL(cper_severity_str); /* * cper_print_bits - print strings for set bits @@ -196,6 +197,13 @@ static const char *cper_mem_err_type_strs[] = { "physical memory map-out event", }; +const char *cper_mem_err_type_str(unsigned int etype) +{ + return etype < ARRAY_SIZE(cper_mem_err_type_strs) ? + cper_mem_err_type_strs[etype] : "unknown"; +} +EXPORT_SYMBOL_GPL(cper_mem_err_type_str); + static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem) { if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS) @@ -233,8 +241,7 @@ static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem) if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) { u8 etype = mem->error_type; printk("%s""error_type: %d, %s\n", pfx, etype, - etype < ARRAY_SIZE(cper_mem_err_type_strs) ? - cper_mem_err_type_strs[etype] : "unknown"); + cper_mem_err_type_str(etype)); } if (mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) { const char *bank = NULL, *device = NULL; diff --git a/include/linux/cper.h b/include/linux/cper.h index 2fc0ec3..c6d87fc 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -395,6 +395,8 @@ struct cper_sec_pcie { #pragma pack() u64 cper_next_record_id(void); +const char *cper_severity_str(unsigned int); +const char *cper_mem_err_type_str(unsigned int); void cper_print_bits(const char *prefix, unsigned int bits, const char * const strs[], unsigned int strs_size); diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index 21cdb0b..97f2192 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -8,6 +8,68 @@ #include <linux/tracepoint.h> #include <linux/edac.h> #include <linux/ktime.h> +#include <linux/cper.h> + +/* + * MCE Extended Error Log trace event + * + * These events are generated when hardware detects a corrected or + * uncorrected event. + * + */ + +/* memory trace event */ + +#define LOC_LEN 512 + +TRACE_EVENT(extlog_mem_event, + TP_PROTO(u32 etype, + char *dimm_info, + const uuid_le *fru_id, + char *fru_text, + u64 error_count, + u32 severity, + u64 phy_addr, + char *mem_loc), + + TP_ARGS(etype, dimm_info, fru_id, fru_text, error_count, severity, + phy_addr, mem_loc), + + TP_STRUCT__entry( + __field(u32, etype) + __dynamic_array(char, dimm_info, LOC_LEN) + __field(u64, error_count) + __field(u32, severity) + __field(u64, paddr) + __string(mem_loc, mem_loc) + __dynamic_array(char, fru, LOC_LEN) + ), + + TP_fast_assign( + __entry->error_count = error_count; + __entry->severity = severity; + __entry->etype = etype; + if (dimm_info[0] != '\0') + snprintf(__get_dynamic_array(dimm_info), LOC_LEN - 1, + "%s", dimm_info); + else + __assign_str(dimm_info, ""); + __entry->paddr = phy_addr; + __assign_str(mem_loc, mem_loc); + snprintf(__get_dynamic_array(fru), LOC_LEN - 1, + "FRU: %pUl %.20s", fru_id, fru_text); + ), + + TP_printk("%llu %s error%s: %s %s physical addr: 0x%016llx%s %s", + __entry->error_count, + cper_severity_str(__entry->severity), + __entry->error_count > 1 ? "s" : "", + cper_mem_err_type_str(__entry->etype), + __get_str(dimm_info), + __entry->paddr, + __get_str(mem_loc), + __get_str(fru)) +); /* * Hardware Events Report diff --git a/kernel/trace/ras-traces.c b/kernel/trace/ras-traces.c index b0c6ed1..197b1ea 100644 --- a/kernel/trace/ras-traces.c +++ b/kernel/trace/ras-traces.c @@ -9,4 +9,5 @@ #define TRACE_INCLUDE_PATH ../../include/ras #include <ras/ras_event.h> +EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event); EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);
Add trace interface to elaborate all H/W error related information. Signed-off-by: Chen, Gong <gong.chen@linux.intel.com> --- drivers/acpi/Kconfig | 3 +- drivers/acpi/Makefile | 1 + drivers/acpi/acpi_extlog.c | 131 +++++++++++++++++++++++++++++++++++++++++++- drivers/firmware/efi/cper.c | 13 ++++- include/linux/cper.h | 2 + include/ras/ras_event.h | 62 +++++++++++++++++++++ kernel/trace/ras-traces.c | 1 + 7 files changed, 208 insertions(+), 5 deletions(-)