[2/2] trace, RAS: Add eMCA trace event interface

Message ID	1393924997-8992-3-git-send-email-gong.chen@linux.intel.com (mailing list archive)
State	Not Applicable, archived
Headers	show Return-Path: <linux-acpi-owner@kernel.org> From: "Chen, Gong" <gong.chen@linux.intel.com> To: tony.luck@intel.com, bp@alien8.de, m.chehab@samsung.com Cc: arozansk@redhat.com, linux-acpi@vger.kernel.org, "Chen, Gong" <gong.chen@linux.intel.com> Subject: [PATCH 2/2] trace, RAS: Add eMCA trace event interface Date: Tue, 4 Mar 2014 04:23:17 -0500 Message-Id: <1393924997-8992-3-git-send-email-gong.chen@linux.intel.com> In-Reply-To: <1393924997-8992-1-git-send-email-gong.chen@linux.intel.com> References: <1393924997-8992-1-git-send-email-gong.chen@linux.intel.com> Sender: linux-acpi-owner@vger.kernel.org Precedence: bulk

Chen Gong March 4, 2014, 9:23 a.m. UTC

Add trace interface to elaborate all H/W error related information.

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
---
 drivers/acpi/Kconfig        |   3 +-
 drivers/acpi/Makefile       |   1 +
 drivers/acpi/acpi_extlog.c  | 131 +++++++++++++++++++++++++++++++++++++++++++-
 drivers/firmware/efi/cper.c |  13 ++++-
 include/linux/cper.h        |   2 +
 include/ras/ras_event.h     |  62 +++++++++++++++++++++
 kernel/trace/ras-traces.c   |   1 +
 7 files changed, 208 insertions(+), 5 deletions(-)

Borislav Petkov March 7, 2014, 11:44 a.m. UTC | #1

On Tue, Mar 04, 2014 at 04:23:17AM -0500, Chen, Gong wrote:
> Add trace interface to elaborate all H/W error related information.
> 
> Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
> ---
>  drivers/acpi/Kconfig        |   3 +-
>  drivers/acpi/Makefile       |   1 +
>  drivers/acpi/acpi_extlog.c  | 131 +++++++++++++++++++++++++++++++++++++++++++-
>  drivers/firmware/efi/cper.c |  13 ++++-
>  include/linux/cper.h        |   2 +
>  include/ras/ras_event.h     |  62 +++++++++++++++++++++
>  kernel/trace/ras-traces.c   |   1 +
>  7 files changed, 208 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
> index 4770de5..3e569d4 100644
> --- a/drivers/acpi/Kconfig
> +++ b/drivers/acpi/Kconfig
> @@ -363,6 +363,7 @@ config ACPI_EXTLOG
>  
>  	  Enhanced MCA Logging allows firmware to provide additional error
>  	  information to system software, synchronous with MCE or CMCI. This
> -	  driver adds support for that functionality.
> +	  driver adds support for that functionality with corresponding
> +	  tracepoint which carries that information to userspace.
>  
>  endif	# ACPI
> diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
> index 0331f91..f6abc4a 100644
> --- a/drivers/acpi/Makefile
> +++ b/drivers/acpi/Makefile
> @@ -82,4 +82,5 @@ obj-$(CONFIG_ACPI_PROCESSOR_AGGREGATOR) += acpi_pad.o
>  
>  obj-$(CONFIG_ACPI_APEI)		+= apei/
>  
> +CFLAGS_acpi_extlog.o := -I$(src)
>  obj-$(CONFIG_ACPI_EXTLOG)	+= acpi_extlog.o
> diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
> index c4a5d87..fbdebad 100644
> --- a/drivers/acpi/acpi_extlog.c
> +++ b/drivers/acpi/acpi_extlog.c
> @@ -14,8 +14,10 @@
>  #include <linux/edac.h>
>  #include <asm/cpu.h>
>  #include <asm/mce.h>
> +#include <linux/dmi.h>
>  
>  #include "apei/apei-internal.h"
> +#include <ras/ras_event.h>
>  
>  #define EXT_ELOG_ENTRY_MASK	GENMASK_ULL(51, 0) /* elog entry address mask */
>  
> @@ -44,6 +46,11 @@ struct extlog_l1_head {
>  static int old_edac_report_status;
>  
>  static u8 extlog_dsm_uuid[] __initdata = "663E35AF-CC10-41A4-88EA-5470AF055295";
> +static const uuid_le invalid_uuid = NULL_UUID_LE;
> +
> +static DEFINE_RAW_SPINLOCK(trace_lock);
> +static char mem_location[LOC_LEN];
> +static char dimm_location[LOC_LEN];
>  
>  /* L1 table related physical address */
>  static u64 elog_base;
> @@ -69,6 +76,106 @@ static u32 l1_percpu_entry;
>  #define ELOG_ENTRY_ADDR(phyaddr) \
>  	(phyaddr - elog_base + (u8 *)elog_addr)
>  
> +static void mem_err_location(struct cper_sec_mem_err *mem)
> +{
> +	char *p;
> +	u32 n = 0;
> +
> +	memset(mem_location, 0, LOC_LEN);
> +	p = mem_location;
> +	if (mem->validation_bits & CPER_MEM_VALID_NODE)
> +		n += sprintf(p + n, " node: %d", mem->node);
> +	if (n >= LOC_LEN)
> +		goto end;
> +	if (mem->validation_bits & CPER_MEM_VALID_CARD)
> +		n += sprintf(p + n, " card: %d", mem->card);
> +	if (n >= LOC_LEN)
> +		goto end;
> +	if (mem->validation_bits & CPER_MEM_VALID_MODULE)
> +		n += sprintf(p + n, " module: %d", mem->module);
> +	if (n >= LOC_LEN)
> +		goto end;
> +	if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
> +		n += sprintf(p + n, " rank: %d", mem->rank);
> +	if (n >= LOC_LEN)
> +		goto end;
> +	if (mem->validation_bits & CPER_MEM_VALID_BANK)
> +		n += sprintf(p + n, " bank: %d", mem->bank);
> +	if (n >= LOC_LEN)
> +		goto end;
> +	if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
> +		n += sprintf(p + n, " device: %d", mem->device);
> +	if (n >= LOC_LEN)
> +		goto end;
> +	if (mem->validation_bits & CPER_MEM_VALID_ROW)
> +		n += sprintf(p + n, " row: %d", mem->row);
> +	if (n >= LOC_LEN)
> +		goto end;
> +	if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
> +		n += sprintf(p + n, " column: %d", mem->column);
> +	if (n >= LOC_LEN)
> +		goto end;
> +	if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
> +		n += sprintf(p + n, " bit_position: %d", mem->bit_pos);
> +	if (n >= LOC_LEN)
> +		goto end;
> +	if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
> +		n += sprintf(p + n, " requestor_id: 0x%016llx",
> +				mem->requestor_id);
> +	if (n >= LOC_LEN)
> +		goto end;
> +	if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
> +		n += sprintf(p + n, " responder_id: 0x%016llx",
> +				mem->responder_id);
> +	if (n >= LOC_LEN)
> +		goto end;
> +	if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
> +		n += sprintf(p + n, " target_id: 0x%016llx", mem->target_id);
> +end:
> +	return;
> +}

Looks like this wants to share with cper_print_mem() - definitely a lot
of duplication there.

> +
> +static void dimm_err_location(struct cper_sec_mem_err *mem)
> +{
> +	const char *bank = NULL, *device = NULL;
> +
> +	memset(dimm_location, 0, LOC_LEN);
> +	if (!(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE))
> +		return;
> +
> +	dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
> +	if (bank != NULL && device != NULL)
> +		snprintf(dimm_location, LOC_LEN - 1, "%s %s", bank, device);
> +	else
> +		snprintf(dimm_location, LOC_LEN - 1, "DMI handle: 0x%.4x",
> +			 mem->mem_dev_handle);
> +}

This one too.

> +
> +static void trace_mem_error(const uuid_le *fru_id, char *fru_text,
> +			    u64 err_count, u32 severity,
> +			    struct cper_sec_mem_err *mem)
> +{
> +	u32 etype = ~0U;
> +	u64 phy_addr = ~0ull;

I'm assuming userspace knows that all 1s means field value is invalid?

> +	unsigned long flags;
> +
> +	if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE)
> +		etype = mem->error_type;

newline.

> +	if (mem->validation_bits & CPER_MEM_VALID_PA) {
> +		phy_addr = mem->physical_addr;
> +		if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
> +			phy_addr &= mem->physical_addr_mask;
> +	}
> +
> +	raw_spin_lock_irqsave(&trace_lock, flags);
> +	mem_err_location(mem);
> +	dimm_err_location(mem);
> +
> +	trace_extlog_mem_event(etype, dimm_location, fru_id, fru_text,
> +			       err_count, severity, phy_addr, mem_location);
> +	raw_spin_unlock_irqrestore(&trace_lock, flags);
> +}
> +
>  static struct acpi_generic_status *extlog_elog_entry_check(int cpu, int bank)
>  {
>  	int idx;
> @@ -137,7 +244,12 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
>  	struct mce *mce = (struct mce *)data;
>  	int	bank = mce->bank;
>  	int	cpu = mce->extcpu;
> -	struct acpi_generic_status *estatus;
> +	struct acpi_generic_status *estatus, *tmp;
> +	struct acpi_generic_data *gdata;
> +	const uuid_le *fru_id = &invalid_uuid;
> +	char *fru_text = "";
> +	uuid_le *sec_type;
> +	static u64 err_count;
>  	int rc;
>  
>  	estatus = extlog_elog_entry_check(cpu, bank);
> @@ -149,6 +261,23 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
>  	estatus->block_status = 0;
>  
>  	rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu);
> +	tmp = (struct acpi_generic_status *)elog_buf;
> +	gdata = (struct acpi_generic_data *)(tmp + 1);
> +	rc = print_extlog_rcd(NULL, tmp, cpu);

We probably need a mechanism to disable printking to dmesg once
userspace has opened the tracepoint.

> +	/* trace extended error log */
> +	err_count++;
> +	if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
> +		fru_id = (uuid_le *)gdata->fru_id;
> +	if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
> +		fru_text = gdata->fru_text;
> +	sec_type = (uuid_le *)gdata->section_type;
> +	if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
> +		struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
> +		if (gdata->error_data_length >= sizeof(*mem_err))
> +			trace_mem_error(fru_id, fru_text, err_count,
> +					gdata->error_severity, mem_err);
> +	}
>  
>  	return NOTIFY_STOP;
>  }
> diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
> index 1491dd4..9d3e2c4 100644
> --- a/drivers/firmware/efi/cper.c
> +++ b/drivers/firmware/efi/cper.c
> @@ -57,11 +57,12 @@ static const char *cper_severity_strs[] = {
>  	"info",
>  };
>  
> -static const char *cper_severity_str(unsigned int severity)
> +const char *cper_severity_str(unsigned int severity)
>  {
>  	return severity < ARRAY_SIZE(cper_severity_strs) ?
>  		cper_severity_strs[severity] : "unknown";
>  }
> +EXPORT_SYMBOL_GPL(cper_severity_str);

Yes, this calls for a common file sharin cper and extlog functionality.

>  /*
>   * cper_print_bits - print strings for set bits
> @@ -196,6 +197,13 @@ static const char *cper_mem_err_type_strs[] = {
>  	"physical memory map-out event",
>  };
>  
> +const char *cper_mem_err_type_str(unsigned int etype)
> +{
> +	return etype < ARRAY_SIZE(cper_mem_err_type_strs) ?
> +		cper_mem_err_type_strs[etype] : "unknown";
> +}
> +EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
> +
>  static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
>  {
>  	if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
> @@ -233,8 +241,7 @@ static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
>  	if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
>  		u8 etype = mem->error_type;
>  		printk("%s""error_type: %d, %s\n", pfx, etype,
> -		       etype < ARRAY_SIZE(cper_mem_err_type_strs) ?
> -		       cper_mem_err_type_strs[etype] : "unknown");
> +			cper_mem_err_type_str(etype));
>  	}
>  	if (mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) {
>  		const char *bank = NULL, *device = NULL;

Ditto.

> diff --git a/include/linux/cper.h b/include/linux/cper.h
> index 2fc0ec3..c6d87fc 100644
> --- a/include/linux/cper.h
> +++ b/include/linux/cper.h
> @@ -395,6 +395,8 @@ struct cper_sec_pcie {
>  #pragma pack()
>  
>  u64 cper_next_record_id(void);
> +const char *cper_severity_str(unsigned int);
> +const char *cper_mem_err_type_str(unsigned int);
>  void cper_print_bits(const char *prefix, unsigned int bits,
>  		     const char * const strs[], unsigned int strs_size);
>  
> diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
> index 21cdb0b..97f2192 100644
> --- a/include/ras/ras_event.h
> +++ b/include/ras/ras_event.h
> @@ -8,6 +8,68 @@
>  #include <linux/tracepoint.h>
>  #include <linux/edac.h>
>  #include <linux/ktime.h>
> +#include <linux/cper.h>
> +
> +/*
> + * MCE Extended Error Log trace event
> + *
> + * These events are generated when hardware detects a corrected or
> + * uncorrected event.
> + *
> + */
> +
> +/* memory trace event */
> +
> +#define LOC_LEN		512
> +
> +TRACE_EVENT(extlog_mem_event,

So this is a mem thing so we're defining a tracepoint for memory events,
specifically.

However, if extlog carries all kinds of errors outside, not only DRAM
errors, we should do a TRACE_EVENT_CLASS which contains the shared args
to every error type and then make a mem event ontop of it.

> +	TP_PROTO(u32 etype,
> +		 char *dimm_info,
> +		 const uuid_le *fru_id,
> +		 char *fru_text,
> +		 u64 error_count,
> +		 u32 severity,
> +		 u64 phy_addr,
> +		 char *mem_loc),
> +
> +	TP_ARGS(etype, dimm_info, fru_id, fru_text, error_count, severity,
> +		phy_addr, mem_loc),
> +
> +	TP_STRUCT__entry(
> +		__field(u32, etype)
> +		__dynamic_array(char, dimm_info, LOC_LEN)
> +		__field(u64, error_count)
> +		__field(u32, severity)
> +		__field(u64, paddr)
> +		__string(mem_loc, mem_loc)
> +		__dynamic_array(char, fru, LOC_LEN)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->error_count = error_count;
> +		__entry->severity = severity;
> +		__entry->etype = etype;
> +		if (dimm_info[0] != '\0')
> +			snprintf(__get_dynamic_array(dimm_info), LOC_LEN - 1,
> +				 "%s", dimm_info);
> +		else
> +			__assign_str(dimm_info, "");
> +		__entry->paddr = phy_addr;
> +		__assign_str(mem_loc, mem_loc);
> +		snprintf(__get_dynamic_array(fru), LOC_LEN - 1,
> +			 "FRU: %pUl %.20s", fru_id, fru_text);
> +	),
> +
> +	TP_printk("%llu %s error%s: %s %s physical addr: 0x%016llx%s %s",
> +		  __entry->error_count,
> +		  cper_severity_str(__entry->severity),
> +		  __entry->error_count > 1 ? "s" : "",
> +		  cper_mem_err_type_str(__entry->etype),
> +		  __get_str(dimm_info),
> +		  __entry->paddr,
> +		  __get_str(mem_loc),
> +		  __get_str(fru))
> +);
>  
>  /*
>   * Hardware Events Report
> diff --git a/kernel/trace/ras-traces.c b/kernel/trace/ras-traces.c
> index b0c6ed1..197b1ea 100644
> --- a/kernel/trace/ras-traces.c
> +++ b/kernel/trace/ras-traces.c
> @@ -9,4 +9,5 @@
>  #define TRACE_INCLUDE_PATH ../../include/ras
>  #include <ras/ras_event.h>
>  
> +EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event);
>  EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);
> -- 
> 1.8.4.3
> 
>

Chen Gong March 10, 2014, 8:22 a.m. UTC | #2

On Fri, Mar 07, 2014 at 12:44:16PM +0100, Borislav Petkov wrote:
[...]
> > +static void mem_err_location(struct cper_sec_mem_err *mem)
> > +{
> > +	char *p;
> > +	u32 n = 0;
> > +
> > +	memset(mem_location, 0, LOC_LEN);
> > +	p = mem_location;
> > +	if (mem->validation_bits & CPER_MEM_VALID_NODE)
> > +		n += sprintf(p + n, " node: %d", mem->node);
> > +	if (n >= LOC_LEN)
> > +		goto end;
> > +	if (mem->validation_bits & CPER_MEM_VALID_CARD)
> > +		n += sprintf(p + n, " card: %d", mem->card);
> > +	if (n >= LOC_LEN)
> > +		goto end;
> > +	if (mem->validation_bits & CPER_MEM_VALID_MODULE)
> > +		n += sprintf(p + n, " module: %d", mem->module);
> > +	if (n >= LOC_LEN)
> > +		goto end;
> > +	if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
> > +		n += sprintf(p + n, " rank: %d", mem->rank);
> > +	if (n >= LOC_LEN)
> > +		goto end;
> > +	if (mem->validation_bits & CPER_MEM_VALID_BANK)
> > +		n += sprintf(p + n, " bank: %d", mem->bank);
> > +	if (n >= LOC_LEN)
> > +		goto end;
> > +	if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
> > +		n += sprintf(p + n, " device: %d", mem->device);
> > +	if (n >= LOC_LEN)
> > +		goto end;
> > +	if (mem->validation_bits & CPER_MEM_VALID_ROW)
> > +		n += sprintf(p + n, " row: %d", mem->row);
> > +	if (n >= LOC_LEN)
> > +		goto end;
> > +	if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
> > +		n += sprintf(p + n, " column: %d", mem->column);
> > +	if (n >= LOC_LEN)
> > +		goto end;
> > +	if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
> > +		n += sprintf(p + n, " bit_position: %d", mem->bit_pos);
> > +	if (n >= LOC_LEN)
> > +		goto end;
> > +	if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
> > +		n += sprintf(p + n, " requestor_id: 0x%016llx",
> > +				mem->requestor_id);
> > +	if (n >= LOC_LEN)
> > +		goto end;
> > +	if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
> > +		n += sprintf(p + n, " responder_id: 0x%016llx",
> > +				mem->responder_id);
> > +	if (n >= LOC_LEN)
> > +		goto end;
> > +	if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
> > +		n += sprintf(p + n, " target_id: 0x%016llx", mem->target_id);
> > +end:
> > +	return;
> > +}
> 
> Looks like this wants to share with cper_print_mem() - definitely a lot
> of duplication there.
> 
> > +
> > +static void dimm_err_location(struct cper_sec_mem_err *mem)
> > +{
> > +	const char *bank = NULL, *device = NULL;
> > +
> > +	memset(dimm_location, 0, LOC_LEN);
> > +	if (!(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE))
> > +		return;
> > +
> > +	dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
> > +	if (bank != NULL && device != NULL)
> > +		snprintf(dimm_location, LOC_LEN - 1, "%s %s", bank, device);
> > +	else
> > +		snprintf(dimm_location, LOC_LEN - 1, "DMI handle: 0x%.4x",
> > +			 mem->mem_dev_handle);
> > +}
> 
> This one too.
> 
Not really. Firstly they service for different purpose. Secondly the
format here can be changed/updated depending on further requirment.
I can't assume they always keep the same format.

> > +
> > +static void trace_mem_error(const uuid_le *fru_id, char *fru_text,
> > +			    u64 err_count, u32 severity,
> > +			    struct cper_sec_mem_err *mem)
> > +{
> > +	u32 etype = ~0U;
> > +	u64 phy_addr = ~0ull;
> 
> I'm assuming userspace knows that all 1s means field value is invalid?
Yep, I suppose so.

> 
> > +	unsigned long flags;
> > +
> > +	if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE)
> > +		etype = mem->error_type;
> 
> newline.
Sure.

[...]
> We probably need a mechanism to disable printking to dmesg once
> userspace has opened the tracepoint.
Do we really need to do that? IMHO, I think they are used for two different
usages, just like dmesg & mcelog.

[...]
> >  static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
> >  {
> >  	if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
> > @@ -233,8 +241,7 @@ static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
> >  	if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
> >  		u8 etype = mem->error_type;
> >  		printk("%s""error_type: %d, %s\n", pfx, etype,
> > -		       etype < ARRAY_SIZE(cper_mem_err_type_strs) ?
> > -		       cper_mem_err_type_strs[etype] : "unknown");
> > +			cper_mem_err_type_str(etype));
> >  	}
> >  	if (mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) {
> >  		const char *bank = NULL, *device = NULL;
> 
> Ditto.
I know you hope the print function in CPER & trace for cpi_extlog can be
merged into one. I just have one concern about it. Can we ensure these
two functions keeping align all the time? IOW, merge them for now until
change happens one day?

[...]
> > +#define LOC_LEN		512
> > +
> > +TRACE_EVENT(extlog_mem_event,
> 
> So this is a mem thing so we're defining a tracepoint for memory events,
> specifically.
> 
> However, if extlog carries all kinds of errors outside, not only DRAM
> errors, we should do a TRACE_EVENT_CLASS which contains the shared args
> to every error type and then make a mem event ontop of it.
I agree.

Mauro Carvalho Chehab March 10, 2014, 10:04 a.m. UTC | #3

Em Mon, 10 Mar 2014 04:22:42 -0400
"Chen, Gong" <gong.chen@linux.intel.com> escreveu:

> On Fri, Mar 07, 2014 at 12:44:16PM +0100, Borislav Petkov wrote:
> [...]
> > > +static void mem_err_location(struct cper_sec_mem_err *mem)
> > > +{
> > > +	char *p;
> > > +	u32 n = 0;
> > > +
> > > +	memset(mem_location, 0, LOC_LEN);
> > > +	p = mem_location;
> > > +	if (mem->validation_bits & CPER_MEM_VALID_NODE)
> > > +		n += sprintf(p + n, " node: %d", mem->node);
> > > +	if (n >= LOC_LEN)
> > > +		goto end;
> > > +	if (mem->validation_bits & CPER_MEM_VALID_CARD)
> > > +		n += sprintf(p + n, " card: %d", mem->card);
> > > +	if (n >= LOC_LEN)
> > > +		goto end;
> > > +	if (mem->validation_bits & CPER_MEM_VALID_MODULE)
> > > +		n += sprintf(p + n, " module: %d", mem->module);
> > > +	if (n >= LOC_LEN)
> > > +		goto end;
> > > +	if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
> > > +		n += sprintf(p + n, " rank: %d", mem->rank);
> > > +	if (n >= LOC_LEN)
> > > +		goto end;
> > > +	if (mem->validation_bits & CPER_MEM_VALID_BANK)
> > > +		n += sprintf(p + n, " bank: %d", mem->bank);
> > > +	if (n >= LOC_LEN)
> > > +		goto end;
> > > +	if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
> > > +		n += sprintf(p + n, " device: %d", mem->device);
> > > +	if (n >= LOC_LEN)
> > > +		goto end;
> > > +	if (mem->validation_bits & CPER_MEM_VALID_ROW)
> > > +		n += sprintf(p + n, " row: %d", mem->row);
> > > +	if (n >= LOC_LEN)
> > > +		goto end;
> > > +	if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
> > > +		n += sprintf(p + n, " column: %d", mem->column);
> > > +	if (n >= LOC_LEN)
> > > +		goto end;
> > > +	if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
> > > +		n += sprintf(p + n, " bit_position: %d", mem->bit_pos);
> > > +	if (n >= LOC_LEN)
> > > +		goto end;
> > > +	if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
> > > +		n += sprintf(p + n, " requestor_id: 0x%016llx",
> > > +				mem->requestor_id);
> > > +	if (n >= LOC_LEN)
> > > +		goto end;
> > > +	if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
> > > +		n += sprintf(p + n, " responder_id: 0x%016llx",
> > > +				mem->responder_id);
> > > +	if (n >= LOC_LEN)
> > > +		goto end;
> > > +	if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
> > > +		n += sprintf(p + n, " target_id: 0x%016llx", mem->target_id);
> > > +end:
> > > +	return;
> > > +}
> > 
> > Looks like this wants to share with cper_print_mem() - definitely a lot
> > of duplication there.
> > 
> > > +
> > > +static void dimm_err_location(struct cper_sec_mem_err *mem)
> > > +{
> > > +	const char *bank = NULL, *device = NULL;
> > > +
> > > +	memset(dimm_location, 0, LOC_LEN);
> > > +	if (!(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE))
> > > +		return;
> > > +
> > > +	dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
> > > +	if (bank != NULL && device != NULL)
> > > +		snprintf(dimm_location, LOC_LEN - 1, "%s %s", bank, device);
> > > +	else
> > > +		snprintf(dimm_location, LOC_LEN - 1, "DMI handle: 0x%.4x",
> > > +			 mem->mem_dev_handle);
> > > +}
> > 
> > This one too.
> > 
> Not really. Firstly they service for different purpose. Secondly the
> format here can be changed/updated depending on further requirment.
> I can't assume they always keep the same format.

Changing the format breaks any userspace application that relies on
parsing them. That's an API breakage. Adding more data could be
fine, if we take enough care when doing it, and properly document
how userspace is supposed to parse it.

> > > +
> > > +static void trace_mem_error(const uuid_le *fru_id, char *fru_text,
> > > +			    u64 err_count, u32 severity,
> > > +			    struct cper_sec_mem_err *mem)
> > > +{
> > > +	u32 etype = ~0U;
> > > +	u64 phy_addr = ~0ull;
> > 
> > I'm assuming userspace knows that all 1s means field value is invalid?
> Yep, I suppose so.

Well, actually, EDAC drivers use 0 to indicate an unknown physical address.
The better is to use the same standard used there.

See the code at ghes_edac.c:

	/* Cleans the error report buffer */
	memset(e, 0, sizeof (*e));
	e->error_count = 1;
	strcpy(e->label, "unknown label");
	e->msg = pvt->msg;
	e->other_detail = pvt->other_detail;
	e->top_layer = -1;
	e->mid_layer = -1;
	e->low_layer = -1;
	*pvt->other_detail = '\0';
	*pvt->msg = '\0';

> 
> > 
> > > +	unsigned long flags;
> > > +
> > > +	if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE)
> > > +		etype = mem->error_type;
> > 
> > newline.
> Sure.
> 
> [...]
> > We probably need a mechanism to disable printking to dmesg once
> > userspace has opened the tracepoint.
> Do we really need to do that? IMHO, I think they are used for two different
> usages, just like dmesg & mcelog.
> 
> [...]
> > >  static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
> > >  {
> > >  	if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
> > > @@ -233,8 +241,7 @@ static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
> > >  	if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
> > >  		u8 etype = mem->error_type;
> > >  		printk("%s""error_type: %d, %s\n", pfx, etype,
> > > -		       etype < ARRAY_SIZE(cper_mem_err_type_strs) ?
> > > -		       cper_mem_err_type_strs[etype] : "unknown");
> > > +			cper_mem_err_type_str(etype));
> > >  	}
> > >  	if (mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) {
> > >  		const char *bank = NULL, *device = NULL;
> > 
> > Ditto.
> I know you hope the print function in CPER & trace for cpi_extlog can be
> merged into one. I just have one concern about it. Can we ensure these
> two functions keeping align all the time? IOW, merge them for now until
> change happens one day?

IMHO, that's the best.

> [...]
> > > +#define LOC_LEN		512
> > > +
> > > +TRACE_EVENT(extlog_mem_event,
> > 
> > So this is a mem thing so we're defining a tracepoint for memory events,
> > specifically.
> > 
> > However, if extlog carries all kinds of errors outside, not only DRAM
> > errors, we should do a TRACE_EVENT_CLASS which contains the shared args
> > to every error type and then make a mem event ontop of it.
> I agree.

Borislav Petkov March 10, 2014, 10:31 a.m. UTC | #4

On Mon, Mar 10, 2014 at 07:04:35AM -0300, Mauro Carvalho Chehab wrote:
> Changing the format breaks any userspace application that relies on
> parsing them. That's an API breakage. Adding more data could be
> fine, if we take enough care when doing it, and properly document
> how userspace is supposed to parse it.

Yes, we don't want code duplication if it can be helped. Besides, having
one function doing the error format issual keeps us from the case when
having two or more diverge from one another.

> Well, actually, EDAC drivers use 0 to indicate an unknown physical address.
> The better is to use the same standard used there.

However, physical address 0 is a valid address... all FFF...Fs is hardly valid.

Borislav Petkov March 10, 2014, 10:33 a.m. UTC | #5

On Mon, Mar 10, 2014 at 04:22:42AM -0400, Chen, Gong wrote:
> Do we really need to do that? IMHO, I think they are used for two
> different usages, just like dmesg & mcelog.

Yes, we do. We want to be able to disable issuing errors into dmesg so
that people don't have to parse it. This is the main reason why we're
adding tracepoints - so that we have structured error format going out
of the kernel.

Mauro Carvalho Chehab March 10, 2014, 11:41 a.m. UTC | #6

Em Mon, 10 Mar 2014 11:31:29 +0100
Borislav Petkov <bp@alien8.de> escreveu:

> On Mon, Mar 10, 2014 at 07:04:35AM -0300, Mauro Carvalho Chehab wrote:
> > Changing the format breaks any userspace application that relies on
> > parsing them. That's an API breakage. Adding more data could be
> > fine, if we take enough care when doing it, and properly document
> > how userspace is supposed to parse it.
> 
> Yes, we don't want code duplication if it can be helped. Besides, having
> one function doing the error format issual keeps us from the case when
> having two or more diverge from one another.
> 
> > Well, actually, EDAC drivers use 0 to indicate an unknown physical address.
> > The better is to use the same standard used there.
> 
> However, physical address 0 is a valid address... all FFF...Fs is hardly valid.

True, but if we decide to go on that direction, a change like that should
then be done on all EDAC drivers, and that's an API change. 

We also need to be sure that userspace will handle this change properly.

Regards,
Mauro

Borislav Petkov March 10, 2014, 1:29 p.m. UTC | #7

On Mon, Mar 10, 2014 at 08:41:13AM -0300, Mauro Carvalho Chehab wrote:
> True, but if we decide to go on that direction, a change like that
> should then be done on all EDAC drivers, and that's an API change.
>
> We also need to be sure that userspace will handle this change
> properly.

We don't have a choice:

[    0.000000] e820: BIOS-provided physical RAM map:
[    0.000000] BIOS-e820: [mem 0x0000000000000000-0x000000000009e7ff] usable

Tony Luck March 10, 2014, 5:37 p.m. UTC | #8

Pj4gVHJ1ZSwgYnV0IGlmIHdlIGRlY2lkZSB0byBnbyBvbiB0aGF0IGRpcmVjdGlvbiwgYSBjaGFu
Z2UgbGlrZSB0aGF0DQo+PiBzaG91bGQgdGhlbiBiZSBkb25lIG9uIGFsbCBFREFDIGRyaXZlcnMs
IGFuZCB0aGF0J3MgYW4gQVBJIGNoYW5nZS4NCj4+DQo+PiBXZSBhbHNvIG5lZWQgdG8gYmUgc3Vy
ZSB0aGF0IHVzZXJzcGFjZSB3aWxsIGhhbmRsZSB0aGlzIGNoYW5nZQ0KPj4gcHJvcGVybHkuDQo+
DQo+IFdlIGRvbid0IGhhdmUgYSBjaG9pY2U6DQo+DQo+IFsgICAgMC4wMDAwMDBdIGU4MjA6IEJJ
T1MtcHJvdmlkZWQgcGh5c2ljYWwgUkFNIG1hcDoNCj4gWyAgICAwLjAwMDAwMF0gQklPUy1lODIw
OiBbbWVtIDB4MDAwMDAwMDAwMDAwMDAwMC0weDAwMDAwMDAwMDAwOWU3ZmZdIHVzYWJsZQ0KDQpT
aW5jZSB3ZSBoYXZlIHNlcGFyYXRlIHRyYWNlIHBvaW50cyBmb3IgRURDQSBhbmQgZXh0bG9nIC0g
d2UgY2FuIHVzZSBkaWZmZXJlbnQNCmNvbnZlbnRpb25zIGZvciAiaW52YWxpZCIuICBJZiB5b3Ug
dGhpbmsgdGhhdCBjaGFuZ2luZyB0aGUgQUJJIGluIEVEQUMgd291bGQgYmUgYQ0KcHJvYmxlbSAt
IHRoZW4geW91IGNhbiBrZWVwIHJ1bm5pbmcgdGhlIGFsbW9zdCBpbmZpbml0ZXNpbWFsIGNoYW5j
ZSB0aGF0IHlvdSBoYXZlDQphIHJlYWwgZXJyb3IgYXQgMHgwMDAwMDAwMDAwMDAwMCB0aGF0IHlv
dSB0cmVhdCBhcyB1bmtub3duIGFkZHJlc3MuDQoNCkRvZXMgWDg2IGFjdHVhbGx5IGFsbG9jYXRl
IHRoYXQgcGFnZSBmb3IgdXNlPyAtIEkga25vdyBvbiBJdGFuaXVtIGl0IGdvdCB0aHJvd24gb3V0
DQp3aGVuIHJvdW5kaW5nIHVzYWJsZSBibG9ja3Mgb2YgbWVtb3J5IHRvIDE2TUIgYm91bmRhcmll
cyBmb3IgY2FjaGUgY29oZXJlbmN5DQpyZWFzb25zLg0KDQotVG9ueQ0K
--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Tony Luck March 10, 2014, 5:42 p.m. UTC | #9

> Not really. Firstly they service for different purpose. Secondly the

> format here can be changed/updated depending on further requirment.

> I can't assume they always keep the same format.


I can't imagine a reason why we'd want them to be different though.
If a reason does come up in the future, then we can clone & edit
the function to do different things - but until then we should share code.

Note: there will be changes periodically when UEFI changes the error
record format (they have added new fields in the past - I expect they
will add more in the future).  We should be able to add more stuff to
the end of the "mem_loc[]" string as long as we write user-mode
parsers that won't be surprised by extra fields there.

-Tony

Chen Gong March 11, 2014, 7:03 a.m. UTC | #10

On Mon, Mar 10, 2014 at 05:42:45PM +0000, Luck, Tony wrote:
> I can't imagine a reason why we'd want them to be different though.
> If a reason does come up in the future, then we can clone & edit
> the function to do different things - but until then we should share code.
> 
> Note: there will be changes periodically when UEFI changes the error
> record format (they have added new fields in the past - I expect they
> will add more in the future).  We should be able to add more stuff to
> the end of the "mem_loc[]" string as long as we write user-mode
> parsers that won't be surprised by extra fields there.
> 
> -Tony

OK, I will follow you guys suggestion for next version, like code
sharing, trace/printk switching

Borislav Petkov March 11, 2014, 2:27 p.m. UTC | #11

On Mon, Mar 10, 2014 at 05:37:19PM +0000, Luck, Tony wrote:
> Since we have separate trace points for EDCA and extlog - we can use
> different conventions for "invalid". If you think that changing the
> ABI in EDAC would be a problem - then you can keep running the almost
> infinitesimal chance that you have a real error at 0x00000000000000
> that you treat as unknown address.

I hardly believe changing that would be noticed by anyone.

> Does X86 actually allocate that page for use? - I know on Itanium
> it got thrown out when rounding usable blocks of memory to 16MB
> boundaries for cache coherency reasons.

Yeah, it turns out we reserve at least the first page - 64K by default
though - because BIOS likes to make love to that range. Detailed
explanation from Kconfig text below.

Still, for correctness sake at least, I think we should use a value
which denotes an invalid memory address, i.e. ~0x0, for example, and not
a valid one.

config X86_RESERVE_LOW
	int "Amount of low memory, in kilobytes, to reserve for the BIOS"
	default 64
	range 4 640
	---help---
	  Specify the amount of low memory to reserve for the BIOS.

	  The first page contains BIOS data structures that the kernel
	  must not use, so that page must always be reserved.

	  By default we reserve the first 64K of physical RAM, as a
	  number of BIOSes are known to corrupt that memory range
	  during events such as suspend/resume or monitor cable
	  insertion, so it must not be used by the kernel.

	  You can set this to 4 if you are absolutely sure that you
	  trust the BIOS to get all its memory reservations and usages
	  right.  If you know your BIOS have problems beyond the
	  default 64K area, you can set this to 640 to avoid using the
	  entire low memory range.

	  If you have doubts about the BIOS (e.g. suspend/resume does
	  not work or there's kernel crashes after certain hardware
	  hotplug events) then you might want to enable
	  X86_CHECK_BIOS_CORRUPTION=y to allow the kernel to check
	  typical corruption patterns.

	  Leave this to the default value of 64 if you are unsure.

[2/2] trace, RAS: Add eMCA trace event interface

Commit Message

Comments

Patch