diff mbox series

[1/2] cper, apei, mce: Pass x86 CPER through the MCA handling chain

Message ID 20200825144710.23584-2-Smita.KoralahalliChannabasappa@amd.com (mailing list archive)
State New, archived
Headers show
Series Decode raw MSR values of MCA registers in BERT | expand

Commit Message

Smita Koralahalli Aug. 25, 2020, 2:47 p.m. UTC
The Linux kernel uses the ACPI Boot Error Record Table (BERT) to report
fatal errors that occurred in a previous boot. The MCA errors in the BERT
are reported using the x86 Processor Error Common Platform Error Record
(CPER) format. Currently, only the raw MSR values from the record are
printed, and AMD relies on this raw dump to provide MCA information.

Extract the raw MSR values of the MCA registers from the BERT and feed them
into the standard mce_log() function through the existing x86/MCA RAS
infrastructure. This will result in better decoding from the EDAC MCE
decoder or the default notifier.

The implementation is SMCA-specific, as the raw MCA register values are
given in the register offset order of the MCAX address space.

Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: Len Brown <len.brown@intel.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Yazen Ghannam <yazen.ghannam@amd.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-edac@vger.kernel.org
Cc: linux-efi@vger.kernel.org
Cc: linux-acpi@vger.kernel.org
Cc: devel@acpica.org
---
 arch/x86/include/asm/mce.h      |  5 +++++
 arch/x86/kernel/acpi/apei.c     | 10 +++++++++
 arch/x86/kernel/cpu/mce/apei.c  | 38 +++++++++++++++++++++++++++++++++
 drivers/firmware/efi/cper-x86.c | 10 +++++----
 include/acpi/apei.h             |  2 ++
 5 files changed, 61 insertions(+), 4 deletions(-)

Comments

kernel test robot Aug. 25, 2020, 8:42 p.m. UTC | #1
Hi Smita,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on tip/x86/core]
[also build test ERROR on tip/auto-latest efi/next v5.9-rc2 next-20200825]
[If your patch is applied to the wrong git tree, kindly drop us a note.
When submitting patches, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Smita-Koralahalli/Decode-raw-MSR-values-of-MCA-registers-in-BERT/20200825-224828
base:   https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git ef2ff0f5d6008d325c9a068e20981c0d0acc4d6b
config: x86_64-randconfig-r024-20200826 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-15) 9.3.0
reproduce (this is a W=1 build):
        # save the attached .config to linux build tree
        make W=1 ARCH=x86_64 

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   ld: drivers/firmware/efi/cper-x86.o: in function `cper_print_proc_ia':
>> drivers/firmware/efi/cper-x86.c:350: undefined reference to `arch_apei_report_x86_error'

# https://github.com/0day-ci/linux/commit/40a67e64577bd2fee8aa3218efdbec2122cbe08d
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Smita-Koralahalli/Decode-raw-MSR-values-of-MCA-registers-in-BERT/20200825-224828
git checkout 40a67e64577bd2fee8aa3218efdbec2122cbe08d
vim +350 drivers/firmware/efi/cper-x86.c

   255	
   256	void cper_print_proc_ia(const char *pfx, const struct cper_sec_proc_ia *proc)
   257	{
   258		int i;
   259		struct cper_ia_err_info *err_info;
   260		struct cper_ia_proc_ctx *ctx_info;
   261		char newpfx[64], infopfx[64];
   262		u8 err_type;
   263	
   264		if (proc->validation_bits & VALID_LAPIC_ID)
   265			printk("%sLocal APIC_ID: 0x%llx\n", pfx, proc->lapic_id);
   266	
   267		if (proc->validation_bits & VALID_CPUID_INFO) {
   268			printk("%sCPUID Info:\n", pfx);
   269			print_hex_dump(pfx, "", DUMP_PREFIX_OFFSET, 16, 4, proc->cpuid,
   270				       sizeof(proc->cpuid), 0);
   271		}
   272	
   273		snprintf(newpfx, sizeof(newpfx), "%s ", pfx);
   274	
   275		err_info = (struct cper_ia_err_info *)(proc + 1);
   276		for (i = 0; i < VALID_PROC_ERR_INFO_NUM(proc->validation_bits); i++) {
   277			printk("%sError Information Structure %d:\n", pfx, i);
   278	
   279			err_type = cper_get_err_type(&err_info->err_type);
   280			printk("%sError Structure Type: %s\n", newpfx,
   281			       err_type < ARRAY_SIZE(cper_proc_error_type_strs) ?
   282			       cper_proc_error_type_strs[err_type] : "unknown");
   283	
   284			if (err_type >= N_ERR_TYPES) {
   285				printk("%sError Structure Type: %pUl\n", newpfx,
   286				       &err_info->err_type);
   287			}
   288	
   289			if (err_info->validation_bits & INFO_VALID_CHECK_INFO) {
   290				printk("%sCheck Information: 0x%016llx\n", newpfx,
   291				       err_info->check_info);
   292	
   293				if (err_type < N_ERR_TYPES) {
   294					snprintf(infopfx, sizeof(infopfx), "%s ",
   295						 newpfx);
   296	
   297					print_err_info(infopfx, err_type,
   298						       err_info->check_info);
   299				}
   300			}
   301	
   302			if (err_info->validation_bits & INFO_VALID_TARGET_ID) {
   303				printk("%sTarget Identifier: 0x%016llx\n",
   304				       newpfx, err_info->target_id);
   305			}
   306	
   307			if (err_info->validation_bits & INFO_VALID_REQUESTOR_ID) {
   308				printk("%sRequestor Identifier: 0x%016llx\n",
   309				       newpfx, err_info->requestor_id);
   310			}
   311	
   312			if (err_info->validation_bits & INFO_VALID_RESPONDER_ID) {
   313				printk("%sResponder Identifier: 0x%016llx\n",
   314				       newpfx, err_info->responder_id);
   315			}
   316	
   317			if (err_info->validation_bits & INFO_VALID_IP) {
   318				printk("%sInstruction Pointer: 0x%016llx\n",
   319				       newpfx, err_info->ip);
   320			}
   321	
   322			err_info++;
   323		}
   324	
   325		ctx_info = (struct cper_ia_proc_ctx *)err_info;
   326		for (i = 0; i < VALID_PROC_CXT_INFO_NUM(proc->validation_bits); i++) {
   327			int size = sizeof(*ctx_info) + ctx_info->reg_arr_size;
   328			int groupsize = 4;
   329	
   330			printk("%sContext Information Structure %d:\n", pfx, i);
   331	
   332			printk("%sRegister Context Type: %s\n", newpfx,
   333			       ctx_info->reg_ctx_type < ARRAY_SIZE(ia_reg_ctx_strs) ?
   334			       ia_reg_ctx_strs[ctx_info->reg_ctx_type] : "unknown");
   335	
   336			printk("%sRegister Array Size: 0x%04x\n", newpfx,
   337			       ctx_info->reg_arr_size);
   338	
   339			if (ctx_info->reg_ctx_type == CTX_TYPE_MSR) {
   340				groupsize = 8; /* MSRs are 8 bytes wide. */
   341				printk("%sMSR Address: 0x%08x\n", newpfx,
   342				       ctx_info->msr_addr);
   343			}
   344	
   345			if (ctx_info->reg_ctx_type == CTX_TYPE_MMREG) {
   346				printk("%sMM Register Address: 0x%016llx\n", newpfx,
   347				       ctx_info->mm_reg_addr);
   348			}
   349	
 > 350			if (arch_apei_report_x86_error(proc, ctx_info)) {

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
diff mbox series

Patch

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index cf503824529c..dbcb6bd80b7c 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -291,6 +291,11 @@  struct cper_sec_mem_err;
 extern void apei_mce_report_mem_error(int corrected,
 				      struct cper_sec_mem_err *mem_err);
 
+struct cper_ia_proc_ctx;
+struct cper_sec_proc_ia;
+int apei_mce_report_x86_error(const struct cper_sec_proc_ia *proc,
+			      struct cper_ia_proc_ctx *ctx_info);
+
 /*
  * Enumerate new IP types and HWID values in AMD processors which support
  * Scalable MCA.
diff --git a/arch/x86/kernel/acpi/apei.c b/arch/x86/kernel/acpi/apei.c
index c22fb55abcfd..053d6e994e87 100644
--- a/arch/x86/kernel/acpi/apei.c
+++ b/arch/x86/kernel/acpi/apei.c
@@ -43,3 +43,13 @@  void arch_apei_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
 	apei_mce_report_mem_error(sev, mem_err);
 #endif
 }
+
+int arch_apei_report_x86_error(const struct cper_sec_proc_ia *proc,
+			       struct cper_ia_proc_ctx *ctx_info)
+{
+	int err = -EINVAL;
+#ifdef CONFIG_X86_MCE
+	err = apei_mce_report_x86_error(proc, ctx_info);
+#endif
+	return err;
+}
diff --git a/arch/x86/kernel/cpu/mce/apei.c b/arch/x86/kernel/cpu/mce/apei.c
index af8d37962586..d7f405e55d31 100644
--- a/arch/x86/kernel/cpu/mce/apei.c
+++ b/arch/x86/kernel/cpu/mce/apei.c
@@ -51,6 +51,44 @@  void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
 }
 EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
 
+int apei_mce_report_x86_error(const struct cper_sec_proc_ia *proc,
+			      struct cper_ia_proc_ctx *ctx_info)
+{
+	const u64 *i_mce = ((const void *) (ctx_info + 1));
+	unsigned int cpu;
+	struct mce m;
+
+	if (!boot_cpu_has(X86_FEATURE_SMCA))
+		return -EINVAL;
+
+	mce_setup(&m);
+
+	m.extcpu = -1;
+	m.socketid = -1;
+
+	for_each_possible_cpu(cpu) {
+		if (cpu_data(cpu).initial_apicid == proc->lapic_id) {
+			m.extcpu = cpu;
+			m.socketid = cpu_data(m.extcpu).phys_proc_id;
+			break;
+		}
+	}
+
+	m.apicid = proc->lapic_id;
+	m.bank = (ctx_info->msr_addr >> 4) & 0xFF;
+	m.status = *i_mce;
+	m.addr = *(i_mce + 1);
+	m.misc = *(i_mce + 2);
+	/* Skipping MCA_CONFIG */
+	m.ipid = *(i_mce + 4);
+	m.synd = *(i_mce + 5);
+
+	mce_log(&m);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(apei_mce_report_x86_error);
+
 #define CPER_CREATOR_MCE						\
 	GUID_INIT(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c,	\
 		  0x64, 0x90, 0xb8, 0x9d)
diff --git a/drivers/firmware/efi/cper-x86.c b/drivers/firmware/efi/cper-x86.c
index 2531de49f56c..6622e9824416 100644
--- a/drivers/firmware/efi/cper-x86.c
+++ b/drivers/firmware/efi/cper-x86.c
@@ -1,7 +1,7 @@ 
 // SPDX-License-Identifier: GPL-2.0
 // Copyright (C) 2018, Advanced Micro Devices, Inc.
 
-#include <linux/cper.h>
+#include <acpi/apei.h>
 
 /*
  * We don't need a "CPER_IA" prefix since these are all locally defined.
@@ -347,9 +347,11 @@  void cper_print_proc_ia(const char *pfx, const struct cper_sec_proc_ia *proc)
 			       ctx_info->mm_reg_addr);
 		}
 
-		printk("%sRegister Array:\n", newpfx);
-		print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, groupsize,
-			       (ctx_info + 1), ctx_info->reg_arr_size, 0);
+		if (arch_apei_report_x86_error(proc, ctx_info)) {
+			printk("%sRegister Array:\n", newpfx);
+			print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, groupsize,
+				       (ctx_info + 1), ctx_info->reg_arr_size, 0);
+		}
 
 		ctx_info = (struct cper_ia_proc_ctx *)((long)ctx_info + size);
 	}
diff --git a/include/acpi/apei.h b/include/acpi/apei.h
index 680f80960c3d..e8ab17395887 100644
--- a/include/acpi/apei.h
+++ b/include/acpi/apei.h
@@ -51,6 +51,8 @@  int erst_clear(u64 record_id);
 
 int arch_apei_enable_cmcff(struct acpi_hest_header *hest_hdr, void *data);
 void arch_apei_report_mem_error(int sev, struct cper_sec_mem_err *mem_err);
+int arch_apei_report_x86_error(const struct cper_sec_proc_ia *proc,
+			       struct cper_ia_proc_ctx *ctx_info);
 
 #endif
 #endif