Message ID | 1382084624-10857-5-git-send-email-gong.chen@linux.intel.com (mailing list archive) |
---|---|
State | Not Applicable, archived |
Headers | show |
On 10/18/2013 01:53 PM, Chen, Gong wrote: > This H/W error log driver (a.k.a eMCA driver) is implemented based on > http://www.intel.com/content/www/us/en/architecture-and-technology/enhanced-mca-logging-xeon-paper.html > > After errors are captured, more valuable information can be > got via this new enhanced H/W error log driver. > > v3 -> v2: fix a MACRO definition error and some cleanup > v2 -> v1: eliminate spin_lock & minor fixes suggested by Boris > > Signed-off-by: Chen, Gong <gong.chen@linux.intel.com> > --- > arch/x86/include/asm/mce.h | 5 + > arch/x86/kernel/cpu/mcheck/mce.c | 20 +++ > drivers/acpi/Kconfig | 20 +++ > drivers/acpi/Makefile | 2 + > drivers/acpi/acpi_extlog.c | 319 +++++++++++++++++++++++++++++++++++++++ > drivers/acpi/bus.c | 3 +- > include/linux/acpi.h | 1 + > 7 files changed, 369 insertions(+), 1 deletion(-) > create mode 100644 drivers/acpi/acpi_extlog.c > > diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h > index cbe6b9e..072b2f8 100644 > --- a/arch/x86/include/asm/mce.h > +++ b/arch/x86/include/asm/mce.h > @@ -16,6 +16,7 @@ > #define MCG_EXT_CNT_SHIFT 16 > #define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT) > #define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */ > +#define MCG_ELOG_P (1ULL<<26) /* Extended error log supported */ > > /* MCG_STATUS register defines */ > #define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ > @@ -186,6 +187,10 @@ enum mcp_flags { > MCP_UC = (1 << 1), /* log uncorrected errors */ > MCP_DONTLOG = (1 << 2), /* only clear, don't log */ > }; > + > +void register_elog_handler(int (*f)(const char *, int, int)); > +void unregister_elog_handler(int (*f)(const char *, int, int)); > + > void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); > > int mce_notify_irq(void); > diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c > index b3218cd..981e0d3 100644 > --- a/arch/x86/kernel/cpu/mcheck/mce.c > +++ 
b/arch/x86/kernel/cpu/mcheck/mce.c > @@ -48,6 +48,8 @@ > > #include "mce-internal.h" > > +static int (*mce_ext_err_print)(const char *, int, int); > + > static DEFINE_MUTEX(mce_chrdev_read_mutex); > > #define rcu_dereference_check_mce(p) \ > @@ -576,6 +578,21 @@ static void mce_read_aux(struct mce *m, int i) > > DEFINE_PER_CPU(unsigned, mce_poll_count); > > +void register_elog_handler(int (*f)(const char *, int, int)) > +{ > + mce_ext_err_print = f; > +} > +EXPORT_SYMBOL_GPL(register_elog_handler); > + > +void unregister_elog_handler(int (*f)(const char *, int, int)) > +{ > + if (f) { > + WARN_ON(mce_ext_err_print != f); > + mce_ext_err_print = NULL; > + } > +} > +EXPORT_SYMBOL_GPL(unregister_elog_handler); > + > /* > * Poll for corrected events or events that happened before reset. > * Those are just logged through /dev/mcelog. > @@ -624,6 +641,9 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) > (m.status & (mca_cfg.ser ? MCI_STATUS_S : MCI_STATUS_UC))) > continue; > > + if (mce_ext_err_print) > + mce_ext_err_print(NULL, m.extcpu, i); > + Can we use the notifier chain we already have: mce_register_decode_chain()? EDAC uses this and I'm wondering if it is a good fit here. As an added bonus, it seems to honor dont_log_ce option as well. > mce_read_aux(&m, i); > > if (!(flags & MCP_TIMESTAMP)) > diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig > index 22327e6..c67ec61 100644 > --- a/drivers/acpi/Kconfig > +++ b/drivers/acpi/Kconfig > @@ -372,4 +372,24 @@ config ACPI_BGRT > > source "drivers/acpi/apei/Kconfig" > > +config ACPI_EXTLOG > + tristate "Extended Error Log support" > + depends on X86_MCE I think you also have a dependancy on ACPI_APEI for apei_estatus_print() > + default n > + help > + Certain usages such as Predictive Failure Analysis (PFA) require > + more information about the error than what can be described in > + processor machine check banks. 
Most server processors log > + additional information about the error in processor uncore > + registers. Since the addresses and layout of these registers vary > + widely from one processor to another, system software cannot > + readily make use of them. To complicate matters further, some of > + the additional error information cannot be constructed space > + between "additional" and "error" without detailed knowledge Oops... looks like copy+paste went wrong ;) > + about platform topology. > + > + Enhanced MCA Logging allows firmware to provide additional error > + information to system software, synchronous with MCE or CMCI. This > + driver adds support for that functionality. > + > endif # ACPI > diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile > index cdaf68b..bce34af 100644 > --- a/drivers/acpi/Makefile > +++ b/drivers/acpi/Makefile > @@ -82,3 +82,5 @@ processor-$(CONFIG_CPU_FREQ) += processor_perflib.o > obj-$(CONFIG_ACPI_PROCESSOR_AGGREGATOR) += acpi_pad.o > > obj-$(CONFIG_ACPI_APEI) += apei/ > + > +obj-$(CONFIG_ACPI_EXTLOG) += acpi_extlog.o > diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c > new file mode 100644 > index 0000000..afeab59 > --- /dev/null > +++ b/drivers/acpi/acpi_extlog.c > @@ -0,0 +1,319 @@ > +/* > + * Extended Error Log driver > + * > + * Copyright (C) 2013 Intel Corp. > + * Author: Chen, Gong <gong.chen@intel.com> > + * > + * This file is licensed under GPLv2. 
> + */ > + > +#include <linux/module.h> > +#include <linux/acpi.h> > +#include <acpi/acpi_bus.h> > +#include <linux/cper.h> > +#include <linux/ratelimit.h> > +#include <asm/mce.h> > + > +#include "apei/apei-internal.h" > + > +#define EXT_ELOG_ENTRY_MASK GENMASK_ULL(51, 0) /* elog entry address mask */ > + > +#define EXTLOG_DSM_REV 0x0 > +#define EXTLOG_FN_QUERY 0x0 > +#define EXTLOG_FN_ADDR 0x1 > + > +#define FLAG_OS_OPTIN BIT(0) > +#define EXTLOG_QUERY_L1_EXIST BIT(1) > +#define ELOG_ENTRY_VALID (1ULL<<63) > +#define ELOG_ENTRY_LEN 0x1000 > + > +#define EMCA_BUG \ > + "Can not request iomem region <0x%016llx-0x%016llx> - eMCA disabled\n" > + > +struct extlog_l1_head { > + u32 ver; /* Header Version */ > + u32 hdr_len; /* Header Length */ > + u64 total_len; /* entire L1 Directory length including this header */ > + u64 elog_base; /* MCA Error Log Directory base address */ > + u64 elog_len; /* MCA Error Log Directory length */ > + u32 flags; /* bit 0 - OS/VMM Opt-in */ > + u8 rev0[12]; > + u32 entries; /* Valid L1 Directory entries per logical processor */ > + u8 rev1[12]; > +}; > + > +static u8 extlog_dsm_uuid[] = "663E35AF-CC10-41A4-88EA-5470AF055295"; > + > +/* L1 table related physical address */ > +static u64 elog_base; > +static size_t elog_size; > +static u64 l1_dirbase; > +static size_t l1_size; > + > +/* L1 table related virtual address */ > +static void __iomem *extlog_l1_addr; > +static void __iomem *elog_addr; > + > +static void *elog_buf; > + > +static u64 *l1_entry_base; > +static u32 l1_percpu_entry; > + > +#define ELOG_IDX(cpu, bank) \ > + (cpu_physical_id(cpu) * l1_percpu_entry + (bank)) > + > +#define ELOG_ENTRY_DATA(idx) \ > + (*(l1_entry_base + (idx))) > + > +#define ELOG_ENTRY_ADDR(phyaddr) \ > + (phyaddr - elog_base + (u8 *)elog_addr) > + > +static struct acpi_generic_status *extlog_elog_entry_check(int cpu, int bank) > +{ > + int idx; > + u64 data; > + struct acpi_generic_status *estatus; > + > + WARN_ON(cpu < 0); > + idx = ELOG_IDX(cpu, 
bank); > + data = ELOG_ENTRY_DATA(idx); > + if ((data & ELOG_ENTRY_VALID) == 0) > + return NULL; > + > + data &= EXT_ELOG_ENTRY_MASK; > + estatus = (struct acpi_generic_status *)ELOG_ENTRY_ADDR(data); > + > + /* if no valid data in elog entry, just return */ > + if (estatus->block_status == 0) > + return NULL; > + > + return estatus; > +} > + > +static void __print_extlog_rcd(const char *pfx, > + struct acpi_generic_status *estatus, int cpu) > +{ > + static atomic_t seqno; > + unsigned int curr_seqno; > + char pfx_seq[64]; > + > + if (!pfx) { > + if (estatus->error_severity <= CPER_SEV_CORRECTED) > + pfx = KERN_INFO; > + else > + pfx = KERN_ERR; > + } > + curr_seqno = atomic_inc_return(&seqno); > + snprintf(pfx_seq, sizeof(pfx_seq), "%s{%u}", pfx, curr_seqno); > + printk("%s""Hardware error detected on CPU%d\n", pfx_seq, cpu); > + cper_estatus_print(pfx_seq, estatus); > +} > + > +static int print_extlog_rcd(const char *pfx, > + struct acpi_generic_status *estatus, int cpu) > +{ > + /* Not more than 2 messages every 5 seconds */ > + static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2); > + static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2); > + struct ratelimit_state *ratelimit; > + > + if (estatus->error_severity == CPER_SEV_CORRECTED || > + (estatus->error_severity == CPER_SEV_INFORMATIONAL)) > + ratelimit = &ratelimit_corrected; > + else > + ratelimit = &ratelimit_uncorrected; > + if (__ratelimit(ratelimit)) { > + __print_extlog_rcd(pfx, estatus, cpu); > + return 0; > + } > + > + return 1; > +} > + > +static int extlog_print(const char *pfx, int cpu, int bank) > +{ > + struct acpi_generic_status *estatus; > + int rc; > + > + estatus = extlog_elog_entry_check(cpu, bank); > + if (estatus == NULL) > + return -EINVAL; > + > + memcpy(elog_buf, (void *)estatus, ELOG_ENTRY_LEN); > + /* clear record status to enable BIOS to update it again */ > + estatus->block_status = 0; > + > + rc = print_extlog_rcd(pfx, (struct acpi_generic_status *)elog_buf, cpu); 
> + > + return rc; > +} > + > +static int extlog_get_dsm(acpi_handle handle, int rev, int func, u64 *ret) > +{ > + struct acpi_buffer buf = {ACPI_ALLOCATE_BUFFER, NULL}; > + struct acpi_object_list input; > + union acpi_object params[4], *obj; > + u8 uuid[16]; > + int i; > + > + acpi_str_to_uuid(extlog_dsm_uuid, uuid); > + input.count = 4; > + input.pointer = params; > + params[0].type = ACPI_TYPE_BUFFER; > + params[0].buffer.length = 16; > + params[0].buffer.pointer = uuid; > + params[1].type = ACPI_TYPE_INTEGER; > + params[1].integer.value = rev; > + params[2].type = ACPI_TYPE_INTEGER; > + params[2].integer.value = func; > + params[3].type = ACPI_TYPE_PACKAGE; > + params[3].package.count = 0; > + params[3].package.elements = NULL; > + > + if (ACPI_FAILURE(acpi_evaluate_object(handle, "_DSM", &input, &buf))) > + return -1; > + > + *ret = 0; > + obj = (union acpi_object *)buf.pointer; > + if (obj->type == ACPI_TYPE_INTEGER) { > + *ret = obj->integer.value; > + } else if (obj->type == ACPI_TYPE_BUFFER) { > + if (obj->buffer.length <= 8) { > + for (i = 0; i < obj->buffer.length; i++) > + *ret |= (obj->buffer.pointer[i] << (i * 8)); > + } > + } > + kfree(buf.pointer); > + > + return 0; > +} > + > +static bool extlog_get_l1addr(void) > +{ > + acpi_handle handle; > + u64 ret; > + > + if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle))) > + return false; > + > + if (extlog_get_dsm(handle, EXTLOG_DSM_REV, EXTLOG_FN_QUERY, &ret) || > + !(ret & EXTLOG_QUERY_L1_EXIST)) > + return false; > + > + if (extlog_get_dsm(handle, EXTLOG_DSM_REV, EXTLOG_FN_ADDR, &ret)) > + return false; > + > + l1_dirbase = ret; > + /* Spec says L1 directory must be 4K aligned, bail out if it isn't */ > + if (l1_dirbase & ((1 << 12) - 1)) { > + pr_warn(FW_BUG "L1 Directory is invalid at physical %llx\n", > + l1_dirbase); > + return false; > + } > + > + return true; > +} > + > +static int __init extlog_init(void) > +{ > + struct extlog_l1_head *l1_head; > + void __iomem *extlog_l1_hdr; > + size_t 
l1_hdr_size; > + struct resource *r; > + u64 cap; > + int rc; > + > + rc = -ENODEV; > + > + rdmsrl(MSR_IA32_MCG_CAP, cap); > + if (!(cap & MCG_ELOG_P)) > + return rc; > + > + if (!extlog_get_l1addr()) > + return rc; > + > + rc = -EINVAL; > + /* get L1 header to fetch necessary information */ > + l1_hdr_size = sizeof(struct extlog_l1_head); > + r = request_mem_region(l1_dirbase, l1_hdr_size, "L1 DIR HDR"); > + if (!r) { > + pr_warn(FW_BUG EMCA_BUG, > + (unsigned long long)l1_dirbase, > + (unsigned long long)l1_dirbase + l1_hdr_size); > + goto err; > + } > + > + extlog_l1_hdr = acpi_os_map_memory(l1_dirbase, l1_hdr_size); > + l1_head = (struct extlog_l1_head *)extlog_l1_hdr; > + l1_size = l1_head->total_len; > + l1_percpu_entry = l1_head->entries; > + elog_base = l1_head->elog_base; > + elog_size = l1_head->elog_len; > + acpi_os_unmap_memory(extlog_l1_hdr, l1_hdr_size); > + release_mem_region(l1_dirbase, l1_hdr_size); > + > + /* remap L1 header again based on completed information */ > + r = request_mem_region(l1_dirbase, l1_size, "L1 Table"); > + if (!r) { > + pr_warn(FW_BUG EMCA_BUG, > + (unsigned long long)l1_dirbase, > + (unsigned long long)l1_dirbase + l1_size); > + goto err; > + } > + extlog_l1_addr = acpi_os_map_memory(l1_dirbase, l1_size); > + l1_entry_base = (u64 *)((u8 *)extlog_l1_addr + l1_hdr_size); > + > + /* remap elog table */ > + r = request_mem_region(elog_base, elog_size, "Elog Table"); > + if (!r) { > + pr_warn(FW_BUG EMCA_BUG, > + (unsigned long long)elog_base, > + (unsigned long long)elog_base + elog_size); > + goto err_release_l1_dir; > + } > + elog_addr = acpi_os_map_memory(elog_base, elog_size); > + > + rc = -ENOMEM; > + /* allocate buffer to save elog record */ > + elog_buf = kmalloc(ELOG_ENTRY_LEN, GFP_KERNEL); > + if (elog_buf == NULL) > + goto err_release_elog; > + > + register_elog_handler(extlog_print); > + /* enable OS to be involved to take over management from BIOS */ > + ((struct extlog_l1_head *)extlog_l1_addr)->flags |= 
FLAG_OS_OPTIN; > + > + return 0; > + > +err_release_elog: > + if (elog_addr) > + acpi_os_unmap_memory(elog_addr, elog_size); > + release_mem_region(elog_base, elog_size); > +err_release_l1_dir: > + if (extlog_l1_addr) > + acpi_os_unmap_memory(extlog_l1_addr, l1_size); > + release_mem_region(l1_dirbase, l1_size); > +err: > + pr_warn(FW_BUG "Extended error log disabled because of problems parsing f/w tables\n"); > + return rc; > +} > + > +static void __exit extlog_exit(void) > +{ > + unregister_elog_handler(extlog_print); > + ((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN; > + if (extlog_l1_addr) > + acpi_os_unmap_memory(extlog_l1_addr, l1_size); > + if (elog_addr) > + acpi_os_unmap_memory(elog_addr, elog_size); > + release_mem_region(elog_base, elog_size); > + release_mem_region(l1_dirbase, l1_size); > + kfree(elog_buf); > +} > + > +module_init(extlog_init); > +module_exit(extlog_exit); > + > +MODULE_AUTHOR("Chen, Gong <gong.chen@intel.com>"); > +MODULE_DESCRIPTION("Extended Error Log Driver"); "Extended MCA Error Log Driver"? 
Regards, Naveen > +MODULE_LICENSE("GPL"); > diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c > index b587ec8..e1bd9a1 100644 > --- a/drivers/acpi/bus.c > +++ b/drivers/acpi/bus.c > @@ -174,7 +174,7 @@ static void acpi_print_osc_error(acpi_handle handle, > printk("\n"); > } > > -static acpi_status acpi_str_to_uuid(char *str, u8 *uuid) > +acpi_status acpi_str_to_uuid(char *str, u8 *uuid) > { > int i; > static int opc_map_to_uuid[16] = {6, 4, 2, 0, 11, 9, 16, 14, 19, 21, > @@ -195,6 +195,7 @@ static acpi_status acpi_str_to_uuid(char *str, u8 *uuid) > } > return AE_OK; > } > +EXPORT_SYMBOL_GPL(acpi_str_to_uuid); > > acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context) > { > diff --git a/include/linux/acpi.h b/include/linux/acpi.h > index a5db4ae..c30bac8 100644 > --- a/include/linux/acpi.h > +++ b/include/linux/acpi.h > @@ -311,6 +311,7 @@ struct acpi_osc_context { > #define OSC_INVALID_REVISION_ERROR 8 > #define OSC_CAPABILITIES_MASK_ERROR 16 > > +acpi_status acpi_str_to_uuid(char *str, u8 *uuid); > acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); > > /* platform-wide _OSC bits */ > -- To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Oct 18, 2013 at 06:07:56PM +0530, Naveen N. Rao wrote: > >@@ -624,6 +641,9 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) > > (m.status & (mca_cfg.ser ? MCI_STATUS_S : MCI_STATUS_UC))) > > continue; > > > >+ if (mce_ext_err_print) > >+ mce_ext_err_print(NULL, m.extcpu, i); > >+ > > Can we use the notifier chain we already have: > mce_register_decode_chain()? EDAC uses this and I'm wondering if it > is a good fit here. As an added bonus, it seems to honor dont_log_ce > option as well. Hmm, that's a good question you raise: but the more important question is, do you guys - Gong and Tony - want to replace the logging we're already doing, i.e. mce_log() with extlog or not. Because if you want to replace the current logging you actually have to exit machine_check_poll() after having done mce_ext_err_print() so that the rest of the chain doesn't see the error. And, does mce_ext_err_print only report DRAM ECC errors or other error types too? Btw, if we keep both, then we're going to have two tracepoints - trace_mce_record() in mce_log() and this one - issuing each a record for the same event. Which is not really what we want I'd say... Thanks.
> Hmm, that's a good question you raise: but the more important question > is, do you guys - Gong and Tony - want to replace the logging we're > already doing, i.e. mce_log() with extlog or not. Long term ... I'd be happy to see mce_log() go away. But we need to have a robust, well tested replacement in place for some time before such a move is up for discussion. > Because if you want to replace the current logging you actually have to > exit machine_check_poll() after having done mce_ext_err_print() so that > the rest of the chain doesn't see the error. Yes - double error reporting should be avoided. > And, does mce_ext_err_print only report DRAM ECC errors or other error > types too? Our first platforms to implement this only do so for memory errors. This could change in the future (the UEFI appendix N error record has defined sub-sections for lots of types of errors). Currently EDAC hooked into the mce event notification chain provides a return code to indicate whether it completely processed the error, or whether to fall through to the rest of mce_log(): if (ret == NOTIFY_STOP) return; Having both EDAC and this new extended error log both registered on this chain would probably not be helpful in most cases. Not sure if we should handle that with user education to not load both an EDAC and ext_log driver or if there should be some enforcement. > Btw, if we keep both, then we're going to have two tracepoints - > trace_mce_record() in mce_log() and this one - issuing each a record for > the same event. Which is not really what we want I'd say... trace_mce_record() dumps the raw data from the machine check banks. I think there may still be a case for having this. Analysis tools that look at this trace as well should be smart enough to connect the dots. -Tony -- To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Oct 18, 2013 at 06:07:56PM +0530, Naveen N. Rao wrote: > Date: Fri, 18 Oct 2013 18:07:56 +0530 > From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com> > To: "Chen, Gong" <gong.chen@linux.intel.com>, tony.luck@intel.com, > bp@alien8.de, joe@perches.com, m.chehab@samsung.com > CC: arozansk@redhat.com, linux-acpi@vger.kernel.org, > linux-kernel@vger.kernel.org > Subject: Re: [PATCH v3 4/9] ACPI, x86: Extended error log driver for x86 > platform > User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:24.0) Gecko/20100101 > Thunderbird/24.0 > [...] > >+ > >+MODULE_AUTHOR("Chen, Gong <gong.chen@intel.com>"); > >+MODULE_DESCRIPTION("Extended Error Log Driver"); > > "Extended MCA Error Log Driver"? > Looks fine to me. Tony, would you please help to fix it when you pick up the patch? Thanks in advance!
[...] > >diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig > >index 22327e6..c67ec61 100644 > >--- a/drivers/acpi/Kconfig > >+++ b/drivers/acpi/Kconfig > >@@ -372,4 +372,24 @@ config ACPI_BGRT > > > > source "drivers/acpi/apei/Kconfig" > > > >+config ACPI_EXTLOG > >+ tristate "Extended Error Log support" > >+ depends on X86_MCE > > I think you also have a dependancy on ACPI_APEI for apei_estatus_print() > Oh, yes, it does. Furthermore, it reminds me of a question I have had since I wrote this patch series: where is the best place to put cper.c? CPER really doesn't depend on APEI, or even ACPI. Maybe lib/ is an option. I can update this patch and, if it is OK, add another separate patch to change this dependency. Does that make sense? > >+ default n > >+ help > >+ Certain usages such as Predictive Failure Analysis (PFA) require > >+ more information about the error than what can be described in > >+ processor machine check banks. Most server processors log > >+ additional information about the error in processor uncore > >+ registers. Since the addresses and layout of these registers vary > >+ widely from one processor to another, system software cannot > >+ readily make use of them. To complicate matters further, some of > >+ the additional error information cannot be constructed space > >+ between "additional" and "error" without detailed knowledge > > Oops... looks like copy+paste went wrong ;) > Sigh, it looks like I was in a bit of a hurry.
Btw, your mailer is generating that Mail-Followup-To header which removes you from the To: list and puts everyone else on To: instead. And of course, the patches you've sent with git-send-email don't have that header and replying to all there is fine. And Tony's replies don't have it so replying to him is fine. From reading this here: http://cr.yp.to/proto/replyto.html your mail client seems to think you're subscribed to some list and thus drops your mail address from Mail-Followup-To. On Sun, Oct 20, 2013 at 03:06:15AM -0400, Chen Gong wrote: > Oh, yes it is. Furthermore, it reminds me where is the best place > to put cper.c from I write this patch series. CPER really doesn't > dpend on APEI even ACPI. Maybe lib/ ia an option. I can update this > patch and if it is OK, I can add another separate patch to change this > dependency. Make sense? Yeah, for some reason it is part of the UEFI spec but APEI uses it too. Well, I guess you can add it there as "default n" and have the rest of the code select it in Kconfig. > Sigh, it looks like I have m a little bit hurry. Yeah, why is that? :-)
On 10/20/2013 01:51 PM, Borislav Petkov wrote: > On Sun, Oct 20, 2013 at 03:06:15AM -0400, Chen Gong wrote: >> Oh, yes it is. Furthermore, it reminds me where is the best place >> to put cper.c from I write this patch series. CPER really doesn't >> dpend on APEI even ACPI. Maybe lib/ ia an option. I can update this >> patch and if it is OK, I can add another separate patch to change this >> dependency. Make sense? > > Yeah, for some reason it is part of the UEFI spec but APEI uses it too. > > Well, I guess you can add it there as "default n" and have the rest of > the code select it in Kconfig. Yup, I think that would be a good idea to just separate out the CPER stuff from the APEI code, though I think your enhanced MCA logging code will need to depend on both CPER and ACPI since you use the ACPI structures as well. Thanks, Naveen -- To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 2013/10/18 20:37, Naveen N. Rao wrote: > On 10/18/2013 01:53 PM, Chen, Gong wrote: >> This H/W error log driver (a.k.a eMCA driver) is implemented based on >> http://www.intel.com/content/www/us/en/architecture-and-technology/enhanced-mca-logging-xeon-paper.html >> >> After errors are captured, more valuable information can be >> got via this new enhanced H/W error log driver. >> >> v3 -> v2: fix a MACRO definition error and some cleanup >> v2 -> v1: eliminate spin_lock & minor fixes suggested by Boris >> >> Signed-off-by: Chen, Gong <gong.chen@linux.intel.com> >> --- >> arch/x86/include/asm/mce.h | 5 + >> arch/x86/kernel/cpu/mcheck/mce.c | 20 +++ >> drivers/acpi/Kconfig | 20 +++ >> drivers/acpi/Makefile | 2 + [...] >> +} >> +EXPORT_SYMBOL_GPL(unregister_elog_handler); >> + >> /* >> * Poll for corrected events or events that happened before reset. >> * Those are just logged through /dev/mcelog. >> @@ -624,6 +641,9 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) >> (m.status & (mca_cfg.ser ? MCI_STATUS_S : MCI_STATUS_UC))) >> continue; >> >> + if (mce_ext_err_print) >> + mce_ext_err_print(NULL, m.extcpu, i); >> + > > Can we use the notifier chain we already have: mce_register_decode_chain()? EDAC uses this and I'm wondering if it is a good fit here. As an added bonus, it seems to honor dont_log_ce option as well. Hi everyone, I have a question here, is it safe when we use printk in MCE context? The call graph is like this, do_machine_check -> mce_log -> atomic_notifier_call_chain(&x86_mce_decoder_chain ...) -> ... -> extlog_print -> print_extlog_rcd -> __print_extlog_rcd -> printk There's a logbuf_lock in printk. If logbuf_lock is held by other cpu, it'll lead to an infinity spin here. Isn't it? 
-- Thanks, XiuQi > >> mce_read_aux(&m, i); >> >> if (!(flags & MCP_TIMESTAMP)) >> diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig >> index 22327e6..c67ec61 100644 >> --- a/drivers/acpi/Kconfig >> +++ b/drivers/acpi/Kconfig >> @@ -372,4 +372,24 @@ config ACPI_BGRT >> >> source "drivers/acpi/apei/Kconfig" >> >> +config ACPI_EXTLOG >> + tristate "Extended Error Log support" >> + depends on X86_MCE ... -- To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Jun 27, 2014 at 01:34:45PM +0800, Xie XiuQi wrote: > The call graph is like this, > do_machine_check > -> mce_log > -> atomic_notifier_call_chain(&x86_mce_decoder_chain ...) > -> ... > -> extlog_print > -> print_extlog_rcd > -> __print_extlog_rcd > -> printk > > There's a logbuf_lock in printk. If logbuf_lock is held by other cpu, > it'll lead to an infinity spin here. Isn't it? Yes, but we want to take the risk and print something out before the machine dies instead of waiting to get into printk-safe context first and maybe corrupt state. Besides, there's work currently going on to make printk safe in atomic context so...
>> There's a logbuf_lock in printk. If logbuf_lock is held by other cpu, >> it'll lead to an infinity spin here. Isn't it? > > Yes, but we want to take the risk and print something out before the > machine dies instead of waiting to get into printk-safe context first > and maybe corrupt state. Not all machine checks are fatal - it would be bad for us to go into an infinite spin instead of executing the recovery code. > Besides, there's work currently going on to make printk safe in atomic > context so... Good - we need this. -Tony
On Fri, Jun 27, 2014 at 08:43:14PM +0000, Luck, Tony wrote: > Not all machine checks are fatal - it would be bad for us to go into > an infinite spin instead of executing the recovery code. Then for the time being extlog shouldn't hook into the decoder chain but into mce_process_work, i.e. the last should call it. Or maybe add another notifier which is not atomic... -- To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
>> Not all machine checks are fatal - it would be bad for us to go into >> an infinite spin instead of executing the recovery code. > > Then for the time being extlog shouldn't hook into the decoder chain > but into mce_process_work, i.e. the last should call it. Or maybe add > another notifier which is not atomic... I spoke too quickly. The only MCEs for which we have recovery code are those that hit in application code. So the processor that is trying to do the printk() can't possibly be holding the locks. Other processors might have held the lock at the time of the MCE - but they have all returned from the handler at the time we try the printk - so they will make progress and release the lock so that we can acquire it. -Tony
On Fri, Jun 27, 2014 at 10:10:48PM +0000, Luck, Tony wrote: > I spoke too quickly. The only MCE for which we have recovery code are > those that hit in application code. So the processor that is trying to > do the printk() can't possibly be holding the locks. Other processors > might have held the lock at the time of the MCE - but they have all > returned from the handler at the time we try the printk - so they will > make progess and release the lock so that we can acquire it. That could explain why we're not seeing hangs left and right. :-)
On 2014/6/28 6:10, Luck, Tony wrote: >>> Not all machine checks are fatal - it would be bad for us to go into >>> an infinite spin instead of executing the recovery code. >> >> Then for the time being extlog shouldn't hook into the decoder chain >> but into mce_process_work, i.e. the last should call it. Or maybe add >> another notifier which is not atomic... > > I spoke too quickly. The only MCE for which we have recovery code are > those that hit in application code. So the processor that is trying to do > the printk() can't possibly be holding the locks. Other processors might > have held the lock at the time of the MCE - but they have all returned > from the handler at the time we try the printk - so they will make progess > and release the lock so that we can acquire it. Thank you for your reply. When we get an MCE that hits in application code, it is broadcast to the other processors immediately. Other processors that might have held the lock at the time of the MCE have no chance to release the lock and return from printk, do they? I know this rarely happens in production environments, but I think it's still a risk here. So it would be very good to have a printk that is safe in atomic context in the future. -- Thanks, XiuQi > > -Tony > -- To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index cbe6b9e..072b2f8 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -16,6 +16,7 @@ #define MCG_EXT_CNT_SHIFT 16 #define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT) #define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */ +#define MCG_ELOG_P (1ULL<<26) /* Extended error log supported */ /* MCG_STATUS register defines */ #define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ @@ -186,6 +187,10 @@ enum mcp_flags { MCP_UC = (1 << 1), /* log uncorrected errors */ MCP_DONTLOG = (1 << 2), /* only clear, don't log */ }; + +void register_elog_handler(int (*f)(const char *, int, int)); +void unregister_elog_handler(int (*f)(const char *, int, int)); + void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); int mce_notify_irq(void); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index b3218cd..981e0d3 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -48,6 +48,8 @@ #include "mce-internal.h" +static int (*mce_ext_err_print)(const char *, int, int); + static DEFINE_MUTEX(mce_chrdev_read_mutex); #define rcu_dereference_check_mce(p) \ @@ -576,6 +578,21 @@ static void mce_read_aux(struct mce *m, int i) DEFINE_PER_CPU(unsigned, mce_poll_count); +void register_elog_handler(int (*f)(const char *, int, int)) +{ + mce_ext_err_print = f; +} +EXPORT_SYMBOL_GPL(register_elog_handler); + +void unregister_elog_handler(int (*f)(const char *, int, int)) +{ + if (f) { + WARN_ON(mce_ext_err_print != f); + mce_ext_err_print = NULL; + } +} +EXPORT_SYMBOL_GPL(unregister_elog_handler); + /* * Poll for corrected events or events that happened before reset. * Those are just logged through /dev/mcelog. @@ -624,6 +641,9 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) (m.status & (mca_cfg.ser ? 
MCI_STATUS_S : MCI_STATUS_UC))) continue; + if (mce_ext_err_print) + mce_ext_err_print(NULL, m.extcpu, i); + mce_read_aux(&m, i); if (!(flags & MCP_TIMESTAMP)) diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 22327e6..c67ec61 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -372,4 +372,24 @@ config ACPI_BGRT source "drivers/acpi/apei/Kconfig" +config ACPI_EXTLOG + tristate "Extended Error Log support" + depends on X86_MCE + default n + help + Certain usages such as Predictive Failure Analysis (PFA) require + more information about the error than what can be described in + processor machine check banks. Most server processors log + additional information about the error in processor uncore + registers. Since the addresses and layout of these registers vary + widely from one processor to another, system software cannot + readily make use of them. To complicate matters further, some of + the additional error information cannot be constructed space + between "additional" and "error" without detailed knowledge + about platform topology. + + Enhanced MCA Logging allows firmware to provide additional error + information to system software, synchronous with MCE or CMCI. This + driver adds support for that functionality. + endif # ACPI diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index cdaf68b..bce34af 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -82,3 +82,5 @@ processor-$(CONFIG_CPU_FREQ) += processor_perflib.o obj-$(CONFIG_ACPI_PROCESSOR_AGGREGATOR) += acpi_pad.o obj-$(CONFIG_ACPI_APEI) += apei/ + +obj-$(CONFIG_ACPI_EXTLOG) += acpi_extlog.o diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c new file mode 100644 index 0000000..afeab59 --- /dev/null +++ b/drivers/acpi/acpi_extlog.c @@ -0,0 +1,319 @@ +/* + * Extended Error Log driver + * + * Copyright (C) 2013 Intel Corp. + * Author: Chen, Gong <gong.chen@intel.com> + * + * This file is licensed under GPLv2. 
+ */ + +#include <linux/module.h> +#include <linux/acpi.h> +#include <acpi/acpi_bus.h> +#include <linux/cper.h> +#include <linux/ratelimit.h> +#include <asm/mce.h> + +#include "apei/apei-internal.h" + +#define EXT_ELOG_ENTRY_MASK GENMASK_ULL(51, 0) /* elog entry address mask */ + +#define EXTLOG_DSM_REV 0x0 +#define EXTLOG_FN_QUERY 0x0 +#define EXTLOG_FN_ADDR 0x1 + +#define FLAG_OS_OPTIN BIT(0) +#define EXTLOG_QUERY_L1_EXIST BIT(1) +#define ELOG_ENTRY_VALID (1ULL<<63) +#define ELOG_ENTRY_LEN 0x1000 + +#define EMCA_BUG \ + "Can not request iomem region <0x%016llx-0x%016llx> - eMCA disabled\n" + +struct extlog_l1_head { + u32 ver; /* Header Version */ + u32 hdr_len; /* Header Length */ + u64 total_len; /* entire L1 Directory length including this header */ + u64 elog_base; /* MCA Error Log Directory base address */ + u64 elog_len; /* MCA Error Log Directory length */ + u32 flags; /* bit 0 - OS/VMM Opt-in */ + u8 rev0[12]; + u32 entries; /* Valid L1 Directory entries per logical processor */ + u8 rev1[12]; +}; + +static u8 extlog_dsm_uuid[] = "663E35AF-CC10-41A4-88EA-5470AF055295"; + +/* L1 table related physical address */ +static u64 elog_base; +static size_t elog_size; +static u64 l1_dirbase; +static size_t l1_size; + +/* L1 table related virtual address */ +static void __iomem *extlog_l1_addr; +static void __iomem *elog_addr; + +static void *elog_buf; + +static u64 *l1_entry_base; +static u32 l1_percpu_entry; + +#define ELOG_IDX(cpu, bank) \ + (cpu_physical_id(cpu) * l1_percpu_entry + (bank)) + +#define ELOG_ENTRY_DATA(idx) \ + (*(l1_entry_base + (idx))) + +#define ELOG_ENTRY_ADDR(phyaddr) \ + (phyaddr - elog_base + (u8 *)elog_addr) + +static struct acpi_generic_status *extlog_elog_entry_check(int cpu, int bank) +{ + int idx; + u64 data; + struct acpi_generic_status *estatus; + + WARN_ON(cpu < 0); + idx = ELOG_IDX(cpu, bank); + data = ELOG_ENTRY_DATA(idx); + if ((data & ELOG_ENTRY_VALID) == 0) + return NULL; + + data &= EXT_ELOG_ENTRY_MASK; + estatus = (struct 
acpi_generic_status *)ELOG_ENTRY_ADDR(data); + + /* if no valid data in elog entry, just return */ + if (estatus->block_status == 0) + return NULL; + + return estatus; +} + +static void __print_extlog_rcd(const char *pfx, + struct acpi_generic_status *estatus, int cpu) +{ + static atomic_t seqno; + unsigned int curr_seqno; + char pfx_seq[64]; + + if (!pfx) { + if (estatus->error_severity <= CPER_SEV_CORRECTED) + pfx = KERN_INFO; + else + pfx = KERN_ERR; + } + curr_seqno = atomic_inc_return(&seqno); + snprintf(pfx_seq, sizeof(pfx_seq), "%s{%u}", pfx, curr_seqno); + printk("%s""Hardware error detected on CPU%d\n", pfx_seq, cpu); + cper_estatus_print(pfx_seq, estatus); +} + +static int print_extlog_rcd(const char *pfx, + struct acpi_generic_status *estatus, int cpu) +{ + /* Not more than 2 messages every 5 seconds */ + static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2); + static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2); + struct ratelimit_state *ratelimit; + + if (estatus->error_severity == CPER_SEV_CORRECTED || + (estatus->error_severity == CPER_SEV_INFORMATIONAL)) + ratelimit = &ratelimit_corrected; + else + ratelimit = &ratelimit_uncorrected; + if (__ratelimit(ratelimit)) { + __print_extlog_rcd(pfx, estatus, cpu); + return 0; + } + + return 1; +} + +static int extlog_print(const char *pfx, int cpu, int bank) +{ + struct acpi_generic_status *estatus; + int rc; + + estatus = extlog_elog_entry_check(cpu, bank); + if (estatus == NULL) + return -EINVAL; + + memcpy(elog_buf, (void *)estatus, ELOG_ENTRY_LEN); + /* clear record status to enable BIOS to update it again */ + estatus->block_status = 0; + + rc = print_extlog_rcd(pfx, (struct acpi_generic_status *)elog_buf, cpu); + + return rc; +} + +static int extlog_get_dsm(acpi_handle handle, int rev, int func, u64 *ret) +{ + struct acpi_buffer buf = {ACPI_ALLOCATE_BUFFER, NULL}; + struct acpi_object_list input; + union acpi_object params[4], *obj; + u8 uuid[16]; + int i; + + 
acpi_str_to_uuid(extlog_dsm_uuid, uuid); + input.count = 4; + input.pointer = params; + params[0].type = ACPI_TYPE_BUFFER; + params[0].buffer.length = 16; + params[0].buffer.pointer = uuid; + params[1].type = ACPI_TYPE_INTEGER; + params[1].integer.value = rev; + params[2].type = ACPI_TYPE_INTEGER; + params[2].integer.value = func; + params[3].type = ACPI_TYPE_PACKAGE; + params[3].package.count = 0; + params[3].package.elements = NULL; + + if (ACPI_FAILURE(acpi_evaluate_object(handle, "_DSM", &input, &buf))) + return -1; + + *ret = 0; + obj = (union acpi_object *)buf.pointer; + if (obj->type == ACPI_TYPE_INTEGER) { + *ret = obj->integer.value; + } else if (obj->type == ACPI_TYPE_BUFFER) { + if (obj->buffer.length <= 8) { + for (i = 0; i < obj->buffer.length; i++) + *ret |= (obj->buffer.pointer[i] << (i * 8)); + } + } + kfree(buf.pointer); + + return 0; +} + +static bool extlog_get_l1addr(void) +{ + acpi_handle handle; + u64 ret; + + if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle))) + return false; + + if (extlog_get_dsm(handle, EXTLOG_DSM_REV, EXTLOG_FN_QUERY, &ret) || + !(ret & EXTLOG_QUERY_L1_EXIST)) + return false; + + if (extlog_get_dsm(handle, EXTLOG_DSM_REV, EXTLOG_FN_ADDR, &ret)) + return false; + + l1_dirbase = ret; + /* Spec says L1 directory must be 4K aligned, bail out if it isn't */ + if (l1_dirbase & ((1 << 12) - 1)) { + pr_warn(FW_BUG "L1 Directory is invalid at physical %llx\n", + l1_dirbase); + return false; + } + + return true; +} + +static int __init extlog_init(void) +{ + struct extlog_l1_head *l1_head; + void __iomem *extlog_l1_hdr; + size_t l1_hdr_size; + struct resource *r; + u64 cap; + int rc; + + rc = -ENODEV; + + rdmsrl(MSR_IA32_MCG_CAP, cap); + if (!(cap & MCG_ELOG_P)) + return rc; + + if (!extlog_get_l1addr()) + return rc; + + rc = -EINVAL; + /* get L1 header to fetch necessary information */ + l1_hdr_size = sizeof(struct extlog_l1_head); + r = request_mem_region(l1_dirbase, l1_hdr_size, "L1 DIR HDR"); + if (!r) { + pr_warn(FW_BUG 
EMCA_BUG, + (unsigned long long)l1_dirbase, + (unsigned long long)l1_dirbase + l1_hdr_size); + goto err; + } + + extlog_l1_hdr = acpi_os_map_memory(l1_dirbase, l1_hdr_size); + l1_head = (struct extlog_l1_head *)extlog_l1_hdr; + l1_size = l1_head->total_len; + l1_percpu_entry = l1_head->entries; + elog_base = l1_head->elog_base; + elog_size = l1_head->elog_len; + acpi_os_unmap_memory(extlog_l1_hdr, l1_hdr_size); + release_mem_region(l1_dirbase, l1_hdr_size); + + /* remap L1 header again based on completed information */ + r = request_mem_region(l1_dirbase, l1_size, "L1 Table"); + if (!r) { + pr_warn(FW_BUG EMCA_BUG, + (unsigned long long)l1_dirbase, + (unsigned long long)l1_dirbase + l1_size); + goto err; + } + extlog_l1_addr = acpi_os_map_memory(l1_dirbase, l1_size); + l1_entry_base = (u64 *)((u8 *)extlog_l1_addr + l1_hdr_size); + + /* remap elog table */ + r = request_mem_region(elog_base, elog_size, "Elog Table"); + if (!r) { + pr_warn(FW_BUG EMCA_BUG, + (unsigned long long)elog_base, + (unsigned long long)elog_base + elog_size); + goto err_release_l1_dir; + } + elog_addr = acpi_os_map_memory(elog_base, elog_size); + + rc = -ENOMEM; + /* allocate buffer to save elog record */ + elog_buf = kmalloc(ELOG_ENTRY_LEN, GFP_KERNEL); + if (elog_buf == NULL) + goto err_release_elog; + + register_elog_handler(extlog_print); + /* enable OS to be involved to take over management from BIOS */ + ((struct extlog_l1_head *)extlog_l1_addr)->flags |= FLAG_OS_OPTIN; + + return 0; + +err_release_elog: + if (elog_addr) + acpi_os_unmap_memory(elog_addr, elog_size); + release_mem_region(elog_base, elog_size); +err_release_l1_dir: + if (extlog_l1_addr) + acpi_os_unmap_memory(extlog_l1_addr, l1_size); + release_mem_region(l1_dirbase, l1_size); +err: + pr_warn(FW_BUG "Extended error log disabled because of problems parsing f/w tables\n"); + return rc; +} + +static void __exit extlog_exit(void) +{ + unregister_elog_handler(extlog_print); + ((struct extlog_l1_head *)extlog_l1_addr)->flags &= 
~FLAG_OS_OPTIN; + if (extlog_l1_addr) + acpi_os_unmap_memory(extlog_l1_addr, l1_size); + if (elog_addr) + acpi_os_unmap_memory(elog_addr, elog_size); + release_mem_region(elog_base, elog_size); + release_mem_region(l1_dirbase, l1_size); + kfree(elog_buf); +} + +module_init(extlog_init); +module_exit(extlog_exit); + +MODULE_AUTHOR("Chen, Gong <gong.chen@intel.com>"); +MODULE_DESCRIPTION("Extended Error Log Driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index b587ec8..e1bd9a1 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -174,7 +174,7 @@ static void acpi_print_osc_error(acpi_handle handle, printk("\n"); } -static acpi_status acpi_str_to_uuid(char *str, u8 *uuid) +acpi_status acpi_str_to_uuid(char *str, u8 *uuid) { int i; static int opc_map_to_uuid[16] = {6, 4, 2, 0, 11, 9, 16, 14, 19, 21, @@ -195,6 +195,7 @@ static acpi_status acpi_str_to_uuid(char *str, u8 *uuid) } return AE_OK; } +EXPORT_SYMBOL_GPL(acpi_str_to_uuid); acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context) { diff --git a/include/linux/acpi.h b/include/linux/acpi.h index a5db4ae..c30bac8 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -311,6 +311,7 @@ struct acpi_osc_context { #define OSC_INVALID_REVISION_ERROR 8 #define OSC_CAPABILITIES_MASK_ERROR 16 +acpi_status acpi_str_to_uuid(char *str, u8 *uuid); acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); /* platform-wide _OSC bits */
This H/W error log driver (a.k.a. the eMCA driver) is implemented based on http://www.intel.com/content/www/us/en/architecture-and-technology/enhanced-mca-logging-xeon-paper.html After errors are captured, more valuable information can be obtained via this new enhanced H/W error log driver. v3 -> v2: fix a macro definition error and some cleanup v2 -> v1: eliminate spin_lock & minor fixes suggested by Boris Signed-off-by: Chen, Gong <gong.chen@linux.intel.com> --- arch/x86/include/asm/mce.h | 5 + arch/x86/kernel/cpu/mcheck/mce.c | 20 +++ drivers/acpi/Kconfig | 20 +++ drivers/acpi/Makefile | 2 + drivers/acpi/acpi_extlog.c | 319 +++++++++++++++++++++++++++++++++++++++ drivers/acpi/bus.c | 3 +- include/linux/acpi.h | 1 + 7 files changed, 369 insertions(+), 1 deletion(-) create mode 100644 drivers/acpi/acpi_extlog.c