Message ID | 20200622120527.690-2-shiju.jose@huawei.com (mailing list archive) |
---|---|
State | Superseded, archived |
Headers | show |
Series | ACPI / APEI: Add support to notify the vendor specific HW errors | expand |
Hi Rafael, Hi James, Can you help to merge this patch because I added and tested all the suggestions from James. Thanks, Shiju >-----Original Message----- >From: linux-pci-owner@vger.kernel.org [mailto:linux-pci- >owner@vger.kernel.org] On Behalf Of Shiju Jose >Sent: 22 June 2020 13:05 >To: linux-acpi@vger.kernel.org; linux-pci@vger.kernel.org; linux- >kernel@vger.kernel.org; rjw@rjwysocki.net; helgaas@kernel.org; >bp@alien8.de; james.morse@arm.com; lenb@kernel.org; >tony.luck@intel.com; dan.carpenter@oracle.com; >zhangliguang@linux.alibaba.com; andriy.shevchenko@linux.intel.com; >Wangkefeng (OS Kernel Lab) <wangkefeng.wang@huawei.com>; >jroedel@suse.de >Cc: Linuxarm <linuxarm@huawei.com>; yangyicong ><yangyicong@huawei.com>; Jonathan Cameron ><jonathan.cameron@huawei.com>; tanxiaofei <tanxiaofei@huawei.com> >Subject: [PATCH v11 1/2] ACPI / APEI: Add a notifier chain for unknown >(vendor) CPER records > >CPER records describing a firmware-first error are identified by GUID. >The ghes driver currently logs, but ignores any unknown CPER records. >This prevents describing errors that can't be represented by a standard entry, >that would otherwise allow a driver to recover from an error. >The UEFI spec calls these 'Non-standard Section Body' (N.2.3 of version 2.8). > >Add a notifier chain for these non-standard/vendor-records. Callers must >identify their type of records by GUID. > >Record data is copied to memory from the ghes_estatus_pool to allow us to >keep it until after the notifier has run. > >Signed-off-by: Shiju Jose <shiju.jose@huawei.com> [ Removed kfifo and >ghes_gdata_pool. Expanded commit message ] >Signed-off-by: James Morse <james.morse@arm.com> >--- > drivers/acpi/apei/ghes.c | 63 >++++++++++++++++++++++++++++++++++++++++ > include/acpi/ghes.h | 27 +++++++++++++++++ > 2 files changed, 90 insertions(+) > >diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index >81bf71b10d44..99df00f64306 100644 >--- a/drivers/acpi/apei/ghes.c >+++ b/drivers/acpi/apei/ghes.c >@@ -79,6 +79,12 @@ > ((struct acpi_hest_generic_status *) \ > ((struct ghes_estatus_node *)(estatus_node) + 1)) > >+#define GHES_VENDOR_ENTRY_LEN(gdata_len) \ >+ (sizeof(struct ghes_vendor_record_entry) + (gdata_len)) >+#define GHES_GDATA_FROM_VENDOR_ENTRY(vendor_entry) \ >+ ((struct acpi_hest_generic_data *) \ >+ ((struct ghes_vendor_record_entry *)(vendor_entry) + 1)) >+ > /* > * NMI-like notifications vary by architecture, before the compiler can prune > * unused static functions it needs a value for these enums. >@@ -123,6 +129,12 @@ static DEFINE_MUTEX(ghes_list_mutex); > */ > static DEFINE_SPINLOCK(ghes_notify_lock_irq); > >+struct ghes_vendor_record_entry { >+ struct work_struct work; >+ int error_severity; >+ char vendor_record[]; >+}; >+ > static struct gen_pool *ghes_estatus_pool; static unsigned long >ghes_estatus_pool_size_request; > >@@ -511,6 +523,56 @@ static void ghes_handle_aer(struct >acpi_hest_generic_data *gdata) #endif } > >+static BLOCKING_NOTIFIER_HEAD(vendor_record_notify_list); >+ >+int ghes_register_vendor_record_notifier(struct notifier_block *nb) { >+ return blocking_notifier_chain_register(&vendor_record_notify_list, >+nb); } EXPORT_SYMBOL_GPL(ghes_register_vendor_record_notifier); >+ >+void ghes_unregister_vendor_record_notifier(struct notifier_block *nb) >+{ >+ blocking_notifier_chain_unregister(&vendor_record_notify_list, nb); >} >+EXPORT_SYMBOL_GPL(ghes_unregister_vendor_record_notifier); >+ >+static void ghes_vendor_record_work_func(struct work_struct *work) { >+ struct ghes_vendor_record_entry *entry; >+ struct acpi_hest_generic_data *gdata; >+ u32 len; >+ >+ entry = container_of(work, struct ghes_vendor_record_entry, work); >+ gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry); >+ >+ blocking_notifier_call_chain(&vendor_record_notify_list, >+ entry->error_severity, gdata); >+ >+ len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata)); >+ gen_pool_free(ghes_estatus_pool, (unsigned long)entry, len); } >+ >+static void ghes_defer_non_standard_event(struct acpi_hest_generic_data >*gdata, >+ int sev) >+{ >+ struct acpi_hest_generic_data *copied_gdata; >+ struct ghes_vendor_record_entry *entry; >+ u32 len; >+ >+ len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata)); >+ entry = (void *)gen_pool_alloc(ghes_estatus_pool, len); >+ if (!entry) >+ return; >+ >+ copied_gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry); >+ memcpy(copied_gdata, gdata, acpi_hest_get_record_size(gdata)); >+ entry->error_severity = sev; >+ >+ INIT_WORK(&entry->work, ghes_vendor_record_work_func); >+ schedule_work(&entry->work); >+} >+ > static bool ghes_do_proc(struct ghes *ghes, > const struct acpi_hest_generic_status *estatus) { >@@ -549,6 +611,7 @@ static bool ghes_do_proc(struct ghes *ghes, > } else { > void *err = acpi_hest_get_payload(gdata); > >+ ghes_defer_non_standard_event(gdata, sev); > log_non_standard_event(sec_type, fru_id, fru_text, > sec_sev, err, > gdata->error_data_length); >diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index >517a5231cc1b..ae0e8847fdd5 100644 >--- a/include/acpi/ghes.h >+++ b/include/acpi/ghes.h >@@ -53,6 +53,33 @@ enum { > GHES_SEV_PANIC = 0x3, > }; > >+#ifdef CONFIG_ACPI_APEI_GHES >+/** >+ * ghes_register_vendor_record_notifier - register a notifier for >+vendor >+ * records that the kernel would otherwise ignore. >+ * @nb: pointer to the notifier_block structure of the event handler. >+ * >+ * return 0 : SUCCESS, non-zero : FAIL >+ */ >+int ghes_register_vendor_record_notifier(struct notifier_block *nb); >+ >+/** >+ * ghes_unregister_vendor_record_notifier - unregister the previously >+ * registered vendor record notifier. >+ * @nb: pointer to the notifier_block structure of the vendor record >handler. >+ */ >+void ghes_unregister_vendor_record_notifier(struct notifier_block *nb); >+#else static inline int ghes_register_vendor_record_notifier(struct >+notifier_block *nb) { >+ return -ENODEV; >+} >+ >+static inline void ghes_unregister_vendor_record_notifier(struct >+notifier_block *nb) { } #endif >+ > int ghes_estatus_pool_init(int num_ghes); > > /* From drivers/edac/ghes_edac.c */ >-- >2.17.1 >
On Mon, Jul 13, 2020 at 10:35 AM Shiju Jose <shiju.jose@huawei.com> wrote: > > Hi Rafael, Hi James, > > Can you help to merge this patch because I added and tested all the suggestions from James. I could apply the [1/2] in principle, but I need an ACK for the [2/2] from the PCI side. That said, it looks like the [1/2] is a James' patch that you are sending with some changes made by you. In that case the ordering of the S-o-b tags under it should be different (the S-o-b from James, the what-you-have-change line and the S-o-b from you) and also the From: tag should point to James. Thanks!
Hi Rafael, >-----Original Message----- >From: Rafael J. Wysocki [mailto:rafael@kernel.org] >Sent: 13 July 2020 12:18 >To: Shiju Jose <shiju.jose@huawei.com> >Cc: linux-acpi@vger.kernel.org; linux-pci@vger.kernel.org; linux- >kernel@vger.kernel.org; rjw@rjwysocki.net; helgaas@kernel.org; >bp@alien8.de; james.morse@arm.com; lenb@kernel.org; >tony.luck@intel.com; dan.carpenter@oracle.com; >zhangliguang@linux.alibaba.com; andriy.shevchenko@linux.intel.com; >Wangkefeng (OS Kernel Lab) <wangkefeng.wang@huawei.com>; >jroedel@suse.de; Linuxarm <linuxarm@huawei.com>; yangyicong ><yangyicong@huawei.com>; Jonathan Cameron ><jonathan.cameron@huawei.com>; tanxiaofei <tanxiaofei@huawei.com> >Subject: Re: [PATCH v11 1/2] ACPI / APEI: Add a notifier chain for unknown >(vendor) CPER records > >On Mon, Jul 13, 2020 at 10:35 AM Shiju Jose <shiju.jose@huawei.com> >wrote: >> >> Hi Rafael, Hi James, >> >> Can you help to merge this patch because I added and tested all the >suggestions from James. > >I could apply the [1/2] in principle, but I need an ACK for the [2/2] from the >PCI side. > >That said, it looks like the [1/2] is a James' patch that you are sending with >some changes made by you. James added following changes on top of the original patch(V10) by me, [ Removed kfifo and ghes_gdata_pool. Expanded commit message ] I had confusion how the S-o-b tag to be added for James's changes in the V11 patch posted. > >In that case the ordering of the S-o-b tags under it should be different (the S- >o-b from James, the what-you-have-change line and the S-o-b from you) and >also the From: tag should point to James. > >Thanks! Thanks, Shiju
On Mon, Jul 13, 2020 at 3:33 PM Shiju Jose <shiju.jose@huawei.com> wrote: > > Hi Rafael, > > >-----Original Message----- > >From: Rafael J. Wysocki [mailto:rafael@kernel.org] > >Sent: 13 July 2020 12:18 > >To: Shiju Jose <shiju.jose@huawei.com> > >Cc: linux-acpi@vger.kernel.org; linux-pci@vger.kernel.org; linux- > >kernel@vger.kernel.org; rjw@rjwysocki.net; helgaas@kernel.org; > >bp@alien8.de; james.morse@arm.com; lenb@kernel.org; > >tony.luck@intel.com; dan.carpenter@oracle.com; > >zhangliguang@linux.alibaba.com; andriy.shevchenko@linux.intel.com; > >Wangkefeng (OS Kernel Lab) <wangkefeng.wang@huawei.com>; > >jroedel@suse.de; Linuxarm <linuxarm@huawei.com>; yangyicong > ><yangyicong@huawei.com>; Jonathan Cameron > ><jonathan.cameron@huawei.com>; tanxiaofei <tanxiaofei@huawei.com> > >Subject: Re: [PATCH v11 1/2] ACPI / APEI: Add a notifier chain for unknown > >(vendor) CPER records > > > >On Mon, Jul 13, 2020 at 10:35 AM Shiju Jose <shiju.jose@huawei.com> > >wrote: > >> > >> Hi Rafael, Hi James, > >> > >> Can you help to merge this patch because I added and tested all the > >suggestions from James. > > > >I could apply the [1/2] in principle, but I need an ACK for the [2/2] from the > >PCI side. > > > >That said, it looks like the [1/2] is a James' patch that you are sending with > >some changes made by you. > James added following changes on top of the original patch(V10) by me, > [ Removed kfifo and ghes_gdata_pool. Expanded commit message ] > I had confusion how the S-o-b tag to be added for James's changes in the V11 patch posted. So James should have sent the patch with his S-o-b under it. You cannot add S-o-b for somebody else to any patches. You can only add your S-o-b to somebody else's patch if you have made any changes on top of the original. In case you want to make a record of somebody else's contribution to your patch, you can use the Co-developed-by tag. Thanks!
Hi Rafael, >-----Original Message----- >From: linux-pci-owner@vger.kernel.org [mailto:linux-pci- >owner@vger.kernel.org] On Behalf Of Rafael J. Wysocki >Sent: 13 July 2020 14:38 >To: Shiju Jose <shiju.jose@huawei.com> >Cc: Rafael J. Wysocki <rafael@kernel.org>; linux-acpi@vger.kernel.org; linux- >pci@vger.kernel.org; linux-kernel@vger.kernel.org; rjw@rjwysocki.net; >helgaas@kernel.org; bp@alien8.de; james.morse@arm.com; >lenb@kernel.org; tony.luck@intel.com; dan.carpenter@oracle.com; >zhangliguang@linux.alibaba.com; andriy.shevchenko@linux.intel.com; >Wangkefeng (OS Kernel Lab) <wangkefeng.wang@huawei.com>; >jroedel@suse.de; Linuxarm <linuxarm@huawei.com>; yangyicong ><yangyicong@huawei.com>; Jonathan Cameron ><jonathan.cameron@huawei.com>; tanxiaofei <tanxiaofei@huawei.com> >Subject: Re: [PATCH v11 1/2] ACPI / APEI: Add a notifier chain for unknown >(vendor) CPER records > >On Mon, Jul 13, 2020 at 3:33 PM Shiju Jose <shiju.jose@huawei.com> wrote: >> >> Hi Rafael, >> >> >-----Original Message----- >> >From: Rafael J. Wysocki [mailto:rafael@kernel.org] >> >Sent: 13 July 2020 12:18 >> >To: Shiju Jose <shiju.jose@huawei.com> >> >Cc: linux-acpi@vger.kernel.org; linux-pci@vger.kernel.org; linux- >> >kernel@vger.kernel.org; rjw@rjwysocki.net; helgaas@kernel.org; >> >bp@alien8.de; james.morse@arm.com; lenb@kernel.org; >> >tony.luck@intel.com; dan.carpenter@oracle.com; >> >zhangliguang@linux.alibaba.com; andriy.shevchenko@linux.intel.com; >> >Wangkefeng (OS Kernel Lab) <wangkefeng.wang@huawei.com>; >> >jroedel@suse.de; Linuxarm <linuxarm@huawei.com>; yangyicong >> ><yangyicong@huawei.com>; Jonathan Cameron >> ><jonathan.cameron@huawei.com>; tanxiaofei <tanxiaofei@huawei.com> >> >Subject: Re: [PATCH v11 1/2] ACPI / APEI: Add a notifier chain for >> >unknown >> >(vendor) CPER records >> > >> >On Mon, Jul 13, 2020 at 10:35 AM Shiju Jose <shiju.jose@huawei.com> >> >wrote: >> >> >> >> Hi Rafael, Hi James, >> >> >> >> Can you help to merge this patch because I added and tested all the >> >suggestions from James. >> > >> >I could apply the [1/2] in principle, but I need an ACK for the [2/2] >> >from the PCI side. >> > >> >That said, it looks like the [1/2] is a James' patch that you are >> >sending with some changes made by you. >> James added following changes on top of the original patch(V10) by me, >> [ Removed kfifo and ghes_gdata_pool. Expanded commit message ] I had >> confusion how the S-o-b tag to be added for James's changes in the V11 >patch posted. > >So James should have sent the patch with his S-o-b under it. > >You cannot add S-o-b for somebody else to any patches. You can only add >your S-o-b to somebody else's patch if you have made any changes on top of >the original. > >In case you want to make a record of somebody else's contribution to your >patch, you can use the Co-developed-by tag. Ok. I will resend the patch with Co-developed-by tag. > >Thanks! Thanks, Shiju
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 81bf71b10d44..99df00f64306 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -79,6 +79,12 @@ ((struct acpi_hest_generic_status *) \ ((struct ghes_estatus_node *)(estatus_node) + 1)) +#define GHES_VENDOR_ENTRY_LEN(gdata_len) \ + (sizeof(struct ghes_vendor_record_entry) + (gdata_len)) +#define GHES_GDATA_FROM_VENDOR_ENTRY(vendor_entry) \ + ((struct acpi_hest_generic_data *) \ + ((struct ghes_vendor_record_entry *)(vendor_entry) + 1)) + /* * NMI-like notifications vary by architecture, before the compiler can prune * unused static functions it needs a value for these enums. @@ -123,6 +129,12 @@ static DEFINE_MUTEX(ghes_list_mutex); */ static DEFINE_SPINLOCK(ghes_notify_lock_irq); +struct ghes_vendor_record_entry { + struct work_struct work; + int error_severity; + char vendor_record[]; +}; + static struct gen_pool *ghes_estatus_pool; static unsigned long ghes_estatus_pool_size_request; @@ -511,6 +523,56 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata) #endif } +static BLOCKING_NOTIFIER_HEAD(vendor_record_notify_list); + +int ghes_register_vendor_record_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&vendor_record_notify_list, nb); +} +EXPORT_SYMBOL_GPL(ghes_register_vendor_record_notifier); + +void ghes_unregister_vendor_record_notifier(struct notifier_block *nb) +{ + blocking_notifier_chain_unregister(&vendor_record_notify_list, nb); +} +EXPORT_SYMBOL_GPL(ghes_unregister_vendor_record_notifier); + +static void ghes_vendor_record_work_func(struct work_struct *work) +{ + struct ghes_vendor_record_entry *entry; + struct acpi_hest_generic_data *gdata; + u32 len; + + entry = container_of(work, struct ghes_vendor_record_entry, work); + gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry); + + blocking_notifier_call_chain(&vendor_record_notify_list, + entry->error_severity, gdata); + + len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata)); + gen_pool_free(ghes_estatus_pool, (unsigned long)entry, len); +} + +static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata, + int sev) +{ + struct acpi_hest_generic_data *copied_gdata; + struct ghes_vendor_record_entry *entry; + u32 len; + + len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata)); + entry = (void *)gen_pool_alloc(ghes_estatus_pool, len); + if (!entry) + return; + + copied_gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry); + memcpy(copied_gdata, gdata, acpi_hest_get_record_size(gdata)); + entry->error_severity = sev; + + INIT_WORK(&entry->work, ghes_vendor_record_work_func); + schedule_work(&entry->work); +} + static bool ghes_do_proc(struct ghes *ghes, const struct acpi_hest_generic_status *estatus) { @@ -549,6 +611,7 @@ static bool ghes_do_proc(struct ghes *ghes, } else { void *err = acpi_hest_get_payload(gdata); + ghes_defer_non_standard_event(gdata, sev); log_non_standard_event(sec_type, fru_id, fru_text, sec_sev, err, gdata->error_data_length); diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index 517a5231cc1b..ae0e8847fdd5 100644 --- a/include/acpi/ghes.h +++ b/include/acpi/ghes.h @@ -53,6 +53,33 @@ enum { GHES_SEV_PANIC = 0x3, }; +#ifdef CONFIG_ACPI_APEI_GHES +/** + * ghes_register_vendor_record_notifier - register a notifier for vendor + * records that the kernel would otherwise ignore. + * @nb: pointer to the notifier_block structure of the event handler. + * + * return 0 : SUCCESS, non-zero : FAIL + */ +int ghes_register_vendor_record_notifier(struct notifier_block *nb); + +/** + * ghes_unregister_vendor_record_notifier - unregister the previously + * registered vendor record notifier. + * @nb: pointer to the notifier_block structure of the vendor record handler. + */ +void ghes_unregister_vendor_record_notifier(struct notifier_block *nb); +#else +static inline int ghes_register_vendor_record_notifier(struct notifier_block *nb) +{ + return -ENODEV; +} + +static inline void ghes_unregister_vendor_record_notifier(struct notifier_block *nb) +{ +} +#endif + int ghes_estatus_pool_init(int num_ghes); /* From drivers/edac/ghes_edac.c */