diff mbox series

[v11,1/2] ACPI / APEI: Add a notifier chain for unknown (vendor) CPER records

Message ID 20200622120527.690-2-shiju.jose@huawei.com (mailing list archive)
State Superseded, archived
Headers show
Series ACPI / APEI: Add support to notify the vendor specific HW errors | expand

Commit Message

Shiju Jose June 22, 2020, 12:05 p.m. UTC
CPER records describing a firmware-first error are identified by GUID.
The ghes driver currently logs, but ignores any unknown CPER records.
This prevents describing errors that can't be represented by a standard
entry, that would otherwise allow a driver to recover from an error.
The UEFI spec calls these 'Non-standard Section Body' (N.2.3 of
version 2.8).

Add a notifier chain for these non-standard/vendor-records. Callers
must identify their type of records by GUID.

Record data is copied to memory from the ghes_estatus_pool to allow
us to keep it until after the notifier has run.

Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
[ Removed kfifo and ghes_gdata_pool. Expanded commit message ]
Signed-off-by: James Morse <james.morse@arm.com>
---
 drivers/acpi/apei/ghes.c | 63 ++++++++++++++++++++++++++++++++++++++++
 include/acpi/ghes.h      | 27 +++++++++++++++++
 2 files changed, 90 insertions(+)

Comments

Shiju Jose July 13, 2020, 8:35 a.m. UTC | #1
Hi Rafael, Hi James,

Can you help to merge this patch because I added and tested all the suggestions from James.

Thanks,
Shiju

>-----Original Message-----
>From: linux-pci-owner@vger.kernel.org [mailto:linux-pci-
>owner@vger.kernel.org] On Behalf Of Shiju Jose
>Sent: 22 June 2020 13:05
>To: linux-acpi@vger.kernel.org; linux-pci@vger.kernel.org; linux-
>kernel@vger.kernel.org; rjw@rjwysocki.net; helgaas@kernel.org;
>bp@alien8.de; james.morse@arm.com; lenb@kernel.org;
>tony.luck@intel.com; dan.carpenter@oracle.com;
>zhangliguang@linux.alibaba.com; andriy.shevchenko@linux.intel.com;
>Wangkefeng (OS Kernel Lab) <wangkefeng.wang@huawei.com>;
>jroedel@suse.de
>Cc: Linuxarm <linuxarm@huawei.com>; yangyicong
><yangyicong@huawei.com>; Jonathan Cameron
><jonathan.cameron@huawei.com>; tanxiaofei <tanxiaofei@huawei.com>
>Subject: [PATCH v11 1/2] ACPI / APEI: Add a notifier chain for unknown
>(vendor) CPER records
>
>CPER records describing a firmware-first error are identified by GUID.
>The ghes driver currently logs, but ignores any unknown CPER records.
>This prevents describing errors that can't be represented by a standard entry,
>that would otherwise allow a driver to recover from an error.
>The UEFI spec calls these 'Non-standard Section Body' (N.2.3 of version 2.8).
>
>Add a notifier chain for these non-standard/vendor-records. Callers must
>identify their type of records by GUID.
>
>Record data is copied to memory from the ghes_estatus_pool to allow us to
>keep it until after the notifier has run.
>
>Signed-off-by: Shiju Jose <shiju.jose@huawei.com> [ Removed kfifo and
>ghes_gdata_pool. Expanded commit message ]
>Signed-off-by: James Morse <james.morse@arm.com>
>---
> drivers/acpi/apei/ghes.c | 63
>++++++++++++++++++++++++++++++++++++++++
> include/acpi/ghes.h      | 27 +++++++++++++++++
> 2 files changed, 90 insertions(+)
>
>diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index
>81bf71b10d44..99df00f64306 100644
>--- a/drivers/acpi/apei/ghes.c
>+++ b/drivers/acpi/apei/ghes.c
>@@ -79,6 +79,12 @@
> 	((struct acpi_hest_generic_status *)				\
> 	 ((struct ghes_estatus_node *)(estatus_node) + 1))
>
>+#define GHES_VENDOR_ENTRY_LEN(gdata_len)                               \
>+	(sizeof(struct ghes_vendor_record_entry) + (gdata_len))
>+#define GHES_GDATA_FROM_VENDOR_ENTRY(vendor_entry)                     \
>+	((struct acpi_hest_generic_data *)                              \
>+	((struct ghes_vendor_record_entry *)(vendor_entry) + 1))
>+
> /*
>  *  NMI-like notifications vary by architecture, before the compiler can prune
>  *  unused static functions it needs a value for these enums.
>@@ -123,6 +129,12 @@ static DEFINE_MUTEX(ghes_list_mutex);
>  */
> static DEFINE_SPINLOCK(ghes_notify_lock_irq);
>
>+struct ghes_vendor_record_entry {
>+	struct work_struct work;
>+	int error_severity;
>+	char vendor_record[];
>+};
>+
> static struct gen_pool *ghes_estatus_pool;  static unsigned long
>ghes_estatus_pool_size_request;
>
>@@ -511,6 +523,56 @@ static void ghes_handle_aer(struct
>acpi_hest_generic_data *gdata)  #endif  }
>
>+static BLOCKING_NOTIFIER_HEAD(vendor_record_notify_list);
>+
>+int ghes_register_vendor_record_notifier(struct notifier_block *nb) {
>+	return blocking_notifier_chain_register(&vendor_record_notify_list,
>+nb); } EXPORT_SYMBOL_GPL(ghes_register_vendor_record_notifier);
>+
>+void ghes_unregister_vendor_record_notifier(struct notifier_block *nb)
>+{
>+	blocking_notifier_chain_unregister(&vendor_record_notify_list, nb);
>}
>+EXPORT_SYMBOL_GPL(ghes_unregister_vendor_record_notifier);
>+
>+static void ghes_vendor_record_work_func(struct work_struct *work) {
>+	struct ghes_vendor_record_entry *entry;
>+	struct acpi_hest_generic_data *gdata;
>+	u32 len;
>+
>+	entry = container_of(work, struct ghes_vendor_record_entry, work);
>+	gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry);
>+
>+	blocking_notifier_call_chain(&vendor_record_notify_list,
>+				     entry->error_severity, gdata);
>+
>+	len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata));
>+	gen_pool_free(ghes_estatus_pool, (unsigned long)entry, len); }
>+
>+static void ghes_defer_non_standard_event(struct acpi_hest_generic_data
>*gdata,
>+					  int sev)
>+{
>+	struct acpi_hest_generic_data *copied_gdata;
>+	struct ghes_vendor_record_entry *entry;
>+	u32 len;
>+
>+	len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata));
>+	entry = (void *)gen_pool_alloc(ghes_estatus_pool, len);
>+	if (!entry)
>+		return;
>+
>+	copied_gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry);
>+	memcpy(copied_gdata, gdata, acpi_hest_get_record_size(gdata));
>+	entry->error_severity = sev;
>+
>+	INIT_WORK(&entry->work, ghes_vendor_record_work_func);
>+	schedule_work(&entry->work);
>+}
>+
> static bool ghes_do_proc(struct ghes *ghes,
> 			 const struct acpi_hest_generic_status *estatus)  {
>@@ -549,6 +611,7 @@ static bool ghes_do_proc(struct ghes *ghes,
> 		} else {
> 			void *err = acpi_hest_get_payload(gdata);
>
>+			ghes_defer_non_standard_event(gdata, sev);
> 			log_non_standard_event(sec_type, fru_id, fru_text,
> 					       sec_sev, err,
> 					       gdata->error_data_length);
>diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index
>517a5231cc1b..ae0e8847fdd5 100644
>--- a/include/acpi/ghes.h
>+++ b/include/acpi/ghes.h
>@@ -53,6 +53,33 @@ enum {
> 	GHES_SEV_PANIC = 0x3,
> };
>
>+#ifdef CONFIG_ACPI_APEI_GHES
>+/**
>+ * ghes_register_vendor_record_notifier - register a notifier for
>+vendor
>+ * records that the kernel would otherwise ignore.
>+ * @nb: pointer to the notifier_block structure of the event handler.
>+ *
>+ * return 0 : SUCCESS, non-zero : FAIL
>+ */
>+int ghes_register_vendor_record_notifier(struct notifier_block *nb);
>+
>+/**
>+ * ghes_unregister_vendor_record_notifier - unregister the previously
>+ * registered vendor record notifier.
>+ * @nb: pointer to the notifier_block structure of the vendor record
>handler.
>+ */
>+void ghes_unregister_vendor_record_notifier(struct notifier_block *nb);
>+#else static inline int ghes_register_vendor_record_notifier(struct
>+notifier_block *nb) {
>+	return -ENODEV;
>+}
>+
>+static inline void ghes_unregister_vendor_record_notifier(struct
>+notifier_block *nb) { } #endif
>+
> int ghes_estatus_pool_init(int num_ghes);
>
> /* From drivers/edac/ghes_edac.c */
>--
>2.17.1
>
Rafael J. Wysocki July 13, 2020, 11:17 a.m. UTC | #2
On Mon, Jul 13, 2020 at 10:35 AM Shiju Jose <shiju.jose@huawei.com> wrote:
>
> Hi Rafael, Hi James,
>
> Can you help to merge this patch because I added and tested all the suggestions from James.

I could apply the [1/2] in principle, but I need an ACK for the [2/2]
from the PCI side.

That said, it looks like the [1/2] is a James' patch that you are
sending with some changes made by you.

In that case the ordering of the S-o-b tags under it should be
different (the S-o-b from James, the what-you-have-change line and the
S-o-b from you) and also the From: tag should point to James.

Thanks!
Shiju Jose July 13, 2020, 1:33 p.m. UTC | #3
Hi Rafael,

>-----Original Message-----
>From: Rafael J. Wysocki [mailto:rafael@kernel.org]
>Sent: 13 July 2020 12:18
>To: Shiju Jose <shiju.jose@huawei.com>
>Cc: linux-acpi@vger.kernel.org; linux-pci@vger.kernel.org; linux-
>kernel@vger.kernel.org; rjw@rjwysocki.net; helgaas@kernel.org;
>bp@alien8.de; james.morse@arm.com; lenb@kernel.org;
>tony.luck@intel.com; dan.carpenter@oracle.com;
>zhangliguang@linux.alibaba.com; andriy.shevchenko@linux.intel.com;
>Wangkefeng (OS Kernel Lab) <wangkefeng.wang@huawei.com>;
>jroedel@suse.de; Linuxarm <linuxarm@huawei.com>; yangyicong
><yangyicong@huawei.com>; Jonathan Cameron
><jonathan.cameron@huawei.com>; tanxiaofei <tanxiaofei@huawei.com>
>Subject: Re: [PATCH v11 1/2] ACPI / APEI: Add a notifier chain for unknown
>(vendor) CPER records
>
>On Mon, Jul 13, 2020 at 10:35 AM Shiju Jose <shiju.jose@huawei.com>
>wrote:
>>
>> Hi Rafael, Hi James,
>>
>> Can you help to merge this patch because I added and tested all the
>suggestions from James.
>
>I could apply the [1/2] in principle, but I need an ACK for the [2/2] from the
>PCI side.
>
>That said, it looks like the [1/2] is a James' patch that you are sending with
>some changes made by you.
James added following changes on top of the original patch(V10) by me, 
[ Removed kfifo and ghes_gdata_pool. Expanded commit message ]
I had confusion how the S-o-b tag to be added for James's changes in the V11 patch posted.

>
>In that case the ordering of the S-o-b tags under it should be different (the S-
>o-b from James, the what-you-have-change line and the S-o-b from you) and
>also the From: tag should point to James.
>
>Thanks!

Thanks,
Shiju
Rafael J. Wysocki July 13, 2020, 1:38 p.m. UTC | #4
On Mon, Jul 13, 2020 at 3:33 PM Shiju Jose <shiju.jose@huawei.com> wrote:
>
> Hi Rafael,
>
> >-----Original Message-----
> >From: Rafael J. Wysocki [mailto:rafael@kernel.org]
> >Sent: 13 July 2020 12:18
> >To: Shiju Jose <shiju.jose@huawei.com>
> >Cc: linux-acpi@vger.kernel.org; linux-pci@vger.kernel.org; linux-
> >kernel@vger.kernel.org; rjw@rjwysocki.net; helgaas@kernel.org;
> >bp@alien8.de; james.morse@arm.com; lenb@kernel.org;
> >tony.luck@intel.com; dan.carpenter@oracle.com;
> >zhangliguang@linux.alibaba.com; andriy.shevchenko@linux.intel.com;
> >Wangkefeng (OS Kernel Lab) <wangkefeng.wang@huawei.com>;
> >jroedel@suse.de; Linuxarm <linuxarm@huawei.com>; yangyicong
> ><yangyicong@huawei.com>; Jonathan Cameron
> ><jonathan.cameron@huawei.com>; tanxiaofei <tanxiaofei@huawei.com>
> >Subject: Re: [PATCH v11 1/2] ACPI / APEI: Add a notifier chain for unknown
> >(vendor) CPER records
> >
> >On Mon, Jul 13, 2020 at 10:35 AM Shiju Jose <shiju.jose@huawei.com>
> >wrote:
> >>
> >> Hi Rafael, Hi James,
> >>
> >> Can you help to merge this patch because I added and tested all the
> >suggestions from James.
> >
> >I could apply the [1/2] in principle, but I need an ACK for the [2/2] from the
> >PCI side.
> >
> >That said, it looks like the [1/2] is a James' patch that you are sending with
> >some changes made by you.
> James added following changes on top of the original patch(V10) by me,
> [ Removed kfifo and ghes_gdata_pool. Expanded commit message ]
> I had confusion how the S-o-b tag to be added for James's changes in the V11 patch posted.

So James should have sent the patch with his S-o-b under it.

You cannot add S-o-b for somebody else to any patches.  You can only
add your S-o-b to somebody else's patch if you have made any changes
on top of the original.

In case you want to make a record of somebody else's contribution to
your patch, you can use the Co-developed-by tag.

Thanks!
Shiju Jose July 13, 2020, 1:50 p.m. UTC | #5
Hi Rafael,

>-----Original Message-----
>From: linux-pci-owner@vger.kernel.org [mailto:linux-pci-
>owner@vger.kernel.org] On Behalf Of Rafael J. Wysocki
>Sent: 13 July 2020 14:38
>To: Shiju Jose <shiju.jose@huawei.com>
>Cc: Rafael J. Wysocki <rafael@kernel.org>; linux-acpi@vger.kernel.org; linux-
>pci@vger.kernel.org; linux-kernel@vger.kernel.org; rjw@rjwysocki.net;
>helgaas@kernel.org; bp@alien8.de; james.morse@arm.com;
>lenb@kernel.org; tony.luck@intel.com; dan.carpenter@oracle.com;
>zhangliguang@linux.alibaba.com; andriy.shevchenko@linux.intel.com;
>Wangkefeng (OS Kernel Lab) <wangkefeng.wang@huawei.com>;
>jroedel@suse.de; Linuxarm <linuxarm@huawei.com>; yangyicong
><yangyicong@huawei.com>; Jonathan Cameron
><jonathan.cameron@huawei.com>; tanxiaofei <tanxiaofei@huawei.com>
>Subject: Re: [PATCH v11 1/2] ACPI / APEI: Add a notifier chain for unknown
>(vendor) CPER records
>
>On Mon, Jul 13, 2020 at 3:33 PM Shiju Jose <shiju.jose@huawei.com> wrote:
>>
>> Hi Rafael,
>>
>> >-----Original Message-----
>> >From: Rafael J. Wysocki [mailto:rafael@kernel.org]
>> >Sent: 13 July 2020 12:18
>> >To: Shiju Jose <shiju.jose@huawei.com>
>> >Cc: linux-acpi@vger.kernel.org; linux-pci@vger.kernel.org; linux-
>> >kernel@vger.kernel.org; rjw@rjwysocki.net; helgaas@kernel.org;
>> >bp@alien8.de; james.morse@arm.com; lenb@kernel.org;
>> >tony.luck@intel.com; dan.carpenter@oracle.com;
>> >zhangliguang@linux.alibaba.com; andriy.shevchenko@linux.intel.com;
>> >Wangkefeng (OS Kernel Lab) <wangkefeng.wang@huawei.com>;
>> >jroedel@suse.de; Linuxarm <linuxarm@huawei.com>; yangyicong
>> ><yangyicong@huawei.com>; Jonathan Cameron
>> ><jonathan.cameron@huawei.com>; tanxiaofei <tanxiaofei@huawei.com>
>> >Subject: Re: [PATCH v11 1/2] ACPI / APEI: Add a notifier chain for
>> >unknown
>> >(vendor) CPER records
>> >
>> >On Mon, Jul 13, 2020 at 10:35 AM Shiju Jose <shiju.jose@huawei.com>
>> >wrote:
>> >>
>> >> Hi Rafael, Hi James,
>> >>
>> >> Can you help to merge this patch because I added and tested all the
>> >suggestions from James.
>> >
>> >I could apply the [1/2] in principle, but I need an ACK for the [2/2]
>> >from the PCI side.
>> >
>> >That said, it looks like the [1/2] is a James' patch that you are
>> >sending with some changes made by you.
>> James added following changes on top of the original patch(V10) by me,
>> [ Removed kfifo and ghes_gdata_pool. Expanded commit message ] I had
>> confusion how the S-o-b tag to be added for James's changes in the V11
>patch posted.
>
>So James should have sent the patch with his S-o-b under it.
>
>You cannot add S-o-b for somebody else to any patches.  You can only add
>your S-o-b to somebody else's patch if you have made any changes on top of
>the original.
>
>In case you want to make a record of somebody else's contribution to your
>patch, you can use the Co-developed-by tag.

Ok. I will resend the patch with Co-developed-by tag.

>
>Thanks!

Thanks,
Shiju
diff mbox series

Patch

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 81bf71b10d44..99df00f64306 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -79,6 +79,12 @@ 
 	((struct acpi_hest_generic_status *)				\
 	 ((struct ghes_estatus_node *)(estatus_node) + 1))
 
+#define GHES_VENDOR_ENTRY_LEN(gdata_len)                               \
+	(sizeof(struct ghes_vendor_record_entry) + (gdata_len))
+#define GHES_GDATA_FROM_VENDOR_ENTRY(vendor_entry)                     \
+	((struct acpi_hest_generic_data *)                              \
+	((struct ghes_vendor_record_entry *)(vendor_entry) + 1))
+
 /*
  *  NMI-like notifications vary by architecture, before the compiler can prune
  *  unused static functions it needs a value for these enums.
@@ -123,6 +129,12 @@  static DEFINE_MUTEX(ghes_list_mutex);
  */
 static DEFINE_SPINLOCK(ghes_notify_lock_irq);
 
+struct ghes_vendor_record_entry {
+	struct work_struct work;
+	int error_severity;
+	char vendor_record[];
+};
+
 static struct gen_pool *ghes_estatus_pool;
 static unsigned long ghes_estatus_pool_size_request;
 
@@ -511,6 +523,56 @@  static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
 #endif
 }
 
+static BLOCKING_NOTIFIER_HEAD(vendor_record_notify_list);
+
+int ghes_register_vendor_record_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&vendor_record_notify_list, nb);
+}
+EXPORT_SYMBOL_GPL(ghes_register_vendor_record_notifier);
+
+void ghes_unregister_vendor_record_notifier(struct notifier_block *nb)
+{
+	blocking_notifier_chain_unregister(&vendor_record_notify_list, nb);
+}
+EXPORT_SYMBOL_GPL(ghes_unregister_vendor_record_notifier);
+
+static void ghes_vendor_record_work_func(struct work_struct *work)
+{
+	struct ghes_vendor_record_entry *entry;
+	struct acpi_hest_generic_data *gdata;
+	u32 len;
+
+	entry = container_of(work, struct ghes_vendor_record_entry, work);
+	gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry);
+
+	blocking_notifier_call_chain(&vendor_record_notify_list,
+				     entry->error_severity, gdata);
+
+	len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata));
+	gen_pool_free(ghes_estatus_pool, (unsigned long)entry, len);
+}
+
+static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata,
+					  int sev)
+{
+	struct acpi_hest_generic_data *copied_gdata;
+	struct ghes_vendor_record_entry *entry;
+	u32 len;
+
+	len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata));
+	entry = (void *)gen_pool_alloc(ghes_estatus_pool, len);
+	if (!entry)
+		return;
+
+	copied_gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry);
+	memcpy(copied_gdata, gdata, acpi_hest_get_record_size(gdata));
+	entry->error_severity = sev;
+
+	INIT_WORK(&entry->work, ghes_vendor_record_work_func);
+	schedule_work(&entry->work);
+}
+
 static bool ghes_do_proc(struct ghes *ghes,
 			 const struct acpi_hest_generic_status *estatus)
 {
@@ -549,6 +611,7 @@  static bool ghes_do_proc(struct ghes *ghes,
 		} else {
 			void *err = acpi_hest_get_payload(gdata);
 
+			ghes_defer_non_standard_event(gdata, sev);
 			log_non_standard_event(sec_type, fru_id, fru_text,
 					       sec_sev, err,
 					       gdata->error_data_length);
diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
index 517a5231cc1b..ae0e8847fdd5 100644
--- a/include/acpi/ghes.h
+++ b/include/acpi/ghes.h
@@ -53,6 +53,33 @@  enum {
 	GHES_SEV_PANIC = 0x3,
 };
 
+#ifdef CONFIG_ACPI_APEI_GHES
+/**
+ * ghes_register_vendor_record_notifier - register a notifier for vendor
+ * records that the kernel would otherwise ignore.
+ * @nb: pointer to the notifier_block structure of the event handler.
+ *
+ * return 0 : SUCCESS, non-zero : FAIL
+ */
+int ghes_register_vendor_record_notifier(struct notifier_block *nb);
+
+/**
+ * ghes_unregister_vendor_record_notifier - unregister the previously
+ * registered vendor record notifier.
+ * @nb: pointer to the notifier_block structure of the vendor record handler.
+ */
+void ghes_unregister_vendor_record_notifier(struct notifier_block *nb);
+#else
+static inline int ghes_register_vendor_record_notifier(struct notifier_block *nb)
+{
+	return -ENODEV;
+}
+
+static inline void ghes_unregister_vendor_record_notifier(struct notifier_block *nb)
+{
+}
+#endif
+
 int ghes_estatus_pool_init(int num_ghes);
 
 /* From drivers/edac/ghes_edac.c */