diff mbox series

[RFC,2/4] ACPI: APEI: Add ghes_handle_memory_failure to the new notification method

Message ID 20190812101149.26036-3-shiju.jose@huawei.com (mailing list archive)
State RFC, archived
Headers show
Series ACPI: APEI: Add support to notify the vendor specific HW errors | expand

Commit Message

Shiju Jose Aug. 12, 2019, 10:11 a.m. UTC
This patch adds ghes_handle_memory_failure to the new error
notification method.

Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
---
 drivers/acpi/apei/ghes.c | 51 ++++++++++++++++++++++++++++++++++--------------
 1 file changed, 36 insertions(+), 15 deletions(-)

Comments

James Morse Aug. 21, 2019, 5:22 p.m. UTC | #1
Hi,

On 12/08/2019 11:11, Shiju Jose wrote:
> This patch adds ghes_handle_memory_failure to the new error
> notification method.

The commit message doesn't answer the question: why?

The existing code works. This just looks like additional churn.
Given a user, I think the vendor specific example is useful. I don't think making this
thing more pluggable is a good idea.


Thanks,

James
Shiju Jose Aug. 22, 2019, 4:57 p.m. UTC | #2
Hi James,

>-----Original Message-----
>From: James Morse [mailto:james.morse@arm.com]
>Sent: 21 August 2019 18:23
>To: Shiju Jose <shiju.jose@huawei.com>
>Cc: linux-acpi@vger.kernel.org; linux-edac@vger.kernel.org; linux-
>kernel@vger.kernel.org; rjw@rjwysocki.net; lenb@kernel.org;
>tony.luck@intel.com; bp@alien8.de; baicar@os.amperecomputing.com;
>Linuxarm <linuxarm@huawei.com>; Jonathan Cameron
><jonathan.cameron@huawei.com>; tanxiaofei <tanxiaofei@huawei.com>
>Subject: Re: [PATCH RFC 2/4] ACPI: APEI: Add ghes_handle_memory_failure to
>the new notification method
>
>Hi,
>
>On 12/08/2019 11:11, Shiju Jose wrote:
>> This patch adds ghes_handle_memory_failure to the new error
>> notification method.
>
>The commit message doesn't answer the question: why?
>
>The existing code works. This just looks like additional churn.
>Given a user, I think the vendor specific example is useful. I don't think making
>this thing more pluggable is a good idea.
This was intended to replace the  number of if(guid_equal(...)) else if(guid_equal(...)) checks in the ghes_do_proc() , which would grow when new UEFI defined error sections would be added in the future.
>
>
>Thanks,
>
>James

Thanks,
Shiju
diff mbox series

Patch

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 374d197..4400d56 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -401,14 +401,18 @@  static void ghes_clear_estatus(struct ghes *ghes,
 		ghes_ack_error(ghes->generic_v2);
 }
 
-static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev)
+static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata,
+				       int sev, void *data)
 {
-#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
+	struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
+	int sec_sev = ghes_severity(gdata->error_severity);
 	unsigned long pfn;
 	int flags = -1;
-	int sec_sev = ghes_severity(gdata->error_severity);
-	struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
 
+	ghes_edac_report_mem_error(sev, mem_err);
+	arch_apei_report_mem_error(sev, mem_err);
+
+#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
 	if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
 		return;
 
@@ -569,15 +573,7 @@  static void ghes_do_proc(struct ghes *ghes,
 		if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
 			fru_text = gdata->fru_text;
 
-		if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
-			struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
-
-			ghes_edac_report_mem_error(sev, mem_err);
-
-			arch_apei_report_mem_error(sev, mem_err);
-			ghes_handle_memory_failure(gdata, sev);
-		}
-		else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
+		if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
 			ghes_handle_aer(gdata);
 		}
 		else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
@@ -1190,11 +1186,25 @@  static int apei_sdei_unregister_ghes(struct ghes *ghes)
 	return sdei_unregister_ghes(ghes);
 }
 
+struct ghes_err_handler_tab {
+	guid_t sec_type;
+	error_handle handle;
+};
+
+static struct ghes_err_handler_tab handler_tab[] = {
+	{
+		.sec_type = CPER_SEC_PLATFORM_MEM,
+		.handle = ghes_handle_memory_failure,
+	},
+	{ /* sentinel */ }
+};
+
 static int ghes_probe(struct platform_device *ghes_dev)
 {
 	struct acpi_hest_generic *generic;
 	struct ghes *ghes = NULL;
 	unsigned long flags;
+	int i;
 
 	int rc = -EINVAL;
 
@@ -1308,9 +1318,20 @@  static int ghes_probe(struct platform_device *ghes_dev)
 
 	ghes_edac_register(ghes, &ghes_dev->dev);
 
-	if (!refcount_read(&ghes_ref_count))
+	if (!refcount_read(&ghes_ref_count)) {
 		refcount_set(&ghes_ref_count, 1);
-	else
+		/* register handler functions for the standard errors.
+		 * This may be done from the corresponding drivers.
+		 */
+		for (i = 0; handler_tab[i].handle; i++) {
+			if (ghes_error_notify_register(handler_tab[i].sec_type,
+						handler_tab[i].handle, NULL)) {
+				ghes_edac_unregister(ghes);
+				platform_set_drvdata(ghes_dev, NULL);
+				goto err;
+			}
+		}
+	} else
 		refcount_inc(&ghes_ref_count);
 
 	/* Handle any pending errors right away */