diff mbox

[RFC,v2] apei: Add ACPI APEI event notification support

Message ID 1445458480-6493-1-git-send-email-lho@apm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Loc Ho Oct. 21, 2015, 8:14 p.m. UTC
Add ACPI APEI event notification support for ARMv8 SoC under the
following conditions:

1. Without EL3 support and no GPIO PIN available
2. With EL3 support but no mean to notify the OS

v2
* Make all code more generic naming
* Still waiting for comment from Linaro folks on APEI

Signed-off-by: Loc Ho <lho@apm.com>
---
 drivers/edac/Kconfig     |    7 +++
 drivers/edac/Makefile    |    2 +
 drivers/edac/edac_apei.c |  114 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 123 insertions(+), 0 deletions(-)
 create mode 100644 drivers/edac/edac_apei.c

--
1.7.1

Comments

Borislav Petkov Oct. 22, 2015, 9:57 a.m. UTC | #1
I'm top-posting here because I'm adding some more ARM people to CC and
would like for them to see the whole thing.

Ok, so what's the strategy here?

I know Tomasz did some untangling of GHES stuff to make it load on ARM
too and be arch-agnostic. The registration code in it is more than the
tiny edac_apei_irq_handler().

So why is this thing a separate driver? It is called EDAC_APEI although
it is ARM-specific.

Why can't it be part of ghes_edac.c with ARM-specific section, if
absolutely needed?

If this is going to implement the ACPI spec, then I don't see anything
vendor-, or arch-specific getting in the way except maybe that APMC0D51
id.

Hmmm?

On Wed, Oct 21, 2015 at 02:14:40PM -0600, Loc Ho wrote:
> Add ACPI APEI event notification support for ARMv8 SoC under the
> following conditions:
> 
> 1. Without EL3 support and no GPIO PIN available
> 2. With EL3 support but no mean to notify the OS
> 
> v2
> * Make all code more generic naming
> * Still waiting for comment from Linaro folks on APEI
> 
> Signed-off-by: Loc Ho <lho@apm.com>
> ---
>  drivers/edac/Kconfig     |    7 +++
>  drivers/edac/Makefile    |    2 +
>  drivers/edac/edac_apei.c |  114 ++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 123 insertions(+), 0 deletions(-)
>  create mode 100644 drivers/edac/edac_apei.c
> 
> diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
> index ef25000..62c2bb2 100644
> --- a/drivers/edac/Kconfig
> +++ b/drivers/edac/Kconfig
> @@ -95,6 +95,13 @@ config EDAC_GHES
> 
>  	  In doubt, say 'Y'.
> 
> +config EDAC_APEI
> +	tristate "ACPI APEI errors notification"
> +	depends on ACPI_APEI && (ARM64 || COMPILE_TEST)
> +	help
> +	  APEI interface support for error notification via interrupt based
> +	  for ARMv8 64-bit SOCs.
> +
>  config EDAC_AMD64
>  	tristate "AMD64 (Opteron, Athlon64)"
>  	depends on EDAC_MM_EDAC && AMD_NB && EDAC_DECODE_MCE
> diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
> index dbf53e0..9f91ff3 100644
> --- a/drivers/edac/Makefile
> +++ b/drivers/edac/Makefile
> @@ -19,6 +19,7 @@ edac_core-y	+= edac_pci.o edac_pci_sysfs.o
>  endif
> 
>  obj-$(CONFIG_EDAC_GHES)			+= ghes_edac.o
> +obj-$(CONFIG_EDAC_APEI)			+= edac_apei.o
> 
>  edac_mce_amd-y				:= mce_amd.o
>  obj-$(CONFIG_EDAC_DECODE_MCE)		+= edac_mce_amd.o
> @@ -70,3 +71,4 @@ obj-$(CONFIG_EDAC_OCTEON_PCI)		+= octeon_edac-pci.o
>  obj-$(CONFIG_EDAC_ALTERA_MC)		+= altera_edac.o
>  obj-$(CONFIG_EDAC_SYNOPSYS)		+= synopsys_edac.o
>  obj-$(CONFIG_EDAC_XGENE)		+= xgene_edac.o
> +
> diff --git a/drivers/edac/edac_apei.c b/drivers/edac/edac_apei.c
> new file mode 100644
> index 0000000..fd066f0
> --- /dev/null
> +++ b/drivers/edac/edac_apei.c
> @@ -0,0 +1,114 @@
> +/*
> + * APEI notification support for ARM 64-bit Soc's
> + *
> + * Copyright (c) 2015, Applied Micro Circuits Corporation
> + * Author: Tuan Phan <tphan@apm.com>
> + *
> + * This program is free software; you can redistribute  it and/or modify it
> + * under  the terms of  the GNU General  Public License as published by the
> + * Free Software Foundation;  either version 2 of the  License, or (at your
> + * option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program.  If not, see <http://www.gnu.org/licenses/>.
> + *
> + * This driver provides APEI error notification via interrupt with the UEFI
> + * FW for SoC processors under the following conditions:
> + *
> + * 1. No interrupt GPIO available or don't want to use one and no EL3 support
> + * 2. With EL3 support but no mean to interrupt the Linux OS that an event
> + *    has occurred.
> + *
> + */
> +#include <linux/acpi.h>
> +#include <linux/interrupt.h>
> +#include <linux/module.h>
> +#include <linux/platform_device.h>
> +
> +struct edac_apei_dev {
> +	void  __iomem	*csr_base;
> +	struct device	*dev;
> +	u32		irq;
> +	acpi_handle	evt_handle;
> +};
> +
> +static const struct acpi_device_id edac_apei_match[] = {
> +	{ "APMC0D51", 0},
> +	{},
> +};
> +MODULE_DEVICE_TABLE(acpi, edac_apei_match);
> +
> +static irqreturn_t edac_apei_irq_handler(int irq, void *data)
> +{
> +	struct edac_apei_dev *ctx = (struct edac_apei_dev *) data;
> +
> +	/* Clear the interrupt */
> +	writel(readl(ctx->csr_base), ctx->csr_base);
> +
> +	/* Send error event */
> +	acpi_execute_simple_method(ctx->evt_handle, NULL, 0);
> +
> +	return IRQ_HANDLED;
> +}
> +
> +static int edac_apei_probe(struct platform_device *pdev)
> +{
> +	struct edac_apei_dev *ctx;
> +	struct resource *res;
> +	acpi_handle evt_handle;
> +	int rc;
> +
> +	ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx), GFP_KERNEL);
> +	if (!ctx)
> +		return -ENOMEM;
> +
> +	ctx->dev = &pdev->dev;
> +	platform_set_drvdata(pdev, ctx);
> +
> +	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	ctx->csr_base = devm_ioremap_resource(&pdev->dev, res);
> +	if (IS_ERR(ctx->csr_base)) {
> +		dev_err(&pdev->dev, "No CSR resource\n");
> +		return PTR_ERR(ctx->csr_base);
> +	}
> +
> +	ctx->irq = platform_get_irq(pdev, 0);
> +	if (ctx->irq < 0) {
> +		dev_err(&pdev->dev, "No IRQ resource\n");
> +		return ctx->irq;
> +	}
> +
> +	rc = devm_request_irq(&pdev->dev, ctx->irq, edac_apei_irq_handler, 0,
> +			      dev_name(&pdev->dev), ctx);
> +	if (rc) {
> +		dev_err(&pdev->dev, "Could not request IRQ %d\n", ctx->irq);
> +		return rc;
> +	}
> +
> +	if (!ACPI_SUCCESS(acpi_get_handle(ACPI_HANDLE(ctx->dev), "_EVT",
> +					  &evt_handle)))
> +		return AE_BAD_PARAMETER;
> +
> +	ctx->evt_handle = evt_handle;
> +
> +	return rc;
> +}
> +
> +static struct platform_driver edac_apei_driver = {
> +	.probe = edac_apei_probe,
> +	.driver = {
> +		.name = "edac-apei",
> +		.acpi_match_table = ACPI_PTR(edac_apei_match),
> +	},
> +};
> +module_platform_driver(edac_apei_driver);
> +
> +MODULE_DESCRIPTION("EDAC APEI driver");
> +MODULE_AUTHOR("Tuan Phan <tphan@apm.com>");
> +MODULE_LICENSE("GPL");
> +MODULE_VERSION("0.1");
> --
> 1.7.1
> 
>
Loc Ho Oct. 23, 2015, 12:21 a.m. UTC | #2
Hi Borislav,

>
> Ok, so what's the strategy here?
>
> I know Tomasz did some untangling of GHES stuff to make it load on ARM
> too and be arch-agnostic. The registration code in it is more than the
> tiny edac_apei_irq_handler().
>
> So why is this thing a separate driver? It is called EDAC_APEI although
> it is ARM-specific.
>
> Why can't it be part of ghes_edac.c with ARM-specific section, if
> absolutely needed?
>
> If this is going to implement the ACPI spec, then I don't see anything
> vendor-, or arch-specific getting in the way except maybe that APMC0D51
> id.
>
> Hmmm?

Tuan and I looked over the ghes.c code and it does seems to have most
of the stuff that we need. The only issue that we have is how to clear
the interrupt. With our HW, the hardware error source record isn't
really hardware but some memory table. Clearing the GHES table doesn't
clear the interrupt. If there is an method in which one can call in
function ghes_irq_func to clear the interrupt, it would work for us. I
am thinking one possible solution is to add an AML method. But in
order to call an AML method, one need an device node - which in turn
will requires another small piece of code (like the second version of
the driver). Any suggestion anyone?

-Loc
Will Deacon Nov. 19, 2015, 2:18 p.m. UTC | #3
[Thanks Boris for CC'ing me]

Hi all,

Sorry for the delay on this, I had to do some research and this isn't
really my area of expertise.

On Thu, Oct 22, 2015 at 11:57:16AM +0200, Borislav Petkov wrote:
> I'm top-posting here because I'm adding some more ARM people to CC and
> would like for them to see the whole thing.
> 
> Ok, so what's the strategy here?
> 
> I know Tomasz did some untangling of GHES stuff to make it load on ARM
> too and be arch-agnostic. The registration code in it is more than the
> tiny edac_apei_irq_handler().
> 
> So why is this thing a separate driver? It is called EDAC_APEI although
> it is ARM-specific.
> 
> Why can't it be part of ghes_edac.c with ARM-specific section, if
> absolutely needed?
> 
> If this is going to implement the ACPI spec, then I don't see anything
> vendor-, or arch-specific getting in the way except maybe that APMC0D51
> id.
> 
> Hmmm?

I think you've hit the nail on the head. As far as I can tell, this is
*not* conformant to ACPI APEI (which should be platform and architecture
agnostic) and is actually an implementation-specific interface. However,
I'm happy to be proven wrong if somebody can point me at a document
describing ACPI APEI in a way that matches this implementation.

> > +static const struct acpi_device_id edac_apei_match[] = {
> > +	{ "APMC0D51", 0},
> > +	{},

This, in particular, gives the game away methinks.

Will
Jon Masters Nov. 19, 2015, 3:05 p.m. UTC | #4
Top post - quick reply - you are correct. This is a vendor specific implementation. APEI is generic, and in the general case there should not be a need to do anything with a special driver. Some will need their own special solution in the early days, but everyone else should be able to go generic. In particular I expect those implementing EL3 to do firmware first handling and to signal the event independently of any OS specific hooks.
Borislav Petkov Nov. 19, 2015, 3:51 p.m. UTC | #5
On Thu, Nov 19, 2015 at 10:05:16AM -0500, Jon Masters wrote:
> Top post - quick reply - you are correct. This is a vendor specific
> implementation. APEI is generic, and in the general case there should
> not be a need to do anything with a special driver. Some will need
> their own special solution in the early days, but everyone else should
> be able to go generic. In particular I expect those implementing EL3
> to do firmware first handling and to signal the event independently of
> any OS specific hooks.

In the meantime, I got these two patches:

https://lkml.kernel.org/r/1446857519-5908-1-git-send-email-lho@apm.com

which makes the GHES IRQ shared and, apparently solves the issue for
APM.

I'd appreciate it if you guys took a look.

Thanks!
Loc Ho Nov. 19, 2015, 6:19 p.m. UTC | #6
Hi,

>> Top post - quick reply - you are correct. This is a vendor specific
>> implementation. APEI is generic, and in the general case there should
>> not be a need to do anything with a special driver. Some will need
>> their own special solution in the early days, but everyone else should
>> be able to go generic. In particular I expect those implementing EL3
>> to do firmware first handling and to signal the event independently of
>> any OS specific hooks.
>
> In the meantime, I got these two patches:
>
> https://lkml.kernel.org/r/1446857519-5908-1-git-send-email-lho@apm.com
>
> which makes the GHES IRQ shared and, apparently solves the issue for
> APM.
>
> I'd appreciate it if you guys took a look.
>

Let me explain a bit so that all of you understand what was changed
between version 2 and 3. For background, the issue is how to clear the
interrupt. In version 2, this is via the Linux kernel driver. In
version 3, we moved this to the FW and have the FW configures the GIC
directly to just mark the error interrupts (as spec in the GHES table)
pending. As it is pending only, it will get clear after the interrupt
gets processed. In doing this, two issues was observed. One is the
fact that we have multiple error cascades into a few interrupts. The
second issue is how ACPI upper layer handles return codes an function.
The patch in version 3 addresses both of these issues.

-Loc
diff mbox

Patch

diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index ef25000..62c2bb2 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -95,6 +95,13 @@  config EDAC_GHES

 	  In doubt, say 'Y'.

+config EDAC_APEI
+	tristate "ACPI APEI errors notification"
+	depends on ACPI_APEI && (ARM64 || COMPILE_TEST)
+	help
+	  APEI interface support for error notification via interrupt based
+	  for ARMv8 64-bit SOCs.
+
 config EDAC_AMD64
 	tristate "AMD64 (Opteron, Athlon64)"
 	depends on EDAC_MM_EDAC && AMD_NB && EDAC_DECODE_MCE
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index dbf53e0..9f91ff3 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -19,6 +19,7 @@  edac_core-y	+= edac_pci.o edac_pci_sysfs.o
 endif

 obj-$(CONFIG_EDAC_GHES)			+= ghes_edac.o
+obj-$(CONFIG_EDAC_APEI)			+= edac_apei.o

 edac_mce_amd-y				:= mce_amd.o
 obj-$(CONFIG_EDAC_DECODE_MCE)		+= edac_mce_amd.o
@@ -70,3 +71,4 @@  obj-$(CONFIG_EDAC_OCTEON_PCI)		+= octeon_edac-pci.o
 obj-$(CONFIG_EDAC_ALTERA_MC)		+= altera_edac.o
 obj-$(CONFIG_EDAC_SYNOPSYS)		+= synopsys_edac.o
 obj-$(CONFIG_EDAC_XGENE)		+= xgene_edac.o
+
diff --git a/drivers/edac/edac_apei.c b/drivers/edac/edac_apei.c
new file mode 100644
index 0000000..fd066f0
--- /dev/null
+++ b/drivers/edac/edac_apei.c
@@ -0,0 +1,114 @@ 
+/*
+ * APEI notification support for ARM 64-bit Soc's
+ *
+ * Copyright (c) 2015, Applied Micro Circuits Corporation
+ * Author: Tuan Phan <tphan@apm.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * This driver provides APEI error notification via interrupt with the UEFI
+ * FW for SoC processors under the following conditions:
+ *
+ * 1. No interrupt GPIO available or don't want to use one and no EL3 support
+ * 2. With EL3 support but no mean to interrupt the Linux OS that an event
+ *    has occurred.
+ *
+ */
+#include <linux/acpi.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+struct edac_apei_dev {
+	void  __iomem	*csr_base;
+	struct device	*dev;
+	u32		irq;
+	acpi_handle	evt_handle;
+};
+
+static const struct acpi_device_id edac_apei_match[] = {
+	{ "APMC0D51", 0},
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, edac_apei_match);
+
+static irqreturn_t edac_apei_irq_handler(int irq, void *data)
+{
+	struct edac_apei_dev *ctx = (struct edac_apei_dev *) data;
+
+	/* Clear the interrupt */
+	writel(readl(ctx->csr_base), ctx->csr_base);
+
+	/* Send error event */
+	acpi_execute_simple_method(ctx->evt_handle, NULL, 0);
+
+	return IRQ_HANDLED;
+}
+
+static int edac_apei_probe(struct platform_device *pdev)
+{
+	struct edac_apei_dev *ctx;
+	struct resource *res;
+	acpi_handle evt_handle;
+	int rc;
+
+	ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->dev = &pdev->dev;
+	platform_set_drvdata(pdev, ctx);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	ctx->csr_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(ctx->csr_base)) {
+		dev_err(&pdev->dev, "No CSR resource\n");
+		return PTR_ERR(ctx->csr_base);
+	}
+
+	ctx->irq = platform_get_irq(pdev, 0);
+	if (ctx->irq < 0) {
+		dev_err(&pdev->dev, "No IRQ resource\n");
+		return ctx->irq;
+	}
+
+	rc = devm_request_irq(&pdev->dev, ctx->irq, edac_apei_irq_handler, 0,
+			      dev_name(&pdev->dev), ctx);
+	if (rc) {
+		dev_err(&pdev->dev, "Could not request IRQ %d\n", ctx->irq);
+		return rc;
+	}
+
+	if (!ACPI_SUCCESS(acpi_get_handle(ACPI_HANDLE(ctx->dev), "_EVT",
+					  &evt_handle)))
+		return AE_BAD_PARAMETER;
+
+	ctx->evt_handle = evt_handle;
+
+	return rc;
+}
+
+static struct platform_driver edac_apei_driver = {
+	.probe = edac_apei_probe,
+	.driver = {
+		.name = "edac-apei",
+		.acpi_match_table = ACPI_PTR(edac_apei_match),
+	},
+};
+module_platform_driver(edac_apei_driver);
+
+MODULE_DESCRIPTION("EDAC APEI driver");
+MODULE_AUTHOR("Tuan Phan <tphan@apm.com>");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("0.1");