Message ID | 1570102361-11696-3-git-send-email-talel@amazon.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Amazon's Annapurna Labs POS Driver | expand |
On Thu, 03 Oct 2019 12:32:41 +0100, Talel Shenhar <talel@amazon.com> wrote: > > The Amazon's Annapurna Labs SoCs includes Point Of Serialization error > logging unit that reports an error in case write error (e.g . Attempt to > write to a read only register). > This error shall be reported to EDAC subsystem as uncorrectable-error. > > Signed-off-by: Talel Shenhar <talel@amazon.com> > --- > MAINTAINERS | 7 ++ > drivers/edac/Kconfig | 6 ++ > drivers/edac/Makefile | 1 + > drivers/edac/al_pos_edac.c | 173 +++++++++++++++++++++++++++++++++++++++++++++ > 4 files changed, 187 insertions(+) > create mode 100644 drivers/edac/al_pos_edac.c > > diff --git a/MAINTAINERS b/MAINTAINERS > index e7a47b5..f5ce446 100644 > --- a/MAINTAINERS > +++ b/MAINTAINERS > @@ -751,6 +751,13 @@ F: drivers/tty/serial/altera_jtaguart.c > F: include/linux/altera_uart.h > F: include/linux/altera_jtaguart.h > > +AMAZON ANNAPURNA LABS POS EDAC DRIVER > +M: Talel Shenhar <talel@amazon.com> > +M: Talel Shenhar <talelshenhar@gmail.com> > +S: Maintained > +F: Documentation/devicetree/bindings/edac/amazon,al-pos-edac.yaml > +F: drivers/edac/al-pos-edac.c > + > AMAZON ANNAPURNA LABS THERMAL MMIO DRIVER > M: Talel Shenhar <talel@amazon.com> > S: Maintained > diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig > index 200c04c..bb5805f 100644 > --- a/drivers/edac/Kconfig > +++ b/drivers/edac/Kconfig > @@ -100,6 +100,12 @@ config EDAC_AMD64_ERROR_INJECTION > In addition, there are two control files, inject_read and inject_write, > which trigger the DRAM ECC Read and Write respectively. > > +config EDAC_AL_POS > + tristate "Amazon's Annapurna Labs POS EDAC driver" > + depends on (ARCH_ALPINE || COMPILE_TEST) > + help > + Include support for the SoC POS EDAC error capability. 
> + > config EDAC_AMD76X > tristate "AMD 76x (760, 762, 768)" > depends on PCI && X86_32 > diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile > index 165ca65e..3571936 100644 > --- a/drivers/edac/Makefile > +++ b/drivers/edac/Makefile > @@ -22,6 +22,7 @@ obj-$(CONFIG_EDAC_GHES) += ghes_edac.o > edac_mce_amd-y := mce_amd.o > obj-$(CONFIG_EDAC_DECODE_MCE) += edac_mce_amd.o > > +obj-$(CONFIG_EDAC_AL_POS) += al_pos_edac.o > obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o > obj-$(CONFIG_EDAC_CPC925) += cpc925_edac.o > obj-$(CONFIG_EDAC_I5000) += i5000_edac.o > diff --git a/drivers/edac/al_pos_edac.c b/drivers/edac/al_pos_edac.c > new file mode 100644 > index 00000000..bd6cd87 > --- /dev/null > +++ b/drivers/edac/al_pos_edac.c > @@ -0,0 +1,173 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. > + */ > +#include <linux/bitfield.h> > +#include <linux/edac.h> > +#include <linux/of_irq.h> > +#include "edac_module.h" > + > +#define DRV_NAME "al_pos_edac" > +#define AL_POS_EDAC_MSG_MAX 256 > + > +/* Registers Offset */ > +#define AL_POS_ERROR_LOG_1 0x0 > +#define AL_POS_ERROR_LOG_0 0x4 > + > +/* Registers Fields */ > +#define AL_POS_ERROR_LOG_1_VALID BIT(31) > +#define AL_POS_ERROR_LOG_1_BRESP GENMASK(18, 17) > +#define AL_POS_ERROR_LOG_1_REQUEST_ID GENMASK(16, 8) > +#define AL_POS_ERROR_LOG_1_ADDR_HIGH GENMASK(7, 0) > + > +#define AL_POS_ERROR_LOG_0_ADDR_LOW GENMASK(31, 0) > + > +struct al_pos_edac { > + struct edac_device_ctl_info *edac_dev; > + void __iomem *mmio_base; > + int irq; > +}; > + > +static int al_pos_handle(struct al_pos_edac *al_pos) > +{ > + u32 log0, log1; > + u64 addr; > + u16 request_id; > + u8 bresp; > + char msg[AL_POS_EDAC_MSG_MAX]; > + > + log1 = readl(al_pos->mmio_base + AL_POS_ERROR_LOG_1); I already commented on the misuse of strict accesses. Unless you can explain and document *why* you need the extra ordering, please use relaxed accesses. 
> + if (!FIELD_GET(AL_POS_ERROR_LOG_1_VALID, log1)) > + return 0; > + > + log0 = readl(al_pos->mmio_base + AL_POS_ERROR_LOG_0); > + writel(0, al_pos->mmio_base + AL_POS_ERROR_LOG_1); > + > + addr = FIELD_GET(AL_POS_ERROR_LOG_0_ADDR_LOW, log0); > + addr |= (((u64)FIELD_GET(AL_POS_ERROR_LOG_1_ADDR_HIGH, log1)) << 32); > + request_id = FIELD_GET(AL_POS_ERROR_LOG_1_REQUEST_ID, log1); > + bresp = FIELD_GET(AL_POS_ERROR_LOG_1_BRESP, log1); > + > + snprintf(msg, sizeof(msg), > + "addr=0x%llx request_id=0x%x bresp=0x%x\n", > + addr, request_id, bresp); > + > + edac_device_handle_ue(al_pos->edac_dev, 0, 0, msg); > + > + return 1; > +} > + > +static void al_pos_edac_check(struct edac_device_ctl_info *edac_dev) > +{ > + struct al_pos_edac *al_pos = edac_dev->pvt_info; > + > + al_pos_handle(al_pos); > +} > + > +static irqreturn_t al_pos_irq_handler(int irq, void *info) > +{ > + struct platform_device *pdev = info; > + struct al_pos_edac *al_pos = platform_get_drvdata(pdev); > + > + if (al_pos_handle(al_pos)) > + return IRQ_HANDLED; > + return IRQ_NONE; > +} > + > +static int al_pos_probe(struct platform_device *pdev) > +{ > + struct edac_device_ctl_info *edac_dev; > + struct al_pos_edac *al_pos; > + int ret; > + > + edac_dev = edac_device_alloc_ctl_info(sizeof(*al_pos), DRV_NAME, 1, > + DRV_NAME, 1, 0, NULL, 0, > + edac_device_alloc_index()); > + if (!edac_dev) > + return -ENOMEM; > + > + al_pos = edac_dev->pvt_info; > + al_pos->edac_dev = edac_dev; > + platform_set_drvdata(pdev, al_pos); > + > + al_pos->mmio_base = devm_platform_ioremap_resource(pdev, 0); > + if (IS_ERR(al_pos->mmio_base)) { > + dev_err(&pdev->dev, "failed to ioremap memory (%ld)\n", > + PTR_ERR(al_pos->mmio_base)); > + return PTR_ERR(al_pos->mmio_base); > + } > + > + al_pos->irq = platform_get_irq(pdev, 0); > + if (al_pos->irq <= 0) > + edac_dev->edac_check = al_pos_edac_check; > + > + edac_dev->dev = &pdev->dev; > + edac_dev->mod_name = DRV_NAME; > + edac_dev->dev_name = dev_name(&pdev->dev); > + 
edac_dev->ctl_name = "POS"; > + > + ret = edac_device_add_device(edac_dev); > + if (ret) { > + dev_err(&pdev->dev, "Failed to add edac device\n"); > + goto err_free_edac; > + } > + > + if (al_pos->irq > 0) { > + ret = devm_request_irq(&pdev->dev, > + al_pos->irq, > + al_pos_irq_handler, > + 0, > + pdev->name, > + pdev); > + if (ret != 0) { > + dev_err(&pdev->dev, > + "failed to register to irq %d (%d)\n", > + al_pos->irq, ret); > + goto err_remove_edac; Would it be worth continuing without interrupts? After all, the interrupt seems to be an optional part of the device... Thanks, M.
Thanks for the review. On 10/7/2019 2:26 PM, Marc Zyngier wrote: > On Thu, 03 Oct 2019 12:32:41 +0100, > Talel Shenhar <talel@amazon.com> wrote: >> + log1 = readl(al_pos->mmio_base + AL_POS_ERROR_LOG_1); > I already commented on the misuse of strict accesses. Unless you can > explain and document *why* you need the extra ordering, please use > relaxed accesses. Agreed on relaxed accesses; this shall be part of v5. > >> + >> + if (al_pos->irq > 0) { >> + ret = devm_request_irq(&pdev->dev, >> + al_pos->irq, >> + al_pos_irq_handler, >> + 0, >> + pdev->name, >> + pdev); >> + if (ret != 0) { >> + dev_err(&pdev->dev, >> + "failed to register to irq %d (%d)\n", >> + al_pos->irq, ret); >> + goto err_remove_edac; > Would it be worth continuing without interrupts? After all, the > interrupt seems to be an optional part of the device... Indeed, interrupts are optional, but only on some systems. In some cases (and on some systems), this error event is critical and must be handled quickly; for those, we define the interrupt. So, bottom line, I would like to keep failing the probe when an interrupt is defined but requesting it fails. > > Thanks, > > M. >
diff --git a/MAINTAINERS b/MAINTAINERS index e7a47b5..f5ce446 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -751,6 +751,13 @@ F: drivers/tty/serial/altera_jtaguart.c F: include/linux/altera_uart.h F: include/linux/altera_jtaguart.h +AMAZON ANNAPURNA LABS POS EDAC DRIVER +M: Talel Shenhar <talel@amazon.com> +M: Talel Shenhar <talelshenhar@gmail.com> +S: Maintained +F: Documentation/devicetree/bindings/edac/amazon,al-pos-edac.yaml +F: drivers/edac/al-pos-edac.c + AMAZON ANNAPURNA LABS THERMAL MMIO DRIVER M: Talel Shenhar <talel@amazon.com> S: Maintained diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 200c04c..bb5805f 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -100,6 +100,12 @@ config EDAC_AMD64_ERROR_INJECTION In addition, there are two control files, inject_read and inject_write, which trigger the DRAM ECC Read and Write respectively. +config EDAC_AL_POS + tristate "Amazon's Annapurna Labs POS EDAC driver" + depends on (ARCH_ALPINE || COMPILE_TEST) + help + Include support for the SoC POS EDAC error capability. + config EDAC_AMD76X tristate "AMD 76x (760, 762, 768)" depends on PCI && X86_32 diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 165ca65e..3571936 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -22,6 +22,7 @@ obj-$(CONFIG_EDAC_GHES) += ghes_edac.o edac_mce_amd-y := mce_amd.o obj-$(CONFIG_EDAC_DECODE_MCE) += edac_mce_amd.o +obj-$(CONFIG_EDAC_AL_POS) += al_pos_edac.o obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o obj-$(CONFIG_EDAC_CPC925) += cpc925_edac.o obj-$(CONFIG_EDAC_I5000) += i5000_edac.o diff --git a/drivers/edac/al_pos_edac.c b/drivers/edac/al_pos_edac.c new file mode 100644 index 00000000..bd6cd87 --- /dev/null +++ b/drivers/edac/al_pos_edac.c @@ -0,0 +1,173 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+ */ +#include <linux/bitfield.h> +#include <linux/edac.h> +#include <linux/of_irq.h> +#include "edac_module.h" + +#define DRV_NAME "al_pos_edac" +#define AL_POS_EDAC_MSG_MAX 256 + +/* Registers Offset */ +#define AL_POS_ERROR_LOG_1 0x0 +#define AL_POS_ERROR_LOG_0 0x4 + +/* Registers Fields */ +#define AL_POS_ERROR_LOG_1_VALID BIT(31) +#define AL_POS_ERROR_LOG_1_BRESP GENMASK(18, 17) +#define AL_POS_ERROR_LOG_1_REQUEST_ID GENMASK(16, 8) +#define AL_POS_ERROR_LOG_1_ADDR_HIGH GENMASK(7, 0) + +#define AL_POS_ERROR_LOG_0_ADDR_LOW GENMASK(31, 0) + +struct al_pos_edac { + struct edac_device_ctl_info *edac_dev; + void __iomem *mmio_base; + int irq; +}; + +static int al_pos_handle(struct al_pos_edac *al_pos) +{ + u32 log0, log1; + u64 addr; + u16 request_id; + u8 bresp; + char msg[AL_POS_EDAC_MSG_MAX]; + + log1 = readl(al_pos->mmio_base + AL_POS_ERROR_LOG_1); + if (!FIELD_GET(AL_POS_ERROR_LOG_1_VALID, log1)) + return 0; + + log0 = readl(al_pos->mmio_base + AL_POS_ERROR_LOG_0); + writel(0, al_pos->mmio_base + AL_POS_ERROR_LOG_1); + + addr = FIELD_GET(AL_POS_ERROR_LOG_0_ADDR_LOW, log0); + addr |= (((u64)FIELD_GET(AL_POS_ERROR_LOG_1_ADDR_HIGH, log1)) << 32); + request_id = FIELD_GET(AL_POS_ERROR_LOG_1_REQUEST_ID, log1); + bresp = FIELD_GET(AL_POS_ERROR_LOG_1_BRESP, log1); + + snprintf(msg, sizeof(msg), + "addr=0x%llx request_id=0x%x bresp=0x%x\n", + addr, request_id, bresp); + + edac_device_handle_ue(al_pos->edac_dev, 0, 0, msg); + + return 1; +} + +static void al_pos_edac_check(struct edac_device_ctl_info *edac_dev) +{ + struct al_pos_edac *al_pos = edac_dev->pvt_info; + + al_pos_handle(al_pos); +} + +static irqreturn_t al_pos_irq_handler(int irq, void *info) +{ + struct platform_device *pdev = info; + struct al_pos_edac *al_pos = platform_get_drvdata(pdev); + + if (al_pos_handle(al_pos)) + return IRQ_HANDLED; + return IRQ_NONE; +} + +static int al_pos_probe(struct platform_device *pdev) +{ + struct edac_device_ctl_info *edac_dev; + struct al_pos_edac *al_pos; + int ret; + + 
edac_dev = edac_device_alloc_ctl_info(sizeof(*al_pos), DRV_NAME, 1, + DRV_NAME, 1, 0, NULL, 0, + edac_device_alloc_index()); + if (!edac_dev) + return -ENOMEM; + + al_pos = edac_dev->pvt_info; + al_pos->edac_dev = edac_dev; + platform_set_drvdata(pdev, al_pos); + + al_pos->mmio_base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(al_pos->mmio_base)) { + dev_err(&pdev->dev, "failed to ioremap memory (%ld)\n", + PTR_ERR(al_pos->mmio_base)); + return PTR_ERR(al_pos->mmio_base); + } + + al_pos->irq = platform_get_irq(pdev, 0); + if (al_pos->irq <= 0) + edac_dev->edac_check = al_pos_edac_check; + + edac_dev->dev = &pdev->dev; + edac_dev->mod_name = DRV_NAME; + edac_dev->dev_name = dev_name(&pdev->dev); + edac_dev->ctl_name = "POS"; + + ret = edac_device_add_device(edac_dev); + if (ret) { + dev_err(&pdev->dev, "Failed to add edac device\n"); + goto err_free_edac; + } + + if (al_pos->irq > 0) { + ret = devm_request_irq(&pdev->dev, + al_pos->irq, + al_pos_irq_handler, + 0, + pdev->name, + pdev); + if (ret != 0) { + dev_err(&pdev->dev, + "failed to register to irq %d (%d)\n", + al_pos->irq, ret); + goto err_remove_edac; + } + } + + return 0; + +err_remove_edac: + edac_device_del_device(edac_dev->dev); +err_free_edac: + edac_device_free_ctl_info(edac_dev); + + return ret; +} + +static int al_pos_remove(struct platform_device *pdev) +{ + struct al_pos_edac *al_pos = platform_get_drvdata(pdev); + + if (al_pos->irq > 0) + devm_free_irq(&pdev->dev, al_pos->irq, pdev); + + edac_device_del_device(al_pos->edac_dev->dev); + edac_device_free_ctl_info(al_pos->edac_dev); + + return 0; +} + +static const struct of_device_id al_pos_of_match[] = { + { .compatible = "amazon,al-pos-edac", }, + {}, +}; + +MODULE_DEVICE_TABLE(of, al_pos_of_match); + +static struct platform_driver al_pos_driver = { + .probe = al_pos_probe, + .remove = al_pos_remove, + .driver = { + .name = DRV_NAME, + .of_match_table = al_pos_of_match, + }, +}; + +module_platform_driver(al_pos_driver); + 
+MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Talel Shenhar"); +MODULE_DESCRIPTION("Amazon's Annapurna Labs POS driver");
Amazon's Annapurna Labs SoCs include a Point Of Serialization (POS) error logging unit that reports an error when a write error occurs (e.g. an attempt to write to a read-only register). This error shall be reported to the EDAC subsystem as an uncorrectable error. Signed-off-by: Talel Shenhar <talel@amazon.com> --- MAINTAINERS | 7 ++ drivers/edac/Kconfig | 6 ++ drivers/edac/Makefile | 1 + drivers/edac/al_pos_edac.c | 173 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 187 insertions(+) create mode 100644 drivers/edac/al_pos_edac.c