diff mbox series

[v2,2/2] Loongarch: EDAC driver for loongson memory controller

Message ID 20240903015354.9443-3-zhaoqunqin@loongson.cn (mailing list archive)
State New
Headers show
Series Add EDAC driver for ls3a5000 memory controller | expand

Commit Message

Zhao Qunqin Sept. 3, 2024, 1:53 a.m. UTC
Report single bit errors (CE) only.

Signed-off-by: Zhao Qunqin <zhaoqunqin@loongson.cn>
---
 MAINTAINERS                  |   1 +
 arch/loongarch/Kconfig       |   1 +
 drivers/edac/Kconfig         |   8 ++
 drivers/edac/Makefile        |   1 +
 drivers/edac/ls3a5000_edac.c | 187 +++++++++++++++++++++++++++++++++++
 5 files changed, 198 insertions(+)
 create mode 100644 drivers/edac/ls3a5000_edac.c

Comments

Huacai Chen Sept. 3, 2024, 2:39 a.m. UTC | #1
Hi, Qunqin,

On Tue, Sep 3, 2024 at 9:53 AM Zhao Qunqin <zhaoqunqin@loongson.cn> wrote:
>
> Report single bit errors (CE) only.
>
> Signed-off-by: Zhao Qunqin <zhaoqunqin@loongson.cn>
> ---
>  MAINTAINERS                  |   1 +
>  arch/loongarch/Kconfig       |   1 +
>  drivers/edac/Kconfig         |   8 ++
>  drivers/edac/Makefile        |   1 +
>  drivers/edac/ls3a5000_edac.c | 187 +++++++++++++++++++++++++++++++++++
>  5 files changed, 198 insertions(+)
>  create mode 100644 drivers/edac/ls3a5000_edac.c
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 6cc8cfc8f..b43f82279 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -13242,6 +13242,7 @@ M:      Zhao Qunqin <zhaoqunqin@loongson.cn>
>  L:     linux-edac@vger.kernel.org
>  S:     Maintained
>  F:     Documentation/devicetree/bindings/edac/loongson,ls3a5000-mc-edac.yaml
> +F:     drivers/edac/ls3a5000_edac.c
>
>  LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI)
>  M:     Sathya Prakash <sathya.prakash@broadcom.com>
> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> index 70f169210..348030c24 100644
> --- a/arch/loongarch/Kconfig
> +++ b/arch/loongarch/Kconfig
> @@ -182,6 +182,7 @@ config LOONGARCH
>         select PCI_QUIRKS
>         select PERF_USE_VMALLOC
>         select RTC_LIB
> +       select EDAC_SUPPORT
>         select SPARSE_IRQ
>         select SYSCTL_ARCH_UNALIGN_ALLOW
>         select SYSCTL_ARCH_UNALIGN_NO_WARN
> diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
> index 16c8de505..2d10256f0 100644
> --- a/drivers/edac/Kconfig
> +++ b/drivers/edac/Kconfig
> @@ -573,5 +573,13 @@ config EDAC_VERSAL
>           Support injecting both correctable and uncorrectable errors
>           for debugging purposes.
>
> +config EDAC_LS3A5000
> +       tristate "Ls3a5000 Memory Controller"
> +       depends on LOONGARCH || COMPILE_TEST
> +       help
> +         Support for error detection and correction on the ls3a5000 memory
> +         controller. This driver report single bit errors (CE) only.
> +         Ls3c5000l, ls3c5000, ls3d5000, ls3a6000 and ls3c6000 are compatible
> +         with ls3a5000.
Here can be improved as:

config EDAC_LOONGSON3
       tristate "Loonson-3 Memory Controller"
       depends on LOONGARCH || COMPILE_TEST
       help
         Support for error detection and correction on the Loongson-3 family
         memory controller. This driver reports single bit errors (CE) only.
         Loongson-3A5000/3C5000/3C5000L/3A6000/3C6000 are compatible.

>
>  endif # EDAC
> diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
> index 4edfb83ff..0974e3fa6 100644
> --- a/drivers/edac/Makefile
> +++ b/drivers/edac/Makefile
> @@ -89,3 +89,4 @@ obj-$(CONFIG_EDAC_DMC520)             += dmc520_edac.o
>  obj-$(CONFIG_EDAC_NPCM)                        += npcm_edac.o
>  obj-$(CONFIG_EDAC_ZYNQMP)              += zynqmp_edac.o
>  obj-$(CONFIG_EDAC_VERSAL)              += versal_edac.o
> +obj-$(CONFIG_EDAC_LS3A5000)            += ls3a5000_edac.o
> diff --git a/drivers/edac/ls3a5000_edac.c b/drivers/edac/ls3a5000_edac.c
> new file mode 100644
> index 000000000..c68fd7c5f
> --- /dev/null
> +++ b/drivers/edac/ls3a5000_edac.c
> @@ -0,0 +1,187 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2024 Loongson Technology Corporation Limited.
> + */
> +
> +#include <linux/edac.h>
> +#include <linux/module.h>
> +#include <linux/init.h>
> +#include <linux/platform_device.h>
> +
> +#include "edac_module.h"
> +
> +enum ecc_index {
> +       ECC_SET = 0,
> +       ECC_RESERVED,
> +       ECC_COUNT,
> +       ECC_CS_COUNT,
> +       ECC_CODE,
> +       ECC_ADDR,
> +       ECC_DATA0,
> +       ECC_DATA1,
> +       ECC_DATA2,
> +       ECC_DATA3,
> +};
> +
> +struct loongson_edac_pvt {
> +       u64 *ecc_base;
> +       int last_ce_count;
> +};
> +
> +static void loongson_update_ce_count(struct mem_ctl_info *mci,
> +                                       int chan,
> +                                       int new)
> +{
> +       int add;
> +       struct loongson_edac_pvt *pvt = mci->pvt_info;
> +
> +       add = new - pvt->last_ce_count;
> +
> +       /* Store the new value */
> +       pvt->last_ce_count = new;
> +
> +       /* device resume or any other exceptions*/
> +       if (add < 0)
> +               return;
> +
> +       /*updated the edac core */
> +       if (add != 0) {
> +               edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add,
> +                                       0, 0, 0,
> +                                       chan, 0, -1, "error", "");
> +               edac_mc_printk(mci, KERN_INFO, "add: %d", add);
> +       }
> +}
> +
> +static int loongson_read_ecc(struct mem_ctl_info *mci)
> +{
> +       u64 ecc;
> +       int cs = 0;
> +       struct loongson_edac_pvt *pvt = mci->pvt_info;
> +
> +       if (!pvt->ecc_base)
> +               return pvt->last_ce_count;
> +
> +       ecc = pvt->ecc_base[ECC_CS_COUNT];
> +       cs += ecc & 0xff;               // cs0
> +       cs += (ecc >> 8) & 0xff;        // cs1
> +       cs += (ecc >> 16) & 0xff;       // cs2
> +       cs += (ecc >> 24) & 0xff;       // cs3
> +
> +       return cs;
> +}
> +
> +static void loongson_edac_check(struct mem_ctl_info *mci)
> +{
> +       loongson_update_ce_count(mci, 0, loongson_read_ecc(mci));
> +}
> +
> +static int get_dimm_config(struct mem_ctl_info *mci)
> +{
> +       u32 size, npages;
> +       struct dimm_info *dimm;
> +
> +       /* size not used */
> +       size = -1;
> +       npages = MiB_TO_PAGES(size);
> +
> +       dimm = edac_get_dimm(mci, 0, 0, 0);
> +       dimm->nr_pages = npages;
> +       snprintf(dimm->label, sizeof(dimm->label),
> +                       "MC#%uChannel#%u_DIMM#%u",
> +                       mci->mc_idx, 0, 0);
> +       dimm->grain = 8;
> +
> +       return 0;
> +}
> +
> +static void loongson_pvt_init(struct mem_ctl_info *mci, u64 *vbase)
> +{
> +       struct loongson_edac_pvt *pvt = mci->pvt_info;
> +
> +       pvt->ecc_base = vbase;
> +       pvt->last_ce_count = loongson_read_ecc(mci);
> +}
> +
> +static int loongson_edac_probe(struct platform_device *pdev)
> +{
> +       struct resource *rs;
> +       struct mem_ctl_info *mci;
> +       struct edac_mc_layer layers[2];
> +       struct loongson_edac_pvt *pvt;
> +       u64 *vbase = NULL;
> +
> +       rs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +       /* not return if can not find resource or resource start equals NULL */
> +       if (rs && rs->start) {
> +               vbase = devm_ioremap_resource(&pdev->dev, rs);
> +               if (IS_ERR(vbase))
> +                       return PTR_ERR(vbase);
> +       }
> +
> +       /* allocate a new MC control structure */
> +       layers[0].type = EDAC_MC_LAYER_CHANNEL;
> +       layers[0].size = 1;
> +       layers[0].is_virt_csrow = false;
> +       layers[1].type = EDAC_MC_LAYER_SLOT;
> +       layers[1].size = 1;
> +       layers[1].is_virt_csrow = true;
> +       mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
> +       if (mci == NULL)
> +               return -ENOMEM;
> +
> +       edac_dbg(0, "MC: mci = %p\n", mci);
> +
> +       mci->mc_idx = edac_device_alloc_index();
> +       mci->mtype_cap = MEM_FLAG_RDDR4;
> +       mci->edac_ctl_cap = EDAC_FLAG_NONE;
> +       mci->edac_cap = EDAC_FLAG_NONE;
> +       mci->mod_name = "loongson_edac.c";
> +       mci->ctl_name = "loongson_edac_ctl";
> +       mci->dev_name = "loongson_edac_dev";
> +       mci->ctl_page_to_phys = NULL;
> +       mci->pdev = &pdev->dev;
> +       mci->error_desc.grain = 8;
> +       /* Set the function pointer to an actual operation function */
> +       mci->edac_check = loongson_edac_check;
> +
> +       loongson_pvt_init(mci, vbase);
> +       get_dimm_config(mci);
> +
> +       if (edac_mc_add_mc(mci)) {
> +               edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
> +               edac_mc_free(mci);
> +       }
> +       edac_op_state = EDAC_OPSTATE_POLL;
> +
> +       return 0;
> +}
> +
> +static void loongson_edac_remove(struct platform_device *pdev)
> +{
> +       struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev);
> +
> +       if (mci)
> +               edac_mc_free(mci);
> +}
> +
> +static const struct of_device_id loongson_edac_of_match[] = {
> +       { .compatible = "loongson,ls3a5000-mc-edac", },
> +       {}
> +};
> +MODULE_DEVICE_TABLE(of, loongson_edac_of_match);
> +
> +static struct platform_driver loongson_edac_driver = {
> +       .probe          = loongson_edac_probe,
> +       .remove         = loongson_edac_remove,
> +       .driver         = {
> +               .name   = "ls-mc-edac",
The name can be better as loongson-mc-edac.

Huacai

> +               .of_match_table = loongson_edac_of_match,
> +       },
> +};
> +
> +module_platform_driver(loongson_edac_driver);
> +
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR("Zhao Qunqin <zhaoqunqin@loongson.cn>\n");
> +MODULE_DESCRIPTION("EDAC driver for loongson memory controller");
> --
> 2.43.0
>
Huacai Chen Sept. 3, 2024, 2:43 a.m. UTC | #2
On Tue, Sep 3, 2024 at 10:39 AM Huacai Chen <chenhuacai@kernel.org> wrote:
>
> Hi, Qunqin,
>
> On Tue, Sep 3, 2024 at 9:53 AM Zhao Qunqin <zhaoqunqin@loongson.cn> wrote:
> >
> > Report single bit errors (CE) only.
> >
> > Signed-off-by: Zhao Qunqin <zhaoqunqin@loongson.cn>
> > ---
> >  MAINTAINERS                  |   1 +
> >  arch/loongarch/Kconfig       |   1 +
> >  drivers/edac/Kconfig         |   8 ++
> >  drivers/edac/Makefile        |   1 +
> >  drivers/edac/ls3a5000_edac.c | 187 +++++++++++++++++++++++++++++++++++
> >  5 files changed, 198 insertions(+)
> >  create mode 100644 drivers/edac/ls3a5000_edac.c
> >
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index 6cc8cfc8f..b43f82279 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -13242,6 +13242,7 @@ M:      Zhao Qunqin <zhaoqunqin@loongson.cn>
> >  L:     linux-edac@vger.kernel.org
> >  S:     Maintained
> >  F:     Documentation/devicetree/bindings/edac/loongson,ls3a5000-mc-edac.yaml
> > +F:     drivers/edac/ls3a5000_edac.c
> >
> >  LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI)
> >  M:     Sathya Prakash <sathya.prakash@broadcom.com>
> > diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> > index 70f169210..348030c24 100644
> > --- a/arch/loongarch/Kconfig
> > +++ b/arch/loongarch/Kconfig
> > @@ -182,6 +182,7 @@ config LOONGARCH
> >         select PCI_QUIRKS
> >         select PERF_USE_VMALLOC
> >         select RTC_LIB
> > +       select EDAC_SUPPORT
> >         select SPARSE_IRQ
> >         select SYSCTL_ARCH_UNALIGN_ALLOW
> >         select SYSCTL_ARCH_UNALIGN_NO_WARN
> > diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
> > index 16c8de505..2d10256f0 100644
> > --- a/drivers/edac/Kconfig
> > +++ b/drivers/edac/Kconfig
> > @@ -573,5 +573,13 @@ config EDAC_VERSAL
> >           Support injecting both correctable and uncorrectable errors
> >           for debugging purposes.
> >
> > +config EDAC_LS3A5000
> > +       tristate "Ls3a5000 Memory Controller"
> > +       depends on LOONGARCH || COMPILE_TEST
> > +       help
> > +         Support for error detection and correction on the ls3a5000 memory
> > +         controller. This driver report single bit errors (CE) only.
> > +         Ls3c5000l, ls3c5000, ls3d5000, ls3a6000 and ls3c6000 are compatible
> > +         with ls3a5000.
> Here can be improved as:
>
> config EDAC_LOONGSON3
>        tristate "Loonson-3 Memory Controller"
>        depends on LOONGARCH || COMPILE_TEST
>        help
>          Support for error detection and correction on the Loongson-3 family
>          memory controller. This driver reports single bit errors (CE) only.
>          Loongson-3A5000/3C5000/3C5000L/3A6000/3C6000 are compatible.
>
And the driver file should be loongson_edac.c, this naming has nothing
to do with the dt-binding names.

Huacai

> >
> >  endif # EDAC
> > diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
> > index 4edfb83ff..0974e3fa6 100644
> > --- a/drivers/edac/Makefile
> > +++ b/drivers/edac/Makefile
> > @@ -89,3 +89,4 @@ obj-$(CONFIG_EDAC_DMC520)             += dmc520_edac.o
> >  obj-$(CONFIG_EDAC_NPCM)                        += npcm_edac.o
> >  obj-$(CONFIG_EDAC_ZYNQMP)              += zynqmp_edac.o
> >  obj-$(CONFIG_EDAC_VERSAL)              += versal_edac.o
> > +obj-$(CONFIG_EDAC_LS3A5000)            += ls3a5000_edac.o
> > diff --git a/drivers/edac/ls3a5000_edac.c b/drivers/edac/ls3a5000_edac.c
> > new file mode 100644
> > index 000000000..c68fd7c5f
> > --- /dev/null
> > +++ b/drivers/edac/ls3a5000_edac.c
> > @@ -0,0 +1,187 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Copyright (C) 2024 Loongson Technology Corporation Limited.
> > + */
> > +
> > +#include <linux/edac.h>
> > +#include <linux/module.h>
> > +#include <linux/init.h>
> > +#include <linux/platform_device.h>
> > +
> > +#include "edac_module.h"
> > +
> > +enum ecc_index {
> > +       ECC_SET = 0,
> > +       ECC_RESERVED,
> > +       ECC_COUNT,
> > +       ECC_CS_COUNT,
> > +       ECC_CODE,
> > +       ECC_ADDR,
> > +       ECC_DATA0,
> > +       ECC_DATA1,
> > +       ECC_DATA2,
> > +       ECC_DATA3,
> > +};
> > +
> > +struct loongson_edac_pvt {
> > +       u64 *ecc_base;
> > +       int last_ce_count;
> > +};
> > +
> > +static void loongson_update_ce_count(struct mem_ctl_info *mci,
> > +                                       int chan,
> > +                                       int new)
> > +{
> > +       int add;
> > +       struct loongson_edac_pvt *pvt = mci->pvt_info;
> > +
> > +       add = new - pvt->last_ce_count;
> > +
> > +       /* Store the new value */
> > +       pvt->last_ce_count = new;
> > +
> > +       /* device resume or any other exceptions*/
> > +       if (add < 0)
> > +               return;
> > +
> > +       /*updated the edac core */
> > +       if (add != 0) {
> > +               edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add,
> > +                                       0, 0, 0,
> > +                                       chan, 0, -1, "error", "");
> > +               edac_mc_printk(mci, KERN_INFO, "add: %d", add);
> > +       }
> > +}
> > +
> > +static int loongson_read_ecc(struct mem_ctl_info *mci)
> > +{
> > +       u64 ecc;
> > +       int cs = 0;
> > +       struct loongson_edac_pvt *pvt = mci->pvt_info;
> > +
> > +       if (!pvt->ecc_base)
> > +               return pvt->last_ce_count;
> > +
> > +       ecc = pvt->ecc_base[ECC_CS_COUNT];
> > +       cs += ecc & 0xff;               // cs0
> > +       cs += (ecc >> 8) & 0xff;        // cs1
> > +       cs += (ecc >> 16) & 0xff;       // cs2
> > +       cs += (ecc >> 24) & 0xff;       // cs3
> > +
> > +       return cs;
> > +}
> > +
> > +static void loongson_edac_check(struct mem_ctl_info *mci)
> > +{
> > +       loongson_update_ce_count(mci, 0, loongson_read_ecc(mci));
> > +}
> > +
> > +static int get_dimm_config(struct mem_ctl_info *mci)
> > +{
> > +       u32 size, npages;
> > +       struct dimm_info *dimm;
> > +
> > +       /* size not used */
> > +       size = -1;
> > +       npages = MiB_TO_PAGES(size);
> > +
> > +       dimm = edac_get_dimm(mci, 0, 0, 0);
> > +       dimm->nr_pages = npages;
> > +       snprintf(dimm->label, sizeof(dimm->label),
> > +                       "MC#%uChannel#%u_DIMM#%u",
> > +                       mci->mc_idx, 0, 0);
> > +       dimm->grain = 8;
> > +
> > +       return 0;
> > +}
> > +
> > +static void loongson_pvt_init(struct mem_ctl_info *mci, u64 *vbase)
> > +{
> > +       struct loongson_edac_pvt *pvt = mci->pvt_info;
> > +
> > +       pvt->ecc_base = vbase;
> > +       pvt->last_ce_count = loongson_read_ecc(mci);
> > +}
> > +
> > +static int loongson_edac_probe(struct platform_device *pdev)
> > +{
> > +       struct resource *rs;
> > +       struct mem_ctl_info *mci;
> > +       struct edac_mc_layer layers[2];
> > +       struct loongson_edac_pvt *pvt;
> > +       u64 *vbase = NULL;
> > +
> > +       rs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> > +       /* not return if can not find resource or resource start equals NULL */
> > +       if (rs && rs->start) {
> > +               vbase = devm_ioremap_resource(&pdev->dev, rs);
> > +               if (IS_ERR(vbase))
> > +                       return PTR_ERR(vbase);
> > +       }
> > +
> > +       /* allocate a new MC control structure */
> > +       layers[0].type = EDAC_MC_LAYER_CHANNEL;
> > +       layers[0].size = 1;
> > +       layers[0].is_virt_csrow = false;
> > +       layers[1].type = EDAC_MC_LAYER_SLOT;
> > +       layers[1].size = 1;
> > +       layers[1].is_virt_csrow = true;
> > +       mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
> > +       if (mci == NULL)
> > +               return -ENOMEM;
> > +
> > +       edac_dbg(0, "MC: mci = %p\n", mci);
> > +
> > +       mci->mc_idx = edac_device_alloc_index();
> > +       mci->mtype_cap = MEM_FLAG_RDDR4;
> > +       mci->edac_ctl_cap = EDAC_FLAG_NONE;
> > +       mci->edac_cap = EDAC_FLAG_NONE;
> > +       mci->mod_name = "loongson_edac.c";
> > +       mci->ctl_name = "loongson_edac_ctl";
> > +       mci->dev_name = "loongson_edac_dev";
> > +       mci->ctl_page_to_phys = NULL;
> > +       mci->pdev = &pdev->dev;
> > +       mci->error_desc.grain = 8;
> > +       /* Set the function pointer to an actual operation function */
> > +       mci->edac_check = loongson_edac_check;
> > +
> > +       loongson_pvt_init(mci, vbase);
> > +       get_dimm_config(mci);
> > +
> > +       if (edac_mc_add_mc(mci)) {
> > +               edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
> > +               edac_mc_free(mci);
> > +       }
> > +       edac_op_state = EDAC_OPSTATE_POLL;
> > +
> > +       return 0;
> > +}
> > +
> > +static void loongson_edac_remove(struct platform_device *pdev)
> > +{
> > +       struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev);
> > +
> > +       if (mci)
> > +               edac_mc_free(mci);
> > +}
> > +
> > +static const struct of_device_id loongson_edac_of_match[] = {
> > +       { .compatible = "loongson,ls3a5000-mc-edac", },
> > +       {}
> > +};
> > +MODULE_DEVICE_TABLE(of, loongson_edac_of_match);
> > +
> > +static struct platform_driver loongson_edac_driver = {
> > +       .probe          = loongson_edac_probe,
> > +       .remove         = loongson_edac_remove,
> > +       .driver         = {
> > +               .name   = "ls-mc-edac",
> The name can be better as loongson-mc-edac.
>
> Huacai
>
> > +               .of_match_table = loongson_edac_of_match,
> > +       },
> > +};
> > +
> > +module_platform_driver(loongson_edac_driver);
> > +
> > +MODULE_LICENSE("GPL");
> > +MODULE_AUTHOR("Zhao Qunqin <zhaoqunqin@loongson.cn>\n");
> > +MODULE_DESCRIPTION("EDAC driver for loongson memory controller");
> > --
> > 2.43.0
> >
Mingcong Bai Sept. 3, 2024, 3:31 a.m. UTC | #3
Greetings all,

在 2024-09-03 10:39,Huacai Chen 写道:
> Hi, Qunqin,
> 
> On Tue, Sep 3, 2024 at 9:53 AM Zhao Qunqin <zhaoqunqin@loongson.cn> 
> wrote:
>> 
>> Report single bit errors (CE) only.
>> 
>> Signed-off-by: Zhao Qunqin <zhaoqunqin@loongson.cn>
>> ---
>>  MAINTAINERS                  |   1 +
>>  arch/loongarch/Kconfig       |   1 +
>>  drivers/edac/Kconfig         |   8 ++
>>  drivers/edac/Makefile        |   1 +
>>  drivers/edac/ls3a5000_edac.c | 187 
>> +++++++++++++++++++++++++++++++++++
>>  5 files changed, 198 insertions(+)
>>  create mode 100644 drivers/edac/ls3a5000_edac.c
>> 
>> diff --git a/MAINTAINERS b/MAINTAINERS
>> index 6cc8cfc8f..b43f82279 100644
>> --- a/MAINTAINERS
>> +++ b/MAINTAINERS
>> @@ -13242,6 +13242,7 @@ M:      Zhao Qunqin <zhaoqunqin@loongson.cn>
>>  L:     linux-edac@vger.kernel.org
>>  S:     Maintained
>>  F:     
>> Documentation/devicetree/bindings/edac/loongson,ls3a5000-mc-edac.yaml
>> +F:     drivers/edac/ls3a5000_edac.c
>> 
>>  LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI)
>>  M:     Sathya Prakash <sathya.prakash@broadcom.com>
>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
>> index 70f169210..348030c24 100644
>> --- a/arch/loongarch/Kconfig
>> +++ b/arch/loongarch/Kconfig
>> @@ -182,6 +182,7 @@ config LOONGARCH
>>         select PCI_QUIRKS
>>         select PERF_USE_VMALLOC
>>         select RTC_LIB
>> +       select EDAC_SUPPORT
>>         select SPARSE_IRQ
>>         select SYSCTL_ARCH_UNALIGN_ALLOW
>>         select SYSCTL_ARCH_UNALIGN_NO_WARN
>> diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
>> index 16c8de505..2d10256f0 100644
>> --- a/drivers/edac/Kconfig
>> +++ b/drivers/edac/Kconfig
>> @@ -573,5 +573,13 @@ config EDAC_VERSAL
>>           Support injecting both correctable and uncorrectable errors
>>           for debugging purposes.
>> 
>> +config EDAC_LS3A5000
>> +       tristate "Ls3a5000 Memory Controller"
>> +       depends on LOONGARCH || COMPILE_TEST
>> +       help
>> +         Support for error detection and correction on the ls3a5000 
>> memory
>> +         controller. This driver report single bit errors (CE) only.
>> +         Ls3c5000l, ls3c5000, ls3d5000, ls3a6000 and ls3c6000 are 
>> compatible
>> +         with ls3a5000.
> Here can be improved as:
> 
> config EDAC_LOONGSON3
>        tristate "Loonson-3 Memory Controller"
>        depends on LOONGARCH || COMPILE_TEST
>        help
>          Support for error detection and correction on the Loongson-3 
> family
>          memory controller. This driver reports single bit errors (CE) 
> only.
>          Loongson-3A5000/3C5000/3C5000L/3A6000/3C6000 are compatible.
> 

Huacai,

This looks much better, minus a typo - it should be Loongson-3, not 
Loonson-3. Also, what about 3B/D5000 and potentially 3A6000, 3B6000, and 
3D6000, etc.?

My suggestion would be to list them as a family, say: "All members of 
the Loongson-3 family of processors are compatible." If there are 
exceptions, then I would list them more explicitly, something like "all 
members... except the... series."

What do you think?

>> 
>>  endif # EDAC
>> diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
>> index 4edfb83ff..0974e3fa6 100644
>> --- a/drivers/edac/Makefile
>> +++ b/drivers/edac/Makefile
>> @@ -89,3 +89,4 @@ obj-$(CONFIG_EDAC_DMC520)             += 
>> dmc520_edac.o
>>  obj-$(CONFIG_EDAC_NPCM)                        += npcm_edac.o
>>  obj-$(CONFIG_EDAC_ZYNQMP)              += zynqmp_edac.o
>>  obj-$(CONFIG_EDAC_VERSAL)              += versal_edac.o
>> +obj-$(CONFIG_EDAC_LS3A5000)            += ls3a5000_edac.o
>> diff --git a/drivers/edac/ls3a5000_edac.c 
>> b/drivers/edac/ls3a5000_edac.c
>> new file mode 100644
>> index 000000000..c68fd7c5f
>> --- /dev/null
>> +++ b/drivers/edac/ls3a5000_edac.c
>> @@ -0,0 +1,187 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Copyright (C) 2024 Loongson Technology Corporation Limited.
>> + */
>> +
>> +#include <linux/edac.h>
>> +#include <linux/module.h>
>> +#include <linux/init.h>
>> +#include <linux/platform_device.h>
>> +
>> +#include "edac_module.h"
>> +
>> +enum ecc_index {
>> +       ECC_SET = 0,
>> +       ECC_RESERVED,
>> +       ECC_COUNT,
>> +       ECC_CS_COUNT,
>> +       ECC_CODE,
>> +       ECC_ADDR,
>> +       ECC_DATA0,
>> +       ECC_DATA1,
>> +       ECC_DATA2,
>> +       ECC_DATA3,
>> +};
>> +
>> +struct loongson_edac_pvt {
>> +       u64 *ecc_base;
>> +       int last_ce_count;
>> +};
>> +
>> +static void loongson_update_ce_count(struct mem_ctl_info *mci,
>> +                                       int chan,
>> +                                       int new)
>> +{
>> +       int add;
>> +       struct loongson_edac_pvt *pvt = mci->pvt_info;
>> +
>> +       add = new - pvt->last_ce_count;
>> +
>> +       /* Store the new value */
>> +       pvt->last_ce_count = new;
>> +
>> +       /* device resume or any other exceptions*/
>> +       if (add < 0)
>> +               return;
>> +
>> +       /*updated the edac core */
>> +       if (add != 0) {
>> +               edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add,
>> +                                       0, 0, 0,
>> +                                       chan, 0, -1, "error", "");
>> +               edac_mc_printk(mci, KERN_INFO, "add: %d", add);
>> +       }
>> +}
>> +
>> +static int loongson_read_ecc(struct mem_ctl_info *mci)
>> +{
>> +       u64 ecc;
>> +       int cs = 0;
>> +       struct loongson_edac_pvt *pvt = mci->pvt_info;
>> +
>> +       if (!pvt->ecc_base)
>> +               return pvt->last_ce_count;
>> +
>> +       ecc = pvt->ecc_base[ECC_CS_COUNT];
>> +       cs += ecc & 0xff;               // cs0
>> +       cs += (ecc >> 8) & 0xff;        // cs1
>> +       cs += (ecc >> 16) & 0xff;       // cs2
>> +       cs += (ecc >> 24) & 0xff;       // cs3
>> +
>> +       return cs;
>> +}
>> +
>> +static void loongson_edac_check(struct mem_ctl_info *mci)
>> +{
>> +       loongson_update_ce_count(mci, 0, loongson_read_ecc(mci));
>> +}
>> +
>> +static int get_dimm_config(struct mem_ctl_info *mci)
>> +{
>> +       u32 size, npages;
>> +       struct dimm_info *dimm;
>> +
>> +       /* size not used */
>> +       size = -1;
>> +       npages = MiB_TO_PAGES(size);
>> +
>> +       dimm = edac_get_dimm(mci, 0, 0, 0);
>> +       dimm->nr_pages = npages;
>> +       snprintf(dimm->label, sizeof(dimm->label),
>> +                       "MC#%uChannel#%u_DIMM#%u",
>> +                       mci->mc_idx, 0, 0);
>> +       dimm->grain = 8;
>> +
>> +       return 0;
>> +}
>> +
>> +static void loongson_pvt_init(struct mem_ctl_info *mci, u64 *vbase)
>> +{
>> +       struct loongson_edac_pvt *pvt = mci->pvt_info;
>> +
>> +       pvt->ecc_base = vbase;
>> +       pvt->last_ce_count = loongson_read_ecc(mci);
>> +}
>> +
>> +static int loongson_edac_probe(struct platform_device *pdev)
>> +{
>> +       struct resource *rs;
>> +       struct mem_ctl_info *mci;
>> +       struct edac_mc_layer layers[2];
>> +       struct loongson_edac_pvt *pvt;
>> +       u64 *vbase = NULL;
>> +
>> +       rs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
>> +       /* not return if can not find resource or resource start 
>> equals NULL */
>> +       if (rs && rs->start) {
>> +               vbase = devm_ioremap_resource(&pdev->dev, rs);
>> +               if (IS_ERR(vbase))
>> +                       return PTR_ERR(vbase);
>> +       }
>> +
>> +       /* allocate a new MC control structure */
>> +       layers[0].type = EDAC_MC_LAYER_CHANNEL;
>> +       layers[0].size = 1;
>> +       layers[0].is_virt_csrow = false;
>> +       layers[1].type = EDAC_MC_LAYER_SLOT;
>> +       layers[1].size = 1;
>> +       layers[1].is_virt_csrow = true;
>> +       mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 
>> sizeof(*pvt));
>> +       if (mci == NULL)
>> +               return -ENOMEM;
>> +
>> +       edac_dbg(0, "MC: mci = %p\n", mci);
>> +
>> +       mci->mc_idx = edac_device_alloc_index();
>> +       mci->mtype_cap = MEM_FLAG_RDDR4;
>> +       mci->edac_ctl_cap = EDAC_FLAG_NONE;
>> +       mci->edac_cap = EDAC_FLAG_NONE;
>> +       mci->mod_name = "loongson_edac.c";
>> +       mci->ctl_name = "loongson_edac_ctl";
>> +       mci->dev_name = "loongson_edac_dev";
>> +       mci->ctl_page_to_phys = NULL;
>> +       mci->pdev = &pdev->dev;
>> +       mci->error_desc.grain = 8;
>> +       /* Set the function pointer to an actual operation function */
>> +       mci->edac_check = loongson_edac_check;
>> +
>> +       loongson_pvt_init(mci, vbase);
>> +       get_dimm_config(mci);
>> +
>> +       if (edac_mc_add_mc(mci)) {
>> +               edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
>> +               edac_mc_free(mci);
>> +       }
>> +       edac_op_state = EDAC_OPSTATE_POLL;
>> +
>> +       return 0;
>> +}
>> +
>> +static void loongson_edac_remove(struct platform_device *pdev)
>> +{
>> +       struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev);
>> +
>> +       if (mci)
>> +               edac_mc_free(mci);
>> +}
>> +
>> +static const struct of_device_id loongson_edac_of_match[] = {
>> +       { .compatible = "loongson,ls3a5000-mc-edac", },
>> +       {}
>> +};
>> +MODULE_DEVICE_TABLE(of, loongson_edac_of_match);
>> +
>> +static struct platform_driver loongson_edac_driver = {
>> +       .probe          = loongson_edac_probe,
>> +       .remove         = loongson_edac_remove,
>> +       .driver         = {
>> +               .name   = "ls-mc-edac",
> The name can be better as loongson-mc-edac.
> 
> Huacai
> 
>> +               .of_match_table = loongson_edac_of_match,
>> +       },
>> +};
>> +
>> +module_platform_driver(loongson_edac_driver);
>> +
>> +MODULE_LICENSE("GPL");
>> +MODULE_AUTHOR("Zhao Qunqin <zhaoqunqin@loongson.cn>\n");
>> +MODULE_DESCRIPTION("EDAC driver for loongson memory controller");
>> --
>> 2.43.0
>> 

Best Regards,
Mingcong Bai
Markus Elfring Sept. 3, 2024, 6:33 a.m. UTC | #4
> +++ b/drivers/edac/ls3a5000_edac.c
> @@ -0,0 +1,187 @@> +MODULE_AUTHOR("Zhao Qunqin <zhaoqunqin@loongson.cn>\n");

How do you think about to omit a line break character from such a string literal?

Regards,
Markus
Krzysztof Kozlowski Sept. 3, 2024, 6:47 a.m. UTC | #5
On Tue, Sep 03, 2024 at 09:53:54AM +0800, Zhao Qunqin wrote:
> Report single bit errors (CE) only.
> 
> Signed-off-by: Zhao Qunqin <zhaoqunqin@loongson.cn>
> ---
>  MAINTAINERS                  |   1 +
>  arch/loongarch/Kconfig       |   1 +
>  drivers/edac/Kconfig         |   8 ++
>  drivers/edac/Makefile        |   1 +
>  drivers/edac/ls3a5000_edac.c | 187 +++++++++++++++++++++++++++++++++++
>  5 files changed, 198 insertions(+)
>  create mode 100644 drivers/edac/ls3a5000_edac.c
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 6cc8cfc8f..b43f82279 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -13242,6 +13242,7 @@ M:	Zhao Qunqin <zhaoqunqin@loongson.cn>
>  L:	linux-edac@vger.kernel.org
>  S:	Maintained
>  F:	Documentation/devicetree/bindings/edac/loongson,ls3a5000-mc-edac.yaml
> +F:	drivers/edac/ls3a5000_edac.c
>  
>  LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI)
>  M:	Sathya Prakash <sathya.prakash@broadcom.com>
> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> index 70f169210..348030c24 100644
> --- a/arch/loongarch/Kconfig
> +++ b/arch/loongarch/Kconfig
> @@ -182,6 +182,7 @@ config LOONGARCH
>  	select PCI_QUIRKS
>  	select PERF_USE_VMALLOC
>  	select RTC_LIB
> +	select EDAC_SUPPORT

I think you got here comment before. How did you address it?

>  	select SPARSE_IRQ
>  	select SYSCTL_ARCH_UNALIGN_ALLOW
>  	select SYSCTL_ARCH_UNALIGN_NO_WARN
> diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
> index 16c8de505..2d10256f0 100644
> --- a/drivers/edac/Kconfig
> +++ b/drivers/edac/Kconfig
> @@ -573,5 +573,13 @@ config EDAC_VERSAL
>  	  Support injecting both correctable and uncorrectable errors
>  	  for debugging purposes.
>  

...

 +
> +static int loongson_edac_probe(struct platform_device *pdev)
> +{
> +	struct resource *rs;
> +	struct mem_ctl_info *mci;
> +	struct edac_mc_layer layers[2];
> +	struct loongson_edac_pvt *pvt;
> +	u64 *vbase = NULL;
> +
> +	rs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	/* not return if can not find resource or resource start equals NULL */

Why?

> +	if (rs && rs->start) {
> +		vbase = devm_ioremap_resource(&pdev->dev, rs);
> +		if (IS_ERR(vbase))
> +			return PTR_ERR(vbase);
> +	}
> +
> +	/* allocate a new MC control structure */
> +	layers[0].type = EDAC_MC_LAYER_CHANNEL;
> +	layers[0].size = 1;
> +	layers[0].is_virt_csrow = false;
> +	layers[1].type = EDAC_MC_LAYER_SLOT;
> +	layers[1].size = 1;
> +	layers[1].is_virt_csrow = true;
> +	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
> +	if (mci == NULL)
> +		return -ENOMEM;
> +
> +	edac_dbg(0, "MC: mci = %p\n", mci);

Nope, drop. You should never print pointers.

> +
> +	mci->mc_idx = edac_device_alloc_index();
> +	mci->mtype_cap = MEM_FLAG_RDDR4;
> +	mci->edac_ctl_cap = EDAC_FLAG_NONE;
> +	mci->edac_cap = EDAC_FLAG_NONE;
> +	mci->mod_name = "loongson_edac.c";
> +	mci->ctl_name = "loongson_edac_ctl";
> +	mci->dev_name = "loongson_edac_dev";
> +	mci->ctl_page_to_phys = NULL;
> +	mci->pdev = &pdev->dev;
> +	mci->error_desc.grain = 8;
> +	/* Set the function pointer to an actual operation function */
> +	mci->edac_check = loongson_edac_check;
> +
> +	loongson_pvt_init(mci, vbase);
> +	get_dimm_config(mci);
> +
> +	if (edac_mc_add_mc(mci)) {
> +		edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
> +		edac_mc_free(mci);

That's still a success of probe? Weird a bit.

> +	}
> +	edac_op_state = EDAC_OPSTATE_POLL;
> +
> +	return 0;
> +}
> +
> +static void loongson_edac_remove(struct platform_device *pdev)
> +{
> +	struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev);
> +
> +	if (mci)
> +		edac_mc_free(mci);
> +}
> +
> +static const struct of_device_id loongson_edac_of_match[] = {
> +	{ .compatible = "loongson,ls3a5000-mc-edac", },
> +	{}
> +};
> +MODULE_DEVICE_TABLE(of, loongson_edac_of_match);
> +
> +static struct platform_driver loongson_edac_driver = {
> +	.probe		= loongson_edac_probe,
> +	.remove		= loongson_edac_remove,
> +	.driver		= {
> +		.name	= "ls-mc-edac",
> +		.of_match_table = loongson_edac_of_match,
> +	},
> +};
> +
> +module_platform_driver(loongson_edac_driver);
> +
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR("Zhao Qunqin <zhaoqunqin@loongson.cn>\n");

Drop \n.

> +MODULE_DESCRIPTION("EDAC driver for loongson memory controller");
> -- 
> 2.43.0
>
Zhao Qunqin Sept. 3, 2024, 7:24 a.m. UTC | #6
在 2024/9/3 下午2:47, Krzysztof Kozlowski 写道:
> On Tue, Sep 03, 2024 at 09:53:54AM +0800, Zhao Qunqin wrote:
>> Report single bit errors (CE) only.
>>
>> Signed-off-by: Zhao Qunqin <zhaoqunqin@loongson.cn>
>> ---
>>   MAINTAINERS                  |   1 +
>>   arch/loongarch/Kconfig       |   1 +
>>   drivers/edac/Kconfig         |   8 ++
>>   drivers/edac/Makefile        |   1 +
>>   drivers/edac/ls3a5000_edac.c | 187 +++++++++++++++++++++++++++++++++++
>>   5 files changed, 198 insertions(+)
>>   create mode 100644 drivers/edac/ls3a5000_edac.c
>>
>> diff --git a/MAINTAINERS b/MAINTAINERS
>> index 6cc8cfc8f..b43f82279 100644
>> --- a/MAINTAINERS
>> +++ b/MAINTAINERS
>> @@ -13242,6 +13242,7 @@ M:	Zhao Qunqin <zhaoqunqin@loongson.cn>
>>   L:	linux-edac@vger.kernel.org
>>   S:	Maintained
>>   F:	Documentation/devicetree/bindings/edac/loongson,ls3a5000-mc-edac.yaml
>> +F:	drivers/edac/ls3a5000_edac.c
>>   
>>   LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI)
>>   M:	Sathya Prakash <sathya.prakash@broadcom.com>
>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
>> index 70f169210..348030c24 100644
>> --- a/arch/loongarch/Kconfig
>> +++ b/arch/loongarch/Kconfig
>> @@ -182,6 +182,7 @@ config LOONGARCH
>>   	select PCI_QUIRKS
>>   	select PERF_USE_VMALLOC
>>   	select RTC_LIB
>> +	select EDAC_SUPPORT
> I think you got here comment before. How did you address it?
I just randomly found a spot, and I will put it at the end(next version 
patch).
>
>>   	select SPARSE_IRQ
>>   	select SYSCTL_ARCH_UNALIGN_ALLOW
>>   	select SYSCTL_ARCH_UNALIGN_NO_WARN
>> diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
>> index 16c8de505..2d10256f0 100644
>> --- a/drivers/edac/Kconfig
>> +++ b/drivers/edac/Kconfig
>> @@ -573,5 +573,13 @@ config EDAC_VERSAL
>>   	  Support injecting both correctable and uncorrectable errors
>>   	  for debugging purposes.
>>   
> ...
>
>   +
>> +static int loongson_edac_probe(struct platform_device *pdev)
>> +{
>> +	struct resource *rs;
>> +	struct mem_ctl_info *mci;
>> +	struct edac_mc_layer layers[2];
>> +	struct loongson_edac_pvt *pvt;
>> +	u64 *vbase = NULL;
>> +
>> +	rs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
>> +	/* not return if can not find resource or resource start equals NULL */
> Why?

Because there are multiple memory controllers in the ls3x soc,

but the ECC function of some memory controllers cannot be used.

But in any case, a node must be created in /sys/devices/system/edac/mc/  
through edac_mc_add_mc(mci).

Then if the ECC function of the memory controller cannot be used, set 
start to NULL or do not pass mem resource,

which is equivalent to enumeration of memory controller, and the CE 
count will always be zero.

>> +	if (rs && rs->start) {
>> +		vbase = devm_ioremap_resource(&pdev->dev, rs);
>> +		if (IS_ERR(vbase))
>> +			return PTR_ERR(vbase);
>> +	}
>> +
>> +	/* allocate a new MC control structure */
>> +	layers[0].type = EDAC_MC_LAYER_CHANNEL;
>> +	layers[0].size = 1;
>> +	layers[0].is_virt_csrow = false;
>> +	layers[1].type = EDAC_MC_LAYER_SLOT;
>> +	layers[1].size = 1;
>> +	layers[1].is_virt_csrow = true;
>> +	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
>> +	if (mci == NULL)
>> +		return -ENOMEM;
>> +
>> +	edac_dbg(0, "MC: mci = %p\n", mci);
> Nope, drop. You should never print pointers.
I will drop it in next version patch.
>> +
>> +	mci->mc_idx = edac_device_alloc_index();
>> +	mci->mtype_cap = MEM_FLAG_RDDR4;
>> +	mci->edac_ctl_cap = EDAC_FLAG_NONE;
>> +	mci->edac_cap = EDAC_FLAG_NONE;
>> +	mci->mod_name = "loongson_edac.c";
>> +	mci->ctl_name = "loongson_edac_ctl";
>> +	mci->dev_name = "loongson_edac_dev";
>> +	mci->ctl_page_to_phys = NULL;
>> +	mci->pdev = &pdev->dev;
>> +	mci->error_desc.grain = 8;
>> +	/* Set the function pointer to an actual operation function */
>> +	mci->edac_check = loongson_edac_check;
>> +
>> +	loongson_pvt_init(mci, vbase);
>> +	get_dimm_config(mci);
>> +
>> +	if (edac_mc_add_mc(mci)) {
>> +		edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
>> +		edac_mc_free(mci);
> That's still a success of probe? Weird a bit.

I will add return ret if (ret =edac_mc_add_mc(mci))  in next version patch.

>
>> +	}
>> +	edac_op_state = EDAC_OPSTATE_POLL;
>> +
>> +	return 0;
>> +}
>> +
>> +static void loongson_edac_remove(struct platform_device *pdev)
>> +{
>> +	struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev);
>> +
>> +	if (mci)
>> +		edac_mc_free(mci);
>> +}
>> +
>> +static const struct of_device_id loongson_edac_of_match[] = {
>> +	{ .compatible = "loongson,ls3a5000-mc-edac", },
>> +	{}
>> +};
>> +MODULE_DEVICE_TABLE(of, loongson_edac_of_match);
>> +
>> +static struct platform_driver loongson_edac_driver = {
>> +	.probe		= loongson_edac_probe,
>> +	.remove		= loongson_edac_remove,
>> +	.driver		= {
>> +		.name	= "ls-mc-edac",
>> +		.of_match_table = loongson_edac_of_match,
>> +	},
>> +};
>> +
>> +module_platform_driver(loongson_edac_driver);
>> +
>> +MODULE_LICENSE("GPL");
>> +MODULE_AUTHOR("Zhao Qunqin <zhaoqunqin@loongson.cn>\n");
> Drop \n.
I will drop it in next version patch.
>> +MODULE_DESCRIPTION("EDAC driver for loongson memory controller");
>> -- 
>> 2.43.0
>>
Best regards,

Zhao Qunqin.
Zhao Qunqin Sept. 3, 2024, 7:45 a.m. UTC | #7
在 2024/9/3 下午2:33, Markus Elfring 写道:
> …
>> +++ b/drivers/edac/ls3a5000_edac.c
>> @@ -0,0 +1,187 @@
> …
>> +MODULE_AUTHOR("Zhao Qunqin <zhaoqunqin@loongson.cn>\n");
> How do you think about to omit a line break character from such a string literal?
I will drop \n in next version patch.

Best regards,

Zhao Qunqin.

>
> Regards,
> Markus
Krzysztof Kozlowski Sept. 3, 2024, 7:58 a.m. UTC | #8
On 03/09/2024 09:24, Zhao Qunqin wrote:
> 
> 在 2024/9/3 下午2:47, Krzysztof Kozlowski 写道:
>> On Tue, Sep 03, 2024 at 09:53:54AM +0800, Zhao Qunqin wrote:
>>> Report single bit errors (CE) only.
>>>
>>> Signed-off-by: Zhao Qunqin <zhaoqunqin@loongson.cn>
>>> ---
>>>   MAINTAINERS                  |   1 +
>>>   arch/loongarch/Kconfig       |   1 +
>>>   drivers/edac/Kconfig         |   8 ++
>>>   drivers/edac/Makefile        |   1 +
>>>   drivers/edac/ls3a5000_edac.c | 187 +++++++++++++++++++++++++++++++++++
>>>   5 files changed, 198 insertions(+)
>>>   create mode 100644 drivers/edac/ls3a5000_edac.c
>>>
>>> diff --git a/MAINTAINERS b/MAINTAINERS
>>> index 6cc8cfc8f..b43f82279 100644
>>> --- a/MAINTAINERS
>>> +++ b/MAINTAINERS
>>> @@ -13242,6 +13242,7 @@ M:	Zhao Qunqin <zhaoqunqin@loongson.cn>
>>>   L:	linux-edac@vger.kernel.org
>>>   S:	Maintained
>>>   F:	Documentation/devicetree/bindings/edac/loongson,ls3a5000-mc-edac.yaml
>>> +F:	drivers/edac/ls3a5000_edac.c
>>>   
>>>   LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI)
>>>   M:	Sathya Prakash <sathya.prakash@broadcom.com>
>>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
>>> index 70f169210..348030c24 100644
>>> --- a/arch/loongarch/Kconfig
>>> +++ b/arch/loongarch/Kconfig
>>> @@ -182,6 +182,7 @@ config LOONGARCH
>>>   	select PCI_QUIRKS
>>>   	select PERF_USE_VMALLOC
>>>   	select RTC_LIB
>>> +	select EDAC_SUPPORT
>> I think you got here comment before. How did you address it?
> I just randomly found a spot, and I will put it at the end(next version 
> patch).

No, the comment was different. You must not select user-visible symbols.

>>
>>>   	select SPARSE_IRQ
>>>   	select SYSCTL_ARCH_UNALIGN_ALLOW
>>>   	select SYSCTL_ARCH_UNALIGN_NO_WARN
>>> diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
>>> index 16c8de505..2d10256f0 100644
>>> --- a/drivers/edac/Kconfig
>>> +++ b/drivers/edac/Kconfig
>>> @@ -573,5 +573,13 @@ config EDAC_VERSAL
>>>   	  Support injecting both correctable and uncorrectable errors
>>>   	  for debugging purposes.
>>>   
>> ...
>>
>>   +
>>> +static int loongson_edac_probe(struct platform_device *pdev)
>>> +{
>>> +	struct resource *rs;
>>> +	struct mem_ctl_info *mci;
>>> +	struct edac_mc_layer layers[2];
>>> +	struct loongson_edac_pvt *pvt;
>>> +	u64 *vbase = NULL;
>>> +
>>> +	rs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
>>> +	/* not return if can not find resource or resource start equals NULL */
>> Why?
> 
> Because there are multiple memory controllers in the ls3x soc,
> 
> but the ECC function of some memory controllers cannot be used.

Then what does the driver do for such memory controllers? Your binding
is quite clear here that above code is just bogus. It is not possible to
have node without reg.

Please point us to your DTS and results of dtbs_check.

> 
> But in any case, a node must be created in /sys/devices/system/edac/mc/  
> through edac_mc_add_mc(mci).
> 
> Then if the ECC function of the memory controller cannot be used, set 
> start to NULL or do not pass mem resource,
> 
> which is equivalent to enumeration of memory controller, and the CE 
> count will always be zero.
> 
>>> +	if (rs && rs->start) {
>>> +		vbase = devm_ioremap_resource(&pdev->dev, rs);
>>> +		if (IS_ERR(vbase))
>>> +			return PTR_ERR(vbase);
>>> +	}


Best regards,
Krzysztof
Xi Ruoyao Sept. 3, 2024, 8:30 a.m. UTC | #9
On Tue, 2024-09-03 at 09:58 +0200, Krzysztof Kozlowski wrote:
> > > > +	select EDAC_SUPPORT
> > > I think you got here comment before. How did you address it?
> > I just randomly found a spot, and I will put it at the end(next version 
> > patch).
> 
> No, the comment was different. You must not select user-visible symbols.

EDAC_SUPPORT isn't user-visible.  EDAC is and it has been removed.
Zhao Qunqin Sept. 3, 2024, 9:59 a.m. UTC | #10
在 2024/9/3 下午3:58, Krzysztof Kozlowski 写道:
> On 03/09/2024 09:24, Zhao Qunqin wrote:
>> 在 2024/9/3 下午2:47, Krzysztof Kozlowski 写道:
>>> On Tue, Sep 03, 2024 at 09:53:54AM +0800, Zhao Qunqin wrote:
>>>> Report single bit errors (CE) only.
>>>>
>>>> Signed-off-by: Zhao Qunqin <zhaoqunqin@loongson.cn>
>>>> ---
>>>>    MAINTAINERS                  |   1 +
>>>>    arch/loongarch/Kconfig       |   1 +
>>>>    drivers/edac/Kconfig         |   8 ++
>>>>    drivers/edac/Makefile        |   1 +
>>>>    drivers/edac/ls3a5000_edac.c | 187 +++++++++++++++++++++++++++++++++++
>>>>    5 files changed, 198 insertions(+)
>>>>    create mode 100644 drivers/edac/ls3a5000_edac.c
>>>>
>>>> diff --git a/MAINTAINERS b/MAINTAINERS
>>>> index 6cc8cfc8f..b43f82279 100644
>>>> --- a/MAINTAINERS
>>>> +++ b/MAINTAINERS
>>>> @@ -13242,6 +13242,7 @@ M:	Zhao Qunqin <zhaoqunqin@loongson.cn>
>>>>    L:	linux-edac@vger.kernel.org
>>>>    S:	Maintained
>>>>    F:	Documentation/devicetree/bindings/edac/loongson,ls3a5000-mc-edac.yaml
>>>> +F:	drivers/edac/ls3a5000_edac.c
>>>>    
>>>>    LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI)
>>>>    M:	Sathya Prakash <sathya.prakash@broadcom.com>
>>>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
>>>> index 70f169210..348030c24 100644
>>>> --- a/arch/loongarch/Kconfig
>>>> +++ b/arch/loongarch/Kconfig
>>>> @@ -182,6 +182,7 @@ config LOONGARCH
>>>>    	select PCI_QUIRKS
>>>>    	select PERF_USE_VMALLOC
>>>>    	select RTC_LIB
>>>> +	select EDAC_SUPPORT
>>> I think you got here comment before. How did you address it?
>> I just randomly found a spot, and I will put it at the end(next version
>> patch).
> No, the comment was different. You must not select user-visible symbols.

Last version patch is:

@@ -182,6 +182,7 @@ config LOONGARCH
   	select PCI_QUIRKS
   	select PERF_USE_VMALLOC
   	select RTC_LIB
+	select EDAC_SUPPORT
+	select EDAC

I delet "select EDAC" in this version patch. Other arch's Kconfig has "select EDAC_SUPPORT", such as arch/x86/Kconfig and arch/arm64/Kconfig
.

>
>>>>    	select SPARSE_IRQ
>>>>    	select SYSCTL_ARCH_UNALIGN_ALLOW
>>>>    	select SYSCTL_ARCH_UNALIGN_NO_WARN
>>>> diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
>>>> index 16c8de505..2d10256f0 100644
>>>> --- a/drivers/edac/Kconfig
>>>> +++ b/drivers/edac/Kconfig
>>>> @@ -573,5 +573,13 @@ config EDAC_VERSAL
>>>>    	  Support injecting both correctable and uncorrectable errors
>>>>    	  for debugging purposes.
>>>>    
>>> ...
>>>
>>>    +
>>>> +static int loongson_edac_probe(struct platform_device *pdev)
>>>> +{
>>>> +	struct resource *rs;
>>>> +	struct mem_ctl_info *mci;
>>>> +	struct edac_mc_layer layers[2];
>>>> +	struct loongson_edac_pvt *pvt;
>>>> +	u64 *vbase = NULL;
>>>> +
>>>> +	rs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
>>>> +	/* not return if can not find resource or resource start equals NULL */
>>> Why?
>> Because there are multiple memory controllers in the ls3x soc,
>>
>> but the ECC function of some memory controllers cannot be used.
> Then what does the driver do for such memory controllers? Your binding
> is quite clear here that above code is just bogus. It is not possible to
> have node without reg.

I will rewrite the code above.

> Please point us to your DTS and results of dtbs_check.
>
>> But in any case, a node must be created in /sys/devices/system/edac/mc/
>> through edac_mc_add_mc(mci).
>>
>> Then if the ECC function of the memory controller cannot be used, set
>> start to NULL or do not pass mem resource,
>>
>> which is equivalent to enumeration of memory controller, and the CE
>> count will always be zero.
>>
>>>> +	if (rs && rs->start) {
>>>> +		vbase = devm_ioremap_resource(&pdev->dev, rs);
>>>> +		if (IS_ERR(vbase))
>>>> +			return PTR_ERR(vbase);
>>>> +	}
>
> Best regards,
> Krzysztof
Krzysztof Kozlowski Sept. 3, 2024, 11:29 a.m. UTC | #11
On 03/09/2024 10:30, Xi Ruoyao wrote:
> On Tue, 2024-09-03 at 09:58 +0200, Krzysztof Kozlowski wrote:
>>>>> +	select EDAC_SUPPORT
>>>> I think you got here comment before. How did you address it?
>>> I just randomly found a spot, and I will put it at the end(next version 
>>> patch).
>>
>> No, the comment was different. You must not select user-visible symbols.
> 
> EDAC_SUPPORT isn't user-visible.  EDAC is and it has been removed.

Ah, ok, I missed that. Shouldn't this be separate patch with its own
rationale? Or before this driver there was no EDAC support for Loongson
at all?

Best regards,
Krzysztof
Zhao Qunqin Sept. 3, 2024, 11:31 a.m. UTC | #12
在 2024/9/3 下午7:29, Krzysztof Kozlowski 写道:
> On 03/09/2024 10:30, Xi Ruoyao wrote:
>> On Tue, 2024-09-03 at 09:58 +0200, Krzysztof Kozlowski wrote:
>>>>>> +	select EDAC_SUPPORT
>>>>> I think you got here comment before. How did you address it?
>>>> I just randomly found a spot, and I will put it at the end(next version
>>>> patch).
>>> No, the comment was different. You must not select user-visible symbols.
>> EDAC_SUPPORT isn't user-visible.  EDAC is and it has been removed.
> Ah, ok, I missed that. Shouldn't this be separate patch with its own
> rationale? Or before this driver there was no EDAC support for Loongson
> at all?

Before this driver there was no EDAC support for Loongson
at all.

Best regards,
Zhao Qunqin.

> Best regards,
> Krzysztof
Krzysztof Kozlowski Sept. 3, 2024, 11:36 a.m. UTC | #13
On 03/09/2024 13:31, Zhao Qunqin wrote:
> 
> 在 2024/9/3 下午7:29, Krzysztof Kozlowski 写道:
>> On 03/09/2024 10:30, Xi Ruoyao wrote:
>>> On Tue, 2024-09-03 at 09:58 +0200, Krzysztof Kozlowski wrote:
>>>>>>> +	select EDAC_SUPPORT
>>>>>> I think you got here comment before. How did you address it?
>>>>> I just randomly found a spot, and I will put it at the end(next version
>>>>> patch).
>>>> No, the comment was different. You must not select user-visible symbols.
>>> EDAC_SUPPORT isn't user-visible.  EDAC is and it has been removed.
>> Ah, ok, I missed that. Shouldn't this be separate patch with its own
>> rationale? Or before this driver there was no EDAC support for Loongson
>> at all?
> 
> Before this driver there was no EDAC support for Loongson
> at all.

Ack, ok.

Best regards,
Krzysztof
diff mbox series

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index 6cc8cfc8f..b43f82279 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13242,6 +13242,7 @@  M:	Zhao Qunqin <zhaoqunqin@loongson.cn>
 L:	linux-edac@vger.kernel.org
 S:	Maintained
 F:	Documentation/devicetree/bindings/edac/loongson,ls3a5000-mc-edac.yaml
+F:	drivers/edac/ls3a5000_edac.c
 
 LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI)
 M:	Sathya Prakash <sathya.prakash@broadcom.com>
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 70f169210..348030c24 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -182,6 +182,7 @@  config LOONGARCH
 	select PCI_QUIRKS
 	select PERF_USE_VMALLOC
 	select RTC_LIB
+	select EDAC_SUPPORT
 	select SPARSE_IRQ
 	select SYSCTL_ARCH_UNALIGN_ALLOW
 	select SYSCTL_ARCH_UNALIGN_NO_WARN
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 16c8de505..2d10256f0 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -573,5 +573,13 @@  config EDAC_VERSAL
 	  Support injecting both correctable and uncorrectable errors
 	  for debugging purposes.
 
+config EDAC_LS3A5000
+	tristate "Ls3a5000 Memory Controller"
+	depends on LOONGARCH || COMPILE_TEST
+	help
+	  Support for error detection and correction on the ls3a5000 memory
+	  controller. This driver report single bit errors (CE) only.
+	  Ls3c5000l, ls3c5000, ls3d5000, ls3a6000 and ls3c6000 are compatible
+	  with ls3a5000.
 
 endif # EDAC
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 4edfb83ff..0974e3fa6 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -89,3 +89,4 @@  obj-$(CONFIG_EDAC_DMC520)		+= dmc520_edac.o
 obj-$(CONFIG_EDAC_NPCM)			+= npcm_edac.o
 obj-$(CONFIG_EDAC_ZYNQMP)		+= zynqmp_edac.o
 obj-$(CONFIG_EDAC_VERSAL)		+= versal_edac.o
+obj-$(CONFIG_EDAC_LS3A5000)		+= ls3a5000_edac.o
diff --git a/drivers/edac/ls3a5000_edac.c b/drivers/edac/ls3a5000_edac.c
new file mode 100644
index 000000000..c68fd7c5f
--- /dev/null
+++ b/drivers/edac/ls3a5000_edac.c
@@ -0,0 +1,187 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Loongson Technology Corporation Limited.
+ */
+
+#include <linux/edac.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+
+#include "edac_module.h"
+
+enum ecc_index {
+	ECC_SET = 0,
+	ECC_RESERVED,
+	ECC_COUNT,
+	ECC_CS_COUNT,
+	ECC_CODE,
+	ECC_ADDR,
+	ECC_DATA0,
+	ECC_DATA1,
+	ECC_DATA2,
+	ECC_DATA3,
+};
+
+struct loongson_edac_pvt {
+	u64 *ecc_base;
+	int last_ce_count;
+};
+
+static void loongson_update_ce_count(struct mem_ctl_info *mci,
+					int chan,
+					int new)
+{
+	int add;
+	struct loongson_edac_pvt *pvt = mci->pvt_info;
+
+	add = new - pvt->last_ce_count;
+
+	/* Store the new value */
+	pvt->last_ce_count = new;
+
+	/* device resume or any other exceptions*/
+	if (add < 0)
+		return;
+
+	/*updated the edac core */
+	if (add != 0) {
+		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add,
+					0, 0, 0,
+					chan, 0, -1, "error", "");
+		edac_mc_printk(mci, KERN_INFO, "add: %d", add);
+	}
+}
+
+static int loongson_read_ecc(struct mem_ctl_info *mci)
+{
+	u64 ecc;
+	int cs = 0;
+	struct loongson_edac_pvt *pvt = mci->pvt_info;
+
+	if (!pvt->ecc_base)
+		return pvt->last_ce_count;
+
+	ecc = pvt->ecc_base[ECC_CS_COUNT];
+	cs += ecc & 0xff;		// cs0
+	cs += (ecc >> 8) & 0xff;	// cs1
+	cs += (ecc >> 16) & 0xff;	// cs2
+	cs += (ecc >> 24) & 0xff;	// cs3
+
+	return cs;
+}
+
+static void loongson_edac_check(struct mem_ctl_info *mci)
+{
+	loongson_update_ce_count(mci, 0, loongson_read_ecc(mci));
+}
+
+static int get_dimm_config(struct mem_ctl_info *mci)
+{
+	u32 size, npages;
+	struct dimm_info *dimm;
+
+	/* size not used */
+	size = -1;
+	npages = MiB_TO_PAGES(size);
+
+	dimm = edac_get_dimm(mci, 0, 0, 0);
+	dimm->nr_pages = npages;
+	snprintf(dimm->label, sizeof(dimm->label),
+			"MC#%uChannel#%u_DIMM#%u",
+			mci->mc_idx, 0, 0);
+	dimm->grain = 8;
+
+	return 0;
+}
+
+static void loongson_pvt_init(struct mem_ctl_info *mci, u64 *vbase)
+{
+	struct loongson_edac_pvt *pvt = mci->pvt_info;
+
+	pvt->ecc_base = vbase;
+	pvt->last_ce_count = loongson_read_ecc(mci);
+}
+
+static int loongson_edac_probe(struct platform_device *pdev)
+{
+	struct resource *rs;
+	struct mem_ctl_info *mci;
+	struct edac_mc_layer layers[2];
+	struct loongson_edac_pvt *pvt;
+	u64 *vbase = NULL;
+
+	rs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	/* not return if can not find resource or resource start equals NULL */
+	if (rs && rs->start) {
+		vbase = devm_ioremap_resource(&pdev->dev, rs);
+		if (IS_ERR(vbase))
+			return PTR_ERR(vbase);
+	}
+
+	/* allocate a new MC control structure */
+	layers[0].type = EDAC_MC_LAYER_CHANNEL;
+	layers[0].size = 1;
+	layers[0].is_virt_csrow = false;
+	layers[1].type = EDAC_MC_LAYER_SLOT;
+	layers[1].size = 1;
+	layers[1].is_virt_csrow = true;
+	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
+	if (mci == NULL)
+		return -ENOMEM;
+
+	edac_dbg(0, "MC: mci = %p\n", mci);
+
+	mci->mc_idx = edac_device_alloc_index();
+	mci->mtype_cap = MEM_FLAG_RDDR4;
+	mci->edac_ctl_cap = EDAC_FLAG_NONE;
+	mci->edac_cap = EDAC_FLAG_NONE;
+	mci->mod_name = "loongson_edac.c";
+	mci->ctl_name = "loongson_edac_ctl";
+	mci->dev_name = "loongson_edac_dev";
+	mci->ctl_page_to_phys = NULL;
+	mci->pdev = &pdev->dev;
+	mci->error_desc.grain = 8;
+	/* Set the function pointer to an actual operation function */
+	mci->edac_check = loongson_edac_check;
+
+	loongson_pvt_init(mci, vbase);
+	get_dimm_config(mci);
+
+	if (edac_mc_add_mc(mci)) {
+		edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
+		edac_mc_free(mci);
+	}
+	edac_op_state = EDAC_OPSTATE_POLL;
+
+	return 0;
+}
+
+static void loongson_edac_remove(struct platform_device *pdev)
+{
+	struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev);
+
+	if (mci)
+		edac_mc_free(mci);
+}
+
+static const struct of_device_id loongson_edac_of_match[] = {
+	{ .compatible = "loongson,ls3a5000-mc-edac", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, loongson_edac_of_match);
+
+static struct platform_driver loongson_edac_driver = {
+	.probe		= loongson_edac_probe,
+	.remove		= loongson_edac_remove,
+	.driver		= {
+		.name	= "ls-mc-edac",
+		.of_match_table = loongson_edac_of_match,
+	},
+};
+
+module_platform_driver(loongson_edac_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Zhao Qunqin <zhaoqunqin@loongson.cn>\n");
+MODULE_DESCRIPTION("EDAC driver for loongson memory controller");