Message ID | 20240903015354.9443-3-zhaoqunqin@loongson.cn (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Add EDAC driver for ls3a5000 memory controller | expand |
Hi, Qunqin, On Tue, Sep 3, 2024 at 9:53 AM Zhao Qunqin <zhaoqunqin@loongson.cn> wrote: > > Report single bit errors (CE) only. > > Signed-off-by: Zhao Qunqin <zhaoqunqin@loongson.cn> > --- > MAINTAINERS | 1 + > arch/loongarch/Kconfig | 1 + > drivers/edac/Kconfig | 8 ++ > drivers/edac/Makefile | 1 + > drivers/edac/ls3a5000_edac.c | 187 +++++++++++++++++++++++++++++++++++ > 5 files changed, 198 insertions(+) > create mode 100644 drivers/edac/ls3a5000_edac.c > > diff --git a/MAINTAINERS b/MAINTAINERS > index 6cc8cfc8f..b43f82279 100644 > --- a/MAINTAINERS > +++ b/MAINTAINERS > @@ -13242,6 +13242,7 @@ M: Zhao Qunqin <zhaoqunqin@loongson.cn> > L: linux-edac@vger.kernel.org > S: Maintained > F: Documentation/devicetree/bindings/edac/loongson,ls3a5000-mc-edac.yaml > +F: drivers/edac/ls3a5000_edac.c > > LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI) > M: Sathya Prakash <sathya.prakash@broadcom.com> > diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig > index 70f169210..348030c24 100644 > --- a/arch/loongarch/Kconfig > +++ b/arch/loongarch/Kconfig > @@ -182,6 +182,7 @@ config LOONGARCH > select PCI_QUIRKS > select PERF_USE_VMALLOC > select RTC_LIB > + select EDAC_SUPPORT > select SPARSE_IRQ > select SYSCTL_ARCH_UNALIGN_ALLOW > select SYSCTL_ARCH_UNALIGN_NO_WARN > diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig > index 16c8de505..2d10256f0 100644 > --- a/drivers/edac/Kconfig > +++ b/drivers/edac/Kconfig > @@ -573,5 +573,13 @@ config EDAC_VERSAL > Support injecting both correctable and uncorrectable errors > for debugging purposes. > > +config EDAC_LS3A5000 > + tristate "Ls3a5000 Memory Controller" > + depends on LOONGARCH || COMPILE_TEST > + help > + Support for error detection and correction on the ls3a5000 memory > + controller. This driver report single bit errors (CE) only. > + Ls3c5000l, ls3c5000, ls3d5000, ls3a6000 and ls3c6000 are compatible > + with ls3a5000. Here can be improved as: config EDAC_LOONGSON3 tristate "Loonson-3 Memory Controller" depends on LOONGARCH || COMPILE_TEST help Support for error detection and correction on the Loongson-3 family memory controller. This driver reports single bit errors (CE) only. Loongson-3A5000/3C5000/3C5000L/3A6000/3C6000 are compatible. > > endif # EDAC > diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile > index 4edfb83ff..0974e3fa6 100644 > --- a/drivers/edac/Makefile > +++ b/drivers/edac/Makefile > @@ -89,3 +89,4 @@ obj-$(CONFIG_EDAC_DMC520) += dmc520_edac.o > obj-$(CONFIG_EDAC_NPCM) += npcm_edac.o > obj-$(CONFIG_EDAC_ZYNQMP) += zynqmp_edac.o > obj-$(CONFIG_EDAC_VERSAL) += versal_edac.o > +obj-$(CONFIG_EDAC_LS3A5000) += ls3a5000_edac.o > diff --git a/drivers/edac/ls3a5000_edac.c b/drivers/edac/ls3a5000_edac.c > new file mode 100644 > index 000000000..c68fd7c5f > --- /dev/null > +++ b/drivers/edac/ls3a5000_edac.c > @@ -0,0 +1,187 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Copyright (C) 2024 Loongson Technology Corporation Limited. > + */ > + > +#include <linux/edac.h> > +#include <linux/module.h> > +#include <linux/init.h> > +#include <linux/platform_device.h> > + > +#include "edac_module.h" > + > +enum ecc_index { > + ECC_SET = 0, > + ECC_RESERVED, > + ECC_COUNT, > + ECC_CS_COUNT, > + ECC_CODE, > + ECC_ADDR, > + ECC_DATA0, > + ECC_DATA1, > + ECC_DATA2, > + ECC_DATA3, > +}; > + > +struct loongson_edac_pvt { > + u64 *ecc_base; > + int last_ce_count; > +}; > + > +static void loongson_update_ce_count(struct mem_ctl_info *mci, > + int chan, > + int new) > +{ > + int add; > + struct loongson_edac_pvt *pvt = mci->pvt_info; > + > + add = new - pvt->last_ce_count; > + > + /* Store the new value */ > + pvt->last_ce_count = new; > + > + /* device resume or any other exceptions*/ > + if (add < 0) > + return; > + > + /*updated the edac core */ > + if (add != 0) { > + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add, > + 0, 0, 0, > + chan, 0, -1, "error", ""); > + edac_mc_printk(mci, KERN_INFO, "add: %d", add); > + } > +} > + > +static int loongson_read_ecc(struct mem_ctl_info *mci) > +{ > + u64 ecc; > + int cs = 0; > + struct loongson_edac_pvt *pvt = mci->pvt_info; > + > + if (!pvt->ecc_base) > + return pvt->last_ce_count; > + > + ecc = pvt->ecc_base[ECC_CS_COUNT]; > + cs += ecc & 0xff; // cs0 > + cs += (ecc >> 8) & 0xff; // cs1 > + cs += (ecc >> 16) & 0xff; // cs2 > + cs += (ecc >> 24) & 0xff; // cs3 > + > + return cs; > +} > + > +static void loongson_edac_check(struct mem_ctl_info *mci) > +{ > + loongson_update_ce_count(mci, 0, loongson_read_ecc(mci)); > +} > + > +static int get_dimm_config(struct mem_ctl_info *mci) > +{ > + u32 size, npages; > + struct dimm_info *dimm; > + > + /* size not used */ > + size = -1; > + npages = MiB_TO_PAGES(size); > + > + dimm = edac_get_dimm(mci, 0, 0, 0); > + dimm->nr_pages = npages; > + snprintf(dimm->label, sizeof(dimm->label), > + "MC#%uChannel#%u_DIMM#%u", > + mci->mc_idx, 0, 0); > + dimm->grain = 8; > + > + return 0; > +} > + > +static void loongson_pvt_init(struct mem_ctl_info *mci, u64 *vbase) > +{ > + struct loongson_edac_pvt *pvt = mci->pvt_info; > + > + pvt->ecc_base = vbase; > + pvt->last_ce_count = loongson_read_ecc(mci); > +} > + > +static int loongson_edac_probe(struct platform_device *pdev) > +{ > + struct resource *rs; > + struct mem_ctl_info *mci; > + struct edac_mc_layer layers[2]; > + struct loongson_edac_pvt *pvt; > + u64 *vbase = NULL; > + > + rs = platform_get_resource(pdev, IORESOURCE_MEM, 0); > + /* not return if can not find resource or resource start equals NULL */ > + if (rs && rs->start) { > + vbase = devm_ioremap_resource(&pdev->dev, rs); > + if (IS_ERR(vbase)) > + return PTR_ERR(vbase); > + } > + > + /* allocate a new MC control structure */ > + layers[0].type = EDAC_MC_LAYER_CHANNEL; > + layers[0].size = 1; > + layers[0].is_virt_csrow = false; > + layers[1].type = EDAC_MC_LAYER_SLOT; > + layers[1].size = 1; > + layers[1].is_virt_csrow = true; > + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt)); > + if (mci == NULL) > + return -ENOMEM; > + > + edac_dbg(0, "MC: mci = %p\n", mci); > + > + mci->mc_idx = edac_device_alloc_index(); > + mci->mtype_cap = MEM_FLAG_RDDR4; > + mci->edac_ctl_cap = EDAC_FLAG_NONE; > + mci->edac_cap = EDAC_FLAG_NONE; > + mci->mod_name = "loongson_edac.c"; > + mci->ctl_name = "loongson_edac_ctl"; > + mci->dev_name = "loongson_edac_dev"; > + mci->ctl_page_to_phys = NULL; > + mci->pdev = &pdev->dev; > + mci->error_desc.grain = 8; > + /* Set the function pointer to an actual operation function */ > + mci->edac_check = loongson_edac_check; > + > + loongson_pvt_init(mci, vbase); > + get_dimm_config(mci); > + > + if (edac_mc_add_mc(mci)) { > + edac_dbg(0, "MC: failed edac_mc_add_mc()\n"); > + edac_mc_free(mci); > + } > + edac_op_state = EDAC_OPSTATE_POLL; > + > + return 0; > +} > + > +static void loongson_edac_remove(struct platform_device *pdev) > +{ > + struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev); > + > + if (mci) > + edac_mc_free(mci); > +} > + > +static const struct of_device_id loongson_edac_of_match[] = { > + { .compatible = "loongson,ls3a5000-mc-edac", }, > + {} > +}; > +MODULE_DEVICE_TABLE(of, loongson_edac_of_match); > + > +static struct platform_driver loongson_edac_driver = { > + .probe = loongson_edac_probe, > + .remove = loongson_edac_remove, > + .driver = { > + .name = "ls-mc-edac", The name can be better as loongson-mc-edac. Huacai > + .of_match_table = loongson_edac_of_match, > + }, > +}; > + > +module_platform_driver(loongson_edac_driver); > + > +MODULE_LICENSE("GPL"); > +MODULE_AUTHOR("Zhao Qunqin <zhaoqunqin@loongson.cn>\n"); > +MODULE_DESCRIPTION("EDAC driver for loongson memory controller"); > -- > 2.43.0 >
On Tue, Sep 3, 2024 at 10:39 AM Huacai Chen <chenhuacai@kernel.org> wrote: > > Hi, Qunqin, > > On Tue, Sep 3, 2024 at 9:53 AM Zhao Qunqin <zhaoqunqin@loongson.cn> wrote: > > > > Report single bit errors (CE) only. > > > > Signed-off-by: Zhao Qunqin <zhaoqunqin@loongson.cn> > > --- > > MAINTAINERS | 1 + > > arch/loongarch/Kconfig | 1 + > > drivers/edac/Kconfig | 8 ++ > > drivers/edac/Makefile | 1 + > > drivers/edac/ls3a5000_edac.c | 187 +++++++++++++++++++++++++++++++++++ > > 5 files changed, 198 insertions(+) > > create mode 100644 drivers/edac/ls3a5000_edac.c > > > > diff --git a/MAINTAINERS b/MAINTAINERS > > index 6cc8cfc8f..b43f82279 100644 > > --- a/MAINTAINERS > > +++ b/MAINTAINERS > > @@ -13242,6 +13242,7 @@ M: Zhao Qunqin <zhaoqunqin@loongson.cn> > > L: linux-edac@vger.kernel.org > > S: Maintained > > F: Documentation/devicetree/bindings/edac/loongson,ls3a5000-mc-edac.yaml > > +F: drivers/edac/ls3a5000_edac.c > > > > LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI) > > M: Sathya Prakash <sathya.prakash@broadcom.com> > > diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig > > index 70f169210..348030c24 100644 > > --- a/arch/loongarch/Kconfig > > +++ b/arch/loongarch/Kconfig > > @@ -182,6 +182,7 @@ config LOONGARCH > > select PCI_QUIRKS > > select PERF_USE_VMALLOC > > select RTC_LIB > > + select EDAC_SUPPORT > > select SPARSE_IRQ > > select SYSCTL_ARCH_UNALIGN_ALLOW > > select SYSCTL_ARCH_UNALIGN_NO_WARN > > diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig > > index 16c8de505..2d10256f0 100644 > > --- a/drivers/edac/Kconfig > > +++ b/drivers/edac/Kconfig > > @@ -573,5 +573,13 @@ config EDAC_VERSAL > > Support injecting both correctable and uncorrectable errors > > for debugging purposes. > > > > +config EDAC_LS3A5000 > > + tristate "Ls3a5000 Memory Controller" > > + depends on LOONGARCH || COMPILE_TEST > > + help > > + Support for error detection and correction on the ls3a5000 memory > > + controller. This driver report single bit errors (CE) only. > > + Ls3c5000l, ls3c5000, ls3d5000, ls3a6000 and ls3c6000 are compatible > > + with ls3a5000. > Here can be improved as: > > config EDAC_LOONGSON3 > tristate "Loonson-3 Memory Controller" > depends on LOONGARCH || COMPILE_TEST > help > Support for error detection and correction on the Loongson-3 family > memory controller. This driver reports single bit errors (CE) only. > Loongson-3A5000/3C5000/3C5000L/3A6000/3C6000 are compatible. > And the driver file should be loongson_edac.c, this naming has nothing to do with the dt-binding names. Huacai > > > > endif # EDAC > > diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile > > index 4edfb83ff..0974e3fa6 100644 > > --- a/drivers/edac/Makefile > > +++ b/drivers/edac/Makefile > > @@ -89,3 +89,4 @@ obj-$(CONFIG_EDAC_DMC520) += dmc520_edac.o > > obj-$(CONFIG_EDAC_NPCM) += npcm_edac.o > > obj-$(CONFIG_EDAC_ZYNQMP) += zynqmp_edac.o > > obj-$(CONFIG_EDAC_VERSAL) += versal_edac.o > > +obj-$(CONFIG_EDAC_LS3A5000) += ls3a5000_edac.o > > diff --git a/drivers/edac/ls3a5000_edac.c b/drivers/edac/ls3a5000_edac.c > > new file mode 100644 > > index 000000000..c68fd7c5f > > --- /dev/null > > +++ b/drivers/edac/ls3a5000_edac.c > > @@ -0,0 +1,187 @@ > > +// SPDX-License-Identifier: GPL-2.0 > > +/* > > + * Copyright (C) 2024 Loongson Technology Corporation Limited. > > + */ > > + > > +#include <linux/edac.h> > > +#include <linux/module.h> > > +#include <linux/init.h> > > +#include <linux/platform_device.h> > > + > > +#include "edac_module.h" > > + > > +enum ecc_index { > > + ECC_SET = 0, > > + ECC_RESERVED, > > + ECC_COUNT, > > + ECC_CS_COUNT, > > + ECC_CODE, > > + ECC_ADDR, > > + ECC_DATA0, > > + ECC_DATA1, > > + ECC_DATA2, > > + ECC_DATA3, > > +}; > > + > > +struct loongson_edac_pvt { > > + u64 *ecc_base; > > + int last_ce_count; > > +}; > > + > > +static void loongson_update_ce_count(struct mem_ctl_info *mci, > > + int chan, > > + int new) > > +{ > > + int add; > > + struct loongson_edac_pvt *pvt = mci->pvt_info; > > + > > + add = new - pvt->last_ce_count; > > + > > + /* Store the new value */ > > + pvt->last_ce_count = new; > > + > > + /* device resume or any other exceptions*/ > > + if (add < 0) > > + return; > > + > > + /*updated the edac core */ > > + if (add != 0) { > > + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add, > > + 0, 0, 0, > > + chan, 0, -1, "error", ""); > > + edac_mc_printk(mci, KERN_INFO, "add: %d", add); > > + } > > +} > > + > > +static int loongson_read_ecc(struct mem_ctl_info *mci) > > +{ > > + u64 ecc; > > + int cs = 0; > > + struct loongson_edac_pvt *pvt = mci->pvt_info; > > + > > + if (!pvt->ecc_base) > > + return pvt->last_ce_count; > > + > > + ecc = pvt->ecc_base[ECC_CS_COUNT]; > > + cs += ecc & 0xff; // cs0 > > + cs += (ecc >> 8) & 0xff; // cs1 > > + cs += (ecc >> 16) & 0xff; // cs2 > > + cs += (ecc >> 24) & 0xff; // cs3 > > + > > + return cs; > > +} > > + > > +static void loongson_edac_check(struct mem_ctl_info *mci) > > +{ > > + loongson_update_ce_count(mci, 0, loongson_read_ecc(mci)); > > +} > > + > > +static int get_dimm_config(struct mem_ctl_info *mci) > > +{ > > + u32 size, npages; > > + struct dimm_info *dimm; > > + > > + /* size not used */ > > + size = -1; > > + npages = MiB_TO_PAGES(size); > > + > > + dimm = edac_get_dimm(mci, 0, 0, 0); > > + dimm->nr_pages = npages; > > + snprintf(dimm->label, sizeof(dimm->label), > > + "MC#%uChannel#%u_DIMM#%u", > > + mci->mc_idx, 0, 0); > > + dimm->grain = 8; > > + > > + return 0; > > +} > > + > > +static void loongson_pvt_init(struct mem_ctl_info *mci, u64 *vbase) > > +{ > > + struct loongson_edac_pvt *pvt = mci->pvt_info; > > + > > + pvt->ecc_base = vbase; > > + pvt->last_ce_count = loongson_read_ecc(mci); > > +} > > + > > +static int loongson_edac_probe(struct platform_device *pdev) > > +{ > > + struct resource *rs; > > + struct mem_ctl_info *mci; > > + struct edac_mc_layer layers[2]; > > + struct loongson_edac_pvt *pvt; > > + u64 *vbase = NULL; > > + > > + rs = platform_get_resource(pdev, IORESOURCE_MEM, 0); > > + /* not return if can not find resource or resource start equals NULL */ > > + if (rs && rs->start) { > > + vbase = devm_ioremap_resource(&pdev->dev, rs); > > + if (IS_ERR(vbase)) > > + return PTR_ERR(vbase); > > + } > > + > > + /* allocate a new MC control structure */ > > + layers[0].type = EDAC_MC_LAYER_CHANNEL; > > + layers[0].size = 1; > > + layers[0].is_virt_csrow = false; > > + layers[1].type = EDAC_MC_LAYER_SLOT; > > + layers[1].size = 1; > > + layers[1].is_virt_csrow = true; > > + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt)); > > + if (mci == NULL) > > + return -ENOMEM; > > + > > + edac_dbg(0, "MC: mci = %p\n", mci); > > + > > + mci->mc_idx = edac_device_alloc_index(); > > + mci->mtype_cap = MEM_FLAG_RDDR4; > > + mci->edac_ctl_cap = EDAC_FLAG_NONE; > > + mci->edac_cap = EDAC_FLAG_NONE; > > + mci->mod_name = "loongson_edac.c"; > > + mci->ctl_name = "loongson_edac_ctl"; > > + mci->dev_name = "loongson_edac_dev"; > > + mci->ctl_page_to_phys = NULL; > > + mci->pdev = &pdev->dev; > > + mci->error_desc.grain = 8; > > + /* Set the function pointer to an actual operation function */ > > + mci->edac_check = loongson_edac_check; > > + > > + loongson_pvt_init(mci, vbase); > > + get_dimm_config(mci); > > + > > + if (edac_mc_add_mc(mci)) { > > + edac_dbg(0, "MC: failed edac_mc_add_mc()\n"); > > + edac_mc_free(mci); > > + } > > + edac_op_state = EDAC_OPSTATE_POLL; > > + > > + return 0; > > +} > > + > > +static void loongson_edac_remove(struct platform_device *pdev) > > +{ > > + struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev); > > + > > + if (mci) > > + edac_mc_free(mci); > > +} > > + > > +static const struct of_device_id loongson_edac_of_match[] = { > > + { .compatible = "loongson,ls3a5000-mc-edac", }, > > + {} > > +}; > > +MODULE_DEVICE_TABLE(of, loongson_edac_of_match); > > + > > +static struct platform_driver loongson_edac_driver = { > > + .probe = loongson_edac_probe, > > + .remove = loongson_edac_remove, > > + .driver = { > > + .name = "ls-mc-edac", > The name can be better as loongson-mc-edac. > > Huacai > > > + .of_match_table = loongson_edac_of_match, > > + }, > > +}; > > + > > +module_platform_driver(loongson_edac_driver); > > + > > +MODULE_LICENSE("GPL"); > > +MODULE_AUTHOR("Zhao Qunqin <zhaoqunqin@loongson.cn>\n"); > > +MODULE_DESCRIPTION("EDAC driver for loongson memory controller"); > > -- > > 2.43.0 > >
Greetings all, 在 2024-09-03 10:39,Huacai Chen 写道: > Hi, Qunqin, > > On Tue, Sep 3, 2024 at 9:53 AM Zhao Qunqin <zhaoqunqin@loongson.cn> > wrote: >> >> Report single bit errors (CE) only. >> >> Signed-off-by: Zhao Qunqin <zhaoqunqin@loongson.cn> >> --- >> MAINTAINERS | 1 + >> arch/loongarch/Kconfig | 1 + >> drivers/edac/Kconfig | 8 ++ >> drivers/edac/Makefile | 1 + >> drivers/edac/ls3a5000_edac.c | 187 >> +++++++++++++++++++++++++++++++++++ >> 5 files changed, 198 insertions(+) >> create mode 100644 drivers/edac/ls3a5000_edac.c >> >> diff --git a/MAINTAINERS b/MAINTAINERS >> index 6cc8cfc8f..b43f82279 100644 >> --- a/MAINTAINERS >> +++ b/MAINTAINERS >> @@ -13242,6 +13242,7 @@ M: Zhao Qunqin <zhaoqunqin@loongson.cn> >> L: linux-edac@vger.kernel.org >> S: Maintained >> F: >> Documentation/devicetree/bindings/edac/loongson,ls3a5000-mc-edac.yaml >> +F: drivers/edac/ls3a5000_edac.c >> >> LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI) >> M: Sathya Prakash <sathya.prakash@broadcom.com> >> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig >> index 70f169210..348030c24 100644 >> --- a/arch/loongarch/Kconfig >> +++ b/arch/loongarch/Kconfig >> @@ -182,6 +182,7 @@ config LOONGARCH >> select PCI_QUIRKS >> select PERF_USE_VMALLOC >> select RTC_LIB >> + select EDAC_SUPPORT >> select SPARSE_IRQ >> select SYSCTL_ARCH_UNALIGN_ALLOW >> select SYSCTL_ARCH_UNALIGN_NO_WARN >> diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig >> index 16c8de505..2d10256f0 100644 >> --- a/drivers/edac/Kconfig >> +++ b/drivers/edac/Kconfig >> @@ -573,5 +573,13 @@ config EDAC_VERSAL >> Support injecting both correctable and uncorrectable errors >> for debugging purposes. >> >> +config EDAC_LS3A5000 >> + tristate "Ls3a5000 Memory Controller" >> + depends on LOONGARCH || COMPILE_TEST >> + help >> + Support for error detection and correction on the ls3a5000 >> memory >> + controller. This driver report single bit errors (CE) only. >> + Ls3c5000l, ls3c5000, ls3d5000, ls3a6000 and ls3c6000 are >> compatible >> + with ls3a5000. > Here can be improved as: > > config EDAC_LOONGSON3 > tristate "Loonson-3 Memory Controller" > depends on LOONGARCH || COMPILE_TEST > help > Support for error detection and correction on the Loongson-3 > family > memory controller. This driver reports single bit errors (CE) > only. > Loongson-3A5000/3C5000/3C5000L/3A6000/3C6000 are compatible. > Huacai, This looks much better, minus a typo - it should be Loongson-3, not Loonson-3. Also, what about 3B/D5000 and potentially 3A6000, 3B6000, and 3D6000, etc.? My suggestion would be to list them as a family, say: "All members of the Loongson-3 family of processors are compatible." If there are exceptions, then I would list them more explicitly, something like "all members... except the... series." What do you think? >> >> endif # EDAC >> diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile >> index 4edfb83ff..0974e3fa6 100644 >> --- a/drivers/edac/Makefile >> +++ b/drivers/edac/Makefile >> @@ -89,3 +89,4 @@ obj-$(CONFIG_EDAC_DMC520) += >> dmc520_edac.o >> obj-$(CONFIG_EDAC_NPCM) += npcm_edac.o >> obj-$(CONFIG_EDAC_ZYNQMP) += zynqmp_edac.o >> obj-$(CONFIG_EDAC_VERSAL) += versal_edac.o >> +obj-$(CONFIG_EDAC_LS3A5000) += ls3a5000_edac.o >> diff --git a/drivers/edac/ls3a5000_edac.c >> b/drivers/edac/ls3a5000_edac.c >> new file mode 100644 >> index 000000000..c68fd7c5f >> --- /dev/null >> +++ b/drivers/edac/ls3a5000_edac.c >> @@ -0,0 +1,187 @@ >> +// SPDX-License-Identifier: GPL-2.0 >> +/* >> + * Copyright (C) 2024 Loongson Technology Corporation Limited. >> + */ >> + >> +#include <linux/edac.h> >> +#include <linux/module.h> >> +#include <linux/init.h> >> +#include <linux/platform_device.h> >> + >> +#include "edac_module.h" >> + >> +enum ecc_index { >> + ECC_SET = 0, >> + ECC_RESERVED, >> + ECC_COUNT, >> + ECC_CS_COUNT, >> + ECC_CODE, >> + ECC_ADDR, >> + ECC_DATA0, >> + ECC_DATA1, >> + ECC_DATA2, >> + ECC_DATA3, >> +}; >> + >> +struct loongson_edac_pvt { >> + u64 *ecc_base; >> + int last_ce_count; >> +}; >> + >> +static void loongson_update_ce_count(struct mem_ctl_info *mci, >> + int chan, >> + int new) >> +{ >> + int add; >> + struct loongson_edac_pvt *pvt = mci->pvt_info; >> + >> + add = new - pvt->last_ce_count; >> + >> + /* Store the new value */ >> + pvt->last_ce_count = new; >> + >> + /* device resume or any other exceptions*/ >> + if (add < 0) >> + return; >> + >> + /*updated the edac core */ >> + if (add != 0) { >> + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add, >> + 0, 0, 0, >> + chan, 0, -1, "error", ""); >> + edac_mc_printk(mci, KERN_INFO, "add: %d", add); >> + } >> +} >> + >> +static int loongson_read_ecc(struct mem_ctl_info *mci) >> +{ >> + u64 ecc; >> + int cs = 0; >> + struct loongson_edac_pvt *pvt = mci->pvt_info; >> + >> + if (!pvt->ecc_base) >> + return pvt->last_ce_count; >> + >> + ecc = pvt->ecc_base[ECC_CS_COUNT]; >> + cs += ecc & 0xff; // cs0 >> + cs += (ecc >> 8) & 0xff; // cs1 >> + cs += (ecc >> 16) & 0xff; // cs2 >> + cs += (ecc >> 24) & 0xff; // cs3 >> + >> + return cs; >> +} >> + >> +static void loongson_edac_check(struct mem_ctl_info *mci) >> +{ >> + loongson_update_ce_count(mci, 0, loongson_read_ecc(mci)); >> +} >> + >> +static int get_dimm_config(struct mem_ctl_info *mci) >> +{ >> + u32 size, npages; >> + struct dimm_info *dimm; >> + >> + /* size not used */ >> + size = -1; >> + npages = MiB_TO_PAGES(size); >> + >> + dimm = edac_get_dimm(mci, 0, 0, 0); >> + dimm->nr_pages = npages; >> + snprintf(dimm->label, sizeof(dimm->label), >> + "MC#%uChannel#%u_DIMM#%u", >> + mci->mc_idx, 0, 0); >> + dimm->grain = 8; >> + >> + return 0; >> +} >> + >> +static void loongson_pvt_init(struct mem_ctl_info *mci, u64 *vbase) >> +{ >> + struct loongson_edac_pvt *pvt = mci->pvt_info; >> + >> + pvt->ecc_base = vbase; >> + pvt->last_ce_count = loongson_read_ecc(mci); >> +} >> + >> +static int loongson_edac_probe(struct platform_device *pdev) >> +{ >> + struct resource *rs; >> + struct mem_ctl_info *mci; >> + struct edac_mc_layer layers[2]; >> + struct loongson_edac_pvt *pvt; >> + u64 *vbase = NULL; >> + >> + rs = platform_get_resource(pdev, IORESOURCE_MEM, 0); >> + /* not return if can not find resource or resource start >> equals NULL */ >> + if (rs && rs->start) { >> + vbase = devm_ioremap_resource(&pdev->dev, rs); >> + if (IS_ERR(vbase)) >> + return PTR_ERR(vbase); >> + } >> + >> + /* allocate a new MC control structure */ >> + layers[0].type = EDAC_MC_LAYER_CHANNEL; >> + layers[0].size = 1; >> + layers[0].is_virt_csrow = false; >> + layers[1].type = EDAC_MC_LAYER_SLOT; >> + layers[1].size = 1; >> + layers[1].is_virt_csrow = true; >> + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, >> sizeof(*pvt)); >> + if (mci == NULL) >> + return -ENOMEM; >> + >> + edac_dbg(0, "MC: mci = %p\n", mci); >> + >> + mci->mc_idx = edac_device_alloc_index(); >> + mci->mtype_cap = MEM_FLAG_RDDR4; >> + mci->edac_ctl_cap = EDAC_FLAG_NONE; >> + mci->edac_cap = EDAC_FLAG_NONE; >> + mci->mod_name = "loongson_edac.c"; >> + mci->ctl_name = "loongson_edac_ctl"; >> + mci->dev_name = "loongson_edac_dev"; >> + mci->ctl_page_to_phys = NULL; >> + mci->pdev = &pdev->dev; >> + mci->error_desc.grain = 8; >> + /* Set the function pointer to an actual operation function */ >> + mci->edac_check = loongson_edac_check; >> + >> + loongson_pvt_init(mci, vbase); >> + get_dimm_config(mci); >> + >> + if (edac_mc_add_mc(mci)) { >> + edac_dbg(0, "MC: failed edac_mc_add_mc()\n"); >> + edac_mc_free(mci); >> + } >> + edac_op_state = EDAC_OPSTATE_POLL; >> + >> + return 0; >> +} >> + >> +static void loongson_edac_remove(struct platform_device *pdev) >> +{ >> + struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev); >> + >> + if (mci) >> + edac_mc_free(mci); >> +} >> + >> +static const struct of_device_id loongson_edac_of_match[] = { >> + { .compatible = "loongson,ls3a5000-mc-edac", }, >> + {} >> +}; >> +MODULE_DEVICE_TABLE(of, loongson_edac_of_match); >> + >> +static struct platform_driver loongson_edac_driver = { >> + .probe = loongson_edac_probe, >> + .remove = loongson_edac_remove, >> + .driver = { >> + .name = "ls-mc-edac", > The name can be better as loongson-mc-edac. > > Huacai > >> + .of_match_table = loongson_edac_of_match, >> + }, >> +}; >> + >> +module_platform_driver(loongson_edac_driver); >> + >> +MODULE_LICENSE("GPL"); >> +MODULE_AUTHOR("Zhao Qunqin <zhaoqunqin@loongson.cn>\n"); >> +MODULE_DESCRIPTION("EDAC driver for loongson memory controller"); >> -- >> 2.43.0 >> Best Regards, Mingcong Bai
… > +++ b/drivers/edac/ls3a5000_edac.c > @@ -0,0 +1,187 @@ … > +MODULE_AUTHOR("Zhao Qunqin <zhaoqunqin@loongson.cn>\n"); How do you think about to omit a line break character from such a string literal? Regards, Markus
On Tue, Sep 03, 2024 at 09:53:54AM +0800, Zhao Qunqin wrote: > Report single bit errors (CE) only. > > Signed-off-by: Zhao Qunqin <zhaoqunqin@loongson.cn> > --- > MAINTAINERS | 1 + > arch/loongarch/Kconfig | 1 + > drivers/edac/Kconfig | 8 ++ > drivers/edac/Makefile | 1 + > drivers/edac/ls3a5000_edac.c | 187 +++++++++++++++++++++++++++++++++++ > 5 files changed, 198 insertions(+) > create mode 100644 drivers/edac/ls3a5000_edac.c > > diff --git a/MAINTAINERS b/MAINTAINERS > index 6cc8cfc8f..b43f82279 100644 > --- a/MAINTAINERS > +++ b/MAINTAINERS > @@ -13242,6 +13242,7 @@ M: Zhao Qunqin <zhaoqunqin@loongson.cn> > L: linux-edac@vger.kernel.org > S: Maintained > F: Documentation/devicetree/bindings/edac/loongson,ls3a5000-mc-edac.yaml > +F: drivers/edac/ls3a5000_edac.c > > LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI) > M: Sathya Prakash <sathya.prakash@broadcom.com> > diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig > index 70f169210..348030c24 100644 > --- a/arch/loongarch/Kconfig > +++ b/arch/loongarch/Kconfig > @@ -182,6 +182,7 @@ config LOONGARCH > select PCI_QUIRKS > select PERF_USE_VMALLOC > select RTC_LIB > + select EDAC_SUPPORT I think you got here comment before. How did you address it? > select SPARSE_IRQ > select SYSCTL_ARCH_UNALIGN_ALLOW > select SYSCTL_ARCH_UNALIGN_NO_WARN > diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig > index 16c8de505..2d10256f0 100644 > --- a/drivers/edac/Kconfig > +++ b/drivers/edac/Kconfig > @@ -573,5 +573,13 @@ config EDAC_VERSAL > Support injecting both correctable and uncorrectable errors > for debugging purposes. > ... + > +static int loongson_edac_probe(struct platform_device *pdev) > +{ > + struct resource *rs; > + struct mem_ctl_info *mci; > + struct edac_mc_layer layers[2]; > + struct loongson_edac_pvt *pvt; > + u64 *vbase = NULL; > + > + rs = platform_get_resource(pdev, IORESOURCE_MEM, 0); > + /* not return if can not find resource or resource start equals NULL */ Why? > + if (rs && rs->start) { > + vbase = devm_ioremap_resource(&pdev->dev, rs); > + if (IS_ERR(vbase)) > + return PTR_ERR(vbase); > + } > + > + /* allocate a new MC control structure */ > + layers[0].type = EDAC_MC_LAYER_CHANNEL; > + layers[0].size = 1; > + layers[0].is_virt_csrow = false; > + layers[1].type = EDAC_MC_LAYER_SLOT; > + layers[1].size = 1; > + layers[1].is_virt_csrow = true; > + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt)); > + if (mci == NULL) > + return -ENOMEM; > + > + edac_dbg(0, "MC: mci = %p\n", mci); Nope, drop. You should never print pointers. > + > + mci->mc_idx = edac_device_alloc_index(); > + mci->mtype_cap = MEM_FLAG_RDDR4; > + mci->edac_ctl_cap = EDAC_FLAG_NONE; > + mci->edac_cap = EDAC_FLAG_NONE; > + mci->mod_name = "loongson_edac.c"; > + mci->ctl_name = "loongson_edac_ctl"; > + mci->dev_name = "loongson_edac_dev"; > + mci->ctl_page_to_phys = NULL; > + mci->pdev = &pdev->dev; > + mci->error_desc.grain = 8; > + /* Set the function pointer to an actual operation function */ > + mci->edac_check = loongson_edac_check; > + > + loongson_pvt_init(mci, vbase); > + get_dimm_config(mci); > + > + if (edac_mc_add_mc(mci)) { > + edac_dbg(0, "MC: failed edac_mc_add_mc()\n"); > + edac_mc_free(mci); That's still a success of probe? Weird a bit. > + } > + edac_op_state = EDAC_OPSTATE_POLL; > + > + return 0; > +} > + > +static void loongson_edac_remove(struct platform_device *pdev) > +{ > + struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev); > + > + if (mci) > + edac_mc_free(mci); > +} > + > +static const struct of_device_id loongson_edac_of_match[] = { > + { .compatible = "loongson,ls3a5000-mc-edac", }, > + {} > +}; > +MODULE_DEVICE_TABLE(of, loongson_edac_of_match); > + > +static struct platform_driver loongson_edac_driver = { > + .probe = loongson_edac_probe, > + .remove = loongson_edac_remove, > + .driver = { > + .name = "ls-mc-edac", > + .of_match_table = loongson_edac_of_match, > + }, > +}; > + > +module_platform_driver(loongson_edac_driver); > + > +MODULE_LICENSE("GPL"); > +MODULE_AUTHOR("Zhao Qunqin <zhaoqunqin@loongson.cn>\n"); Drop \n. > +MODULE_DESCRIPTION("EDAC driver for loongson memory controller"); > -- > 2.43.0 >
在 2024/9/3 下午2:47, Krzysztof Kozlowski 写道: > On Tue, Sep 03, 2024 at 09:53:54AM +0800, Zhao Qunqin wrote: >> Report single bit errors (CE) only. >> >> Signed-off-by: Zhao Qunqin <zhaoqunqin@loongson.cn> >> --- >> MAINTAINERS | 1 + >> arch/loongarch/Kconfig | 1 + >> drivers/edac/Kconfig | 8 ++ >> drivers/edac/Makefile | 1 + >> drivers/edac/ls3a5000_edac.c | 187 +++++++++++++++++++++++++++++++++++ >> 5 files changed, 198 insertions(+) >> create mode 100644 drivers/edac/ls3a5000_edac.c >> >> diff --git a/MAINTAINERS b/MAINTAINERS >> index 6cc8cfc8f..b43f82279 100644 >> --- a/MAINTAINERS >> +++ b/MAINTAINERS >> @@ -13242,6 +13242,7 @@ M: Zhao Qunqin <zhaoqunqin@loongson.cn> >> L: linux-edac@vger.kernel.org >> S: Maintained >> F: Documentation/devicetree/bindings/edac/loongson,ls3a5000-mc-edac.yaml >> +F: drivers/edac/ls3a5000_edac.c >> >> LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI) >> M: Sathya Prakash <sathya.prakash@broadcom.com> >> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig >> index 70f169210..348030c24 100644 >> --- a/arch/loongarch/Kconfig >> +++ b/arch/loongarch/Kconfig >> @@ -182,6 +182,7 @@ config LOONGARCH >> select PCI_QUIRKS >> select PERF_USE_VMALLOC >> select RTC_LIB >> + select EDAC_SUPPORT > I think you got here comment before. How did you address it? I just randomly found a spot, and I will put it at the end(next version patch). > >> select SPARSE_IRQ >> select SYSCTL_ARCH_UNALIGN_ALLOW >> select SYSCTL_ARCH_UNALIGN_NO_WARN >> diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig >> index 16c8de505..2d10256f0 100644 >> --- a/drivers/edac/Kconfig >> +++ b/drivers/edac/Kconfig >> @@ -573,5 +573,13 @@ config EDAC_VERSAL >> Support injecting both correctable and uncorrectable errors >> for debugging purposes. >> > ... > > + >> +static int loongson_edac_probe(struct platform_device *pdev) >> +{ >> + struct resource *rs; >> + struct mem_ctl_info *mci; >> + struct edac_mc_layer layers[2]; >> + struct loongson_edac_pvt *pvt; >> + u64 *vbase = NULL; >> + >> + rs = platform_get_resource(pdev, IORESOURCE_MEM, 0); >> + /* not return if can not find resource or resource start equals NULL */ > Why? Because there are multiple memory controllers in the ls3x soc, but the ECC function of some memory controllers cannot be used. But in any case, a node must be created in /sys/devices/system/edac/mc/ through edac_mc_add_mc(mci). Then if the ECC function of the memory controller cannot be used, set start to NULL or do not pass mem resource, which is equivalent to enumeration of memory controller, and the CE count will always be zero. >> + if (rs && rs->start) { >> + vbase = devm_ioremap_resource(&pdev->dev, rs); >> + if (IS_ERR(vbase)) >> + return PTR_ERR(vbase); >> + } >> + >> + /* allocate a new MC control structure */ >> + layers[0].type = EDAC_MC_LAYER_CHANNEL; >> + layers[0].size = 1; >> + layers[0].is_virt_csrow = false; >> + layers[1].type = EDAC_MC_LAYER_SLOT; >> + layers[1].size = 1; >> + layers[1].is_virt_csrow = true; >> + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt)); >> + if (mci == NULL) >> + return -ENOMEM; >> + >> + edac_dbg(0, "MC: mci = %p\n", mci); > Nope, drop. You should never print pointers. I will drop it in next version patch. >> + >> + mci->mc_idx = edac_device_alloc_index(); >> + mci->mtype_cap = MEM_FLAG_RDDR4; >> + mci->edac_ctl_cap = EDAC_FLAG_NONE; >> + mci->edac_cap = EDAC_FLAG_NONE; >> + mci->mod_name = "loongson_edac.c"; >> + mci->ctl_name = "loongson_edac_ctl"; >> + mci->dev_name = "loongson_edac_dev"; >> + mci->ctl_page_to_phys = NULL; >> + mci->pdev = &pdev->dev; >> + mci->error_desc.grain = 8; >> + /* Set the function pointer to an actual operation function */ >> + mci->edac_check = loongson_edac_check; >> + >> + loongson_pvt_init(mci, vbase); >> + get_dimm_config(mci); >> + >> + if (edac_mc_add_mc(mci)) { >> + edac_dbg(0, "MC: failed edac_mc_add_mc()\n"); >> + edac_mc_free(mci); > That's still a success of probe? Weird a bit. I will add return ret if (ret =edac_mc_add_mc(mci)) in next version patch. > >> + } >> + edac_op_state = EDAC_OPSTATE_POLL; >> + >> + return 0; >> +} >> + >> +static void loongson_edac_remove(struct platform_device *pdev) >> +{ >> + struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev); >> + >> + if (mci) >> + edac_mc_free(mci); >> +} >> + >> +static const struct of_device_id loongson_edac_of_match[] = { >> + { .compatible = "loongson,ls3a5000-mc-edac", }, >> + {} >> +}; >> +MODULE_DEVICE_TABLE(of, loongson_edac_of_match); >> + >> +static struct platform_driver loongson_edac_driver = { >> + .probe = loongson_edac_probe, >> + .remove = loongson_edac_remove, >> + .driver = { >> + .name = "ls-mc-edac", >> + .of_match_table = loongson_edac_of_match, >> + }, >> +}; >> + >> +module_platform_driver(loongson_edac_driver); >> + >> +MODULE_LICENSE("GPL"); >> +MODULE_AUTHOR("Zhao Qunqin <zhaoqunqin@loongson.cn>\n"); > Drop \n. I will drop it in next version patch. >> +MODULE_DESCRIPTION("EDAC driver for loongson memory controller"); >> -- >> 2.43.0 >> Best regards, Zhao Qunqin.
在 2024/9/3 下午2:33, Markus Elfring 写道: > … >> +++ b/drivers/edac/ls3a5000_edac.c >> @@ -0,0 +1,187 @@ > … >> +MODULE_AUTHOR("Zhao Qunqin <zhaoqunqin@loongson.cn>\n"); > How do you think about to omit a line break character from such a string literal? I will drop \n in next version patch. Best regards, Zhao Qunqin. > > Regards, > Markus
On 03/09/2024 09:24, Zhao Qunqin wrote: > > 在 2024/9/3 下午2:47, Krzysztof Kozlowski 写道: >> On Tue, Sep 03, 2024 at 09:53:54AM +0800, Zhao Qunqin wrote: >>> Report single bit errors (CE) only. >>> >>> Signed-off-by: Zhao Qunqin <zhaoqunqin@loongson.cn> >>> --- >>> MAINTAINERS | 1 + >>> arch/loongarch/Kconfig | 1 + >>> drivers/edac/Kconfig | 8 ++ >>> drivers/edac/Makefile | 1 + >>> drivers/edac/ls3a5000_edac.c | 187 +++++++++++++++++++++++++++++++++++ >>> 5 files changed, 198 insertions(+) >>> create mode 100644 drivers/edac/ls3a5000_edac.c >>> >>> diff --git a/MAINTAINERS b/MAINTAINERS >>> index 6cc8cfc8f..b43f82279 100644 >>> --- a/MAINTAINERS >>> +++ b/MAINTAINERS >>> @@ -13242,6 +13242,7 @@ M: Zhao Qunqin <zhaoqunqin@loongson.cn> >>> L: linux-edac@vger.kernel.org >>> S: Maintained >>> F: Documentation/devicetree/bindings/edac/loongson,ls3a5000-mc-edac.yaml >>> +F: drivers/edac/ls3a5000_edac.c >>> >>> LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI) >>> M: Sathya Prakash <sathya.prakash@broadcom.com> >>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig >>> index 70f169210..348030c24 100644 >>> --- a/arch/loongarch/Kconfig >>> +++ b/arch/loongarch/Kconfig >>> @@ -182,6 +182,7 @@ config LOONGARCH >>> select PCI_QUIRKS >>> select PERF_USE_VMALLOC >>> select RTC_LIB >>> + select EDAC_SUPPORT >> I think you got here comment before. How did you address it? > I just randomly found a spot, and I will put it at the end(next version > patch). No, the comment was different. You must not select user-visible symbols. >> >>> select SPARSE_IRQ >>> select SYSCTL_ARCH_UNALIGN_ALLOW >>> select SYSCTL_ARCH_UNALIGN_NO_WARN >>> diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig >>> index 16c8de505..2d10256f0 100644 >>> --- a/drivers/edac/Kconfig >>> +++ b/drivers/edac/Kconfig >>> @@ -573,5 +573,13 @@ config EDAC_VERSAL >>> Support injecting both correctable and uncorrectable errors >>> for debugging purposes. >>> >> ... >> >> + >>> +static int loongson_edac_probe(struct platform_device *pdev) >>> +{ >>> + struct resource *rs; >>> + struct mem_ctl_info *mci; >>> + struct edac_mc_layer layers[2]; >>> + struct loongson_edac_pvt *pvt; >>> + u64 *vbase = NULL; >>> + >>> + rs = platform_get_resource(pdev, IORESOURCE_MEM, 0); >>> + /* not return if can not find resource or resource start equals NULL */ >> Why? > > Because there are multiple memory controllers in the ls3x soc, > > but the ECC function of some memory controllers cannot be used. Then what does the driver do for such memory controllers? Your binding is quite clear here that above code is just bogus. It is not possible to have node without reg. Please point us to your DTS and results of dtbs_check. > > But in any case, a node must be created in /sys/devices/system/edac/mc/ > through edac_mc_add_mc(mci). > > Then if the ECC function of the memory controller cannot be used, set > start to NULL or do not pass mem resource, > > which is equivalent to enumeration of memory controller, and the CE > count will always be zero. > >>> + if (rs && rs->start) { >>> + vbase = devm_ioremap_resource(&pdev->dev, rs); >>> + if (IS_ERR(vbase)) >>> + return PTR_ERR(vbase); >>> + } Best regards, Krzysztof
On Tue, 2024-09-03 at 09:58 +0200, Krzysztof Kozlowski wrote: > > > > + select EDAC_SUPPORT > > > I think you got here comment before. How did you address it? > > I just randomly found a spot, and I will put it at the end(next version > > patch). > > No, the comment was different. You must not select user-visible symbols. EDAC_SUPPORT isn't user-visible. EDAC is and it has been removed.
在 2024/9/3 下午3:58, Krzysztof Kozlowski 写道: > On 03/09/2024 09:24, Zhao Qunqin wrote: >> 在 2024/9/3 下午2:47, Krzysztof Kozlowski 写道: >>> On Tue, Sep 03, 2024 at 09:53:54AM +0800, Zhao Qunqin wrote: >>>> Report single bit errors (CE) only. >>>> >>>> Signed-off-by: Zhao Qunqin <zhaoqunqin@loongson.cn> >>>> --- >>>> MAINTAINERS | 1 + >>>> arch/loongarch/Kconfig | 1 + >>>> drivers/edac/Kconfig | 8 ++ >>>> drivers/edac/Makefile | 1 + >>>> drivers/edac/ls3a5000_edac.c | 187 +++++++++++++++++++++++++++++++++++ >>>> 5 files changed, 198 insertions(+) >>>> create mode 100644 drivers/edac/ls3a5000_edac.c >>>> >>>> diff --git a/MAINTAINERS b/MAINTAINERS >>>> index 6cc8cfc8f..b43f82279 100644 >>>> --- a/MAINTAINERS >>>> +++ b/MAINTAINERS >>>> @@ -13242,6 +13242,7 @@ M: Zhao Qunqin <zhaoqunqin@loongson.cn> >>>> L: linux-edac@vger.kernel.org >>>> S: Maintained >>>> F: Documentation/devicetree/bindings/edac/loongson,ls3a5000-mc-edac.yaml >>>> +F: drivers/edac/ls3a5000_edac.c >>>> >>>> LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI) >>>> M: Sathya Prakash <sathya.prakash@broadcom.com> >>>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig >>>> index 70f169210..348030c24 100644 >>>> --- a/arch/loongarch/Kconfig >>>> +++ b/arch/loongarch/Kconfig >>>> @@ -182,6 +182,7 @@ config LOONGARCH >>>> select PCI_QUIRKS >>>> select PERF_USE_VMALLOC >>>> select RTC_LIB >>>> + select EDAC_SUPPORT >>> I think you got here comment before. How did you address it? >> I just randomly found a spot, and I will put it at the end(next version >> patch). > No, the comment was different. You must not select user-visible symbols. Last version patch is: @@ -182,6 +182,7 @@ config LOONGARCH select PCI_QUIRKS select PERF_USE_VMALLOC select RTC_LIB + select EDAC_SUPPORT + select EDAC I delet "select EDAC" in this version patch. Other arch's Kconfig has "select EDAC_SUPPORT", such as arch/x86/Kconfig and arch/arm64/Kconfig . > >>>> select SPARSE_IRQ >>>> select SYSCTL_ARCH_UNALIGN_ALLOW >>>> select SYSCTL_ARCH_UNALIGN_NO_WARN >>>> diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig >>>> index 16c8de505..2d10256f0 100644 >>>> --- a/drivers/edac/Kconfig >>>> +++ b/drivers/edac/Kconfig >>>> @@ -573,5 +573,13 @@ config EDAC_VERSAL >>>> Support injecting both correctable and uncorrectable errors >>>> for debugging purposes. >>>> >>> ... >>> >>> + >>>> +static int loongson_edac_probe(struct platform_device *pdev) >>>> +{ >>>> + struct resource *rs; >>>> + struct mem_ctl_info *mci; >>>> + struct edac_mc_layer layers[2]; >>>> + struct loongson_edac_pvt *pvt; >>>> + u64 *vbase = NULL; >>>> + >>>> + rs = platform_get_resource(pdev, IORESOURCE_MEM, 0); >>>> + /* not return if can not find resource or resource start equals NULL */ >>> Why? >> Because there are multiple memory controllers in the ls3x soc, >> >> but the ECC function of some memory controllers cannot be used. > Then what does the driver do for such memory controllers? Your binding > is quite clear here that above code is just bogus. It is not possible to > have node without reg. I will rewrite the code above. > Please point us to your DTS and results of dtbs_check. > >> But in any case, a node must be created in /sys/devices/system/edac/mc/ >> through edac_mc_add_mc(mci). >> >> Then if the ECC function of the memory controller cannot be used, set >> start to NULL or do not pass mem resource, >> >> which is equivalent to enumeration of memory controller, and the CE >> count will always be zero. >> >>>> + if (rs && rs->start) { >>>> + vbase = devm_ioremap_resource(&pdev->dev, rs); >>>> + if (IS_ERR(vbase)) >>>> + return PTR_ERR(vbase); >>>> + } > > Best regards, > Krzysztof
On 03/09/2024 10:30, Xi Ruoyao wrote: > On Tue, 2024-09-03 at 09:58 +0200, Krzysztof Kozlowski wrote: >>>>> + select EDAC_SUPPORT >>>> I think you got here comment before. How did you address it? >>> I just randomly found a spot, and I will put it at the end(next version >>> patch). >> >> No, the comment was different. You must not select user-visible symbols. > > EDAC_SUPPORT isn't user-visible. EDAC is and it has been removed. Ah, ok, I missed that. Shouldn't this be separate patch with its own rationale? Or before this driver there was no EDAC support for Loongson at all? Best regards, Krzysztof
在 2024/9/3 下午7:29, Krzysztof Kozlowski 写道: > On 03/09/2024 10:30, Xi Ruoyao wrote: >> On Tue, 2024-09-03 at 09:58 +0200, Krzysztof Kozlowski wrote: >>>>>> + select EDAC_SUPPORT >>>>> I think you got here comment before. How did you address it? >>>> I just randomly found a spot, and I will put it at the end(next version >>>> patch). >>> No, the comment was different. You must not select user-visible symbols. >> EDAC_SUPPORT isn't user-visible. EDAC is and it has been removed. > Ah, ok, I missed that. Shouldn't this be separate patch with its own > rationale? Or before this driver there was no EDAC support for Loongson > at all? Before this driver there was no EDAC support for Loongson at all. Best regards, Zhao Qunqin. > Best regards, > Krzysztof
On 03/09/2024 13:31, Zhao Qunqin wrote: > > 在 2024/9/3 下午7:29, Krzysztof Kozlowski 写道: >> On 03/09/2024 10:30, Xi Ruoyao wrote: >>> On Tue, 2024-09-03 at 09:58 +0200, Krzysztof Kozlowski wrote: >>>>>>> + select EDAC_SUPPORT >>>>>> I think you got here comment before. How did you address it? >>>>> I just randomly found a spot, and I will put it at the end(next version >>>>> patch). >>>> No, the comment was different. You must not select user-visible symbols. >>> EDAC_SUPPORT isn't user-visible. EDAC is and it has been removed. >> Ah, ok, I missed that. Shouldn't this be separate patch with its own >> rationale? Or before this driver there was no EDAC support for Loongson >> at all? > > Before this driver there was no EDAC support for Loongson > at all. Ack, ok. Best regards, Krzysztof
diff --git a/MAINTAINERS b/MAINTAINERS index 6cc8cfc8f..b43f82279 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13242,6 +13242,7 @@ M: Zhao Qunqin <zhaoqunqin@loongson.cn> L: linux-edac@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/edac/loongson,ls3a5000-mc-edac.yaml +F: drivers/edac/ls3a5000_edac.c LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI) M: Sathya Prakash <sathya.prakash@broadcom.com> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 70f169210..348030c24 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -182,6 +182,7 @@ config LOONGARCH select PCI_QUIRKS select PERF_USE_VMALLOC select RTC_LIB + select EDAC_SUPPORT select SPARSE_IRQ select SYSCTL_ARCH_UNALIGN_ALLOW select SYSCTL_ARCH_UNALIGN_NO_WARN diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 16c8de505..2d10256f0 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -573,5 +573,13 @@ config EDAC_VERSAL Support injecting both correctable and uncorrectable errors for debugging purposes. +config EDAC_LS3A5000 + tristate "Ls3a5000 Memory Controller" + depends on LOONGARCH || COMPILE_TEST + help + Support for error detection and correction on the ls3a5000 memory + controller. This driver report single bit errors (CE) only. + Ls3c5000l, ls3c5000, ls3d5000, ls3a6000 and ls3c6000 are compatible + with ls3a5000. endif # EDAC diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 4edfb83ff..0974e3fa6 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -89,3 +89,4 @@ obj-$(CONFIG_EDAC_DMC520) += dmc520_edac.o obj-$(CONFIG_EDAC_NPCM) += npcm_edac.o obj-$(CONFIG_EDAC_ZYNQMP) += zynqmp_edac.o obj-$(CONFIG_EDAC_VERSAL) += versal_edac.o +obj-$(CONFIG_EDAC_LS3A5000) += ls3a5000_edac.o diff --git a/drivers/edac/ls3a5000_edac.c b/drivers/edac/ls3a5000_edac.c new file mode 100644 index 000000000..c68fd7c5f --- /dev/null +++ b/drivers/edac/ls3a5000_edac.c @@ -0,0 +1,187 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited. + */ + +#include <linux/edac.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/platform_device.h> + +#include "edac_module.h" + +enum ecc_index { + ECC_SET = 0, + ECC_RESERVED, + ECC_COUNT, + ECC_CS_COUNT, + ECC_CODE, + ECC_ADDR, + ECC_DATA0, + ECC_DATA1, + ECC_DATA2, + ECC_DATA3, +}; + +struct loongson_edac_pvt { + u64 *ecc_base; + int last_ce_count; +}; + +static void loongson_update_ce_count(struct mem_ctl_info *mci, + int chan, + int new) +{ + int add; + struct loongson_edac_pvt *pvt = mci->pvt_info; + + add = new - pvt->last_ce_count; + + /* Store the new value */ + pvt->last_ce_count = new; + + /* device resume or any other exceptions*/ + if (add < 0) + return; + + /*updated the edac core */ + if (add != 0) { + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add, + 0, 0, 0, + chan, 0, -1, "error", ""); + edac_mc_printk(mci, KERN_INFO, "add: %d", add); + } +} + +static int loongson_read_ecc(struct mem_ctl_info *mci) +{ + u64 ecc; + int cs = 0; + struct loongson_edac_pvt *pvt = mci->pvt_info; + + if (!pvt->ecc_base) + return pvt->last_ce_count; + + ecc = pvt->ecc_base[ECC_CS_COUNT]; + cs += ecc & 0xff; // cs0 + cs += (ecc >> 8) & 0xff; // cs1 + cs += (ecc >> 16) & 0xff; // cs2 + cs += (ecc >> 24) & 0xff; // cs3 + + return cs; +} + +static void loongson_edac_check(struct mem_ctl_info *mci) +{ + loongson_update_ce_count(mci, 0, loongson_read_ecc(mci)); +} + +static int get_dimm_config(struct mem_ctl_info *mci) +{ + u32 size, npages; + struct dimm_info *dimm; + + /* size not used */ + size = -1; + npages = MiB_TO_PAGES(size); + + dimm = edac_get_dimm(mci, 0, 0, 0); + dimm->nr_pages = npages; + snprintf(dimm->label, sizeof(dimm->label), + "MC#%uChannel#%u_DIMM#%u", + mci->mc_idx, 0, 0); + dimm->grain = 8; + + return 0; +} + +static void loongson_pvt_init(struct mem_ctl_info *mci, u64 *vbase) +{ + struct loongson_edac_pvt *pvt = mci->pvt_info; + + pvt->ecc_base = vbase; + pvt->last_ce_count = loongson_read_ecc(mci); +} + +static int loongson_edac_probe(struct platform_device *pdev) +{ + struct resource *rs; + struct mem_ctl_info *mci; + struct edac_mc_layer layers[2]; + struct loongson_edac_pvt *pvt; + u64 *vbase = NULL; + + rs = platform_get_resource(pdev, IORESOURCE_MEM, 0); + /* not return if can not find resource or resource start equals NULL */ + if (rs && rs->start) { + vbase = devm_ioremap_resource(&pdev->dev, rs); + if (IS_ERR(vbase)) + return PTR_ERR(vbase); + } + + /* allocate a new MC control structure */ + layers[0].type = EDAC_MC_LAYER_CHANNEL; + layers[0].size = 1; + layers[0].is_virt_csrow = false; + layers[1].type = EDAC_MC_LAYER_SLOT; + layers[1].size = 1; + layers[1].is_virt_csrow = true; + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt)); + if (mci == NULL) + return -ENOMEM; + + edac_dbg(0, "MC: mci = %p\n", mci); + + mci->mc_idx = edac_device_alloc_index(); + mci->mtype_cap = MEM_FLAG_RDDR4; + mci->edac_ctl_cap = EDAC_FLAG_NONE; + mci->edac_cap = EDAC_FLAG_NONE; + mci->mod_name = "loongson_edac.c"; + mci->ctl_name = "loongson_edac_ctl"; + mci->dev_name = "loongson_edac_dev"; + mci->ctl_page_to_phys = NULL; + mci->pdev = &pdev->dev; + mci->error_desc.grain = 8; + /* Set the function pointer to an actual operation function */ + mci->edac_check = loongson_edac_check; + + loongson_pvt_init(mci, vbase); + get_dimm_config(mci); + + if (edac_mc_add_mc(mci)) { + edac_dbg(0, "MC: failed edac_mc_add_mc()\n"); + edac_mc_free(mci); + } + edac_op_state = EDAC_OPSTATE_POLL; + + return 0; +} + +static void loongson_edac_remove(struct platform_device *pdev) +{ + struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev); + + if (mci) + edac_mc_free(mci); +} + +static const struct of_device_id loongson_edac_of_match[] = { + { .compatible = "loongson,ls3a5000-mc-edac", }, + {} +}; +MODULE_DEVICE_TABLE(of, loongson_edac_of_match); + +static struct platform_driver loongson_edac_driver = { + .probe = loongson_edac_probe, + .remove = loongson_edac_remove, + .driver = { + .name = "ls-mc-edac", + .of_match_table = loongson_edac_of_match, + }, +}; + +module_platform_driver(loongson_edac_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Zhao Qunqin <zhaoqunqin@loongson.cn>\n"); +MODULE_DESCRIPTION("EDAC driver for loongson memory controller");
Report single bit errors (CE) only. Signed-off-by: Zhao Qunqin <zhaoqunqin@loongson.cn> --- MAINTAINERS | 1 + arch/loongarch/Kconfig | 1 + drivers/edac/Kconfig | 8 ++ drivers/edac/Makefile | 1 + drivers/edac/ls3a5000_edac.c | 187 +++++++++++++++++++++++++++++++++++ 5 files changed, 198 insertions(+) create mode 100644 drivers/edac/ls3a5000_edac.c