diff mbox series

[v1,1/2] Loongarch: EDAC driver for loongson memory controller

Message ID 20240821064728.8642-2-zhaoqunqin@loongson.cn (mailing list archive)
State New
Headers show
Series Add EDAC driver for loongson memory controller | expand

Commit Message

Zhao Qunqin Aug. 21, 2024, 6:47 a.m. UTC
From: zhaoqunqin <zhaoqunqin@loongson.cn>

Report single bit errors (CE) only

Signed-off-by: zhaoqunqin <zhaoqunqin@loongson.cn>
---
 arch/loongarch/Kconfig       |   2 +
 drivers/edac/Kconfig         |  10 ++
 drivers/edac/Makefile        |   1 +
 drivers/edac/loongson_edac.c | 208 +++++++++++++++++++++++++++++++++++
 4 files changed, 221 insertions(+)
 create mode 100644 drivers/edac/loongson_edac.c

Comments

Xi Ruoyao Aug. 21, 2024, 6:51 a.m. UTC | #1
On Wed, 2024-08-21 at 14:47 +0800, Zhao Qunqin wrote:
> --- a/arch/loongarch/Kconfig
> +++ b/arch/loongarch/Kconfig
> @@ -179,6 +179,8 @@ config LOONGARCH
>  	select PCI_QUIRKS
>  	select PERF_USE_VMALLOC
>  	select RTC_LIB
> +	select EDAC_SUPPORT
> +	select EDAC

This line looks incorrect.  It forces users to enable EDAC even
if they don't need it (for example, when using non-ECC memory).

And no other arch does this.
Krzysztof Kozlowski Aug. 21, 2024, 8:24 a.m. UTC | #2
On 21/08/2024 08:47, Zhao Qunqin wrote:
> From: zhaoqunqin <zhaoqunqin@loongson.cn>
> 
> Report single bit errors (CE) only
> 
> Signed-off-by: zhaoqunqin <zhaoqunqin@loongson.cn>
> ---
>  arch/loongarch/Kconfig       |   2 +
>  drivers/edac/Kconfig         |  10 ++
>  drivers/edac/Makefile        |   1 +
>  drivers/edac/loongson_edac.c | 208 +++++++++++++++++++++++++++++++++++
>  4 files changed, 221 insertions(+)
>  create mode 100644 drivers/edac/loongson_edac.c
> 
> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> index ddc042895..59d47053f 100644
> --- a/arch/loongarch/Kconfig
> +++ b/arch/loongarch/Kconfig
> @@ -179,6 +179,8 @@ config LOONGARCH
>  	select PCI_QUIRKS
>  	select PERF_USE_VMALLOC
>  	select RTC_LIB
> +	select EDAC_SUPPORT
> +	select EDAC

Nope, you should not select user-visible symbols.

>  	select SPARSE_IRQ
>  	select SYSCTL_ARCH_UNALIGN_ALLOW
>  	select SYSCTL_ARCH_UNALIGN_NO_WARN
> diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
> index 16c8de505..60b1997f0 100644
> --- a/drivers/edac/Kconfig
> +++ b/drivers/edac/Kconfig
> @@ -573,5 +573,15 @@ config EDAC_VERSAL
>  	  Support injecting both correctable and uncorrectable errors
>  	  for debugging purposes.
>  
> +config EDAC_LOONGSON
> +	tristate "Loongson EDAC"
> +	depends on LOONGARCH

Missing COMPILE_TEST (e.g. "depends on LOONGARCH || COMPILE_TEST").

> +	default m
> +	help
> +	  Support for error detection and correction on the loongson memory
> +	  controller.
> +
> +	  Report single bit errors (CE) only.
> +

Why the double blank line? Drop one.

>  
>  endif # EDAC
> diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
> index 4edfb83ff..d6f2cfe7e 100644
> --- a/drivers/edac/Makefile
> +++ b/drivers/edac/Makefile
> @@ -89,3 +89,4 @@ obj-$(CONFIG_EDAC_DMC520)		+= dmc520_edac.o
>  obj-$(CONFIG_EDAC_NPCM)			+= npcm_edac.o
>  obj-$(CONFIG_EDAC_ZYNQMP)		+= zynqmp_edac.o
>  obj-$(CONFIG_EDAC_VERSAL)		+= versal_edac.o
> +obj-$(CONFIG_EDAC_LOONGSON)		+= loongson_edac.o
> diff --git a/drivers/edac/loongson_edac.c b/drivers/edac/loongson_edac.c
> new file mode 100644
> index 000000000..c639c11ed
> --- /dev/null
> +++ b/drivers/edac/loongson_edac.c
> @@ -0,0 +1,208 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2024 Loongson Technology Corporation Limited.
> + */
> +
> +#include <linux/edac.h>
> +#include <linux/module.h>
> +#include <linux/init.h>
> +#include <linux/platform_device.h>
> +
> +#include "edac_module.h"
> +
> +enum ecc_index {
> +	ECC_SET = 0,
> +	ECC_RESERVED,
> +	ECC_COUNT,
> +	ECC_CS_COUNT,
> +	ECC_CODE,
> +	ECC_ADDR,
> +	ECC_DATA0,
> +	ECC_DATA1,
> +	ECC_DATA2,
> +	ECC_DATA3,
> +};
> +
> +static long idx;

Drop this; it is racy and useless. If you need an ID, then use IDR, but
first explain what purpose it serves.

> +
> +struct loongson_edac_pvt {
> +	volatile u64 *ecc_base;
> +	int last_ce_count;
> +};
> +
> +static void loongson_update_ce_count(struct mem_ctl_info *mci,
> +					int chan,
> +					int new)
> +{
> +	int add;
> +	struct loongson_edac_pvt *pvt = mci->pvt_info;
> +
> +	add = new - pvt->last_ce_count;
> +
> +	/* Store the new values */
> +	pvt->last_ce_count = new;
> +
> +	/* device resume or any other exceptions*/
> +	if (add < 0)
> +		return;
> +
> +	/*updated the edac core */
> +	if (add != 0) {
> +		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add,
> +					0, 0, 0,
> +					chan, 0, -1, "error", "");
> +		edac_mc_printk(mci, KERN_INFO, "add: %d", add);
> +	}
> +}
> +
> +static int loongson_read_ecc(struct mem_ctl_info *mci)
> +{
> +	u64 ecc;
> +	int cs = 0;
> +	struct loongson_edac_pvt *pvt = mci->pvt_info;
> +
> +	if (!pvt->ecc_base)
> +		return pvt->last_ce_count;
> +
> +	ecc = pvt->ecc_base[ECC_CS_COUNT];
> +	cs += ecc & 0xff;		// cs0
> +	cs += (ecc >> 8) & 0xff;	// cs1
> +	cs += (ecc >> 16) & 0xff;	// cs2
> +	cs += (ecc >> 24) & 0xff;	// cs3
> +
> +	return cs;
> +}
> +
> +static void loongson_edac_check(struct mem_ctl_info *mci)
> +{
> +	loongson_update_ce_count(mci, 0, loongson_read_ecc(mci));
> +}
> +
> +static int get_dimm_config(struct mem_ctl_info *mci)
> +{
> +	u32 size, npages;
> +	struct dimm_info *dimm;
> +
> +	/* size not used */
> +	size = -1;
> +	npages = MiB_TO_PAGES(size);
> +
> +	dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
> +			0, 0, 0);
> +	dimm->nr_pages = npages;
> +	snprintf(dimm->label, sizeof(dimm->label),
> +			"MC#%uChannel#%u_DIMM#%u",
> +			mci->mc_idx, 0, 0);
> +	dimm->grain = 8;
> +
> +	return 0;
> +}
> +
> +static void loongson_pvt_init(struct mem_ctl_info *mci, u64 *vbase)
> +{
> +	struct loongson_edac_pvt *pvt = mci->pvt_info;
> +
> +	pvt->ecc_base = vbase;
> +	pvt->last_ce_count = loongson_read_ecc(mci);
> +}
> +
> +static int loongson_edac_probe(struct platform_device *pdev)
> +{
> +	struct resource *rs;
> +	struct mem_ctl_info *mci;
> +	struct edac_mc_layer layers[2];
> +	struct loongson_edac_pvt *pvt;
> +	u64 *vbase = NULL;
> +
> +	rs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	if (!rs)
> +		return -EINVAL;
> +	if (rs->start) {
> +		vbase = devm_ioremap(&pdev->dev, rs->start, resource_size(rs));

Why can't you use the wrapper over these two calls - devm_ioremap_resource()?

> +		if (!vbase)
> +			return -ENOMEM;
> +	}
> +
> +	/* allocate a new MC control structure */
> +	layers[0].type = EDAC_MC_LAYER_CHANNEL;
> +	layers[0].size = 1;
> +	layers[0].is_virt_csrow = false;
> +	layers[1].type = EDAC_MC_LAYER_SLOT;
> +	layers[1].size = 1;
> +	layers[1].is_virt_csrow = true;
> +	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
> +	if (mci == NULL)
> +		return -ENOMEM;
> +
> +	edac_dbg(0, "MC: mci = %p\n", mci);
> +
> +	mci->mc_idx = idx++;
> +	mci->mtype_cap = MEM_FLAG_RDDR4;
> +	mci->edac_ctl_cap = EDAC_FLAG_NONE;
> +	mci->edac_cap = EDAC_FLAG_NONE;
> +	mci->mod_name = "loongson_edac.c";
> +	mci->ctl_name = "loongson_edac_ctl";
> +	mci->dev_name = "loongson_edac_dev";
> +	mci->ctl_page_to_phys = NULL;
> +	mci->pdev = &pdev->dev;
> +	mci->error_desc.grain = 8;
> +	/* Set the function pointer to an actual operation function */
> +	mci->edac_check = loongson_edac_check;
> +
> +	loongson_pvt_init(mci, vbase);
> +	get_dimm_config(mci);
> +
> +	if (edac_mc_add_mc(mci)) {
> +		edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
> +		edac_mc_free(mci);
> +	}
> +	return 0;
> +}
> +
> +static int loongson_edac_remove(struct platform_device *pdev)
> +{
> +	struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev);
> +
> +	if (mci) {
> +		edac_mc_free(mci);
> +		return 0;
> +	}
> +	return -ENODEV;
> +}
> +
> +static const struct of_device_id loongson_edac_of_match[] = {
> +	{ .compatible = "loongson,ls-mc-edac", },
> +	{}
> +};
> +MODULE_DEVICE_TABLE(of, loongson_edac_of_match);
> +
> +static struct platform_driver loongson_edac_driver = {
> +	.probe		= loongson_edac_probe,
> +	.remove		= loongson_edac_remove,
> +	.driver		= {
> +		.name	= "ls-mc-edac",
> +		.owner = THIS_MODULE,

Drop... that's ancient code.

> +		.of_match_table = loongson_edac_of_match,
> +	},
> +};
> +
> +static int __init loongson_edac_init(void)
> +{
> +	/* poll only */
> +	edac_op_state = EDAC_OPSTATE_POLL;

Drop, unused. Clean your driver before posting it.

> +
> +	return platform_driver_register(&loongson_edac_driver);
> +}
> +
> +static void __exit loongson_edac_exit(void)
> +{
> +	platform_driver_unregister(&loongson_edac_driver);
> +}
> +
> +module_init(loongson_edac_init);
> +module_exit(loongson_edac_exit);

Use module_platform_driver() instead.

> +module_param(edac_op_state, int, 0444);

Drop

> +
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR("Zhao Qunqin <zhaoqunqin@loongson.cn>\n");
> +MODULE_DESCRIPTION("EDAC driver for loongson memory controller");

Best regards,
Krzysztof
kernel test robot Aug. 22, 2024, 12:07 a.m. UTC | #3
Hi Zhao,

kernel test robot noticed the following build errors:

[auto build test ERROR on ras/edac-for-next]
[also build test ERROR on robh/for-next linus/master v6.11-rc4 next-20240821]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Zhao-Qunqin/Loongarch-EDAC-driver-for-loongson-memory-controller/20240821-145127
base:   https://git.kernel.org/pub/scm/linux/kernel/git/ras/ras.git edac-for-next
patch link:    https://lore.kernel.org/r/20240821064728.8642-2-zhaoqunqin%40loongson.cn
patch subject: [PATCH v1 1/2] Loongarch: EDAC driver for loongson memory controller
config: loongarch-allmodconfig (https://download.01.org/0day-ci/archive/20240822/202408220634.Irq2TUcL-lkp@intel.com/config)
compiler: loongarch64-linux-gcc (GCC) 14.1.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240822/202408220634.Irq2TUcL-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202408220634.Irq2TUcL-lkp@intel.com/

All errors (new ones prefixed by >>):

   drivers/edac/loongson_edac.c: In function 'get_dimm_config':
>> drivers/edac/loongson_edac.c:90:16: error: implicit declaration of function 'EDAC_DIMM_PTR' [-Wimplicit-function-declaration]
      90 |         dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
         |                ^~~~~~~~~~~~~
>> drivers/edac/loongson_edac.c:90:14: error: assignment to 'struct dimm_info *' from 'int' makes pointer from integer without a cast [-Wint-conversion]
      90 |         dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
         |              ^
   drivers/edac/loongson_edac.c: At top level:
>> drivers/edac/loongson_edac.c:181:27: error: initialization of 'void (*)(struct platform_device *)' from incompatible pointer type 'int (*)(struct platform_device *)' [-Wincompatible-pointer-types]
     181 |         .remove         = loongson_edac_remove,
         |                           ^~~~~~~~~~~~~~~~~~~~
   drivers/edac/loongson_edac.c:181:27: note: (near initialization for 'loongson_edac_driver.<anonymous>.remove')


vim +/EDAC_DIMM_PTR +90 drivers/edac/loongson_edac.c

    80	
    81	static int get_dimm_config(struct mem_ctl_info *mci)
    82	{
    83		u32 size, npages;
    84		struct dimm_info *dimm;
    85	
    86		/* size not used */
    87		size = -1;
    88		npages = MiB_TO_PAGES(size);
    89	
  > 90		dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
    91				0, 0, 0);
    92		dimm->nr_pages = npages;
    93		snprintf(dimm->label, sizeof(dimm->label),
    94				"MC#%uChannel#%u_DIMM#%u",
    95				mci->mc_idx, 0, 0);
    96		dimm->grain = 8;
    97	
    98		return 0;
    99	}
   100	
   101	static void loongson_pvt_init(struct mem_ctl_info *mci, u64 *vbase)
   102	{
   103		struct loongson_edac_pvt *pvt = mci->pvt_info;
   104	
   105		pvt->ecc_base = vbase;
   106		pvt->last_ce_count = loongson_read_ecc(mci);
   107	}
   108	
   109	static int loongson_edac_probe(struct platform_device *pdev)
   110	{
   111		struct resource *rs;
   112		struct mem_ctl_info *mci;
   113		struct edac_mc_layer layers[2];
   114		struct loongson_edac_pvt *pvt;
   115		u64 *vbase = NULL;
   116	
   117		rs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
   118		if (!rs)
   119			return -EINVAL;
   120		if (rs->start) {
   121			vbase = devm_ioremap(&pdev->dev, rs->start, resource_size(rs));
   122			if (!vbase)
   123				return -ENOMEM;
   124		}
   125	
   126		/* allocate a new MC control structure */
   127		layers[0].type = EDAC_MC_LAYER_CHANNEL;
   128		layers[0].size = 1;
   129		layers[0].is_virt_csrow = false;
   130		layers[1].type = EDAC_MC_LAYER_SLOT;
   131		layers[1].size = 1;
   132		layers[1].is_virt_csrow = true;
   133		mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
   134		if (mci == NULL)
   135			return -ENOMEM;
   136	
   137		edac_dbg(0, "MC: mci = %p\n", mci);
   138	
   139		mci->mc_idx = idx++;
   140		mci->mtype_cap = MEM_FLAG_RDDR4;
   141		mci->edac_ctl_cap = EDAC_FLAG_NONE;
   142		mci->edac_cap = EDAC_FLAG_NONE;
   143		mci->mod_name = "loongson_edac.c";
   144		mci->ctl_name = "loongson_edac_ctl";
   145		mci->dev_name = "loongson_edac_dev";
   146		mci->ctl_page_to_phys = NULL;
   147		mci->pdev = &pdev->dev;
   148		mci->error_desc.grain = 8;
   149		/* Set the function pointer to an actual operation function */
   150		mci->edac_check = loongson_edac_check;
   151	
   152		loongson_pvt_init(mci, vbase);
   153		get_dimm_config(mci);
   154	
   155		if (edac_mc_add_mc(mci)) {
   156			edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
   157			edac_mc_free(mci);
   158		}
   159		return 0;
   160	}
   161	
   162	static int loongson_edac_remove(struct platform_device *pdev)
   163	{
   164		struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev);
   165	
   166		if (mci) {
   167			edac_mc_free(mci);
   168			return 0;
   169		}
   170		return -ENODEV;
   171	}
   172	
   173	static const struct of_device_id loongson_edac_of_match[] = {
   174		{ .compatible = "loongson,ls-mc-edac", },
   175		{}
   176	};
   177	MODULE_DEVICE_TABLE(of, loongson_edac_of_match);
   178	
   179	static struct platform_driver loongson_edac_driver = {
   180		.probe		= loongson_edac_probe,
 > 181		.remove		= loongson_edac_remove,
   182		.driver		= {
   183			.name	= "ls-mc-edac",
   184			.owner = THIS_MODULE,
   185			.of_match_table = loongson_edac_of_match,
   186		},
   187	};
   188
kernel test robot Aug. 22, 2024, 3:14 a.m. UTC | #4
Hi Zhao,

kernel test robot noticed the following build warnings:

[auto build test WARNING on ras/edac-for-next]
[also build test WARNING on robh/for-next linus/master v6.11-rc4 next-20240821]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Zhao-Qunqin/Loongarch-EDAC-driver-for-loongson-memory-controller/20240821-145127
base:   https://git.kernel.org/pub/scm/linux/kernel/git/ras/ras.git edac-for-next
patch link:    https://lore.kernel.org/r/20240821064728.8642-2-zhaoqunqin%40loongson.cn
patch subject: [PATCH v1 1/2] Loongarch: EDAC driver for loongson memory controller
config: loongarch-kismet-CONFIG_EDAC-CONFIG_LOONGARCH-0-0 (https://download.01.org/0day-ci/archive/20240822/202408221024.FpH0yAEh-lkp@intel.com/config)
reproduce: (https://download.01.org/0day-ci/archive/20240822/202408221024.FpH0yAEh-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202408221024.FpH0yAEh-lkp@intel.com/

kismet warnings: (new ones prefixed by >>)
>> kismet: WARNING: unmet direct dependencies detected for EDAC when selected by LOONGARCH
   WARNING: unmet direct dependencies detected for EDAC
     Depends on [n]: HAS_IOMEM [=y] && EDAC_SUPPORT [=y] && RAS [=n]
     Selected by [y]:
     - LOONGARCH [=y]
kernel test robot Aug. 23, 2024, 2:42 a.m. UTC | #5
Hi Zhao,

kernel test robot noticed the following build warnings:

[auto build test WARNING on ras/edac-for-next]
[also build test WARNING on robh/for-next linus/master v6.11-rc4 next-20240822]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Zhao-Qunqin/Loongarch-EDAC-driver-for-loongson-memory-controller/20240821-145127
base:   https://git.kernel.org/pub/scm/linux/kernel/git/ras/ras.git edac-for-next
patch link:    https://lore.kernel.org/r/20240821064728.8642-2-zhaoqunqin%40loongson.cn
patch subject: [PATCH v1 1/2] Loongarch: EDAC driver for loongson memory controller
config: loongarch-loongson3_defconfig (https://download.01.org/0day-ci/archive/20240823/202408231055.okLH0uuC-lkp@intel.com/config)
compiler: loongarch64-linux-gcc (GCC) 13.3.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240823/202408231055.okLH0uuC-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202408231055.okLH0uuC-lkp@intel.com/

All warnings (new ones prefixed by >>):

   drivers/edac/loongson_edac.c: In function 'get_dimm_config':
   drivers/edac/loongson_edac.c:90:16: error: implicit declaration of function 'EDAC_DIMM_PTR' [-Werror=implicit-function-declaration]
      90 |         dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
         |                ^~~~~~~~~~~~~
>> drivers/edac/loongson_edac.c:90:14: warning: assignment to 'struct dimm_info *' from 'int' makes pointer from integer without a cast [-Wint-conversion]
      90 |         dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
         |              ^
   drivers/edac/loongson_edac.c: At top level:
   drivers/edac/loongson_edac.c:181:27: error: initialization of 'void (*)(struct platform_device *)' from incompatible pointer type 'int (*)(struct platform_device *)' [-Werror=incompatible-pointer-types]
     181 |         .remove         = loongson_edac_remove,
         |                           ^~~~~~~~~~~~~~~~~~~~
   drivers/edac/loongson_edac.c:181:27: note: (near initialization for 'loongson_edac_driver.<anonymous>.remove')
   cc1: some warnings being treated as errors


vim +90 drivers/edac/loongson_edac.c

    80	
    81	static int get_dimm_config(struct mem_ctl_info *mci)
    82	{
    83		u32 size, npages;
    84		struct dimm_info *dimm;
    85	
    86		/* size not used */
    87		size = -1;
    88		npages = MiB_TO_PAGES(size);
    89	
  > 90		dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
    91				0, 0, 0);
    92		dimm->nr_pages = npages;
    93		snprintf(dimm->label, sizeof(dimm->label),
    94				"MC#%uChannel#%u_DIMM#%u",
    95				mci->mc_idx, 0, 0);
    96		dimm->grain = 8;
    97	
    98		return 0;
    99	}
   100
diff mbox series

Patch

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index ddc042895..59d47053f 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -179,6 +179,8 @@  config LOONGARCH
 	select PCI_QUIRKS
 	select PERF_USE_VMALLOC
 	select RTC_LIB
+	select EDAC_SUPPORT
+	select EDAC
 	select SPARSE_IRQ
 	select SYSCTL_ARCH_UNALIGN_ALLOW
 	select SYSCTL_ARCH_UNALIGN_NO_WARN
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 16c8de505..60b1997f0 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -573,5 +573,15 @@  config EDAC_VERSAL
 	  Support injecting both correctable and uncorrectable errors
 	  for debugging purposes.
 
+config EDAC_LOONGSON
+	tristate "Loongson EDAC"
+	depends on LOONGARCH
+	default m
+	help
+	  Support for error detection and correction on the loongson memory
+	  controller.
+
+	  Report single bit errors (CE) only.
+
 
 endif # EDAC
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 4edfb83ff..d6f2cfe7e 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -89,3 +89,4 @@  obj-$(CONFIG_EDAC_DMC520)		+= dmc520_edac.o
 obj-$(CONFIG_EDAC_NPCM)			+= npcm_edac.o
 obj-$(CONFIG_EDAC_ZYNQMP)		+= zynqmp_edac.o
 obj-$(CONFIG_EDAC_VERSAL)		+= versal_edac.o
+obj-$(CONFIG_EDAC_LOONGSON)		+= loongson_edac.o
diff --git a/drivers/edac/loongson_edac.c b/drivers/edac/loongson_edac.c
new file mode 100644
index 000000000..c639c11ed
--- /dev/null
+++ b/drivers/edac/loongson_edac.c
@@ -0,0 +1,208 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Loongson Technology Corporation Limited.
+ */
+
+#include <linux/edac.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+
+#include "edac_module.h"
+
+enum ecc_index {
+	ECC_SET = 0,
+	ECC_RESERVED,
+	ECC_COUNT,
+	ECC_CS_COUNT,
+	ECC_CODE,
+	ECC_ADDR,
+	ECC_DATA0,
+	ECC_DATA1,
+	ECC_DATA2,
+	ECC_DATA3,
+};
+
+static long idx;
+
+struct loongson_edac_pvt {
+	volatile u64 *ecc_base;
+	int last_ce_count;
+};
+
+static void loongson_update_ce_count(struct mem_ctl_info *mci,
+					int chan,
+					int new)
+{
+	int add;
+	struct loongson_edac_pvt *pvt = mci->pvt_info;
+
+	add = new - pvt->last_ce_count;
+
+	/* Store the new values */
+	pvt->last_ce_count = new;
+
+	/* device resume or any other exceptions*/
+	if (add < 0)
+		return;
+
+	/*updated the edac core */
+	if (add != 0) {
+		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add,
+					0, 0, 0,
+					chan, 0, -1, "error", "");
+		edac_mc_printk(mci, KERN_INFO, "add: %d", add);
+	}
+}
+
+static int loongson_read_ecc(struct mem_ctl_info *mci)
+{
+	u64 ecc;
+	int cs = 0;
+	struct loongson_edac_pvt *pvt = mci->pvt_info;
+
+	if (!pvt->ecc_base)
+		return pvt->last_ce_count;
+
+	ecc = pvt->ecc_base[ECC_CS_COUNT];
+	cs += ecc & 0xff;		// cs0
+	cs += (ecc >> 8) & 0xff;	// cs1
+	cs += (ecc >> 16) & 0xff;	// cs2
+	cs += (ecc >> 24) & 0xff;	// cs3
+
+	return cs;
+}
+
+static void loongson_edac_check(struct mem_ctl_info *mci)
+{
+	loongson_update_ce_count(mci, 0, loongson_read_ecc(mci));
+}
+
+static int get_dimm_config(struct mem_ctl_info *mci)
+{
+	u32 size, npages;
+	struct dimm_info *dimm;
+
+	/* size not used */
+	size = -1;
+	npages = MiB_TO_PAGES(size);
+
+	dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
+			0, 0, 0);
+	dimm->nr_pages = npages;
+	snprintf(dimm->label, sizeof(dimm->label),
+			"MC#%uChannel#%u_DIMM#%u",
+			mci->mc_idx, 0, 0);
+	dimm->grain = 8;
+
+	return 0;
+}
+
+static void loongson_pvt_init(struct mem_ctl_info *mci, u64 *vbase)
+{
+	struct loongson_edac_pvt *pvt = mci->pvt_info;
+
+	pvt->ecc_base = vbase;
+	pvt->last_ce_count = loongson_read_ecc(mci);
+}
+
+static int loongson_edac_probe(struct platform_device *pdev)
+{
+	struct resource *rs;
+	struct mem_ctl_info *mci;
+	struct edac_mc_layer layers[2];
+	struct loongson_edac_pvt *pvt;
+	u64 *vbase = NULL;
+
+	rs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!rs)
+		return -EINVAL;
+	if (rs->start) {
+		vbase = devm_ioremap(&pdev->dev, rs->start, resource_size(rs));
+		if (!vbase)
+			return -ENOMEM;
+	}
+
+	/* allocate a new MC control structure */
+	layers[0].type = EDAC_MC_LAYER_CHANNEL;
+	layers[0].size = 1;
+	layers[0].is_virt_csrow = false;
+	layers[1].type = EDAC_MC_LAYER_SLOT;
+	layers[1].size = 1;
+	layers[1].is_virt_csrow = true;
+	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
+	if (mci == NULL)
+		return -ENOMEM;
+
+	edac_dbg(0, "MC: mci = %p\n", mci);
+
+	mci->mc_idx = idx++;
+	mci->mtype_cap = MEM_FLAG_RDDR4;
+	mci->edac_ctl_cap = EDAC_FLAG_NONE;
+	mci->edac_cap = EDAC_FLAG_NONE;
+	mci->mod_name = "loongson_edac.c";
+	mci->ctl_name = "loongson_edac_ctl";
+	mci->dev_name = "loongson_edac_dev";
+	mci->ctl_page_to_phys = NULL;
+	mci->pdev = &pdev->dev;
+	mci->error_desc.grain = 8;
+	/* Set the function pointer to an actual operation function */
+	mci->edac_check = loongson_edac_check;
+
+	loongson_pvt_init(mci, vbase);
+	get_dimm_config(mci);
+
+	if (edac_mc_add_mc(mci)) {
+		edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
+		edac_mc_free(mci);
+	}
+	return 0;
+}
+
+static int loongson_edac_remove(struct platform_device *pdev)
+{
+	struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev);
+
+	if (mci) {
+		edac_mc_free(mci);
+		return 0;
+	}
+	return -ENODEV;
+}
+
+static const struct of_device_id loongson_edac_of_match[] = {
+	{ .compatible = "loongson,ls-mc-edac", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, loongson_edac_of_match);
+
+static struct platform_driver loongson_edac_driver = {
+	.probe		= loongson_edac_probe,
+	.remove		= loongson_edac_remove,
+	.driver		= {
+		.name	= "ls-mc-edac",
+		.owner = THIS_MODULE,
+		.of_match_table = loongson_edac_of_match,
+	},
+};
+
+static int __init loongson_edac_init(void)
+{
+	/* poll only */
+	edac_op_state = EDAC_OPSTATE_POLL;
+
+	return platform_driver_register(&loongson_edac_driver);
+}
+
+static void __exit loongson_edac_exit(void)
+{
+	platform_driver_unregister(&loongson_edac_driver);
+}
+
+module_init(loongson_edac_init);
+module_exit(loongson_edac_exit);
+module_param(edac_op_state, int, 0444);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Zhao Qunqin <zhaoqunqin@loongson.cn>\n");
+MODULE_DESCRIPTION("EDAC driver for loongson memory controller");