diff mbox series

[v4,2/2] Loongarch: EDAC driver for loongson memory controller

Message ID 20240909032124.18819-3-zhaoqunqin@loongson.cn (mailing list archive)
State New
Headers show
Series Add EDAC driver for loongson memory controller | expand

Commit Message

Zhao Qunqin Sept. 9, 2024, 3:21 a.m. UTC
Reports single bit errors (CE) only.

Signed-off-by: Zhao Qunqin <zhaoqunqin@loongson.cn>
---
Changes in v4:
        - None

Changes in v3:
        - Addressed review comments raised by Krzysztof and Huacai

Changes in v2:
        - Addressed review comments raised by Krzysztof

 MAINTAINERS                  |   1 +
 arch/loongarch/Kconfig       |   1 +
 drivers/edac/Kconfig         |   8 ++
 drivers/edac/Makefile        |   1 +
 drivers/edac/loongson_edac.c | 182 +++++++++++++++++++++++++++++++++++
 5 files changed, 193 insertions(+)
 create mode 100644 drivers/edac/loongson_edac.c

Comments

Borislav Petkov Sept. 13, 2024, 10:11 a.m. UTC | #1
On Mon, Sep 09, 2024 at 11:21:24AM +0800, Zhao Qunqin wrote:
> Subject: Re: [PATCH v4 2/2] Loongarch: EDAC driver for loongson memory controller

			EDAC/loongson: Add EDAC driver ...

> Reports single bit errors (CE) only.
> 
> Signed-off-by: Zhao Qunqin <zhaoqunqin@loongson.cn>
> ---

...

> diff --git a/drivers/edac/loongson_edac.c b/drivers/edac/loongson_edac.c
> new file mode 100644
> index 000000000..b89d6e0e7
> --- /dev/null
> +++ b/drivers/edac/loongson_edac.c
> @@ -0,0 +1,182 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2024 Loongson Technology Corporation Limited.
> + */
> +
> +#include <linux/edac.h>
> +#include <linux/module.h>
> +#include <linux/init.h>
> +#include <linux/platform_device.h>
> +
> +#include "edac_module.h"
> +
> +enum ecc_index {
> +	ECC_SET = 0,
> +	ECC_RESERVED,
> +	ECC_COUNT,
> +	ECC_CS_COUNT,
> +	ECC_CODE,
> +	ECC_ADDR,
> +	ECC_DATA0,
> +	ECC_DATA1,
> +	ECC_DATA2,
> +	ECC_DATA3,
> +};
> +
> +struct loongson_edac_pvt {
> +	u64 *ecc_base;
> +	int last_ce_count;
> +};
> +
> +static void loongson_update_ce_count(struct mem_ctl_info *mci,

Drop the loongson_ prefix from all static functions.

Also, align function arguments on the opening brace.

> +					int chan,
> +					int new)
> +{
> +	int add;
> +	struct loongson_edac_pvt *pvt = mci->pvt_info;

The EDAC tree preferred ordering of variable declarations at the
beginning of a function is reverse fir tree order::

	struct long_struct_name *descriptive_name;
	unsigned long foo, bar;
	unsigned int tmp;
	int ret;

The above is faster to parse than the reverse ordering::

	int ret;
	unsigned int tmp;
	unsigned long foo, bar;
	struct long_struct_name *descriptive_name;

And even more so than random ordering::

	unsigned long foo, bar;
	int ret;
	struct long_struct_name *descriptive_name;
	unsigned int tmp;

> +
> +	add = new - pvt->last_ce_count;
> +
> +	/* Store the new value */

Drop all those obvious comments.

> +	pvt->last_ce_count = new;
> +
> +	/* device resume or any other exceptions*/

No clue what that means.

Also,  the check goes right under the assignment.

> +	if (add < 0)
> +		return;
> +
> +	/*updated the edac core */

Useless comment.

> +	if (add != 0) {

	if (!add)
		return;

and now you can save yourself an indentation level:

	edac_mc_...(

> +		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add,
> +					0, 0, 0,
> +					chan, 0, -1, "error", "");
> +		edac_mc_printk(mci, KERN_INFO, "add: %d", add);
> +	}
> +}
> +
> +static int loongson_read_ecc(struct mem_ctl_info *mci)
> +{
> +	u64 ecc;
> +	int cs = 0;
> +	struct loongson_edac_pvt *pvt = mci->pvt_info;
> +
> +	if (!pvt->ecc_base)
> +		return pvt->last_ce_count;
> +
> +	ecc = pvt->ecc_base[ECC_CS_COUNT];
> +	cs += ecc & 0xff;		// cs0
> +	cs += (ecc >> 8) & 0xff;	// cs1
> +	cs += (ecc >> 16) & 0xff;	// cs2
> +	cs += (ecc >> 24) & 0xff;	// cs3

No side comments pls - put them over the line.

> +
> +	return cs;
> +}
> +
> +static void loongson_edac_check(struct mem_ctl_info *mci)
> +{
> +	loongson_update_ce_count(mci, 0, loongson_read_ecc(mci));

Drop this silly wrapper.

> +}
> +
> +static int get_dimm_config(struct mem_ctl_info *mci)
> +{
> +	u32 size, npages;
> +	struct dimm_info *dimm;
> +
> +	/* size not used */
> +	size = -1;
> +	npages = MiB_TO_PAGES(size);
> +
> +	dimm = edac_get_dimm(mci, 0, 0, 0);
> +	dimm->nr_pages = npages;
> +	snprintf(dimm->label, sizeof(dimm->label),
> +			"MC#%uChannel#%u_DIMM#%u",
> +			mci->mc_idx, 0, 0);

Align arguments on the opening brace.

> +	dimm->grain = 8;
> +
> +	return 0;
> +}

...
Zhao Qunqin Sept. 14, 2024, 2:18 a.m. UTC | #2
在 2024/9/13 下午6:11, Borislav Petkov 写道:
> On Mon, Sep 09, 2024 at 11:21:24AM +0800, Zhao Qunqin wrote:
>> Subject: Re: [PATCH v4 2/2] Loongarch: EDAC driver for loongson memory controller
> 			EDAC/loongson: Add EDAC driver ...
>
>> Reports single bit errors (CE) only.
>>
>> Signed-off-by: Zhao Qunqin <zhaoqunqin@loongson.cn>
>> ---
> ...
>
>> diff --git a/drivers/edac/loongson_edac.c b/drivers/edac/loongson_edac.c
>> new file mode 100644
>> index 000000000..b89d6e0e7
>> --- /dev/null
>> +++ b/drivers/edac/loongson_edac.c
>> @@ -0,0 +1,182 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Copyright (C) 2024 Loongson Technology Corporation Limited.
>> + */
>> +
>> +#include <linux/edac.h>
>> +#include <linux/module.h>
>> +#include <linux/init.h>
>> +#include <linux/platform_device.h>
>> +
>> +#include "edac_module.h"
>> +
>> +enum ecc_index {
>> +	ECC_SET = 0,
>> +	ECC_RESERVED,
>> +	ECC_COUNT,
>> +	ECC_CS_COUNT,
>> +	ECC_CODE,
>> +	ECC_ADDR,
>> +	ECC_DATA0,
>> +	ECC_DATA1,
>> +	ECC_DATA2,
>> +	ECC_DATA3,
>> +};
>> +
>> +struct loongson_edac_pvt {
>> +	u64 *ecc_base;
>> +	int last_ce_count;
>> +};
>> +
>> +static void loongson_update_ce_count(struct mem_ctl_info *mci,
> Drop the loongson_ prefix from all static functions.
>
> Also, align function arguments on the opening brace.
>
>> +					int chan,
>> +					int new)
>> +{
>> +	int add;
>> +	struct loongson_edac_pvt *pvt = mci->pvt_info;
> The EDAC tree preferred ordering of variable declarations at the
> beginning of a function is reverse fir tree order::
>
> 	struct long_struct_name *descriptive_name;
> 	unsigned long foo, bar;
> 	unsigned int tmp;
> 	int ret;
>
> The above is faster to parse than the reverse ordering::
>
> 	int ret;
> 	unsigned int tmp;
> 	unsigned long foo, bar;
> 	struct long_struct_name *descriptive_name;
>
> And even more so than random ordering::
>
> 	unsigned long foo, bar;
> 	int ret;
> 	struct long_struct_name *descriptive_name;
> 	unsigned int tmp;
>
>> +
>> +	add = new - pvt->last_ce_count;
>> +
>> +	/* Store the new value */
> Drop all those obvious comments.
>
>> +	pvt->last_ce_count = new;
>> +
>> +	/* device resume or any other exceptions*/
> No clue what that means.

The CE value of the memory controller should only increase,

but it will return to its initial value when resuming from S3 or S4 
sleep state,

then the new ce count may be smaller than the last ce count.

>
> Also,  the check goes right under the assignment.
>
>> +	if (add < 0)
>> +		return;
>> +
>> +	/*updated the edac core */
> Useless comment.
>
>> +	if (add != 0) {
> 	if (!add)
> 		return;
>
> and now you can save yourself an indentation level:
>
> 	edac_mc_...(
>
>> +		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add,
>> +					0, 0, 0,
>> +					chan, 0, -1, "error", "");
>> +		edac_mc_printk(mci, KERN_INFO, "add: %d", add);
>> +	}
>> +}
>> +
>> +static int loongson_read_ecc(struct mem_ctl_info *mci)
>> +{
>> +	u64 ecc;
>> +	int cs = 0;
>> +	struct loongson_edac_pvt *pvt = mci->pvt_info;
>> +
>> +	if (!pvt->ecc_base)
>> +		return pvt->last_ce_count;
>> +
>> +	ecc = pvt->ecc_base[ECC_CS_COUNT];
>> +	cs += ecc & 0xff;		// cs0
>> +	cs += (ecc >> 8) & 0xff;	// cs1
>> +	cs += (ecc >> 16) & 0xff;	// cs2
>> +	cs += (ecc >> 24) & 0xff;	// cs3
> No side comments pls - put them over the line.
>
>> +
>> +	return cs;
>> +}
>> +
>> +static void loongson_edac_check(struct mem_ctl_info *mci)
>> +{
>> +	loongson_update_ce_count(mci, 0, loongson_read_ecc(mci));
> Drop this silly wrapper.
>
>> +}
>> +
>> +static int get_dimm_config(struct mem_ctl_info *mci)
>> +{
>> +	u32 size, npages;
>> +	struct dimm_info *dimm;
>> +
>> +	/* size not used */
>> +	size = -1;
>> +	npages = MiB_TO_PAGES(size);
>> +
>> +	dimm = edac_get_dimm(mci, 0, 0, 0);
>> +	dimm->nr_pages = npages;
>> +	snprintf(dimm->label, sizeof(dimm->label),
>> +			"MC#%uChannel#%u_DIMM#%u",
>> +			mci->mc_idx, 0, 0);
> Align arguments on the opening brace.
>
>> +	dimm->grain = 8;
>> +
>> +	return 0;
>> +}

Will address above comments. Thanks for taking the time to review.

Best regards,

Zhao Qunqin.

>
>
diff mbox series

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index 6cc8cfc8f..5b4526638 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13242,6 +13242,7 @@  M:	Zhao Qunqin <zhaoqunqin@loongson.cn>
 L:	linux-edac@vger.kernel.org
 S:	Maintained
 F:	Documentation/devicetree/bindings/edac/loongson,ls3a5000-mc-edac.yaml
+F:	drivers/edac/loongson_edac.c
 
 LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI)
 M:	Sathya Prakash <sathya.prakash@broadcom.com>
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 70f169210..9c135f1a2 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -181,6 +181,7 @@  config LOONGARCH
 	select PCI_MSI_ARCH_FALLBACKS
 	select PCI_QUIRKS
 	select PERF_USE_VMALLOC
+	select EDAC_SUPPORT
 	select RTC_LIB
 	select SPARSE_IRQ
 	select SYSCTL_ARCH_UNALIGN_ALLOW
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 81af6c344..719bb6ca7 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -564,5 +564,13 @@  config EDAC_VERSAL
 	  Support injecting both correctable and uncorrectable errors
 	  for debugging purposes.
 
+config EDAC_LOONGSON3
+	tristate "Loongson-3 Memory Controller"
+	depends on LOONGARCH || COMPILE_TEST
+	help
+	  Support for error detection and correction on the Loongson-3
+	  family memory controller. This driver reports single bit
+	  errors (CE) only. Loongson-3A5000/3C5000/3D5000/3C5000L/3A6000/3C6000
+	  are compatible.
 
 endif # EDAC
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index faf310eec..e72ca1be4 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -88,3 +88,4 @@  obj-$(CONFIG_EDAC_DMC520)		+= dmc520_edac.o
 obj-$(CONFIG_EDAC_NPCM)			+= npcm_edac.o
 obj-$(CONFIG_EDAC_ZYNQMP)		+= zynqmp_edac.o
 obj-$(CONFIG_EDAC_VERSAL)		+= versal_edac.o
+obj-$(CONFIG_EDAC_LOONGSON3)		+= loongson_edac.o
diff --git a/drivers/edac/loongson_edac.c b/drivers/edac/loongson_edac.c
new file mode 100644
index 000000000..b89d6e0e7
--- /dev/null
+++ b/drivers/edac/loongson_edac.c
@@ -0,0 +1,182 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Loongson Technology Corporation Limited.
+ */
+
+#include <linux/edac.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+
+#include "edac_module.h"
+
+enum ecc_index {
+	ECC_SET = 0,
+	ECC_RESERVED,
+	ECC_COUNT,
+	ECC_CS_COUNT,
+	ECC_CODE,
+	ECC_ADDR,
+	ECC_DATA0,
+	ECC_DATA1,
+	ECC_DATA2,
+	ECC_DATA3,
+};
+
+struct loongson_edac_pvt {
+	u64 *ecc_base;
+	int last_ce_count;
+};
+
+static void loongson_update_ce_count(struct mem_ctl_info *mci,
+					int chan,
+					int new)
+{
+	int add;
+	struct loongson_edac_pvt *pvt = mci->pvt_info;
+
+	add = new - pvt->last_ce_count;
+
+	/* Store the new value */
+	pvt->last_ce_count = new;
+
+	/* device resume or any other exceptions*/
+	if (add < 0)
+		return;
+
+	/*updated the edac core */
+	if (add != 0) {
+		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add,
+					0, 0, 0,
+					chan, 0, -1, "error", "");
+		edac_mc_printk(mci, KERN_INFO, "add: %d", add);
+	}
+}
+
+static int loongson_read_ecc(struct mem_ctl_info *mci)
+{
+	u64 ecc;
+	int cs = 0;
+	struct loongson_edac_pvt *pvt = mci->pvt_info;
+
+	if (!pvt->ecc_base)
+		return pvt->last_ce_count;
+
+	ecc = pvt->ecc_base[ECC_CS_COUNT];
+	cs += ecc & 0xff;		// cs0
+	cs += (ecc >> 8) & 0xff;	// cs1
+	cs += (ecc >> 16) & 0xff;	// cs2
+	cs += (ecc >> 24) & 0xff;	// cs3
+
+	return cs;
+}
+
+static void loongson_edac_check(struct mem_ctl_info *mci)
+{
+	loongson_update_ce_count(mci, 0, loongson_read_ecc(mci));
+}
+
+static int get_dimm_config(struct mem_ctl_info *mci)
+{
+	u32 size, npages;
+	struct dimm_info *dimm;
+
+	/* size not used */
+	size = -1;
+	npages = MiB_TO_PAGES(size);
+
+	dimm = edac_get_dimm(mci, 0, 0, 0);
+	dimm->nr_pages = npages;
+	snprintf(dimm->label, sizeof(dimm->label),
+			"MC#%uChannel#%u_DIMM#%u",
+			mci->mc_idx, 0, 0);
+	dimm->grain = 8;
+
+	return 0;
+}
+
+static void loongson_pvt_init(struct mem_ctl_info *mci, u64 *vbase)
+{
+	struct loongson_edac_pvt *pvt = mci->pvt_info;
+
+	pvt->ecc_base = vbase;
+	pvt->last_ce_count = loongson_read_ecc(mci);
+}
+
+static int loongson_edac_probe(struct platform_device *pdev)
+{
+	struct mem_ctl_info *mci;
+	struct edac_mc_layer layers[2];
+	struct loongson_edac_pvt *pvt;
+	u64 *vbase = NULL;
+	int ret;
+
+	vbase = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(vbase))
+		return PTR_ERR(vbase);
+
+	/* allocate a new MC control structure */
+	layers[0].type = EDAC_MC_LAYER_CHANNEL;
+	layers[0].size = 1;
+	layers[0].is_virt_csrow = false;
+	layers[1].type = EDAC_MC_LAYER_SLOT;
+	layers[1].size = 1;
+	layers[1].is_virt_csrow = true;
+	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
+	if (mci == NULL)
+		return -ENOMEM;
+
+	mci->mc_idx = edac_device_alloc_index();
+	mci->mtype_cap = MEM_FLAG_RDDR4;
+	mci->edac_ctl_cap = EDAC_FLAG_NONE;
+	mci->edac_cap = EDAC_FLAG_NONE;
+	mci->mod_name = "loongson_edac.c";
+	mci->ctl_name = "loongson_edac_ctl";
+	mci->dev_name = "loongson_edac_dev";
+	mci->ctl_page_to_phys = NULL;
+	mci->pdev = &pdev->dev;
+	mci->error_desc.grain = 8;
+	/* Set the function pointer to an actual operation function */
+	mci->edac_check = loongson_edac_check;
+
+	loongson_pvt_init(mci, vbase);
+	get_dimm_config(mci);
+
+	ret = edac_mc_add_mc(mci);
+	if (ret) {
+		edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
+		edac_mc_free(mci);
+		return ret;
+	}
+	edac_op_state = EDAC_OPSTATE_POLL;
+
+	return 0;
+}
+
+static void loongson_edac_remove(struct platform_device *pdev)
+{
+	struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev);
+
+	if (mci)
+		edac_mc_free(mci);
+}
+
+static const struct of_device_id loongson_edac_of_match[] = {
+	{ .compatible = "loongson,ls3a5000-mc-edac", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, loongson_edac_of_match);
+
+static struct platform_driver loongson_edac_driver = {
+	.probe		= loongson_edac_probe,
+	.remove		= loongson_edac_remove,
+	.driver		= {
+		.name	= "loongson-mc-edac",
+		.of_match_table = loongson_edac_of_match,
+	},
+};
+module_platform_driver(loongson_edac_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Zhao Qunqin <zhaoqunqin@loongson.cn>");
+MODULE_DESCRIPTION("EDAC driver for loongson memory controller");