diff mbox series

[v3,11/11] cxl/pci: Add callback to log AER correctable error

Message ID 166879134802.674819.8577415268687156421.stgit@djiang5-desk3.ch.intel.com (mailing list archive)
State Superseded
Headers show
Series cxl/pci: Add fundamental error handling | expand

Commit Message

Dave Jiang Nov. 18, 2022, 5:09 p.m. UTC
Add AER error handler callback to read the correctable error status
register for the CXL device. Log the error as a trace event and clear the
error.

Suggested-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 drivers/cxl/pci.c |   20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

Comments

Jonathan Cameron Nov. 21, 2022, 12:21 p.m. UTC | #1
On Fri, 18 Nov 2022 10:09:08 -0700
Dave Jiang <dave.jiang@intel.com> wrote:

> Add AER error handler callback to read the correctable error status
> register for the CXL device. Log the error as a trace event and clear the
> error.
> 
> Suggested-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

You may want to make it clearer in the description of patch 10 that
we 'need' the callback rather than falling into the better logging
category (which was what I was previously thinking!)

Jonathan

> ---
>  drivers/cxl/pci.c |   20 ++++++++++++++++++++
>  1 file changed, 20 insertions(+)
> 
> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> index dad69110291d..b394fd227949 100644
> --- a/drivers/cxl/pci.c
> +++ b/drivers/cxl/pci.c
> @@ -621,10 +621,30 @@ static void cxl_error_resume(struct pci_dev *pdev)
>  		 dev->driver ? "successful" : "failed");
>  }
>  
> +static void cxl_correctable_error_log(struct pci_dev *pdev)
> +{
> +	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
> +	struct cxl_memdev *cxlmd = cxlds->cxlmd;
> +	struct device *dev = &cxlmd->dev;
> +	void __iomem *addr;
> +	u32 status;
> +
> +	if (!cxlds->regs.ras)
> +		return;
> +
> +	addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
> +	status = le32_to_cpu(readl(addr));
> +	if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
> +		writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);

Ah. I'd forgotten we need to clear this an hence 'need' the callback in 
the previous patch to handle this properly.


> +		trace_cxl_aer_correctable_error(dev_name(dev), status);
> +	}
> +}
> +
>  static const struct pci_error_handlers cxl_error_handlers = {
>  	.error_detected	= cxl_error_detected,
>  	.slot_reset	= cxl_slot_reset,
>  	.resume		= cxl_error_resume,
> +	.cor_error_log	= cxl_correctable_error_log,
>  };
>  
>  static struct pci_driver cxl_pci_driver = {
> 
>
diff mbox series

Patch

diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index dad69110291d..b394fd227949 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -621,10 +621,30 @@  static void cxl_error_resume(struct pci_dev *pdev)
 		 dev->driver ? "successful" : "failed");
 }
 
+static void cxl_correctable_error_log(struct pci_dev *pdev)
+{
+	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+	struct cxl_memdev *cxlmd = cxlds->cxlmd;
+	struct device *dev = &cxlmd->dev;
+	void __iomem *addr;
+	u32 status;
+
+	if (!cxlds->regs.ras)
+		return;
+
+	addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
+	status = le32_to_cpu(readl(addr));
+	if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
+		writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
+		trace_cxl_aer_correctable_error(dev_name(dev), status);
+	}
+}
+
 static const struct pci_error_handlers cxl_error_handlers = {
 	.error_detected	= cxl_error_detected,
 	.slot_reset	= cxl_slot_reset,
 	.resume		= cxl_error_resume,
+	.cor_error_log	= cxl_correctable_error_log,
 };
 
 static struct pci_driver cxl_pci_driver = {