Message ID | 20230622205523.85375-22-terry.bowman@amd.com |
---|---|
State | Superseded |
Headers | show |
Series | cxl/pci: Add support for RCH RAS error handling | expand |
On 6/22/23 13:55, Terry Bowman wrote: > The CXL error handler currently only logs endpoint RAS status. The CXL > topology includes several components providing RAS details to be logged > during error handling.[1] Update the current handler's RAS logging to use a > RAS register address. Also, update the error handler function names to be > consistent with correctable and uncorrecable RAS. This will allow for s/uncorrecable/uncorrectable/ > adding support to log other CXL component's RAS details in the future. > > [1] CXL3.0 Table 8-22 CXL_Capability_ID Assignment > > Co-developed-by: Robert Richter <rrichter@amd.com> > Signed-off-by: Robert Richter <rrichter@amd.com> > Signed-off-by: Terry Bowman <terry.bowman@amd.com> > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Reviewed-by: Dave Jiang <dave.jiang@intel.com> > --- > drivers/cxl/core/pci.c | 44 +++++++++++++++++++++++++++++------------- > 1 file changed, 31 insertions(+), 13 deletions(-) > > diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c > index 375f01c6cad6..9cb39835e154 100644 > --- a/drivers/cxl/core/pci.c > +++ b/drivers/cxl/core/pci.c > @@ -665,32 +665,36 @@ void read_cdat_data(struct cxl_port *port) > } > EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL); > > -void cxl_cor_error_detected(struct pci_dev *pdev) > +static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds, > + void __iomem *ras_base) > { > - struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); > void __iomem *addr; > u32 status; > > - if (!cxlds->regs.ras) > + if (!ras_base) > return; > > - addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET; > + addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET; > status = readl(addr); > if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { > writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); > trace_cxl_aer_correctable_error(cxlds->cxlmd, status); > } > } > -EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, CXL); > + > +static void cxl_handle_endpoint_cor_ras(struct cxl_dev_state *cxlds) > +{ > + return __cxl_handle_cor_ras(cxlds, cxlds->regs.ras); > +} > > /* CXL spec rev3.0 8.2.4.16.1 */ > -static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log) > +static void header_log_copy(void __iomem *ras_base, u32 *log) > { > void __iomem *addr; > u32 *log_addr; > int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32); > > - addr = cxlds->regs.ras + CXL_RAS_HEADER_LOG_OFFSET; > + addr = ras_base + CXL_RAS_HEADER_LOG_OFFSET; > log_addr = log; > > for (i = 0; i < log_u32_size; i++) { > @@ -704,17 +708,18 @@ static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log) > * Log the state of the RAS status registers and prepare them to log the > * next error status. Return 1 if reset needed. > */ > -static bool cxl_report_and_clear(struct cxl_dev_state *cxlds) > +static bool __cxl_handle_ras(struct cxl_dev_state *cxlds, > + void __iomem *ras_base) > { > u32 hl[CXL_HEADERLOG_SIZE_U32]; > void __iomem *addr; > u32 status; > u32 fe; > > - if (!cxlds->regs.ras) > + if (!ras_base) > return false; > > - addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; > + addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; > status = readl(addr); > if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK)) > return false; > @@ -722,7 +727,7 @@ static bool cxl_report_and_clear(struct cxl_dev_state *cxlds) > /* If multiple errors, log header points to first error from ctrl reg */ > if (hweight32(status) > 1) { > void __iomem *rcc_addr = > - cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET; > + ras_base + CXL_RAS_CAP_CONTROL_OFFSET; > > fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, > readl(rcc_addr))); > @@ -730,13 +735,26 @@ static bool cxl_report_and_clear(struct cxl_dev_state *cxlds) > fe = status; > } > > - header_log_copy(cxlds, hl); > + header_log_copy(ras_base, hl); > trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, hl); > writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); > > return true; > } > > +static bool cxl_handle_endpoint_ras(struct cxl_dev_state *cxlds) > +{ > + return __cxl_handle_ras(cxlds, cxlds->regs.ras); > +} > + > +void cxl_cor_error_detected(struct pci_dev *pdev) > +{ > + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); > + > + cxl_handle_endpoint_cor_ras(cxlds); > +} > +EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, CXL); > + > pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, > pci_channel_state_t state) > { > @@ -751,7 +769,7 @@ pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, > * chance the situation is recoverable dump the status of the RAS > * capability registers and bounce the active state of the memdev. > */ > - ue = cxl_report_and_clear(cxlds); > + ue = cxl_handle_endpoint_ras(cxlds); > > switch (state) { > case pci_channel_io_normal:
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 375f01c6cad6..9cb39835e154 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -665,32 +665,36 @@ void read_cdat_data(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL); -void cxl_cor_error_detected(struct pci_dev *pdev) +static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds, + void __iomem *ras_base) { - struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); void __iomem *addr; u32 status; - if (!cxlds->regs.ras) + if (!ras_base) return; - addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET; + addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET; status = readl(addr); if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); trace_cxl_aer_correctable_error(cxlds->cxlmd, status); } } -EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, CXL); + +static void cxl_handle_endpoint_cor_ras(struct cxl_dev_state *cxlds) +{ + return __cxl_handle_cor_ras(cxlds, cxlds->regs.ras); +} /* CXL spec rev3.0 8.2.4.16.1 */ -static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log) +static void header_log_copy(void __iomem *ras_base, u32 *log) { void __iomem *addr; u32 *log_addr; int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32); - addr = cxlds->regs.ras + CXL_RAS_HEADER_LOG_OFFSET; + addr = ras_base + CXL_RAS_HEADER_LOG_OFFSET; log_addr = log; for (i = 0; i < log_u32_size; i++) { @@ -704,17 +708,18 @@ static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log) * Log the state of the RAS status registers and prepare them to log the * next error status. Return 1 if reset needed. */ -static bool cxl_report_and_clear(struct cxl_dev_state *cxlds) +static bool __cxl_handle_ras(struct cxl_dev_state *cxlds, + void __iomem *ras_base) { u32 hl[CXL_HEADERLOG_SIZE_U32]; void __iomem *addr; u32 status; u32 fe; - if (!cxlds->regs.ras) + if (!ras_base) return false; - addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; + addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; status = readl(addr); if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK)) return false; @@ -722,7 +727,7 @@ static bool cxl_report_and_clear(struct cxl_dev_state *cxlds) /* If multiple errors, log header points to first error from ctrl reg */ if (hweight32(status) > 1) { void __iomem *rcc_addr = - cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET; + ras_base + CXL_RAS_CAP_CONTROL_OFFSET; fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, readl(rcc_addr))); @@ -730,13 +735,26 @@ static bool cxl_report_and_clear(struct cxl_dev_state *cxlds) fe = status; } - header_log_copy(cxlds, hl); + header_log_copy(ras_base, hl); trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, hl); writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); return true; } +static bool cxl_handle_endpoint_ras(struct cxl_dev_state *cxlds) +{ + return __cxl_handle_ras(cxlds, cxlds->regs.ras); +} + +void cxl_cor_error_detected(struct pci_dev *pdev) +{ + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + + cxl_handle_endpoint_cor_ras(cxlds); +} +EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, CXL); + pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, pci_channel_state_t state) { @@ -751,7 +769,7 @@ pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, * chance the situation is recoverable dump the status of the RAS * capability registers and bounce the active state of the memdev. */ - ue = cxl_report_and_clear(cxlds); + ue = cxl_handle_endpoint_ras(cxlds); switch (state) { case pci_channel_io_normal: