Message ID | 20230607221651.2454764-22-terry.bowman@amd.com |
---|---|
State | Superseded |
Headers | show |
Series | cxl/pci: Add support for RCH RAS error handling | expand |
Terry Bowman wrote: > The CXL error handler currently only logs endpoint RAS status. The CXL > topology includes several components providing RAS details to be logged > during error handling.[1] Update the current handler's RAS logging to use a > RAS register address. This will allow for adding support to log other CXL > component's RAS details in the future. > > [1] CXL3.0 Table 8-22 CXL_Capability_ID Assignment > > Co-developed-by: Robert Richter <rrichter@amd.com> > Signed-off-by: Robert Richter <rrichter@amd.com> > Signed-off-by: Terry Bowman <terry.bowman@amd.com> > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Looks good.
Terry Bowman wrote: > The CXL error handler currently only logs endpoint RAS status. The CXL > topology includes several components providing RAS details to be logged > during error handling.[1] Update the current handler's RAS logging to use a > RAS register address. This will allow for adding support to log other CXL > component's RAS details in the future. > > [1] CXL3.0 Table 8-22 CXL_Capability_ID Assignment > > Co-developed-by: Robert Richter <rrichter@amd.com> > Signed-off-by: Robert Richter <rrichter@amd.com> > Signed-off-by: Terry Bowman <terry.bowman@amd.com> > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> > --- > drivers/cxl/core/pci.c | 42 ++++++++++++++++++++++++++++++------------ > 1 file changed, 30 insertions(+), 12 deletions(-) > > diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c > index 67f4ab6daa34..def6ee5ab4f5 100644 > --- a/drivers/cxl/core/pci.c > +++ b/drivers/cxl/core/pci.c > @@ -665,32 +665,36 @@ void read_cdat_data(struct cxl_port *port) > } > EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL); > > -void cxl_cor_error_detected(struct pci_dev *pdev) > +static void __cxl_log_correctable_ras(struct cxl_dev_state *cxlds, > + void __iomem *ras_base) > { > - struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); > void __iomem *addr; > u32 status; > > - if (!cxlds->regs.ras) > + if (!ras_base) > return; > > - addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET; > + addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET; > status = readl(addr); > if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { > writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); > trace_cxl_aer_correctable_error(cxlds->cxlmd, status); > } > } > -EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, CXL); > + > +static void cxl_log_correctable_ras_endpoint(struct cxl_dev_state *cxlds) > +{ > + return __cxl_log_correctable_ras(cxlds, cxlds->regs.ras); > +} As I review patch 24 it leads me back here to grumble about some naming choices. We now have: cxl_cor_error_detected() __cxl_log_correctable_ras() cxl_report_and_clear() Which of those clear the status? What is the difference between "report" and a "log"? I don't much care what name gets chosen but these three functions at least need to give the impression they were written with a common vision. The term "handle" would not surprise me in the names of functions that emit messages and clear register status bits.
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 67f4ab6daa34..def6ee5ab4f5 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -665,32 +665,36 @@ void read_cdat_data(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL); -void cxl_cor_error_detected(struct pci_dev *pdev) +static void __cxl_log_correctable_ras(struct cxl_dev_state *cxlds, + void __iomem *ras_base) { - struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); void __iomem *addr; u32 status; - if (!cxlds->regs.ras) + if (!ras_base) return; - addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET; + addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET; status = readl(addr); if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); trace_cxl_aer_correctable_error(cxlds->cxlmd, status); } } -EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, CXL); + +static void cxl_log_correctable_ras_endpoint(struct cxl_dev_state *cxlds) +{ + return __cxl_log_correctable_ras(cxlds, cxlds->regs.ras); +} /* CXL spec rev3.0 8.2.4.16.1 */ -static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log) +static void header_log_copy(void __iomem *ras_base, u32 *log) { void __iomem *addr; u32 *log_addr; int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32); - addr = cxlds->regs.ras + CXL_RAS_HEADER_LOG_OFFSET; + addr = ras_base + CXL_RAS_HEADER_LOG_OFFSET; log_addr = log; for (i = 0; i < log_u32_size; i++) { @@ -704,17 +708,18 @@ static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log) * Log the state of the RAS status registers and prepare them to log the * next error status. Return 1 if reset needed. */ -static bool cxl_report_and_clear(struct cxl_dev_state *cxlds) +static bool __cxl_report_and_clear(struct cxl_dev_state *cxlds, + void __iomem *ras_base) { u32 hl[CXL_HEADERLOG_SIZE_U32]; void __iomem *addr; u32 status; u32 fe; - if (!cxlds->regs.ras) + if (!ras_base) return false; - addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; + addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; status = readl(addr); if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK)) return false; @@ -722,7 +727,7 @@ static bool cxl_report_and_clear(struct cxl_dev_state *cxlds) /* If multiple errors, log header points to first error from ctrl reg */ if (hweight32(status) > 1) { void __iomem *rcc_addr = - cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET; + ras_base + CXL_RAS_CAP_CONTROL_OFFSET; fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, readl(rcc_addr))); @@ -730,13 +735,26 @@ static bool cxl_report_and_clear(struct cxl_dev_state *cxlds) fe = status; } - header_log_copy(cxlds, hl); + header_log_copy(ras_base, hl); trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, hl); writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); return true; } +static bool cxl_report_and_clear(struct cxl_dev_state *cxlds) +{ + return __cxl_report_and_clear(cxlds, cxlds->regs.ras); +} + +void cxl_cor_error_detected(struct pci_dev *pdev) +{ + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + + cxl_log_correctable_ras_endpoint(cxlds); +} +EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, CXL); + pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, pci_channel_state_t state) {