Message ID | 20250208002941.4135321-10-terry.bowman@amd.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Bjorn Helgaas |
Headers | show |
Series | Enable CXL PCIe port protocol error handling and logging | expand |
On Fri, Feb 07, 2025 at 06:29:33PM -0600, Terry Bowman wrote: > diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c > index 4af39abbfab3..0adebf261fe7 100644 > --- a/drivers/cxl/core/pci.c > +++ b/drivers/cxl/core/pci.c > @@ -652,7 +652,7 @@ void read_cdat_data(struct cxl_port *port) > } > EXPORT_SYMBOL_NS_GPL(read_cdat_data, "CXL"); > > -static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds, > +static void __cxl_handle_cor_ras(struct device *dev, > void __iomem *ras_base) > { > void __iomem *addr; > @@ -663,10 +663,8 @@ static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds, > > addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET; > status = readl(addr); > - if (!(status & CXL_RAS_CORRECTABLE_STATUS_MASK)) { > - dev_err(cxl_dev, "%s():%d: CE Status is empty\n", __func__, __LINE__); > + if (!(status & CXL_RAS_CORRECTABLE_STATUS_MASK)) > return; > - } > writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); > > if (is_cxl_memdev(cxl_dev)) This seems like where you actually wanted this original change: diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index aa855c2068e0..a0c78655a8af 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -714,7 +714,7 @@ void cxl_cper_trace_uncorr_port_prot_err(struct pci_dev *pdev, } EXPORT_SYMBOL_NS_GPL(cxl_cper_trace_uncorr_port_prot_err, "CXL"); -static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds, +static void __cxl_handle_cor_ras(struct device *dev, void __iomem *ras_base) { void __iomem *addr; @@ -725,15 +725,19 @@ static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds, addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET; status = readl(addr); - if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { - writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); - trace_cxl_aer_correctable_error(cxlds->cxlmd, status); - } + if (!(status & CXL_RAS_CORRECTABLE_STATUS_MASK)) + return; + writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); + + if (is_cxl_memdev(dev)) + 
trace_cxl_aer_correctable_error(to_cxl_memdev(dev), status); + else if (is_cxl_port(dev)) + trace_cxl_port_aer_correctable_error(dev, status); }
On 2/10/2025 2:16 PM, Gregory Price wrote:
> On Fri, Feb 07, 2025 at 06:29:33PM -0600, Terry Bowman wrote:
>> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
>> index 4af39abbfab3..0adebf261fe7 100644
>> --- a/drivers/cxl/core/pci.c
>> +++ b/drivers/cxl/core/pci.c
>> @@ -652,7 +652,7 @@ void read_cdat_data(struct cxl_port *port)
>> }
>> EXPORT_SYMBOL_NS_GPL(read_cdat_data, "CXL");
>>
>> -static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds,
>> +static void __cxl_handle_cor_ras(struct device *dev,
>> void __iomem *ras_base)
>> {
>> void __iomem *addr;
>> @@ -663,10 +663,8 @@ static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds,
>>
>> addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
>> status = readl(addr);
>> - if (!(status & CXL_RAS_CORRECTABLE_STATUS_MASK)) {
>> - dev_err(cxl_dev, "%s():%d: CE Status is empty\n", __func__, __LINE__);
>> + if (!(status & CXL_RAS_CORRECTABLE_STATUS_MASK))
>> return;
>> - }
>> writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
>>
>> if (is_cxl_memdev(cxl_dev))
>
> This seems like where you actually wanted this original change:

You're right. Somehow I moved a chunk into the wrong patch. I might need to respin this.
Terry > diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c > index aa855c2068e0..a0c78655a8af 100644 > --- a/drivers/cxl/core/pci.c > +++ b/drivers/cxl/core/pci.c > @@ -714,7 +714,7 @@ void cxl_cper_trace_uncorr_port_prot_err(struct pci_dev *pdev, > } > EXPORT_SYMBOL_NS_GPL(cxl_cper_trace_uncorr_port_prot_err, "CXL"); > > -static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds, > +static void __cxl_handle_cor_ras(struct device *dev, > void __iomem *ras_base) > { > void __iomem *addr; > @@ -725,15 +725,19 @@ static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds, > > addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET; > status = readl(addr); > - if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { > - writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); > - trace_cxl_aer_correctable_error(cxlds->cxlmd, status); > - } > + if (!(status & CXL_RAS_CORRECTABLE_STATUS_MASK)) > + return; > + writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); > + > + if (is_cxl_memdev(dev)) > + trace_cxl_aer_correctable_error(to_cxl_memdev(dev), status); > + else if (is_cxl_port(dev)) > + trace_cxl_port_aer_correctable_error(dev, status); > }
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 4af39abbfab3..0adebf261fe7 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -652,7 +652,7 @@ void read_cdat_data(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(read_cdat_data, "CXL"); -static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds, +static void __cxl_handle_cor_ras(struct device *dev, void __iomem *ras_base) { void __iomem *addr; @@ -663,10 +663,8 @@ static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds, addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET; status = readl(addr); - if (!(status & CXL_RAS_CORRECTABLE_STATUS_MASK)) { - dev_err(cxl_dev, "%s():%d: CE Status is empty\n", __func__, __LINE__); + if (!(status & CXL_RAS_CORRECTABLE_STATUS_MASK)) return; - } writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); if (is_cxl_memdev(cxl_dev)) @@ -677,7 +675,7 @@ static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds, static void cxl_handle_endpoint_cor_ras(struct cxl_dev_state *cxlds) { - return __cxl_handle_cor_ras(cxlds, cxlds->regs.ras); + return __cxl_handle_cor_ras(&cxlds->cxlmd->dev, cxlds->regs.ras); } /* CXL spec rev3.0 8.2.4.16.1 */ @@ -701,8 +699,7 @@ static void header_log_copy(void __iomem *ras_base, u32 *log) * Log the state of the RAS status registers and prepare them to log the * next error status. Return 1 if reset needed. 
*/ -static bool __cxl_handle_ras(struct cxl_dev_state *cxlds, - void __iomem *ras_base) +static bool __cxl_handle_ras(struct device *dev, void __iomem *ras_base) { u32 hl[CXL_HEADERLOG_SIZE_U32]; void __iomem *addr; @@ -729,7 +726,7 @@ static bool __cxl_handle_ras(struct cxl_dev_state *cxlds, } header_log_copy(ras_base, hl); - trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, hl); + trace_cxl_aer_uncorrectable_error(to_cxl_memdev(dev), status, fe, hl); writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); return true; @@ -737,7 +734,7 @@ static bool __cxl_handle_ras(struct cxl_dev_state *cxlds, static bool cxl_handle_endpoint_ras(struct cxl_dev_state *cxlds) { - return __cxl_handle_ras(cxlds, cxlds->regs.ras); + return __cxl_handle_ras(&cxlds->cxlmd->dev, cxlds->regs.ras); } #ifdef CONFIG_PCIEAER_CXL @@ -831,13 +828,13 @@ EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL"); static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds, struct cxl_dport *dport) { - return __cxl_handle_cor_ras(cxlds, dport->regs.ras); + return __cxl_handle_cor_ras(&cxlds->cxlmd->dev, dport->regs.ras); } static bool cxl_handle_rdport_ras(struct cxl_dev_state *cxlds, struct cxl_dport *dport) { - return __cxl_handle_ras(cxlds, dport->regs.ras); + return __cxl_handle_ras(&cxlds->cxlmd->dev, dport->regs.ras); } /*