diff mbox series

[v4,12/15] cxl/pci: Add error handler for CXL PCIe Port RAS errors

Message ID 20241211234002.3728674-13-terry.bowman@amd.com
State New
Headers show
Series Enable CXL PCIe Port protocol error handling and logging | expand

Commit Message

Bowman, Terry Dec. 11, 2024, 11:39 p.m. UTC
Introduce correctable and uncorrectable CXL PCIe port protocol error
handlers.

The handlers will be called with a 'struct pci_dev' parameter
indicating the CXL Port device requiring handling. The CXL PCIe Port
device's underlying 'struct device' will match the Port device in the
CXL topology.

Use the PCIe Port's device object to find the matching Upstream Switch
Port, Downstream Switch Port, or Root Port in the CXL topology. The
matching device will contain a reference to the RAS register block used to
handle and log the error.

Invoke the existing __cxl_handle_ras() or __cxl_handle_cor_ras() passing
a reference to the RAS registers as a parameter. These functions will use
the register reference to clear the device's RAS status.

Future patches will assign the error handlers and add trace logging.

Signed-off-by: Terry Bowman <terry.bowman@amd.com>
---
 drivers/cxl/core/pci.c | 61 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

Comments

Li Ming Dec. 12, 2024, 2:19 a.m. UTC | #1
On 12/12/2024 7:39 AM, Terry Bowman wrote:
> Introduce correctable and uncorrectable CXL PCIe port protocol error
> handlers.
>
> The handlers will be called with a 'struct pci_dev' parameter
> indicating the CXL Port device requiring handling. The CXL PCIe Port
> device's underlying 'struct device' will match the Port device in the
> CXL topology.
>
> Use the PCIe Port's device object to find the matching Upstream Switch
> Port, Downstream Switch Port, or Root Port in the CXL topology. The
> matching device will contain a reference to the RAS register block used to
> handle and log the error.
>
> Invoke the existing __cxl_handle_ras() or __cxl_handle_cor_ras() passing
> a reference to the RAS registers as a parameter. These functions will use
> the register reference to clear the device's RAS status.
>
> Future patches will assign the error handlers and add trace logging.
>
> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
> ---
>  drivers/cxl/core/pci.c | 61 ++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 61 insertions(+)
>
> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
> index 89f8d65d71ce..52afaedf5171 100644
> --- a/drivers/cxl/core/pci.c
> +++ b/drivers/cxl/core/pci.c
> @@ -772,6 +772,67 @@ static void cxl_disable_rch_root_ints(struct cxl_dport *dport)
>  	writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND);
>  }
>  
> +static int match_uport(struct device *dev, const void *data)
> +{
> +	struct device *uport_dev = (struct device *)data;
> +	struct cxl_port *port;
> +
> +	if (!is_cxl_port(dev))
> +		return 0;
> +
> +	port = to_cxl_port(dev);
> +
> +	return port->uport_dev == uport_dev;
> +}
> +
> +static void __iomem *cxl_pci_port_ras(struct pci_dev *pdev)
> +{
> +	void __iomem *ras_base;
> +	struct cxl_port *port;
> +
> +	if (!pdev)
> +		return NULL;
> +
> +	if ((pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT) ||
> +	    (pci_pcie_type(pdev) == PCI_EXP_TYPE_DOWNSTREAM)) {
> +		struct cxl_dport *dport;
> +
> +		port = find_cxl_port(&pdev->dev, &dport);
> +		ras_base = dport ? dport->regs.ras : NULL;
> +		if (port)
> +			put_device(&port->dev);
> +		return ras_base;
> +	} else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_UPSTREAM) {
> +		struct device *port_dev;
> +
> +		port_dev = bus_find_device(&cxl_bus_type, NULL, &pdev->dev,
> +					   match_uport);
> +		if (!port_dev)
> +			return NULL;
> +
> +		port = to_cxl_port(port_dev);
> +		ras_base = port ? port->uport_regs.ras : NULL;

I think there is no need to check 'port' here; just use 'ras_base = port->uport_regs.ras;' directly. match_uport() already checks that the device is a CXL port, so the returned port_dev must be a port.


Ming
diff mbox series

Patch

diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 89f8d65d71ce..52afaedf5171 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -772,6 +772,67 @@  static void cxl_disable_rch_root_ints(struct cxl_dport *dport)
 	writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND);
 }
 
+/*
+ * bus_find_device() match callback: select the CXL port whose upstream
+ * port device is @data.
+ *
+ * @dev:  candidate device on the bus being searched
+ * @data: the 'struct device' to compare against the port's uport_dev
+ *
+ * Returns non-zero when @dev is a CXL port and its uport_dev equals @data,
+ * 0 otherwise.
+ */
+static int match_uport(struct device *dev, const void *data)
+{
+	/* @data is only compared, never written: keep it const, no cast. */
+	const struct device *uport_dev = data;
+	struct cxl_port *port;
+
+	if (!is_cxl_port(dev))
+		return 0;
+
+	port = to_cxl_port(dev);
+
+	return port->uport_dev == uport_dev;
+}
+
+/*
+ * Find the RAS register block for a CXL PCIe Port device.
+ *
+ * @pdev: PCIe Root Port, Downstream Switch Port or Upstream Switch Port;
+ *        may be NULL
+ *
+ * Root Ports and Downstream Switch Ports are looked up with
+ * find_cxl_port() and use the matching dport's regs.ras. Upstream Switch
+ * Ports are looked up on cxl_bus_type by comparing each port's uport_dev
+ * with &pdev->dev and use that port's uport_regs.ras.
+ *
+ * put_device() is called on the device found by the lookup before
+ * returning.
+ * NOTE(review): callers use the returned mapping after that put_device();
+ * confirm the mapping's lifetime is guaranteed elsewhere.
+ *
+ * Returns the RAS register base, or NULL if @pdev is NULL, is another
+ * PCIe port type, or no match was found (the stored register pointer may
+ * itself be NULL).
+ */
+static void __iomem *cxl_pci_port_ras(struct pci_dev *pdev)
+{
+	void __iomem *ras_base;
+	struct cxl_port *port;
+
+	if (!pdev)
+		return NULL;
+
+	if ((pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT) ||
+	    (pci_pcie_type(pdev) == PCI_EXP_TYPE_DOWNSTREAM)) {
+		/*
+		 * Start from NULL so the check below never reads an
+		 * indeterminate pointer if find_cxl_port() returns without
+		 * storing to @dport.
+		 */
+		struct cxl_dport *dport = NULL;
+
+		port = find_cxl_port(&pdev->dev, &dport);
+		ras_base = dport ? dport->regs.ras : NULL;
+		if (port)
+			put_device(&port->dev);
+		return ras_base;
+	} else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_UPSTREAM) {
+		struct device *port_dev;
+
+		port_dev = bus_find_device(&cxl_bus_type, NULL, &pdev->dev,
+					   match_uport);
+		if (!port_dev)
+			return NULL;
+
+		/* match_uport() only accepts CXL ports, so no NULL check. */
+		port = to_cxl_port(port_dev);
+		ras_base = port->uport_regs.ras;
+		put_device(port_dev);
+		return ras_base;
+	}
+
+	return NULL;
+}
+
+/*
+ * Correctable protocol error handler for a CXL PCIe Port.
+ *
+ * @pdev: the CXL PCIe Port device that needs handling
+ *
+ * Resolves the port's RAS register block with cxl_pci_port_ras() and hands
+ * it, together with the port's 'struct device', to __cxl_handle_cor_ras().
+ */
+static void cxl_port_cor_error_detected(struct pci_dev *pdev)
+{
+	struct device *dev = &pdev->dev;
+
+	__cxl_handle_cor_ras(dev, cxl_pci_port_ras(pdev));
+}
+
+/*
+ * Uncorrectable protocol error handler for a CXL PCIe Port.
+ *
+ * @pdev: the CXL PCIe Port device that needs handling
+ *
+ * Resolves the port's RAS register block with cxl_pci_port_ras() and hands
+ * it, together with the port's 'struct device', to __cxl_handle_ras().
+ *
+ * Returns the value reported by __cxl_handle_ras().
+ */
+static bool cxl_port_error_detected(struct pci_dev *pdev)
+{
+	struct device *dev = &pdev->dev;
+
+	return __cxl_handle_ras(dev, cxl_pci_port_ras(pdev));
+}
+
 void cxl_uport_init_ras_reporting(struct cxl_port *port)
 {
 	/* uport may have more than 1 downstream EP. Check if already mapped. */