diff mbox series

cxl: add RAS status unmasking for CXL

Message ID 167088362227.2423645.1158488960356900672.stgit@djiang5-desk3.ch.intel.com
State Superseded
Headers show
Series cxl: add RAS status unmasking for CXL | expand

Commit Message

Dave Jiang Dec. 12, 2022, 10:20 p.m. UTC
By default the CXL RAS mask register bits are set to 1, which suppresses
all error reporting. If the kernel has negotiated ownership of error
handling for CXL, then unmask the errors by writing 0s to the mask registers.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>

---

Based on patch posted by Ira [1] to export CXL native error reporting control.

[1]: https://lore.kernel.org/linux-cxl/20221212070627.1372402-2-ira.weiny@intel.com/
---
 drivers/cxl/cxl.h |    1 +
 drivers/cxl/pci.c |   36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+)

Comments

Dan Williams Dec. 13, 2022, 5:37 a.m. UTC | #1
Dave Jiang wrote:
> By default the CXL RAS mask registers bits are defaulted to 1's and suppress
> all error reporting. If the kernel has negotiated ownership of error
> handling for CXL then unmask the mask registers by writing 0s.
> 
> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
> 
> ---
> 
> Based on patch posted by Ira [1] to export CXL native error reporting control.
> 
> [1]: https://lore.kernel.org/linux-cxl/20221212070627.1372402-2-ira.weiny@intel.com/
> ---
>  drivers/cxl/cxl.h |    1 +
>  drivers/cxl/pci.c |   36 ++++++++++++++++++++++++++++++++++++
>  2 files changed, 37 insertions(+)
> 
> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index 1b1cf459ac77..31e795c6d537 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -130,6 +130,7 @@ static inline int ways_to_eiw(unsigned int ways, u8 *eiw)
>  #define   CXL_RAS_UNCORRECTABLE_STATUS_MASK (GENMASK(16, 14) | GENMASK(11, 0))
>  #define CXL_RAS_UNCORRECTABLE_MASK_OFFSET 0x4
>  #define   CXL_RAS_UNCORRECTABLE_MASK_MASK (GENMASK(16, 14) | GENMASK(11, 0))
> +#define   CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK BIT(8)
>  #define CXL_RAS_UNCORRECTABLE_SEVERITY_OFFSET 0x8
>  #define   CXL_RAS_UNCORRECTABLE_SEVERITY_MASK (GENMASK(16, 14) | GENMASK(11, 0))
>  #define CXL_RAS_CORRECTABLE_STATUS_OFFSET 0xC
> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> index 33083a522fd1..7fce67d493f3 100644
> --- a/drivers/cxl/pci.c
> +++ b/drivers/cxl/pci.c
> @@ -419,6 +419,41 @@ static void disable_aer(void *pdev)
>  	pci_disable_pcie_error_reporting(pdev);
>  }
>  
> +/*
> + * CXL v3.0 6.2.3 Table 6-4
> + * The table indicates that if PCIe Flit Mode is set, then CXL is in 256B flits
> + * mode, otherwise it's 68B flits mode.
> + */
> +static bool cxl_pci_flit_256(struct pci_dev *pdev)
> +{
> +	u32 lnksta2;
> +
> +	pcie_capability_read_dword(pdev, PCI_EXP_LNKSTA2, &lnksta2);
> +	return lnksta2 & BIT(10);

Add a definition of PCI_EXP_LNKSTA2_FLIT to
include/uapi/linux/pci_regs.h?

> +}
> +
> +static void cxl_pci_ras_unmask(struct pci_dev *pdev)
> +{

Could return an error the same as pci_enable_pcie_error_reporting() and
just have the caller ignore it.

> +	struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus);
> +	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
> +	u32 val;
> +
> +	if (!cxlds->regs.ras)
> +		return;
> +
> +	/* BIOS has CXL error control */
> +	if (!host_bridge->native_cxl_error)
> +		return;

It would be nice to have a dev_dbg() for which errors are unmasked by
this event, so just print the initial value.

> +
> +	val = 0;
> +	if (!cxl_pci_flit_256(pdev))

Perhaps, before this, check whether any of the CXL protocols got enabled
(bit0, bit1, or bit2 in Flex Port status) and skip touching the mask
otherwise?

> +		val |= CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK;
> +	writel(val, cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_MASK_OFFSET);
> +
> +	val = 0;
> +	writel(val, cxlds->regs.ras + CXL_RAS_CORRECTABLE_MASK_OFFSET);
> +}
> +
>  static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  {
>  	struct cxl_register_map map;
> @@ -498,6 +533,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  
>  	if (cxlds->regs.ras) {
>  		pci_enable_pcie_error_reporting(pdev);
> +		cxl_pci_ras_unmask(pdev);
>  		rc = devm_add_action_or_reset(&pdev->dev, disable_aer, pdev);
>  		if (rc)
>  			return rc;
> 
>
diff mbox series

Patch

diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 1b1cf459ac77..31e795c6d537 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -130,6 +130,7 @@  static inline int ways_to_eiw(unsigned int ways, u8 *eiw)
 #define   CXL_RAS_UNCORRECTABLE_STATUS_MASK (GENMASK(16, 14) | GENMASK(11, 0))
 #define CXL_RAS_UNCORRECTABLE_MASK_OFFSET 0x4
 #define   CXL_RAS_UNCORRECTABLE_MASK_MASK (GENMASK(16, 14) | GENMASK(11, 0))
+#define   CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK BIT(8)
 #define CXL_RAS_UNCORRECTABLE_SEVERITY_OFFSET 0x8
 #define   CXL_RAS_UNCORRECTABLE_SEVERITY_MASK (GENMASK(16, 14) | GENMASK(11, 0))
 #define CXL_RAS_CORRECTABLE_STATUS_OFFSET 0xC
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 33083a522fd1..7fce67d493f3 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -419,6 +419,41 @@  static void disable_aer(void *pdev)
 	pci_disable_pcie_error_reporting(pdev);
 }
 
+/*
+ * CXL v3.0 6.2.3 Table 6-4: PCIe Flit Mode set means the link runs 256B
+ * flits, otherwise 68B flits. Link Status 2 is a 16-bit register, so it
+ * must be read with the word accessor; a dword read at this offset fails.
+ */
+static bool cxl_pci_flit_256(struct pci_dev *pdev)
+{
+	u16 lnksta2;
+
+	pcie_capability_read_word(pdev, PCI_EXP_LNKSTA2, &lnksta2);
+	return lnksta2 & BIT(10);
+}
+
+static void cxl_pci_ras_unmask(struct pci_dev *pdev)
+{
+	struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus);
+	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+	u32 val;
+
+	if (!cxlds->regs.ras)
+		return;
+
+	/* BIOS has CXL error control */
+	if (!host_bridge->native_cxl_error)
+		return;
+
+	val = 0;
+	if (!cxl_pci_flit_256(pdev))
+		val |= CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK;
+	writel(val, cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_MASK_OFFSET);
+
+	val = 0;
+	writel(val, cxlds->regs.ras + CXL_RAS_CORRECTABLE_MASK_OFFSET);
+}
+
 static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct cxl_register_map map;
@@ -498,6 +533,7 @@  static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	if (cxlds->regs.ras) {
 		pci_enable_pcie_error_reporting(pdev);
+		cxl_pci_ras_unmask(pdev);
 		rc = devm_add_action_or_reset(&pdev->dev, disable_aer, pdev);
 		if (rc)
 			return rc;