Message ID | 167096738875.2861540.11815053323626849940.stgit@djiang5-desk3.ch.intel.com |
---|---|
State | Superseded |
Headers | show |
Series | [v2] cxl: add RAS status unmasking for CXL | expand |
On Tue, Dec 13, 2022 at 02:36:28PM -0700, Jiang, Dave wrote: > By default the CXL RAS mask registers bits are defaulted to 1's and > suppress all error reporting. If the kernel has negotiated ownership > of error handling for CXL then unmask the mask registers by writing 0s. > > Signed-off-by: Dave Jiang <dave.jiang@intel.com> Tested with a mod to Qemu here: https://lore.kernel.org/linux-cxl/20221213-ira-flexbus-port-v1-1-86afd4f30be6@intel.com/T/#u Unless I messed that up: Tested-by: Ira Weiny <ira.weiny@intel.com> However... > > --- > > Based on patch posted by Ira [1] to export CXL native error reporting control. > > [1]: https://lore.kernel.org/linux-cxl/20221212070627.1372402-2-ira.weiny@intel.com/ > > v2: > - Add definition of PCI_EXP_LNKSTA2_FLIT. (Dan) > - Return error for cxl_pci_ras_unmask(). (Dan) Why return an error code that is not used? Ira > - Add dev_dbg() for register bits to be cleared. (Dan) > - Check Flex Port DVSEC status. (Dan) > --- > drivers/cxl/cxl.h | 1 + > drivers/cxl/cxlpci.h | 4 +++ > drivers/cxl/pci.c | 61 +++++++++++++++++++++++++++++++++++++++++ > include/uapi/linux/pci_regs.h | 1 + > 4 files changed, 67 insertions(+) > > diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h > index 1b1cf459ac77..31e795c6d537 100644 > --- a/drivers/cxl/cxl.h > +++ b/drivers/cxl/cxl.h > @@ -130,6 +130,7 @@ static inline int ways_to_eiw(unsigned int ways, u8 *eiw) > #define CXL_RAS_UNCORRECTABLE_STATUS_MASK (GENMASK(16, 14) | GENMASK(11, 0)) > #define CXL_RAS_UNCORRECTABLE_MASK_OFFSET 0x4 > #define CXL_RAS_UNCORRECTABLE_MASK_MASK (GENMASK(16, 14) | GENMASK(11, 0)) > +#define CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK BIT(8) > #define CXL_RAS_UNCORRECTABLE_SEVERITY_OFFSET 0x8 > #define CXL_RAS_UNCORRECTABLE_SEVERITY_MASK (GENMASK(16, 14) | GENMASK(11, 0)) > #define CXL_RAS_CORRECTABLE_STATUS_OFFSET 0xC > diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h > index 920909791bb9..d138d9cd8e33 100644 > --- a/drivers/cxl/cxlpci.h > +++ b/drivers/cxl/cxlpci.h > 
@@ -45,6 +45,10 @@ > > /* CXL 2.0 8.1.8: PCIe DVSEC for Flex Bus Port */ > #define CXL_DVSEC_PCIE_FLEXBUS_PORT 7 > +#define CXL_DVSEC_PORT_STATUS_OFFSET 0xE > +#define CXL_DVSEC_PORT_STATUS_CACHE_ENABLED BIT(0) > +#define CXL_DVSEC_PORT_STATUS_IO_ENABLED BIT(1) > +#define CXL_DVSEC_PORT_STATUS_MEM_ENABLED BIT(2) > > /* CXL 2.0 8.1.9: Register Locator DVSEC */ > #define CXL_DVSEC_REG_LOCATOR 8 > diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c > index 33083a522fd1..03691570649d 100644 > --- a/drivers/cxl/pci.c > +++ b/drivers/cxl/pci.c > @@ -419,6 +419,66 @@ static void disable_aer(void *pdev) > pci_disable_pcie_error_reporting(pdev); > } > > +/* > + * CXL v3.0 6.2.3 Table 6-4 > + * The table indicates that if PCIe Flit Mode is set, then CXL is in 256B flits > + * mode, otherwise it's 68B flits mode. > + */ > +static bool cxl_pci_flit_256(struct pci_dev *pdev) > +{ > + u32 lnksta2; > + > + pcie_capability_read_dword(pdev, PCI_EXP_LNKSTA2, &lnksta2); > + return lnksta2 & PCI_EXP_LNKSTA2_FLIT; > +} > + > +static int cxl_pci_ras_unmask(struct pci_dev *pdev) > +{ > + struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus); > + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); > + void __iomem *addr; > + int dvsec, rc; > + u16 port_sta; > + u32 val; > + > + if (!cxlds->regs.ras) > + return -ENODEV; > + > + /* BIOS has CXL error control */ > + if (!host_bridge->native_cxl_error) > + return -EOPNOTSUPP; > + > + dvsec = pci_find_dvsec_capability(pdev, PCI_DVSEC_VENDOR_ID_CXL, > + CXL_DVSEC_PCIE_FLEXBUS_PORT); > + if (!dvsec) > + return -ENODEV; > + > + rc = pci_read_config_word(pdev, dvsec + CXL_DVSEC_PORT_STATUS_OFFSET, > + &port_sta); > + if (rc) > + return rc; > + > + if (!(port_sta & (CXL_DVSEC_PORT_STATUS_CACHE_ENABLED | > + CXL_DVSEC_PORT_STATUS_IO_ENABLED | > + CXL_DVSEC_PORT_STATUS_MEM_ENABLED))) > + return -EOPNOTSUPP; > + > + addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_MASK_OFFSET; > + dev_dbg(&pdev->dev, "Unmasking Uncorrectable RAS errors: 
%#x\n", > + readl(addr)); > + val = 0; > + if (!cxl_pci_flit_256(pdev)) > + val |= CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK; > + writel(val, addr); > + > + addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_MASK_OFFSET; > + dev_dbg(&pdev->dev, "Unmasking Correctable RAS errors: %#x\n", > + readl(addr)); > + val = 0; > + writel(val, addr); > + return 0; > +} > + > static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) > { > struct cxl_register_map map; > @@ -498,6 +558,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) > > if (cxlds->regs.ras) { > pci_enable_pcie_error_reporting(pdev); > + cxl_pci_ras_unmask(pdev); > rc = devm_add_action_or_reset(&pdev->dev, disable_aer, pdev); > if (rc) > return rc; > diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h > index 82a03ea954af..576ee2ec973f 100644 > --- a/include/uapi/linux/pci_regs.h > +++ b/include/uapi/linux/pci_regs.h > @@ -693,6 +693,7 @@ > #define PCI_EXP_LNKCTL2_TX_MARGIN 0x0380 /* Transmit Margin */ > #define PCI_EXP_LNKCTL2_HASD 0x0020 /* HW Autonomous Speed Disable */ > #define PCI_EXP_LNKSTA2 0x32 /* Link Status 2 */ > +#define PCI_EXP_LNKSTA2_FLIT BIT(10) /* Flit Mode Status */ > #define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 0x32 /* end of v2 EPs w/ link */ > #define PCI_EXP_SLTCAP2 0x34 /* Slot Capabilities 2 */ > #define PCI_EXP_SLTCAP2_IBPD 0x00000001 /* In-band PD Disable Supported */ > >
Ira Weiny wrote: > On Tue, Dec 13, 2022 at 02:36:28PM -0700, Jiang, Dave wrote: > > By default the CXL RAS mask registers bits are defaulted to 1's and > > suppress all error reporting. If the kernel has negotiated ownership > > of error handling for CXL then unmask the mask registers by writing 0s. > > > > Signed-off-by: Dave Jiang <dave.jiang@intel.com> > > Tested with a mod to Qemu here: > > https://lore.kernel.org/linux-cxl/20221213-ira-flexbus-port-v1-1-86afd4f30be6@intel.com/T/#u > > Unless I messed that up: > > Tested-by: Ira Weiny <ira.weiny@intel.com> > > However... > > > > > --- > > > > Based on patch posted by Ira [1] to export CXL native error reporting control. > > > > [1]: https://lore.kernel.org/linux-cxl/20221212070627.1372402-2-ira.weiny@intel.com/ > > > > v2: > > - Add definition of PCI_EXP_LNKSTA2_FLIT. (Dan) > > - Return error for cxl_pci_ras_unmask(). (Dan) > > Why return an error code that is not used? I suggested this mainly for symmetry with pci_enable_pcie_error_reporting() so that it is clear that this unmasking can fail.
On Tue, 13 Dec 2022 14:36:28 -0700 Dave Jiang <dave.jiang@intel.com> wrote: > By default the CXL RAS mask registers bits are defaulted to 1's and > suppress all error reporting. If the kernel has negotiated ownership > of error handling for CXL then unmask the mask registers by writing 0s. > > Signed-off-by: Dave Jiang <dave.jiang@intel.com> A big oopsy from me. As you've indirectly identified, the QEMU code completely ignores the mask register. I'll fix that up. > > --- > > Based on patch posted by Ira [1] to export CXL native error reporting control. > > [1]: https://lore.kernel.org/linux-cxl/20221212070627.1372402-2-ira.weiny@intel.com/ > > v2: > - Add definition of PCI_EXP_LNKSTA2_FLIT. (Dan) > - Return error for cxl_pci_ras_unmask(). (Dan) > - Add dev_dbg() for register bits to be cleared. (Dan) > - Check Flex Port DVSEC status. (Dan) > --- > drivers/cxl/cxl.h | 1 + > drivers/cxl/cxlpci.h | 4 +++ > drivers/cxl/pci.c | 61 +++++++++++++++++++++++++++++++++++++++++ > include/uapi/linux/pci_regs.h | 1 + > 4 files changed, 67 insertions(+) > > diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h > index 1b1cf459ac77..31e795c6d537 100644 > --- a/drivers/cxl/cxl.h > +++ b/drivers/cxl/cxl.h > @@ -130,6 +130,7 @@ static inline int ways_to_eiw(unsigned int ways, u8 *eiw) > #define CXL_RAS_UNCORRECTABLE_STATUS_MASK (GENMASK(16, 14) | GENMASK(11, 0)) > #define CXL_RAS_UNCORRECTABLE_MASK_OFFSET 0x4 > #define CXL_RAS_UNCORRECTABLE_MASK_MASK (GENMASK(16, 14) | GENMASK(11, 0)) > +#define CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK BIT(8) > #define CXL_RAS_UNCORRECTABLE_SEVERITY_OFFSET 0x8 > #define CXL_RAS_UNCORRECTABLE_SEVERITY_MASK (GENMASK(16, 14) | GENMASK(11, 0)) > #define CXL_RAS_CORRECTABLE_STATUS_OFFSET 0xC > diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h > index 920909791bb9..d138d9cd8e33 100644 > --- a/drivers/cxl/cxlpci.h > +++ b/drivers/cxl/cxlpci.h > @@ -45,6 +45,10 @@ > > /* CXL 2.0 8.1.8: PCIe DVSEC for Flex Bus Port */ > #define CXL_DVSEC_PCIE_FLEXBUS_PORT 7 
> +#define CXL_DVSEC_PORT_STATUS_OFFSET 0xE > +#define CXL_DVSEC_PORT_STATUS_CACHE_ENABLED BIT(0) > +#define CXL_DVSEC_PORT_STATUS_IO_ENABLED BIT(1) > +#define CXL_DVSEC_PORT_STATUS_MEM_ENABLED BIT(2) > > /* CXL 2.0 8.1.9: Register Locator DVSEC */ > #define CXL_DVSEC_REG_LOCATOR 8 > diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c > index 33083a522fd1..03691570649d 100644 > --- a/drivers/cxl/pci.c > +++ b/drivers/cxl/pci.c > @@ -419,6 +419,66 @@ static void disable_aer(void *pdev) > pci_disable_pcie_error_reporting(pdev); > } > > +/* > + * CXL v3.0 6.2.3 Table 6-4 > + * The table indicates that if PCIe Flit Mode is set, then CXL is in 256B flits > + * mode, otherwise it's 68B flits mode. > + */ > +static bool cxl_pci_flit_256(struct pci_dev *pdev) > +{ > + u32 lnksta2; > + > + pcie_capability_read_dword(pdev, PCI_EXP_LNKSTA2, &lnksta2); > + return lnksta2 & PCI_EXP_LNKSTA2_FLIT; > +} > + > +static int cxl_pci_ras_unmask(struct pci_dev *pdev) > +{ > + struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus); > + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); > + void __iomem *addr; > + int dvsec, rc; > + u16 port_sta; > + u32 val; > + > + if (!cxlds->regs.ras) > + return -ENODEV; > + > + /* BIOS has CXL error control */ > + if (!host_bridge->native_cxl_error) > + return -EOPNOTSUPP; > + > + dvsec = pci_find_dvsec_capability(pdev, PCI_DVSEC_VENDOR_ID_CXL, > + CXL_DVSEC_PCIE_FLEXBUS_PORT); > + if (!dvsec) > + return -ENODEV; > + > + rc = pci_read_config_word(pdev, dvsec + CXL_DVSEC_PORT_STATUS_OFFSET, > + &port_sta); > + if (rc) > + return rc; > + > + if (!(port_sta & (CXL_DVSEC_PORT_STATUS_CACHE_ENABLED | > + CXL_DVSEC_PORT_STATUS_IO_ENABLED | > + CXL_DVSEC_PORT_STATUS_MEM_ENABLED))) > + return -EOPNOTSUPP; confused. Why do we care about these? If there is a spec reference to say don't turn on RAS reporting unless these are true, fair enough, but if we are in this state then far worse things are going on than just not having RAS. 
> + > + addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_MASK_OFFSET; > + dev_dbg(&pdev->dev, "Unmasking Uncorrectable RAS errors: %#x\n", > + readl(addr)); > + val = 0; Awkward corner. We kind of want to blanket unmask, but the other bits are RsvdP so we should leave any we don't understand alone - in theory a future spec version could use them for something other than error mask bits. > + if (!cxl_pci_flit_256(pdev)) > + val |= CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK; > + writel(val, addr); > + > + addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_MASK_OFFSET; > + dev_dbg(&pdev->dev, "Unmasking Correctable RAS errors: %#x\n", > + readl(addr)); > + val = 0; Same here - don't touch the bits we don't understand. > + writel(val, addr); > + return 0; > +} > + > static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) > { > struct cxl_register_map map; > @@ -498,6 +558,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) > > if (cxlds->regs.ras) { > pci_enable_pcie_error_reporting(pdev); > + cxl_pci_ras_unmask(pdev); > rc = devm_add_action_or_reset(&pdev->dev, disable_aer, pdev); > if (rc) > return rc; > diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h > index 82a03ea954af..576ee2ec973f 100644 > --- a/include/uapi/linux/pci_regs.h > +++ b/include/uapi/linux/pci_regs.h > @@ -693,6 +693,7 @@ > #define PCI_EXP_LNKCTL2_TX_MARGIN 0x0380 /* Transmit Margin */ > #define PCI_EXP_LNKCTL2_HASD 0x0020 /* HW Autonomous Speed Disable */ > #define PCI_EXP_LNKSTA2 0x32 /* Link Status 2 */ > +#define PCI_EXP_LNKSTA2_FLIT BIT(10) /* Flit Mode Status */ > #define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 0x32 /* end of v2 EPs w/ link */ > #define PCI_EXP_SLTCAP2 0x34 /* Slot Capabilities 2 */ > #define PCI_EXP_SLTCAP2_IBPD 0x00000001 /* In-band PD Disable Supported */ > >
Jonathan Cameron wrote: > On Tue, 13 Dec 2022 14:36:28 -0700 > Dave Jiang <dave.jiang@intel.com> wrote: > > > By default the CXL RAS mask registers bits are defaulted to 1's and > > suppress all error reporting. If the kernel has negotiated ownership > > of error handling for CXL then unmask the mask registers by writing 0s. > > > > Signed-off-by: Dave Jiang <dave.jiang@intel.com> > > A big oopsy from me. As you've indirectly identified, the QEMU code completely > ignores the mask register. I'll fix that up. > > > > > --- > > > > Based on patch posted by Ira [1] to export CXL native error reporting control. > > > > [1]: https://lore.kernel.org/linux-cxl/20221212070627.1372402-2-ira.weiny@intel.com/ > > > > v2: > > - Add definition of PCI_EXP_LNKSTA2_FLIT. (Dan) > > - Return error for cxl_pci_ras_unmask(). (Dan) > > - Add dev_dbg() for register bits to be cleared. (Dan) > > - Check Flex Port DVSEC status. (Dan) > > --- > > drivers/cxl/cxl.h | 1 + > > drivers/cxl/cxlpci.h | 4 +++ > > drivers/cxl/pci.c | 61 +++++++++++++++++++++++++++++++++++++++++ > > include/uapi/linux/pci_regs.h | 1 + > > 4 files changed, 67 insertions(+) > > > > diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h > > index 1b1cf459ac77..31e795c6d537 100644 > > --- a/drivers/cxl/cxl.h > > +++ b/drivers/cxl/cxl.h > > @@ -130,6 +130,7 @@ static inline int ways_to_eiw(unsigned int ways, u8 *eiw) > > #define CXL_RAS_UNCORRECTABLE_STATUS_MASK (GENMASK(16, 14) | GENMASK(11, 0)) > > #define CXL_RAS_UNCORRECTABLE_MASK_OFFSET 0x4 > > #define CXL_RAS_UNCORRECTABLE_MASK_MASK (GENMASK(16, 14) | GENMASK(11, 0)) > > +#define CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK BIT(8) > > #define CXL_RAS_UNCORRECTABLE_SEVERITY_OFFSET 0x8 > > #define CXL_RAS_UNCORRECTABLE_SEVERITY_MASK (GENMASK(16, 14) | GENMASK(11, 0)) > > #define CXL_RAS_CORRECTABLE_STATUS_OFFSET 0xC > > diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h > > index 920909791bb9..d138d9cd8e33 100644 > > --- a/drivers/cxl/cxlpci.h > > +++ 
b/drivers/cxl/cxlpci.h > > @@ -45,6 +45,10 @@ > > > > /* CXL 2.0 8.1.8: PCIe DVSEC for Flex Bus Port */ > > #define CXL_DVSEC_PCIE_FLEXBUS_PORT 7 > > +#define CXL_DVSEC_PORT_STATUS_OFFSET 0xE > > +#define CXL_DVSEC_PORT_STATUS_CACHE_ENABLED BIT(0) > > +#define CXL_DVSEC_PORT_STATUS_IO_ENABLED BIT(1) > > +#define CXL_DVSEC_PORT_STATUS_MEM_ENABLED BIT(2) > > > > /* CXL 2.0 8.1.9: Register Locator DVSEC */ > > #define CXL_DVSEC_REG_LOCATOR 8 > > diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c > > index 33083a522fd1..03691570649d 100644 > > --- a/drivers/cxl/pci.c > > +++ b/drivers/cxl/pci.c > > @@ -419,6 +419,66 @@ static void disable_aer(void *pdev) > > pci_disable_pcie_error_reporting(pdev); > > } > > > > +/* > > + * CXL v3.0 6.2.3 Table 6-4 > > + * The table indicates that if PCIe Flit Mode is set, then CXL is in 256B flits > > + * mode, otherwise it's 68B flits mode. > > + */ > > +static bool cxl_pci_flit_256(struct pci_dev *pdev) > > +{ > > + u32 lnksta2; > > + > > + pcie_capability_read_dword(pdev, PCI_EXP_LNKSTA2, &lnksta2); > > + return lnksta2 & PCI_EXP_LNKSTA2_FLIT; > > +} > > + > > +static int cxl_pci_ras_unmask(struct pci_dev *pdev) > > +{ > > + struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus); > > + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); > > + void __iomem *addr; > > + int dvsec, rc; > > + u16 port_sta; > > + u32 val; > > + > > + if (!cxlds->regs.ras) > > + return -ENODEV; > > + > > + /* BIOS has CXL error control */ > > + if (!host_bridge->native_cxl_error) > > + return -EOPNOTSUPP; > > + > > + dvsec = pci_find_dvsec_capability(pdev, PCI_DVSEC_VENDOR_ID_CXL, > > + CXL_DVSEC_PCIE_FLEXBUS_PORT); > > + if (!dvsec) > > + return -ENODEV; > > + > > + rc = pci_read_config_word(pdev, dvsec + CXL_DVSEC_PORT_STATUS_OFFSET, > > + &port_sta); > > + if (rc) > > + return rc; > > + > > + if (!(port_sta & (CXL_DVSEC_PORT_STATUS_CACHE_ENABLED | > > + CXL_DVSEC_PORT_STATUS_IO_ENABLED | > > + CXL_DVSEC_PORT_STATUS_MEM_ENABLED))) > > + 
return -EOPNOTSUPP; > > confused. Why do we care about these? If there is a spec reference > to say don't turn on RAS reporting unless these are true, fair enough, > but if we are in this state then far worse things are going on than > just not having RAS. Oh, this is likely from an offline side comment I made about how the driver currently ignores the flex bus port status register and just assumes that "Mem Active" and "Media Ready" are sufficient for determining that the device is actually operating in CXL mode. So yes, this need not be here, but it does have me wondering if a separate patch should move it into the cxl_port_probe() flow. This also dovetails with the question of what to do about AER handling in the RCH case. In that scenario it is not until after the cxl_mem driver successfully attaches the endpoint port that the driver even knows where the RAS capability registers are located. So I am wondering if AER and RAS setup work should move out of cxl_pci and into cxl_port and all be gated upon these bits being set to identify that the CXL link has been negotiated.
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 1b1cf459ac77..31e795c6d537 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -130,6 +130,7 @@ static inline int ways_to_eiw(unsigned int ways, u8 *eiw) #define CXL_RAS_UNCORRECTABLE_STATUS_MASK (GENMASK(16, 14) | GENMASK(11, 0)) #define CXL_RAS_UNCORRECTABLE_MASK_OFFSET 0x4 #define CXL_RAS_UNCORRECTABLE_MASK_MASK (GENMASK(16, 14) | GENMASK(11, 0)) +#define CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK BIT(8) #define CXL_RAS_UNCORRECTABLE_SEVERITY_OFFSET 0x8 #define CXL_RAS_UNCORRECTABLE_SEVERITY_MASK (GENMASK(16, 14) | GENMASK(11, 0)) #define CXL_RAS_CORRECTABLE_STATUS_OFFSET 0xC diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 920909791bb9..d138d9cd8e33 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -45,6 +45,10 @@ /* CXL 2.0 8.1.8: PCIe DVSEC for Flex Bus Port */ #define CXL_DVSEC_PCIE_FLEXBUS_PORT 7 +#define CXL_DVSEC_PORT_STATUS_OFFSET 0xE +#define CXL_DVSEC_PORT_STATUS_CACHE_ENABLED BIT(0) +#define CXL_DVSEC_PORT_STATUS_IO_ENABLED BIT(1) +#define CXL_DVSEC_PORT_STATUS_MEM_ENABLED BIT(2) /* CXL 2.0 8.1.9: Register Locator DVSEC */ #define CXL_DVSEC_REG_LOCATOR 8 diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 33083a522fd1..03691570649d 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -419,6 +419,66 @@ static void disable_aer(void *pdev) pci_disable_pcie_error_reporting(pdev); } +/* + * CXL v3.0 6.2.3 Table 6-4 + * The table indicates that if PCIe Flit Mode is set, then CXL is in 256B flits + * mode, otherwise it's 68B flits mode. 
+ */ +static bool cxl_pci_flit_256(struct pci_dev *pdev) +{ + u32 lnksta2; + + pcie_capability_read_dword(pdev, PCI_EXP_LNKSTA2, &lnksta2); + return lnksta2 & PCI_EXP_LNKSTA2_FLIT; +} + +static int cxl_pci_ras_unmask(struct pci_dev *pdev) +{ + struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus); + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + void __iomem *addr; + int dvsec, rc; + u16 port_sta; + u32 val; + + if (!cxlds->regs.ras) + return -ENODEV; + + /* BIOS has CXL error control */ + if (!host_bridge->native_cxl_error) + return -EOPNOTSUPP; + + dvsec = pci_find_dvsec_capability(pdev, PCI_DVSEC_VENDOR_ID_CXL, + CXL_DVSEC_PCIE_FLEXBUS_PORT); + if (!dvsec) + return -ENODEV; + + rc = pci_read_config_word(pdev, dvsec + CXL_DVSEC_PORT_STATUS_OFFSET, + &port_sta); + if (rc) + return rc; + + if (!(port_sta & (CXL_DVSEC_PORT_STATUS_CACHE_ENABLED | + CXL_DVSEC_PORT_STATUS_IO_ENABLED | + CXL_DVSEC_PORT_STATUS_MEM_ENABLED))) + return -EOPNOTSUPP; + + addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_MASK_OFFSET; + dev_dbg(&pdev->dev, "Unmasking Uncorrectable RAS errors: %#x\n", + readl(addr)); + val = 0; + if (!cxl_pci_flit_256(pdev)) + val |= CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK; + writel(val, addr); + + addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_MASK_OFFSET; + dev_dbg(&pdev->dev, "Unmasking Correctable RAS errors: %#x\n", + readl(addr)); + val = 0; + writel(val, addr); + return 0; +} + static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct cxl_register_map map; @@ -498,6 +558,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (cxlds->regs.ras) { pci_enable_pcie_error_reporting(pdev); + cxl_pci_ras_unmask(pdev); rc = devm_add_action_or_reset(&pdev->dev, disable_aer, pdev); if (rc) return rc; diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 82a03ea954af..576ee2ec973f 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ 
-693,6 +693,7 @@ #define PCI_EXP_LNKCTL2_TX_MARGIN 0x0380 /* Transmit Margin */ #define PCI_EXP_LNKCTL2_HASD 0x0020 /* HW Autonomous Speed Disable */ #define PCI_EXP_LNKSTA2 0x32 /* Link Status 2 */ +#define PCI_EXP_LNKSTA2_FLIT BIT(10) /* Flit Mode Status */ #define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 0x32 /* end of v2 EPs w/ link */ #define PCI_EXP_SLTCAP2 0x34 /* Slot Capabilities 2 */ #define PCI_EXP_SLTCAP2_IBPD 0x00000001 /* In-band PD Disable Supported */
By default the CXL RAS mask register bits are set to 1's and suppress all error reporting. If the kernel has negotiated ownership of error handling for CXL then unmask the mask registers by writing 0s. Signed-off-by: Dave Jiang <dave.jiang@intel.com> --- Based on patch posted by Ira [1] to export CXL native error reporting control. [1]: https://lore.kernel.org/linux-cxl/20221212070627.1372402-2-ira.weiny@intel.com/ v2: - Add definition of PCI_EXP_LNKSTA2_FLIT. (Dan) - Return error for cxl_pci_ras_unmask(). (Dan) - Add dev_dbg() for register bits to be cleared. (Dan) - Check Flex Port DVSEC status. (Dan) --- drivers/cxl/cxl.h | 1 + drivers/cxl/cxlpci.h | 4 +++ drivers/cxl/pci.c | 61 +++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/pci_regs.h | 1 + 4 files changed, 67 insertions(+)