Message ID | 20240208200042.432958-4-Benjamin.Cheatham@amd.com |
---|---|
State | New, archived |
Headers | show |
Series | CXL, ACPI, APEI, EINJ: Update EINJ for CXL error types | expand |
On 2/8/24 2:00 PM, Ben Cheatham wrote: > Implement CXL helper functions in the EINJ module for getting/injecting > available CXL protocol error types and export them to sysfs under > kernel/debug/cxl. > > The kernel/debug/cxl/einj_types file will print the available CXL > protocol errors in the same format as the available_error_types > file provided by the EINJ module. The > kernel/debug/cxl/$dport_dev/einj_inject is functionally the same as the > error_type and error_inject files provided by the EINJ module, i.e.: > writing an error type into $dport_dev/einj_inject will inject said error > type into the CXL dport represented by $dport_dev. > > Reviewed-by: Dan Williams <dan.j.williams@intel.com> > Signed-off-by: Ben Cheatham <Benjamin.Cheatham@amd.com> > --- > Documentation/ABI/testing/debugfs-cxl | 22 ++++ > MAINTAINERS | 1 + > drivers/acpi/apei/einj.c | 158 ++++++++++++++++++++++++-- > drivers/cxl/core/port.c | 39 +++++++ > include/linux/einj-cxl.h | 45 ++++++++ > 5 files changed, 255 insertions(+), 10 deletions(-) > create mode 100644 include/linux/einj-cxl.h > > diff --git a/Documentation/ABI/testing/debugfs-cxl b/Documentation/ABI/testing/debugfs-cxl > index fe61d372e3fa..bcd985cca66a 100644 > --- a/Documentation/ABI/testing/debugfs-cxl > +++ b/Documentation/ABI/testing/debugfs-cxl > @@ -33,3 +33,25 @@ Description: > device cannot clear poison from the address, -ENXIO is returned. > The clear_poison attribute is only visible for devices > supporting the capability. > + > +What: /sys/kernel/debug/cxl/einj_types > +Date: January, 2024 > +KernelVersion: v6.9 > +Contact: linux-cxl@vger.kernel.org > +Description: > + (RO) Prints the CXL protocol error types made available by > + the platform in the format "0x<error number> <error type>". > + The <error number> can be written to einj_inject to inject > + <error type> into a chosen dport. > + > +What: /sys/kernel/debug/cxl/$dport_dev/einj_inject > +Date: January, 2024 > +KernelVersion: v6.9 > +Contact: linux-cxl@vger.kernel.org > +Description: > + (WO) Writing an integer to this file injects the corresponding > + CXL protocol error into $dport_dev ($dport_dev will be a device > + name from /sys/bus/pci/devices). The integer to type mapping for > + injection can be found by reading from einj_types. If the dport > + was enumerated in RCH mode, a CXL 1.1 error is injected, otherwise > + a CXL 2.0 error is injected. > diff --git a/MAINTAINERS b/MAINTAINERS > index 9104430e148e..02d7feb2ed1f 100644 > --- a/MAINTAINERS > +++ b/MAINTAINERS > @@ -5246,6 +5246,7 @@ L: linux-cxl@vger.kernel.org > S: Maintained > F: drivers/cxl/ > F: include/uapi/linux/cxl_mem.h > +F: include/linux/einj-cxl.h > F: tools/testing/cxl/ > > COMPUTE EXPRESS LINK PMU (CPMU) > diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c > index 73dde21d3e89..9137cc01f791 100644 > --- a/drivers/acpi/apei/einj.c > +++ b/drivers/acpi/apei/einj.c > @@ -21,6 +21,7 @@ > #include <linux/nmi.h> > #include <linux/delay.h> > #include <linux/mm.h> > +#include <linux/einj-cxl.h> > #include <linux/platform_device.h> > #include <asm/unaligned.h> > > @@ -37,6 +38,20 @@ > #define MEM_ERROR_MASK (ACPI_EINJ_MEMORY_CORRECTABLE | \ > ACPI_EINJ_MEMORY_UNCORRECTABLE | \ > ACPI_EINJ_MEMORY_FATAL) > +#ifndef ACPI_EINJ_CXL_CACHE_CORRECTABLE > +#define ACPI_EINJ_CXL_CACHE_CORRECTABLE BIT(12) > +#define ACPI_EINJ_CXL_CACHE_UNCORRECTABLE BIT(13) > +#define ACPI_EINJ_CXL_CACHE_FATAL BIT(14) > +#define ACPI_EINJ_CXL_MEM_CORRECTABLE BIT(15) > +#define ACPI_EINJ_CXL_MEM_UNCORRECTABLE BIT(16) > +#define ACPI_EINJ_CXL_MEM_FATAL BIT(17) > +#endif > +#define CXL_ERROR_MASK (ACPI_EINJ_CXL_CACHE_CORRECTABLE | \ > + ACPI_EINJ_CXL_CACHE_UNCORRECTABLE | \ > + ACPI_EINJ_CXL_CACHE_FATAL | \ > + ACPI_EINJ_CXL_MEM_CORRECTABLE | \ > + ACPI_EINJ_CXL_MEM_UNCORRECTABLE | \ > + ACPI_EINJ_CXL_MEM_FATAL) > > /* > * ACPI version 5 provides a SET_ERROR_TYPE_WITH_ADDRESS action. > @@ -543,8 +558,11 @@ static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, > if (type & ACPI5_VENDOR_BIT) { > if (vendor_flags != SETWA_FLAGS_MEM) > goto inject; > - } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM)) > + } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM)) { > goto inject; > + } else if ((type & CXL_ERROR_MASK) && (flags & SETWA_FLAGS_MEM)) { > + goto inject; > + } > > /* > * Disallow crazy address masks that give BIOS leeway to pick > @@ -596,6 +614,9 @@ static const char * const einj_error_type_string[] = { > "0x00000200\tPlatform Correctable\n", > "0x00000400\tPlatform Uncorrectable non-fatal\n", > "0x00000800\tPlatform Uncorrectable fatal\n", > +}; > + > +static const char * const einj_cxl_error_type_string[] = { > "0x00001000\tCXL.cache Protocol Correctable\n", > "0x00002000\tCXL.cache Protocol Uncorrectable non-fatal\n", > "0x00004000\tCXL.cache Protocol Uncorrectable fatal\n", > @@ -621,29 +642,44 @@ static int available_error_type_show(struct seq_file *m, void *v) > > DEFINE_SHOW_ATTRIBUTE(available_error_type); > > -static int error_type_get(void *data, u64 *val) > +int einj_cxl_available_error_type_show(struct seq_file *m, void *v) > { > - *val = error_type; > + int cxl_err, rc; > + u32 available_error_type = 0; > + > + if (!einj_initialized) > + return -ENXIO; > + > + rc = einj_get_available_error_type(&available_error_type); > + if (rc) > + return rc; > + > + for (int pos = 0; pos < ARRAY_SIZE(einj_cxl_error_type_string); pos++) { > + cxl_err = ACPI_EINJ_CXL_CACHE_CORRECTABLE << pos; > + > + if (available_error_type & cxl_err) > + seq_puts(m, einj_cxl_error_type_string[pos]); > + } > > return 0; > } > +EXPORT_SYMBOL_NS_GPL(einj_cxl_available_error_type_show, CXL); > > -static int error_type_set(void *data, u64 val) > +static int validate_error_type(u64 type) > { > + u32 tval, vendor, available_error_type = 0; > int rc; > - u32 available_error_type = 0; > - u32 tval, vendor; > > /* Only low 32 bits for error type are valid */ > - if (val & GENMASK_ULL(63, 32)) > + if (type & GENMASK_ULL(63, 32)) > return -EINVAL; > > /* > * Vendor defined types have 0x80000000 bit set, and > * are not enumerated by ACPI_EINJ_GET_ERROR_TYPE > */ > - vendor = val & ACPI5_VENDOR_BIT; > - tval = val & 0x7fffffff; > + vendor = type & ACPI5_VENDOR_BIT; > + tval = type & 0x7fffffff; > > /* Only one error type can be specified */ > if (tval & (tval - 1)) > @@ -652,9 +688,105 @@ static int error_type_set(void *data, u64 val) > rc = einj_get_available_error_type(&available_error_type); > if (rc) > return rc; > - if (!(val & available_error_type)) > + if (!(type & available_error_type)) > return -EINVAL; > } > + > + return 0; > +} > + > +static int cxl_dport_get_sbdf(struct pci_dev *dport_dev, u64 *sbdf) > +{ > + struct pci_bus *pbus; > + struct pci_host_bridge *bridge; > + u64 seg = 0, bus; > + > + pbus = dport_dev->bus; > + bridge = pci_find_host_bridge(pbus); > + > + if (!bridge) > + return -ENODEV; > + > + if (bridge->domain_nr != PCI_DOMAIN_NR_NOT_SET) > + seg = bridge->domain_nr << 24; > + > + bus = pbus->number << 16; > + *sbdf = seg | bus | dport_dev->devfn; > + > + return 0; > +} > + > +int einj_cxl_inject_rch_error(u64 rcrb, u64 type) > +{ > + u64 param1 = 0, param2 = 0, param4 = 0; > + u32 flags; > + int rc; > + > + if (!einj_initialized) > + return -ENXIO; > + > + /* Only CXL error types can be specified */ > + if (type & ~CXL_ERROR_MASK || (type & ACPI5_VENDOR_BIT)) > + return -EINVAL; > + > + rc = validate_error_type(type); > + if (rc) > + return rc; > + > + param1 = (u64) rcrb; > + param2 = 0xfffffffffffff000; > + flags = 0x2; > + > + return einj_error_inject(type, flags, param1, param2, 0, param4); > +} > +EXPORT_SYMBOL_NS_GPL(einj_cxl_inject_rch_error, CXL); > + > +int einj_cxl_inject_error(struct pci_dev *dport, u64 type) > +{ > + u64 param1 = 0, param2 = 0, param4 = 0; > + u32 flags; > + int rc; > + > + if (!einj_initialized) > + return -ENXIO; > + > + /* Only CXL error types can be specified */ > + if (type & ~CXL_ERROR_MASK || (type & ACPI5_VENDOR_BIT)) > + return -EINVAL; > + > + rc = validate_error_type(type); > + if (rc) > + return rc; > + > + rc = cxl_dport_get_sbdf(dport, ¶m4); > + if (rc) > + return rc; > + > + flags = 0x4; > + > + return einj_error_inject(type, flags, param1, param2, 0, param4); > +} > +EXPORT_SYMBOL_NS_GPL(einj_cxl_inject_error, CXL); > + > +static int error_type_get(void *data, u64 *val) > +{ > + *val = error_type; > + > + return 0; > +} > + > +static int error_type_set(void *data, u64 val) > +{ > + int rc; > + > + /* CXL error types have to be injected from cxl debugfs */ > + if (val & CXL_ERROR_MASK && !(val & ACPI5_VENDOR_BIT)) > + return -EINVAL; > + > + rc = validate_error_type(val); > + if (rc) > + return rc; > + > error_type = val; > > return 0; > @@ -690,6 +822,12 @@ static int einj_check_table(struct acpi_table_einj *einj_tab) > return 0; > } > > +bool einj_is_initialized(void) > +{ > + return einj_initialized; > +} > +EXPORT_SYMBOL_GPL(einj_is_initialized); > + > static int __init einj_probe(struct platform_device *pdev) > { > int rc; > diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c > index 8c00fd6be730..c52c92222be5 100644 > --- a/drivers/cxl/core/port.c > +++ b/drivers/cxl/core/port.c > @@ -3,6 +3,7 @@ > #include <linux/platform_device.h> > #include <linux/memregion.h> > #include <linux/workqueue.h> > +#include <linux/einj-cxl.h> > #include <linux/debugfs.h> > #include <linux/device.h> > #include <linux/module.h> > @@ -797,6 +798,37 @@ static int cxl_dport_setup_regs(struct device *host, struct cxl_dport *dport, > return rc; > } > > +DEFINE_SHOW_ATTRIBUTE(einj_cxl_available_error_type); > + > +static int cxl_einj_inject(void *data, u64 type) > +{ > + struct cxl_dport *dport = data; > + > + if (dport->rch) > + return einj_cxl_inject_rch_error(dport->rcrb.base, type); > + > + return einj_cxl_inject_error(to_pci_dev(dport->dport_dev), type); > +} > +DEFINE_DEBUGFS_ATTRIBUTE(cxl_einj_inject_fops, NULL, cxl_einj_inject, "%llx\n"); > + > +static void cxl_debugfs_create_dport_dir(struct cxl_dport *dport) > +{ > + struct dentry *dir; > + > + /* > + * dport_dev needs to be a PCIe port for CXL 2.0+ ports because > + * EINJ expects a dport SBDF to be specified for 2.0 error injection. > + */ > + if (!einj_is_initialized() || > + (!dport->rch && !dev_is_pci(dport->dport_dev))) > + return; > + > + dir = cxl_debugfs_create_dir(dev_name(dport->dport_dev)); > + > + debugfs_create_file("einj_inject", 0200, dir, dport, > + &cxl_einj_inject_fops); > +} > + > static struct cxl_port *__devm_cxl_add_port(struct device *host, > struct device *uport_dev, > resource_size_t component_reg_phys, > @@ -1144,6 +1176,8 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, > if (dev_is_pci(dport_dev)) > dport->link_latency = cxl_pci_get_latency(to_pci_dev(dport_dev)); > > + cxl_debugfs_create_dport_dir(dport); > + > return dport; > } > > @@ -2220,6 +2254,11 @@ static __init int cxl_core_init(void) > > cxl_debugfs = debugfs_create_dir("cxl", NULL); > > + if (einj_is_initialized()) { > + debugfs_create_file("einj_types", 0400, cxl_debugfs, NULL, > + &einj_cxl_available_error_type_fops); > + } > + > cxl_mbox_init(); > > rc = cxl_memdev_init(); > diff --git a/include/linux/einj-cxl.h b/include/linux/einj-cxl.h > new file mode 100644 > index 000000000000..af57cc8580a6 > --- /dev/null > +++ b/include/linux/einj-cxl.h > @@ -0,0 +1,45 @@ > +/* SPDX-License-Identifier: GPL-2.0-or-later */ > +/* > + * CXL protocol Error INJection support. > + * > + * Copyright (c) 2023 Advanced Micro Devices, Inc. > + * All Rights Reserved. > + * > + * Author: Ben Cheatham <benjamin.cheatham@amd.com> > + */ > +#ifndef CXL_EINJ_H > +#define CXL_EINJ_H > + > +#include <linux/pci.h> > + > +#if IS_ENABLED(CONFIG_ACPI_APEI_EINJ) I was testing some other work using this kernel and was getting a linker error when I had CXL_BUS set to 'y' and ACPI_APEI_EINJ set to 'm'. I went ahead and solved it by changing the line above to use IS_REACHABLE() instead. The change shouldn't actually affect the functionality of the patch since this is in a build configuration where the CXL driver isn't using the EINJ module since CONFIG_CXL_EINJ is set to n due to Kconfig restraints. I can submit another version of this series with this fix, but I don't think it makes much sense for a 1 line change, so I've but a diff below with the change for whoever ends up putting this in their tree: diff --git a/include/linux/einj-cxl.h b/include/linux/einj-cxl.h index af57cc8580a6..70d411ea4629 100644 --- a/include/linux/einj-cxl.h +++ b/include/linux/einj-cxl.h @@ -12,7 +12,7 @@ #include <linux/pci.h> -#if IS_ENABLED(CONFIG_ACPI_APEI_EINJ) +#if IS_REACHABLE(CONFIG_ACPI_APEI_EINJ) int einj_cxl_available_error_type_show(struct seq_file *m, void *v); int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type); int einj_cxl_inject_rch_error(u64 rcrb, u64 type); Thanks, Ben > +int einj_cxl_available_error_type_show(struct seq_file *m, void *v); > +int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type); > +int einj_cxl_inject_rch_error(u64 rcrb, u64 type); > +bool einj_is_initialized(void); > +#else > +static inline int einj_cxl_available_error_type_show(struct seq_file *m, > + void *v) > +{ > + return -ENXIO; > +} > + > +static inline int einj_cxl_type_show(struct seq_file *m, void *data) > +{ > + return -ENXIO; > +} > + > +static inline int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type) > +{ > + return -ENXIO; > +} > + > +static inline int einj_cxl_inject_rch_error(u64 rcrb, u64 type) > +{ > + return -ENXIO; > +} > + > +static inline bool einj_is_initialized(void) { return false; } > +#endif > + > +#endif
Ben Cheatham wrote: > > > On 2/8/24 2:00 PM, Ben Cheatham wrote: > > Implement CXL helper functions in the EINJ module for getting/injecting > > available CXL protocol error types and export them to sysfs under > > kernel/debug/cxl. > > > > The kernel/debug/cxl/einj_types file will print the available CXL > > protocol errors in the same format as the available_error_types > > file provided by the EINJ module. The > > kernel/debug/cxl/$dport_dev/einj_inject is functionally the same as the > > error_type and error_inject files provided by the EINJ module, i.e.: > > writing an error type into $dport_dev/einj_inject will inject said error > > type into the CXL dport represented by $dport_dev. > > > > Reviewed-by: Dan Williams <dan.j.williams@intel.com> > > Signed-off-by: Ben Cheatham <Benjamin.Cheatham@amd.com> > > --- > > Documentation/ABI/testing/debugfs-cxl | 22 ++++ > > MAINTAINERS | 1 + > > drivers/acpi/apei/einj.c | 158 ++++++++++++++++++++++++-- > > drivers/cxl/core/port.c | 39 +++++++ > > include/linux/einj-cxl.h | 45 ++++++++ > > 5 files changed, 255 insertions(+), 10 deletions(-) > > create mode 100644 include/linux/einj-cxl.h > > > > diff --git a/Documentation/ABI/testing/debugfs-cxl b/Documentation/ABI/testing/debugfs-cxl > > index fe61d372e3fa..bcd985cca66a 100644 > > --- a/Documentation/ABI/testing/debugfs-cxl > > +++ b/Documentation/ABI/testing/debugfs-cxl > > @@ -33,3 +33,25 @@ Description: > > device cannot clear poison from the address, -ENXIO is returned. > > The clear_poison attribute is only visible for devices > > supporting the capability. > > + > > +What: /sys/kernel/debug/cxl/einj_types > > +Date: January, 2024 > > +KernelVersion: v6.9 > > +Contact: linux-cxl@vger.kernel.org > > +Description: > > + (RO) Prints the CXL protocol error types made available by > > + the platform in the format "0x<error number> <error type>". > > + The <error number> can be written to einj_inject to inject > > + <error type> into a chosen dport. > > + > > +What: /sys/kernel/debug/cxl/$dport_dev/einj_inject > > +Date: January, 2024 > > +KernelVersion: v6.9 > > +Contact: linux-cxl@vger.kernel.org > > +Description: > > + (WO) Writing an integer to this file injects the corresponding > > + CXL protocol error into $dport_dev ($dport_dev will be a device > > + name from /sys/bus/pci/devices). The integer to type mapping for > > + injection can be found by reading from einj_types. If the dport > > + was enumerated in RCH mode, a CXL 1.1 error is injected, otherwise > > + a CXL 2.0 error is injected. > > diff --git a/MAINTAINERS b/MAINTAINERS > > index 9104430e148e..02d7feb2ed1f 100644 > > --- a/MAINTAINERS > > +++ b/MAINTAINERS > > @@ -5246,6 +5246,7 @@ L: linux-cxl@vger.kernel.org > > S: Maintained > > F: drivers/cxl/ > > F: include/uapi/linux/cxl_mem.h > > +F: include/linux/einj-cxl.h > > F: tools/testing/cxl/ > > > > COMPUTE EXPRESS LINK PMU (CPMU) > > diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c > > index 73dde21d3e89..9137cc01f791 100644 > > --- a/drivers/acpi/apei/einj.c > > +++ b/drivers/acpi/apei/einj.c > > @@ -21,6 +21,7 @@ > > #include <linux/nmi.h> > > #include <linux/delay.h> > > #include <linux/mm.h> > > +#include <linux/einj-cxl.h> > > #include <linux/platform_device.h> > > #include <asm/unaligned.h> > > > > @@ -37,6 +38,20 @@ > > #define MEM_ERROR_MASK (ACPI_EINJ_MEMORY_CORRECTABLE | \ > > ACPI_EINJ_MEMORY_UNCORRECTABLE | \ > > ACPI_EINJ_MEMORY_FATAL) > > +#ifndef ACPI_EINJ_CXL_CACHE_CORRECTABLE > > +#define ACPI_EINJ_CXL_CACHE_CORRECTABLE BIT(12) > > +#define ACPI_EINJ_CXL_CACHE_UNCORRECTABLE BIT(13) > > +#define ACPI_EINJ_CXL_CACHE_FATAL BIT(14) > > +#define ACPI_EINJ_CXL_MEM_CORRECTABLE BIT(15) > > +#define ACPI_EINJ_CXL_MEM_UNCORRECTABLE BIT(16) > > +#define ACPI_EINJ_CXL_MEM_FATAL BIT(17) > > +#endif > > +#define CXL_ERROR_MASK (ACPI_EINJ_CXL_CACHE_CORRECTABLE | \ > > + ACPI_EINJ_CXL_CACHE_UNCORRECTABLE | \ > > + ACPI_EINJ_CXL_CACHE_FATAL | \ > > + ACPI_EINJ_CXL_MEM_CORRECTABLE | \ > > + ACPI_EINJ_CXL_MEM_UNCORRECTABLE | \ > > + ACPI_EINJ_CXL_MEM_FATAL) > > > > /* > > * ACPI version 5 provides a SET_ERROR_TYPE_WITH_ADDRESS action. > > @@ -543,8 +558,11 @@ static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, > > if (type & ACPI5_VENDOR_BIT) { > > if (vendor_flags != SETWA_FLAGS_MEM) > > goto inject; > > - } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM)) > > + } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM)) { > > goto inject; > > + } else if ((type & CXL_ERROR_MASK) && (flags & SETWA_FLAGS_MEM)) { > > + goto inject; > > + } > > > > /* > > * Disallow crazy address masks that give BIOS leeway to pick > > @@ -596,6 +614,9 @@ static const char * const einj_error_type_string[] = { > > "0x00000200\tPlatform Correctable\n", > > "0x00000400\tPlatform Uncorrectable non-fatal\n", > > "0x00000800\tPlatform Uncorrectable fatal\n", > > +}; > > + > > +static const char * const einj_cxl_error_type_string[] = { > > "0x00001000\tCXL.cache Protocol Correctable\n", > > "0x00002000\tCXL.cache Protocol Uncorrectable non-fatal\n", > > "0x00004000\tCXL.cache Protocol Uncorrectable fatal\n", > > @@ -621,29 +642,44 @@ static int available_error_type_show(struct seq_file *m, void *v) > > > > DEFINE_SHOW_ATTRIBUTE(available_error_type); > > > > -static int error_type_get(void *data, u64 *val) > > +int einj_cxl_available_error_type_show(struct seq_file *m, void *v) > > { > > - *val = error_type; > > + int cxl_err, rc; > > + u32 available_error_type = 0; > > + > > + if (!einj_initialized) > > + return -ENXIO; > > + > > + rc = einj_get_available_error_type(&available_error_type); > > + if (rc) > > + return rc; > > + > > + for (int pos = 0; pos < ARRAY_SIZE(einj_cxl_error_type_string); pos++) { > > + cxl_err = ACPI_EINJ_CXL_CACHE_CORRECTABLE << pos; > > + > > + if (available_error_type & cxl_err) > > + seq_puts(m, einj_cxl_error_type_string[pos]); > > + } > > > > return 0; > > } > > +EXPORT_SYMBOL_NS_GPL(einj_cxl_available_error_type_show, CXL); > > > > -static int error_type_set(void *data, u64 val) > > +static int validate_error_type(u64 type) > > { > > + u32 tval, vendor, available_error_type = 0; > > int rc; > > - u32 available_error_type = 0; > > - u32 tval, vendor; > > > > /* Only low 32 bits for error type are valid */ > > - if (val & GENMASK_ULL(63, 32)) > > + if (type & GENMASK_ULL(63, 32)) > > return -EINVAL; > > > > /* > > * Vendor defined types have 0x80000000 bit set, and > > * are not enumerated by ACPI_EINJ_GET_ERROR_TYPE > > */ > > - vendor = val & ACPI5_VENDOR_BIT; > > - tval = val & 0x7fffffff; > > + vendor = type & ACPI5_VENDOR_BIT; > > + tval = type & 0x7fffffff; > > > > /* Only one error type can be specified */ > > if (tval & (tval - 1)) > > @@ -652,9 +688,105 @@ static int error_type_set(void *data, u64 val) > > rc = einj_get_available_error_type(&available_error_type); > > if (rc) > > return rc; > > - if (!(val & available_error_type)) > > + if (!(type & available_error_type)) > > return -EINVAL; > > } > > + > > + return 0; > > +} > > + > > +static int cxl_dport_get_sbdf(struct pci_dev *dport_dev, u64 *sbdf) > > +{ > > + struct pci_bus *pbus; > > + struct pci_host_bridge *bridge; > > + u64 seg = 0, bus; > > + > > + pbus = dport_dev->bus; > > + bridge = pci_find_host_bridge(pbus); > > + > > + if (!bridge) > > + return -ENODEV; > > + > > + if (bridge->domain_nr != PCI_DOMAIN_NR_NOT_SET) > > + seg = bridge->domain_nr << 24; > > + > > + bus = pbus->number << 16; > > + *sbdf = seg | bus | dport_dev->devfn; > > + > > + return 0; > > +} > > + > > +int einj_cxl_inject_rch_error(u64 rcrb, u64 type) > > +{ > > + u64 param1 = 0, param2 = 0, param4 = 0; > > + u32 flags; > > + int rc; > > + > > + if (!einj_initialized) > > + return -ENXIO; > > + > > + /* Only CXL error types can be specified */ > > + if (type & ~CXL_ERROR_MASK || (type & ACPI5_VENDOR_BIT)) > > + return -EINVAL; > > + > > + rc = validate_error_type(type); > > + if (rc) > > + return rc; > > + > > + param1 = (u64) rcrb; > > + param2 = 0xfffffffffffff000; > > + flags = 0x2; > > + > > + return einj_error_inject(type, flags, param1, param2, 0, param4); > > +} > > +EXPORT_SYMBOL_NS_GPL(einj_cxl_inject_rch_error, CXL); > > + > > +int einj_cxl_inject_error(struct pci_dev *dport, u64 type) > > +{ > > + u64 param1 = 0, param2 = 0, param4 = 0; > > + u32 flags; > > + int rc; > > + > > + if (!einj_initialized) > > + return -ENXIO; > > + > > + /* Only CXL error types can be specified */ > > + if (type & ~CXL_ERROR_MASK || (type & ACPI5_VENDOR_BIT)) > > + return -EINVAL; > > + > > + rc = validate_error_type(type); > > + if (rc) > > + return rc; > > + > > + rc = cxl_dport_get_sbdf(dport, ¶m4); > > + if (rc) > > + return rc; > > + > > + flags = 0x4; > > + > > + return einj_error_inject(type, flags, param1, param2, 0, param4); > > +} > > +EXPORT_SYMBOL_NS_GPL(einj_cxl_inject_error, CXL); > > + > > +static int error_type_get(void *data, u64 *val) > > +{ > > + *val = error_type; > > + > > + return 0; > > +} > > + > > +static int error_type_set(void *data, u64 val) > > +{ > > + int rc; > > + > > + /* CXL error types have to be injected from cxl debugfs */ > > + if (val & CXL_ERROR_MASK && !(val & ACPI5_VENDOR_BIT)) > > + return -EINVAL; > > + > > + rc = validate_error_type(val); > > + if (rc) > > + return rc; > > + > > error_type = val; > > > > return 0; > > @@ -690,6 +822,12 @@ static int einj_check_table(struct acpi_table_einj *einj_tab) > > return 0; > > } > > > > +bool einj_is_initialized(void) > > +{ > > + return einj_initialized; > > +} > > +EXPORT_SYMBOL_GPL(einj_is_initialized); > > + > > static int __init einj_probe(struct platform_device *pdev) > > { > > int rc; > > diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c > > index 8c00fd6be730..c52c92222be5 100644 > > --- a/drivers/cxl/core/port.c > > +++ b/drivers/cxl/core/port.c > > @@ -3,6 +3,7 @@ > > #include <linux/platform_device.h> > > #include <linux/memregion.h> > > #include <linux/workqueue.h> > > +#include <linux/einj-cxl.h> > > #include <linux/debugfs.h> > > #include <linux/device.h> > > #include <linux/module.h> > > @@ -797,6 +798,37 @@ static int cxl_dport_setup_regs(struct device *host, struct cxl_dport *dport, > > return rc; > > } > > > > +DEFINE_SHOW_ATTRIBUTE(einj_cxl_available_error_type); > > + > > +static int cxl_einj_inject(void *data, u64 type) > > +{ > > + struct cxl_dport *dport = data; > > + > > + if (dport->rch) > > + return einj_cxl_inject_rch_error(dport->rcrb.base, type); > > + > > + return einj_cxl_inject_error(to_pci_dev(dport->dport_dev), type); > > +} > > +DEFINE_DEBUGFS_ATTRIBUTE(cxl_einj_inject_fops, NULL, cxl_einj_inject, "%llx\n"); > > + > > +static void cxl_debugfs_create_dport_dir(struct cxl_dport *dport) > > +{ > > + struct dentry *dir; > > + > > + /* > > + * dport_dev needs to be a PCIe port for CXL 2.0+ ports because > > + * EINJ expects a dport SBDF to be specified for 2.0 error injection. > > + */ > > + if (!einj_is_initialized() || > > + (!dport->rch && !dev_is_pci(dport->dport_dev))) > > + return; > > + > > + dir = cxl_debugfs_create_dir(dev_name(dport->dport_dev)); > > + > > + debugfs_create_file("einj_inject", 0200, dir, dport, > > + &cxl_einj_inject_fops); > > +} > > + > > static struct cxl_port *__devm_cxl_add_port(struct device *host, > > struct device *uport_dev, > > resource_size_t component_reg_phys, > > @@ -1144,6 +1176,8 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, > > if (dev_is_pci(dport_dev)) > > dport->link_latency = cxl_pci_get_latency(to_pci_dev(dport_dev)); > > > > + cxl_debugfs_create_dport_dir(dport); > > + > > return dport; > > } > > > > @@ -2220,6 +2254,11 @@ static __init int cxl_core_init(void) > > > > cxl_debugfs = debugfs_create_dir("cxl", NULL); > > > > + if (einj_is_initialized()) { > > + debugfs_create_file("einj_types", 0400, cxl_debugfs, NULL, > > + &einj_cxl_available_error_type_fops); > > + } > > + > > cxl_mbox_init(); > > > > rc = cxl_memdev_init(); > > diff --git a/include/linux/einj-cxl.h b/include/linux/einj-cxl.h > > new file mode 100644 > > index 000000000000..af57cc8580a6 > > --- /dev/null > > +++ b/include/linux/einj-cxl.h > > @@ -0,0 +1,45 @@ > > +/* SPDX-License-Identifier: GPL-2.0-or-later */ > > +/* > > + * CXL protocol Error INJection support. > > + * > > + * Copyright (c) 2023 Advanced Micro Devices, Inc. > > + * All Rights Reserved. > > + * > > + * Author: Ben Cheatham <benjamin.cheatham@amd.com> > > + */ > > +#ifndef CXL_EINJ_H > > +#define CXL_EINJ_H > > + > > +#include <linux/pci.h> > > + > > +#if IS_ENABLED(CONFIG_ACPI_APEI_EINJ) > > I was testing some other work using this kernel and was getting a linker error > when I had CXL_BUS set to 'y' and ACPI_APEI_EINJ set to 'm'. I went ahead and > solved it by changing the line above to use IS_REACHABLE() instead. The change > shouldn't actually affect the functionality of the patch since this is in a build > configuration where the CXL driver isn't using the EINJ module since CONFIG_CXL_EINJ > is set to n due to Kconfig restraints. > > I can submit another version of this series with this fix, but I don't think it > makes much sense for a 1 line change, so I've but a diff below with the change > for whoever ends up putting this in their tree: I don't think this is the right fix. CONFIG_CXL_EINJ should be disabled when CONFIG_CXL_BUS is built-in. Looks like the polarity the dependency is backwards. The problem with IS_REACHABLE() is it removes the simple debug option of "send me your .config" to see why some functionality is not turned on. Could you test out flipping the dependency from: depends on ACPI_APEI_EINJ >= CXL_BUS ...to: depends on ACPI_APEI_EINJ <= CXL_BUS ?
Dan Williams wrote: > Ben Cheatham wrote: > > > > > > On 2/8/24 2:00 PM, Ben Cheatham wrote: > > > Implement CXL helper functions in the EINJ module for getting/injecting > > > available CXL protocol error types and export them to sysfs under > > > kernel/debug/cxl. > > > > > > The kernel/debug/cxl/einj_types file will print the available CXL > > > protocol errors in the same format as the available_error_types > > > file provided by the EINJ module. The > > > kernel/debug/cxl/$dport_dev/einj_inject is functionally the same as the > > > error_type and error_inject files provided by the EINJ module, i.e.: > > > writing an error type into $dport_dev/einj_inject will inject said error > > > type into the CXL dport represented by $dport_dev. > > > > > > Reviewed-by: Dan Williams <dan.j.williams@intel.com> > > > Signed-off-by: Ben Cheatham <Benjamin.Cheatham@amd.com> > > > --- > > > Documentation/ABI/testing/debugfs-cxl | 22 ++++ > > > MAINTAINERS | 1 + > > > drivers/acpi/apei/einj.c | 158 ++++++++++++++++++++++++-- > > > drivers/cxl/core/port.c | 39 +++++++ > > > include/linux/einj-cxl.h | 45 ++++++++ > > > 5 files changed, 255 insertions(+), 10 deletions(-) > > > create mode 100644 include/linux/einj-cxl.h > > > > > > diff --git a/Documentation/ABI/testing/debugfs-cxl b/Documentation/ABI/testing/debugfs-cxl > > > index fe61d372e3fa..bcd985cca66a 100644 > > > --- a/Documentation/ABI/testing/debugfs-cxl > > > +++ b/Documentation/ABI/testing/debugfs-cxl > > > @@ -33,3 +33,25 @@ Description: > > > device cannot clear poison from the address, -ENXIO is returned. > > > The clear_poison attribute is only visible for devices > > > supporting the capability. > > > + > > > +What: /sys/kernel/debug/cxl/einj_types > > > +Date: January, 2024 > > > +KernelVersion: v6.9 > > > +Contact: linux-cxl@vger.kernel.org > > > +Description: > > > + (RO) Prints the CXL protocol error types made available by > > > + the platform in the format "0x<error number> <error type>". > > > + The <error number> can be written to einj_inject to inject > > > + <error type> into a chosen dport. > > > + > > > +What: /sys/kernel/debug/cxl/$dport_dev/einj_inject > > > +Date: January, 2024 > > > +KernelVersion: v6.9 > > > +Contact: linux-cxl@vger.kernel.org > > > +Description: > > > + (WO) Writing an integer to this file injects the corresponding > > > + CXL protocol error into $dport_dev ($dport_dev will be a device > > > + name from /sys/bus/pci/devices). The integer to type mapping for > > > + injection can be found by reading from einj_types. If the dport > > > + was enumerated in RCH mode, a CXL 1.1 error is injected, otherwise > > > + a CXL 2.0 error is injected. > > > diff --git a/MAINTAINERS b/MAINTAINERS > > > index 9104430e148e..02d7feb2ed1f 100644 > > > --- a/MAINTAINERS > > > +++ b/MAINTAINERS > > > @@ -5246,6 +5246,7 @@ L: linux-cxl@vger.kernel.org > > > S: Maintained > > > F: drivers/cxl/ > > > F: include/uapi/linux/cxl_mem.h > > > +F: include/linux/einj-cxl.h > > > F: tools/testing/cxl/ > > > > > > COMPUTE EXPRESS LINK PMU (CPMU) > > > diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c > > > index 73dde21d3e89..9137cc01f791 100644 > > > --- a/drivers/acpi/apei/einj.c > > > +++ b/drivers/acpi/apei/einj.c > > > @@ -21,6 +21,7 @@ > > > #include <linux/nmi.h> > > > #include <linux/delay.h> > > > #include <linux/mm.h> > > > +#include <linux/einj-cxl.h> > > > #include <linux/platform_device.h> > > > #include <asm/unaligned.h> > > > > > > @@ -37,6 +38,20 @@ > > > #define MEM_ERROR_MASK (ACPI_EINJ_MEMORY_CORRECTABLE | \ > > > ACPI_EINJ_MEMORY_UNCORRECTABLE | \ > > > ACPI_EINJ_MEMORY_FATAL) > > > +#ifndef ACPI_EINJ_CXL_CACHE_CORRECTABLE > > > +#define ACPI_EINJ_CXL_CACHE_CORRECTABLE BIT(12) > > > +#define ACPI_EINJ_CXL_CACHE_UNCORRECTABLE BIT(13) > > > +#define ACPI_EINJ_CXL_CACHE_FATAL BIT(14) > > > +#define ACPI_EINJ_CXL_MEM_CORRECTABLE BIT(15) > > > +#define ACPI_EINJ_CXL_MEM_UNCORRECTABLE BIT(16) > > > +#define ACPI_EINJ_CXL_MEM_FATAL BIT(17) > > > +#endif > > > +#define CXL_ERROR_MASK (ACPI_EINJ_CXL_CACHE_CORRECTABLE | \ > > > + ACPI_EINJ_CXL_CACHE_UNCORRECTABLE | \ > > > + ACPI_EINJ_CXL_CACHE_FATAL | \ > > > + ACPI_EINJ_CXL_MEM_CORRECTABLE | \ > > > + ACPI_EINJ_CXL_MEM_UNCORRECTABLE | \ > > > + ACPI_EINJ_CXL_MEM_FATAL) > > > > > > /* > > > * ACPI version 5 provides a SET_ERROR_TYPE_WITH_ADDRESS action. > > > @@ -543,8 +558,11 @@ static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, > > > if (type & ACPI5_VENDOR_BIT) { > > > if (vendor_flags != SETWA_FLAGS_MEM) > > > goto inject; > > > - } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM)) > > > + } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM)) { > > > goto inject; > > > + } else if ((type & CXL_ERROR_MASK) && (flags & SETWA_FLAGS_MEM)) { > > > + goto inject; > > > + } > > > > > > /* > > > * Disallow crazy address masks that give BIOS leeway to pick > > > @@ -596,6 +614,9 @@ static const char * const einj_error_type_string[] = { > > > "0x00000200\tPlatform Correctable\n", > > > "0x00000400\tPlatform Uncorrectable non-fatal\n", > > > "0x00000800\tPlatform Uncorrectable fatal\n", > > > +}; > > > + > > > +static const char * const einj_cxl_error_type_string[] = { > > > "0x00001000\tCXL.cache Protocol Correctable\n", > > > "0x00002000\tCXL.cache Protocol Uncorrectable non-fatal\n", > > > "0x00004000\tCXL.cache Protocol Uncorrectable fatal\n", > > > @@ -621,29 +642,44 @@ static int available_error_type_show(struct seq_file *m, void *v) > > > > > > DEFINE_SHOW_ATTRIBUTE(available_error_type); > > > > > > -static int error_type_get(void *data, u64 *val) > > > +int einj_cxl_available_error_type_show(struct seq_file *m, void *v) > > > { > > > - *val = error_type; > > > + int cxl_err, rc; > > > + u32 available_error_type = 0; > > > + > > > + if (!einj_initialized) > > > + return -ENXIO; > > > + > > > + rc = einj_get_available_error_type(&available_error_type); > > > + if (rc) > > > + return rc; > > > + > > > + for (int pos = 0; pos < ARRAY_SIZE(einj_cxl_error_type_string); pos++) { > > > + cxl_err = ACPI_EINJ_CXL_CACHE_CORRECTABLE << pos; > > > + > > > + if (available_error_type & cxl_err) > > > + seq_puts(m, einj_cxl_error_type_string[pos]); > > > + } > > > > > > return 0; > > > } > > > +EXPORT_SYMBOL_NS_GPL(einj_cxl_available_error_type_show, CXL); > > > > > > -static int error_type_set(void *data, u64 val) > > > +static int validate_error_type(u64 type) > > > { > > > + u32 tval, vendor, available_error_type = 0; > > > int rc; > > > - u32 available_error_type = 0; > > > - u32 tval, vendor; > > > > > > /* Only low 32 bits for error type are valid */ > > > - if (val & GENMASK_ULL(63, 32)) > > > + if (type & GENMASK_ULL(63, 32)) > > > return -EINVAL; > > > > > > /* > > > * Vendor defined types have 0x80000000 bit set, and > > > * are not enumerated by ACPI_EINJ_GET_ERROR_TYPE > > > */ > > > - vendor = val & ACPI5_VENDOR_BIT; > > > - tval = val & 0x7fffffff; > > > + vendor = type & ACPI5_VENDOR_BIT; > > > + tval = type & 0x7fffffff; > > > > > > /* Only one error type can be specified */ > > > if (tval & (tval - 1)) > > > @@ -652,9 +688,105 @@ static int error_type_set(void *data, u64 val) > > > rc = einj_get_available_error_type(&available_error_type); > > > if (rc) > > > return rc; > > > - if (!(val & available_error_type)) > > > + if (!(type & available_error_type)) > > > return -EINVAL; > > > } > > > + > > > + return 0; > > > +} > > > + > > > +static int cxl_dport_get_sbdf(struct pci_dev *dport_dev, u64 *sbdf) > > > +{ > > > + struct pci_bus *pbus; > > > + struct pci_host_bridge *bridge; > > > + u64 seg = 0, bus; > > > + > > > + pbus = dport_dev->bus; > > > + bridge = pci_find_host_bridge(pbus); > > > + > > > + if (!bridge) > > > + return -ENODEV; > > > + > > > + if (bridge->domain_nr != PCI_DOMAIN_NR_NOT_SET) > > > + seg = bridge->domain_nr << 24; > > > + > > > + bus = pbus->number << 16; > > > + *sbdf = seg | bus | dport_dev->devfn; > > > + > > > + return 0; > > > +} > > > + > > > +int einj_cxl_inject_rch_error(u64 rcrb, u64 type) > > > +{ > > > + u64 param1 = 0, param2 = 0, param4 = 0; > > > + u32 flags; > > > + int rc; > > > + > > > + if (!einj_initialized) > > > + return -ENXIO; > > > + > > > + /* Only CXL error types can be specified */ > > > + if (type & ~CXL_ERROR_MASK || (type & ACPI5_VENDOR_BIT)) > > > + return -EINVAL; > > > + > > > + rc = validate_error_type(type); > > > + if (rc) > > > + return rc; > > > + > > > + param1 = (u64) rcrb; > > > + param2 = 0xfffffffffffff000; > > > + flags = 0x2; > > > + > > > + return einj_error_inject(type, flags, param1, param2, 0, param4); > > > +} > > > +EXPORT_SYMBOL_NS_GPL(einj_cxl_inject_rch_error, CXL); > > > + > > > +int einj_cxl_inject_error(struct pci_dev *dport, u64 type) > > > +{ > > > + u64 param1 = 0, param2 = 0, param4 = 0; > > > + u32 flags; > > > + int rc; > > > + > > > + if (!einj_initialized) > > > + return -ENXIO; > > > + > > > + /* Only CXL error types can be specified */ > > > + if (type & ~CXL_ERROR_MASK || (type & ACPI5_VENDOR_BIT)) > > > + return -EINVAL; > > > + > > > + rc = validate_error_type(type); > > > + if (rc) > > > + return rc; > > > + > > > + rc = cxl_dport_get_sbdf(dport, ¶m4); > > > + if (rc) > > > + return rc; > > > + > > > + flags = 0x4; > > > + > > > + return einj_error_inject(type, flags, param1, param2, 0, param4); > > > +} > > > +EXPORT_SYMBOL_NS_GPL(einj_cxl_inject_error, CXL); > > > + > > > +static int error_type_get(void *data, u64 *val) > > > +{ > > > + *val = error_type; > > > + > > > + return 0; > > > +} > > > + > > > +static int error_type_set(void *data, u64 val) > > > +{ > > > + int rc; > > > + > > > + /* CXL error types have to be injected from cxl debugfs */ > > > + if (val & CXL_ERROR_MASK && !(val & ACPI5_VENDOR_BIT)) > > > + return -EINVAL; > > > + > > > + rc = validate_error_type(val); > > > + if (rc) > > > + return rc; > > > + > > > error_type = val; > > > > > > return 0; > > > @@ -690,6 +822,12 @@ static int einj_check_table(struct acpi_table_einj *einj_tab) > > > return 0; > > > } > > > > > > +bool einj_is_initialized(void) > > > +{ > > > + return einj_initialized; > > > +} > > > +EXPORT_SYMBOL_GPL(einj_is_initialized); > > > + > > > static int __init einj_probe(struct platform_device *pdev) > > > { > > > int rc; > > > diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c > > > index 8c00fd6be730..c52c92222be5 100644 > > > --- a/drivers/cxl/core/port.c > > > +++ b/drivers/cxl/core/port.c > > > @@ -3,6 +3,7 @@ > > > #include <linux/platform_device.h> > > > #include <linux/memregion.h> > > > #include <linux/workqueue.h> > > > +#include <linux/einj-cxl.h> > > > #include <linux/debugfs.h> > > > #include <linux/device.h> > > > #include <linux/module.h> > > > @@ -797,6 +798,37 @@ static int cxl_dport_setup_regs(struct device *host, struct cxl_dport *dport, > > > return rc; > > > } > > > > > > +DEFINE_SHOW_ATTRIBUTE(einj_cxl_available_error_type); > > > + > > > +static int cxl_einj_inject(void *data, u64 type) > > > +{ > > > + struct cxl_dport *dport = data; > > > + > > > + if (dport->rch) > > > + return einj_cxl_inject_rch_error(dport->rcrb.base, type); > > > + > > > + return einj_cxl_inject_error(to_pci_dev(dport->dport_dev), type); > > > +} > > > +DEFINE_DEBUGFS_ATTRIBUTE(cxl_einj_inject_fops, NULL, cxl_einj_inject, "%llx\n"); > > > + > > > +static void cxl_debugfs_create_dport_dir(struct cxl_dport *dport) > > > +{ > > > + struct dentry *dir; > > > + > > > + /* > > > + * dport_dev needs to be a PCIe port for CXL 2.0+ ports because > > > + * EINJ expects a dport SBDF to be specified for 2.0 error injection. > > > + */ > > > + if (!einj_is_initialized() || > > > + (!dport->rch && !dev_is_pci(dport->dport_dev))) > > > + return; > > > + > > > + dir = cxl_debugfs_create_dir(dev_name(dport->dport_dev)); > > > + > > > + debugfs_create_file("einj_inject", 0200, dir, dport, > > > + &cxl_einj_inject_fops); > > > +} > > > + > > > static struct cxl_port *__devm_cxl_add_port(struct device *host, > > > struct device *uport_dev, > > > resource_size_t component_reg_phys, > > > @@ -1144,6 +1176,8 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, > > > if (dev_is_pci(dport_dev)) > > > dport->link_latency = cxl_pci_get_latency(to_pci_dev(dport_dev)); > > > > > > + cxl_debugfs_create_dport_dir(dport); > > > + > > > return dport; > > > } > > > > > > @@ -2220,6 +2254,11 @@ static __init int cxl_core_init(void) > > > > > > cxl_debugfs = debugfs_create_dir("cxl", NULL); > > > > > > + if (einj_is_initialized()) { > > > + debugfs_create_file("einj_types", 0400, cxl_debugfs, NULL, > > > + &einj_cxl_available_error_type_fops); > > > + } > > > + > > > cxl_mbox_init(); > > > > > > rc = cxl_memdev_init(); > > > diff --git a/include/linux/einj-cxl.h b/include/linux/einj-cxl.h > > > new file mode 100644 > > > index 000000000000..af57cc8580a6 > > > --- /dev/null > > > +++ b/include/linux/einj-cxl.h > > > @@ -0,0 +1,45 @@ > > > +/* SPDX-License-Identifier: GPL-2.0-or-later */ > > > +/* > > > + * CXL protocol Error INJection support. > > > + * > > > + * Copyright (c) 2023 Advanced Micro Devices, Inc. > > > + * All Rights Reserved. > > > + * > > > + * Author: Ben Cheatham <benjamin.cheatham@amd.com> > > > + */ > > > +#ifndef CXL_EINJ_H > > > +#define CXL_EINJ_H > > > + > > > +#include <linux/pci.h> > > > + > > > +#if IS_ENABLED(CONFIG_ACPI_APEI_EINJ) > > > > I was testing some other work using this kernel and was getting a linker error > > when I had CXL_BUS set to 'y' and ACPI_APEI_EINJ set to 'm'. I went ahead and > > solved it by changing the line above to use IS_REACHABLE() instead. The change > > shouldn't actually affect the functionality of the patch since this is in a build > > configuration where the CXL driver isn't using the EINJ module since CONFIG_CXL_EINJ > > is set to n due to Kconfig restraints. > > > > I can submit another version of this series with this fix, but I don't think it > > makes much sense for a 1 line change, so I've but a diff below with the change > > for whoever ends up putting this in their tree: > > I don't think this is the right fix. CONFIG_CXL_EINJ should be disabled when > CONFIG_CXL_BUS is built-in. Looks like the polarity the dependency is > backwards. The problem with IS_REACHABLE() is it removes the simple > debug option of "send me your .config" to see why some functionality is > not turned on. > > Could you test out flipping the dependency from: > > depends on ACPI_APEI_EINJ >= CXL_BUS > > ...to: > > depends on ACPI_APEI_EINJ <= CXL_BUS Wait, no y == 2 and m == 1, so: depends on ACPI_APEI_EINJ >= CXL_BUS ...is correct, but you need IS_ENABLED(CONFIG_CXL_EINJ), not IS_REACHABLE to guard those exports.
On 2/10/24 12:31 AM, Dan Williams wrote: > Dan Williams wrote: >> Ben Cheatham wrote: >>> >>> >>> On 2/8/24 2:00 PM, Ben Cheatham wrote: >>>> Implement CXL helper functions in the EINJ module for getting/injecting >>>> available CXL protocol error types and export them to sysfs under >>>> kernel/debug/cxl. >>>> >>>> The kernel/debug/cxl/einj_types file will print the available CXL >>>> protocol errors in the same format as the available_error_types >>>> file provided by the EINJ module. The >>>> kernel/debug/cxl/$dport_dev/einj_inject is functionally the same as the >>>> error_type and error_inject files provided by the EINJ module, i.e.: >>>> writing an error type into $dport_dev/einj_inject will inject said error >>>> type into the CXL dport represented by $dport_dev. >>>> >>>> Reviewed-by: Dan Williams <dan.j.williams@intel.com> >>>> Signed-off-by: Ben Cheatham <Benjamin.Cheatham@amd.com> >>>> --- >>>> Documentation/ABI/testing/debugfs-cxl | 22 ++++ >>>> MAINTAINERS | 1 + >>>> drivers/acpi/apei/einj.c | 158 ++++++++++++++++++++++++-- >>>> drivers/cxl/core/port.c | 39 +++++++ >>>> include/linux/einj-cxl.h | 45 ++++++++ >>>> 5 files changed, 255 insertions(+), 10 deletions(-) >>>> create mode 100644 include/linux/einj-cxl.h >>>> >>>> diff --git a/Documentation/ABI/testing/debugfs-cxl b/Documentation/ABI/testing/debugfs-cxl >>>> index fe61d372e3fa..bcd985cca66a 100644 >>>> --- a/Documentation/ABI/testing/debugfs-cxl >>>> +++ b/Documentation/ABI/testing/debugfs-cxl >>>> @@ -33,3 +33,25 @@ Description: >>>> device cannot clear poison from the address, -ENXIO is returned. >>>> The clear_poison attribute is only visible for devices >>>> supporting the capability. >>>> + >>>> +What: /sys/kernel/debug/cxl/einj_types >>>> +Date: January, 2024 >>>> +KernelVersion: v6.9 >>>> +Contact: linux-cxl@vger.kernel.org >>>> +Description: >>>> + (RO) Prints the CXL protocol error types made available by >>>> + the platform in the format "0x<error number> <error type>". >>>> + The <error number> can be written to einj_inject to inject >>>> + <error type> into a chosen dport. >>>> + >>>> +What: /sys/kernel/debug/cxl/$dport_dev/einj_inject >>>> +Date: January, 2024 >>>> +KernelVersion: v6.9 >>>> +Contact: linux-cxl@vger.kernel.org >>>> +Description: >>>> + (WO) Writing an integer to this file injects the corresponding >>>> + CXL protocol error into $dport_dev ($dport_dev will be a device >>>> + name from /sys/bus/pci/devices). The integer to type mapping for >>>> + injection can be found by reading from einj_types. If the dport >>>> + was enumerated in RCH mode, a CXL 1.1 error is injected, otherwise >>>> + a CXL 2.0 error is injected. >>>> diff --git a/MAINTAINERS b/MAINTAINERS >>>> index 9104430e148e..02d7feb2ed1f 100644 >>>> --- a/MAINTAINERS >>>> +++ b/MAINTAINERS >>>> @@ -5246,6 +5246,7 @@ L: linux-cxl@vger.kernel.org >>>> S: Maintained >>>> F: drivers/cxl/ >>>> F: include/uapi/linux/cxl_mem.h >>>> +F: include/linux/einj-cxl.h >>>> F: tools/testing/cxl/ >>>> >>>> COMPUTE EXPRESS LINK PMU (CPMU) >>>> diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c >>>> index 73dde21d3e89..9137cc01f791 100644 >>>> --- a/drivers/acpi/apei/einj.c >>>> +++ b/drivers/acpi/apei/einj.c >>>> @@ -21,6 +21,7 @@ >>>> #include <linux/nmi.h> >>>> #include <linux/delay.h> >>>> #include <linux/mm.h> >>>> +#include <linux/einj-cxl.h> >>>> #include <linux/platform_device.h> >>>> #include <asm/unaligned.h> >>>> >>>> @@ -37,6 +38,20 @@ >>>> #define MEM_ERROR_MASK (ACPI_EINJ_MEMORY_CORRECTABLE | \ >>>> ACPI_EINJ_MEMORY_UNCORRECTABLE | \ >>>> ACPI_EINJ_MEMORY_FATAL) >>>> +#ifndef ACPI_EINJ_CXL_CACHE_CORRECTABLE >>>> +#define ACPI_EINJ_CXL_CACHE_CORRECTABLE BIT(12) >>>> +#define ACPI_EINJ_CXL_CACHE_UNCORRECTABLE BIT(13) >>>> +#define ACPI_EINJ_CXL_CACHE_FATAL BIT(14) >>>> +#define ACPI_EINJ_CXL_MEM_CORRECTABLE BIT(15) >>>> +#define ACPI_EINJ_CXL_MEM_UNCORRECTABLE BIT(16) >>>> +#define ACPI_EINJ_CXL_MEM_FATAL BIT(17) >>>> +#endif >>>> +#define CXL_ERROR_MASK (ACPI_EINJ_CXL_CACHE_CORRECTABLE | \ >>>> + ACPI_EINJ_CXL_CACHE_UNCORRECTABLE | \ >>>> + ACPI_EINJ_CXL_CACHE_FATAL | \ >>>> + ACPI_EINJ_CXL_MEM_CORRECTABLE | \ >>>> + ACPI_EINJ_CXL_MEM_UNCORRECTABLE | \ >>>> + ACPI_EINJ_CXL_MEM_FATAL) >>>> >>>> /* >>>> * ACPI version 5 provides a SET_ERROR_TYPE_WITH_ADDRESS action. >>>> @@ -543,8 +558,11 @@ static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, >>>> if (type & ACPI5_VENDOR_BIT) { >>>> if (vendor_flags != SETWA_FLAGS_MEM) >>>> goto inject; >>>> - } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM)) >>>> + } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM)) { >>>> goto inject; >>>> + } else if ((type & CXL_ERROR_MASK) && (flags & SETWA_FLAGS_MEM)) { >>>> + goto inject; >>>> + } >>>> >>>> /* >>>> * Disallow crazy address masks that give BIOS leeway to pick >>>> @@ -596,6 +614,9 @@ static const char * const einj_error_type_string[] = { >>>> "0x00000200\tPlatform Correctable\n", >>>> "0x00000400\tPlatform Uncorrectable non-fatal\n", >>>> "0x00000800\tPlatform Uncorrectable fatal\n", >>>> +}; >>>> + >>>> +static const char * const einj_cxl_error_type_string[] = { >>>> "0x00001000\tCXL.cache Protocol Correctable\n", >>>> "0x00002000\tCXL.cache Protocol Uncorrectable non-fatal\n", >>>> "0x00004000\tCXL.cache Protocol Uncorrectable fatal\n", >>>> @@ -621,29 +642,44 @@ static int available_error_type_show(struct seq_file *m, void *v) >>>> >>>> DEFINE_SHOW_ATTRIBUTE(available_error_type); >>>> >>>> -static int error_type_get(void *data, u64 *val) >>>> +int einj_cxl_available_error_type_show(struct seq_file *m, void *v) >>>> { >>>> - *val = error_type; >>>> + int cxl_err, rc; >>>> + u32 available_error_type = 0; >>>> + >>>> + if (!einj_initialized) >>>> + return -ENXIO; >>>> + >>>> + rc = einj_get_available_error_type(&available_error_type); >>>> + if (rc) >>>> + return rc; >>>> + >>>> + for (int pos = 0; pos < ARRAY_SIZE(einj_cxl_error_type_string); pos++) { >>>> + cxl_err = ACPI_EINJ_CXL_CACHE_CORRECTABLE << pos; >>>> + >>>> + if (available_error_type & cxl_err) >>>> + seq_puts(m, einj_cxl_error_type_string[pos]); >>>> + } >>>> >>>> return 0; >>>> } >>>> +EXPORT_SYMBOL_NS_GPL(einj_cxl_available_error_type_show, CXL); >>>> >>>> -static int error_type_set(void *data, u64 val) >>>> +static int validate_error_type(u64 type) >>>> { >>>> + u32 tval, vendor, available_error_type = 0; >>>> int rc; >>>> - u32 available_error_type = 0; >>>> - u32 tval, vendor; >>>> >>>> /* Only low 32 bits for error type are valid */ >>>> - if (val & GENMASK_ULL(63, 32)) >>>> + if (type & GENMASK_ULL(63, 32)) >>>> return -EINVAL; >>>> >>>> /* >>>> * Vendor defined types have 0x80000000 bit set, and >>>> * are not enumerated by ACPI_EINJ_GET_ERROR_TYPE >>>> */ >>>> - vendor = val & ACPI5_VENDOR_BIT; >>>> - tval = val & 0x7fffffff; >>>> + vendor = type & ACPI5_VENDOR_BIT; >>>> + tval = type & 0x7fffffff; >>>> >>>> /* Only one error type can be specified */ >>>> if (tval & (tval - 1)) >>>> @@ -652,9 +688,105 @@ static int error_type_set(void *data, u64 val) >>>> rc = einj_get_available_error_type(&available_error_type); >>>> if (rc) >>>> return rc; >>>> - if (!(val & available_error_type)) >>>> + if (!(type & available_error_type)) >>>> return -EINVAL; >>>> } >>>> + >>>> + return 0; >>>> +} >>>> + >>>> +static int cxl_dport_get_sbdf(struct pci_dev *dport_dev, u64 *sbdf) >>>> +{ >>>> + struct pci_bus *pbus; >>>> + struct pci_host_bridge *bridge; >>>> + u64 seg = 0, bus; >>>> + >>>> + pbus = dport_dev->bus; >>>> + bridge = pci_find_host_bridge(pbus); >>>> + >>>> + if (!bridge) >>>> + return -ENODEV; >>>> + >>>> + if (bridge->domain_nr != PCI_DOMAIN_NR_NOT_SET) >>>> + seg = bridge->domain_nr << 24; >>>> + >>>> + bus = pbus->number << 16; >>>> + *sbdf = seg | bus | dport_dev->devfn; >>>> + >>>> + return 0; >>>> +} >>>> + >>>> +int einj_cxl_inject_rch_error(u64 rcrb, u64 type) >>>> +{ >>>> + u64 param1 = 0, param2 = 0, param4 = 0; >>>> + u32 flags; >>>> + int rc; >>>> + >>>> + if (!einj_initialized) >>>> + return -ENXIO; >>>> + >>>> + /* Only CXL error types can be specified */ >>>> + if (type & ~CXL_ERROR_MASK || (type & ACPI5_VENDOR_BIT)) >>>> + return -EINVAL; >>>> + >>>> + rc = validate_error_type(type); >>>> + if (rc) >>>> + return rc; >>>> + >>>> + param1 = (u64) rcrb; >>>> + param2 = 0xfffffffffffff000; >>>> + flags = 0x2; >>>> + >>>> + return einj_error_inject(type, flags, param1, param2, 0, param4); >>>> +} >>>> +EXPORT_SYMBOL_NS_GPL(einj_cxl_inject_rch_error, CXL); >>>> + >>>> +int einj_cxl_inject_error(struct pci_dev *dport, u64 type) >>>> +{ >>>> + u64 param1 = 0, param2 = 0, param4 = 0; >>>> + u32 flags; >>>> + int rc; >>>> + >>>> + if (!einj_initialized) >>>> + return -ENXIO; >>>> + >>>> + /* Only CXL error types can be specified */ >>>> + if (type & ~CXL_ERROR_MASK || (type & ACPI5_VENDOR_BIT)) >>>> + return -EINVAL; >>>> + >>>> + rc = validate_error_type(type); >>>> + if (rc) >>>> + return rc; >>>> + >>>> + rc = cxl_dport_get_sbdf(dport, ¶m4); >>>> + if (rc) >>>> + return rc; >>>> + >>>> + flags = 0x4; >>>> + >>>> + return einj_error_inject(type, flags, param1, param2, 0, param4); >>>> +} >>>> +EXPORT_SYMBOL_NS_GPL(einj_cxl_inject_error, CXL); >>>> + >>>> +static int error_type_get(void *data, u64 *val) >>>> +{ >>>> + *val = error_type; >>>> + >>>> + return 0; >>>> +} >>>> + >>>> +static int error_type_set(void *data, u64 val) >>>> +{ >>>> + int rc; >>>> + >>>> + /* CXL error types have to be injected from cxl debugfs */ >>>> + if (val & CXL_ERROR_MASK && !(val & ACPI5_VENDOR_BIT)) >>>> + return -EINVAL; >>>> + >>>> + rc = validate_error_type(val); >>>> + if (rc) >>>> + return rc; >>>> + >>>> error_type = val; >>>> >>>> return 0; >>>> @@ -690,6 +822,12 @@ static int einj_check_table(struct acpi_table_einj *einj_tab) >>>> return 0; >>>> } >>>> >>>> +bool einj_is_initialized(void) >>>> +{ >>>> + return einj_initialized; >>>> +} >>>> +EXPORT_SYMBOL_GPL(einj_is_initialized); >>>> + >>>> static int __init einj_probe(struct platform_device *pdev) >>>> { >>>> int rc; >>>> diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c >>>> index 8c00fd6be730..c52c92222be5 100644 >>>> --- a/drivers/cxl/core/port.c >>>> +++ b/drivers/cxl/core/port.c >>>> @@ -3,6 +3,7 @@ >>>> #include <linux/platform_device.h> >>>> #include <linux/memregion.h> >>>> #include <linux/workqueue.h> >>>> +#include <linux/einj-cxl.h> >>>> #include <linux/debugfs.h> >>>> #include <linux/device.h> >>>> #include <linux/module.h> >>>> @@ -797,6 +798,37 @@ static int cxl_dport_setup_regs(struct device *host, struct cxl_dport *dport, >>>> return rc; >>>> } >>>> >>>> +DEFINE_SHOW_ATTRIBUTE(einj_cxl_available_error_type); >>>> + >>>> +static int cxl_einj_inject(void *data, u64 type) >>>> +{ >>>> + struct cxl_dport *dport = data; >>>> + >>>> + if (dport->rch) >>>> + return einj_cxl_inject_rch_error(dport->rcrb.base, type); >>>> + >>>> + return einj_cxl_inject_error(to_pci_dev(dport->dport_dev), type); >>>> +} >>>> +DEFINE_DEBUGFS_ATTRIBUTE(cxl_einj_inject_fops, NULL, cxl_einj_inject, "%llx\n"); >>>> + >>>> +static void cxl_debugfs_create_dport_dir(struct cxl_dport *dport) >>>> +{ >>>> + struct dentry *dir; >>>> + >>>> + /* >>>> + * dport_dev needs to be a PCIe port for CXL 2.0+ ports because >>>> + * EINJ expects a dport SBDF to be specified for 2.0 error injection. >>>> + */ >>>> + if (!einj_is_initialized() || >>>> + (!dport->rch && !dev_is_pci(dport->dport_dev))) >>>> + return; >>>> + >>>> + dir = cxl_debugfs_create_dir(dev_name(dport->dport_dev)); >>>> + >>>> + debugfs_create_file("einj_inject", 0200, dir, dport, >>>> + &cxl_einj_inject_fops); >>>> +} >>>> + >>>> static struct cxl_port *__devm_cxl_add_port(struct device *host, >>>> struct device *uport_dev, >>>> resource_size_t component_reg_phys, >>>> @@ -1144,6 +1176,8 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, >>>> if (dev_is_pci(dport_dev)) >>>> dport->link_latency = cxl_pci_get_latency(to_pci_dev(dport_dev)); >>>> >>>> + cxl_debugfs_create_dport_dir(dport); >>>> + >>>> return dport; >>>> } >>>> >>>> @@ -2220,6 +2254,11 @@ static __init int cxl_core_init(void) >>>> >>>> cxl_debugfs = debugfs_create_dir("cxl", NULL); >>>> >>>> + if (einj_is_initialized()) { >>>> + debugfs_create_file("einj_types", 0400, cxl_debugfs, NULL, >>>> + &einj_cxl_available_error_type_fops); >>>> + } >>>> + >>>> cxl_mbox_init(); >>>> >>>> rc = cxl_memdev_init(); >>>> diff --git a/include/linux/einj-cxl.h b/include/linux/einj-cxl.h >>>> new file mode 100644 >>>> index 000000000000..af57cc8580a6 >>>> --- /dev/null >>>> +++ b/include/linux/einj-cxl.h >>>> @@ -0,0 +1,45 @@ >>>> +/* SPDX-License-Identifier: GPL-2.0-or-later */ >>>> +/* >>>> + * CXL protocol Error INJection support. >>>> + * >>>> + * Copyright (c) 2023 Advanced Micro Devices, Inc. >>>> + * All Rights Reserved. >>>> + * >>>> + * Author: Ben Cheatham <benjamin.cheatham@amd.com> >>>> + */ >>>> +#ifndef CXL_EINJ_H >>>> +#define CXL_EINJ_H >>>> + >>>> +#include <linux/pci.h> >>>> + >>>> +#if IS_ENABLED(CONFIG_ACPI_APEI_EINJ) >>> >>> I was testing some other work using this kernel and was getting a linker error >>> when I had CXL_BUS set to 'y' and ACPI_APEI_EINJ set to 'm'. I went ahead and >>> solved it by changing the line above to use IS_REACHABLE() instead. The change >>> shouldn't actually affect the functionality of the patch since this is in a build >>> configuration where the CXL driver isn't using the EINJ module since CONFIG_CXL_EINJ >>> is set to n due to Kconfig restraints. >>> >>> I can submit another version of this series with this fix, but I don't think it >>> makes much sense for a 1 line change, so I've but a diff below with the change >>> for whoever ends up putting this in their tree: >> >> I don't think this is the right fix. CONFIG_CXL_EINJ should be disabled when >> CONFIG_CXL_BUS is built-in. Looks like the polarity the dependency is >> backwards. The problem with IS_REACHABLE() is it removes the simple >> debug option of "send me your .config" to see why some functionality is >> not turned on. >> >> Could you test out flipping the dependency from: >> >> depends on ACPI_APEI_EINJ >= CXL_BUS >> >> ...to: >> >> depends on ACPI_APEI_EINJ <= CXL_BUS > > Wait, no y == 2 and m == 1, so: > > depends on ACPI_APEI_EINJ >= CXL_BUS > > ...is correct, but you need IS_ENABLED(CONFIG_CXL_EINJ), not > IS_REACHABLE to guard those exports. Ok, I'll test it out today and let you know if it works. Thanks, Ben
On 2/12/24 8:12 AM, Ben Cheatham wrote: > > > On 2/10/24 12:31 AM, Dan Williams wrote: [snip] >>> >>> I don't think this is the right fix. CONFIG_CXL_EINJ should be disabled when >>> CONFIG_CXL_BUS is built-in. Looks like the polarity the dependency is >>> backwards. The problem with IS_REACHABLE() is it removes the simple >>> debug option of "send me your .config" to see why some functionality is >>> not turned on. >>> >>> Could you test out flipping the dependency from: >>> >>> depends on ACPI_APEI_EINJ >= CXL_BUS >>> >>> ...to: >>> >>> depends on ACPI_APEI_EINJ <= CXL_BUS >> >> Wait, no y == 2 and m == 1, so: >> >> depends on ACPI_APEI_EINJ >= CXL_BUS >> >> ...is correct, but you need IS_ENABLED(CONFIG_CXL_EINJ), not >> IS_REACHABLE to guard those exports. > > Ok, I'll test it out today and let you know if it works. > Alright, I tested it yesterday and that fix doesn't work. When I change the guard to IS_ENABLED(CONFIG_CXL_EINJ) and set CONFIG_CXL_EINJ=n and CONFIG_ACPI_APEI_EINJ=y/m I get a redefinition error in einj.c. I've found a 4 ways to fix this so far, here they are: 1. Leave the guard as IS_ENABLED(CONFIG_ACPI_APEI_EINJ) and add IS_ENABLED(CONFIG_CXL_EINJ) around any calls to the functions in einj-cxl.h in cxl/core/port.c. 2. Change the guard to IS_ENABLED(CONFIG_CXL_EINJ) || IS_REACHABLE(CONFIG_ACPI_APEI_EINJ) and add a IS_ENABLED(CONFIG_CXL_EINJ) check at the beginning of the einj_cxl_* functions in einj.c. I'm not a fan of this one since it doesn't actually change what's built and just "artificially" gates the functionality. 3. Change the guard to IS_ENABLED(CONFIG_CXL_EINJ) and add a #if IS_ENABLED(CONFIG_CXL_EINJ) guard around the einj_cxl_* functions in einj.c. (I know this is pretty undesirable, but I thought I'd mention it for posterity's sake). 4. Change the Kconfig definition of CXL_EINJ to depends on ACPI_APEI_EINJ = CXL_BUS. My vote is for option 4, but I figured I should ask to see if you (or anyone else reading this thread) likes a different one or has a better idea than the ones I laid out. Thanks, Ben
Ben Cheatham wrote: [..] > >> Wait, no y == 2 and m == 1, so: > >> > >> depends on ACPI_APEI_EINJ >= CXL_BUS > >> > >> ...is correct, but you need IS_ENABLED(CONFIG_CXL_EINJ), not > >> IS_REACHABLE to guard those exports. > > > > Ok, I'll test it out today and let you know if it works. > > > > Alright, I tested it yesterday and that fix doesn't work. When I > change the guard to IS_ENABLED(CONFIG_CXL_EINJ) and set > CONFIG_CXL_EINJ=n and CONFIG_ACPI_APEI_EINJ=y/m I get a redefinition > error in einj.c. I've found a 4 ways to fix this so far, here they > are: > [..] > 4. Change the Kconfig definition of CXL_EINJ to depends on ACPI_APEI_EINJ = CXL_BUS. > > My vote is for option 4, but I figured I should ask to see if you (or > anyone else reading this thread) likes a different one or has a better > idea than the ones I laid out. Looks good to me, it is the easiest to understand. Everything else looks like a Kbuild unit test that only a build-bot would love. EINJ needs to remain modular for the non-CXL case and if someone decides they want to turn on this debug feature then CXL needs to be modular too.
On Thu, 8 Feb 2024 14:00:41 -0600 Ben Cheatham <Benjamin.Cheatham@amd.com> wrote: > Implement CXL helper functions in the EINJ module for getting/injecting > available CXL protocol error types and export them to sysfs under > kernel/debug/cxl. > > The kernel/debug/cxl/einj_types file will print the available CXL > protocol errors in the same format as the available_error_types > file provided by the EINJ module. The > kernel/debug/cxl/$dport_dev/einj_inject is functionally the same as the > error_type and error_inject files provided by the EINJ module, i.e.: > writing an error type into $dport_dev/einj_inject will inject said error > type into the CXL dport represented by $dport_dev. > > Reviewed-by: Dan Williams <dan.j.williams@intel.com> > Signed-off-by: Ben Cheatham <Benjamin.Cheatham@amd.com> Hi Ben, Sorry I've not looked at this sooner. Anyhow, some comments inline. Mostly looks good to me. Jonathan > --- > Documentation/ABI/testing/debugfs-cxl | 22 ++++ > MAINTAINERS | 1 + > drivers/acpi/apei/einj.c | 158 ++++++++++++++++++++++++-- > drivers/cxl/core/port.c | 39 +++++++ > include/linux/einj-cxl.h | 45 ++++++++ > 5 files changed, 255 insertions(+), 10 deletions(-) > create mode 100644 include/linux/einj-cxl.h > > diff --git a/Documentation/ABI/testing/debugfs-cxl b/Documentation/ABI/testing/debugfs-cxl > index fe61d372e3fa..bcd985cca66a 100644 > --- a/Documentation/ABI/testing/debugfs-cxl > +++ b/Documentation/ABI/testing/debugfs-cxl > @@ -33,3 +33,25 @@ Description: > device cannot clear poison from the address, -ENXIO is returned. > The clear_poison attribute is only visible for devices > supporting the capability. > + > +What: /sys/kernel/debug/cxl/einj_types > +Date: January, 2024 > +KernelVersion: v6.9 > +Contact: linux-cxl@vger.kernel.org > +Description: > + (RO) Prints the CXL protocol error types made available by > + the platform in the format "0x<error number> <error type>". > + The <error number> can be written to einj_inject to inject > + <error type> into a chosen dport. I think it's a limited set, so docs could include what the error_type values can be? From this description it's not obvious they are human readable strings. > + > +What: /sys/kernel/debug/cxl/$dport_dev/einj_inject > +Date: January, 2024 > +KernelVersion: v6.9 > +Contact: linux-cxl@vger.kernel.org > +Description: > + (WO) Writing an integer to this file injects the corresponding > + CXL protocol error into $dport_dev ($dport_dev will be a device > + name from /sys/bus/pci/devices). The integer to type mapping for > + injection can be found by reading from einj_types. If the dport > + was enumerated in RCH mode, a CXL 1.1 error is injected, otherwise > + a CXL 2.0 error is injected. > diff --git a/MAINTAINERS b/MAINTAINERS > index 9104430e148e..02d7feb2ed1f 100644 > --- a/MAINTAINERS > +++ b/MAINTAINERS > @@ -5246,6 +5246,7 @@ L: linux-cxl@vger.kernel.org > S: Maintained > F: drivers/cxl/ > F: include/uapi/linux/cxl_mem.h > +F: include/linux/einj-cxl.h > F: tools/testing/cxl/ > > COMPUTE EXPRESS LINK PMU (CPMU) > diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c > index 73dde21d3e89..9137cc01f791 100644 > --- a/drivers/acpi/apei/einj.c > +++ b/drivers/acpi/apei/einj.c > @@ -21,6 +21,7 @@ > #include <linux/nmi.h> > #include <linux/delay.h> > #include <linux/mm.h> > +#include <linux/einj-cxl.h> > #include <linux/platform_device.h> > #include <asm/unaligned.h> > > @@ -37,6 +38,20 @@ > #define MEM_ERROR_MASK (ACPI_EINJ_MEMORY_CORRECTABLE | \ > ACPI_EINJ_MEMORY_UNCORRECTABLE | \ > ACPI_EINJ_MEMORY_FATAL) > +#ifndef ACPI_EINJ_CXL_CACHE_CORRECTABLE > +#define ACPI_EINJ_CXL_CACHE_CORRECTABLE BIT(12) > +#define ACPI_EINJ_CXL_CACHE_UNCORRECTABLE BIT(13) > +#define ACPI_EINJ_CXL_CACHE_FATAL BIT(14) > +#define ACPI_EINJ_CXL_MEM_CORRECTABLE BIT(15) > +#define ACPI_EINJ_CXL_MEM_UNCORRECTABLE BIT(16) > +#define ACPI_EINJ_CXL_MEM_FATAL BIT(17) > +#endif > +#define CXL_ERROR_MASK (ACPI_EINJ_CXL_CACHE_CORRECTABLE | \ > + ACPI_EINJ_CXL_CACHE_UNCORRECTABLE | \ > + ACPI_EINJ_CXL_CACHE_FATAL | \ > + ACPI_EINJ_CXL_MEM_CORRECTABLE | \ > + ACPI_EINJ_CXL_MEM_UNCORRECTABLE | \ > + ACPI_EINJ_CXL_MEM_FATAL) > > /* > * ACPI version 5 provides a SET_ERROR_TYPE_WITH_ADDRESS action. > @@ -543,8 +558,11 @@ static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, > if (type & ACPI5_VENDOR_BIT) { > if (vendor_flags != SETWA_FLAGS_MEM) > goto inject; > - } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM)) > + } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM)) { > goto inject; > + } else if ((type & CXL_ERROR_MASK) && (flags & SETWA_FLAGS_MEM)) { > + goto inject; > + } > > /* > * Disallow crazy address masks that give BIOS leeway to pick > @@ -596,6 +614,9 @@ static const char * const einj_error_type_string[] = { > "0x00000200\tPlatform Correctable\n", > "0x00000400\tPlatform Uncorrectable non-fatal\n", > "0x00000800\tPlatform Uncorrectable fatal\n", > +}; > + > +static const char * const einj_cxl_error_type_string[] = { > "0x00001000\tCXL.cache Protocol Correctable\n", > "0x00002000\tCXL.cache Protocol Uncorrectable non-fatal\n", > "0x00004000\tCXL.cache Protocol Uncorrectable fatal\n", > @@ -621,29 +642,44 @@ static int available_error_type_show(struct seq_file *m, void *v) > > DEFINE_SHOW_ATTRIBUTE(available_error_type); > > -static int error_type_get(void *data, u64 *val) > +int einj_cxl_available_error_type_show(struct seq_file *m, void *v) > { > - *val = error_type; > + int cxl_err, rc; > + u32 available_error_type = 0; > + > + if (!einj_initialized) > + return -ENXIO; > + > + rc = einj_get_available_error_type(&available_error_type); > + if (rc) > + return rc; > + > + for (int pos = 0; pos < ARRAY_SIZE(einj_cxl_error_type_string); pos++) { Trivial so feel free to ignore but, I'd stick to local styles and have pos declared in more traditional c style. > + cxl_err = ACPI_EINJ_CXL_CACHE_CORRECTABLE << pos; Maybe clearer as cxl_err = FIELD_PREP(CXL_ERROR_MASK, BIT(pos)); > + > + if (available_error_type & cxl_err) > + seq_puts(m, einj_cxl_error_type_string[pos]); > + } > > return 0; > } > +EXPORT_SYMBOL_NS_GPL(einj_cxl_available_error_type_show, CXL); > > -static int error_type_set(void *data, u64 val) > +static int validate_error_type(u64 type) > { > + u32 tval, vendor, available_error_type = 0; > int rc; > - u32 available_error_type = 0; > - u32 tval, vendor; > > /* Only low 32 bits for error type are valid */ > - if (val & GENMASK_ULL(63, 32)) > + if (type & GENMASK_ULL(63, 32)) > return -EINVAL; > > /* > * Vendor defined types have 0x80000000 bit set, and > * are not enumerated by ACPI_EINJ_GET_ERROR_TYPE > */ > - vendor = val & ACPI5_VENDOR_BIT; > - tval = val & 0x7fffffff; > + vendor = type & ACPI5_VENDOR_BIT; > + tval = type & 0x7fffffff; Could flip this to GENMASK whilst you are here. I don't much like counting fs :) > > /* Only one error type can be specified */ > if (tval & (tval - 1)) > @@ -652,9 +688,105 @@ static int error_type_set(void *data, u64 val) > rc = einj_get_available_error_type(&available_error_type); > if (rc) > return rc; > - if (!(val & available_error_type)) > + if (!(type & available_error_type)) > return -EINVAL; > } > + > + return 0; > +} > + > +static int cxl_dport_get_sbdf(struct pci_dev *dport_dev, u64 *sbdf) > +{ > + struct pci_bus *pbus; > + struct pci_host_bridge *bridge; > + u64 seg = 0, bus; > + > + pbus = dport_dev->bus; > + bridge = pci_find_host_bridge(pbus); > + > + if (!bridge) > + return -ENODEV; > + > + if (bridge->domain_nr != PCI_DOMAIN_NR_NOT_SET) Ah. x86 is not using the CONFIG_PCI_DOMAINS_GENERIC so I guess we can't just use pci_domain_nr(pbus)? (for the generic case bus->domain_nr is filled in when the host bridge is registered). Pity. > + seg = bridge->domain_nr << 24; > + > + bus = pbus->number << 16; I'd do the shifts whilst building sbpf. AS it stands you end up with what look to be wrong values in seg and bus because you'd shifted them in the local variables. > + *sbdf = seg | bus | dport_dev->devfn; *sbdf = (seg << 24) | (bus << 16) | dport_dev->devfn; (with shifts dropped where seg and bus are set) > + > + return 0; > +} > + > +int einj_cxl_inject_rch_error(u64 rcrb, u64 type) > +{ > + u64 param1 = 0, param2 = 0, param4 = 0; > + u32 flags; > + int rc; > + > + if (!einj_initialized) > + return -ENXIO; > + > + /* Only CXL error types can be specified */ > + if (type & ~CXL_ERROR_MASK || (type & ACPI5_VENDOR_BIT)) As below - a utility function would unify the 3 uses of this and avoid need for comment. > + return -EINVAL; > + > + rc = validate_error_type(type); > + if (rc) > + return rc; > + > + param1 = (u64) rcrb; already a u64 > + param2 = 0xfffffffffffff000; No need to initialize these to 0 above. > + flags = 0x2; > + > + return einj_error_inject(type, flags, param1, param2, 0, param4); return einj_error_inject(type, flags, rcrb, 0xfffffffffffff000, 0, 0); > +} > +EXPORT_SYMBOL_NS_GPL(einj_cxl_inject_rch_error, CXL); > + > +int einj_cxl_inject_error(struct pci_dev *dport, u64 type) > +{ > + u64 param1 = 0, param2 = 0, param4 = 0; > + u32 flags; > + int rc; > + > + if (!einj_initialized) > + return -ENXIO; > + > + /* Only CXL error types can be specified */ > + if (type & ~CXL_ERROR_MASK || (type & ACPI5_VENDOR_BIT)) As below a utility function would do this nicely (and avoid need for comment). if (!is_cxl_error(type)) > + return -EINVAL; > + > + rc = validate_error_type(type); > + if (rc) > + return rc; > + > + rc = cxl_dport_get_sbdf(dport, ¶m4); > + if (rc) > + return rc; > + > + flags = 0x4; > + > + return einj_error_inject(type, flags, param1, param2, 0, param4); Why not return einj_error_injecT(type, 0x4, 0, 0, 0, param4); ? Maybe flags is useful as "documentation" but given the parameters are nicely in order and you already didn't bother with param3, I can't see why keeping param1 and param2 as local variables is useful. > +} > +EXPORT_SYMBOL_NS_GPL(einj_cxl_inject_error, CXL); > + > +static int error_type_set(void *data, u64 val) > +{ > + int rc; > + > + /* CXL error types have to be injected from cxl debugfs */ > + if (val & CXL_ERROR_MASK && !(val & ACPI5_VENDOR_BIT)) Given you have inverse of this above, maybe it's worth a little helper function to have the logic only once? if (is_cxl_error(val)) > + return -EINVAL; > + > + rc = validate_error_type(val); > + if (rc) > + return rc; > + > error_type = val; > > return 0; > @@ -690,6 +822,12 @@ static int einj_check_table(struct acpi_table_einj *einj_tab) > return 0; > } > diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c > index 8c00fd6be730..c52c92222be5 100644 > --- a/drivers/cxl/core/port.c > +++ b/drivers/cxl/core/port.c > +static void cxl_debugfs_create_dport_dir(struct cxl_dport *dport) > +{ > + struct dentry *dir; > + > + /* > + * dport_dev needs to be a PCIe port for CXL 2.0+ ports because > + * EINJ expects a dport SBDF to be specified for 2.0 error injection. > + */ > + if (!einj_is_initialized() || > + (!dport->rch && !dev_is_pci(dport->dport_dev))) > + return; Trivial: Even though it's a little more code I'd break this up so that it's clear exactly what the comment refers to. if (!einj_is_initialized) return; /* * dport_dev needs to be a PCIe port for CXL 2.0+ ports because * EINJ expects a dport SBDF to be specified for 2.0 error injection. */ if (!dport->rch && !dev_is_pci(dport->dport_dev)) return; > + > + dir = cxl_debugfs_create_dir(dev_name(dport->dport_dev)); > + > + debugfs_create_file("einj_inject", 0200, dir, dport, > + &cxl_einj_inject_fops); > +} > + > static struct cxl_port *__devm_cxl_add_port(struct device *host, > struct device *uport_dev, > resource_size_t component_reg_phys, ... > @@ -2220,6 +2254,11 @@ static __init int cxl_core_init(void) > > cxl_debugfs = debugfs_create_dir("cxl", NULL); > > + if (einj_is_initialized()) { > + debugfs_create_file("einj_types", 0400, cxl_debugfs, NULL, > + &einj_cxl_available_error_type_fops); > + } > + > cxl_mbox_init(); > > rc = cxl_memdev_init(); > diff --git a/include/linux/einj-cxl.h b/include/linux/einj-cxl.h > new file mode 100644 > index 000000000000..af57cc8580a6 > --- /dev/null > +++ b/include/linux/einj-cxl.h > + > +#if IS_ENABLED(CONFIG_ACPI_APEI_EINJ) > +int einj_cxl_available_error_type_show(struct seq_file *m, void *v); > +int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type); > +int einj_cxl_inject_rch_error(u64 rcrb, u64 type); > +bool einj_is_initialized(void); > +#else It's trivial but if you are respinning, I'd like to see a comment for the else and the endif to let us trivially see what they match with. Lack of indenting for this preprocessor conditions can make this really hard to undwind once a file grows more complex over time. > +static inline int einj_cxl_available_error_type_show(struct seq_file *m, > + void *v) > +{ > + return -ENXIO; > +} > + > +static inline int einj_cxl_type_show(struct seq_file *m, void *data) A stub for a function that doesn't exist otherwise. Left over from refactors? > +{ > + return -ENXIO; > +} > + > +static inline int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type) > +{ > + return -ENXIO; > +} > + > +static inline int einj_cxl_inject_rch_error(u64 rcrb, u64 type) > +{ > + return -ENXIO; > +} > + > +static inline bool einj_is_initialized(void) { return false; } > +#endif > + > +#endif
On 2/14/24 9:27 AM, Jonathan Cameron wrote: > On Thu, 8 Feb 2024 14:00:41 -0600 > Ben Cheatham <Benjamin.Cheatham@amd.com> wrote: > >> Implement CXL helper functions in the EINJ module for getting/injecting >> available CXL protocol error types and export them to sysfs under >> kernel/debug/cxl. >> >> The kernel/debug/cxl/einj_types file will print the available CXL >> protocol errors in the same format as the available_error_types >> file provided by the EINJ module. The >> kernel/debug/cxl/$dport_dev/einj_inject is functionally the same as the >> error_type and error_inject files provided by the EINJ module, i.e.: >> writing an error type into $dport_dev/einj_inject will inject said error >> type into the CXL dport represented by $dport_dev. >> >> Reviewed-by: Dan Williams <dan.j.williams@intel.com> >> Signed-off-by: Ben Cheatham <Benjamin.Cheatham@amd.com> > Hi Ben, > > Sorry I've not looked at this sooner. > > Anyhow, some comments inline. Mostly looks good to me. > > Jonathan > >> --- >> Documentation/ABI/testing/debugfs-cxl | 22 ++++ >> MAINTAINERS | 1 + >> drivers/acpi/apei/einj.c | 158 ++++++++++++++++++++++++-- >> drivers/cxl/core/port.c | 39 +++++++ >> include/linux/einj-cxl.h | 45 ++++++++ >> 5 files changed, 255 insertions(+), 10 deletions(-) >> create mode 100644 include/linux/einj-cxl.h >> >> diff --git a/Documentation/ABI/testing/debugfs-cxl b/Documentation/ABI/testing/debugfs-cxl >> index fe61d372e3fa..bcd985cca66a 100644 >> --- a/Documentation/ABI/testing/debugfs-cxl >> +++ b/Documentation/ABI/testing/debugfs-cxl >> @@ -33,3 +33,25 @@ Description: >> device cannot clear poison from the address, -ENXIO is returned. >> The clear_poison attribute is only visible for devices >> supporting the capability. >> + >> +What: /sys/kernel/debug/cxl/einj_types >> +Date: January, 2024 >> +KernelVersion: v6.9 >> +Contact: linux-cxl@vger.kernel.org >> +Description: >> + (RO) Prints the CXL protocol error types made available by >> + the platform in the format "0x<error number> <error type>". >> + The <error number> can be written to einj_inject to inject >> + <error type> into a chosen dport. > > I think it's a limited set, so docs could include what the error_type values can > be? From this description it's not obvious they are human readable strings. > It is a limited set, but that set has 6 variants. It may make the description a bit long to include all of them, but I could include an example string instead? If length isn't an issue then I can add them all in. >> + >> +What: /sys/kernel/debug/cxl/$dport_dev/einj_inject >> +Date: January, 2024 >> +KernelVersion: v6.9 >> +Contact: linux-cxl@vger.kernel.org >> +Description: >> + (WO) Writing an integer to this file injects the corresponding >> + CXL protocol error into $dport_dev ($dport_dev will be a device >> + name from /sys/bus/pci/devices). The integer to type mapping for >> + injection can be found by reading from einj_types. If the dport >> + was enumerated in RCH mode, a CXL 1.1 error is injected, otherwise >> + a CXL 2.0 error is injected. >> diff --git a/MAINTAINERS b/MAINTAINERS >> index 9104430e148e..02d7feb2ed1f 100644 >> --- a/MAINTAINERS >> +++ b/MAINTAINERS >> @@ -5246,6 +5246,7 @@ L: linux-cxl@vger.kernel.org >> S: Maintained >> F: drivers/cxl/ >> F: include/uapi/linux/cxl_mem.h >> +F: include/linux/einj-cxl.h >> F: tools/testing/cxl/ >> >> COMPUTE EXPRESS LINK PMU (CPMU) >> diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c >> index 73dde21d3e89..9137cc01f791 100644 >> --- a/drivers/acpi/apei/einj.c >> +++ b/drivers/acpi/apei/einj.c >> @@ -21,6 +21,7 @@ >> #include <linux/nmi.h> >> #include <linux/delay.h> >> #include <linux/mm.h> >> +#include <linux/einj-cxl.h> >> #include <linux/platform_device.h> >> #include <asm/unaligned.h> >> >> @@ -37,6 +38,20 @@ >> #define MEM_ERROR_MASK (ACPI_EINJ_MEMORY_CORRECTABLE | \ >> ACPI_EINJ_MEMORY_UNCORRECTABLE | \ >> ACPI_EINJ_MEMORY_FATAL) >> +#ifndef ACPI_EINJ_CXL_CACHE_CORRECTABLE >> +#define ACPI_EINJ_CXL_CACHE_CORRECTABLE BIT(12) >> +#define ACPI_EINJ_CXL_CACHE_UNCORRECTABLE BIT(13) >> +#define ACPI_EINJ_CXL_CACHE_FATAL BIT(14) >> +#define ACPI_EINJ_CXL_MEM_CORRECTABLE BIT(15) >> +#define ACPI_EINJ_CXL_MEM_UNCORRECTABLE BIT(16) >> +#define ACPI_EINJ_CXL_MEM_FATAL BIT(17) >> +#endif >> +#define CXL_ERROR_MASK (ACPI_EINJ_CXL_CACHE_CORRECTABLE | \ >> + ACPI_EINJ_CXL_CACHE_UNCORRECTABLE | \ >> + ACPI_EINJ_CXL_CACHE_FATAL | \ >> + ACPI_EINJ_CXL_MEM_CORRECTABLE | \ >> + ACPI_EINJ_CXL_MEM_UNCORRECTABLE | \ >> + ACPI_EINJ_CXL_MEM_FATAL) >> >> /* >> * ACPI version 5 provides a SET_ERROR_TYPE_WITH_ADDRESS action. >> @@ -543,8 +558,11 @@ static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, >> if (type & ACPI5_VENDOR_BIT) { >> if (vendor_flags != SETWA_FLAGS_MEM) >> goto inject; >> - } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM)) >> + } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM)) { >> goto inject; >> + } else if ((type & CXL_ERROR_MASK) && (flags & SETWA_FLAGS_MEM)) { >> + goto inject; >> + } >> >> /* >> * Disallow crazy address masks that give BIOS leeway to pick >> @@ -596,6 +614,9 @@ static const char * const einj_error_type_string[] = { >> "0x00000200\tPlatform Correctable\n", >> "0x00000400\tPlatform Uncorrectable non-fatal\n", >> "0x00000800\tPlatform Uncorrectable fatal\n", >> +}; >> + >> +static const char * const einj_cxl_error_type_string[] = { >> "0x00001000\tCXL.cache Protocol Correctable\n", >> "0x00002000\tCXL.cache Protocol Uncorrectable non-fatal\n", >> "0x00004000\tCXL.cache Protocol Uncorrectable fatal\n", >> @@ -621,29 +642,44 @@ static int available_error_type_show(struct seq_file *m, void *v) >> >> DEFINE_SHOW_ATTRIBUTE(available_error_type); >> >> -static int error_type_get(void *data, u64 *val) >> +int einj_cxl_available_error_type_show(struct seq_file *m, void *v) >> { >> - *val = error_type; >> + int cxl_err, rc; >> + u32 available_error_type = 0; >> + >> + if (!einj_initialized) >> + return -ENXIO; >> + >> + rc = einj_get_available_error_type(&available_error_type); >> + if (rc) >> + return rc; >> + >> + for (int pos = 0; pos < ARRAY_SIZE(einj_cxl_error_type_string); pos++) { > > Trivial so feel free to ignore but, I'd stick to local styles and have pos > declared in more traditional c style. > Will do. >> + cxl_err = ACPI_EINJ_CXL_CACHE_CORRECTABLE << pos; > > Maybe clearer as > cxl_err = FIELD_PREP(CXL_ERROR_MASK, BIT(pos)); > I'll think about it. I think I agree with you, but I've seen a good amount of people who aren't familiar with the FIELD_* macros in which case it isn't much clearer. >> + >> + if (available_error_type & cxl_err) >> + seq_puts(m, einj_cxl_error_type_string[pos]); >> + } >> >> return 0; >> } >> +EXPORT_SYMBOL_NS_GPL(einj_cxl_available_error_type_show, CXL); >> >> -static int error_type_set(void *data, u64 val) >> +static int validate_error_type(u64 type) >> { >> + u32 tval, vendor, available_error_type = 0; >> int rc; >> - u32 available_error_type = 0; >> - u32 tval, vendor; >> >> /* Only low 32 bits for error type are valid */ >> - if (val & GENMASK_ULL(63, 32)) >> + if (type & GENMASK_ULL(63, 32)) >> return -EINVAL; >> >> /* >> * Vendor defined types have 0x80000000 bit set, and >> * are not enumerated by ACPI_EINJ_GET_ERROR_TYPE >> */ >> - vendor = val & ACPI5_VENDOR_BIT; >> - tval = val & 0x7fffffff; >> + vendor = type & ACPI5_VENDOR_BIT; >> + tval = type & 0x7fffffff; > > Could flip this to GENMASK whilst you are here. > I don't much like counting fs :) > Me neither. Will do. > >> >> /* Only one error type can be specified */ >> if (tval & (tval - 1)) >> @@ -652,9 +688,105 @@ static int error_type_set(void *data, u64 val) >> rc = einj_get_available_error_type(&available_error_type); >> if (rc) >> return rc; >> - if (!(val & available_error_type)) >> + if (!(type & available_error_type)) >> return -EINVAL; >> } >> + >> + return 0; >> +} >> + >> +static int cxl_dport_get_sbdf(struct pci_dev *dport_dev, u64 *sbdf) >> +{ >> + struct pci_bus *pbus; >> + struct pci_host_bridge *bridge; >> + u64 seg = 0, bus; >> + >> + pbus = dport_dev->bus; >> + bridge = pci_find_host_bridge(pbus); >> + >> + if (!bridge) >> + return -ENODEV; >> + >> + if (bridge->domain_nr != PCI_DOMAIN_NR_NOT_SET) > > Ah. x86 is not using the CONFIG_PCI_DOMAINS_GENERIC > so I guess we can't just use pci_domain_nr(pbus)? > (for the generic case bus->domain_nr is filled in when > the host bridge is registered). Pity. > >> + seg = bridge->domain_nr << 24; >> + >> + bus = pbus->number << 16; > I'd do the shifts whilst building sbpf. > AS it stands you end up with what look to be wrong values in > seg and bus because you'd shifted them in the local variables. > >> + *sbdf = seg | bus | dport_dev->devfn; > *sbdf = (seg << 24) | (bus << 16) | dport_dev->devfn; > > (with shifts dropped where seg and bus are set) Will do! I was never 100% sure about this function, but I never had a problem with it in my testing. >> + >> + return 0; >> +} >> + >> +int einj_cxl_inject_rch_error(u64 rcrb, u64 type) >> +{ >> + u64 param1 = 0, param2 = 0, param4 = 0; >> + u32 flags; >> + int rc; >> + >> + if (!einj_initialized) >> + return -ENXIO; >> + >> + /* Only CXL error types can be specified */ >> + if (type & ~CXL_ERROR_MASK || (type & ACPI5_VENDOR_BIT)) > > As below - a utility function would unify the 3 uses of this and > avoid need for comment. > >> + return -EINVAL; >> + >> + rc = validate_error_type(type); >> + if (rc) >> + return rc; >> + >> + param1 = (u64) rcrb; > already a u64 probably left over from an earlier revision, I'll get rid of the cast. >> + param2 = 0xfffffffffffff000; > No need to initialize these to 0 above. >> + flags = 0x2; >> + >> + return einj_error_inject(type, flags, param1, param2, 0, param4); > > return einj_error_inject(type, flags, > rcrb, 0xfffffffffffff000, 0, 0); > Will change. >> +} >> +EXPORT_SYMBOL_NS_GPL(einj_cxl_inject_rch_error, CXL); >> + >> +int einj_cxl_inject_error(struct pci_dev *dport, u64 type) >> +{ >> + u64 param1 = 0, param2 = 0, param4 = 0; >> + u32 flags; >> + int rc; >> + >> + if (!einj_initialized) >> + return -ENXIO; >> + >> + /* Only CXL error types can be specified */ >> + if (type & ~CXL_ERROR_MASK || (type & ACPI5_VENDOR_BIT)) > > As below a utility function would do this nicely (and avoid need > for comment). > if (!is_cxl_error(type)) > >> + return -EINVAL; >> + >> + rc = validate_error_type(type); >> + if (rc) >> + return rc; >> + >> + rc = cxl_dport_get_sbdf(dport, ¶m4); >> + if (rc) >> + return rc; >> + >> + flags = 0x4; >> + >> + return einj_error_inject(type, flags, param1, param2, 0, param4); > Why not > return einj_error_injecT(type, 0x4, 0, 0, 0, param4); > ? > > Maybe flags is useful as "documentation" but given the parameters are nicely > in order and you already didn't bother with param3, I can't see why > keeping param1 and param2 as local variables is useful. > It's a good point. I originally had the RCH and VH variants in the same function, and the param variables did matter then. Now that that's not the case, I'll remove them. >> +} >> +EXPORT_SYMBOL_NS_GPL(einj_cxl_inject_error, CXL); > > > >> + >> +static int error_type_set(void *data, u64 val) >> +{ >> + int rc; >> + >> + /* CXL error types have to be injected from cxl debugfs */ >> + if (val & CXL_ERROR_MASK && !(val & ACPI5_VENDOR_BIT)) > > Given you have inverse of this above, maybe it's worth a little > helper function to have the logic only once? > > if (is_cxl_error(val)) > I agree, I'll add it. >> + return -EINVAL; >> + >> + rc = validate_error_type(val); >> + if (rc) >> + return rc; >> + >> error_type = val; >> >> return 0; >> @@ -690,6 +822,12 @@ static int einj_check_table(struct acpi_table_einj *einj_tab) >> return 0; >> } > > >> diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c >> index 8c00fd6be730..c52c92222be5 100644 >> --- a/drivers/cxl/core/port.c >> +++ b/drivers/cxl/core/port.c > >> +static void cxl_debugfs_create_dport_dir(struct cxl_dport *dport) >> +{ >> + struct dentry *dir; >> + >> + /* >> + * dport_dev needs to be a PCIe port for CXL 2.0+ ports because >> + * EINJ expects a dport SBDF to be specified for 2.0 error injection. >> + */ >> + if (!einj_is_initialized() || >> + (!dport->rch && !dev_is_pci(dport->dport_dev))) >> + return; > > Trivial: Even though it's a little more code I'd break this up so that it's clear > exactly what the comment refers to. > I'm fine with that, will do. > if (!einj_is_initialized) > return; > > /* > * dport_dev needs to be a PCIe port for CXL 2.0+ ports because > * EINJ expects a dport SBDF to be specified for 2.0 error injection. > */ > if (!dport->rch && !dev_is_pci(dport->dport_dev)) > return; >> + >> + dir = cxl_debugfs_create_dir(dev_name(dport->dport_dev)); >> + >> + debugfs_create_file("einj_inject", 0200, dir, dport, >> + &cxl_einj_inject_fops); >> +} >> + >> static struct cxl_port *__devm_cxl_add_port(struct device *host, >> struct device *uport_dev, >> resource_size_t component_reg_phys, > > ... > >> @@ -2220,6 +2254,11 @@ static __init int cxl_core_init(void) >> >> cxl_debugfs = debugfs_create_dir("cxl", NULL); >> >> + if (einj_is_initialized()) { >> + debugfs_create_file("einj_types", 0400, cxl_debugfs, NULL, >> + &einj_cxl_available_error_type_fops); >> + } >> + >> cxl_mbox_init(); >> >> rc = cxl_memdev_init(); > > >> diff --git a/include/linux/einj-cxl.h b/include/linux/einj-cxl.h >> new file mode 100644 >> index 000000000000..af57cc8580a6 >> --- /dev/null >> +++ b/include/linux/einj-cxl.h > > >> + >> +#if IS_ENABLED(CONFIG_ACPI_APEI_EINJ) >> +int einj_cxl_available_error_type_show(struct seq_file *m, void *v); >> +int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type); >> +int einj_cxl_inject_rch_error(u64 rcrb, u64 type); >> +bool einj_is_initialized(void); >> +#else > > It's trivial but if you are respinning, I'd like to see a comment for the > else and the endif to let us trivially see what they match with. > Lack of indenting for this preprocessor conditions can make this really > hard to undwind once a file grows more complex over time. > I see, I'll comment them! >> +static inline int einj_cxl_available_error_type_show(struct seq_file *m, >> + void *v) >> +{ >> + return -ENXIO; >> +} >> + >> +static inline int einj_cxl_type_show(struct seq_file *m, void *data) > > A stub for a function that doesn't exist otherwise. Left > over from refactors? > Probably, I'll remove it. Thanks, Ben >> +{ >> + return -ENXIO; >> +} >> + >> +static inline int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type) >> +{ >> + return -ENXIO; >> +} >> + >> +static inline int einj_cxl_inject_rch_error(u64 rcrb, u64 type) >> +{ >> + return -ENXIO; >> +} >> + >> +static inline bool einj_is_initialized(void) { return false; } >> +#endif >> + >> +#endif >
On Wed, 14 Feb 2024 10:41:00 -0600 Ben Cheatham <benjamin.cheatham@amd.com> wrote: > On 2/14/24 9:27 AM, Jonathan Cameron wrote: > > On Thu, 8 Feb 2024 14:00:41 -0600 > > Ben Cheatham <Benjamin.Cheatham@amd.com> wrote: > > > >> Implement CXL helper functions in the EINJ module for getting/injecting > >> available CXL protocol error types and export them to sysfs under > >> kernel/debug/cxl. > >> > >> The kernel/debug/cxl/einj_types file will print the available CXL > >> protocol errors in the same format as the available_error_types > >> file provided by the EINJ module. The > >> kernel/debug/cxl/$dport_dev/einj_inject is functionally the same as the > >> error_type and error_inject files provided by the EINJ module, i.e.: > >> writing an error type into $dport_dev/einj_inject will inject said error > >> type into the CXL dport represented by $dport_dev. > >> > >> Reviewed-by: Dan Williams <dan.j.williams@intel.com> > >> Signed-off-by: Ben Cheatham <Benjamin.Cheatham@amd.com> > > Hi Ben, > > > > Sorry I've not looked at this sooner. > > > > Anyhow, some comments inline. Mostly looks good to me. > > > > Jonathan > > > >> --- > >> Documentation/ABI/testing/debugfs-cxl | 22 ++++ > >> MAINTAINERS | 1 + > >> drivers/acpi/apei/einj.c | 158 ++++++++++++++++++++++++-- > >> drivers/cxl/core/port.c | 39 +++++++ > >> include/linux/einj-cxl.h | 45 ++++++++ > >> 5 files changed, 255 insertions(+), 10 deletions(-) > >> create mode 100644 include/linux/einj-cxl.h > >> > >> diff --git a/Documentation/ABI/testing/debugfs-cxl b/Documentation/ABI/testing/debugfs-cxl > >> index fe61d372e3fa..bcd985cca66a 100644 > >> --- a/Documentation/ABI/testing/debugfs-cxl > >> +++ b/Documentation/ABI/testing/debugfs-cxl > >> @@ -33,3 +33,25 @@ Description: > >> device cannot clear poison from the address, -ENXIO is returned. > >> The clear_poison attribute is only visible for devices > >> supporting the capability. > >> + > >> +What: /sys/kernel/debug/cxl/einj_types > >> +Date: January, 2024 > >> +KernelVersion: v6.9 > >> +Contact: linux-cxl@vger.kernel.org > >> +Description: > >> + (RO) Prints the CXL protocol error types made available by > >> + the platform in the format "0x<error number> <error type>". > >> + The <error number> can be written to einj_inject to inject > >> + <error type> into a chosen dport. > > > > I think it's a limited set, so docs could include what the error_type values can > > be? From this description it's not obvious they are human readable strings. > > > > It is a limited set, but that set has 6 variants. It may make the description > a bit long to include all of them, but I could include an example string instead? > If length isn't an issue then I can add them all in. Example works. > > >> + > >> +What: /sys/kernel/debug/cxl/$dport_dev/einj_inject > >> +Date: January, 2024 > >> +KernelVersion: v6.9 > >> +Contact: linux-cxl@vger.kernel.org > >> +Description: > >> + (WO) Writing an integer to this file injects the corresponding > >> + CXL protocol error into $dport_dev ($dport_dev will be a device > >> + name from /sys/bus/pci/devices). The integer to type mapping for > >> + injection can be found by reading from einj_types. If the dport > >> + was enumerated in RCH mode, a CXL 1.1 error is injected, otherwise > >> + a CXL 2.0 error is injected. > >> diff --git a/MAINTAINERS b/MAINTAINERS > >> index 9104430e148e..02d7feb2ed1f 100644 > >> --- a/MAINTAINERS > >> +++ b/MAINTAINERS > >> @@ -5246,6 +5246,7 @@ L: linux-cxl@vger.kernel.org > >> S: Maintained > >> F: drivers/cxl/ > >> F: include/uapi/linux/cxl_mem.h > >> +F: include/linux/einj-cxl.h > >> F: tools/testing/cxl/ > >> > >> COMPUTE EXPRESS LINK PMU (CPMU) > >> diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c > >> index 73dde21d3e89..9137cc01f791 100644 > >> --- a/drivers/acpi/apei/einj.c > >> +++ b/drivers/acpi/apei/einj.c > >> @@ -21,6 +21,7 @@ > >> #include <linux/nmi.h> > >> #include <linux/delay.h> > >> #include <linux/mm.h> > >> +#include <linux/einj-cxl.h> > >> #include <linux/platform_device.h> > >> #include <asm/unaligned.h> > >> > >> @@ -37,6 +38,20 @@ > >> #define MEM_ERROR_MASK (ACPI_EINJ_MEMORY_CORRECTABLE | \ > >> ACPI_EINJ_MEMORY_UNCORRECTABLE | \ > >> ACPI_EINJ_MEMORY_FATAL) > >> +#ifndef ACPI_EINJ_CXL_CACHE_CORRECTABLE > >> +#define ACPI_EINJ_CXL_CACHE_CORRECTABLE BIT(12) > >> +#define ACPI_EINJ_CXL_CACHE_UNCORRECTABLE BIT(13) > >> +#define ACPI_EINJ_CXL_CACHE_FATAL BIT(14) > >> +#define ACPI_EINJ_CXL_MEM_CORRECTABLE BIT(15) > >> +#define ACPI_EINJ_CXL_MEM_UNCORRECTABLE BIT(16) > >> +#define ACPI_EINJ_CXL_MEM_FATAL BIT(17) > >> +#endif > >> +#define CXL_ERROR_MASK (ACPI_EINJ_CXL_CACHE_CORRECTABLE | \ > >> + ACPI_EINJ_CXL_CACHE_UNCORRECTABLE | \ > >> + ACPI_EINJ_CXL_CACHE_FATAL | \ > >> + ACPI_EINJ_CXL_MEM_CORRECTABLE | \ > >> + ACPI_EINJ_CXL_MEM_UNCORRECTABLE | \ > >> + ACPI_EINJ_CXL_MEM_FATAL) > >> > >> /* > >> * ACPI version 5 provides a SET_ERROR_TYPE_WITH_ADDRESS action. > >> @@ -543,8 +558,11 @@ static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, > >> if (type & ACPI5_VENDOR_BIT) { > >> if (vendor_flags != SETWA_FLAGS_MEM) > >> goto inject; > >> - } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM)) > >> + } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM)) { > >> goto inject; > >> + } else if ((type & CXL_ERROR_MASK) && (flags & SETWA_FLAGS_MEM)) { > >> + goto inject; > >> + } > >> > >> /* > >> * Disallow crazy address masks that give BIOS leeway to pick > >> @@ -596,6 +614,9 @@ static const char * const einj_error_type_string[] = { > >> "0x00000200\tPlatform Correctable\n", > >> "0x00000400\tPlatform Uncorrectable non-fatal\n", > >> "0x00000800\tPlatform Uncorrectable fatal\n", > >> +}; > >> + > >> +static const char * const einj_cxl_error_type_string[] = { > >> "0x00001000\tCXL.cache Protocol Correctable\n", > >> "0x00002000\tCXL.cache Protocol Uncorrectable non-fatal\n", > >> "0x00004000\tCXL.cache Protocol Uncorrectable fatal\n", > >> @@ -621,29 +642,44 @@ static int available_error_type_show(struct seq_file *m, void *v) > >> > >> DEFINE_SHOW_ATTRIBUTE(available_error_type); > >> > >> -static int error_type_get(void *data, u64 *val) > >> +int einj_cxl_available_error_type_show(struct seq_file *m, void *v) > >> { > >> - *val = error_type; > >> + int cxl_err, rc; > >> + u32 available_error_type = 0; > >> + > >> + if (!einj_initialized) > >> + return -ENXIO; > >> + > >> + rc = einj_get_available_error_type(&available_error_type); > >> + if (rc) > >> + return rc; > >> + > >> + for (int pos = 0; pos < ARRAY_SIZE(einj_cxl_error_type_string); pos++) { > > > > Trivial so feel free to ignore but, I'd stick to local styles and have pos > > declared in more traditional c style. > > > > Will do. > > >> + cxl_err = ACPI_EINJ_CXL_CACHE_CORRECTABLE << pos; > > > > Maybe clearer as > > cxl_err = FIELD_PREP(CXL_ERROR_MASK, BIT(pos)); > > > > I'll think about it. I think I agree with you, but I've seen a good amount of > people who aren't familiar with the FIELD_* macros in which case it isn't much clearer. Lets teach them ;)
On 2/14/24 11:46 AM, Jonathan Cameron wrote: > On Wed, 14 Feb 2024 10:41:00 -0600 > Ben Cheatham <benjamin.cheatham@amd.com> wrote: > >> On 2/14/24 9:27 AM, Jonathan Cameron wrote: >>> On Thu, 8 Feb 2024 14:00:41 -0600 >>> Ben Cheatham <Benjamin.Cheatham@amd.com> wrote: >>> >>>> Implement CXL helper functions in the EINJ module for getting/injecting >>>> available CXL protocol error types and export them to sysfs under >>>> kernel/debug/cxl. >>>> >>>> The kernel/debug/cxl/einj_types file will print the available CXL >>>> protocol errors in the same format as the available_error_types >>>> file provided by the EINJ module. The >>>> kernel/debug/cxl/$dport_dev/einj_inject is functionally the same as the >>>> error_type and error_inject files provided by the EINJ module, i.e.: >>>> writing an error type into $dport_dev/einj_inject will inject said error >>>> type into the CXL dport represented by $dport_dev. >>>> >>>> Reviewed-by: Dan Williams <dan.j.williams@intel.com> >>>> Signed-off-by: Ben Cheatham <Benjamin.Cheatham@amd.com> >>> Hi Ben, >>> >>> Sorry I've not looked at this sooner. >>> >>> Anyhow, some comments inline. Mostly looks good to me. >>> >>> Jonathan >>> >>>> --- >>>> Documentation/ABI/testing/debugfs-cxl | 22 ++++ >>>> MAINTAINERS | 1 + >>>> drivers/acpi/apei/einj.c | 158 ++++++++++++++++++++++++-- >>>> drivers/cxl/core/port.c | 39 +++++++ >>>> include/linux/einj-cxl.h | 45 ++++++++ >>>> 5 files changed, 255 insertions(+), 10 deletions(-) >>>> create mode 100644 include/linux/einj-cxl.h >>>> >>>> diff --git a/Documentation/ABI/testing/debugfs-cxl b/Documentation/ABI/testing/debugfs-cxl >>>> index fe61d372e3fa..bcd985cca66a 100644 >>>> --- a/Documentation/ABI/testing/debugfs-cxl >>>> +++ b/Documentation/ABI/testing/debugfs-cxl >>>> @@ -33,3 +33,25 @@ Description: >>>> device cannot clear poison from the address, -ENXIO is returned. >>>> The clear_poison attribute is only visible for devices >>>> supporting the capability. >>>> + >>>> +What: /sys/kernel/debug/cxl/einj_types >>>> +Date: January, 2024 >>>> +KernelVersion: v6.9 >>>> +Contact: linux-cxl@vger.kernel.org >>>> +Description: >>>> + (RO) Prints the CXL protocol error types made available by >>>> + the platform in the format "0x<error number> <error type>". >>>> + The <error number> can be written to einj_inject to inject >>>> + <error type> into a chosen dport. >>> >>> I think it's a limited set, so docs could include what the error_type values can >>> be? From this description it's not obvious they are human readable strings. >>> >> >> It is a limited set, but that set has 6 variants. It may make the description >> a bit long to include all of them, but I could include an example string instead? >> If length isn't an issue then I can add them all in. > > Example works. > I tried adding them all in and it didn't make the description too long, so I went ahead and did that instead of an example. >> >>>> + >>>> +What: /sys/kernel/debug/cxl/$dport_dev/einj_inject >>>> +Date: January, 2024 >>>> +KernelVersion: v6.9 >>>> +Contact: linux-cxl@vger.kernel.org >>>> +Description: >>>> + (WO) Writing an integer to this file injects the corresponding >>>> + CXL protocol error into $dport_dev ($dport_dev will be a device >>>> + name from /sys/bus/pci/devices). The integer to type mapping for >>>> + injection can be found by reading from einj_types. If the dport >>>> + was enumerated in RCH mode, a CXL 1.1 error is injected, otherwise >>>> + a CXL 2.0 error is injected. >>>> diff --git a/MAINTAINERS b/MAINTAINERS >>>> index 9104430e148e..02d7feb2ed1f 100644 >>>> --- a/MAINTAINERS >>>> +++ b/MAINTAINERS >>>> @@ -5246,6 +5246,7 @@ L: linux-cxl@vger.kernel.org >>>> S: Maintained >>>> F: drivers/cxl/ >>>> F: include/uapi/linux/cxl_mem.h >>>> +F: include/linux/einj-cxl.h >>>> F: tools/testing/cxl/ >>>> >>>> COMPUTE EXPRESS LINK PMU (CPMU) >>>> diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c >>>> index 73dde21d3e89..9137cc01f791 100644 >>>> --- a/drivers/acpi/apei/einj.c >>>> +++ b/drivers/acpi/apei/einj.c >>>> @@ -21,6 +21,7 @@ >>>> #include <linux/nmi.h> >>>> #include <linux/delay.h> >>>> #include <linux/mm.h> >>>> +#include <linux/einj-cxl.h> >>>> #include <linux/platform_device.h> >>>> #include <asm/unaligned.h> >>>> >>>> @@ -37,6 +38,20 @@ >>>> #define MEM_ERROR_MASK (ACPI_EINJ_MEMORY_CORRECTABLE | \ >>>> ACPI_EINJ_MEMORY_UNCORRECTABLE | \ >>>> ACPI_EINJ_MEMORY_FATAL) >>>> +#ifndef ACPI_EINJ_CXL_CACHE_CORRECTABLE >>>> +#define ACPI_EINJ_CXL_CACHE_CORRECTABLE BIT(12) >>>> +#define ACPI_EINJ_CXL_CACHE_UNCORRECTABLE BIT(13) >>>> +#define ACPI_EINJ_CXL_CACHE_FATAL BIT(14) >>>> +#define ACPI_EINJ_CXL_MEM_CORRECTABLE BIT(15) >>>> +#define ACPI_EINJ_CXL_MEM_UNCORRECTABLE BIT(16) >>>> +#define ACPI_EINJ_CXL_MEM_FATAL BIT(17) >>>> +#endif >>>> +#define CXL_ERROR_MASK (ACPI_EINJ_CXL_CACHE_CORRECTABLE | \ >>>> + ACPI_EINJ_CXL_CACHE_UNCORRECTABLE | \ >>>> + ACPI_EINJ_CXL_CACHE_FATAL | \ >>>> + ACPI_EINJ_CXL_MEM_CORRECTABLE | \ >>>> + ACPI_EINJ_CXL_MEM_UNCORRECTABLE | \ >>>> + ACPI_EINJ_CXL_MEM_FATAL) >>>> >>>> /* >>>> * ACPI version 5 provides a SET_ERROR_TYPE_WITH_ADDRESS action. >>>> @@ -543,8 +558,11 @@ static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, >>>> if (type & ACPI5_VENDOR_BIT) { >>>> if (vendor_flags != SETWA_FLAGS_MEM) >>>> goto inject; >>>> - } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM)) >>>> + } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM)) { >>>> goto inject; >>>> + } else if ((type & CXL_ERROR_MASK) && (flags & SETWA_FLAGS_MEM)) { >>>> + goto inject; >>>> + } >>>> >>>> /* >>>> * Disallow crazy address masks that give BIOS leeway to pick >>>> @@ -596,6 +614,9 @@ static const char * const einj_error_type_string[] = { >>>> "0x00000200\tPlatform Correctable\n", >>>> "0x00000400\tPlatform Uncorrectable non-fatal\n", >>>> "0x00000800\tPlatform Uncorrectable fatal\n", >>>> +}; >>>> + >>>> +static const char * const einj_cxl_error_type_string[] = { >>>> "0x00001000\tCXL.cache Protocol Correctable\n", >>>> "0x00002000\tCXL.cache Protocol Uncorrectable non-fatal\n", >>>> "0x00004000\tCXL.cache Protocol Uncorrectable fatal\n", >>>> @@ -621,29 +642,44 @@ static int available_error_type_show(struct seq_file *m, void *v) >>>> >>>> DEFINE_SHOW_ATTRIBUTE(available_error_type); >>>> >>>> -static int error_type_get(void *data, u64 *val) >>>> +int einj_cxl_available_error_type_show(struct seq_file *m, void *v) >>>> { >>>> - *val = error_type; >>>> + int cxl_err, rc; >>>> + u32 available_error_type = 0; >>>> + >>>> + if (!einj_initialized) >>>> + return -ENXIO; >>>> + >>>> + rc = einj_get_available_error_type(&available_error_type); >>>> + if (rc) >>>> + return rc; >>>> + >>>> + for (int pos = 0; pos < ARRAY_SIZE(einj_cxl_error_type_string); pos++) { >>> >>> Trivial so feel free to ignore but, I'd stick to local styles and have pos >>> declared in more traditional c style. >>> >> >> Will do. >> >>>> + cxl_err = ACPI_EINJ_CXL_CACHE_CORRECTABLE << pos; >>> >>> Maybe clearer as >>> cxl_err = FIELD_PREP(CXL_ERROR_MASK, BIT(pos)); >>> >> >> I'll think about it. I think I agree with you, but I've seen a good amount of >> people who aren't familiar with the FIELD_* macros in which case it isn't much clearer. > > Lets teach them ;) > Sounds good! Thanks, Ben
diff --git a/Documentation/ABI/testing/debugfs-cxl b/Documentation/ABI/testing/debugfs-cxl index fe61d372e3fa..bcd985cca66a 100644 --- a/Documentation/ABI/testing/debugfs-cxl +++ b/Documentation/ABI/testing/debugfs-cxl @@ -33,3 +33,25 @@ Description: device cannot clear poison from the address, -ENXIO is returned. The clear_poison attribute is only visible for devices supporting the capability. + +What: /sys/kernel/debug/cxl/einj_types +Date: January, 2024 +KernelVersion: v6.9 +Contact: linux-cxl@vger.kernel.org +Description: + (RO) Prints the CXL protocol error types made available by + the platform in the format "0x<error number> <error type>". + The <error number> can be written to einj_inject to inject + <error type> into a chosen dport. + +What: /sys/kernel/debug/cxl/$dport_dev/einj_inject +Date: January, 2024 +KernelVersion: v6.9 +Contact: linux-cxl@vger.kernel.org +Description: + (WO) Writing an integer to this file injects the corresponding + CXL protocol error into $dport_dev ($dport_dev will be a device + name from /sys/bus/pci/devices). The integer to type mapping for + injection can be found by reading from einj_types. If the dport + was enumerated in RCH mode, a CXL 1.1 error is injected, otherwise + a CXL 2.0 error is injected. diff --git a/MAINTAINERS b/MAINTAINERS index 9104430e148e..02d7feb2ed1f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5246,6 +5246,7 @@ L: linux-cxl@vger.kernel.org S: Maintained F: drivers/cxl/ F: include/uapi/linux/cxl_mem.h +F: include/linux/einj-cxl.h F: tools/testing/cxl/ COMPUTE EXPRESS LINK PMU (CPMU) diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c index 73dde21d3e89..9137cc01f791 100644 --- a/drivers/acpi/apei/einj.c +++ b/drivers/acpi/apei/einj.c @@ -21,6 +21,7 @@ #include <linux/nmi.h> #include <linux/delay.h> #include <linux/mm.h> +#include <linux/einj-cxl.h> #include <linux/platform_device.h> #include <asm/unaligned.h> @@ -37,6 +38,20 @@ #define MEM_ERROR_MASK (ACPI_EINJ_MEMORY_CORRECTABLE | \ ACPI_EINJ_MEMORY_UNCORRECTABLE | \ ACPI_EINJ_MEMORY_FATAL) +#ifndef ACPI_EINJ_CXL_CACHE_CORRECTABLE +#define ACPI_EINJ_CXL_CACHE_CORRECTABLE BIT(12) +#define ACPI_EINJ_CXL_CACHE_UNCORRECTABLE BIT(13) +#define ACPI_EINJ_CXL_CACHE_FATAL BIT(14) +#define ACPI_EINJ_CXL_MEM_CORRECTABLE BIT(15) +#define ACPI_EINJ_CXL_MEM_UNCORRECTABLE BIT(16) +#define ACPI_EINJ_CXL_MEM_FATAL BIT(17) +#endif +#define CXL_ERROR_MASK (ACPI_EINJ_CXL_CACHE_CORRECTABLE | \ + ACPI_EINJ_CXL_CACHE_UNCORRECTABLE | \ + ACPI_EINJ_CXL_CACHE_FATAL | \ + ACPI_EINJ_CXL_MEM_CORRECTABLE | \ + ACPI_EINJ_CXL_MEM_UNCORRECTABLE | \ + ACPI_EINJ_CXL_MEM_FATAL) /* * ACPI version 5 provides a SET_ERROR_TYPE_WITH_ADDRESS action. @@ -543,8 +558,11 @@ static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, if (type & ACPI5_VENDOR_BIT) { if (vendor_flags != SETWA_FLAGS_MEM) goto inject; - } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM)) + } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM)) { goto inject; + } else if ((type & CXL_ERROR_MASK) && (flags & SETWA_FLAGS_MEM)) { + goto inject; + } /* * Disallow crazy address masks that give BIOS leeway to pick @@ -596,6 +614,9 @@ static const char * const einj_error_type_string[] = { "0x00000200\tPlatform Correctable\n", "0x00000400\tPlatform Uncorrectable non-fatal\n", "0x00000800\tPlatform Uncorrectable fatal\n", +}; + +static const char * const einj_cxl_error_type_string[] = { "0x00001000\tCXL.cache Protocol Correctable\n", "0x00002000\tCXL.cache Protocol Uncorrectable non-fatal\n", "0x00004000\tCXL.cache Protocol Uncorrectable fatal\n", @@ -621,29 +642,44 @@ static int available_error_type_show(struct seq_file *m, void *v) DEFINE_SHOW_ATTRIBUTE(available_error_type); -static int error_type_get(void *data, u64 *val) +int einj_cxl_available_error_type_show(struct seq_file *m, void *v) { - *val = error_type; + int cxl_err, rc; + u32 available_error_type = 0; + + if (!einj_initialized) + return -ENXIO; + + rc = einj_get_available_error_type(&available_error_type); + if (rc) + return rc; + + for (int pos = 0; pos < ARRAY_SIZE(einj_cxl_error_type_string); pos++) { + cxl_err = ACPI_EINJ_CXL_CACHE_CORRECTABLE << pos; + + if (available_error_type & cxl_err) + seq_puts(m, einj_cxl_error_type_string[pos]); + } return 0; } +EXPORT_SYMBOL_NS_GPL(einj_cxl_available_error_type_show, CXL); -static int error_type_set(void *data, u64 val) +static int validate_error_type(u64 type) { + u32 tval, vendor, available_error_type = 0; int rc; - u32 available_error_type = 0; - u32 tval, vendor; /* Only low 32 bits for error type are valid */ - if (val & GENMASK_ULL(63, 32)) + if (type & GENMASK_ULL(63, 32)) return -EINVAL; /* * Vendor defined types have 0x80000000 bit set, and * are not enumerated by ACPI_EINJ_GET_ERROR_TYPE */ - vendor = val & ACPI5_VENDOR_BIT; - tval = val & 0x7fffffff; + vendor = type & ACPI5_VENDOR_BIT; + tval = type & 0x7fffffff; /* Only one error type can be specified */ if (tval & (tval - 1)) @@ -652,9 +688,105 @@ static int error_type_set(void *data, u64 val) rc = einj_get_available_error_type(&available_error_type); if (rc) return rc; - if (!(val & available_error_type)) + if (!(type & available_error_type)) return -EINVAL; } + + return 0; +} + +static int cxl_dport_get_sbdf(struct pci_dev *dport_dev, u64 *sbdf) +{ + struct pci_bus *pbus; + struct pci_host_bridge *bridge; + u64 seg = 0, bus; + + pbus = dport_dev->bus; + bridge = pci_find_host_bridge(pbus); + + if (!bridge) + return -ENODEV; + + if (bridge->domain_nr != PCI_DOMAIN_NR_NOT_SET) + seg = bridge->domain_nr << 24; + + bus = pbus->number << 16; + *sbdf = seg | bus | dport_dev->devfn; + + return 0; +} + +int einj_cxl_inject_rch_error(u64 rcrb, u64 type) +{ + u64 param1 = 0, param2 = 0, param4 = 0; + u32 flags; + int rc; + + if (!einj_initialized) + return -ENXIO; + + /* Only CXL error types can be specified */ + if (type & ~CXL_ERROR_MASK || (type & ACPI5_VENDOR_BIT)) + return -EINVAL; + + rc = validate_error_type(type); + if (rc) + return rc; + + param1 = (u64) rcrb; + param2 = 0xfffffffffffff000; + flags = 0x2; + + return einj_error_inject(type, flags, param1, param2, 0, param4); +} +EXPORT_SYMBOL_NS_GPL(einj_cxl_inject_rch_error, CXL); + +int einj_cxl_inject_error(struct pci_dev *dport, u64 type) +{ + u64 param1 = 0, param2 = 0, param4 = 0; + u32 flags; + int rc; + + if (!einj_initialized) + return -ENXIO; + + /* Only CXL error types can be specified */ + if (type & ~CXL_ERROR_MASK || (type & ACPI5_VENDOR_BIT)) + return -EINVAL; + + rc = validate_error_type(type); + if (rc) + return rc; + + rc = cxl_dport_get_sbdf(dport, ¶m4); + if (rc) + return rc; + + flags = 0x4; + + return einj_error_inject(type, flags, param1, param2, 0, param4); +} +EXPORT_SYMBOL_NS_GPL(einj_cxl_inject_error, CXL); + +static int error_type_get(void *data, u64 *val) +{ + *val = error_type; + + return 0; +} + +static int error_type_set(void *data, u64 val) +{ + int rc; + + /* CXL error types have to be injected from cxl debugfs */ + if (val & CXL_ERROR_MASK && !(val & ACPI5_VENDOR_BIT)) + return -EINVAL; + + rc = validate_error_type(val); + if (rc) + return rc; + error_type = val; return 0; @@ -690,6 +822,12 @@ static int einj_check_table(struct acpi_table_einj *einj_tab) return 0; } +bool einj_is_initialized(void) +{ + return einj_initialized; +} +EXPORT_SYMBOL_GPL(einj_is_initialized); + static int __init einj_probe(struct platform_device *pdev) { int rc; diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 8c00fd6be730..c52c92222be5 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -3,6 +3,7 @@ #include <linux/platform_device.h> #include <linux/memregion.h> #include <linux/workqueue.h> +#include <linux/einj-cxl.h> #include <linux/debugfs.h> #include <linux/device.h> #include <linux/module.h> @@ -797,6 +798,37 @@ static int cxl_dport_setup_regs(struct device *host, struct cxl_dport *dport, return rc; } +DEFINE_SHOW_ATTRIBUTE(einj_cxl_available_error_type); + +static int cxl_einj_inject(void *data, u64 type) +{ + struct cxl_dport *dport = data; + + if (dport->rch) + return einj_cxl_inject_rch_error(dport->rcrb.base, type); + + return einj_cxl_inject_error(to_pci_dev(dport->dport_dev), type); +} +DEFINE_DEBUGFS_ATTRIBUTE(cxl_einj_inject_fops, NULL, cxl_einj_inject, "%llx\n"); + +static void cxl_debugfs_create_dport_dir(struct cxl_dport *dport) +{ + struct dentry *dir; + + /* + * dport_dev needs to be a PCIe port for CXL 2.0+ ports because + * EINJ expects a dport SBDF to be specified for 2.0 error injection. + */ + if (!einj_is_initialized() || + (!dport->rch && !dev_is_pci(dport->dport_dev))) + return; + + dir = cxl_debugfs_create_dir(dev_name(dport->dport_dev)); + + debugfs_create_file("einj_inject", 0200, dir, dport, + &cxl_einj_inject_fops); +} + static struct cxl_port *__devm_cxl_add_port(struct device *host, struct device *uport_dev, resource_size_t component_reg_phys, @@ -1144,6 +1176,8 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, if (dev_is_pci(dport_dev)) dport->link_latency = cxl_pci_get_latency(to_pci_dev(dport_dev)); + cxl_debugfs_create_dport_dir(dport); + return dport; } @@ -2220,6 +2254,11 @@ static __init int cxl_core_init(void) cxl_debugfs = debugfs_create_dir("cxl", NULL); + if (einj_is_initialized()) { + debugfs_create_file("einj_types", 0400, cxl_debugfs, NULL, + &einj_cxl_available_error_type_fops); + } + cxl_mbox_init(); rc = cxl_memdev_init(); diff --git a/include/linux/einj-cxl.h b/include/linux/einj-cxl.h new file mode 100644 index 000000000000..af57cc8580a6 --- /dev/null +++ b/include/linux/einj-cxl.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * CXL protocol Error INJection support. + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Ben Cheatham <benjamin.cheatham@amd.com> + */ +#ifndef CXL_EINJ_H +#define CXL_EINJ_H + +#include <linux/pci.h> + +#if IS_ENABLED(CONFIG_ACPI_APEI_EINJ) +int einj_cxl_available_error_type_show(struct seq_file *m, void *v); +int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type); +int einj_cxl_inject_rch_error(u64 rcrb, u64 type); +bool einj_is_initialized(void); +#else +static inline int einj_cxl_available_error_type_show(struct seq_file *m, + void *v) +{ + return -ENXIO; +} + +static inline int einj_cxl_type_show(struct seq_file *m, void *data) +{ + return -ENXIO; +} + +static inline int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type) +{ + return -ENXIO; +} + +static inline int einj_cxl_inject_rch_error(u64 rcrb, u64 type) +{ + return -ENXIO; +} + +static inline bool einj_is_initialized(void) { return false; } +#endif + +#endif