diff mbox series

[RFC,ndctl] cxl: Add inject-error command

Message ID 20250108215749.181852-1-Benjamin.Cheatham@amd.com (mailing list archive)
State New
Headers show
Series [RFC,ndctl] cxl: Add inject-error command | expand

Commit Message

Ben Cheatham Jan. 8, 2025, 9:57 p.m. UTC
Add inject-error command for injecting CXL errors into CXL devices.
The command currently only has support for injecting CXL protocol
errors into CXL downstream ports via EINJ.

The command takes an error type and injects an error of that type into the
specified downstream port. Downstream ports can be specified using the
port's device name with the -d option. Available error types can be obtained
by running "cxl inject-error --list-errors".

This command requires the kernel to be built with CONFIG_DEBUG_FS and
CONFIG_ACPI_APEI_EINJ_CXL enabled. It also requires root privileges to
run because it reads from <debugfs>/cxl/einj_types and writes to
<debugfs>/cxl/<dport>/einj_inject.

Example usage:
    # cxl inject-error --list-errors
    cxl_mem_correctable
    cxl_mem_fatal
    ...
    # cxl inject-error -d 0000:00:01.1 cxl_mem_correctable
    injected cxl_mem_correctable protocol error.

Signed-off-by: Ben Cheatham <Benjamin.Cheatham@amd.com>
---
 cxl/builtin.h      |   1 +
 cxl/cxl.c          |   1 +
 cxl/inject-error.c | 188 +++++++++++++++++++++++++++++++++++++++++++++
 cxl/lib/libcxl.c   |  53 +++++++++++++
 cxl/lib/libcxl.sym |   2 +
 cxl/libcxl.h       |  13 ++++
 cxl/meson.build    |   1 +
 7 files changed, 259 insertions(+)
 create mode 100644 cxl/inject-error.c
diff mbox series

Patch

diff --git a/cxl/builtin.h b/cxl/builtin.h
index c483f30..e82fcb5 100644
--- a/cxl/builtin.h
+++ b/cxl/builtin.h
@@ -25,6 +25,7 @@  int cmd_create_region(int argc, const char **argv, struct cxl_ctx *ctx);
 int cmd_enable_region(int argc, const char **argv, struct cxl_ctx *ctx);
 int cmd_disable_region(int argc, const char **argv, struct cxl_ctx *ctx);
 int cmd_destroy_region(int argc, const char **argv, struct cxl_ctx *ctx);
+int cmd_inject_error(int argc, const char **argv, struct cxl_ctx *ctx);
 #ifdef ENABLE_LIBTRACEFS
 int cmd_monitor(int argc, const char **argv, struct cxl_ctx *ctx);
 #else
diff --git a/cxl/cxl.c b/cxl/cxl.c
index 1643667..f808926 100644
--- a/cxl/cxl.c
+++ b/cxl/cxl.c
@@ -79,6 +79,7 @@  static struct cmd_struct commands[] = {
 	{ "enable-region", .c_fn = cmd_enable_region },
 	{ "disable-region", .c_fn = cmd_disable_region },
 	{ "destroy-region", .c_fn = cmd_destroy_region },
+	{ "inject-error", .c_fn = cmd_inject_error },
 	{ "monitor", .c_fn = cmd_monitor },
 };
 
diff --git a/cxl/inject-error.c b/cxl/inject-error.c
new file mode 100644
index 0000000..3645934
--- /dev/null
+++ b/cxl/inject-error.c
@@ -0,0 +1,188 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 AMD. All rights reserved. */
+#include <ccan/array_size/array_size.h>
+#include <util/parse-options.h>
+#include <cxl/libcxl.h>
+#include <util/log.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+
+#define EINJ_TYPES_BUF_SIZE 512
+
+/* Command line state for inject-error, filled in through the option tables. */
+static struct inject_params {
+	const char *type;	/* not referenced by the code in this file */
+	const char *devname;	/* -d/--device: name of the device to target */
+	const char *debugfs;	/* -m/--mount: debugfs mount point, may be NULL */
+	bool debug;		/* --debug: turn on debug output */
+	bool list;		/* --list-errors: list error types and exit */
+} param;
+
+/*
+ * Maps each CXL protocol error type to the name used for it on the command
+ * line and in the --list-errors output.
+ */
+static struct cxl_proto_error {
+	enum cxl_proto_error_types err_type;
+	const char *err_str;
+} cxl_proto_errors[] = {
+	{
+		.err_type = CXL_CACHE_CORRECTABLE,
+		.err_str = "cxl_cache_correctable",
+	},
+	{
+		.err_type = CXL_CACHE_UNCORRECTABLE,
+		.err_str = "cxl_cache_uncorrectable",
+	},
+	{
+		.err_type = CXL_CACHE_FATAL,
+		.err_str = "cxl_cache_fatal",
+	},
+	{
+		.err_type = CXL_MEM_CORRECTABLE,
+		.err_str = "cxl_mem_correctable",
+	},
+	{
+		.err_type = CXL_MEM_UNCORRECTABLE,
+		.err_str = "cxl_mem_uncorrectable",
+	},
+	{
+		.err_type = CXL_MEM_FATAL,
+		.err_str = "cxl_mem_fatal",
+	},
+};
+
+/* Options that apply to every mode of the command. */
+#define BASE_OPTIONS() \
+OPT_BOOLEAN(0, "debug", &param.debug, "turn on debug output"), \
+OPT_BOOLEAN(0, "list-errors", &param.list, "list possible error types"), \
+OPT_STRING('m', "mount", &param.debugfs, "debugfs mount point", \
+	   "Mount point for debug file system, defaults to /sys/kernel/debug")
+
+/*
+ * Options that select the injection target. The last line deliberately has
+ * no trailing backslash so the macro ends here instead of splicing in
+ * whatever line happens to follow it.
+ */
+#define INJECT_OPTIONS() \
+OPT_STRING('d', "device", &param.devname, "CXL device name", \
+	   "Device name of CXL device to inject error into. Protocol errors may only target downstream ports")
+
+/* Option table that cmd_inject_error() hands to inject_action(). */
+static const struct option inject_options[] = {
+	BASE_OPTIONS(),
+	INJECT_OPTIONS(),
+	OPT_END(),
+};
+
+/* Logging context for this command; set up by log_init() in inject_action(). */
+static struct log_ctx iel;
+
+/*
+ * Return the cxl_proto_errors[] entry whose name equals @type.
+ * Logs an error and returns NULL when no entry matches.
+ */
+static struct cxl_proto_error *find_cxl_proto_err(const char *type)
+{
+	struct cxl_proto_error *entry = cxl_proto_errors;
+	struct cxl_proto_error *end = entry + ARRAY_SIZE(cxl_proto_errors);
+
+	for (; entry < end; entry++)
+		if (strcmp(type, entry->err_str) == 0)
+			return entry;
+
+	log_err(&iel, "Invalid CXL protocol error type: %s\n", type);
+	return NULL;
+}
+
+/*
+ * Print the name of every CXL protocol error type reported as available.
+ *
+ * The list is fetched with cxl_get_proto_errors(). The leading hexadecimal
+ * value of each line is parsed and the matching name from
+ * cxl_proto_errors[] is printed, one per line. Lines whose value lies
+ * outside the CXL protocol error range are skipped.
+ *
+ * Returns 0 on success or the error code from cxl_get_proto_errors().
+ *
+ * NOTE(review): cxl_get_proto_errors() takes no buffer length, so
+ * EINJ_TYPES_BUF_SIZE must be at least as large as whatever the underlying
+ * sysfs_read_attr() may write -- confirm against its implementation.
+ */
+static int list_cxl_proto_errors(struct cxl_ctx *ctx, const char *debugfs)
+{
+	unsigned long i, err_num;
+	char buf[EINJ_TYPES_BUF_SIZE];
+	char *line;
+	int rc;
+
+	rc = cxl_get_proto_errors(ctx, buf, debugfs);
+	if (rc) {
+		log_err(&iel, "Failed to get CXL protocol errors: %d\n", rc);
+		return rc;
+	}
+
+	/*
+	 * Advance the tokenizer in the loop header so that "continue" moves
+	 * on to the next line instead of re-parsing the same one forever.
+	 */
+	for (line = strtok(buf, "\n"); line; line = strtok(NULL, "\n")) {
+		err_num = strtoul(line, NULL, 16);
+		if (err_num < CXL_CACHE_CORRECTABLE || err_num > CXL_MEM_FATAL)
+			continue;
+
+		for (i = 0; i < ARRAY_SIZE(cxl_proto_errors); i++)
+			if (err_num == cxl_proto_errors[i].err_type)
+				printf("%s\n", cxl_proto_errors[i].err_str);
+	}
+
+	return 0;
+}
+
+/*
+ * Find the downstream port named @devname on any CXL bus known to @ctx.
+ * Logs an error and returns NULL when no downstream port matches.
+ *
+ * NOTE(review): cxl_port_foreach_all() starts at the first child of the bus
+ * root port, so downstream ports that belong to the root port itself are
+ * never compared -- confirm that this is intended.
+ */
+static struct cxl_dport *find_cxl_dport(struct cxl_ctx *ctx, const char *devname)
+{
+	struct cxl_port *root, *cur;
+	struct cxl_dport *candidate;
+	struct cxl_bus *bus;
+
+	cxl_bus_foreach(ctx, bus) {
+		root = cxl_bus_get_port(bus);
+
+		cxl_port_foreach_all(root, cur) {
+			cxl_dport_foreach(cur, candidate) {
+				const char *name = cxl_dport_get_devname(candidate);
+
+				if (strcmp(devname, name) == 0)
+					return candidate;
+			}
+		}
+	}
+
+	log_err(&iel, "Downstream port \"%s\" not found\n", devname);
+	return NULL;
+}
+
+/*
+ * Inject protocol error @perr into the downstream port named @devname.
+ *
+ * Returns 0 on success, -EINVAL when no device name was given, -ENODEV when
+ * the downstream port cannot be found, or the error returned by
+ * cxl_dport_inject_proto_err().
+ */
+static int inject_proto_err(struct cxl_ctx *ctx, const char *devname,
+			    struct cxl_proto_error *perr, const char *debugfs)
+{
+	struct cxl_dport *target;
+	int err;
+
+	if (devname == NULL) {
+		log_err(&iel, "No downstream port specified for injection\n");
+		return -EINVAL;
+	}
+
+	target = find_cxl_dport(ctx, devname);
+	if (target == NULL)
+		return -ENODEV;
+
+	err = cxl_dport_inject_proto_err(target, perr->err_type, debugfs);
+	if (err == 0)
+		log_info(&iel, "injected %s protocol error.\n", perr->err_str);
+
+	return err;
+}
+
+/*
+ * Parse the command line, then either list the available protocol error
+ * types (--list-errors) or inject the error type named by the single
+ * positional argument.
+ *
+ * Returns 0 on success. -EINVAL is returned when the positional argument
+ * count is not exactly one or the error type is unknown; otherwise the
+ * error code of the failing step is returned.
+ */
+static int inject_action(int argc, const char **argv, struct cxl_ctx *ctx,
+			 const struct option *options, const char *usage)
+{
+	const char * const usage_lines[] = { usage, NULL };
+	struct cxl_proto_error *perr;
+	const char *debugfs;
+	int rc;
+
+	log_init(&iel, "cxl inject-error", "CXL_INJECT_LOG");
+	argc = parse_options(argc, argv, options, usage_lines, 0);
+
+	if (param.debug)
+		cxl_set_log_priority(ctx, LOG_DEBUG);
+	iel.log_priority = param.debug ? LOG_DEBUG : LOG_INFO;
+
+	debugfs = param.debugfs ? param.debugfs : "/sys/kernel/debug";
+
+	/* Listing takes precedence and ignores any positional arguments. */
+	if (param.list)
+		return list_cxl_proto_errors(ctx, debugfs);
+
+	if (argc != 1) {
+		usage_with_options(usage_lines, options);
+		return -EINVAL;
+	}
+
+	perr = find_cxl_proto_err(argv[0]);
+	if (!perr)
+		return -EINVAL;
+
+	rc = inject_proto_err(ctx, param.devname, perr, debugfs);
+	if (rc)
+		log_err(&iel, "Failed to inject error: %d\n", rc);
+
+	return rc;
+}
+
+/*
+ * Entry point for "cxl inject-error": runs inject_action() with the
+ * command's option table and maps its result to a process exit status.
+ */
+int cmd_inject_error(int argc, const char **argv, struct cxl_ctx *ctx)
+{
+	if (inject_action(argc, argv, ctx, inject_options,
+			  "inject-error [<options>] <error-type>"))
+		return EXIT_FAILURE;
+
+	return EXIT_SUCCESS;
+}
diff --git a/cxl/lib/libcxl.c b/cxl/lib/libcxl.c
index 91eedd1..8174c11 100644
--- a/cxl/lib/libcxl.c
+++ b/cxl/lib/libcxl.c
@@ -3179,6 +3179,59 @@  CXL_EXPORT int cxl_dport_get_id(struct cxl_dport *dport)
 	return dport->id;
 }
 
+/*
+ * Inject a CXL protocol error into a downstream port.
+ *
+ * @dport:   downstream port to target
+ * @perr:    protocol error type to inject
+ * @debugfs: mount point of the debug file system
+ *
+ * Writes @perr, formatted as a hexadecimal number, to
+ * <debugfs>/cxl/<dport devname>/einj_inject.
+ *
+ * Returns 0 on success, -EINVAL when @debugfs is NULL or the dport has no
+ * device path, -ENOMEM when the path cannot be allocated, or the error
+ * returned by sysfs_write_attr().
+ */
+CXL_EXPORT int cxl_dport_inject_proto_err(struct cxl_dport *dport,
+					  enum cxl_proto_error_types perr,
+					  const char *debugfs)
+{
+	struct cxl_port *port = cxl_dport_get_port(dport);
+	struct cxl_ctx *ctx = port->ctx;
+	const char *devname;
+	size_t path_len;
+	char buf[32];
+	char *path;
+	int rc;
+
+	if (!debugfs) {
+		err(ctx, "no debugfs mount point specified\n");
+		return -EINVAL;
+	}
+
+	if (!dport->dev_path) {
+		err(ctx, "no dev_path for dport\n");
+		return -EINVAL;
+	}
+
+	devname = cxl_dport_get_devname(dport);
+
+	/* "<debugfs>/cxl/<devname>/einj_inject" plus the terminating NUL */
+	path_len = strlen(debugfs) + strlen("/cxl/") + strlen(devname) +
+		   strlen("/einj_inject") + 1;
+	path = calloc(1, path_len);
+	if (!path)
+		return -ENOMEM;
+
+	snprintf(path, path_len, "%s/cxl/%s/einj_inject", debugfs, devname);
+
+	/* Cast to unsigned long so the argument always matches %lx. */
+	snprintf(buf, sizeof(buf), "0x%lx\n", (unsigned long)perr);
+	rc = sysfs_write_attr(ctx, path, buf);
+	if (rc)
+		err(ctx, "could not write to %s: %d\n", path, rc);
+
+	free(path);
+	return rc;
+}
+
+/*
+ * Read the list of injectable error types from <debugfs>/cxl/einj_types.
+ *
+ * @ctx:     library context, used for logging
+ * @buf:     destination for the contents of the attribute
+ * @debugfs: mount point of the debug file system
+ *
+ * No buffer length is passed in, so @buf must be large enough for whatever
+ * sysfs_read_attr() stores.
+ * NOTE(review): confirm the required size against sysfs_read_attr().
+ *
+ * Returns 0 on success, -EINVAL when @buf or @debugfs is NULL, -ENOMEM when
+ * the path cannot be allocated, or the error returned by sysfs_read_attr().
+ */
+CXL_EXPORT int cxl_get_proto_errors(struct cxl_ctx *ctx, char *buf,
+				    const char *debugfs)
+{
+	static const char einj_types[] = "/cxl/einj_types";
+	size_t path_len;
+	char *path;
+	int rc;
+
+	if (!buf || !debugfs)
+		return -EINVAL;
+
+	/* sizeof() already accounts for the terminating NUL */
+	path_len = strlen(debugfs) + sizeof(einj_types);
+	path = calloc(1, path_len);
+	if (!path)
+		return -ENOMEM;
+
+	snprintf(path, path_len, "%s%s", debugfs, einj_types);
+	rc = sysfs_read_attr(ctx, path, buf);
+	if (rc)
+		err(ctx, "could not read from %s: %d\n", path, rc);
+
+	free(path);
+	return rc;
+}
+
 CXL_EXPORT struct cxl_port *cxl_dport_get_port(struct cxl_dport *dport)
 {
 	return dport->port;
diff --git a/cxl/lib/libcxl.sym b/cxl/lib/libcxl.sym
index 304d7fa..d39a12d 100644
--- a/cxl/lib/libcxl.sym
+++ b/cxl/lib/libcxl.sym
@@ -281,4 +281,6 @@  global:
 	cxl_memdev_get_ram_qos_class;
 	cxl_region_qos_class_mismatch;
 	cxl_port_decoders_committed;
+	cxl_dport_inject_proto_err;
+	cxl_get_proto_errors;
 } LIBCXL_6;
diff --git a/cxl/libcxl.h b/cxl/libcxl.h
index fc6dd00..867daa4 100644
--- a/cxl/libcxl.h
+++ b/cxl/libcxl.h
@@ -160,6 +160,15 @@  struct cxl_port *cxl_port_get_next_all(struct cxl_port *port,
 	for (port = cxl_port_get_first(top); port != NULL;                     \
 	     port = cxl_port_get_next_all(port, top))
 
+/*
+ * CXL protocol error types accepted by cxl_dport_inject_proto_err().
+ * Each type is a distinct single bit, covering bits 12 through 17.
+ * NOTE(review): presumably these mirror the ACPI EINJ error type
+ * encoding -- confirm against the specification.
+ */
+enum cxl_proto_error_types {
+	CXL_CACHE_CORRECTABLE = 1 << 12,
+	CXL_CACHE_UNCORRECTABLE = 1 << 13,
+	CXL_CACHE_FATAL = 1 << 14,
+	CXL_MEM_CORRECTABLE = 1 << 15,
+	CXL_MEM_UNCORRECTABLE = 1 << 16,
+	CXL_MEM_FATAL = 1 << 17,
+};
+
 struct cxl_dport;
 struct cxl_dport *cxl_dport_get_first(struct cxl_port *port);
 struct cxl_dport *cxl_dport_get_next(struct cxl_dport *dport);
@@ -168,6 +177,10 @@  const char *cxl_dport_get_physical_node(struct cxl_dport *dport);
 const char *cxl_dport_get_firmware_node(struct cxl_dport *dport);
 struct cxl_port *cxl_dport_get_port(struct cxl_dport *dport);
 int cxl_dport_get_id(struct cxl_dport *dport);
+/*
+ * Inject protocol error @err into @dport. @debugfs is the mount point of
+ * the debug file system. A non-zero return value indicates failure.
+ */
+int cxl_dport_inject_proto_err(struct cxl_dport *dport,
+			       enum cxl_proto_error_types err,
+			       const char *debugfs);
+/*
+ * Read the error types listed under @debugfs into @buf. No buffer length is
+ * passed, so @buf must be large enough to hold the whole attribute. A
+ * non-zero return value indicates failure.
+ */
+int cxl_get_proto_errors(struct cxl_ctx *ctx, char *buf, const char *debugfs);
 bool cxl_dport_maps_memdev(struct cxl_dport *dport, struct cxl_memdev *memdev);
 struct cxl_dport *cxl_port_get_dport_by_memdev(struct cxl_port *port,
 					       struct cxl_memdev *memdev);
diff --git a/cxl/meson.build b/cxl/meson.build
index 61b4d87..79da4e6 100644
--- a/cxl/meson.build
+++ b/cxl/meson.build
@@ -7,6 +7,7 @@  cxl_src = [
   'memdev.c',
   'json.c',
   'filter.c',
+  'inject-error.c',
   '../daxctl/json.c',
   '../daxctl/filter.c',
 ]