diff mbox series

[v2,iproute2-next,1/5] rdma: Add support for rdma monitor

Message ID 20241107080248.2028680-2-cmeioahs@nvidia.com (mailing list archive)
State Superseded
Delegated to: David Ahern
Headers show
Series Add RDMA monitor support | expand

Checks

Context Check Description
netdev/tree_selection success Not a local patch

Commit Message

Chiara Meiohas Nov. 7, 2024, 8:02 a.m. UTC
From: Chiara Meiohas <cmeiohas@nvidia.com>

Introduce a new command for RDMA event monitoring.
This patch adds a new attribute "event_type" which describes
the event recieved. Add a new NETLINK_RDMA multicast group
and processes listening to this multicast group receive RDMA
events.

The event types supported are IB device registration/unregistration
and net device attachment/detachment.

Example output of rdma monitor and the commands which trigger
the events:

$ rdma monitor
$ rmmod mlx5_ib
[UNREGISTER]    dev 3 rocep8s0f1
[UNREGISTER]    dev 2 rocep8s0f0

$modprobe mlx5_ib
[REGISTER]      dev 4 mlx5_0
[NETDEV_ATTACH] dev 4 mlx5_0 port 1 netdev 4 eth2
[REGISTER]      dev 5 mlx5_1
[NETDEV_ATTACH] dev 5 mlx5_1 port 1 netdev 5 eth3

$ devlink dev eswitch set pci/0000:08:00.0 mode switchdev
[UNREGISTER]    dev 4 rocep8s0f0
[REGISTER]      dev 6 mlx5_0
[NETDEV_ATTACH] dev 6 mlx5_0 port 30 netdev 4 eth2

$ echo 4 > /sys/class/net/eth2/device/sriov_numvfs
[NETDEV_ATTACH] dev 6 rdmap8s0f0 port 2 netdev 7 eth4
[NETDEV_ATTACH] dev 6 rdmap8s0f0 port 3 netdev 8 eth5
[NETDEV_ATTACH] dev 6 rdmap8s0f0 port 4 netdev 9 eth6
[NETDEV_ATTACH] dev 6 rdmap8s0f0 port 5 netdev 10 eth7
[REGISTER]      dev 7 mlx5_0
[NETDEV_ATTACH] dev 7 mlx5_0 port 1 netdev 11 eth8
[REGISTER]      dev 8 mlx5_0
[NETDEV_ATTACH] dev 8 mlx5_0 port 1 netdev 12 eth9
[REGISTER]      dev 9 mlx5_0
[NETDEV_ATTACH] dev 9 mlx5_0 port 1 netdev 13 eth10
[REGISTER]      dev 10 mlx5_0
[NETDEV_ATTACH] dev 10 mlx5_0 port 1 netdev 14 eth11

$ echo 0 > /sys/class/net/eth2/device/sriov_numvfs
[UNREGISTER]    dev 7 rocep8s0f0v0
[UNREGISTER]    dev 8 rocep8s0f0v1
[UNREGISTER]    dev 9 rocep8s0f0v2
[UNREGISTER]    dev 10 rocep8s0f0v3
[NETDEV_DETACH] dev 6 rdmap8s0f0 port 2
[NETDEV_DETACH] dev 6 rdmap8s0f0 port 3
[NETDEV_DETACH] dev 6 rdmap8s0f0 port 4
[NETDEV_DETACH] dev 6 rdmap8s0f0 port 5

Signed-off-by: Chiara Meiohas <cmeiohas@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
---
 include/mnl_utils.h     |   1 +
 lib/mnl_utils.c         |   5 ++
 man/man8/rdma-monitor.8 |  51 ++++++++++++
 man/man8/rdma.8         |   7 +-
 rdma/Makefile           |   3 +-
 rdma/monitor.c          | 169 ++++++++++++++++++++++++++++++++++++++++
 rdma/rdma.c             |   3 +-
 rdma/rdma.h             |   1 +
 rdma/utils.c            |   1 +
 9 files changed, 238 insertions(+), 3 deletions(-)
 create mode 100644 man/man8/rdma-monitor.8
 create mode 100644 rdma/monitor.c

Comments

David Ahern Nov. 9, 2024, 5:34 p.m. UTC | #1
On 11/7/24 1:02 AM, Chiara Meiohas wrote:
> diff --git a/rdma/monitor.c b/rdma/monitor.c
> new file mode 100644
> index 00000000..0a2d3053
> --- /dev/null
> +++ b/rdma/monitor.c
> @@ -0,0 +1,169 @@
> +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
> +/*
> + * monitor.c	RDMA tool
> + * Authors:     Chiara Meiohas <cmeiohas@nvidia.com>
> + */
> +
> +#include "rdma.h"
> +
> +/* Global utils flags */
> +extern int json;

use include utils.h instead

> +
> +static void mon_print_event_type(struct nlattr **tb)
> +{
> +	static const char *const event_types_str[] = {
> +		"[REGISTER]",
> +		"[UNREGISTER]",
> +		"[NETDEV_ATTACH]",
> +		"[NETDEV_DETACH]",
> +	};
> +	enum rdma_nl_notify_event_type etype;
> +	static char unknown_type[32];

why static?

> +
> +	if (!tb[RDMA_NLDEV_ATTR_EVENT_TYPE])
> +		return;
> +
> +	etype = mnl_attr_get_u8(tb[RDMA_NLDEV_ATTR_EVENT_TYPE]);
> +	if (etype < ARRAY_SIZE(event_types_str)) {
> +		print_string(PRINT_ANY, "event_type", "%s\t",
> +			     event_types_str[etype]);
> +	} else {
> +		snprintf(unknown_type, sizeof(unknown_type), "[UNKNOWN 0x%02x]", etype);

wrap at about 80 columns; in this case etype should go on the next line
Chiara Meiohas Nov. 11, 2024, 8:01 a.m. UTC | #2
On 09/11/2024 19:34, David Ahern wrote:

> On 11/7/24 1:02 AM, Chiara Meiohas wrote:
>> diff --git a/rdma/monitor.c b/rdma/monitor.c
>> new file mode 100644
>> index 00000000..0a2d3053
>> --- /dev/null
>> +++ b/rdma/monitor.c
>> @@ -0,0 +1,169 @@
>> +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
>> +/*
>> + * monitor.c	RDMA tool
>> + * Authors:     Chiara Meiohas <cmeiohas@nvidia.com>
>> + */
>> +
>> +#include "rdma.h"
>> +
>> +/* Global utils flags */
>> +extern int json;
> use include utils.h instead
>
>> +
>> +static void mon_print_event_type(struct nlattr **tb)
>> +{
>> +	static const char *const event_types_str[] = {
>> +		"[REGISTER]",
>> +		"[UNREGISTER]",
>> +		"[NETDEV_ATTACH]",
>> +		"[NETDEV_DETACH]",
>> +	};
>> +	enum rdma_nl_notify_event_type etype;
>> +	static char unknown_type[32];
> why static?
>
>> +
>> +	if (!tb[RDMA_NLDEV_ATTR_EVENT_TYPE])
>> +		return;
>> +
>> +	etype = mnl_attr_get_u8(tb[RDMA_NLDEV_ATTR_EVENT_TYPE]);
>> +	if (etype < ARRAY_SIZE(event_types_str)) {
>> +		print_string(PRINT_ANY, "event_type", "%s\t",
>> +			     event_types_str[etype]);
>> +	} else {
>> +		snprintf(unknown_type, sizeof(unknown_type), "[UNKNOWN 0x%02x]", etype);
> wrap at about 80 columns; in this case etype should go on the next line

I'll prepare the v3 patch with these changes.

Additionally, in the case where rdma monitor is not supported (rdma sys returns
"monitor off"), it will return an error instead of looping.

Thanks,
Chiara
diff mbox series

Patch

diff --git a/include/mnl_utils.h b/include/mnl_utils.h
index 76fe1dfe..0ddf2932 100644
--- a/include/mnl_utils.h
+++ b/include/mnl_utils.h
@@ -24,6 +24,7 @@  int mnlu_gen_socket_sndrcv(struct mnlu_gen_socket *nlg, const struct nlmsghdr *n
 			   mnl_cb_t data_cb, void *data);
 
 struct mnl_socket *mnlu_socket_open(int bus);
+int mnl_add_nl_group(struct mnl_socket *nl, unsigned int group);
 struct nlmsghdr *mnlu_msg_prepare(void *buf, uint32_t nlmsg_type, uint16_t flags,
 				  void *extra_header, size_t extra_header_size);
 int mnlu_socket_recv_run(struct mnl_socket *nl, unsigned int seq, void *buf, size_t buf_size,
diff --git a/lib/mnl_utils.c b/lib/mnl_utils.c
index 6c8f527e..5f6671bf 100644
--- a/lib/mnl_utils.c
+++ b/lib/mnl_utils.c
@@ -35,6 +35,11 @@  err_bind:
 	return NULL;
 }
 
+int mnl_add_nl_group(struct mnl_socket *nl, unsigned int group)
+{
+	return mnl_socket_bind(nl, group, MNL_SOCKET_AUTOPID);
+}
+
 struct nlmsghdr *mnlu_msg_prepare(void *buf, uint32_t nlmsg_type, uint16_t flags,
 				  void *extra_header, size_t extra_header_size)
 {
diff --git a/man/man8/rdma-monitor.8 b/man/man8/rdma-monitor.8
new file mode 100644
index 00000000..d445cba0
--- /dev/null
+++ b/man/man8/rdma-monitor.8
@@ -0,0 +1,51 @@ 
+.TH RDMA\-MONITOR 8 "22 Jul 2024" "iproute2" "Linux"
+.SH NAME
+rdma-monitor \- RDMA events monitoring
+.SH SYNOPSIS
+.sp
+.ad l
+.in +8
+.ti -8
+.B rdma
+.RI "[ " OPTIONS " ]"
+.B monitor
+.RI " { " help " }"
+.sp
+
+.ti -8
+.IR OPTIONS " := { "
+\fB\-V\fR[\fIersion\fR] }
+
+.ti -8
+.B rdma monitor
+
+.ti -8
+.B rdma monitor help
+
+.SH "DESCRIPTION"
+.SS rdma monitor - utility can monitor RDMA device events on all RDMA devices.
+.PP
+.B rdma
+opens an RDMA Netlink socket, listens on it and dumps the event info.
+
+The event types supported are RDMA device registration/unregistration
+and net device attachment/detachment.
+
+.SH "EXAMPLES"
+.PP
+rdma monitor
+.RS 4
+Listen for events of all RDMA devices
+.RE
+.PP
+
+.SH SEE ALSO
+.BR rdma (8),
+.BR rdma-link (8),
+.BR rdma-resource (8),
+.BR rdma-system (8),
+.BR rdma-statistic (8),
+.br
+
+.SH AUTHOR
+Chiara Meiohas <cmeiohas@nvidia.com>
diff --git a/man/man8/rdma.8 b/man/man8/rdma.8
index 5088b9ec..df86284d 100644
--- a/man/man8/rdma.8
+++ b/man/man8/rdma.8
@@ -19,7 +19,7 @@  rdma \- RDMA tool
 
 .ti -8
 .IR OBJECT " := { "
-.BR dev " | " link " | " resource " | " system " | " statistic " }"
+.BR dev " | " link " | " resource " | " system " | " statistic " | " monitor " }"
 .sp
 
 .ti -8
@@ -94,6 +94,10 @@  character.
 .B statistic
 - RDMA counter statistic related.
 
+.TP
+.B monitor
+- RDMA events monitor
+
 .PP
 The names of all objects may be written in full or
 abbreviated form, for example
@@ -133,6 +137,7 @@  Exit status is 0 if command was successful or a positive integer upon failure.
 .BR rdma-resource (8),
 .BR rdma-system (8),
 .BR rdma-statistic (8),
+.BR rdma-monitor (8),
 .br
 
 .SH REPORTING BUGS
diff --git a/rdma/Makefile b/rdma/Makefile
index 37d904a7..ed3c1c1c 100644
--- a/rdma/Makefile
+++ b/rdma/Makefile
@@ -4,7 +4,8 @@  include ../config.mk
 CFLAGS += -I./include/uapi/
 
 RDMA_OBJ = rdma.o utils.o dev.o link.o res.o res-pd.o res-mr.o res-cq.o \
-	   res-cmid.o res-qp.o sys.o stat.o stat-mr.o res-ctx.o res-srq.o
+	   res-cmid.o res-qp.o sys.o stat.o stat-mr.o res-ctx.o res-srq.o \
+	   monitor.o
 
 TARGETS += rdma
 
diff --git a/rdma/monitor.c b/rdma/monitor.c
new file mode 100644
index 00000000..0a2d3053
--- /dev/null
+++ b/rdma/monitor.c
@@ -0,0 +1,169 @@ 
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * monitor.c	RDMA tool
+ * Authors:     Chiara Meiohas <cmeiohas@nvidia.com>
+ */
+
+#include "rdma.h"
+
+/* Global utils flags */
+extern int json;
+
+static void mon_print_event_type(struct nlattr **tb)
+{
+	static const char *const event_types_str[] = {
+		"[REGISTER]",
+		"[UNREGISTER]",
+		"[NETDEV_ATTACH]",
+		"[NETDEV_DETACH]",
+	};
+	enum rdma_nl_notify_event_type etype;
+	static char unknown_type[32];
+
+	if (!tb[RDMA_NLDEV_ATTR_EVENT_TYPE])
+		return;
+
+	etype = mnl_attr_get_u8(tb[RDMA_NLDEV_ATTR_EVENT_TYPE]);
+	if (etype < ARRAY_SIZE(event_types_str)) {
+		print_string(PRINT_ANY, "event_type", "%s\t",
+			     event_types_str[etype]);
+	} else {
+		snprintf(unknown_type, sizeof(unknown_type), "[UNKNOWN 0x%02x]", etype);
+		print_string(PRINT_ANY, "event_type", "%s\t", unknown_type);
+	}
+}
+
+static int mon_print_dev(struct nlattr **tb)
+{
+	const char *name;
+	uint32_t idx;
+
+	if (tb[RDMA_NLDEV_ATTR_DEV_INDEX]) {
+		idx = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+		print_uint(PRINT_ANY, "rdma_index", "dev %u", idx);
+	}
+
+	if(tb[RDMA_NLDEV_ATTR_DEV_NAME]) {
+		name = mnl_attr_get_str(tb[RDMA_NLDEV_ATTR_DEV_NAME]);
+		print_string(PRINT_ANY, "rdma_dev", " %s", name);
+	}
+
+	return 0;
+}
+
+static void mon_print_port_idx(struct nlattr **tb)
+{
+	uint32_t port;
+
+	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
+		port = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+		print_uint(PRINT_ANY, "port", " port %u", port);
+	}
+}
+
+static void mon_print_netdev(struct nlattr **tb)
+{
+	uint32_t netdev_idx;
+	const char *name;
+
+	if (tb[RDMA_NLDEV_ATTR_NDEV_INDEX]) {
+		netdev_idx = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_NDEV_INDEX]);
+		print_uint(PRINT_ANY, "netdev_idx", " netdev %u", netdev_idx);
+	}
+
+	if(tb[RDMA_NLDEV_ATTR_NDEV_NAME]) {
+		name = mnl_attr_get_str(tb[RDMA_NLDEV_ATTR_NDEV_NAME]);
+		print_string(PRINT_ANY, "netdev_name", " %s", name);
+	}
+}
+
+static int mon_show_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX + 1] = {};
+
+	mnl_attr_parse(nlh, 0, rd_attr_cb, tb);
+	if (!tb[RDMA_NLDEV_ATTR_EVENT_TYPE])
+		return MNL_CB_ERROR;
+
+	open_json_object(NULL);
+
+	mon_print_event_type(tb);
+	mon_print_dev(tb);
+	mon_print_port_idx(tb);
+	mon_print_netdev(tb);
+
+	close_json_object();
+	newline();
+	fflush(stdout);
+
+	return MNL_CB_OK;
+}
+
+static int mon_show(struct rd* rd)
+{
+	unsigned int groups = 0;
+	int one = 1;
+	char *buf;
+	int err;
+
+	buf = malloc(MNL_SOCKET_BUFFER_SIZE);
+	if (!buf) {
+		pr_err("Buffer allocation failed\n");
+		return -ENOMEM;
+	}
+
+	rd->nl = mnl_socket_open(NETLINK_RDMA);
+	if (!rd->nl) {
+		pr_err("Failed to open NETLINK_RDMA socket. Error: %s\n",
+		       strerror(errno));
+		err = -ENODEV;
+		goto err_free;
+	}
+	mnl_socket_setsockopt(rd->nl, NETLINK_CAP_ACK, &one, sizeof(one));
+	mnl_socket_setsockopt(rd->nl, NETLINK_EXT_ACK, &one, sizeof(one));
+
+	groups |= nl_mgrp(RDMA_NL_GROUP_NOTIFY);
+
+	err = mnl_add_nl_group(rd->nl, groups);
+	if (err < 0) {
+		pr_err("Failed to add NETLINK_RDMA multicast group. Error: %s\n",
+		       strerror(errno));
+		goto err_close;
+	}
+	new_json_obj(json);
+
+	err = mnlu_socket_recv_run(rd->nl, 0, buf, MNL_SOCKET_BUFFER_SIZE,
+				   mon_show_cb, rd);
+	if (err) {
+		pr_err("Failed to listen to rdma socket\n");
+		goto err_free_json;
+	}
+
+	return 0;
+
+err_free_json:
+	delete_json_obj();
+err_close:
+	mnl_socket_close(rd->nl);
+err_free:
+	free(buf);
+	return err;
+}
+
+static int mon_help(struct rd *rd)
+{
+	pr_out("Usage: rdma monitor [ -j ]\n");
+	return 0;
+}
+
+int cmd_mon(struct rd *rd)
+{
+	const struct rd_cmd cmds[] = {
+		{ NULL,		mon_show },
+		{ "help",	mon_help },
+		{ 0 }
+	};
+
+	return rd_exec_cmd(rd, cmds, "mon command");
+}
+
diff --git a/rdma/rdma.c b/rdma/rdma.c
index 131c6b2a..253ac58b 100644
--- a/rdma/rdma.c
+++ b/rdma/rdma.c
@@ -15,7 +15,7 @@  static void help(char *name)
 {
 	pr_out("Usage: %s [ OPTIONS ] OBJECT { COMMAND | help }\n"
 	       "       %s [ -f[orce] ] -b[atch] filename\n"
-	       "where  OBJECT := { dev | link | resource | system | statistic | help }\n"
+	       "where  OBJECT := { dev | link | resource | monitor | system | statistic | help }\n"
 	       "       OPTIONS := { -V[ersion] | -d[etails] | -j[son] | -p[retty] | -r[aw]}\n", name, name);
 }
 
@@ -35,6 +35,7 @@  static int rd_cmd(struct rd *rd, int argc, char **argv)
 		{ "resource",	cmd_res },
 		{ "system",	cmd_sys },
 		{ "statistic",	cmd_stat },
+		{ "monitor",	cmd_mon },
 		{ 0 }
 	};
 
diff --git a/rdma/rdma.h b/rdma/rdma.h
index d224ec57..fb037bcf 100644
--- a/rdma/rdma.h
+++ b/rdma/rdma.h
@@ -98,6 +98,7 @@  int cmd_link(struct rd *rd);
 int cmd_res(struct rd *rd);
 int cmd_sys(struct rd *rd);
 int cmd_stat(struct rd *rd);
+int cmd_mon(struct rd* rd);
 int rd_exec_cmd(struct rd *rd, const struct rd_cmd *c, const char *str);
 int rd_exec_dev(struct rd *rd, int (*cb)(struct rd *rd));
 int rd_exec_require_dev(struct rd *rd, int (*cb)(struct rd *rd));
diff --git a/rdma/utils.c b/rdma/utils.c
index 4d3803b5..bc104e0f 100644
--- a/rdma/utils.c
+++ b/rdma/utils.c
@@ -477,6 +477,7 @@  static const enum mnl_attr_data_type nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
 	[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE] = MNL_TYPE_U8,
 	[RDMA_NLDEV_ATTR_DEV_TYPE] = MNL_TYPE_U8,
 	[RDMA_NLDEV_ATTR_PARENT_NAME] = MNL_TYPE_STRING,
+	[RDMA_NLDEV_ATTR_EVENT_TYPE] = MNL_TYPE_U8,
 };
 
 static int rd_attr_check(const struct nlattr *attr, int *typep)