diff mbox series

[v2,1/2] RDMA/Core: add RDMA_NLDEV_CMD_NEWLINK/DELLINK support

Message ID a499e7dd59a131631216a54d62f9f4e7ab2693b2.1542047436.git.swise@opengridcomputing.com (mailing list archive)
State Superseded
Headers show
Series Dynamic rdma link creation | expand

Commit Message

Steve Wise Nov. 12, 2018, 4:25 p.m. UTC
Add support for new LINK messages to allow adding and deleting rdma
interfaces.  This will be used initially for soft rdma drivers which
instantiate device instances dynamically by the admin specifying a
netdev device to use.  The rdma_rxe module will be the first user of
these messages.

The design is modeled after RTNL_NEWLINK/DELLINK:  rdma drivers
register with the rdma core if they provide link add/delete functions.
Each driver registers with a unique "type" string, that is used to
dispatch messages coming from user space.  A new RDMA_NLDEV_ATTR is
defined for the "type" string.  User mode will pass 3 attributes in a
NEWLINK message: RDMA_NLDEV_ATTR_IBDEV_NAME for the desired rdma device
name to be created, RDMA_NLDEV_ATTR_LINK_TYPE for the "type" of link
being added, and RDMA_NLDEV_ATTR_NDEV_NAME for the net_device interface
to use for this link. The DELLINK message will contain the IBDEV_NAME
and LINK_TYPE attributes.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/nldev.c  | 136 +++++++++++++++++++++++++++++++++++++++
 include/rdma/rdma_netlink.h      |  12 ++++
 include/uapi/rdma/rdma_netlink.h |   9 ++-
 3 files changed, 156 insertions(+), 1 deletion(-)

Comments

Ruhl, Michael J Nov. 15, 2018, 6:38 p.m. UTC | #1
>-----Original Message-----
>From: linux-rdma-owner@vger.kernel.org [mailto:linux-rdma-
>owner@vger.kernel.org] On Behalf Of Steve Wise
>Sent: Monday, November 12, 2018 11:26 AM
>To: dledford@redhat.com; jgg@mellanox.com
>Cc: linux-rdma@vger.kernel.org; BMT@zurich.ibm.com
>Subject: [PATCH v2 1/2] RDMA/Core: add
>RDMA_NLDEV_CMD_NEWLINK/DELLINK support
>
>Add support for new LINK messages to allow adding and deleting rdma
>interfaces.  This will be used initially for soft rdma drivers which
>instantiate device instances dynamically by the admin specifying a
>netdev device to use.  The rdma_rxe module will be the first user of
>these messages.
>
>The design is modeled after RTNL_NEWLINK/DELLINK:  rdma drivers
>register with the rdma core if they provide link add/delete functions.
>Each driver registers with a unique "type" string, that is used to
>dispatch messages coming from user space.  A new RDMA_NLDEV_ATTR is
>defined for the "type" string.  User mode will pass 3 attributes in a
>NEWLINK message: RDMA_NLDEV_ATTR_IBDEV_NAME for the desired rdma
>device
>name to be created, RDMA_NLDEV_ATTR_LINK_TYPE for the "type" of link
>being added, and RDMA_NLDEV_ATTR_NDEV_NAME for the net_device
>interface
>to use for this link. The DELLINK message will contain the IBDEV_NAME
>and LINK_TYPE attributes.
>
>Signed-off-by: Steve Wise <swise@opengridcomputing.com>
>Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
>---
> drivers/infiniband/core/nldev.c  | 136
>+++++++++++++++++++++++++++++++++++++++
> include/rdma/rdma_netlink.h      |  12 ++++
> include/uapi/rdma/rdma_netlink.h |   9 ++-
> 3 files changed, 156 insertions(+), 1 deletion(-)
>
>diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
>index 573399e3ccc1..926638cd52bc 100644
>--- a/drivers/infiniband/core/nldev.c
>+++ b/drivers/infiniband/core/nldev.c
>@@ -33,6 +33,7 @@
> #include <linux/module.h>
> #include <linux/pid.h>
> #include <linux/pid_namespace.h>
>+#include <linux/mutex.h>
> #include <net/netlink.h>
> #include <rdma/rdma_cm.h>
> #include <rdma/rdma_netlink.h>
>@@ -107,6 +108,8 @@
> 	[RDMA_NLDEV_ATTR_DRIVER_U32]		= { .type = NLA_U32 },
> 	[RDMA_NLDEV_ATTR_DRIVER_S64]		= { .type = NLA_S64 },
> 	[RDMA_NLDEV_ATTR_DRIVER_U64]		= { .type = NLA_U64 },
>+	[RDMA_NLDEV_ATTR_LINK_TYPE]		= { .type =
>NLA_NUL_STRING,
>+				    .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN
>},
> };
>
> static int put_driver_name_print_type(struct sk_buff *msg, const char
>*name,
>@@ -1103,6 +1106,131 @@ static int nldev_res_get_pd_dumpit(struct
>sk_buff *skb,
> 	return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_PD);
> }
>
>+static LIST_HEAD(link_ops);
>+static DEFINE_MUTEX(link_ops_mutex);
>+
>+static const struct rdma_link_ops *link_ops_get(const char *type)
>+{

_get() usually seems like a reference counted mechanism.

Would this be better named as _ops_find()?

>+	const struct rdma_link_ops *ops;
>+
>+	mutex_lock(&link_ops_mutex);
>+	list_for_each_entry(ops, &link_ops, list) {
>+		if (!strcmp(ops->type, type))
>+			goto out;
>+	}
>+	ops = NULL;
>+out:
>+	mutex_unlock(&link_ops_mutex);
>+	return ops;
>+}
>+
>+void rdma_link_register(struct rdma_link_ops *ops)
>+{
>+	if (link_ops_get(ops->type)) {
>+		WARN_ONCE("Duplicate rdma_link_ops! %s\n", ops->type);
>+		return;
>+	}
>+	mutex_lock(&link_ops_mutex);
>+	list_add(&ops->list, &link_ops);
>+	mutex_unlock(&link_ops_mutex);
>+}
>+EXPORT_SYMBOL(rdma_link_register);
>+
>+void rdma_link_unregister(struct rdma_link_ops *ops)
>+{
>+	mutex_lock(&link_ops_mutex);
>+	list_del(&ops->list);
>+	mutex_unlock(&link_ops_mutex);
>+}
>+EXPORT_SYMBOL(rdma_link_unregister);

Will there be any issues with a link being unregistered while in use?

>+
>+static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
>+			  struct netlink_ext_ack *extack)
>+{
>+	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
>+	char ibdev_name[IB_DEVICE_NAME_MAX];
>+	const struct rdma_link_ops *ops;
>+	char ndev_name[IFNAMSIZ];
>+	char type[IFNAMSIZ];
>+	int err;
>+
>+	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
>+			  nldev_policy, extack);
>+	if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
>+	    !tb[RDMA_NLDEV_ATTR_LINK_TYPE] ||
>!tb[RDMA_NLDEV_ATTR_NDEV_NAME]) {
>+		err = -EINVAL;

You are losing the information of nlmsg_parse() failing, is that ok?

>+		goto err_out;

Is err_out really necessary?  Maybe just return?

>+	}
>+	if (nla_len(tb[RDMA_NLDEV_ATTR_DEV_NAME]) >
>sizeof(ibdev_name) ||
>+	    nla_len(tb[RDMA_NLDEV_ATTR_LINK_TYPE]) > sizeof(type) ||
>+	    nla_len(tb[RDMA_NLDEV_ATTR_NDEV_NAME]) >
>sizeof(ndev_name)) {
>+		err = -EINVAL;
>+		goto err_out;
>+	}
>+	nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
>+		    sizeof(ibdev_name));
>+	if (strchr(ibdev_name, '%')) {
>+		err = -EINVAL;
>+		goto err_out;
>+	}
>+	nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
>+	nla_strlcpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
>+		    sizeof(ndev_name));
>+
>+	ops = link_ops_get(type);
>+#ifdef CONFIG_MODULES
>+	if (!ops) {
>+		request_module("rdma-link-%s", type);
>+		ops = link_ops_get(type);
>+	}
>+#endif
>+	if (!ops) {
>+		err = -ENODEV;
>+		goto err_out;
>+	}
>+
>+	err = ops->newlink(ibdev_name, ndev_name);
>+err_out:
>+	return err;
>+}
>+
>+static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
>+			  struct netlink_ext_ack *extack)
>+{
>+	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
>+	char ibdev_name[IB_DEVICE_NAME_MAX];
>+	const struct rdma_link_ops *ops;
>+	char type[IFNAMSIZ];
>+	int err;
>+
>+	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
>+			  nldev_policy, extack);
>+	if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
>+	    !tb[RDMA_NLDEV_ATTR_LINK_TYPE]) {
>+		err = -EINVAL;
>+		goto err_out;
>+	}
>+
>+	if (nla_len(tb[RDMA_NLDEV_ATTR_DEV_NAME]) >
>sizeof(ibdev_name) ||
>+	    nla_len(tb[RDMA_NLDEV_ATTR_LINK_TYPE]) > sizeof(type)) {
>+		err = -EINVAL;
>+		goto err_out;
>+	}
>+	nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
>+		    sizeof(ibdev_name));
>+	nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
>+
>+	ops = link_ops_get(type);
>+	if (!ops) {
>+		err = -ENODEV;
>+		goto err_out;
>+	}
>+
>+	err = ops->dellink(ibdev_name);
>+err_out:
>+	return err;
>+}
>+
> static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] =
>{
> 	[RDMA_NLDEV_CMD_GET] = {
> 		.doit = nldev_get_doit,
>@@ -1112,6 +1240,14 @@ static int nldev_res_get_pd_dumpit(struct sk_buff
>*skb,
> 		.doit = nldev_set_doit,
> 		.flags = RDMA_NL_ADMIN_PERM,
> 	},
>+	[RDMA_NLDEV_CMD_NEWLINK] = {
>+		.doit = nldev_newlink,
>+		.flags = RDMA_NL_ADMIN_PERM,
>+	},
>+	[RDMA_NLDEV_CMD_DELLINK] = {
>+		.doit = nldev_dellink,
>+		.flags = RDMA_NL_ADMIN_PERM,
>+	},
> 	[RDMA_NLDEV_CMD_PORT_GET] = {
> 		.doit = nldev_port_get_doit,
> 		.dump = nldev_port_get_dumpit,
>diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
>index 70218e6b5187..42daca1589bd 100644
>--- a/include/rdma/rdma_netlink.h
>+++ b/include/rdma/rdma_netlink.h
>@@ -99,4 +99,16 @@ int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr
>*nlh,
>  * Returns true on success or false if no listeners.
>  */
> bool rdma_nl_chk_listeners(unsigned int group);
>+
>+struct rdma_link_ops {
>+	struct list_head list;
>+	const char *type;
>+	int (*newlink)(const char *ibdev_name, const char *ndev_name);
>+	int (*dellink)(const char *ibdev_name);
>+};
>+
>+void rdma_link_register(struct rdma_link_ops *ops);
>+void rdma_link_unregister(struct rdma_link_ops *ops);
>+
>+#define MODULE_ALIAS_RDMA_LINK(type) MODULE_ALIAS("rdma-link-"
>type)
> #endif /* _RDMA_NETLINK_H */
>diff --git a/include/uapi/rdma/rdma_netlink.h
>b/include/uapi/rdma/rdma_netlink.h
>index f9c41bf59efc..7feb087ab6fd 100644
>--- a/include/uapi/rdma/rdma_netlink.h
>+++ b/include/uapi/rdma/rdma_netlink.h
>@@ -229,7 +229,9 @@ enum rdma_nldev_command {
> 	RDMA_NLDEV_CMD_GET, /* can dump */
> 	RDMA_NLDEV_CMD_SET,
>
>-	/* 3 - 4 are free to use */
>+	RDMA_NLDEV_CMD_NEWLINK,
>+
>+	RDMA_NLDEV_CMD_DELLINK,
>
> 	RDMA_NLDEV_CMD_PORT_GET = 5, /* can dump */

Minor nit...

Should the "= 5 " be cleaned up?

This may be off topic, but is this going the general mechanism for getting stuff to the specific driver?

1) I.e. create a new API interface with a  call back
2) Add the plumbing to the underlying driver

Is there a plan for adding other things?

Do you happen to have any thoughts on things like driver specific config information?

Thanks,

Mike

>@@ -428,6 +430,11 @@ enum rdma_nldev_attr {
> 	RDMA_NLDEV_ATTR_DRIVER_U64,		/* u64 */
>
> 	/*
>+	 * Identifies the rdma driver. eg: "rxe" or "siw"
>+	 */
>+	RDMA_NLDEV_ATTR_LINK_TYPE,		/* string */
>+
>+	/*
> 	 * Always the end
> 	 */
> 	RDMA_NLDEV_ATTR_MAX
>--
>1.8.3.1
Steve Wise Nov. 15, 2018, 10:03 p.m. UTC | #2
Hey Mike:

> >Add support for new LINK messages to allow adding and deleting rdma
> >interfaces.  This will be used initially for soft rdma drivers which
> >instantiate device instances dynamically by the admin specifying a
> >netdev device to use.  The rdma_rxe module will be the first user of
> >these messages.
> >
> >The design is modeled after RTNL_NEWLINK/DELLINK:  rdma drivers
> >register with the rdma core if they provide link add/delete functions.
> >Each driver registers with a unique "type" string, that is used to
> >dispatch messages coming from user space.  A new RDMA_NLDEV_ATTR is
> >defined for the "type" string.  User mode will pass 3 attributes in a
> >NEWLINK message: RDMA_NLDEV_ATTR_IBDEV_NAME for the desired
> rdma
> >device
> >name to be created, RDMA_NLDEV_ATTR_LINK_TYPE for the "type" of link
> >being added, and RDMA_NLDEV_ATTR_NDEV_NAME for the net_device
> >interface
> >to use for this link. The DELLINK message will contain the IBDEV_NAME
> >and LINK_TYPE attributes.
> >
> >Signed-off-by: Steve Wise <swise@opengridcomputing.com>
> >Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
> >---
> > drivers/infiniband/core/nldev.c  | 136
> >+++++++++++++++++++++++++++++++++++++++
> > include/rdma/rdma_netlink.h      |  12 ++++
> > include/uapi/rdma/rdma_netlink.h |   9 ++-
> > 3 files changed, 156 insertions(+), 1 deletion(-)
> >
> >diff --git a/drivers/infiniband/core/nldev.c
> b/drivers/infiniband/core/nldev.c
> >index 573399e3ccc1..926638cd52bc 100644
> >--- a/drivers/infiniband/core/nldev.c
> >+++ b/drivers/infiniband/core/nldev.c
> >@@ -33,6 +33,7 @@
> > #include <linux/module.h>
> > #include <linux/pid.h>
> > #include <linux/pid_namespace.h>
> >+#include <linux/mutex.h>
> > #include <net/netlink.h>
> > #include <rdma/rdma_cm.h>
> > #include <rdma/rdma_netlink.h>
> >@@ -107,6 +108,8 @@
> > 	[RDMA_NLDEV_ATTR_DRIVER_U32]		= { .type = NLA_U32 },
> > 	[RDMA_NLDEV_ATTR_DRIVER_S64]		= { .type = NLA_S64 },
> > 	[RDMA_NLDEV_ATTR_DRIVER_U64]		= { .type = NLA_U64 },
> >+	[RDMA_NLDEV_ATTR_LINK_TYPE]		= { .type =
> >NLA_NUL_STRING,
> >+				    .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN
> >},
> > };
> >
> > static int put_driver_name_print_type(struct sk_buff *msg, const char
> >*name,
> >@@ -1103,6 +1106,131 @@ static int nldev_res_get_pd_dumpit(struct
> >sk_buff *skb,
> > 	return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_PD);
> > }
> >
> >+static LIST_HEAD(link_ops);
> >+static DEFINE_MUTEX(link_ops_mutex);
> >+
> >+static const struct rdma_link_ops *link_ops_get(const char *type)
> >+{
> 
> _get() usually seems like a reference counted mechanism.
> 
> Would this be better named as _ops_find()?
>

Perhaps, but see my answers below.  I think we need the reference.
 
> >+	const struct rdma_link_ops *ops;
> >+
> >+	mutex_lock(&link_ops_mutex);
> >+	list_for_each_entry(ops, &link_ops, list) {
> >+		if (!strcmp(ops->type, type))
> >+			goto out;
> >+	}
> >+	ops = NULL;
> >+out:
> >+	mutex_unlock(&link_ops_mutex);
> >+	return ops;
> >+}
> >+
> >+void rdma_link_register(struct rdma_link_ops *ops)
> >+{
> >+	if (link_ops_get(ops->type)) {
> >+		WARN_ONCE("Duplicate rdma_link_ops! %s\n", ops->type);
> >+		return;
> >+	}
> >+	mutex_lock(&link_ops_mutex);
> >+	list_add(&ops->list, &link_ops);
> >+	mutex_unlock(&link_ops_mutex);
> >+}
> >+EXPORT_SYMBOL(rdma_link_register);
> >+
> >+void rdma_link_unregister(struct rdma_link_ops *ops)
> >+{
> >+	mutex_lock(&link_ops_mutex);
> >+	list_del(&ops->list);
> >+	mutex_unlock(&link_ops_mutex);
> >+}
> >+EXPORT_SYMBOL(rdma_link_unregister);
> 
> Will there be any issues with a link being unregistered while in use?
> 

I guess what could happen is a NL message arrives and the ops ptr is
fetched, but before it is used, the link is unregistered because the module
is unloading.  Then the function pointer is dereferenced and the memory has
been freed and the module unloaded!

Perhaps we just need to reference the module refcnt in link_ops_get() and
add a link_ops_put() that deref's the module.  

Jason, is this a good approach?

> >+
> >+static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
> >+			  struct netlink_ext_ack *extack)
> >+{
> >+	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
> >+	char ibdev_name[IB_DEVICE_NAME_MAX];
> >+	const struct rdma_link_ops *ops;
> >+	char ndev_name[IFNAMSIZ];
> >+	char type[IFNAMSIZ];
> >+	int err;
> >+
> >+	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
> >+			  nldev_policy, extack);
> >+	if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
> >+	    !tb[RDMA_NLDEV_ATTR_LINK_TYPE] ||
> >!tb[RDMA_NLDEV_ATTR_NDEV_NAME]) {
> >+		err = -EINVAL;
> 
> You are losing the information of nlmsg_parse() failing, is that ok?
> 

I think  any of the the possible failures map to EINVAL anyway:  bad
attributes or missing attributes.

> >+		goto err_out;
> 
> Is err_out really necessary?  Maybe just return?
>

Since there is no unwind code, I guess each of these failures could be just
a return.  But then if we end up adding unwind code, we'd want the goto's
back in.  
 
> >+	}
> >+	if (nla_len(tb[RDMA_NLDEV_ATTR_DEV_NAME]) >
> >sizeof(ibdev_name) ||
> >+	    nla_len(tb[RDMA_NLDEV_ATTR_LINK_TYPE]) > sizeof(type) ||
> >+	    nla_len(tb[RDMA_NLDEV_ATTR_NDEV_NAME]) >
> >sizeof(ndev_name)) {
> >+		err = -EINVAL;
> >+		goto err_out;
> >+	}
> >+	nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
> >+		    sizeof(ibdev_name));
> >+	if (strchr(ibdev_name, '%')) {
> >+		err = -EINVAL;
> >+		goto err_out;
> >+	}
> >+	nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
> >+	nla_strlcpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
> >+		    sizeof(ndev_name));
> >+
> >+	ops = link_ops_get(type);
> >+#ifdef CONFIG_MODULES
> >+	if (!ops) {
> >+		request_module("rdma-link-%s", type);
> >+		ops = link_ops_get(type);
> >+	}
> >+#endif
> >+	if (!ops) {
> >+		err = -ENODEV;
> >+		goto err_out;
> >+	}
> >+
> >+	err = ops->newlink(ibdev_name, ndev_name);
> >+err_out:
> >+	return err;
> >+}
> >+
> >+static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
> >+			  struct netlink_ext_ack *extack)
> >+{
> >+	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
> >+	char ibdev_name[IB_DEVICE_NAME_MAX];
> >+	const struct rdma_link_ops *ops;
> >+	char type[IFNAMSIZ];
> >+	int err;
> >+
> >+	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
> >+			  nldev_policy, extack);
> >+	if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
> >+	    !tb[RDMA_NLDEV_ATTR_LINK_TYPE]) {
> >+		err = -EINVAL;
> >+		goto err_out;
> >+	}
> >+
> >+	if (nla_len(tb[RDMA_NLDEV_ATTR_DEV_NAME]) >
> >sizeof(ibdev_name) ||
> >+	    nla_len(tb[RDMA_NLDEV_ATTR_LINK_TYPE]) > sizeof(type)) {
> >+		err = -EINVAL;
> >+		goto err_out;
> >+	}
> >+	nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
> >+		    sizeof(ibdev_name));
> >+	nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
> >+
> >+	ops = link_ops_get(type);
> >+	if (!ops) {
> >+		err = -ENODEV;
> >+		goto err_out;
> >+	}
> >+
> >+	err = ops->dellink(ibdev_name);
> >+err_out:
> >+	return err;
> >+}
> >+
> > static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS]
> =
> >{
> > 	[RDMA_NLDEV_CMD_GET] = {
> > 		.doit = nldev_get_doit,
> >@@ -1112,6 +1240,14 @@ static int nldev_res_get_pd_dumpit(struct
> sk_buff
> >*skb,
> > 		.doit = nldev_set_doit,
> > 		.flags = RDMA_NL_ADMIN_PERM,
> > 	},
> >+	[RDMA_NLDEV_CMD_NEWLINK] = {
> >+		.doit = nldev_newlink,
> >+		.flags = RDMA_NL_ADMIN_PERM,
> >+	},
> >+	[RDMA_NLDEV_CMD_DELLINK] = {
> >+		.doit = nldev_dellink,
> >+		.flags = RDMA_NL_ADMIN_PERM,
> >+	},
> > 	[RDMA_NLDEV_CMD_PORT_GET] = {
> > 		.doit = nldev_port_get_doit,
> > 		.dump = nldev_port_get_dumpit,
> >diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
> >index 70218e6b5187..42daca1589bd 100644
> >--- a/include/rdma/rdma_netlink.h
> >+++ b/include/rdma/rdma_netlink.h
> >@@ -99,4 +99,16 @@ int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr
> >*nlh,
> >  * Returns true on success or false if no listeners.
> >  */
> > bool rdma_nl_chk_listeners(unsigned int group);
> >+
> >+struct rdma_link_ops {
> >+	struct list_head list;
> >+	const char *type;
> >+	int (*newlink)(const char *ibdev_name, const char *ndev_name);
> >+	int (*dellink)(const char *ibdev_name);
> >+};
> >+
> >+void rdma_link_register(struct rdma_link_ops *ops);
> >+void rdma_link_unregister(struct rdma_link_ops *ops);
> >+
> >+#define MODULE_ALIAS_RDMA_LINK(type) MODULE_ALIAS("rdma-link-"
> >type)
> > #endif /* _RDMA_NETLINK_H */
> >diff --git a/include/uapi/rdma/rdma_netlink.h
> >b/include/uapi/rdma/rdma_netlink.h
> >index f9c41bf59efc..7feb087ab6fd 100644
> >--- a/include/uapi/rdma/rdma_netlink.h
> >+++ b/include/uapi/rdma/rdma_netlink.h
> >@@ -229,7 +229,9 @@ enum rdma_nldev_command {
> > 	RDMA_NLDEV_CMD_GET, /* can dump */
> > 	RDMA_NLDEV_CMD_SET,
> >
> >-	/* 3 - 4 are free to use */
> >+	RDMA_NLDEV_CMD_NEWLINK,
> >+
> >+	RDMA_NLDEV_CMD_DELLINK,
> >
> > 	RDMA_NLDEV_CMD_PORT_GET = 5, /* can dump */
> 
> Minor nit...
> 
> Should the "= 5 " be cleaned up?
> 

Yes.  Will do.

> This may be off topic, but is this going the general mechanism for getting
> stuff to the specific driver?
> 
> 1) I.e. create a new API interface with a  call back
> 2) Add the plumbing to the underlying driver
> 
> Is there a plan for adding other things?
> 
> Do you happen to have any thoughts on things like driver specific config
> information?

The plan is to add link get/set messages to allow tweaking individual link
attributes.  And yes, the proposal is that the plumbing from this series
will be further extended to handle that.  So I would see new functions added
to the link_ops for these.  For iwarp devices, this could allow setting
whether MPA_CRC and MPA_MARKERS are enabled for a link. 

> 
> Thanks,
> 
> Mike
> 

Thanks for the review!

Steve
Ruhl, Michael J Nov. 16, 2018, 2:31 p.m. UTC | #3
>-----Original Message-----
>From: Steve Wise [mailto:swise@opengridcomputing.com]
>Sent: Thursday, November 15, 2018 5:04 PM
>To: Ruhl, Michael J <michael.j.ruhl@intel.com>; dledford@redhat.com;
>jgg@mellanox.com
>Cc: linux-rdma@vger.kernel.org; BMT@zurich.ibm.com
>Subject: RE: [PATCH v2 1/2] RDMA/Core: add
>RDMA_NLDEV_CMD_NEWLINK/DELLINK support
>
>Hey Mike:
>
>> >Add support for new LINK messages to allow adding and deleting rdma
>> >interfaces.  This will be used initially for soft rdma drivers which
>> >instantiate device instances dynamically by the admin specifying a
>> >netdev device to use.  The rdma_rxe module will be the first user of
>> >these messages.
>> >
>> >The design is modeled after RTNL_NEWLINK/DELLINK:  rdma drivers
>> >register with the rdma core if they provide link add/delete functions.
>> >Each driver registers with a unique "type" string, that is used to
>> >dispatch messages coming from user space.  A new RDMA_NLDEV_ATTR is
>> >defined for the "type" string.  User mode will pass 3 attributes in a
>> >NEWLINK message: RDMA_NLDEV_ATTR_IBDEV_NAME for the desired
>> rdma
>> >device
>> >name to be created, RDMA_NLDEV_ATTR_LINK_TYPE for the "type" of link
>> >being added, and RDMA_NLDEV_ATTR_NDEV_NAME for the net_device
>> >interface
>> >to use for this link. The DELLINK message will contain the IBDEV_NAME
>> >and LINK_TYPE attributes.
>> >
>> >Signed-off-by: Steve Wise <swise@opengridcomputing.com>
>> >Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
>> >---
>> > drivers/infiniband/core/nldev.c  | 136
>> >+++++++++++++++++++++++++++++++++++++++
>> > include/rdma/rdma_netlink.h      |  12 ++++
>> > include/uapi/rdma/rdma_netlink.h |   9 ++-
>> > 3 files changed, 156 insertions(+), 1 deletion(-)
>> >
>> >diff --git a/drivers/infiniband/core/nldev.c
>> b/drivers/infiniband/core/nldev.c
>> >index 573399e3ccc1..926638cd52bc 100644
>> >--- a/drivers/infiniband/core/nldev.c
>> >+++ b/drivers/infiniband/core/nldev.c
>> >@@ -33,6 +33,7 @@
>> > #include <linux/module.h>
>> > #include <linux/pid.h>
>> > #include <linux/pid_namespace.h>
>> >+#include <linux/mutex.h>
>> > #include <net/netlink.h>
>> > #include <rdma/rdma_cm.h>
>> > #include <rdma/rdma_netlink.h>
>> >@@ -107,6 +108,8 @@
>> > 	[RDMA_NLDEV_ATTR_DRIVER_U32]		= { .type = NLA_U32 },
>> > 	[RDMA_NLDEV_ATTR_DRIVER_S64]		= { .type = NLA_S64 },
>> > 	[RDMA_NLDEV_ATTR_DRIVER_U64]		= { .type = NLA_U64 },
>> >+	[RDMA_NLDEV_ATTR_LINK_TYPE]		= { .type =
>> >NLA_NUL_STRING,
>> >+				    .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN
>> >},
>> > };
>> >
>> > static int put_driver_name_print_type(struct sk_buff *msg, const char
>> >*name,
>> >@@ -1103,6 +1106,131 @@ static int nldev_res_get_pd_dumpit(struct
>> >sk_buff *skb,
>> > 	return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_PD);
>> > }
>> >
>> >+static LIST_HEAD(link_ops);
>> >+static DEFINE_MUTEX(link_ops_mutex);
>> >+
>> >+static const struct rdma_link_ops *link_ops_get(const char *type)
>> >+{
>>
>> _get() usually seems like a reference counted mechanism.
>>
>> Would this be better named as _ops_find()?
>>
>
>Perhaps, but see my answers below.  I think we need the reference.
>
>> >+	const struct rdma_link_ops *ops;
>> >+
>> >+	mutex_lock(&link_ops_mutex);
>> >+	list_for_each_entry(ops, &link_ops, list) {
>> >+		if (!strcmp(ops->type, type))
>> >+			goto out;
>> >+	}
>> >+	ops = NULL;
>> >+out:
>> >+	mutex_unlock(&link_ops_mutex);
>> >+	return ops;
>> >+}
>> >+
>> >+void rdma_link_register(struct rdma_link_ops *ops)
>> >+{
>> >+	if (link_ops_get(ops->type)) {
>> >+		WARN_ONCE("Duplicate rdma_link_ops! %s\n", ops->type);
>> >+		return;
>> >+	}
>> >+	mutex_lock(&link_ops_mutex);
>> >+	list_add(&ops->list, &link_ops);
>> >+	mutex_unlock(&link_ops_mutex);
>> >+}
>> >+EXPORT_SYMBOL(rdma_link_register);
>> >+
>> >+void rdma_link_unregister(struct rdma_link_ops *ops)
>> >+{
>> >+	mutex_lock(&link_ops_mutex);
>> >+	list_del(&ops->list);
>> >+	mutex_unlock(&link_ops_mutex);
>> >+}
>> >+EXPORT_SYMBOL(rdma_link_unregister);
>>
>> Will there be any issues with a link being unregistered while in use?
>>
>
>I guess what could happen is a NL message arrives and the ops ptr is
>fetched, but before it is used, the link is unregistered because the module
>is unloading.  Then the function pointer is dereferenced and the memory has
>been freed and the module unloaded!
>
>Perhaps we just need to reference the module refcnt in link_ops_get() and
>add a link_ops_put() that deref's the module.

That was kinda what I was thinking.

>Jason, is this a good approach?
>
>> >+
>> >+static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
>> >+			  struct netlink_ext_ack *extack)
>> >+{
>> >+	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
>> >+	char ibdev_name[IB_DEVICE_NAME_MAX];
>> >+	const struct rdma_link_ops *ops;
>> >+	char ndev_name[IFNAMSIZ];
>> >+	char type[IFNAMSIZ];
>> >+	int err;
>> >+
>> >+	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
>> >+			  nldev_policy, extack);
>> >+	if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
>> >+	    !tb[RDMA_NLDEV_ATTR_LINK_TYPE] ||
>> >!tb[RDMA_NLDEV_ATTR_NDEV_NAME]) {
>> >+		err = -EINVAL;
>>
>> You are losing the information of nlmsg_parse() failing, is that ok?
>>
>
>I think  any of the the possible failures map to EINVAL anyway:  bad
>attributes or missing attributes.

I followed nlmsg_parse down, and it looks like most of the errors are
EINVAL, but it does return an ERANGE as well.

>
>> >+		goto err_out;
>>
>> Is err_out really necessary?  Maybe just return?
>>
>
>Since there is no unwind code, I guess each of these failures could be just
>a return.  But then if we end up adding unwind code, we'd want the goto's
>back in.

Either way work for me.  Done without the goto seems to be preferred if
there is no unwind.

>> >+	}
>> >+	if (nla_len(tb[RDMA_NLDEV_ATTR_DEV_NAME]) >
>> >sizeof(ibdev_name) ||
>> >+	    nla_len(tb[RDMA_NLDEV_ATTR_LINK_TYPE]) > sizeof(type) ||
>> >+	    nla_len(tb[RDMA_NLDEV_ATTR_NDEV_NAME]) >
>> >sizeof(ndev_name)) {
>> >+		err = -EINVAL;
>> >+		goto err_out;
>> >+	}
>> >+	nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
>> >+		    sizeof(ibdev_name));
>> >+	if (strchr(ibdev_name, '%')) {
>> >+		err = -EINVAL;
>> >+		goto err_out;
>> >+	}
>> >+	nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
>> >+	nla_strlcpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
>> >+		    sizeof(ndev_name));
>> >+
>> >+	ops = link_ops_get(type);
>> >+#ifdef CONFIG_MODULES
>> >+	if (!ops) {
>> >+		request_module("rdma-link-%s", type);
>> >+		ops = link_ops_get(type);
>> >+	}
>> >+#endif
>> >+	if (!ops) {
>> >+		err = -ENODEV;
>> >+		goto err_out;
>> >+	}
>> >+
>> >+	err = ops->newlink(ibdev_name, ndev_name);
>> >+err_out:
>> >+	return err;
>> >+}
>> >+
>> >+static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
>> >+			  struct netlink_ext_ack *extack)
>> >+{
>> >+	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
>> >+	char ibdev_name[IB_DEVICE_NAME_MAX];
>> >+	const struct rdma_link_ops *ops;
>> >+	char type[IFNAMSIZ];
>> >+	int err;
>> >+
>> >+	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
>> >+			  nldev_policy, extack);
>> >+	if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
>> >+	    !tb[RDMA_NLDEV_ATTR_LINK_TYPE]) {
>> >+		err = -EINVAL;
>> >+		goto err_out;
>> >+	}
>> >+
>> >+	if (nla_len(tb[RDMA_NLDEV_ATTR_DEV_NAME]) >
>> >sizeof(ibdev_name) ||
>> >+	    nla_len(tb[RDMA_NLDEV_ATTR_LINK_TYPE]) > sizeof(type)) {
>> >+		err = -EINVAL;
>> >+		goto err_out;
>> >+	}
>> >+	nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
>> >+		    sizeof(ibdev_name));
>> >+	nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
>> >+
>> >+	ops = link_ops_get(type);
>> >+	if (!ops) {
>> >+		err = -ENODEV;
>> >+		goto err_out;
>> >+	}
>> >+
>> >+	err = ops->dellink(ibdev_name);
>> >+err_out:
>> >+	return err;
>> >+}
>> >+
>> > static const struct rdma_nl_cbs
>nldev_cb_table[RDMA_NLDEV_NUM_OPS]
>> =
>> >{
>> > 	[RDMA_NLDEV_CMD_GET] = {
>> > 		.doit = nldev_get_doit,
>> >@@ -1112,6 +1240,14 @@ static int nldev_res_get_pd_dumpit(struct
>> sk_buff
>> >*skb,
>> > 		.doit = nldev_set_doit,
>> > 		.flags = RDMA_NL_ADMIN_PERM,
>> > 	},
>> >+	[RDMA_NLDEV_CMD_NEWLINK] = {
>> >+		.doit = nldev_newlink,
>> >+		.flags = RDMA_NL_ADMIN_PERM,
>> >+	},
>> >+	[RDMA_NLDEV_CMD_DELLINK] = {
>> >+		.doit = nldev_dellink,
>> >+		.flags = RDMA_NL_ADMIN_PERM,
>> >+	},
>> > 	[RDMA_NLDEV_CMD_PORT_GET] = {
>> > 		.doit = nldev_port_get_doit,
>> > 		.dump = nldev_port_get_dumpit,
>> >diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
>> >index 70218e6b5187..42daca1589bd 100644
>> >--- a/include/rdma/rdma_netlink.h
>> >+++ b/include/rdma/rdma_netlink.h
>> >@@ -99,4 +99,16 @@ int ibnl_put_attr(struct sk_buff *skb, struct
>nlmsghdr
>> >*nlh,
>> >  * Returns true on success or false if no listeners.
>> >  */
>> > bool rdma_nl_chk_listeners(unsigned int group);
>> >+
>> >+struct rdma_link_ops {
>> >+	struct list_head list;
>> >+	const char *type;
>> >+	int (*newlink)(const char *ibdev_name, const char *ndev_name);
>> >+	int (*dellink)(const char *ibdev_name);
>> >+};
>> >+
>> >+void rdma_link_register(struct rdma_link_ops *ops);
>> >+void rdma_link_unregister(struct rdma_link_ops *ops);
>> >+
>> >+#define MODULE_ALIAS_RDMA_LINK(type) MODULE_ALIAS("rdma-link-
>"
>> >type)
>> > #endif /* _RDMA_NETLINK_H */
>> >diff --git a/include/uapi/rdma/rdma_netlink.h
>> >b/include/uapi/rdma/rdma_netlink.h
>> >index f9c41bf59efc..7feb087ab6fd 100644
>> >--- a/include/uapi/rdma/rdma_netlink.h
>> >+++ b/include/uapi/rdma/rdma_netlink.h
>> >@@ -229,7 +229,9 @@ enum rdma_nldev_command {
>> > 	RDMA_NLDEV_CMD_GET, /* can dump */
>> > 	RDMA_NLDEV_CMD_SET,
>> >
>> >-	/* 3 - 4 are free to use */
>> >+	RDMA_NLDEV_CMD_NEWLINK,
>> >+
>> >+	RDMA_NLDEV_CMD_DELLINK,
>> >
>> > 	RDMA_NLDEV_CMD_PORT_GET = 5, /* can dump */
>>
>> Minor nit...
>>
>> Should the "= 5 " be cleaned up?
>>
>
>Yes.  Will do.
>
>> This may be off topic, but is this going the general mechanism for getting
>> stuff to the specific driver?
>>
>> 1) I.e. create a new API interface with a  call back
>> 2) Add the plumbing to the underlying driver
>>
>> Is there a plan for adding other things?
>>
>> Do you happen to have any thoughts on things like driver specific config
>> information?
>
>The plan is to add link get/set messages to allow tweaking individual link
>attributes.  And yes, the proposal is that the plumbing from this series
>will be further extended to handle that.  So I would see new functions added
>to the link_ops for these.  For iwarp devices, this could allow setting
>whether MPA_CRC and MPA_MARKERS are enabled for a link.

Got it.  Thanks for the insights.

>>
>> Thanks,
>>
>> Mike
>>
>
>Thanks for the review!

Sure thing.  Thanks for moving this stuff forward!

Mike

>Steve
>
Jason Gunthorpe Nov. 19, 2018, 9:57 p.m. UTC | #4
On Thu, Nov 15, 2018 at 04:03:59PM -0600, Steve Wise wrote:
> > >+	const struct rdma_link_ops *ops;
> > >+
> > >+	mutex_lock(&link_ops_mutex);
> > >+	list_for_each_entry(ops, &link_ops, list) {
> > >+		if (!strcmp(ops->type, type))
> > >+			goto out;
> > >+	}
> > >+	ops = NULL;
> > >+out:
> > >+	mutex_unlock(&link_ops_mutex);
> > >+	return ops;
> > >+}
> > >+
> > >+void rdma_link_register(struct rdma_link_ops *ops)
> > >+{
> > >+	if (link_ops_get(ops->type)) {
> > >+		WARN_ONCE("Duplicate rdma_link_ops! %s\n", ops->type);
> > >+		return;
> > >+	}
> > >+	mutex_lock(&link_ops_mutex);
> > >+	list_add(&ops->list, &link_ops);
> > >+	mutex_unlock(&link_ops_mutex);
> > >+}
> > >+EXPORT_SYMBOL(rdma_link_register);
> > >+
> > >+void rdma_link_unregister(struct rdma_link_ops *ops)
> > >+{
> > >+	mutex_lock(&link_ops_mutex);
> > >+	list_del(&ops->list);
> > >+	mutex_unlock(&link_ops_mutex);
> > >+}
> > >+EXPORT_SYMBOL(rdma_link_unregister);
> > 
> > Will there be any issues with a link being unregistered while in use?
> > 
> 
> I guess what could happen is a NL message arrives and the ops ptr is
> fetched, but before it is used, the link is unregistered because the module
> is unloading.  Then the function pointer is dereferenced and the memory has
> been freed and the module unloaded!
> 
> Perhaps we just need to reference the module refcnt in link_ops_get() and
> add a link_ops_put() that deref's the module.  
> 
> Jason, is this a good approach?

No, rdma_link_unregister should not return while there are active
link_ops_get() callers.

I think you should have it be more like

mutex_lock(&link_ops_mutex);
list_for_each_entry(ops, &link_ops, list) ..
ops->newlink(...);
mutex_unlock(&link_ops_mutex);

So unregister serialization works as expected.

> > >+		goto err_out;
> > 
> > Is err_out really necessary?  Maybe just return?
> >
> 
> Since there is no unwind code, I guess each of these failures could be just
> a return.  But then if we end up adding unwind code, we'd want the goto's
> back in.  

Generally we don't do the gotos if there is no current unwind

Even if there is unwind the goto's that only do 'return' are usually
kept as return

Jason
diff mbox series

Patch

diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 573399e3ccc1..926638cd52bc 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -33,6 +33,7 @@ 
 #include <linux/module.h>
 #include <linux/pid.h>
 #include <linux/pid_namespace.h>
+#include <linux/mutex.h>
 #include <net/netlink.h>
 #include <rdma/rdma_cm.h>
 #include <rdma/rdma_netlink.h>
@@ -107,6 +108,8 @@ 
 	[RDMA_NLDEV_ATTR_DRIVER_U32]		= { .type = NLA_U32 },
 	[RDMA_NLDEV_ATTR_DRIVER_S64]		= { .type = NLA_S64 },
 	[RDMA_NLDEV_ATTR_DRIVER_U64]		= { .type = NLA_U64 },
+	[RDMA_NLDEV_ATTR_LINK_TYPE]		= { .type = NLA_NUL_STRING,
+				    .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN },
 };
 
 static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
@@ -1103,6 +1106,131 @@  static int nldev_res_get_pd_dumpit(struct sk_buff *skb,
 	return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_PD);
 }
 
+static LIST_HEAD(link_ops);
+static DEFINE_MUTEX(link_ops_mutex);
+
+static const struct rdma_link_ops *link_ops_get(const char *type)
+{
+	const struct rdma_link_ops *ops;
+
+	mutex_lock(&link_ops_mutex);
+	list_for_each_entry(ops, &link_ops, list) {
+		if (!strcmp(ops->type, type))
+			goto out;
+	}
+	ops = NULL;
+out:
+	mutex_unlock(&link_ops_mutex);
+	return ops;
+}
+
+void rdma_link_register(struct rdma_link_ops *ops)
+{
+	if (link_ops_get(ops->type)) {
+		WARN_ONCE("Duplicate rdma_link_ops! %s\n", ops->type);
+		return;
+	}
+	mutex_lock(&link_ops_mutex);
+	list_add(&ops->list, &link_ops);
+	mutex_unlock(&link_ops_mutex);
+}
+EXPORT_SYMBOL(rdma_link_register);
+
+void rdma_link_unregister(struct rdma_link_ops *ops)
+{
+	mutex_lock(&link_ops_mutex);
+	list_del(&ops->list);
+	mutex_unlock(&link_ops_mutex);
+}
+EXPORT_SYMBOL(rdma_link_unregister);
+
+static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+			  struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+	char ibdev_name[IB_DEVICE_NAME_MAX];
+	const struct rdma_link_ops *ops;
+	char ndev_name[IFNAMSIZ];
+	char type[IFNAMSIZ];
+	int err;
+
+	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+			  nldev_policy, extack);
+	if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
+	    !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME]) {
+		err = -EINVAL;
+		goto err_out;
+	}
+	if (nla_len(tb[RDMA_NLDEV_ATTR_DEV_NAME]) > sizeof(ibdev_name) ||
+	    nla_len(tb[RDMA_NLDEV_ATTR_LINK_TYPE]) > sizeof(type) ||
+	    nla_len(tb[RDMA_NLDEV_ATTR_NDEV_NAME]) > sizeof(ndev_name)) {
+		err = -EINVAL;
+		goto err_out;
+	}
+	nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
+		    sizeof(ibdev_name));
+	if (strchr(ibdev_name, '%')) {
+		err = -EINVAL;
+		goto err_out;
+	}
+	nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
+	nla_strlcpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
+		    sizeof(ndev_name));
+
+	ops = link_ops_get(type);
+#ifdef CONFIG_MODULES
+	if (!ops) {
+		request_module("rdma-link-%s", type);
+		ops = link_ops_get(type);
+	}
+#endif
+	if (!ops) {
+		err = -ENODEV;
+		goto err_out;
+	}
+
+	err = ops->newlink(ibdev_name, ndev_name);
+err_out:
+	return err;
+}
+
+static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
+			  struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+	char ibdev_name[IB_DEVICE_NAME_MAX];
+	const struct rdma_link_ops *ops;
+	char type[IFNAMSIZ];
+	int err;
+
+	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+			  nldev_policy, extack);
+	if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
+	    !tb[RDMA_NLDEV_ATTR_LINK_TYPE]) {
+		err = -EINVAL;
+		goto err_out;
+	}
+
+	if (nla_len(tb[RDMA_NLDEV_ATTR_DEV_NAME]) > sizeof(ibdev_name) ||
+	    nla_len(tb[RDMA_NLDEV_ATTR_LINK_TYPE]) > sizeof(type)) {
+		err = -EINVAL;
+		goto err_out;
+	}
+	nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
+		    sizeof(ibdev_name));
+	nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
+
+	ops = link_ops_get(type);
+	if (!ops) {
+		err = -ENODEV;
+		goto err_out;
+	}
+
+	err = ops->dellink(ibdev_name);
+err_out:
+	return err;
+}
+
 static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
 	[RDMA_NLDEV_CMD_GET] = {
 		.doit = nldev_get_doit,
@@ -1112,6 +1240,14 @@  static int nldev_res_get_pd_dumpit(struct sk_buff *skb,
 		.doit = nldev_set_doit,
 		.flags = RDMA_NL_ADMIN_PERM,
 	},
+	[RDMA_NLDEV_CMD_NEWLINK] = {
+		.doit = nldev_newlink,
+		.flags = RDMA_NL_ADMIN_PERM,
+	},
+	[RDMA_NLDEV_CMD_DELLINK] = {
+		.doit = nldev_dellink,
+		.flags = RDMA_NL_ADMIN_PERM,
+	},
 	[RDMA_NLDEV_CMD_PORT_GET] = {
 		.doit = nldev_port_get_doit,
 		.dump = nldev_port_get_dumpit,
diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
index 70218e6b5187..42daca1589bd 100644
--- a/include/rdma/rdma_netlink.h
+++ b/include/rdma/rdma_netlink.h
@@ -99,4 +99,16 @@  int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh,
  * Returns true on success or false if no listeners.
  */
 bool rdma_nl_chk_listeners(unsigned int group);
+
+struct rdma_link_ops {
+	struct list_head list;
+	const char *type;
+	int (*newlink)(const char *ibdev_name, const char *ndev_name);
+	int (*dellink)(const char *ibdev_name);
+};
+
+void rdma_link_register(struct rdma_link_ops *ops);
+void rdma_link_unregister(struct rdma_link_ops *ops);
+
+#define MODULE_ALIAS_RDMA_LINK(type) MODULE_ALIAS("rdma-link-" type)
 #endif /* _RDMA_NETLINK_H */
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index f9c41bf59efc..7feb087ab6fd 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -229,7 +229,9 @@  enum rdma_nldev_command {
 	RDMA_NLDEV_CMD_GET, /* can dump */
 	RDMA_NLDEV_CMD_SET,
 
-	/* 3 - 4 are free to use */
+	RDMA_NLDEV_CMD_NEWLINK,
+
+	RDMA_NLDEV_CMD_DELLINK,
 
 	RDMA_NLDEV_CMD_PORT_GET = 5, /* can dump */
 
@@ -428,6 +430,11 @@  enum rdma_nldev_attr {
 	RDMA_NLDEV_ATTR_DRIVER_U64,		/* u64 */
 
 	/*
+	 * Identifies the rdma driver. eg: "rxe" or "siw"
+	 */
+	RDMA_NLDEV_ATTR_LINK_TYPE,		/* string */
+
+	/*
 	 * Always the end
 	 */
 	RDMA_NLDEV_ATTR_MAX