diff mbox series

[v5,1/2] RDMA/Core: add RDMA_NLDEV_CMD_NEWLINK/DELLINK support

Message ID 93dd69b9c8dfbd6e768da0e8dbfdeb29b8551eed.1543615225.git.swise@opengridcomputing.com (mailing list archive)
State Superseded
Headers show
Series Dynamic rdma link creation | expand

Commit Message

Steve Wise Nov. 30, 2018, 9:58 p.m. UTC
Add support for new LINK messages to allow adding and deleting rdma
interfaces.  This will be used initially for soft rdma drivers which
instantiate device instances dynamically by the admin specifying a
netdev device to use.  The rdma_rxe module will be the first user of
these messages.

The design is modeled after RTNL_NEWLINK/DELLINK:  rdma drivers
register with the rdma core if they provide link add/delete functions.
Each driver registers with a unique "type" string, that is used to
dispatch messages coming from user space.  A new RDMA_NLDEV_ATTR
is defined for the "type" string.  User mode will pass 3 attributes
in a NEWLINK message: RDMA_NLDEV_ATTR_DEV_NAME for the desired rdma
device name to be created, RDMA_NLDEV_ATTR_LINK_TYPE for the "type"
of link being added, and RDMA_NLDEV_ATTR_NDEV_NAME for the net_device
interface to use for this link.  The DELLINK message will contain the
RDMA_NLDEV_ATTR_DEV_INDEX of the device to delete.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/nldev.c  | 139 +++++++++++++++++++++++++++++++++++++++
 include/rdma/ib_verbs.h          |   2 +
 include/rdma/rdma_netlink.h      |  13 ++++
 include/uapi/rdma/rdma_netlink.h |  11 +++-
 4 files changed, 163 insertions(+), 2 deletions(-)

Comments

Leon Romanovsky Dec. 1, 2018, 8 a.m. UTC | #1
On Fri, Nov 30, 2018 at 01:58:03PM -0800, Steve Wise wrote:
> Add support for new LINK messages to allow adding and deleting rdma
> interfaces.  This will be used initially for soft rdma drivers which
> instantiate device instances dynamically by the admin specifying a
> netdev device to use.  The rdma_rxe module will be the first user of
> these messages.
>
> The design is modeled after RTNL_NEWLINK/DELLINK:  rdma drivers
> register with the rdma core if they provide link add/delete functions.
> Each driver registers with a unique "type" string, that is used to
> dispatch messages coming from user space.  A new RDMA_NLDEV_ATTR
> is defined for the "type" string.  User mode will pass 3 attributes
> in a NEWLINK message: RDMA_NLDEV_ATTR_DEV_NAME for the desired rdma
> device name to be created, RDMA_NLDEV_ATTR_LINK_TYPE for the "type"
> of link being added, and RDMA_NLDEV_ATTR_NDEV_NAME for the net_device
> interface to use for this link.  The DELLINK message will contain the
> RDMA_NLDEV_ATTR_DEV_INDEX of the device to delete.
>
> Signed-off-by: Steve Wise <swise@opengridcomputing.com>
> Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
> ---
>  drivers/infiniband/core/nldev.c  | 139 +++++++++++++++++++++++++++++++++++++++
>  include/rdma/ib_verbs.h          |   2 +
>  include/rdma/rdma_netlink.h      |  13 ++++
>  include/uapi/rdma/rdma_netlink.h |  11 +++-
>  4 files changed, 163 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
> index 63cc74483188..6df4c98da365 100644
> --- a/drivers/infiniband/core/nldev.c
> +++ b/drivers/infiniband/core/nldev.c
> @@ -33,6 +33,7 @@
>  #include <linux/module.h>
>  #include <linux/pid.h>
>  #include <linux/pid_namespace.h>
> +#include <linux/mutex.h>
>  #include <net/netlink.h>
>  #include <rdma/rdma_cm.h>
>  #include <rdma/rdma_netlink.h>
> @@ -107,6 +108,8 @@
>  	[RDMA_NLDEV_ATTR_DRIVER_U32]		= { .type = NLA_U32 },
>  	[RDMA_NLDEV_ATTR_DRIVER_S64]		= { .type = NLA_S64 },
>  	[RDMA_NLDEV_ATTR_DRIVER_U64]		= { .type = NLA_U64 },
> +	[RDMA_NLDEV_ATTR_LINK_TYPE]		= { .type = NLA_NUL_STRING,
> +				    .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN },
>  };
>
>  static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
> @@ -1103,6 +1106,134 @@ static int nldev_res_get_pd_dumpit(struct sk_buff *skb,
>  	return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_PD);
>  }
>
> +static LIST_HEAD(link_ops);
> +static DECLARE_RWSEM(link_ops_rwsem);
> +
> +static const struct rdma_link_ops *link_ops_get(const char *type)
> +{
> +	const struct rdma_link_ops *ops;
> +
> +	list_for_each_entry(ops, &link_ops, list) {
> +		if (!strcmp(ops->type, type))
> +			goto out;
> +	}
> +	ops = NULL;
> +out:
> +	return ops;
> +}
> +
> +void rdma_link_register(struct rdma_link_ops *ops)
> +{
> +	down_write(&link_ops_rwsem);
> +	if (link_ops_get(ops->type)) {
> +		WARN_ONCE("Duplicate rdma_link_ops! %s\n", ops->type);
> +		goto out;
> +	}
> +	list_add(&ops->list, &link_ops);
> +out:
> +	up_write(&link_ops_rwsem);
> +}
> +EXPORT_SYMBOL(rdma_link_register);
> +
> +void rdma_link_unregister(struct rdma_link_ops *ops)
> +{
> +	down_write(&link_ops_rwsem);
> +	list_del(&ops->list);
> +	up_write(&link_ops_rwsem);
> +}
> +EXPORT_SYMBOL(rdma_link_unregister);
> +
> +static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
> +			  struct netlink_ext_ack *extack)
> +{
> +	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
> +	char ibdev_name[IB_DEVICE_NAME_MAX];
> +	const struct rdma_link_ops *ops;
> +	struct ib_device *device;
> +	char ndev_name[IFNAMSIZ];
> +	char type[IFNAMSIZ];
> +	int err;
> +
> +	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
> +			  nldev_policy, extack);
> +	if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
> +	    !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME])
> +		return -EINVAL;
> +
> +	if (nla_len(tb[RDMA_NLDEV_ATTR_DEV_NAME]) > sizeof(ibdev_name) ||
> +	    nla_len(tb[RDMA_NLDEV_ATTR_LINK_TYPE]) > sizeof(type) ||
> +	    nla_len(tb[RDMA_NLDEV_ATTR_NDEV_NAME]) > sizeof(ndev_name))
> +		return -EINVAL;

Why is that? It is supposed to be tested by nldev_policy in nlmsg_parse stage.

> +
> +	nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
> +		    sizeof(ibdev_name));
> +	if (strchr(ibdev_name, '%'))
> +		return -EINVAL;
> +
> +	nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
> +	nla_strlcpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
> +		    sizeof(ndev_name));
> +
> +	down_read(&link_ops_rwsem);
> +	ops = link_ops_get(type);
> +#ifdef CONFIG_MODULES
> +	if (!ops) {
> +		up_read(&link_ops_rwsem);
> +		request_module("rdma-link-%s", type);
> +		down_read(&link_ops_rwsem);
> +		ops = link_ops_get(type);
> +	}
> +#endif
> +	if (ops) {
> +		device = ops->newlink(ibdev_name, ndev_name);
> +		if (IS_ERR(device))
> +			err = PTR_ERR(device);
> +		else
> +			device->link_ops = ops;
> +	} else {
> +		err = -ENODEV;
> +	}
> +	up_read(&link_ops_rwsem);
> +
> +	return err;
> +}
> +
> +static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
> +			  struct netlink_ext_ack *extack)
> +{
> +	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
> +	const struct rdma_link_ops *ops;
> +	struct device *dma_device;
> +	struct ib_device *device;
> +	u32 index;
> +	int err;
> +
> +	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
> +			  nldev_policy, extack);
> +	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
> +		return -EINVAL;
> +
> +	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
> +	device = ib_device_get_by_index(index);
> +	if (!device)
> +		return -EINVAL;
> +
> +	ops = device->link_ops;
> +
> +	/*
> +	 * Deref the ib_device before deleting it.  Otherwise we
> +	 * deadlock unregistering the device.  Hold a ref on the
> +	 * underlying dma_device though to keep the memory around
> +	 * until we're done.
> +	 */
> +	dma_device = get_device(device->dma_device);
> +	ib_device_put(device);
> +	err = ops ? ops->dellink(device) : -ENODEV;
> +	put_device(dma_device);
> +
> +	return err;
> +}
> +
>  static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
>  	[RDMA_NLDEV_CMD_GET] = {
>  		.doit = nldev_get_doit,
> @@ -1112,6 +1243,14 @@ static int nldev_res_get_pd_dumpit(struct sk_buff *skb,
>  		.doit = nldev_set_doit,
>  		.flags = RDMA_NL_ADMIN_PERM,
>  	},
> +	[RDMA_NLDEV_CMD_NEWLINK] = {
> +		.doit = nldev_newlink,
> +		.flags = RDMA_NL_ADMIN_PERM,
> +	},
> +	[RDMA_NLDEV_CMD_DELLINK] = {
> +		.doit = nldev_dellink,
> +		.flags = RDMA_NL_ADMIN_PERM,
> +	},
>  	[RDMA_NLDEV_CMD_PORT_GET] = {
>  		.doit = nldev_port_get_doit,
>  		.dump = nldev_port_get_dumpit,
> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
> index ee8a6008e222..7a5ef71e5348 100644
> --- a/include/rdma/ib_verbs.h
> +++ b/include/rdma/ib_verbs.h
> @@ -2608,6 +2608,8 @@ struct ib_device {
>  	 */
>  	refcount_t refcount;
>  	struct completion unreg_completion;
> +
> +	const struct rdma_link_ops     *link_ops;
>  };
>
>  struct ib_client {
> diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
> index 70218e6b5187..9b50e957cbae 100644
> --- a/include/rdma/rdma_netlink.h
> +++ b/include/rdma/rdma_netlink.h
> @@ -99,4 +99,17 @@ int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh,
>   * Returns true on success or false if no listeners.
>   */
>  bool rdma_nl_chk_listeners(unsigned int group);
> +
> +struct rdma_link_ops {
> +	struct list_head list;
> +	const char *type;
> +	struct ib_device *(*newlink)(const char *ibdev_name,
> +				     const char *ndev_name);
> +	int (*dellink)(struct ib_device *ibdev);
> +};
> +
> +void rdma_link_register(struct rdma_link_ops *ops);
> +void rdma_link_unregister(struct rdma_link_ops *ops);
> +
> +#define MODULE_ALIAS_RDMA_LINK(type) MODULE_ALIAS("rdma-link-" type)
>  #endif /* _RDMA_NETLINK_H */
> diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
> index f9c41bf59efc..dfdfc2b608b8 100644
> --- a/include/uapi/rdma/rdma_netlink.h
> +++ b/include/uapi/rdma/rdma_netlink.h
> @@ -229,9 +229,11 @@ enum rdma_nldev_command {
>  	RDMA_NLDEV_CMD_GET, /* can dump */
>  	RDMA_NLDEV_CMD_SET,
>
> -	/* 3 - 4 are free to use */
> +	RDMA_NLDEV_CMD_NEWLINK,
>
> -	RDMA_NLDEV_CMD_PORT_GET = 5, /* can dump */
> +	RDMA_NLDEV_CMD_DELLINK,
> +
> +	RDMA_NLDEV_CMD_PORT_GET, /* can dump */
>
>  	/* 6 - 8 are free to use */
>
> @@ -428,6 +430,11 @@ enum rdma_nldev_attr {
>  	RDMA_NLDEV_ATTR_DRIVER_U64,		/* u64 */
>
>  	/*
> +	 * Identifies the rdma driver. eg: "rxe" or "siw"
> +	 */
> +	RDMA_NLDEV_ATTR_LINK_TYPE,		/* string */
> +
> +	/*
>  	 * Always the end
>  	 */
>  	RDMA_NLDEV_ATTR_MAX
> --
> 1.8.3.1
>
Steve Wise Dec. 1, 2018, 5:02 p.m. UTC | #2
> -----Original Message-----
> From: Leon Romanovsky <leon@kernel.org>
> Sent: Saturday, December 1, 2018 2:00 AM
> To: Steve Wise <swise@opengridcomputing.com>
> Cc: dledford@redhat.com; jgg@mellanox.com; linux-rdma@vger.kernel.org;
> BMT@zurich.ibm.com
> Subject: Re: [PATCH v5 1/2] RDMA/Core: add
> RDMA_NLDEV_CMD_NEWLINK/DELLINK support
> 
> On Fri, Nov 30, 2018 at 01:58:03PM -0800, Steve Wise wrote:
> > Add support for new LINK messages to allow adding and deleting rdma
> > interfaces.  This will be used initially for soft rdma drivers which
> > instantiate device instances dynamically by the admin specifying a
> > netdev device to use.  The rdma_rxe module will be the first user of
> > these messages.
> >
> > The design is modeled after RTNL_NEWLINK/DELLINK:  rdma drivers
> > register with the rdma core if they provide link add/delete functions.
> > Each driver registers with a unique "type" string, that is used to
> > dispatch messages coming from user space.  A new RDMA_NLDEV_ATTR
> > is defined for the "type" string.  User mode will pass 3 attributes
> > in a NEWLINK message: RDMA_NLDEV_ATTR_DEV_NAME for the desired
> rdma
> > device name to be created, RDMA_NLDEV_ATTR_LINK_TYPE for the "type"
> > of link being added, and RDMA_NLDEV_ATTR_NDEV_NAME for the
> net_device
> > interface to use for this link.  The DELLINK message will contain the
> > RDMA_NLDEV_ATTR_DEV_INDEX of the device to delete.
> >
> > Signed-off-by: Steve Wise <swise@opengridcomputing.com>
> > Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
> > ---
> >  drivers/infiniband/core/nldev.c  | 139
> +++++++++++++++++++++++++++++++++++++++
> >  include/rdma/ib_verbs.h          |   2 +
> >  include/rdma/rdma_netlink.h      |  13 ++++
> >  include/uapi/rdma/rdma_netlink.h |  11 +++-
> >  4 files changed, 163 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/infiniband/core/nldev.c
> b/drivers/infiniband/core/nldev.c
> > index 63cc74483188..6df4c98da365 100644
> > --- a/drivers/infiniband/core/nldev.c
> > +++ b/drivers/infiniband/core/nldev.c
> > @@ -33,6 +33,7 @@
> >  #include <linux/module.h>
> >  #include <linux/pid.h>
> >  #include <linux/pid_namespace.h>
> > +#include <linux/mutex.h>
> >  #include <net/netlink.h>
> >  #include <rdma/rdma_cm.h>
> >  #include <rdma/rdma_netlink.h>
> > @@ -107,6 +108,8 @@
> >  	[RDMA_NLDEV_ATTR_DRIVER_U32]		= { .type = NLA_U32 },
> >  	[RDMA_NLDEV_ATTR_DRIVER_S64]		= { .type = NLA_S64 },
> >  	[RDMA_NLDEV_ATTR_DRIVER_U64]		= { .type = NLA_U64 },
> > +	[RDMA_NLDEV_ATTR_LINK_TYPE]		= { .type =
> NLA_NUL_STRING,
> > +				    .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN
> },
> >  };
> >
> >  static int put_driver_name_print_type(struct sk_buff *msg, const char
> *name,
> > @@ -1103,6 +1106,134 @@ static int nldev_res_get_pd_dumpit(struct
> sk_buff *skb,
> >  	return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_PD);
> >  }
> >
> > +static LIST_HEAD(link_ops);
> > +static DECLARE_RWSEM(link_ops_rwsem);
> > +
> > +static const struct rdma_link_ops *link_ops_get(const char *type)
> > +{
> > +	const struct rdma_link_ops *ops;
> > +
> > +	list_for_each_entry(ops, &link_ops, list) {
> > +		if (!strcmp(ops->type, type))
> > +			goto out;
> > +	}
> > +	ops = NULL;
> > +out:
> > +	return ops;
> > +}
> > +
> > +void rdma_link_register(struct rdma_link_ops *ops)
> > +{
> > +	down_write(&link_ops_rwsem);
> > +	if (link_ops_get(ops->type)) {
> > +		WARN_ONCE("Duplicate rdma_link_ops! %s\n", ops->type);
> > +		goto out;
> > +	}
> > +	list_add(&ops->list, &link_ops);
> > +out:
> > +	up_write(&link_ops_rwsem);
> > +}
> > +EXPORT_SYMBOL(rdma_link_register);
> > +
> > +void rdma_link_unregister(struct rdma_link_ops *ops)
> > +{
> > +	down_write(&link_ops_rwsem);
> > +	list_del(&ops->list);
> > +	up_write(&link_ops_rwsem);
> > +}
> > +EXPORT_SYMBOL(rdma_link_unregister);
> > +
> > +static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
> > +			  struct netlink_ext_ack *extack)
> > +{
> > +	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
> > +	char ibdev_name[IB_DEVICE_NAME_MAX];
> > +	const struct rdma_link_ops *ops;
> > +	struct ib_device *device;
> > +	char ndev_name[IFNAMSIZ];
> > +	char type[IFNAMSIZ];
> > +	int err;
> > +
> > +	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
> > +			  nldev_policy, extack);
> > +	if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
> > +	    !tb[RDMA_NLDEV_ATTR_LINK_TYPE] ||
> !tb[RDMA_NLDEV_ATTR_NDEV_NAME])
> > +		return -EINVAL;
> > +
> > +	if (nla_len(tb[RDMA_NLDEV_ATTR_DEV_NAME]) >
> sizeof(ibdev_name) ||
> > +	    nla_len(tb[RDMA_NLDEV_ATTR_LINK_TYPE]) > sizeof(type) ||
> > +	    nla_len(tb[RDMA_NLDEV_ATTR_NDEV_NAME]) >
> sizeof(ndev_name))
> > +		return -EINVAL;
> 
> Why is that? It is supposed to be tested by nldev_policy in nlmsg_parse
> stage.
> 

If the policy does it, then I'll remove these.  I believe Jason requested
adding this.  But I didn't think about the nldev_policy...

Thanks,

Steve.
diff mbox series

Patch

diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 63cc74483188..6df4c98da365 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -33,6 +33,7 @@ 
 #include <linux/module.h>
 #include <linux/pid.h>
 #include <linux/pid_namespace.h>
+#include <linux/mutex.h>
 #include <net/netlink.h>
 #include <rdma/rdma_cm.h>
 #include <rdma/rdma_netlink.h>
@@ -107,6 +108,8 @@ 
 	[RDMA_NLDEV_ATTR_DRIVER_U32]		= { .type = NLA_U32 },
 	[RDMA_NLDEV_ATTR_DRIVER_S64]		= { .type = NLA_S64 },
 	[RDMA_NLDEV_ATTR_DRIVER_U64]		= { .type = NLA_U64 },
+	[RDMA_NLDEV_ATTR_LINK_TYPE]		= { .type = NLA_NUL_STRING,
+				    .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN },
 };
 
 static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
@@ -1103,6 +1106,134 @@  static int nldev_res_get_pd_dumpit(struct sk_buff *skb,
 	return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_PD);
 }
 
+static LIST_HEAD(link_ops);
+static DECLARE_RWSEM(link_ops_rwsem);
+
+static const struct rdma_link_ops *link_ops_get(const char *type)
+{
+	const struct rdma_link_ops *ops;
+
+	list_for_each_entry(ops, &link_ops, list) {
+		if (!strcmp(ops->type, type))
+			goto out;
+	}
+	ops = NULL;
+out:
+	return ops;
+}
+
+void rdma_link_register(struct rdma_link_ops *ops)
+{
+	down_write(&link_ops_rwsem);
+	if (link_ops_get(ops->type)) {
+		WARN_ONCE("Duplicate rdma_link_ops! %s\n", ops->type);
+		goto out;
+	}
+	list_add(&ops->list, &link_ops);
+out:
+	up_write(&link_ops_rwsem);
+}
+EXPORT_SYMBOL(rdma_link_register);
+
+void rdma_link_unregister(struct rdma_link_ops *ops)
+{
+	down_write(&link_ops_rwsem);
+	list_del(&ops->list);
+	up_write(&link_ops_rwsem);
+}
+EXPORT_SYMBOL(rdma_link_unregister);
+
+static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+			  struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+	char ibdev_name[IB_DEVICE_NAME_MAX];
+	const struct rdma_link_ops *ops;
+	struct ib_device *device;
+	char ndev_name[IFNAMSIZ];
+	char type[IFNAMSIZ];
+	int err;
+
+	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+			  nldev_policy, extack);
+	if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
+	    !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME])
+		return -EINVAL;
+
+	if (nla_len(tb[RDMA_NLDEV_ATTR_DEV_NAME]) > sizeof(ibdev_name) ||
+	    nla_len(tb[RDMA_NLDEV_ATTR_LINK_TYPE]) > sizeof(type) ||
+	    nla_len(tb[RDMA_NLDEV_ATTR_NDEV_NAME]) > sizeof(ndev_name))
+		return -EINVAL;
+
+	nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
+		    sizeof(ibdev_name));
+	if (strchr(ibdev_name, '%'))
+		return -EINVAL;
+
+	nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
+	nla_strlcpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
+		    sizeof(ndev_name));
+
+	down_read(&link_ops_rwsem);
+	ops = link_ops_get(type);
+#ifdef CONFIG_MODULES
+	if (!ops) {
+		up_read(&link_ops_rwsem);
+		request_module("rdma-link-%s", type);
+		down_read(&link_ops_rwsem);
+		ops = link_ops_get(type);
+	}
+#endif
+	if (ops) {
+		device = ops->newlink(ibdev_name, ndev_name);
+		if (IS_ERR(device))
+			err = PTR_ERR(device);
+		else
+			device->link_ops = ops;
+	} else {
+		err = -ENODEV;
+	}
+	up_read(&link_ops_rwsem);
+
+	return err;
+}
+
+static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
+			  struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+	const struct rdma_link_ops *ops;
+	struct device *dma_device;
+	struct ib_device *device;
+	u32 index;
+	int err;
+
+	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+			  nldev_policy, extack);
+	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
+		return -EINVAL;
+
+	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+	device = ib_device_get_by_index(index);
+	if (!device)
+		return -EINVAL;
+
+	ops = device->link_ops;
+
+	/*
+	 * Deref the ib_device before deleting it.  Otherwise we
+	 * deadlock unregistering the device.  Hold a ref on the
+	 * underlying dma_device though to keep the memory around
+	 * until we're done.
+	 */
+	dma_device = get_device(device->dma_device);
+	ib_device_put(device);
+	err = ops ? ops->dellink(device) : -ENODEV;
+	put_device(dma_device);
+
+	return err;
+}
+
 static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
 	[RDMA_NLDEV_CMD_GET] = {
 		.doit = nldev_get_doit,
@@ -1112,6 +1243,14 @@  static int nldev_res_get_pd_dumpit(struct sk_buff *skb,
 		.doit = nldev_set_doit,
 		.flags = RDMA_NL_ADMIN_PERM,
 	},
+	[RDMA_NLDEV_CMD_NEWLINK] = {
+		.doit = nldev_newlink,
+		.flags = RDMA_NL_ADMIN_PERM,
+	},
+	[RDMA_NLDEV_CMD_DELLINK] = {
+		.doit = nldev_dellink,
+		.flags = RDMA_NL_ADMIN_PERM,
+	},
 	[RDMA_NLDEV_CMD_PORT_GET] = {
 		.doit = nldev_port_get_doit,
 		.dump = nldev_port_get_dumpit,
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index ee8a6008e222..7a5ef71e5348 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2608,6 +2608,8 @@  struct ib_device {
 	 */
 	refcount_t refcount;
 	struct completion unreg_completion;
+
+	const struct rdma_link_ops     *link_ops;
 };
 
 struct ib_client {
diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
index 70218e6b5187..9b50e957cbae 100644
--- a/include/rdma/rdma_netlink.h
+++ b/include/rdma/rdma_netlink.h
@@ -99,4 +99,17 @@  int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh,
  * Returns true on success or false if no listeners.
  */
 bool rdma_nl_chk_listeners(unsigned int group);
+
+struct rdma_link_ops {
+	struct list_head list;
+	const char *type;
+	struct ib_device *(*newlink)(const char *ibdev_name,
+				     const char *ndev_name);
+	int (*dellink)(struct ib_device *ibdev);
+};
+
+void rdma_link_register(struct rdma_link_ops *ops);
+void rdma_link_unregister(struct rdma_link_ops *ops);
+
+#define MODULE_ALIAS_RDMA_LINK(type) MODULE_ALIAS("rdma-link-" type)
 #endif /* _RDMA_NETLINK_H */
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index f9c41bf59efc..dfdfc2b608b8 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -229,9 +229,11 @@  enum rdma_nldev_command {
 	RDMA_NLDEV_CMD_GET, /* can dump */
 	RDMA_NLDEV_CMD_SET,
 
-	/* 3 - 4 are free to use */
+	RDMA_NLDEV_CMD_NEWLINK,
 
-	RDMA_NLDEV_CMD_PORT_GET = 5, /* can dump */
+	RDMA_NLDEV_CMD_DELLINK,
+
+	RDMA_NLDEV_CMD_PORT_GET, /* can dump */
 
 	/* 6 - 8 are free to use */
 
@@ -428,6 +430,11 @@  enum rdma_nldev_attr {
 	RDMA_NLDEV_ATTR_DRIVER_U64,		/* u64 */
 
 	/*
+	 * Identifies the rdma driver. eg: "rxe" or "siw"
+	 */
+	RDMA_NLDEV_ATTR_LINK_TYPE,		/* string */
+
+	/*
 	 * Always the end
 	 */
 	RDMA_NLDEV_ATTR_MAX