diff mbox

[RFC,2/2] RDMA/nldev: provide detailed CM_ID information

Message ID 531889e6a24f7919dec71734c91298d266aa9721.1517418595.git.swise@opengridcomputing.com (mailing list archive)
State RFC
Headers show

Commit Message

Steve Wise Jan. 30, 2018, 4:59 p.m. UTC
Implement RDMA nldev netlink interface to get detailed CM_ID information.

Because cm_id's are attached to rdma devices in various work queue contexts,
the pid and task information at device-attach time is sometimes not useful.
For example, an nvme/f host connection ends up being bound to a device
in a work queue context and the resulting pid at attach time no longer exists
after connection setup.  So instead we mark all cm_id's created via the
rdma_ucm as "user", and all others as "kernel".  This required tweaking
the restrack code a little.  It also required wrapping some rdma_cm
functions to allow passing the module name string.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
---
 drivers/infiniband/core/cma.c      |  55 ++++++---
 drivers/infiniband/core/nldev.c    | 245 +++++++++++++++++++++++++++++++++++++
 drivers/infiniband/core/restrack.c |  29 ++++-
 drivers/infiniband/core/ucma.c     |   8 +-
 include/rdma/rdma_cm.h             |  24 +++-
 include/rdma/restrack.h            |   4 +
 include/uapi/rdma/rdma_netlink.h   |  30 +++++
 7 files changed, 365 insertions(+), 30 deletions(-)

Comments

Parav Pandit Jan. 31, 2018, 8:47 p.m. UTC | #1
> -----Original Message-----
> From: linux-rdma-owner@vger.kernel.org [mailto:linux-rdma-
> owner@vger.kernel.org] On Behalf Of Steve Wise
> Sent: Tuesday, January 30, 2018 10:59 AM
> To: dledford@redhat.com; Jason Gunthorpe <jgg@mellanox.com>
> Cc: linux-rdma@vger.kernel.org; leon@kernel.org
> Subject: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
> 
> Implement RDMA nldev netlink interface to get detailed CM_ID information.
> 
> Because cm_id's are attached to rdma devices in various work queue contexts,
> the pid and task information at device-attach time is sometimes not useful.
> For example, an nvme/f host connection ends up being bound to a device in a
> work queue context and the resulting pid at attach time no longer exists after
> connection setup.  So instead we mark all cm_id's created via the rdma_ucm as
> "user", and all others as "kernel".  This required tweaking the restrack code a
> little.  It also required wrapping some rdma_cm functions to allow passing the
> module name string.
> 
> Signed-off-by: Steve Wise <swise@opengridcomputing.com>
> ---
>  drivers/infiniband/core/cma.c      |  55 ++++++---
>  drivers/infiniband/core/nldev.c    | 245
> +++++++++++++++++++++++++++++++++++++
>  drivers/infiniband/core/restrack.c |  29 ++++-
>  drivers/infiniband/core/ucma.c     |   8 +-
>  include/rdma/rdma_cm.h             |  24 +++-
>  include/rdma/restrack.h            |   4 +
>  include/uapi/rdma/rdma_netlink.h   |  30 +++++
>  7 files changed, 365 insertions(+), 30 deletions(-)
> 
> diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index
> 72ad52b..51fbfa1 100644
> --- a/drivers/infiniband/core/cma.c
> +++ b/drivers/infiniband/core/cma.c
> @@ -465,6 +465,9 @@ static void _cma_attach_to_dev(struct rdma_id_private
> *id_priv,
>  	id_priv->id.route.addr.dev_addr.transport =
>  		rdma_node_get_transport(cma_dev->device->node_type);
>  	list_add_tail(&id_priv->list, &cma_dev->id_list);
> +	id_priv->id.res.type = RDMA_RESTRACK_CM_ID;
> +	id_priv->id.res.kern_name = id_priv->id.caller;
> +	rdma_restrack_add(&id_priv->id.res);
>  }
> 
>  static void cma_attach_to_dev(struct rdma_id_private *id_priv, @@ -737,10
> +740,10 @@ static void cma_deref_id(struct rdma_id_private *id_priv)
>  		complete(&id_priv->comp);
>  }
> 
> -struct rdma_cm_id *rdma_create_id(struct net *net,
> -				  rdma_cm_event_handler event_handler,
> -				  void *context, enum rdma_port_space ps,
> -				  enum ib_qp_type qp_type)
> +struct rdma_cm_id *__rdma_create_id(struct net *net,
> +				    rdma_cm_event_handler event_handler,
> +				    void *context, enum rdma_port_space ps,
> +				    enum ib_qp_type qp_type, const char *caller)
>  {
>  	struct rdma_id_private *id_priv;
> 
> @@ -748,7 +751,10 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
>  	if (!id_priv)
>  		return ERR_PTR(-ENOMEM);
> 
> -	id_priv->owner = task_pid_nr(current);
> +	if (caller)
> +		id_priv->id.caller = caller;
> +	else
> +		id_priv->owner = task_pid_nr(current);
>  	id_priv->state = RDMA_CM_IDLE;
>  	id_priv->id.context = context;
>  	id_priv->id.event_handler = event_handler; @@ -768,7 +774,7 @@
> struct rdma_cm_id *rdma_create_id(struct net *net,
> 
>  	return &id_priv->id;
>  }
> -EXPORT_SYMBOL(rdma_create_id);
> +EXPORT_SYMBOL(__rdma_create_id);
> 
>  static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)  {
> @@ -1628,6 +1634,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
>  	mutex_unlock(&id_priv->handler_mutex);
> 
>  	if (id_priv->cma_dev) {
> +		rdma_restrack_del(&id_priv->id.res);
>  		if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
>  			if (id_priv->cm_id.ib)
>  				ib_destroy_cm_id(id_priv->cm_id.ib);
> @@ -1786,9 +1793,10 @@ static struct rdma_id_private
> *cma_new_conn_id(struct rdma_cm_id *listen_id,
>  		ib_event->param.req_rcvd.primary_path->service_id;
>  	int ret;
> 
> -	id = rdma_create_id(listen_id->route.addr.dev_addr.net,
> +	id = __rdma_create_id(listen_id->route.addr.dev_addr.net,
>  			    listen_id->event_handler, listen_id->context,
> -			    listen_id->ps, ib_event->param.req_rcvd.qp_type);
> +			    listen_id->ps, ib_event->param.req_rcvd.qp_type,
> +			    listen_id->caller);
>  	if (IS_ERR(id))
>  		return NULL;
> 
> @@ -1843,8 +1851,8 @@ static struct rdma_id_private
> *cma_new_udp_id(struct rdma_cm_id *listen_id,
>  	struct net *net = listen_id->route.addr.dev_addr.net;
>  	int ret;
> 
> -	id = rdma_create_id(net, listen_id->event_handler, listen_id->context,
> -			    listen_id->ps, IB_QPT_UD);
> +	id = __rdma_create_id(net, listen_id->event_handler, listen_id-
> >context,
> +			      listen_id->ps, IB_QPT_UD, listen_id->caller);
>  	if (IS_ERR(id))
>  		return NULL;
> 
> @@ -2110,10 +2118,11 @@ static int iw_conn_req_handler(struct iw_cm_id
> *cm_id,
>  		goto out;
> 
>  	/* Create a new RDMA id for the new IW CM ID */
> -	new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net,
> -				   listen_id->id.event_handler,
> -				   listen_id->id.context,
> -				   RDMA_PS_TCP, IB_QPT_RC);
> +	new_cm_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
> +				     listen_id->id.event_handler,
> +				     listen_id->id.context,
> +				     RDMA_PS_TCP, IB_QPT_RC,
> +				     listen_id->id.caller);
>  	if (IS_ERR(new_cm_id)) {
>  		ret = -ENOMEM;
>  		goto out;
> @@ -2238,8 +2247,8 @@ static void cma_listen_on_dev(struct rdma_id_private
> *id_priv,
>  	if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev-
> >device, 1))
>  		return;
> 
> -	id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
> -			    id_priv->id.qp_type);
> +	id = __rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
> +			      id_priv->id.qp_type, id_priv->id.caller);
>  	if (IS_ERR(id))
>  		return;
> 
> @@ -3347,8 +3356,10 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct
> sockaddr *addr)
> 
>  	return 0;
>  err2:
> -	if (id_priv->cma_dev)
> +	if (id_priv->cma_dev) {
> +		rdma_restrack_del(&id_priv->id.res);
>  		cma_release_dev(id_priv);
> +	}
>  err1:
>  	cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
>  	return ret;
> @@ -3731,14 +3742,18 @@ static int cma_send_sidr_rep(struct
> rdma_id_private *id_priv,
>  	return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);  }
> 
> -int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param
> *conn_param)
> +int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param
> *conn_param,
> +		  const char *caller)
>  {
>  	struct rdma_id_private *id_priv;
>  	int ret;
> 
>  	id_priv = container_of(id, struct rdma_id_private, id);
> 
> -	id_priv->owner = task_pid_nr(current);
> +	if (caller)
> +		id_priv->id.caller = caller;
> +	else
> +		id_priv->owner = task_pid_nr(current);
> 
>  	if (!cma_comp(id_priv, RDMA_CM_CONNECT))
>  		return -EINVAL;
> @@ -3778,7 +3793,7 @@ int rdma_accept(struct rdma_cm_id *id, struct
> rdma_conn_param *conn_param)
>  	rdma_reject(id, NULL, 0);
>  	return ret;
>  }
> -EXPORT_SYMBOL(rdma_accept);
> +EXPORT_SYMBOL(__rdma_accept);
> 
>  int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)  { diff --git
> a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index
> fa8655e..a4091b5 100644
> --- a/drivers/infiniband/core/nldev.c
> +++ b/drivers/infiniband/core/nldev.c
> @@ -34,6 +34,7 @@
>  #include <linux/pid.h>
>  #include <linux/pid_namespace.h>
>  #include <net/netlink.h>
> +#include <rdma/rdma_cm.h>
>  #include <rdma/rdma_netlink.h>
> 
>  #include "core_priv.h"
> @@ -71,6 +72,22 @@
>  	[RDMA_NLDEV_ATTR_RES_PID]		= { .type = NLA_U32 },
>  	[RDMA_NLDEV_ATTR_RES_KERN_NAME]		= { .type =
> NLA_NUL_STRING,
>  						    .len = TASK_COMM_LEN },
> +	[RDMA_NLDEV_ATTR_RES_CM_ID]		= { .type =
> NLA_NESTED },
> +	[RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY]	= { .type =
> NLA_NESTED },
> +	[RDMA_NLDEV_ATTR_RES_PS]		= { .type = NLA_U32 },
> +	[RDMA_NLDEV_ATTR_RES_IPV4_SADDR]	= {
> +				.len = FIELD_SIZEOF(struct iphdr, saddr) },
> +	[RDMA_NLDEV_ATTR_RES_IPV4_DADDR]	= {
> +				.len = FIELD_SIZEOF(struct iphdr, saddr) },
> +	[RDMA_NLDEV_ATTR_RES_IPV6_SADDR]	= {
> +				.len = FIELD_SIZEOF(struct ipv6hdr, saddr) },
> +	[RDMA_NLDEV_ATTR_RES_IPV6_DADDR]	= {
> +				.len = FIELD_SIZEOF(struct ipv6hdr, saddr) },
> +	[RDMA_NLDEV_ATTR_RES_IP_SPORT]		= { .type = NLA_U16 },
> +	[RDMA_NLDEV_ATTR_RES_IP_DPORT]		= { .type = NLA_U16 },
> +	[RDMA_NLDEV_ATTR_RES_DEV_TYPE]		= { .type = NLA_U8 },
> +	[RDMA_NLDEV_ATTR_RES_TRANSPORT_TYPE]	= { .type = NLA_U8 },
> +	[RDMA_NLDEV_ATTR_RES_NETWORK_TYPE]	= { .type = NLA_U8 },
>  };
> 
>  static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device) @@ -
> 182,6 +199,7 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device
> *device)
>  		[RDMA_RESTRACK_PD] = "pd",
>  		[RDMA_RESTRACK_CQ] = "cq",
>  		[RDMA_RESTRACK_QP] = "qp",
> +		[RDMA_RESTRACK_CM_ID] = "cm_id",

May be rdmacm_id a better name to avoid confusion with ib_cm/iw_cm ids?
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Wise Jan. 31, 2018, 8:56 p.m. UTC | #2
> > 182,6 +199,7 @@ static int fill_res_info(struct sk_buff *msg, struct
ib_device
> > *device)
> >  		[RDMA_RESTRACK_PD] = "pd",
> >  		[RDMA_RESTRACK_CQ] = "cq",
> >  		[RDMA_RESTRACK_QP] = "qp",
> > +		[RDMA_RESTRACK_CM_ID] = "cm_id",
> 
> May be rdmacm_id a better name to avoid confusion with ib_cm/iw_cm ids?

Eventually, I could see the ib_cm and/or iw_cm info also getting dumped as
part of this.  So perhaps the name "cm_id" is ok?  I prefer it also because
its shorter (lazy typer! :) )



--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Parav Pandit Jan. 31, 2018, 9:18 p.m. UTC | #3
> -----Original Message-----
> From: linux-rdma-owner@vger.kernel.org [mailto:linux-rdma-
> owner@vger.kernel.org] On Behalf Of Steve Wise
> Sent: Wednesday, January 31, 2018 2:56 PM
> To: Parav Pandit <parav@mellanox.com>; dledford@redhat.com; Jason
> Gunthorpe <jgg@mellanox.com>
> Cc: linux-rdma@vger.kernel.org; leon@kernel.org
> Subject: RE: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
> 
> > > 182,6 +199,7 @@ static int fill_res_info(struct sk_buff *msg, struct
> ib_device
> > > *device)
> > >  		[RDMA_RESTRACK_PD] = "pd",
> > >  		[RDMA_RESTRACK_CQ] = "cq",
> > >  		[RDMA_RESTRACK_QP] = "qp",
> > > +		[RDMA_RESTRACK_CM_ID] = "cm_id",
> >
> > May be rdmacm_id a better name to avoid confusion with ib_cm/iw_cm ids?
> 
> Eventually, I could see the ib_cm and/or iw_cm info also getting dumped as part
Yes. But it is still coming through id of rdmacm_id as transport id information.
> of this.  So perhaps the name "cm_id" is ok? 
It's just that longer name removes the ambiguity.
I am not too particular about this particular naming.

> I prefer it also because its shorter (lazy typer! :) )
Sure, shorter names are useful to end user.
But I think this is handled well at iproute2 tool level.
For example below two commands achieves the same task.
#ip address show
#ip a s
So may be shorter naming at command level is better left at user space tool level.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Leon Romanovsky Feb. 1, 2018, 8:01 a.m. UTC | #4
On Wed, Jan 31, 2018 at 09:18:39PM +0000, Parav Pandit wrote:
>
>
> > -----Original Message-----
> > From: linux-rdma-owner@vger.kernel.org [mailto:linux-rdma-
> > owner@vger.kernel.org] On Behalf Of Steve Wise
> > Sent: Wednesday, January 31, 2018 2:56 PM
> > To: Parav Pandit <parav@mellanox.com>; dledford@redhat.com; Jason
> > Gunthorpe <jgg@mellanox.com>
> > Cc: linux-rdma@vger.kernel.org; leon@kernel.org
> > Subject: RE: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID information
> >
> > > > 182,6 +199,7 @@ static int fill_res_info(struct sk_buff *msg, struct
> > ib_device
> > > > *device)
> > > >  		[RDMA_RESTRACK_PD] = "pd",
> > > >  		[RDMA_RESTRACK_CQ] = "cq",
> > > >  		[RDMA_RESTRACK_QP] = "qp",
> > > > +		[RDMA_RESTRACK_CM_ID] = "cm_id",
> > >
> > > May be rdmacm_id a better name to avoid confusion with ib_cm/iw_cm ids?
> >
> > Eventually, I could see the ib_cm and/or iw_cm info also getting dumped as part
> Yes. But it is still coming through id of rdmacm_id as transport id information.
> > of this.  So perhaps the name "cm_id" is ok?
> It's just that longer name removes the ambiguity.
> I am not too particular about this particular naming.
>
> > I prefer it also because its shorter (lazy typer! :) )
> Sure, shorter names are useful to end user.
> But I think this is handled well at iproute2 tool level.
> For example below two commands achieves the same task.
> #ip address show
> #ip a s
> So may be shorter naming at command level is better left at user space tool level.

The shorter naming is working on commands and not on objects.
In rdamtool, you can write "rdma d s" instead of "rdma dev show", but
the name of device (mlx5_0) or object (qp) should be full.

Regarding the name, I personally think that cm_id is better because it
is general and applicable both to ib_cm and iw_cm. The separation
between them can be done with introduction of new netlink attribute, e.g.
cm_id_type.

Thanks

> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Leon Romanovsky Feb. 1, 2018, 8:49 a.m. UTC | #5
On Tue, Jan 30, 2018 at 08:59:11AM -0800, Steve Wise wrote:
> Implement RDMA nldev netlink interface to get detailed CM_ID information.
>
> Because cm_id's are attached to rdma devices in various work queue contexts,
> the pid and task information at device-attach time is sometimes not useful.
> For example, an nvme/f host connection ends up being bound to a device
> in a work queue context and the resulting pid at attach time no longer exists
> after connection setup.  So instead we mark all cm_id's created via the
> rdma_ucm as "user", and all others as "kernel".  This required tweaking
> the restrack code a little.  It also required wrapping some rdma_cm
> functions to allow passing the module name string.
>
> Signed-off-by: Steve Wise <swise@opengridcomputing.com>
> ---
>  drivers/infiniband/core/cma.c      |  55 ++++++---
>  drivers/infiniband/core/nldev.c    | 245 +++++++++++++++++++++++++++++++++++++
>  drivers/infiniband/core/restrack.c |  29 ++++-
>  drivers/infiniband/core/ucma.c     |   8 +-
>  include/rdma/rdma_cm.h             |  24 +++-
>  include/rdma/restrack.h            |   4 +
>  include/uapi/rdma/rdma_netlink.h   |  30 +++++
>  7 files changed, 365 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
> index 72ad52b..51fbfa1 100644
> --- a/drivers/infiniband/core/cma.c
> +++ b/drivers/infiniband/core/cma.c
> @@ -465,6 +465,9 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
>  	id_priv->id.route.addr.dev_addr.transport =
>  		rdma_node_get_transport(cma_dev->device->node_type);
>  	list_add_tail(&id_priv->list, &cma_dev->id_list);
> +	id_priv->id.res.type = RDMA_RESTRACK_CM_ID;
> +	id_priv->id.res.kern_name = id_priv->id.caller;

Steve, I don't like it, I worked hard to hide it from the users of restrack,
and don't see reason why the same trick as with ib_create_cq/ib_create_pd won't
work here.

> +	rdma_restrack_add(&id_priv->id.res);
>  }
>
>  static void cma_attach_to_dev(struct rdma_id_private *id_priv,
> @@ -737,10 +740,10 @@ static void cma_deref_id(struct rdma_id_private *id_priv)
>  		complete(&id_priv->comp);
>  }
>
> -struct rdma_cm_id *rdma_create_id(struct net *net,
> -				  rdma_cm_event_handler event_handler,
> -				  void *context, enum rdma_port_space ps,
> -				  enum ib_qp_type qp_type)
> +struct rdma_cm_id *__rdma_create_id(struct net *net,
> +				    rdma_cm_event_handler event_handler,
> +				    void *context, enum rdma_port_space ps,
> +				    enum ib_qp_type qp_type, const char *caller)
>  {
>  	struct rdma_id_private *id_priv;
>
> @@ -748,7 +751,10 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
>  	if (!id_priv)
>  		return ERR_PTR(-ENOMEM);
>
> -	id_priv->owner = task_pid_nr(current);
> +	if (caller)
> +		id_priv->id.caller = caller;
> +	else
> +		id_priv->owner = task_pid_nr(current);

See the comment above

>  	id_priv->state = RDMA_CM_IDLE;
>  	id_priv->id.context = context;
>  	id_priv->id.event_handler = event_handler;

Not saying that we need to do it now, but it is important to write, most
probably we can drop certain initialization from rdma_create_id() adn
reuse rdma_restrack_put/_get.

> @@ -768,7 +774,7 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
>
>  	return &id_priv->id;
>  }
> -EXPORT_SYMBOL(rdma_create_id);
> +EXPORT_SYMBOL(__rdma_create_id);
>
>  static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
>  {
> @@ -1628,6 +1634,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
>  	mutex_unlock(&id_priv->handler_mutex);
>
>  	if (id_priv->cma_dev) {
> +		rdma_restrack_del(&id_priv->id.res);

You should count all created cm_ids and not only binded.

>  		if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
>  			if (id_priv->cm_id.ib)
>  				ib_destroy_cm_id(id_priv->cm_id.ib);
> @@ -1786,9 +1793,10 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
>  		ib_event->param.req_rcvd.primary_path->service_id;
>  	int ret;
>
> -	id = rdma_create_id(listen_id->route.addr.dev_addr.net,
> +	id = __rdma_create_id(listen_id->route.addr.dev_addr.net,
>  			    listen_id->event_handler, listen_id->context,
> -			    listen_id->ps, ib_event->param.req_rcvd.qp_type);
> +			    listen_id->ps, ib_event->param.req_rcvd.qp_type,
> +			    listen_id->caller);

I think the cleanest way will be to create some struct and pass pointer to it so
you can unfold all relevant data inside of __rdma_create_id().

>  	if (IS_ERR(id))
>  		return NULL;
>
> @@ -1843,8 +1851,8 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
>  	struct net *net = listen_id->route.addr.dev_addr.net;
>  	int ret;
>
> -	id = rdma_create_id(net, listen_id->event_handler, listen_id->context,
> -			    listen_id->ps, IB_QPT_UD);
> +	id = __rdma_create_id(net, listen_id->event_handler, listen_id->context,
> +			      listen_id->ps, IB_QPT_UD, listen_id->caller);
>  	if (IS_ERR(id))
>  		return NULL;
>
> @@ -2110,10 +2118,11 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
>  		goto out;
>
>  	/* Create a new RDMA id for the new IW CM ID */
> -	new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net,
> -				   listen_id->id.event_handler,
> -				   listen_id->id.context,
> -				   RDMA_PS_TCP, IB_QPT_RC);
> +	new_cm_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
> +				     listen_id->id.event_handler,
> +				     listen_id->id.context,
> +				     RDMA_PS_TCP, IB_QPT_RC,
> +				     listen_id->id.caller);
>  	if (IS_ERR(new_cm_id)) {
>  		ret = -ENOMEM;
>  		goto out;
> @@ -2238,8 +2247,8 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
>  	if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
>  		return;
>
> -	id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
> -			    id_priv->id.qp_type);
> +	id = __rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
> +			      id_priv->id.qp_type, id_priv->id.caller);
>  	if (IS_ERR(id))
>  		return;
>
> @@ -3347,8 +3356,10 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
>
>  	return 0;
>  err2:
> -	if (id_priv->cma_dev)
> +	if (id_priv->cma_dev) {
> +		rdma_restrack_del(&id_priv->id.res);
>  		cma_release_dev(id_priv);
> +	}
>  err1:
>  	cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
>  	return ret;
> @@ -3731,14 +3742,18 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
>  	return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
>  }
>
> -int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
> +int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
> +		  const char *caller)
>  {
>  	struct rdma_id_private *id_priv;
>  	int ret;
>
>  	id_priv = container_of(id, struct rdma_id_private, id);
>
> -	id_priv->owner = task_pid_nr(current);
> +	if (caller)
> +		id_priv->id.caller = caller;
> +	else
> +		id_priv->owner = task_pid_nr(current);
>
>  	if (!cma_comp(id_priv, RDMA_CM_CONNECT))
>  		return -EINVAL;
> @@ -3778,7 +3793,7 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
>  	rdma_reject(id, NULL, 0);
>  	return ret;
>  }
> -EXPORT_SYMBOL(rdma_accept);
> +EXPORT_SYMBOL(__rdma_accept);
>
>  int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
>  {
> diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
> index fa8655e..a4091b5 100644
> --- a/drivers/infiniband/core/nldev.c
> +++ b/drivers/infiniband/core/nldev.c
> @@ -34,6 +34,7 @@
>  #include <linux/pid.h>
>  #include <linux/pid_namespace.h>
>  #include <net/netlink.h>
> +#include <rdma/rdma_cm.h>
>  #include <rdma/rdma_netlink.h>
>
>  #include "core_priv.h"
> @@ -71,6 +72,22 @@
>  	[RDMA_NLDEV_ATTR_RES_PID]		= { .type = NLA_U32 },
>  	[RDMA_NLDEV_ATTR_RES_KERN_NAME]		= { .type = NLA_NUL_STRING,
>  						    .len = TASK_COMM_LEN },
> +	[RDMA_NLDEV_ATTR_RES_CM_ID]		= { .type = NLA_NESTED },

I would like to use this opportunity. There is CM_ID, so users will be
able to query nldev directly on this ID (once we implement .doit), but
can we found a proper abstraction for other objects (PD, CQ, QP e.t.c.)?

I want to have that all resources will have something similar to ifindex/ibindex.


> +	[RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY]	= { .type = NLA_NESTED },
> +	[RDMA_NLDEV_ATTR_RES_PS]		= { .type = NLA_U32 },
> +	[RDMA_NLDEV_ATTR_RES_IPV4_SADDR]	= {
> +				.len = FIELD_SIZEOF(struct iphdr, saddr) },
> +	[RDMA_NLDEV_ATTR_RES_IPV4_DADDR]	= {
> +				.len = FIELD_SIZEOF(struct iphdr, saddr) },
> +	[RDMA_NLDEV_ATTR_RES_IPV6_SADDR]	= {
> +				.len = FIELD_SIZEOF(struct ipv6hdr, saddr) },
> +	[RDMA_NLDEV_ATTR_RES_IPV6_DADDR]	= {
> +				.len = FIELD_SIZEOF(struct ipv6hdr, saddr) },
> +	[RDMA_NLDEV_ATTR_RES_IP_SPORT]		= { .type = NLA_U16 },
> +	[RDMA_NLDEV_ATTR_RES_IP_DPORT]		= { .type = NLA_U16 },
> +	[RDMA_NLDEV_ATTR_RES_DEV_TYPE]		= { .type = NLA_U8 },
> +	[RDMA_NLDEV_ATTR_RES_TRANSPORT_TYPE]	= { .type = NLA_U8 },
> +	[RDMA_NLDEV_ATTR_RES_NETWORK_TYPE]	= { .type = NLA_U8 },
>  };
>
>  static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
> @@ -182,6 +199,7 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
>  		[RDMA_RESTRACK_PD] = "pd",
>  		[RDMA_RESTRACK_CQ] = "cq",
>  		[RDMA_RESTRACK_QP] = "qp",
> +		[RDMA_RESTRACK_CM_ID] = "cm_id",
>  	};
>
>  	struct rdma_restrack_root *res = &device->res;
> @@ -284,6 +302,99 @@ static int fill_res_qp_entry(struct sk_buff *msg,
>  	return -EMSGSIZE;
>  }
>
> +static int fill_res_cm_id_entry(struct sk_buff *msg,
> +				struct rdma_cm_id *cm_id, uint32_t port)
> +{
> +	struct rdma_id_private *id_priv;
> +	struct nlattr *entry_attr;
> +
> +	if (port && port != cm_id->port_num)
> +		return 0;
> +
> +	id_priv = container_of(cm_id, struct rdma_id_private, id);
> +	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY);
> +	if (!entry_attr)
> +		goto out;
> +
> +	if (cm_id->port_num &&
> +	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
> +		goto err;
> +
> +	if (id_priv->qp_num &&
> +	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num))
> +		goto err;
> +
> +	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
> +		goto err;
> +
> +	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
> +		goto err;
> +	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
> +		goto err;
> +	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_DEV_TYPE,
> +		       id_priv->id.route.addr.dev_addr.dev_type))
> +		goto err;
> +	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TRANSPORT_TYPE,
> +		       id_priv->id.route.addr.dev_addr.transport))
> +		goto err;
> +
> +	if (cm_id->route.addr.src_addr.ss_family == AF_INET) {
> +		struct sockaddr_in *sin;
> +
> +		sin = (struct sockaddr_in *)&cm_id->route.addr.src_addr;
> +		if (nla_put_in_addr(msg, RDMA_NLDEV_ATTR_RES_IPV4_SADDR,
> +				    sin->sin_addr.s_addr))
> +			goto err;
> +		if (nla_put_net16(msg, RDMA_NLDEV_ATTR_RES_IP_SPORT,
> +				  sin->sin_port))
> +			goto err;
> +
> +		sin = (struct sockaddr_in *)&cm_id->route.addr.dst_addr;
> +		if (nla_put_in_addr(msg, RDMA_NLDEV_ATTR_RES_IPV4_DADDR,
> +				    sin->sin_addr.s_addr))
> +			goto err;
> +		if (nla_put_net16(msg, RDMA_NLDEV_ATTR_RES_IP_DPORT,
> +				  sin->sin_port))
> +			goto err;
> +	} else {
> +		struct sockaddr_in6 *sin6;
> +
> +		sin6 = (struct sockaddr_in6 *)&cm_id->route.addr.src_addr;
> +		if (nla_put_in6_addr(msg, RDMA_NLDEV_ATTR_RES_IPV6_SADDR,
> +				     &sin6->sin6_addr))
> +			goto err;
> +		if (nla_put_net16(msg, RDMA_NLDEV_ATTR_RES_IP_SPORT,
> +				  sin6->sin6_port))
> +			goto err;
> +
> +		sin6 = (struct sockaddr_in6 *)&cm_id->route.addr.dst_addr;
> +		if (nla_put_in6_addr(msg, RDMA_NLDEV_ATTR_RES_IPV6_DADDR,
> +				     &sin6->sin6_addr))
> +			goto err;
> +		if (nla_put_net16(msg, RDMA_NLDEV_ATTR_RES_IP_DPORT,
> +				  sin6->sin6_port))
> +			goto err;
> +	}
> +
> +	if (id_priv->id.caller) {
> +		if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
> +				   id_priv->id.caller))
> +			goto err;
> +	} else {
> +		/* CMA keeps the owning pid. */
> +		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, id_priv->owner))
> +			goto err;
> +	}
> +
> +	nla_nest_end(msg, entry_attr);
> +	return 0;
> +
> +err:
> +	nla_nest_cancel(msg, entry_attr);
> +out:
> +	return -EMSGSIZE;
> +}
> +
>  static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
>  			  struct netlink_ext_ack *extack)
>  {
> @@ -686,6 +797,137 @@ static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
>  	return ret;
>  }
>
> +static int nldev_res_get_cm_id_dumpit(struct sk_buff *skb,
> +				      struct netlink_callback *cb)
> +{
> +	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
> +	struct rdma_restrack_entry *res;
> +	int err, ret = 0, idx = 0;
> +	struct nlattr *table_attr;
> +	struct ib_device *device;
> +	int start = cb->args[0];
> +	struct rdma_cm_id *cm_id = NULL;
> +	struct nlmsghdr *nlh;
> +	u32 index, port = 0;
> +
> +	err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
> +			  nldev_policy, NULL);
> +	/*
> +	 * Right now, we are expecting the device index to get QP information,
> +	 * but it is possible to extend this code to return all devices in
> +	 * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
> +	 * if it doesn't exist, we will iterate over all devices.
> +	 *
> +	 * But it is not needed for now.
> +	 */
> +	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
> +		return -EINVAL;
> +
> +	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
> +	device = ib_device_get_by_index(index);
> +	if (!device)
> +		return -EINVAL;
> +
> +	/*
> +	 * If no PORT_INDEX is supplied, we will return all QPs from that device

QPs->CM_IDs

> +	 */
> +	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
> +		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
> +		if (!rdma_is_port_valid(device, port)) {
> +			ret = -EINVAL;
> +			goto err_index;
> +		}
> +	}
> +
> +	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
> +		RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_QP_GET),

RDMA_NLDEV_CMD_RES_QP_GET -> RDMA_NLDEV_CMD_RES_CM_ID_GET

> +		0, NLM_F_MULTI);
> +
> +	if (fill_nldev_handle(skb, device)) {
> +		ret = -EMSGSIZE;
> +		goto err;
> +	}
> +
> +	table_attr = nla_nest_start(skb, RDMA_NLDEV_ATTR_RES_CM_ID);
> +	if (!table_attr) {
> +		ret = -EMSGSIZE;
> +		goto err;
> +	}
> +
> +	down_read(&device->res.rwsem);
> +	hash_for_each_possible(device->res.hash, res, node,
> +			       RDMA_RESTRACK_CM_ID) {
> +		if (idx < start)
> +			goto next;
> +
> +		if ((rdma_is_kernel_res(res) &&
> +		     task_active_pid_ns(current) != &init_pid_ns) ||
> +		    (!rdma_is_kernel_res(res) &&
> +		     task_active_pid_ns(current) !=
> +		     task_active_pid_ns(res->task)))
> +			/*
> +			 * 1. Kernel QPs should be visible in init namsapce only
> +			 * 2. Preent only QPs visible in the current namespace
> +			 */
> +			goto next;
> +
> +		if (!rdma_restrack_get(res))
> +			/*
> +			 * Resource is under release now, but we are not
> +			 * relesing lock now, so it will be released in
> +			 * our next pass, once we will get ->next pointer.
> +			 */
> +			goto next;
> +
> +		cm_id = container_of(res, struct rdma_cm_id, res);
> +
> +		up_read(&device->res.rwsem);
> +		ret = fill_res_cm_id_entry(skb, cm_id, port);
> +		down_read(&device->res.rwsem);
> +		/*
> +		 * Return resource back, but it won't be released till
> +		 * the &device->res.rwsem will be released for write.
> +		 */
> +		rdma_restrack_put(res);
> +
> +		if (ret == -EMSGSIZE)
> +			/*
> +			 * There is a chance to optimize here.
> +			 * It can be done by using list_prepare_entry
> +			 * and list_for_each_entry_continue afterwards.
> +			 */
> +			break;
> +		if (ret)
> +			goto res_err;
> +next:		idx++;
> +	}
> +	up_read(&device->res.rwsem);
> +
> +	nla_nest_end(skb, table_attr);
> +	nlmsg_end(skb, nlh);
> +	cb->args[0] = idx;
> +
> +	/*
> +	 * No more CM_IDs to fill, cancel the message and
> +	 * return 0 to mark end of dumpit.
> +	 */
> +	if (!cm_id)
> +		goto err;
> +
> +	put_device(&device->dev);
> +	return skb->len;
> +
> +res_err:
> +	nla_nest_cancel(skb, table_attr);
> +	up_read(&device->res.rwsem);
> +
> +err:
> +	nlmsg_cancel(skb, nlh);
> +
> +err_index:
> +	put_device(&device->dev);
> +	return ret;
> +}
>  static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
>  	[RDMA_NLDEV_CMD_GET] = {
>  		.doit = nldev_get_doit,
> @@ -712,6 +954,9 @@ static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
>  		 * too.
>  		 */
>  	},
> +	[RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
> +		.dump = nldev_res_get_cm_id_dumpit,
> +	},
>  };
>
>  void __init nldev_init(void)
> diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
> index 857637b..bb13169 100644
> --- a/drivers/infiniband/core/restrack.c
> +++ b/drivers/infiniband/core/restrack.c
> @@ -3,6 +3,7 @@
>   * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
>   */
>
> +#include <rdma/rdma_cm.h>
>  #include <rdma/ib_verbs.h>
>  #include <rdma/restrack.h>
>  #include <linux/mutex.h>
> @@ -67,6 +68,7 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
>  	struct ib_pd *pd;
>  	struct ib_cq *cq;
>  	struct ib_qp *qp;
> +	struct rdma_cm_id *cm_id;
>
>  	switch (type) {
>  	case RDMA_RESTRACK_PD:
> @@ -85,6 +87,10 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
>  		xrcd = container_of(res, struct ib_xrcd, res);
>  		dev = xrcd->device;
>  		break;
> +	case RDMA_RESTRACK_CM_ID:
> +		cm_id = container_of(res, struct rdma_cm_id, res);
> +		dev = cm_id->device;
> +		break;
>  	default:
>  		WARN_ONCE(true, "Wrong resource tracking type %u\n", type);
>  		return NULL;
> @@ -93,6 +99,27 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
>  	return dev;
>  }
>
> +static bool res_is_user(struct rdma_restrack_entry *res)
> +{
> +	enum rdma_restrack_type type = res->type;
> +	bool is_user;
> +
> +	switch (type) {
> +	case RDMA_RESTRACK_CM_ID: {
> +		struct rdma_cm_id *cm_id;
> +
> +		cm_id = container_of(res, struct rdma_cm_id, res);
> +		is_user = !cm_id->caller;
> +		break;
> +	}
> +	default:
> +		is_user = !uaccess_kernel();
> +		break;
> +	}
> +
> +	return is_user;
> +}
> +
>  void rdma_restrack_add(struct rdma_restrack_entry *res)
>  {
>  	struct ib_device *dev = res_to_dev(res);
> @@ -100,7 +127,7 @@ void rdma_restrack_add(struct rdma_restrack_entry *res)
>  	if (!dev)
>  		return;
>
> -	if (!uaccess_kernel()) {
> +	if (res_is_user(res)) {
>  		get_task_struct(current);
>  		res->task = current;
>  		res->kern_name = NULL;
> diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
> index d67219d..f7f0282 100644
> --- a/drivers/infiniband/core/ucma.c
> +++ b/drivers/infiniband/core/ucma.c
> @@ -476,8 +476,8 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
>  		return -ENOMEM;
>
>  	ctx->uid = cmd.uid;
> -	ctx->cm_id = rdma_create_id(current->nsproxy->net_ns,
> -				    ucma_event_handler, ctx, cmd.ps, qp_type);
> +	ctx->cm_id = __rdma_create_id(current->nsproxy->net_ns,
> +			      ucma_event_handler, ctx, cmd.ps, qp_type, NULL);
>  	if (IS_ERR(ctx->cm_id)) {
>  		ret = PTR_ERR(ctx->cm_id);
>  		goto err1;
> @@ -1084,12 +1084,12 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
>  	if (cmd.conn_param.valid) {
>  		ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
>  		mutex_lock(&file->mut);
> -		ret = rdma_accept(ctx->cm_id, &conn_param);
> +		ret = __rdma_accept(ctx->cm_id, &conn_param, NULL);
>  		if (!ret)
>  			ctx->uid = cmd.uid;
>  		mutex_unlock(&file->mut);
>  	} else
> -		ret = rdma_accept(ctx->cm_id, NULL);
> +		ret = __rdma_accept(ctx->cm_id, NULL, NULL);
>
>  	ucma_put_ctx(ctx);
>  	return ret;
> diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
> index 5984225..73dac9b 100644
> --- a/include/rdma/rdma_cm.h
> +++ b/include/rdma/rdma_cm.h
> @@ -155,6 +155,12 @@ struct rdma_cm_id {
>  	enum rdma_port_space	 ps;
>  	enum ib_qp_type		 qp_type;
>  	u8			 port_num;
> +	const char *caller;
> +
> +	/*
> +	 * Internal to RDMA/core, don't use in the drivers
> +	 */
> +	struct rdma_restrack_entry     res;
>  };
>
>  struct rdma_id_private {
> @@ -198,6 +204,11 @@ struct rdma_id_private {
>  	enum ib_gid_type	gid_type;
>  };
>
> +struct rdma_cm_id *__rdma_create_id(struct net *net,
> +				  rdma_cm_event_handler event_handler,
> +				  void *context, enum rdma_port_space ps,
> +				  enum ib_qp_type qp_type, const char *caller);
> +
>  /**
>   * rdma_create_id - Create an RDMA identifier.
>   *
> @@ -210,10 +221,9 @@ struct rdma_id_private {
>   *
>   * The id holds a reference on the network namespace until it is destroyed.
>   */
> -struct rdma_cm_id *rdma_create_id(struct net *net,
> -				  rdma_cm_event_handler event_handler,
> -				  void *context, enum rdma_port_space ps,
> -				  enum ib_qp_type qp_type);
> +#define rdma_create_id(net, event_handler, context, ps, qp_type) \
> +	__rdma_create_id((net), (event_handler), (context), (ps), (qp_type), \
> +			 KBUILD_MODNAME)
>
>  /**
>    * rdma_destroy_id - Destroys an RDMA identifier.
> @@ -325,6 +335,9 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
>   */
>  int rdma_listen(struct rdma_cm_id *id, int backlog);
>
> +int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
> +		  const char *caller);
> +
>  /**
>   * rdma_accept - Called to accept a connection request or response.
>   * @id: Connection identifier associated with the request.
> @@ -340,7 +353,8 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
>   * state of the qp associated with the id is modified to error, such that any
>   * previously posted receive buffers would be flushed.
>   */
> -int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param);
> +#define rdma_accept(id, conn_param) \
> +	__rdma_accept((id), (conn_param),  KBUILD_MODNAME)
>
>  /**
>   * rdma_notify - Notifies the RDMA CM of an asynchronous event that has
> diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
> index c2d8116..a794e0e 100644
> --- a/include/rdma/restrack.h
> +++ b/include/rdma/restrack.h
> @@ -33,6 +33,10 @@ enum rdma_restrack_type {
>  	 */
>  	RDMA_RESTRACK_XRCD,
>  	/**
> +	 * @RDMA_RESTRACK_CM_ID: Connection Manager ID (CM_ID)
> +	 */
> +	RDMA_RESTRACK_CM_ID,
> +	/**
>  	 * @RDMA_RESTRACK_MAX: Last entry, used for array dclarations
>  	 */
>  	RDMA_RESTRACK_MAX
> diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
> index 17e59be..4ed21ee 100644
> --- a/include/uapi/rdma/rdma_netlink.h
> +++ b/include/uapi/rdma/rdma_netlink.h
> @@ -240,6 +240,8 @@ enum rdma_nldev_command {
>
>  	RDMA_NLDEV_CMD_RES_QP_GET, /* can dump */
>
> +	RDMA_NLDEV_CMD_RES_CM_ID_GET, /* can dump */
> +
>  	RDMA_NLDEV_NUM_OPS
>  };
>
> @@ -352,6 +354,34 @@ enum rdma_nldev_attr {
>  	 */
>  	RDMA_NLDEV_ATTR_RES_KERN_NAME,		/* string */
>
> +	RDMA_NLDEV_ATTR_RES_CM_ID,		/* nested table */
> +	RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,	/* nested table */
> +	/*
> +	 * rdma_cm_id port space.
> +	 */
> +	RDMA_NLDEV_ATTR_RES_PS,			/* u32 */
> +	/*
> +	 * IP Addresses/port attributes.
> +	 */
> +	RDMA_NLDEV_ATTR_RES_IPV4_SADDR,		/* u8[4] */
> +	RDMA_NLDEV_ATTR_RES_IPV4_DADDR,		/* u8[4] */
> +	RDMA_NLDEV_ATTR_RES_IPV6_SADDR,		/* u8[16] */
> +	RDMA_NLDEV_ATTR_RES_IPV6_DADDR,		/* u8[16] */
> +	RDMA_NLDEV_ATTR_RES_IP_SPORT,		/* BE u16 */
> +	RDMA_NLDEV_ATTR_RES_IP_DPORT,		/* BE u16 */

Can you please document the meaning of S (source) and D (destination)
in regards of this netlink output? It is needed to remove ambiguity.

> +	/*
> +	 * ARPHRD_INFINIBAND, ARPHRD_ETHER, ...
> +	 */
> +	RDMA_NLDEV_ATTR_RES_DEV_TYPE,		/* u8 */
> +	/*
> +	 * enum enum rdma_transport_type (IB, IWARP, ...)
> +	 */
> +	RDMA_NLDEV_ATTR_RES_TRANSPORT_TYPE,	/* u8 */
> +	/*
> +	 * enum rdma_network_type (IB, IPv4, IPv6,...)
> +	 */
> +	RDMA_NLDEV_ATTR_RES_NETWORK_TYPE,	/* u8 */
> +
>  	RDMA_NLDEV_ATTR_MAX
>  };
>  #endif /* _UAPI_RDMA_NETLINK_H */
> --
> 1.8.3.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Wise Feb. 1, 2018, 4:07 p.m. UTC | #6
> 
> On Tue, Jan 30, 2018 at 08:59:11AM -0800, Steve Wise wrote:
> > Implement RDMA nldev netlink interface to get detailed CM_ID
information.
> >
> > Because cm_id's are attached to rdma devices in various work queue
contexts,
> > the pid and task information at device-attach time is sometimes not
useful.
> > For example, an nvme/f host connection ends up being bound to a device
> > in a work queue context and the resulting pid at attach time no longer
exists
> > after connection setup.  So instead we mark all cm_id's created via the
> > rdma_ucm as "user", and all others as "kernel".  This required tweaking
> > the restrack code a little.  It also required wrapping some rdma_cm
> > functions to allow passing the module name string.
> >
> > Signed-off-by: Steve Wise <swise@opengridcomputing.com>
> > ---
> >  drivers/infiniband/core/cma.c      |  55 ++++++---
> >  drivers/infiniband/core/nldev.c    | 245
> +++++++++++++++++++++++++++++++++++++
> >  drivers/infiniband/core/restrack.c |  29 ++++-
> >  drivers/infiniband/core/ucma.c     |   8 +-
> >  include/rdma/rdma_cm.h             |  24 +++-
> >  include/rdma/restrack.h            |   4 +
> >  include/uapi/rdma/rdma_netlink.h   |  30 +++++
> >  7 files changed, 365 insertions(+), 30 deletions(-)
> >
> > diff --git a/drivers/infiniband/core/cma.c
b/drivers/infiniband/core/cma.c
> > index 72ad52b..51fbfa1 100644
> > --- a/drivers/infiniband/core/cma.c
> > +++ b/drivers/infiniband/core/cma.c
> > @@ -465,6 +465,9 @@ static void _cma_attach_to_dev(struct
> rdma_id_private *id_priv,
> >  	id_priv->id.route.addr.dev_addr.transport =
> >  		rdma_node_get_transport(cma_dev->device->node_type);
> >  	list_add_tail(&id_priv->list, &cma_dev->id_list);
> > +	id_priv->id.res.type = RDMA_RESTRACK_CM_ID;
> > +	id_priv->id.res.kern_name = id_priv->id.caller;
> 
> Steve, I don't like it, I worked hard to hide it from the users of
restrack,
> and don't see reason why the same trick as with ib_create_cq/ib_create_pd
> won't
> work here.

I am doing the same trick, no?  rdma_create_id() is a static inline that
passes KBUILD_MODNAME.  The issue is that at the time the rdma_cm_id is
created, it is not associated with any ib_device.  That only happens at
cma_attach time.  So how can the resource be added if there is no device?

> 
> > +	rdma_restrack_add(&id_priv->id.res);
> >  }
> >
> >  static void cma_attach_to_dev(struct rdma_id_private *id_priv,
> > @@ -737,10 +740,10 @@ static void cma_deref_id(struct rdma_id_private
> *id_priv)
> >  		complete(&id_priv->comp);
> >  }
> >
> > -struct rdma_cm_id *rdma_create_id(struct net *net,
> > -				  rdma_cm_event_handler event_handler,
> > -				  void *context, enum rdma_port_space ps,
> > -				  enum ib_qp_type qp_type)
> > +struct rdma_cm_id *__rdma_create_id(struct net *net,
> > +				    rdma_cm_event_handler event_handler,
> > +				    void *context, enum rdma_port_space ps,
> > +				    enum ib_qp_type qp_type, const char
*caller)
> >  {
> >  	struct rdma_id_private *id_priv;
> >
> > @@ -748,7 +751,10 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
> >  	if (!id_priv)
> >  		return ERR_PTR(-ENOMEM);
> >
> > -	id_priv->owner = task_pid_nr(current);
> > +	if (caller)
> > +		id_priv->id.caller = caller;
> > +	else
> > +		id_priv->owner = task_pid_nr(current);
> 
> See the comment above

There is no ib_device at this point, so caller (and owner) must be saved
until the cm_id is bound to a device (or possibly devices for listening
ids).

> 
> >  	id_priv->state = RDMA_CM_IDLE;
> >  	id_priv->id.context = context;
> >  	id_priv->id.event_handler = event_handler;
> 
> Not saying that we need to do it now, but it is important to write, most
> probably we can drop certain initialization from rdma_create_id() adn
> reuse rdma_restrack_put/_get.
> 

I don't understand this comment.  Can you please elaborate? 

> > @@ -768,7 +774,7 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
> >
> >  	return &id_priv->id;
> >  }
> > -EXPORT_SYMBOL(rdma_create_id);
> > +EXPORT_SYMBOL(__rdma_create_id);
> >
> >  static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp
*qp)
> >  {
> > @@ -1628,6 +1634,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
> >  	mutex_unlock(&id_priv->handler_mutex);
> >
> >  	if (id_priv->cma_dev) {
> > +		rdma_restrack_del(&id_priv->id.res);
> 
> You should count all created cm_ids and not only binded.

No ib_device if they aren't bound.

> 
> >  		if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
> >  			if (id_priv->cm_id.ib)
> >  				ib_destroy_cm_id(id_priv->cm_id.ib);
> > @@ -1786,9 +1793,10 @@ static struct rdma_id_private
> *cma_new_conn_id(struct rdma_cm_id *listen_id,
> >  		ib_event->param.req_rcvd.primary_path->service_id;
> >  	int ret;
> >
> > -	id = rdma_create_id(listen_id->route.addr.dev_addr.net,
> > +	id = __rdma_create_id(listen_id->route.addr.dev_addr.net,
> >  			    listen_id->event_handler, listen_id->context,
> > -			    listen_id->ps,
ib_event->param.req_rcvd.qp_type);
> > +			    listen_id->ps, ib_event->param.req_rcvd.qp_type,
> > +			    listen_id->caller);
> 
> I think the cleanest way will be to create some struct and pass pointer to
it so
> you can unfold all relevant data inside of __rdma_create_id().
> 

Why is that cleaner?  Marshall up the data into a struct, pass a ptr,
unmarshall it all...


> >  	if (IS_ERR(id))
> >  		return NULL;
> >
> > @@ -1843,8 +1851,8 @@ static struct rdma_id_private
> *cma_new_udp_id(struct rdma_cm_id *listen_id,
> >  	struct net *net = listen_id->route.addr.dev_addr.net;
> >  	int ret;
> >
> > -	id = rdma_create_id(net, listen_id->event_handler,
listen_id->context,
> > -			    listen_id->ps, IB_QPT_UD);
> > +	id = __rdma_create_id(net, listen_id->event_handler, listen_id-
> >context,
> > +			      listen_id->ps, IB_QPT_UD, listen_id->caller);
> >  	if (IS_ERR(id))
> >  		return NULL;
> >
> > @@ -2110,10 +2118,11 @@ static int iw_conn_req_handler(struct iw_cm_id
> *cm_id,
> >  		goto out;
> >
> >  	/* Create a new RDMA id for the new IW CM ID */
> > -	new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net,
> > -				   listen_id->id.event_handler,
> > -				   listen_id->id.context,
> > -				   RDMA_PS_TCP, IB_QPT_RC);
> > +	new_cm_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
> > +				     listen_id->id.event_handler,
> > +				     listen_id->id.context,
> > +				     RDMA_PS_TCP, IB_QPT_RC,
> > +				     listen_id->id.caller);
> >  	if (IS_ERR(new_cm_id)) {
> >  		ret = -ENOMEM;
> >  		goto out;
> > @@ -2238,8 +2247,8 @@ static void cma_listen_on_dev(struct
> rdma_id_private *id_priv,
> >  	if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev-
> >device, 1))
> >  		return;
> >
> > -	id = rdma_create_id(net, cma_listen_handler, id_priv,
id_priv->id.ps,
> > -			    id_priv->id.qp_type);
> > +	id = __rdma_create_id(net, cma_listen_handler, id_priv,
id_priv->id.ps,
> > +			      id_priv->id.qp_type, id_priv->id.caller);
> >  	if (IS_ERR(id))
> >  		return;
> >
> > @@ -3347,8 +3356,10 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct
> sockaddr *addr)
> >
> >  	return 0;
> >  err2:
> > -	if (id_priv->cma_dev)
> > +	if (id_priv->cma_dev) {
> > +		rdma_restrack_del(&id_priv->id.res);
> >  		cma_release_dev(id_priv);
> > +	}
> >  err1:
> >  	cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
> >  	return ret;
> > @@ -3731,14 +3742,18 @@ static int cma_send_sidr_rep(struct
> rdma_id_private *id_priv,
> >  	return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
> >  }
> >
> > -int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param
> *conn_param)
> > +int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param
> *conn_param,
> > +		  const char *caller)
> >  {
> >  	struct rdma_id_private *id_priv;
> >  	int ret;
> >
> >  	id_priv = container_of(id, struct rdma_id_private, id);
> >
> > -	id_priv->owner = task_pid_nr(current);
> > +	if (caller)
> > +		id_priv->id.caller = caller;
> > +	else
> > +		id_priv->owner = task_pid_nr(current);
> >
> >  	if (!cma_comp(id_priv, RDMA_CM_CONNECT))
> >  		return -EINVAL;
> > @@ -3778,7 +3793,7 @@ int rdma_accept(struct rdma_cm_id *id, struct
> rdma_conn_param *conn_param)
> >  	rdma_reject(id, NULL, 0);
> >  	return ret;
> >  }
> > -EXPORT_SYMBOL(rdma_accept);
> > +EXPORT_SYMBOL(__rdma_accept);
> >
> >  int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
> >  {
> > diff --git a/drivers/infiniband/core/nldev.c
b/drivers/infiniband/core/nldev.c
> > index fa8655e..a4091b5 100644
> > --- a/drivers/infiniband/core/nldev.c
> > +++ b/drivers/infiniband/core/nldev.c
> > @@ -34,6 +34,7 @@
> >  #include <linux/pid.h>
> >  #include <linux/pid_namespace.h>
> >  #include <net/netlink.h>
> > +#include <rdma/rdma_cm.h>
> >  #include <rdma/rdma_netlink.h>
> >
> >  #include "core_priv.h"
> > @@ -71,6 +72,22 @@
> >  	[RDMA_NLDEV_ATTR_RES_PID]		= { .type = NLA_U32 },
> >  	[RDMA_NLDEV_ATTR_RES_KERN_NAME]		= { .type =
> NLA_NUL_STRING,
> >  						    .len = TASK_COMM_LEN },
> > +	[RDMA_NLDEV_ATTR_RES_CM_ID]		= { .type =
> NLA_NESTED },
> 
> I would like to use this opportunity. There is CM_ID, so users will be
> able to query nldev directly on this ID (once we implement .doit), but
> can we found a proper abstraction for other objects (PD, CQ, QP e.t.c.)?
> 
> I want to have that all resources will have something similar to
ifindex/ibindex.
> 

Pds, cqs, and qps, all have  a device-unique number.  So
ibindex/restrack_type/object_id should work.  But cm_id's don't have that.
Similar to a socket I guess.  So I'm not sure how to identify cm_ids other
than by the ipaddresses/ip ports.

> 
> > +	[RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY]	= { .type =
> NLA_NESTED },
> > +	[RDMA_NLDEV_ATTR_RES_PS]		= { .type = NLA_U32 },
> > +	[RDMA_NLDEV_ATTR_RES_IPV4_SADDR]	= {
> > +				.len = FIELD_SIZEOF(struct iphdr, saddr) },
> > +	[RDMA_NLDEV_ATTR_RES_IPV4_DADDR]	= {
> > +				.len = FIELD_SIZEOF(struct iphdr, saddr) },
> > +	[RDMA_NLDEV_ATTR_RES_IPV6_SADDR]	= {
> > +				.len = FIELD_SIZEOF(struct ipv6hdr, saddr)
},
> > +	[RDMA_NLDEV_ATTR_RES_IPV6_DADDR]	= {
> > +				.len = FIELD_SIZEOF(struct ipv6hdr, saddr)
},
> > +	[RDMA_NLDEV_ATTR_RES_IP_SPORT]		= { .type = NLA_U16 },
> > +	[RDMA_NLDEV_ATTR_RES_IP_DPORT]		= { .type = NLA_U16 },
> > +	[RDMA_NLDEV_ATTR_RES_DEV_TYPE]		= { .type = NLA_U8 },
> > +	[RDMA_NLDEV_ATTR_RES_TRANSPORT_TYPE]	= { .type = NLA_U8 },
> > +	[RDMA_NLDEV_ATTR_RES_NETWORK_TYPE]	= { .type = NLA_U8 },
> >  };
> >
> >  static int fill_nldev_handle(struct sk_buff *msg, struct ib_device
*device)
> > @@ -182,6 +199,7 @@ static int fill_res_info(struct sk_buff *msg, struct
> ib_device *device)
> >  		[RDMA_RESTRACK_PD] = "pd",
> >  		[RDMA_RESTRACK_CQ] = "cq",
> >  		[RDMA_RESTRACK_QP] = "qp",
> > +		[RDMA_RESTRACK_CM_ID] = "cm_id",
> >  	};
> >
> >  	struct rdma_restrack_root *res = &device->res;
> > @@ -284,6 +302,99 @@ static int fill_res_qp_entry(struct sk_buff *msg,
> >  	return -EMSGSIZE;
> >  }
> >
> > +static int fill_res_cm_id_entry(struct sk_buff *msg,
> > +				struct rdma_cm_id *cm_id, uint32_t port)
> > +{
> > +	struct rdma_id_private *id_priv;
> > +	struct nlattr *entry_attr;
> > +
> > +	if (port && port != cm_id->port_num)
> > +		return 0;
> > +
> > +	id_priv = container_of(cm_id, struct rdma_id_private, id);
> > +	entry_attr = nla_nest_start(msg,
> RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY);
> > +	if (!entry_attr)
> > +		goto out;
> > +
> > +	if (cm_id->port_num &&
> > +	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id-
> >port_num))
> > +		goto err;
> > +
> > +	if (id_priv->qp_num &&
> > +	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv-
> >qp_num))
> > +		goto err;
> > +
> > +	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
> > +		goto err;
> > +
> > +	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
> > +		goto err;
> > +	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
> > +		goto err;
> > +	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_DEV_TYPE,
> > +		       id_priv->id.route.addr.dev_addr.dev_type))
> > +		goto err;
> > +	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TRANSPORT_TYPE,
> > +		       id_priv->id.route.addr.dev_addr.transport))
> > +		goto err;
> > +
> > +	if (cm_id->route.addr.src_addr.ss_family == AF_INET) {
> > +		struct sockaddr_in *sin;
> > +
> > +		sin = (struct sockaddr_in *)&cm_id->route.addr.src_addr;
> > +		if (nla_put_in_addr(msg,
> RDMA_NLDEV_ATTR_RES_IPV4_SADDR,
> > +				    sin->sin_addr.s_addr))
> > +			goto err;
> > +		if (nla_put_net16(msg, RDMA_NLDEV_ATTR_RES_IP_SPORT,
> > +				  sin->sin_port))
> > +			goto err;
> > +
> > +		sin = (struct sockaddr_in *)&cm_id->route.addr.dst_addr;
> > +		if (nla_put_in_addr(msg,
> RDMA_NLDEV_ATTR_RES_IPV4_DADDR,
> > +				    sin->sin_addr.s_addr))
> > +			goto err;
> > +		if (nla_put_net16(msg, RDMA_NLDEV_ATTR_RES_IP_DPORT,
> > +				  sin->sin_port))
> > +			goto err;
> > +	} else {
> > +		struct sockaddr_in6 *sin6;
> > +
> > +		sin6 = (struct sockaddr_in6 *)&cm_id->route.addr.src_addr;
> > +		if (nla_put_in6_addr(msg,
> RDMA_NLDEV_ATTR_RES_IPV6_SADDR,
> > +				     &sin6->sin6_addr))
> > +			goto err;
> > +		if (nla_put_net16(msg, RDMA_NLDEV_ATTR_RES_IP_SPORT,
> > +				  sin6->sin6_port))
> > +			goto err;
> > +
> > +		sin6 = (struct sockaddr_in6 *)&cm_id->route.addr.dst_addr;
> > +		if (nla_put_in6_addr(msg,
> RDMA_NLDEV_ATTR_RES_IPV6_DADDR,
> > +				     &sin6->sin6_addr))
> > +			goto err;
> > +		if (nla_put_net16(msg, RDMA_NLDEV_ATTR_RES_IP_DPORT,
> > +				  sin6->sin6_port))
> > +			goto err;
> > +	}
> > +
> > +	if (id_priv->id.caller) {
> > +		if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
> > +				   id_priv->id.caller))
> > +			goto err;
> > +	} else {
> > +		/* CMA keeps the owning pid. */
> > +		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, id_priv-
> >owner))
> > +			goto err;
> > +	}
> > +
> > +	nla_nest_end(msg, entry_attr);
> > +	return 0;
> > +
> > +err:
> > +	nla_nest_cancel(msg, entry_attr);
> > +out:
> > +	return -EMSGSIZE;
> > +}
> > +
> >  static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
> >  			  struct netlink_ext_ack *extack)
> >  {
> > @@ -686,6 +797,137 @@ static int nldev_res_get_qp_dumpit(struct sk_buff
> *skb,
> >  	return ret;
> >  }
> >
> > +static int nldev_res_get_cm_id_dumpit(struct sk_buff *skb,
> > +				      struct netlink_callback *cb)
> > +{
> > +	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
> > +	struct rdma_restrack_entry *res;
> > +	int err, ret = 0, idx = 0;
> > +	struct nlattr *table_attr;
> > +	struct ib_device *device;
> > +	int start = cb->args[0];
> > +	struct rdma_cm_id *cm_id = NULL;
> > +	struct nlmsghdr *nlh;
> > +	u32 index, port = 0;
> > +
> > +	err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
> > +			  nldev_policy, NULL);
> > +	/*
> > +	 * Right now, we are expecting the device index to get QP
information,
> > +	 * but it is possible to extend this code to return all devices in
> > +	 * one shot by checking the existence of
> RDMA_NLDEV_ATTR_DEV_INDEX.
> > +	 * if it doesn't exist, we will iterate over all devices.
> > +	 *
> > +	 * But it is not needed for now.
> > +	 */
> > +	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
> > +		return -EINVAL;
> > +
> > +	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
> > +	device = ib_device_get_by_index(index);
> > +	if (!device)
> > +		return -EINVAL;
> > +
> > +	/*
> > +	 * If no PORT_INDEX is supplied, we will return all QPs from that
device
> 
> QPs->CM_IDs
> 

Oops!

> > +	 */
> > +	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
> > +		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
> > +		if (!rdma_is_port_valid(device, port)) {
> > +			ret = -EINVAL;
> > +			goto err_index;
> > +		}
> > +	}
> > +
> > +	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
> > +		RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
> RDMA_NLDEV_CMD_RES_QP_GET),
> 
> RDMA_NLDEV_CMD_RES_QP_GET -> RDMA_NLDEV_CMD_RES_CM_ID_GET
>

Oops2!

 
> > +		0, NLM_F_MULTI);
> > +
> > +	if (fill_nldev_handle(skb, device)) {
> > +		ret = -EMSGSIZE;
> > +		goto err;
> > +	}
> > +
> > +	table_attr = nla_nest_start(skb, RDMA_NLDEV_ATTR_RES_CM_ID);
> > +	if (!table_attr) {
> > +		ret = -EMSGSIZE;
> > +		goto err;
> > +	}
> > +
> > +	down_read(&device->res.rwsem);
> > +	hash_for_each_possible(device->res.hash, res, node,
> > +			       RDMA_RESTRACK_CM_ID) {
> > +		if (idx < start)
> > +			goto next;
> > +
> > +		if ((rdma_is_kernel_res(res) &&
> > +		     task_active_pid_ns(current) != &init_pid_ns) ||
> > +		    (!rdma_is_kernel_res(res) &&
> > +		     task_active_pid_ns(current) !=
> > +		     task_active_pid_ns(res->task)))
> > +			/*
> > +			 * 1. Kernel QPs should be visible in init namsapce
only
> > +			 * 2. Preent only QPs visible in the current
namespace
> > +			 */
> > +			goto next;
> > +
> > +		if (!rdma_restrack_get(res))
> > +			/*
> > +			 * Resource is under release now, but we are not
> > +			 * relesing lock now, so it will be released in
> > +			 * our next pass, once we will get ->next pointer.
> > +			 */
> > +			goto next;
> > +
> > +		cm_id = container_of(res, struct rdma_cm_id, res);
> > +
> > +		up_read(&device->res.rwsem);
> > +		ret = fill_res_cm_id_entry(skb, cm_id, port);
> > +		down_read(&device->res.rwsem);
> > +		/*
> > +		 * Return resource back, but it won't be released till
> > +		 * the &device->res.rwsem will be released for write.
> > +		 */
> > +		rdma_restrack_put(res);
> > +
> > +		if (ret == -EMSGSIZE)
> > +			/*
> > +			 * There is a chance to optimize here.
> > +			 * It can be done by using list_prepare_entry
> > +			 * and list_for_each_entry_continue afterwards.
> > +			 */
> > +			break;
> > +		if (ret)
> > +			goto res_err;
> > +next:		idx++;
> > +	}
> > +	up_read(&device->res.rwsem);
> > +
> > +	nla_nest_end(skb, table_attr);
> > +	nlmsg_end(skb, nlh);
> > +	cb->args[0] = idx;
> > +
> > +	/*
> > +	 * No more CM_IDs to fill, cancel the message and
> > +	 * return 0 to mark end of dumpit.
> > +	 */
> > +	if (!cm_id)
> > +		goto err;
> > +
> > +	put_device(&device->dev);
> > +	return skb->len;
> > +
> > +res_err:
> > +	nla_nest_cancel(skb, table_attr);
> > +	up_read(&device->res.rwsem);
> > +
> > +err:
> > +	nlmsg_cancel(skb, nlh);
> > +
> > +err_index:
> > +	put_device(&device->dev);
> > +	return ret;
> > +}
> >  static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
> >  	[RDMA_NLDEV_CMD_GET] = {
> >  		.doit = nldev_get_doit,
> > @@ -712,6 +954,9 @@ static int nldev_res_get_qp_dumpit(struct sk_buff
> *skb,
> >  		 * too.
> >  		 */
> >  	},
> > +	[RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
> > +		.dump = nldev_res_get_cm_id_dumpit,
> > +	},
> >  };
> >
> >  void __init nldev_init(void)
> > diff --git a/drivers/infiniband/core/restrack.c
> b/drivers/infiniband/core/restrack.c
> > index 857637b..bb13169 100644
> > --- a/drivers/infiniband/core/restrack.c
> > +++ b/drivers/infiniband/core/restrack.c
> > @@ -3,6 +3,7 @@
> >   * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
> >   */
> >
> > +#include <rdma/rdma_cm.h>
> >  #include <rdma/ib_verbs.h>
> >  #include <rdma/restrack.h>
> >  #include <linux/mutex.h>
> > @@ -67,6 +68,7 @@ static struct ib_device *res_to_dev(struct
> rdma_restrack_entry *res)
> >  	struct ib_pd *pd;
> >  	struct ib_cq *cq;
> >  	struct ib_qp *qp;
> > +	struct rdma_cm_id *cm_id;
> >
> >  	switch (type) {
> >  	case RDMA_RESTRACK_PD:
> > @@ -85,6 +87,10 @@ static struct ib_device *res_to_dev(struct
> rdma_restrack_entry *res)
> >  		xrcd = container_of(res, struct ib_xrcd, res);
> >  		dev = xrcd->device;
> >  		break;
> > +	case RDMA_RESTRACK_CM_ID:
> > +		cm_id = container_of(res, struct rdma_cm_id, res);
> > +		dev = cm_id->device;
> > +		break;
> >  	default:
> >  		WARN_ONCE(true, "Wrong resource tracking type %u\n", type);
> >  		return NULL;
> > @@ -93,6 +99,27 @@ static struct ib_device *res_to_dev(struct
> rdma_restrack_entry *res)
> >  	return dev;
> >  }
> >
> > +static bool res_is_user(struct rdma_restrack_entry *res)
> > +{
> > +	enum rdma_restrack_type type = res->type;
> > +	bool is_user;
> > +
> > +	switch (type) {
> > +	case RDMA_RESTRACK_CM_ID: {
> > +		struct rdma_cm_id *cm_id;
> > +
> > +		cm_id = container_of(res, struct rdma_cm_id, res);
> > +		is_user = !cm_id->caller;
> > +		break;
> > +	}
> > +	default:
> > +		is_user = !uaccess_kernel();
> > +		break;
> > +	}
> > +
> > +	return is_user;
> > +}
> > +
> >  void rdma_restrack_add(struct rdma_restrack_entry *res)
> >  {
> >  	struct ib_device *dev = res_to_dev(res);
> > @@ -100,7 +127,7 @@ void rdma_restrack_add(struct rdma_restrack_entry
> *res)
> >  	if (!dev)
> >  		return;
> >
> > -	if (!uaccess_kernel()) {
> > +	if (res_is_user(res)) {
> >  		get_task_struct(current);
> >  		res->task = current;
> >  		res->kern_name = NULL;
> > diff --git a/drivers/infiniband/core/ucma.c
b/drivers/infiniband/core/ucma.c
> > index d67219d..f7f0282 100644
> > --- a/drivers/infiniband/core/ucma.c
> > +++ b/drivers/infiniband/core/ucma.c
> > @@ -476,8 +476,8 @@ static ssize_t ucma_create_id(struct ucma_file
*file,
> const char __user *inbuf,
> >  		return -ENOMEM;
> >
> >  	ctx->uid = cmd.uid;
> > -	ctx->cm_id = rdma_create_id(current->nsproxy->net_ns,
> > -				    ucma_event_handler, ctx, cmd.ps,
qp_type);
> > +	ctx->cm_id = __rdma_create_id(current->nsproxy->net_ns,
> > +			      ucma_event_handler, ctx, cmd.ps, qp_type,
NULL);
> >  	if (IS_ERR(ctx->cm_id)) {
> >  		ret = PTR_ERR(ctx->cm_id);
> >  		goto err1;
> > @@ -1084,12 +1084,12 @@ static ssize_t ucma_accept(struct ucma_file
*file,
> const char __user *inbuf,
> >  	if (cmd.conn_param.valid) {
> >  		ucma_copy_conn_param(ctx->cm_id, &conn_param,
> &cmd.conn_param);
> >  		mutex_lock(&file->mut);
> > -		ret = rdma_accept(ctx->cm_id, &conn_param);
> > +		ret = __rdma_accept(ctx->cm_id, &conn_param, NULL);
> >  		if (!ret)
> >  			ctx->uid = cmd.uid;
> >  		mutex_unlock(&file->mut);
> >  	} else
> > -		ret = rdma_accept(ctx->cm_id, NULL);
> > +		ret = __rdma_accept(ctx->cm_id, NULL, NULL);
> >
> >  	ucma_put_ctx(ctx);
> >  	return ret;
> > diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
> > index 5984225..73dac9b 100644
> > --- a/include/rdma/rdma_cm.h
> > +++ b/include/rdma/rdma_cm.h
> > @@ -155,6 +155,12 @@ struct rdma_cm_id {
> >  	enum rdma_port_space	 ps;
> >  	enum ib_qp_type		 qp_type;
> >  	u8			 port_num;
> > +	const char *caller;
> > +
> > +	/*
> > +	 * Internal to RDMA/core, don't use in the drivers
> > +	 */
> > +	struct rdma_restrack_entry     res;
> >  };
> >
> >  struct rdma_id_private {
> > @@ -198,6 +204,11 @@ struct rdma_id_private {
> >  	enum ib_gid_type	gid_type;
> >  };
> >
> > +struct rdma_cm_id *__rdma_create_id(struct net *net,
> > +				  rdma_cm_event_handler event_handler,
> > +				  void *context, enum rdma_port_space ps,
> > +				  enum ib_qp_type qp_type, const char
*caller);
> > +
> >  /**
> >   * rdma_create_id - Create an RDMA identifier.
> >   *
> > @@ -210,10 +221,9 @@ struct rdma_id_private {
> >   *
> >   * The id holds a reference on the network namespace until it is
destroyed.
> >   */
> > -struct rdma_cm_id *rdma_create_id(struct net *net,
> > -				  rdma_cm_event_handler event_handler,
> > -				  void *context, enum rdma_port_space ps,
> > -				  enum ib_qp_type qp_type);
> > +#define rdma_create_id(net, event_handler, context, ps, qp_type) \
> > +	__rdma_create_id((net), (event_handler), (context), (ps), (qp_type),
\
> > +			 KBUILD_MODNAME)
> >
> >  /**
> >    * rdma_destroy_id - Destroys an RDMA identifier.
> > @@ -325,6 +335,9 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct
> ib_qp_attr *qp_attr,
> >   */
> >  int rdma_listen(struct rdma_cm_id *id, int backlog);
> >
> > +int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param
> *conn_param,
> > +		  const char *caller);
> > +
> >  /**
> >   * rdma_accept - Called to accept a connection request or response.
> >   * @id: Connection identifier associated with the request.
> > @@ -340,7 +353,8 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct
> ib_qp_attr *qp_attr,
> >   * state of the qp associated with the id is modified to error, such
that any
> >   * previously posted receive buffers would be flushed.
> >   */
> > -int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param
> *conn_param);
> > +#define rdma_accept(id, conn_param) \
> > +	__rdma_accept((id), (conn_param),  KBUILD_MODNAME)
> >
> >  /**
> >   * rdma_notify - Notifies the RDMA CM of an asynchronous event that has
> > diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
> > index c2d8116..a794e0e 100644
> > --- a/include/rdma/restrack.h
> > +++ b/include/rdma/restrack.h
> > @@ -33,6 +33,10 @@ enum rdma_restrack_type {
> >  	 */
> >  	RDMA_RESTRACK_XRCD,
> >  	/**
> > +	 * @RDMA_RESTRACK_CM_ID: Connection Manager ID (CM_ID)
> > +	 */
> > +	RDMA_RESTRACK_CM_ID,
> > +	/**
> >  	 * @RDMA_RESTRACK_MAX: Last entry, used for array dclarations
> >  	 */
> >  	RDMA_RESTRACK_MAX
> > diff --git a/include/uapi/rdma/rdma_netlink.h
> b/include/uapi/rdma/rdma_netlink.h
> > index 17e59be..4ed21ee 100644
> > --- a/include/uapi/rdma/rdma_netlink.h
> > +++ b/include/uapi/rdma/rdma_netlink.h
> > @@ -240,6 +240,8 @@ enum rdma_nldev_command {
> >
> >  	RDMA_NLDEV_CMD_RES_QP_GET, /* can dump */
> >
> > +	RDMA_NLDEV_CMD_RES_CM_ID_GET, /* can dump */
> > +
> >  	RDMA_NLDEV_NUM_OPS
> >  };
> >
> > @@ -352,6 +354,34 @@ enum rdma_nldev_attr {
> >  	 */
> >  	RDMA_NLDEV_ATTR_RES_KERN_NAME,		/* string */
> >
> > +	RDMA_NLDEV_ATTR_RES_CM_ID,		/* nested table */
> > +	RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,	/* nested table */
> > +	/*
> > +	 * rdma_cm_id port space.
> > +	 */
> > +	RDMA_NLDEV_ATTR_RES_PS,			/* u32 */
> > +	/*
> > +	 * IP Addresses/port attributes.
> > +	 */
> > +	RDMA_NLDEV_ATTR_RES_IPV4_SADDR,		/* u8[4] */
> > +	RDMA_NLDEV_ATTR_RES_IPV4_DADDR,		/* u8[4] */
> > +	RDMA_NLDEV_ATTR_RES_IPV6_SADDR,		/* u8[16] */
> > +	RDMA_NLDEV_ATTR_RES_IPV6_DADDR,		/* u8[16] */
> > +	RDMA_NLDEV_ATTR_RES_IP_SPORT,		/* BE u16 */
> > +	RDMA_NLDEV_ATTR_RES_IP_DPORT,		/* BE u16 */
> 
> Can you please document the meaning of S (source) and D (destination)
> in regards of this netlink output? It is needed to remove ambiguity.

Sure. 

> 
> > +	/*
> > +	 * ARPHRD_INFINIBAND, ARPHRD_ETHER, ...
> > +	 */
> > +	RDMA_NLDEV_ATTR_RES_DEV_TYPE,		/* u8 */
> > +	/*
> > +	 * enum enum rdma_transport_type (IB, IWARP, ...)
> > +	 */
> > +	RDMA_NLDEV_ATTR_RES_TRANSPORT_TYPE,	/* u8 */
> > +	/*
> > +	 * enum rdma_network_type (IB, IPv4, IPv6,...)
> > +	 */
> > +	RDMA_NLDEV_ATTR_RES_NETWORK_TYPE,	/* u8 */
> > +
> >  	RDMA_NLDEV_ATTR_MAX
> >  };
> >  #endif /* _UAPI_RDMA_NETLINK_H */
> > --
> > 1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe Feb. 1, 2018, 5:50 p.m. UTC | #7
On Thu, Feb 01, 2018 at 10:01:09AM +0200, Leon Romanovsky wrote:

> Regarding the name, I personally think that cm_id is better because it
> is general and applicable both to ib_cm and iw_cm. The separation
> between them can be done with introduction of new netlink attribute, e.g.
> cm_id_type.

In netlink the attributes should be self-describing, or intrinsically
related to something mandatory and fundamental about their container
(eg AF_ family in rtnl)

What information do we actually need from the cm_id in various
protocol families?

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe Feb. 1, 2018, 5:53 p.m. UTC | #8
On Thu, Feb 01, 2018 at 10:49:44AM +0200, Leon Romanovsky wrote:

> > +	RDMA_NLDEV_ATTR_RES_IPV4_SADDR,		/* u8[4] */
> > +	RDMA_NLDEV_ATTR_RES_IPV4_DADDR,		/* u8[4] */
> > +	RDMA_NLDEV_ATTR_RES_IPV6_SADDR,		/* u8[16] */
> > +	RDMA_NLDEV_ATTR_RES_IPV6_DADDR,		/* u8[16] */
> > +	RDMA_NLDEV_ATTR_RES_IP_SPORT,		/* BE u16 */
> > +	RDMA_NLDEV_ATTR_RES_IP_DPORT,		/* BE u16 */
> 
> Can you please document the meaning of S (source) and D (destination)
> in regards of this netlink output? It is needed to remove ambiguity.

And no on BE's in netlink, I think.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Wise Feb. 1, 2018, 6:14 p.m. UTC | #9
> 
> On Thu, Feb 01, 2018 at 10:01:09AM +0200, Leon Romanovsky wrote:
> 
> > Regarding the name, I personally think that cm_id is better because it
> > is general and applicable both to ib_cm and iw_cm. The separation
> > between them can be done with introduction of new netlink attribute,
e.g.
> > cm_id_type.
> 
> In netlink the attributes should be self-describing, or intrinsically
> related to something mandatory and fundamental about their container
> (eg AF_ family in rtnl)
> 
> What information do we actually need from the cm_id in various
> protocol families?
> 

Looking at iw_cm_id and iwcm_id_private: tos, mapped, state, flags,
refcount.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Wise Feb. 1, 2018, 6:18 p.m. UTC | #10
> -----Original Message-----
> From: Jason Gunthorpe [mailto:jgg@ziepe.ca]
> Sent: Thursday, February 01, 2018 11:54 AM
> To: Leon Romanovsky
> Cc: Steve Wise; dledford@redhat.com; linux-rdma@vger.kernel.org
> Subject: Re: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID
information
> 
> On Thu, Feb 01, 2018 at 10:49:44AM +0200, Leon Romanovsky wrote:
> 
> > > +	RDMA_NLDEV_ATTR_RES_IPV4_SADDR,		/* u8[4] */
> > > +	RDMA_NLDEV_ATTR_RES_IPV4_DADDR,		/* u8[4] */
> > > +	RDMA_NLDEV_ATTR_RES_IPV6_SADDR,		/* u8[16] */
> > > +	RDMA_NLDEV_ATTR_RES_IPV6_DADDR,		/* u8[16] */
> > > +	RDMA_NLDEV_ATTR_RES_IP_SPORT,		/* BE u16 */
> > > +	RDMA_NLDEV_ATTR_RES_IP_DPORT,		/* BE u16 */
> >
> > Can you please document the meaning of S (source) and D (destination)
> > in regards of this netlink output? It is needed to remove ambiguity.
> 
> And no on BE's in netlink, I think.


Sure there are, see nla_put_be32() for example. I'm using nla_put_net16() to
store the ports.  Perhaps I need to change the above comment to ne16 instead
of BE u16?





--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe Feb. 1, 2018, 6:32 p.m. UTC | #11
On Thu, Feb 01, 2018 at 12:18:05PM -0600, Steve Wise wrote:
> 
> 
> > From: Jason Gunthorpe [mailto:jgg@ziepe.ca]
> > Sent: Thursday, February 01, 2018 11:54 AM
> > To: Leon Romanovsky
> > Cc: Steve Wise; dledford@redhat.com; linux-rdma@vger.kernel.org
> > Subject: Re: [PATCH RFC 2/2] RDMA/nldev: provide detailed CM_ID
> information
> > 
> > On Thu, Feb 01, 2018 at 10:49:44AM +0200, Leon Romanovsky wrote:
> > 
> > > > +	RDMA_NLDEV_ATTR_RES_IPV4_SADDR,		/* u8[4] */
> > > > +	RDMA_NLDEV_ATTR_RES_IPV4_DADDR,		/* u8[4] */
> > > > +	RDMA_NLDEV_ATTR_RES_IPV6_SADDR,		/* u8[16] */
> > > > +	RDMA_NLDEV_ATTR_RES_IPV6_DADDR,		/* u8[16] */
> > > > +	RDMA_NLDEV_ATTR_RES_IP_SPORT,		/* BE u16 */
> > > > +	RDMA_NLDEV_ATTR_RES_IP_DPORT,		/* BE u16 */
> > >
> > > Can you please document the meaning of S (source) and D (destination)
> > > in regards of this netlink output? It is needed to remove ambiguity.
> > 
> > And no on BE's in netlink, I think.
> 
> 
> Sure there are, see nla_put_be32() for example. I'm using nla_put_net16() to
> store the ports.  Perhaps I need to change the above comment to ne16 instead
> of BE u16?

There is no reason thes ports should be be, so don't make it be.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Wise Feb. 1, 2018, 6:37 p.m. UTC | #12
> > > On Thu, Feb 01, 2018 at 10:49:44AM +0200, Leon Romanovsky wrote:
> > >
> > > > > +	RDMA_NLDEV_ATTR_RES_IPV4_SADDR,		/* u8[4] */
> > > > > +	RDMA_NLDEV_ATTR_RES_IPV4_DADDR,		/* u8[4] */
> > > > > +	RDMA_NLDEV_ATTR_RES_IPV6_SADDR,		/* u8[16] */
> > > > > +	RDMA_NLDEV_ATTR_RES_IPV6_DADDR,		/* u8[16] */
> > > > > +	RDMA_NLDEV_ATTR_RES_IP_SPORT,		/* BE u16 */
> > > > > +	RDMA_NLDEV_ATTR_RES_IP_DPORT,		/* BE u16 */
> > > >
> > > > Can you please document the meaning of S (source) and D
(destination)
> > > > in regards of this netlink output? It is needed to remove ambiguity.
> > >
> > > And no on BE's in netlink, I think.
> >
> >
> > Sure there are, see nla_put_be32() for example. I'm using
nla_put_net16() to
> > store the ports.  Perhaps I need to change the above comment to ne16
instead
> > of BE u16?
> 
> There is no reason thes ports should be be, so don't make it be.

Well, they're BE on the wire.  That's the reason.  And other APIs return
them to user space in their NBO, like getsockname().  

You would rather the nldev code puts it in HBO before sending it up?  

Steve.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe Feb. 1, 2018, 10:01 p.m. UTC | #13
On Thu, Feb 01, 2018 at 12:37:31PM -0600, Steve Wise wrote:
> > > > On Thu, Feb 01, 2018 at 10:49:44AM +0200, Leon Romanovsky wrote:
> > > >
> > > > > > +	RDMA_NLDEV_ATTR_RES_IPV4_SADDR,		/* u8[4] */
> > > > > > +	RDMA_NLDEV_ATTR_RES_IPV4_DADDR,		/* u8[4] */
> > > > > > +	RDMA_NLDEV_ATTR_RES_IPV6_SADDR,		/* u8[16] */
> > > > > > +	RDMA_NLDEV_ATTR_RES_IPV6_DADDR,		/* u8[16] */
> > > > > > +	RDMA_NLDEV_ATTR_RES_IP_SPORT,		/* BE u16 */
> > > > > > +	RDMA_NLDEV_ATTR_RES_IP_DPORT,		/* BE u16 */
> > > > >
> > > > > Can you please document the meaning of S (source) and D
> (destination)
> > > > > in regards of this netlink output? It is needed to remove ambiguity.
> > > >
> > > > And no on BE's in netlink, I think.
> > >
> > >
> > > Sure there are, see nla_put_be32() for example. I'm using
> nla_put_net16() to
> > > store the ports.  Perhaps I need to change the above comment to ne16
> instead
> > > of BE u16?
> > 
> > There is no reason thes ports should be be, so don't make it be.
> 
> Well, they're BE on the wire.  That's the reason.  And other APIs return
> them to user space in their NBO, like getsockname().  
> 
> You would rather the nldev code puts it in HBO before sending it up?  

Yes, we have far too much 'guess the endian' in our uapi, it doesn't
make sense in modern HW, the swap costs nothing.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Leon Romanovsky Feb. 4, 2018, 3:05 p.m. UTC | #14
On Thu, Feb 01, 2018 at 10:07:20AM -0600, Steve Wise wrote:
>
> >
> > On Tue, Jan 30, 2018 at 08:59:11AM -0800, Steve Wise wrote:
> > > Implement RDMA nldev netlink interface to get detailed CM_ID
> information.
> > >
> > > Because cm_id's are attached to rdma devices in various work queue
> contexts,
> > > the pid and task information at device-attach time is sometimes not
> useful.
> > > For example, an nvme/f host connection ends up being bound to a device
> > > in a work queue context and the resulting pid at attach time no longer
> exists
> > > after connection setup.  So instead we mark all cm_id's created via the
> > > rdma_ucm as "user", and all others as "kernel".  This required tweaking
> > > the restrack code a little.  It also required wrapping some rdma_cm
> > > functions to allow passing the module name string.
> > >
> > > Signed-off-by: Steve Wise <swise@opengridcomputing.com>
> > > ---
> > >  drivers/infiniband/core/cma.c      |  55 ++++++---
> > >  drivers/infiniband/core/nldev.c    | 245
> > +++++++++++++++++++++++++++++++++++++
> > >  drivers/infiniband/core/restrack.c |  29 ++++-
> > >  drivers/infiniband/core/ucma.c     |   8 +-
> > >  include/rdma/rdma_cm.h             |  24 +++-
> > >  include/rdma/restrack.h            |   4 +
> > >  include/uapi/rdma/rdma_netlink.h   |  30 +++++
> > >  7 files changed, 365 insertions(+), 30 deletions(-)
> > >
> > > diff --git a/drivers/infiniband/core/cma.c
> b/drivers/infiniband/core/cma.c
> > > index 72ad52b..51fbfa1 100644
> > > --- a/drivers/infiniband/core/cma.c
> > > +++ b/drivers/infiniband/core/cma.c
> > > @@ -465,6 +465,9 @@ static void _cma_attach_to_dev(struct
> > rdma_id_private *id_priv,
> > >  	id_priv->id.route.addr.dev_addr.transport =
> > >  		rdma_node_get_transport(cma_dev->device->node_type);
> > >  	list_add_tail(&id_priv->list, &cma_dev->id_list);
> > > +	id_priv->id.res.type = RDMA_RESTRACK_CM_ID;
> > > +	id_priv->id.res.kern_name = id_priv->id.caller;
> >
> > Steve, I don't like it, I worked hard to hide it from the users of
> restrack,
> > and don't see reason why the same trick as with ib_create_cq/ib_create_pd
> > won't
> > work here.
>
> I am doing the same trick, no?  rdma_create_id() is a static inline that
> passes KBUILD_MODNAME.  The issue is that at the time the rdma_cm_id is
> created, it is not associated with any ib_device.  That only happens at
> cma_attach time.  So how can the resource be added if there is no device?
>

So maybe, we don't need to add resource to the DB at rdma_create_id
stage and do it in cma_attach only, and in that stage you will update
the kern_name with KBUILD_MODNAME.

> >
> > > +	rdma_restrack_add(&id_priv->id.res);
> > >  }
> > >
> > >  static void cma_attach_to_dev(struct rdma_id_private *id_priv,
> > > @@ -737,10 +740,10 @@ static void cma_deref_id(struct rdma_id_private
> > *id_priv)
> > >  		complete(&id_priv->comp);
> > >  }
> > >
> > > -struct rdma_cm_id *rdma_create_id(struct net *net,
> > > -				  rdma_cm_event_handler event_handler,
> > > -				  void *context, enum rdma_port_space ps,
> > > -				  enum ib_qp_type qp_type)
> > > +struct rdma_cm_id *__rdma_create_id(struct net *net,
> > > +				    rdma_cm_event_handler event_handler,
> > > +				    void *context, enum rdma_port_space ps,
> > > +				    enum ib_qp_type qp_type, const char
> *caller)
> > >  {
> > >  	struct rdma_id_private *id_priv;
> > >
> > > @@ -748,7 +751,10 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
> > >  	if (!id_priv)
> > >  		return ERR_PTR(-ENOMEM);
> > >
> > > -	id_priv->owner = task_pid_nr(current);
> > > +	if (caller)
> > > +		id_priv->id.caller = caller;
> > > +	else
> > > +		id_priv->owner = task_pid_nr(current);
> >
> > See the comment above
>
> There is no ib_device at this point, so caller (and owner) must be saved
> until the cm_id is bound to a device (or possibly devices for listening
> ids).

Why do we need previous owner? Can it be that rdma_create_id was
performed by one process and cma_attach by another?

>
> >
> > >  	id_priv->state = RDMA_CM_IDLE;
> > >  	id_priv->id.context = context;
> > >  	id_priv->id.event_handler = event_handler;
> >
> > Not saying that we need to do it now, but it is important to write, most
> > probably we can drop certain initialization from rdma_create_id() adn
> > reuse rdma_restrack_put/_get.
> >
>
> I don't understand this comment.  Can you please elaborate?

Most probably, we will be able to drop id_priv->qp_mutex in the future,
but let's not discuss it now, it is not related to this series.

>
> > > @@ -768,7 +774,7 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
> > >
> > >  	return &id_priv->id;
> > >  }
> > > -EXPORT_SYMBOL(rdma_create_id);
> > > +EXPORT_SYMBOL(__rdma_create_id);
> > >
> > >  static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp
> *qp)
> > >  {
> > > @@ -1628,6 +1634,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
> > >  	mutex_unlock(&id_priv->handler_mutex);
> > >
> > >  	if (id_priv->cma_dev) {
> > > +		rdma_restrack_del(&id_priv->id.res);
> >
> > You should count all created cm_ids and not only binded.
>
> No ib_device if they aren't bound.
>
> >
> > >  		if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
> > >  			if (id_priv->cm_id.ib)
> > >  				ib_destroy_cm_id(id_priv->cm_id.ib);
> > > @@ -1786,9 +1793,10 @@ static struct rdma_id_private
> > *cma_new_conn_id(struct rdma_cm_id *listen_id,
> > >  		ib_event->param.req_rcvd.primary_path->service_id;
> > >  	int ret;
> > >
> > > -	id = rdma_create_id(listen_id->route.addr.dev_addr.net,
> > > +	id = __rdma_create_id(listen_id->route.addr.dev_addr.net,
> > >  			    listen_id->event_handler, listen_id->context,
> > > -			    listen_id->ps,
> ib_event->param.req_rcvd.qp_type);
> > > +			    listen_id->ps, ib_event->param.req_rcvd.qp_type,
> > > +			    listen_id->caller);
> >
> > I think the cleanest way will be to create some struct and pass pointer to
> it so
> > you can unfold all relevant data inside of __rdma_create_id().
> >
>
> Why is that cleaner?  Marshall up the data into a struct, pass a ptr,
> unmarshall it all...

I counted 6 arguments, and for me, it smells like something wrong.

>
>
> > >  	if (IS_ERR(id))
> > >  		return NULL;
> > >
> > > @@ -1843,8 +1851,8 @@ static struct rdma_id_private
> > *cma_new_udp_id(struct rdma_cm_id *listen_id,
> > >  	struct net *net = listen_id->route.addr.dev_addr.net;
> > >  	int ret;
> > >
> > > -	id = rdma_create_id(net, listen_id->event_handler,
> listen_id->context,
> > > -			    listen_id->ps, IB_QPT_UD);
> > > +	id = __rdma_create_id(net, listen_id->event_handler, listen_id-
> > >context,
> > > +			      listen_id->ps, IB_QPT_UD, listen_id->caller);
> > >  	if (IS_ERR(id))
> > >  		return NULL;
> > >
> > > @@ -2110,10 +2118,11 @@ static int iw_conn_req_handler(struct iw_cm_id
> > *cm_id,
> > >  		goto out;
> > >
> > >  	/* Create a new RDMA id for the new IW CM ID */
> > > -	new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net,
> > > -				   listen_id->id.event_handler,
> > > -				   listen_id->id.context,
> > > -				   RDMA_PS_TCP, IB_QPT_RC);
> > > +	new_cm_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
> > > +				     listen_id->id.event_handler,
> > > +				     listen_id->id.context,
> > > +				     RDMA_PS_TCP, IB_QPT_RC,
> > > +				     listen_id->id.caller);
> > >  	if (IS_ERR(new_cm_id)) {
> > >  		ret = -ENOMEM;
> > >  		goto out;
> > > @@ -2238,8 +2247,8 @@ static void cma_listen_on_dev(struct
> > rdma_id_private *id_priv,
> > >  	if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev-
> > >device, 1))
> > >  		return;
> > >
> > > -	id = rdma_create_id(net, cma_listen_handler, id_priv,
> id_priv->id.ps,
> > > -			    id_priv->id.qp_type);
> > > +	id = __rdma_create_id(net, cma_listen_handler, id_priv,
> id_priv->id.ps,
> > > +			      id_priv->id.qp_type, id_priv->id.caller);
> > >  	if (IS_ERR(id))
> > >  		return;
> > >
> > > @@ -3347,8 +3356,10 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct
> > sockaddr *addr)
> > >
> > >  	return 0;
> > >  err2:
> > > -	if (id_priv->cma_dev)
> > > +	if (id_priv->cma_dev) {
> > > +		rdma_restrack_del(&id_priv->id.res);
> > >  		cma_release_dev(id_priv);
> > > +	}
> > >  err1:
> > >  	cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
> > >  	return ret;
> > > @@ -3731,14 +3742,18 @@ static int cma_send_sidr_rep(struct
> > rdma_id_private *id_priv,
> > >  	return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
> > >  }
> > >
> > > -int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param
> > *conn_param)
> > > +int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param
> > *conn_param,
> > > +		  const char *caller)
> > >  {
> > >  	struct rdma_id_private *id_priv;
> > >  	int ret;
> > >
> > >  	id_priv = container_of(id, struct rdma_id_private, id);
> > >
> > > -	id_priv->owner = task_pid_nr(current);
> > > +	if (caller)
> > > +		id_priv->id.caller = caller;
> > > +	else
> > > +		id_priv->owner = task_pid_nr(current);
> > >
> > >  	if (!cma_comp(id_priv, RDMA_CM_CONNECT))
> > >  		return -EINVAL;
> > > @@ -3778,7 +3793,7 @@ int rdma_accept(struct rdma_cm_id *id, struct
> > rdma_conn_param *conn_param)
> > >  	rdma_reject(id, NULL, 0);
> > >  	return ret;
> > >  }
> > > -EXPORT_SYMBOL(rdma_accept);
> > > +EXPORT_SYMBOL(__rdma_accept);
> > >
> > >  int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
> > >  {
> > > diff --git a/drivers/infiniband/core/nldev.c
> b/drivers/infiniband/core/nldev.c
> > > index fa8655e..a4091b5 100644
> > > --- a/drivers/infiniband/core/nldev.c
> > > +++ b/drivers/infiniband/core/nldev.c
> > > @@ -34,6 +34,7 @@
> > >  #include <linux/pid.h>
> > >  #include <linux/pid_namespace.h>
> > >  #include <net/netlink.h>
> > > +#include <rdma/rdma_cm.h>
> > >  #include <rdma/rdma_netlink.h>
> > >
> > >  #include "core_priv.h"
> > > @@ -71,6 +72,22 @@
> > >  	[RDMA_NLDEV_ATTR_RES_PID]		= { .type = NLA_U32 },
> > >  	[RDMA_NLDEV_ATTR_RES_KERN_NAME]		= { .type =
> > NLA_NUL_STRING,
> > >  						    .len = TASK_COMM_LEN },
> > > +	[RDMA_NLDEV_ATTR_RES_CM_ID]		= { .type =
> > NLA_NESTED },
> >
> > I would like to use this opportunity. There is CM_ID, so users will be
> > able to query nldev directly on this ID (once we implement .doit), but
> > can we found a proper abstraction for other objects (PD, CQ, QP e.t.c.)?
> >
> > I want to have that all resources will have something similar to
> ifindex/ibindex.
> >
>
> Pds, cqs, and qps, all have  a device-unique number.  So
> ibindex/restrack_type/object_id should work.  But cm_id's don't have that.
> Similar to a socket I guess.  So I'm not sure how to identify cm_ids other
> than by the ipaddresses/ip ports.

It is opposite, cm_id is a unique number, but other objects don't have
such. What about PD and CQ?

We can declare that access to QP .doit willbe based on QPN, PD .doit
will be based on local_dma_lkey, but what will be CQ identifier?

Thanks
Steve Wise Feb. 5, 2018, 3:33 p.m. UTC | #15
Hey Leon,

> On Thu, Feb 01, 2018 at 10:07:20AM -0600, Steve Wise wrote:
> >
> > >
> > > On Tue, Jan 30, 2018 at 08:59:11AM -0800, Steve Wise wrote:
> > > > Implement RDMA nldev netlink interface to get detailed CM_ID
> > information.
> > > >
> > > > Because cm_id's are attached to rdma devices in various work queue
> > contexts,
> > > > the pid and task information at device-attach time is sometimes not
> > useful.
> > > > For example, an nvme/f host connection ends up being bound to a
device
> > > > in a work queue context and the resulting pid at attach time no
longer
> > exists
> > > > after connection setup.  So instead we mark all cm_id's created via
the
> > > > rdma_ucm as "user", and all others as "kernel".  This required
tweaking
> > > > the restrack code a little.  It also required wrapping some rdma_cm
> > > > functions to allow passing the module name string.
> > > >
> > > > Signed-off-by: Steve Wise <swise@opengridcomputing.com>
> > > > ---
> > > >  drivers/infiniband/core/cma.c      |  55 ++++++---
> > > >  drivers/infiniband/core/nldev.c    | 245
> > > +++++++++++++++++++++++++++++++++++++
> > > >  drivers/infiniband/core/restrack.c |  29 ++++-
> > > >  drivers/infiniband/core/ucma.c     |   8 +-
> > > >  include/rdma/rdma_cm.h             |  24 +++-
> > > >  include/rdma/restrack.h            |   4 +
> > > >  include/uapi/rdma/rdma_netlink.h   |  30 +++++
> > > >  7 files changed, 365 insertions(+), 30 deletions(-)
> > > >
> > > > diff --git a/drivers/infiniband/core/cma.c
> > b/drivers/infiniband/core/cma.c
> > > > index 72ad52b..51fbfa1 100644
> > > > --- a/drivers/infiniband/core/cma.c
> > > > +++ b/drivers/infiniband/core/cma.c
> > > > @@ -465,6 +465,9 @@ static void _cma_attach_to_dev(struct
> > > rdma_id_private *id_priv,
> > > >  	id_priv->id.route.addr.dev_addr.transport =
> > > >  		rdma_node_get_transport(cma_dev->device->node_type);
> > > >  	list_add_tail(&id_priv->list, &cma_dev->id_list);
> > > > +	id_priv->id.res.type = RDMA_RESTRACK_CM_ID;
> > > > +	id_priv->id.res.kern_name = id_priv->id.caller;
> > >
> > > Steve, I don't like it, I worked hard to hide it from the users of
> > restrack,
> > > and don't see reason why the same trick as with
ib_create_cq/ib_create_pd
> > > won't
> > > work here.
> >
> > I am doing the same trick, no?  rdma_create_id() is a static inline that
> > passes KBUILD_MODNAME.  The issue is that at the time the rdma_cm_id is
> > created, it is not associated with any ib_device.  That only happens at
> > cma_attach time.  So how can the resource be added if there is no
device?
> >
> 
> So maybe, we don't need to add resource to the DB at rdma_create_id
> stage and do it in cma_attach only, and in that stage you will update
> the kern_name with KBUILD_MODNAME.

Yea, I'll look into this. 

> 
> > >
> > > > +	rdma_restrack_add(&id_priv->id.res);
> > > >  }
> > > >
> > > >  static void cma_attach_to_dev(struct rdma_id_private *id_priv,
> > > > @@ -737,10 +740,10 @@ static void cma_deref_id(struct
rdma_id_private
> > > *id_priv)
> > > >  		complete(&id_priv->comp);
> > > >  }
> > > >
> > > > -struct rdma_cm_id *rdma_create_id(struct net *net,
> > > > -				  rdma_cm_event_handler
event_handler,
> > > > -				  void *context, enum
rdma_port_space ps,
> > > > -				  enum ib_qp_type qp_type)
> > > > +struct rdma_cm_id *__rdma_create_id(struct net *net,
> > > > +				    rdma_cm_event_handler
event_handler,
> > > > +				    void *context, enum
rdma_port_space ps,
> > > > +				    enum ib_qp_type qp_type, const
char
> > *caller)
> > > >  {
> > > >  	struct rdma_id_private *id_priv;
> > > >
> > > > @@ -748,7 +751,10 @@ struct rdma_cm_id *rdma_create_id(struct net
> *net,
> > > >  	if (!id_priv)
> > > >  		return ERR_PTR(-ENOMEM);
> > > >
> > > > -	id_priv->owner = task_pid_nr(current);
> > > > +	if (caller)
> > > > +		id_priv->id.caller = caller;
> > > > +	else
> > > > +		id_priv->owner = task_pid_nr(current);
> > >
> > > See the comment above
> >
> > There is no ib_device at this point, so caller (and owner) must be saved
> > until the cm_id is bound to a device (or possibly devices for listening
> > ids).
> 
> Why do we need previous owner? Can it be that rdma_create_id was
> performed by one process and cma_attach by another?

Yes.  Connection setup events are processed on rdma_cm (and ib_cm/iw_cm)
workqueue threads.   Here's one example:  The application creates a cm_id,
binds to 0.0.0.0/0 and listens.  But there are no rdma devices at this
point.  There is a cm_id owned by the application, but not bound to any
device.  Then, lets say mlx4 and cxgb4 both get inserted.  The rdma_cm will
discover the new rdma devices, create and attach child cm_ids to those
devices.  This is done in a workq thread driven off of the ib_client
device_add upcall.  So what we really want to show is that these per-device
cm_ids are owned by the same application. 

There are other cases, that I've seen with just nvme/f host.   I'll produce
more examples to help us understand if there is a better path than what I've
proposed in this patch.

> 
> >
> > >
> > > >  	id_priv->state = RDMA_CM_IDLE;
> > > >  	id_priv->id.context = context;
> > > >  	id_priv->id.event_handler = event_handler;
> > >
> > > Not saying that we need to do it now, but it is important to write,
most
> > > probably we can drop certain initialization from rdma_create_id() adn
> > > reuse rdma_restrack_put/_get.
> > >
> >
> > I don't understand this comment.  Can you please elaborate?
> 
> Most probably, we will be able to drop id_priv->qp_mutex in the future,
> but let's not discuss it now, it is not related to this series.
> 
> >
> > > > @@ -768,7 +774,7 @@ struct rdma_cm_id *rdma_create_id(struct net
> *net,
> > > >
> > > >  	return &id_priv->id;
> > > >  }
> > > > -EXPORT_SYMBOL(rdma_create_id);
> > > > +EXPORT_SYMBOL(__rdma_create_id);
> > > >
> > > >  static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct
ib_qp
> > *qp)
> > > >  {
> > > > @@ -1628,6 +1634,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
> > > >  	mutex_unlock(&id_priv->handler_mutex);
> > > >
> > > >  	if (id_priv->cma_dev) {
> > > > +		rdma_restrack_del(&id_priv->id.res);
> > >
> > > You should count all created cm_ids and not only binded.
> >
> > No ib_device if they aren't bound.
> >
> > >
> > > >  		if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
> > > >  			if (id_priv->cm_id.ib)
> > > >  				ib_destroy_cm_id(id_priv->cm_id.ib);
> > > > @@ -1786,9 +1793,10 @@ static struct rdma_id_private
> > > *cma_new_conn_id(struct rdma_cm_id *listen_id,
> > > >  		ib_event->param.req_rcvd.primary_path->service_id;
> > > >  	int ret;
> > > >
> > > > -	id = rdma_create_id(listen_id->route.addr.dev_addr.net,
> > > > +	id = __rdma_create_id(listen_id->route.addr.dev_addr.net,
> > > >  			    listen_id->event_handler,
listen_id->context,
> > > > -			    listen_id->ps,
> > ib_event->param.req_rcvd.qp_type);
> > > > +			    listen_id->ps,
ib_event->param.req_rcvd.qp_type,
> > > > +			    listen_id->caller);
> > >
> > > I think the cleanest way will be to create some struct and pass
pointer to
> > it so
> > > you can unfold all relevant data inside of __rdma_create_id().
> > >
> >
> > Why is that cleaner?  Marshall up the data into a struct, pass a ptr,
> > unmarshall it all...
> 
> I counted 6 arguments, and for me, it smells like something wrong.
> 

I'll look into changing this. 

> >
> >
> > > >  	if (IS_ERR(id))
> > > >  		return NULL;
> > > >
> > > > @@ -1843,8 +1851,8 @@ static struct rdma_id_private
> > > *cma_new_udp_id(struct rdma_cm_id *listen_id,
> > > >  	struct net *net = listen_id->route.addr.dev_addr.net;
> > > >  	int ret;
> > > >
> > > > -	id = rdma_create_id(net, listen_id->event_handler,
> > listen_id->context,
> > > > -			    listen_id->ps, IB_QPT_UD);
> > > > +	id = __rdma_create_id(net, listen_id->event_handler,
listen_id-
> > > >context,
> > > > +			      listen_id->ps, IB_QPT_UD,
listen_id->caller);
> > > >  	if (IS_ERR(id))
> > > >  		return NULL;
> > > >
> > > > @@ -2110,10 +2118,11 @@ static int iw_conn_req_handler(struct
> iw_cm_id
> > > *cm_id,
> > > >  		goto out;
> > > >
> > > >  	/* Create a new RDMA id for the new IW CM ID */
> > > > -	new_cm_id =
rdma_create_id(listen_id->id.route.addr.dev_addr.net,
> > > > -				   listen_id->id.event_handler,
> > > > -				   listen_id->id.context,
> > > > -				   RDMA_PS_TCP, IB_QPT_RC);
> > > > +	new_cm_id =
__rdma_create_id(listen_id->id.route.addr.dev_addr.net,
> > > > +				     listen_id->id.event_handler,
> > > > +				     listen_id->id.context,
> > > > +				     RDMA_PS_TCP, IB_QPT_RC,
> > > > +				     listen_id->id.caller);
> > > >  	if (IS_ERR(new_cm_id)) {
> > > >  		ret = -ENOMEM;
> > > >  		goto out;
> > > > @@ -2238,8 +2247,8 @@ static void cma_listen_on_dev(struct
> > > rdma_id_private *id_priv,
> > > >  	if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev-
> > > >device, 1))
> > > >  		return;
> > > >
> > > > -	id = rdma_create_id(net, cma_listen_handler, id_priv,
> > id_priv->id.ps,
> > > > -			    id_priv->id.qp_type);
> > > > +	id = __rdma_create_id(net, cma_listen_handler, id_priv,
> > id_priv->id.ps,
> > > > +			      id_priv->id.qp_type,
id_priv->id.caller);
> > > >  	if (IS_ERR(id))
> > > >  		return;
> > > >
> > > > @@ -3347,8 +3356,10 @@ int rdma_bind_addr(struct rdma_cm_id *id,
> struct
> > > sockaddr *addr)
> > > >
> > > >  	return 0;
> > > >  err2:
> > > > -	if (id_priv->cma_dev)
> > > > +	if (id_priv->cma_dev) {
> > > > +		rdma_restrack_del(&id_priv->id.res);
> > > >  		cma_release_dev(id_priv);
> > > > +	}
> > > >  err1:
> > > >  	cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
> > > >  	return ret;
> > > > @@ -3731,14 +3742,18 @@ static int cma_send_sidr_rep(struct
> > > rdma_id_private *id_priv,
> > > >  	return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
> > > >  }
> > > >
> > > > -int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param
> > > *conn_param)
> > > > +int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param
> > > *conn_param,
> > > > +		  const char *caller)
> > > >  {
> > > >  	struct rdma_id_private *id_priv;
> > > >  	int ret;
> > > >
> > > >  	id_priv = container_of(id, struct rdma_id_private, id);
> > > >
> > > > -	id_priv->owner = task_pid_nr(current);
> > > > +	if (caller)
> > > > +		id_priv->id.caller = caller;
> > > > +	else
> > > > +		id_priv->owner = task_pid_nr(current);
> > > >
> > > >  	if (!cma_comp(id_priv, RDMA_CM_CONNECT))
> > > >  		return -EINVAL;
> > > > @@ -3778,7 +3793,7 @@ int rdma_accept(struct rdma_cm_id *id, struct
> > > rdma_conn_param *conn_param)
> > > >  	rdma_reject(id, NULL, 0);
> > > >  	return ret;
> > > >  }
> > > > -EXPORT_SYMBOL(rdma_accept);
> > > > +EXPORT_SYMBOL(__rdma_accept);
> > > >
> > > >  int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
> > > >  {
> > > > diff --git a/drivers/infiniband/core/nldev.c
> > b/drivers/infiniband/core/nldev.c
> > > > index fa8655e..a4091b5 100644
> > > > --- a/drivers/infiniband/core/nldev.c
> > > > +++ b/drivers/infiniband/core/nldev.c
> > > > @@ -34,6 +34,7 @@
> > > >  #include <linux/pid.h>
> > > >  #include <linux/pid_namespace.h>
> > > >  #include <net/netlink.h>
> > > > +#include <rdma/rdma_cm.h>
> > > >  #include <rdma/rdma_netlink.h>
> > > >
> > > >  #include "core_priv.h"
> > > > @@ -71,6 +72,22 @@
> > > >  	[RDMA_NLDEV_ATTR_RES_PID]		= { .type = NLA_U32
},
> > > >  	[RDMA_NLDEV_ATTR_RES_KERN_NAME]		= { .type =
> > > NLA_NUL_STRING,
> > > >  						    .len =
TASK_COMM_LEN },
> > > > +	[RDMA_NLDEV_ATTR_RES_CM_ID]		= { .type =
> > > NLA_NESTED },
> > >
> > > I would like to use this opportunity. There is CM_ID, so users will be
> > > able to query nldev directly on this ID (once we implement .doit), but
> > > can we found a proper abstraction for other objects (PD, CQ, QP
e.t.c.)?
> > >
> > > I want to have that all resources will have something similar to
> > ifindex/ibindex.
> > >
> >
> > Pds, cqs, and qps, all have  a device-unique number.  So
> > ibindex/restrack_type/object_id should work.  But cm_id's don't have
that.
> > Similar to a socket I guess.  So I'm not sure how to identify cm_ids
other
> > than by the ipaddresses/ip ports.
> 
> It is opposite, cm_id is a unique number, but other objects don't have
> such. What about PD and CQ?
> 
> We can declare that access to QP .doit willbe based on QPN, PD .doit
> will be based on local_dma_lkey, but what will be CQ identifier?
> 

Now that I've thought about this more, I realize the core verbs data
structures don't have identifiers for most objects.  I'm not sure why the
qp_num is exposed, but I guess it is because the qp num values must be
exchanged between both ends of an IB connection to setup the connection.    

The  local_dma_lkey  value will not be unique for each PD.  For all iwarp
devices the local_dma_lkey is 0.  (I think the same is true for IB devices).
So PD has the same issue as CQ.   

I suppose we can use rkey for MRs, but if the MR is not in the VALID state,
the rkey isn't necessarily accurate (the LSB of the rkey can be changed by
the application with each REG_MR operation).

The cm_id doesn't have any unique identifier either, except maybe the
combination of src/dst addresses and port space. 

With iw_cxgb4, CQs, PDs, and MRs all have a unique identifier just like a QP
does.  I expect all hw implementations of rdma have numeric identifiers for
CQs, PDs, QPs, etc.    They aren't exposed to the core API though.  

Perhaps we add a device-unique (or globally unique) identifier as part of
the restrack struct and use that for GET/SET?

Steve.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Leon Romanovsky Feb. 5, 2018, 3:43 p.m. UTC | #16
On Mon, Feb 05, 2018 at 09:33:07AM -0600, Steve Wise wrote:
> Hey Leon,
>
> >
> > It is opposite, cm_id is a unique number, but other objects don't have
> > such. What about PD and CQ?
> >
> > We can declare that access to QP .doit willbe based on QPN, PD .doit
> > will be based on local_dma_lkey, but what will be CQ identifier?
> >
>
> Now that I've thought about this more, I realize the core verbs data
> structures don't have identifiers for most objects.  I'm not sure why the
> qp_num is exposed, but I guess it is because the qp num values must be
> exchanged between both ends of an IB connection to setup the connection.
>
> The  local_dma_lkey  value will not be unique for each PD.  For all iwarp
> devices the local_dma_lkey is 0.  (I think the same is true for IB devices).
> So PD has the same issue as CQ.
>
> I suppose we can use rkey for MRs, but if the MR is not in the VALID state,
> the rkey isn't necessarily accurate (the LSB of the rkey can be changed by
> the application with each REG_MR operation).
>
> The cm_id doesn't have any unique identifier either, except maybe the
> combination of src/dst addresses and port space.
>
> With iw_cxgb4, CQs, PDs, and MRs all have a unique identifier just like a QP
> does.  I expect all hw implementations of rdma have numeric identifiers for
> CQs, PDs, QPs, etc.    They aren't exposed to the core API though.
>
> Perhaps we add a device-unique (or globally unique) identifier as part of
> the restrack struct and use that for GET/SET?

+1,
I like it, it is very clean.

Thanks

>
> Steve.
>
Steve Wise Feb. 5, 2018, 5:06 p.m. UTC | #17
> > >
> > > It is opposite, cm_id is a unique number, but other objects don't have
> > > such. What about PD and CQ?
> > >
> > > We can declare that access to QP .doit willbe based on QPN, PD .doit
> > > will be based on local_dma_lkey, but what will be CQ identifier?
> > >
> >
> > Now that I've thought about this more, I realize the core verbs data
> > structures don't have identifiers for most objects.  I'm not sure why
the
> > qp_num is exposed, but I guess it is because the qp num values must be
> > exchanged between both ends of an IB connection to setup the connection.
> >
> > The  local_dma_lkey  value will not be unique for each PD.  For all
iwarp
> > devices the local_dma_lkey is 0.  (I think the same is true for IB
devices).
> > So PD has the same issue as CQ.
> >
> > I suppose we can use rkey for MRs, but if the MR is not in the VALID
state,
> > the rkey isn't necessarily accurate (the LSB of the rkey can be changed
by
> > the application with each REG_MR operation).
> >
> > The cm_id doesn't have any unique identifier either, except maybe the
> > combination of src/dst addresses and port space.
> >
> > With iw_cxgb4, CQs, PDs, and MRs all have a unique identifier just like
a QP
> > does.  I expect all hw implementations of rdma have numeric identifiers
for
> > CQs, PDs, QPs, etc.    They aren't exposed to the core API though.
> >
> > Perhaps we add a device-unique (or globally unique) identifier as part
of
> > the restrack struct and use that for GET/SET?
> 
> +1,
> I like it, it is very clean.
> 
> Thanks

This should be done to a subsequent series.  Perhaps when GET/SET are
actually implemented...

Steve.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Wise Feb. 5, 2018, 5:12 p.m. UTC | #18
> Hey Leon,
> 

...

> > > > > diff --git a/drivers/infiniband/core/cma.c
> > > b/drivers/infiniband/core/cma.c
> > > > > index 72ad52b..51fbfa1 100644
> > > > > --- a/drivers/infiniband/core/cma.c
> > > > > +++ b/drivers/infiniband/core/cma.c
> > > > > @@ -465,6 +465,9 @@ static void _cma_attach_to_dev(struct
> > > > rdma_id_private *id_priv,
> > > > >  	id_priv->id.route.addr.dev_addr.transport =
> > > > >  		rdma_node_get_transport(cma_dev->device->node_type);
> > > > >  	list_add_tail(&id_priv->list, &cma_dev->id_list);
> > > > > +	id_priv->id.res.type = RDMA_RESTRACK_CM_ID;
> > > > > +	id_priv->id.res.kern_name = id_priv->id.caller;
> > > >
> > > > Steve, I don't like it, I worked hard to hide it from the users of
> > > restrack,
> > > > and don't see reason why the same trick as with
> ib_create_cq/ib_create_pd
> > > > won't
> > > > work here.
> > >
> > > I am doing the same trick, no?  rdma_create_id() is a static inline
that
> > > passes KBUILD_MODNAME.  The issue is that at the time the rdma_cm_id
is
> > > created, it is not associated with any ib_device.  That only happens
at
> > > cma_attach time.  So how can the resource be added if there is no
device?
> > >
> >
> > So maybe, we don't need to add resource to the DB at rdma_create_id
> > stage and do it in cma_attach only, and in that stage you will update
> > the kern_name with KBUILD_MODNAME.
> 
> Yea, I'll look into this.

The current patch is only adding the resource in _cma_attach_to_dev().  And
using KBUILD_MODNAME in _cma_attach_to_dev() would always end up with
"[rdma_cm]".   That's why the caller string is saved in rdma_create_id()
static inline function; so it will get the true module name.  

I don't see any change needed here.

Thanks,

Steve.


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Wise Feb. 5, 2018, 7:35 p.m. UTC | #19
> > > > > @@ -1786,9 +1793,10 @@ static struct rdma_id_private
> > > > *cma_new_conn_id(struct rdma_cm_id *listen_id,
> > > > >  		ib_event->param.req_rcvd.primary_path->service_id;
> > > > >  	int ret;
> > > > >
> > > > > -	id = rdma_create_id(listen_id->route.addr.dev_addr.net,
> > > > > +	id = __rdma_create_id(listen_id->route.addr.dev_addr.net,
> > > > >  			    listen_id->event_handler,
listen_id->context,
> > > > > -			    listen_id->ps,
> > > ib_event->param.req_rcvd.qp_type);
> > > > > +			    listen_id->ps, ib_event-
> >param.req_rcvd.qp_type,
> > > > > +			    listen_id->caller);
> > > >
> > > > I think the cleanest way will be to create some struct and pass
pointer to
> > > it so
> > > > you can unfold all relevant data inside of __rdma_create_id().
> > > >
> > >
> > > Why is that cleaner?  Marshall up the data into a struct, pass a ptr,
> > > unmarshall it all...
> >
> > I counted 6 arguments, and for me, it smells like something wrong.
> >
> 
> I'll look into changing this.
> 

Changing this will force changing all the applications using
rdma_create_id().  I'd rather not do that as part of this series.  It
dilutes the subject of the series.

Does anyone else care either way? 

Steve.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe Feb. 5, 2018, 8 p.m. UTC | #20
On Mon, Feb 05, 2018 at 09:33:07AM -0600, Steve Wise wrote:

> Perhaps we add a device-unique (or globally unique) identifier as part of
> the restrack struct and use that for GET/SET?

What use is an ID if the user can't associate that with something
meaningful?

For userspace, a tuple of the filehandle and existing the per-file
handle # would make some sense as a unique ID, but userspace is going
to have a pointer not necessarily the handle number :(

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Wise Feb. 5, 2018, 8:28 p.m. UTC | #21
> On Mon, Feb 05, 2018 at 09:33:07AM -0600, Steve Wise wrote:
> 
> > Perhaps we add a device-unique (or globally unique) identifier as part
of
> > the restrack struct and use that for GET/SET?
> 
> What use is an ID if the user can't associate that with something
> meaningful?

It uniquely identifies the widget you want to GET or SET.  Ideally, having
it be something meaningful is nice, but if we want to support GET/SET on
various single objects, we need identifiers.  And assigning them as part of
restrack would do the trick.  

However, I don't see GET/SET being useful for QP, CQ, PD, CM_ID, nor MR
resources.  Just DUMP.  I do see perhaps devices and links having GET/SET.
For instance setting up stuff for RXE and SIW soft devices. 

> 
> For userspace, a tuple of the filehandle and existing the per-file
> handle # would make some sense as a unique ID, but userspace is going
> to have a pointer not necessarily the handle number :(
> 

I'm not following you here.  Who does filehandle/handle# pertain to rdmatool
fetching objects via netlnk and possibly setting them?  

Steve.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe Feb. 5, 2018, 8:36 p.m. UTC | #22
On Mon, Feb 05, 2018 at 02:28:02PM -0600, Steve Wise wrote:
> > On Mon, Feb 05, 2018 at 09:33:07AM -0600, Steve Wise wrote:
> > 
> > > Perhaps we add a device-unique (or globally unique) identifier as part
> of
> > > the restrack struct and use that for GET/SET?
> > 
> > What use is an ID if the user can't associate that with something
> > meaningful?
> 
> It uniquely identifies the widget you want to GET or SET.  Ideally, having
> it be something meaningful is nice, but if we want to support GET/SET on
> various single objects, we need identifiers.  And assigning them as part of
> restrack would do the trick.  
> 
> However, I don't see GET/SET being useful for QP, CQ, PD, CM_ID, nor MR
> resources.  Just DUMP.  I do see perhaps devices and links having GET/SET.
> For instance setting up stuff for RXE and SIW soft devices. 

Well exactly.. I don't expect GET/SET for restrack objects..

> > For userspace, a tuple of the filehandle and existing the per-file
> > handle # would make some sense as a unique ID, but userspace is going
> > to have a pointer not necessarily the handle number :(
> 
> I'm not following you here.  Who does filehandle/handle# pertain to rdmatool
> fetching objects via netlnk and possibly setting them?  

It is the other side of the question - when rdmatool presents me an
object how do I figure out what it is in 'my world' as userspace.

Like a PD for instance.. If I have two in a process how do I know
which is which compared to a dump?

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Wise Feb. 5, 2018, 8:53 p.m. UTC | #23
> 
> > > For userspace, a tuple of the filehandle and existing the per-file
> > > handle # would make some sense as a unique ID, but userspace is going
> > > to have a pointer not necessarily the handle number :(
> >
> > I'm not following you here.  Who does filehandle/handle# pertain to
rdmatool
> > fetching objects via netlnk and possibly setting them?
> 
> It is the other side of the question - when rdmatool presents me an
> object how do I figure out what it is in 'my world' as userspace.
> 
> Like a PD for instance.. If I have two in a process how do I know
> which is which compared to a dump?

You don't. :)  As it stands now, you can tell only if a given PD is in your
process or not based on the process id.

Even if we create a device-unique ID for each restrack object, to make it
useful to for userspace analysis would require exposing that ID in the
various user object structures (or via query methods).   EG having
pd->res_id, cq->res_id, etc...

Steve. 


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe Feb. 5, 2018, 9:16 p.m. UTC | #24
On Mon, Feb 05, 2018 at 02:53:23PM -0600, Steve Wise wrote:

> Even if we create a device-unique ID for each restrack object, to make it
> useful to for userspace analysis would require exposing that ID in the
> various user object structures (or via query methods).   EG having
> pd->res_id, cq->res_id, etc...

As I said, we have that, it is the fd, handle # tuple and most verbs
objects expose the handle # in the user visible part of the object
struct..

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Wise Feb. 5, 2018, 10:16 p.m. UTC | #25
> 
> On Mon, Feb 05, 2018 at 02:53:23PM -0600, Steve Wise wrote:
> 
> > Even if we create a device-unique ID for each restrack object, to make
it
> > useful to for userspace analysis would require exposing that ID in the
> > various user object structures (or via query methods).   EG having
> > pd->res_id, cq->res_id, etc...
> 
> As I said, we have that, it is the fd, handle # tuple and most verbs
> objects expose the handle # in the user visible part of the object
> struct..

Can you please show me an example of the handle # for some object like PD or
CQ?  This doesn't address kernel users' object, though, correct?

Thanks,

Steve.




--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe Feb. 5, 2018, 10:20 p.m. UTC | #26
On Mon, Feb 05, 2018 at 04:16:37PM -0600, Steve Wise wrote:
> > 
> > On Mon, Feb 05, 2018 at 02:53:23PM -0600, Steve Wise wrote:
> > 
> > > Even if we create a device-unique ID for each restrack object, to make
> it
> > > useful to for userspace analysis would require exposing that ID in the
> > > various user object structures (or via query methods).   EG having
> > > pd->res_id, cq->res_id, etc...
> > 
> > As I said, we have that, it is the fd, handle # tuple and most verbs
> > objects expose the handle # in the user visible part of the object
> > struct..
> 
> Can you please show me an example of the handle # for some object like PD or
> CQ?

From rdma-core verbs.h:

struct ibv_pd {
        struct ibv_context     *context;
        uint32_t                handle;
};

> This doesn't address kernel users' object, though, correct?

Right.

And id'ing a file descriptor gets kinda hard too..

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Wise Feb. 5, 2018, 10:22 p.m. UTC | #27
> >
> > As I said, we have that, it is the fd, handle # tuple and most verbs
> > objects expose the handle # in the user visible part of the object
> > struct..
> 
> Can you please show me an example of the handle # for some object like PD
or
> CQ?  This doesn't address kernel users' object, though, correct?


Never mind, I see: ibv_pd.handle,  ibv_mr.handle, etc...


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Leon Romanovsky Feb. 6, 2018, 8:40 a.m. UTC | #28
On Mon, Feb 05, 2018 at 03:20:25PM -0700, Jason Gunthorpe wrote:
> On Mon, Feb 05, 2018 at 04:16:37PM -0600, Steve Wise wrote:
> > >
> > > On Mon, Feb 05, 2018 at 02:53:23PM -0600, Steve Wise wrote:
> > >
> > > > Even if we create a device-unique ID for each restrack object, to make
> > it
> > > > useful to for userspace analysis would require exposing that ID in the
> > > > various user object structures (or via query methods).   EG having
> > > > pd->res_id, cq->res_id, etc...
> > >
> > > As I said, we have that, it is the fd, handle # tuple and most verbs
> > > objects expose the handle # in the user visible part of the object
> > > struct..
> >
> > Can you please show me an example of the handle # for some object like PD or
> > CQ?
>
> From rdma-core verbs.h:
>
> struct ibv_pd {
>         struct ibv_context     *context;
>         uint32_t                handle;
> };
>
> > This doesn't address kernel users' object, though, correct?
>
> Right.
>
> And id'ing a file descriptor gets kinda hard too..

And how should we handle kernel objects?

>
> Jason
Jason Gunthorpe Feb. 6, 2018, 3:25 p.m. UTC | #29
On Tue, Feb 06, 2018 at 10:40:19AM +0200, Leon Romanovsky wrote:
> On Mon, Feb 05, 2018 at 03:20:25PM -0700, Jason Gunthorpe wrote:
> > On Mon, Feb 05, 2018 at 04:16:37PM -0600, Steve Wise wrote:
> > > >
> > > > On Mon, Feb 05, 2018 at 02:53:23PM -0600, Steve Wise wrote:
> > > >
> > > > > Even if we create a device-unique ID for each restrack object, to make
> > > it
> > > > > useful to for userspace analysis would require exposing that ID in the
> > > > > various user object structures (or via query methods).   EG having
> > > > > pd->res_id, cq->res_id, etc...
> > > >
> > > > As I said, we have that, it is the fd, handle # tuple and most verbs
> > > > objects expose the handle # in the user visible part of the object
> > > > struct..
> > >
> > > Can you please show me an example of the handle # for some object like PD or
> > > CQ?
> >
> > From rdma-core verbs.h:
> >
> > struct ibv_pd {
> >         struct ibv_context     *context;
> >         uint32_t                handle;
> > };
> >
> > > This doesn't address kernel users' object, though, correct?
> >
> > Right.
> >
> > And id'ing a file descriptor gets kinda hard too..
> 
> And how should we handle kernel objects?

hash'd pointer to internal struct?

Jason

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 72ad52b..51fbfa1 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -465,6 +465,9 @@  static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
 	id_priv->id.route.addr.dev_addr.transport =
 		rdma_node_get_transport(cma_dev->device->node_type);
 	list_add_tail(&id_priv->list, &cma_dev->id_list);
+	id_priv->id.res.type = RDMA_RESTRACK_CM_ID;
+	id_priv->id.res.kern_name = id_priv->id.caller;
+	rdma_restrack_add(&id_priv->id.res);
 }
 
 static void cma_attach_to_dev(struct rdma_id_private *id_priv,
@@ -737,10 +740,10 @@  static void cma_deref_id(struct rdma_id_private *id_priv)
 		complete(&id_priv->comp);
 }
 
-struct rdma_cm_id *rdma_create_id(struct net *net,
-				  rdma_cm_event_handler event_handler,
-				  void *context, enum rdma_port_space ps,
-				  enum ib_qp_type qp_type)
+struct rdma_cm_id *__rdma_create_id(struct net *net,
+				    rdma_cm_event_handler event_handler,
+				    void *context, enum rdma_port_space ps,
+				    enum ib_qp_type qp_type, const char *caller)
 {
 	struct rdma_id_private *id_priv;
 
@@ -748,7 +751,10 @@  struct rdma_cm_id *rdma_create_id(struct net *net,
 	if (!id_priv)
 		return ERR_PTR(-ENOMEM);
 
-	id_priv->owner = task_pid_nr(current);
+	if (caller)
+		id_priv->id.caller = caller;
+	else
+		id_priv->owner = task_pid_nr(current);
 	id_priv->state = RDMA_CM_IDLE;
 	id_priv->id.context = context;
 	id_priv->id.event_handler = event_handler;
@@ -768,7 +774,7 @@  struct rdma_cm_id *rdma_create_id(struct net *net,
 
 	return &id_priv->id;
 }
-EXPORT_SYMBOL(rdma_create_id);
+EXPORT_SYMBOL(__rdma_create_id);
 
 static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
 {
@@ -1628,6 +1634,7 @@  void rdma_destroy_id(struct rdma_cm_id *id)
 	mutex_unlock(&id_priv->handler_mutex);
 
 	if (id_priv->cma_dev) {
+		rdma_restrack_del(&id_priv->id.res);
 		if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
 			if (id_priv->cm_id.ib)
 				ib_destroy_cm_id(id_priv->cm_id.ib);
@@ -1786,9 +1793,10 @@  static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
 		ib_event->param.req_rcvd.primary_path->service_id;
 	int ret;
 
-	id = rdma_create_id(listen_id->route.addr.dev_addr.net,
+	id = __rdma_create_id(listen_id->route.addr.dev_addr.net,
 			    listen_id->event_handler, listen_id->context,
-			    listen_id->ps, ib_event->param.req_rcvd.qp_type);
+			    listen_id->ps, ib_event->param.req_rcvd.qp_type,
+			    listen_id->caller);
 	if (IS_ERR(id))
 		return NULL;
 
@@ -1843,8 +1851,8 @@  static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
 	struct net *net = listen_id->route.addr.dev_addr.net;
 	int ret;
 
-	id = rdma_create_id(net, listen_id->event_handler, listen_id->context,
-			    listen_id->ps, IB_QPT_UD);
+	id = __rdma_create_id(net, listen_id->event_handler, listen_id->context,
+			      listen_id->ps, IB_QPT_UD, listen_id->caller);
 	if (IS_ERR(id))
 		return NULL;
 
@@ -2110,10 +2118,11 @@  static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 		goto out;
 
 	/* Create a new RDMA id for the new IW CM ID */
-	new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net,
-				   listen_id->id.event_handler,
-				   listen_id->id.context,
-				   RDMA_PS_TCP, IB_QPT_RC);
+	new_cm_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
+				     listen_id->id.event_handler,
+				     listen_id->id.context,
+				     RDMA_PS_TCP, IB_QPT_RC,
+				     listen_id->id.caller);
 	if (IS_ERR(new_cm_id)) {
 		ret = -ENOMEM;
 		goto out;
@@ -2238,8 +2247,8 @@  static void cma_listen_on_dev(struct rdma_id_private *id_priv,
 	if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
 		return;
 
-	id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
-			    id_priv->id.qp_type);
+	id = __rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
+			      id_priv->id.qp_type, id_priv->id.caller);
 	if (IS_ERR(id))
 		return;
 
@@ -3347,8 +3356,10 @@  int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 
 	return 0;
 err2:
-	if (id_priv->cma_dev)
+	if (id_priv->cma_dev) {
+		rdma_restrack_del(&id_priv->id.res);
 		cma_release_dev(id_priv);
+	}
 err1:
 	cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
 	return ret;
@@ -3731,14 +3742,18 @@  static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
 	return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
 }
 
-int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
+int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
+		  const char *caller)
 {
 	struct rdma_id_private *id_priv;
 	int ret;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
 
-	id_priv->owner = task_pid_nr(current);
+	if (caller)
+		id_priv->id.caller = caller;
+	else
+		id_priv->owner = task_pid_nr(current);
 
 	if (!cma_comp(id_priv, RDMA_CM_CONNECT))
 		return -EINVAL;
@@ -3778,7 +3793,7 @@  int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 	rdma_reject(id, NULL, 0);
 	return ret;
 }
-EXPORT_SYMBOL(rdma_accept);
+EXPORT_SYMBOL(__rdma_accept);
 
 int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
 {
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index fa8655e..a4091b5 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -34,6 +34,7 @@ 
 #include <linux/pid.h>
 #include <linux/pid_namespace.h>
 #include <net/netlink.h>
+#include <rdma/rdma_cm.h>
 #include <rdma/rdma_netlink.h>
 
 #include "core_priv.h"
@@ -71,6 +72,22 @@ 
 	[RDMA_NLDEV_ATTR_RES_PID]		= { .type = NLA_U32 },
 	[RDMA_NLDEV_ATTR_RES_KERN_NAME]		= { .type = NLA_NUL_STRING,
 						    .len = TASK_COMM_LEN },
+	[RDMA_NLDEV_ATTR_RES_CM_ID]		= { .type = NLA_NESTED },
+	[RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY]	= { .type = NLA_NESTED },
+	[RDMA_NLDEV_ATTR_RES_PS]		= { .type = NLA_U32 },
+	[RDMA_NLDEV_ATTR_RES_IPV4_SADDR]	= {
+				.len = FIELD_SIZEOF(struct iphdr, saddr) },
+	[RDMA_NLDEV_ATTR_RES_IPV4_DADDR]	= {
+				.len = FIELD_SIZEOF(struct iphdr, saddr) },
+	[RDMA_NLDEV_ATTR_RES_IPV6_SADDR]	= {
+				.len = FIELD_SIZEOF(struct ipv6hdr, saddr) },
+	[RDMA_NLDEV_ATTR_RES_IPV6_DADDR]	= {
+				.len = FIELD_SIZEOF(struct ipv6hdr, saddr) },
+	[RDMA_NLDEV_ATTR_RES_IP_SPORT]		= { .type = NLA_U16 },
+	[RDMA_NLDEV_ATTR_RES_IP_DPORT]		= { .type = NLA_U16 },
+	[RDMA_NLDEV_ATTR_RES_DEV_TYPE]		= { .type = NLA_U8 },
+	[RDMA_NLDEV_ATTR_RES_TRANSPORT_TYPE]	= { .type = NLA_U8 },
+	[RDMA_NLDEV_ATTR_RES_NETWORK_TYPE]	= { .type = NLA_U8 },
 };
 
 static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
@@ -182,6 +199,7 @@  static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
 		[RDMA_RESTRACK_PD] = "pd",
 		[RDMA_RESTRACK_CQ] = "cq",
 		[RDMA_RESTRACK_QP] = "qp",
+		[RDMA_RESTRACK_CM_ID] = "cm_id",
 	};
 
 	struct rdma_restrack_root *res = &device->res;
@@ -284,6 +302,99 @@  static int fill_res_qp_entry(struct sk_buff *msg,
 	return -EMSGSIZE;
 }
 
+static int fill_res_cm_id_entry(struct sk_buff *msg,
+				struct rdma_cm_id *cm_id, uint32_t port)
+{
+	struct rdma_id_private *id_priv;
+	struct nlattr *entry_attr;
+
+	if (port && port != cm_id->port_num)
+		return 0;
+
+	id_priv = container_of(cm_id, struct rdma_id_private, id);
+	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY);
+	if (!entry_attr)
+		goto out;
+
+	if (cm_id->port_num &&
+	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
+		goto err;
+
+	if (id_priv->qp_num &&
+	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num))
+		goto err;
+
+	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
+		goto err;
+
+	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
+		goto err;
+	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
+		goto err;
+	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_DEV_TYPE,
+		       id_priv->id.route.addr.dev_addr.dev_type))
+		goto err;
+	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TRANSPORT_TYPE,
+		       id_priv->id.route.addr.dev_addr.transport))
+		goto err;
+
+	if (cm_id->route.addr.src_addr.ss_family == AF_INET) {
+		struct sockaddr_in *sin;
+
+		sin = (struct sockaddr_in *)&cm_id->route.addr.src_addr;
+		if (nla_put_in_addr(msg, RDMA_NLDEV_ATTR_RES_IPV4_SADDR,
+				    sin->sin_addr.s_addr))
+			goto err;
+		if (nla_put_net16(msg, RDMA_NLDEV_ATTR_RES_IP_SPORT,
+				  sin->sin_port))
+			goto err;
+
+		sin = (struct sockaddr_in *)&cm_id->route.addr.dst_addr;
+		if (nla_put_in_addr(msg, RDMA_NLDEV_ATTR_RES_IPV4_DADDR,
+				    sin->sin_addr.s_addr))
+			goto err;
+		if (nla_put_net16(msg, RDMA_NLDEV_ATTR_RES_IP_DPORT,
+				  sin->sin_port))
+			goto err;
+	} else {
+		struct sockaddr_in6 *sin6;
+
+		sin6 = (struct sockaddr_in6 *)&cm_id->route.addr.src_addr;
+		if (nla_put_in6_addr(msg, RDMA_NLDEV_ATTR_RES_IPV6_SADDR,
+				     &sin6->sin6_addr))
+			goto err;
+		if (nla_put_net16(msg, RDMA_NLDEV_ATTR_RES_IP_SPORT,
+				  sin6->sin6_port))
+			goto err;
+
+		sin6 = (struct sockaddr_in6 *)&cm_id->route.addr.dst_addr;
+		if (nla_put_in6_addr(msg, RDMA_NLDEV_ATTR_RES_IPV6_DADDR,
+				     &sin6->sin6_addr))
+			goto err;
+		if (nla_put_net16(msg, RDMA_NLDEV_ATTR_RES_IP_DPORT,
+				  sin6->sin6_port))
+			goto err;
+	}
+
+	if (id_priv->id.caller) {
+		if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
+				   id_priv->id.caller))
+			goto err;
+	} else {
+		/* CMA keeps the owning pid. */
+		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, id_priv->owner))
+			goto err;
+	}
+
+	nla_nest_end(msg, entry_attr);
+	return 0;
+
+err:
+	nla_nest_cancel(msg, entry_attr);
+out:
+	return -EMSGSIZE;
+}
+
 static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 			  struct netlink_ext_ack *extack)
 {
@@ -686,6 +797,137 @@  static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
 	return ret;
 }
 
+static int nldev_res_get_cm_id_dumpit(struct sk_buff *skb,
+				      struct netlink_callback *cb)
+{
+	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+	struct rdma_restrack_entry *res;
+	int err, ret = 0, idx = 0;
+	struct nlattr *table_attr;
+	struct ib_device *device;
+	int start = cb->args[0];
+	struct rdma_cm_id *cm_id = NULL;
+	struct nlmsghdr *nlh;
+	u32 index, port = 0;
+
+	err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+			  nldev_policy, NULL);
+	/*
+	 * Right now, we are expecting the device index to get QP information,
+	 * but it is possible to extend this code to return all devices in
+	 * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
+	 * if it doesn't exist, we will iterate over all devices.
+	 *
+	 * But it is not needed for now.
+	 */
+	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
+		return -EINVAL;
+
+	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+	device = ib_device_get_by_index(index);
+	if (!device)
+		return -EINVAL;
+
+	/*
+	 * If no PORT_INDEX is supplied, we will return all QPs from that device
+	 */
+	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
+		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+		if (!rdma_is_port_valid(device, port)) {
+			ret = -EINVAL;
+			goto err_index;
+		}
+	}
+
+	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+		RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_QP_GET),
+		0, NLM_F_MULTI);
+
+	if (fill_nldev_handle(skb, device)) {
+		ret = -EMSGSIZE;
+		goto err;
+	}
+
+	table_attr = nla_nest_start(skb, RDMA_NLDEV_ATTR_RES_CM_ID);
+	if (!table_attr) {
+		ret = -EMSGSIZE;
+		goto err;
+	}
+
+	down_read(&device->res.rwsem);
+	hash_for_each_possible(device->res.hash, res, node,
+			       RDMA_RESTRACK_CM_ID) {
+		if (idx < start)
+			goto next;
+
+		if ((rdma_is_kernel_res(res) &&
+		     task_active_pid_ns(current) != &init_pid_ns) ||
+		    (!rdma_is_kernel_res(res) &&
+		     task_active_pid_ns(current) !=
+		     task_active_pid_ns(res->task)))
+			/*
+			 * 1. Kernel QPs should be visible in init namsapce only
+			 * 2. Preent only QPs visible in the current namespace
+			 */
+			goto next;
+
+		if (!rdma_restrack_get(res))
+			/*
+			 * Resource is under release now, but we are not
+			 * relesing lock now, so it will be released in
+			 * our next pass, once we will get ->next pointer.
+			 */
+			goto next;
+
+		cm_id = container_of(res, struct rdma_cm_id, res);
+
+		up_read(&device->res.rwsem);
+		ret = fill_res_cm_id_entry(skb, cm_id, port);
+		down_read(&device->res.rwsem);
+		/*
+		 * Return resource back, but it won't be released till
+		 * the &device->res.rwsem will be released for write.
+		 */
+		rdma_restrack_put(res);
+
+		if (ret == -EMSGSIZE)
+			/*
+			 * There is a chance to optimize here.
+			 * It can be done by using list_prepare_entry
+			 * and list_for_each_entry_continue afterwards.
+			 */
+			break;
+		if (ret)
+			goto res_err;
+next:		idx++;
+	}
+	up_read(&device->res.rwsem);
+
+	nla_nest_end(skb, table_attr);
+	nlmsg_end(skb, nlh);
+	cb->args[0] = idx;
+
+	/*
+	 * No more CM_IDs to fill, cancel the message and
+	 * return 0 to mark end of dumpit.
+	 */
+	if (!cm_id)
+		goto err;
+
+	put_device(&device->dev);
+	return skb->len;
+
+res_err:
+	nla_nest_cancel(skb, table_attr);
+	up_read(&device->res.rwsem);
+
+err:
+	nlmsg_cancel(skb, nlh);
+
+err_index:
+	put_device(&device->dev);
+	return ret;
+}
 static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
 	[RDMA_NLDEV_CMD_GET] = {
 		.doit = nldev_get_doit,
@@ -712,6 +954,9 @@  static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
 		 * too.
 		 */
 	},
+	[RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
+		.dump = nldev_res_get_cm_id_dumpit,
+	},
 };
 
 void __init nldev_init(void)
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index 857637b..bb13169 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -3,6 +3,7 @@ 
  * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
  */
 
+#include <rdma/rdma_cm.h>
 #include <rdma/ib_verbs.h>
 #include <rdma/restrack.h>
 #include <linux/mutex.h>
@@ -67,6 +68,7 @@  static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
 	struct ib_pd *pd;
 	struct ib_cq *cq;
 	struct ib_qp *qp;
+	struct rdma_cm_id *cm_id;
 
 	switch (type) {
 	case RDMA_RESTRACK_PD:
@@ -85,6 +87,10 @@  static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
 		xrcd = container_of(res, struct ib_xrcd, res);
 		dev = xrcd->device;
 		break;
+	case RDMA_RESTRACK_CM_ID:
+		cm_id = container_of(res, struct rdma_cm_id, res);
+		dev = cm_id->device;
+		break;
 	default:
 		WARN_ONCE(true, "Wrong resource tracking type %u\n", type);
 		return NULL;
@@ -93,6 +99,27 @@  static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
 	return dev;
 }
 
+static bool res_is_user(struct rdma_restrack_entry *res)
+{
+	enum rdma_restrack_type type = res->type;
+	bool is_user;
+
+	switch (type) {
+	case RDMA_RESTRACK_CM_ID: {
+		struct rdma_cm_id *cm_id;
+
+		cm_id = container_of(res, struct rdma_cm_id, res);
+		is_user = !cm_id->caller;
+		break;
+	}
+	default:
+		is_user = !uaccess_kernel();
+		break;
+	}
+
+	return is_user;
+}
+
 void rdma_restrack_add(struct rdma_restrack_entry *res)
 {
 	struct ib_device *dev = res_to_dev(res);
@@ -100,7 +127,7 @@  void rdma_restrack_add(struct rdma_restrack_entry *res)
 	if (!dev)
 		return;
 
-	if (!uaccess_kernel()) {
+	if (res_is_user(res)) {
 		get_task_struct(current);
 		res->task = current;
 		res->kern_name = NULL;
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index d67219d..f7f0282 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -476,8 +476,8 @@  static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
 		return -ENOMEM;
 
 	ctx->uid = cmd.uid;
-	ctx->cm_id = rdma_create_id(current->nsproxy->net_ns,
-				    ucma_event_handler, ctx, cmd.ps, qp_type);
+	ctx->cm_id = __rdma_create_id(current->nsproxy->net_ns,
+			      ucma_event_handler, ctx, cmd.ps, qp_type, NULL);
 	if (IS_ERR(ctx->cm_id)) {
 		ret = PTR_ERR(ctx->cm_id);
 		goto err1;
@@ -1084,12 +1084,12 @@  static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
 	if (cmd.conn_param.valid) {
 		ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
 		mutex_lock(&file->mut);
-		ret = rdma_accept(ctx->cm_id, &conn_param);
+		ret = __rdma_accept(ctx->cm_id, &conn_param, NULL);
 		if (!ret)
 			ctx->uid = cmd.uid;
 		mutex_unlock(&file->mut);
 	} else
-		ret = rdma_accept(ctx->cm_id, NULL);
+		ret = __rdma_accept(ctx->cm_id, NULL, NULL);
 
 	ucma_put_ctx(ctx);
 	return ret;
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 5984225..73dac9b 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -155,6 +155,12 @@  struct rdma_cm_id {
 	enum rdma_port_space	 ps;
 	enum ib_qp_type		 qp_type;
 	u8			 port_num;
+	const char *caller;
+
+	/*
+	 * Internal to RDMA/core, don't use in the drivers
+	 */
+	struct rdma_restrack_entry     res;
 };
 
 struct rdma_id_private {
@@ -198,6 +204,11 @@  struct rdma_id_private {
 	enum ib_gid_type	gid_type;
 };
 
+struct rdma_cm_id *__rdma_create_id(struct net *net,
+				  rdma_cm_event_handler event_handler,
+				  void *context, enum rdma_port_space ps,
+				  enum ib_qp_type qp_type, const char *caller);
+
 /**
  * rdma_create_id - Create an RDMA identifier.
  *
@@ -210,10 +221,9 @@  struct rdma_id_private {
  *
  * The id holds a reference on the network namespace until it is destroyed.
  */
-struct rdma_cm_id *rdma_create_id(struct net *net,
-				  rdma_cm_event_handler event_handler,
-				  void *context, enum rdma_port_space ps,
-				  enum ib_qp_type qp_type);
+#define rdma_create_id(net, event_handler, context, ps, qp_type) \
+	__rdma_create_id((net), (event_handler), (context), (ps), (qp_type), \
+			 KBUILD_MODNAME)
 
 /**
   * rdma_destroy_id - Destroys an RDMA identifier.
@@ -325,6 +335,9 @@  int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
  */
 int rdma_listen(struct rdma_cm_id *id, int backlog);
 
+int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
+		  const char *caller);
+
 /**
  * rdma_accept - Called to accept a connection request or response.
  * @id: Connection identifier associated with the request.
@@ -340,7 +353,8 @@  int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
  * state of the qp associated with the id is modified to error, such that any
  * previously posted receive buffers would be flushed.
  */
-int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param);
+#define rdma_accept(id, conn_param) \
+	__rdma_accept((id), (conn_param),  KBUILD_MODNAME)
 
 /**
  * rdma_notify - Notifies the RDMA CM of an asynchronous event that has
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
index c2d8116..a794e0e 100644
--- a/include/rdma/restrack.h
+++ b/include/rdma/restrack.h
@@ -33,6 +33,10 @@  enum rdma_restrack_type {
 	 */
 	RDMA_RESTRACK_XRCD,
 	/**
+	 * @RDMA_RESTRACK_CM_ID: Connection Manager ID (CM_ID)
+	 */
+	RDMA_RESTRACK_CM_ID,
+	/**
 	 * @RDMA_RESTRACK_MAX: Last entry, used for array dclarations
 	 */
 	RDMA_RESTRACK_MAX
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 17e59be..4ed21ee 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -240,6 +240,8 @@  enum rdma_nldev_command {
 
 	RDMA_NLDEV_CMD_RES_QP_GET, /* can dump */
 
+	RDMA_NLDEV_CMD_RES_CM_ID_GET, /* can dump */
+
 	RDMA_NLDEV_NUM_OPS
 };
 
@@ -352,6 +354,34 @@  enum rdma_nldev_attr {
 	 */
 	RDMA_NLDEV_ATTR_RES_KERN_NAME,		/* string */
 
+	RDMA_NLDEV_ATTR_RES_CM_ID,		/* nested table */
+	RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,	/* nested table */
+	/*
+	 * rdma_cm_id port space.
+	 */
+	RDMA_NLDEV_ATTR_RES_PS,			/* u32 */
+	/*
+	 * IP Addresses/port attributes.
+	 */
+	RDMA_NLDEV_ATTR_RES_IPV4_SADDR,		/* u8[4] */
+	RDMA_NLDEV_ATTR_RES_IPV4_DADDR,		/* u8[4] */
+	RDMA_NLDEV_ATTR_RES_IPV6_SADDR,		/* u8[16] */
+	RDMA_NLDEV_ATTR_RES_IPV6_DADDR,		/* u8[16] */
+	RDMA_NLDEV_ATTR_RES_IP_SPORT,		/* BE u16 */
+	RDMA_NLDEV_ATTR_RES_IP_DPORT,		/* BE u16 */
+	/*
+	 * ARPHRD_INFINIBAND, ARPHRD_ETHER, ...
+	 */
+	RDMA_NLDEV_ATTR_RES_DEV_TYPE,		/* u8 */
+	/*
+	 * enum enum rdma_transport_type (IB, IWARP, ...)
+	 */
+	RDMA_NLDEV_ATTR_RES_TRANSPORT_TYPE,	/* u8 */
+	/*
+	 * enum rdma_network_type (IB, IPv4, IPv6,...)
+	 */
+	RDMA_NLDEV_ATTR_RES_NETWORK_TYPE,	/* u8 */
+
 	RDMA_NLDEV_ATTR_MAX
 };
 #endif /* _UAPI_RDMA_NETLINK_H */