diff mbox series

[v2,2/3] RDMA: Add NLDEV_GET_CHARDEV to allow char dev discovery and autoload

Message ID 20190614003819.19974-3-jgg@ziepe.ca (mailing list archive)
State Accepted
Headers show
Series Add RDMA_NLDEV_GET_CHARDEV | expand

Commit Message

Jason Gunthorpe June 14, 2019, 12:38 a.m. UTC
From: Jason Gunthorpe <jgg@mellanox.com>

Allow userspace to issue a netlink query against the ib_device for
something like "uverbs" and get back the char dev name, inode major/minor,
and interface ABI information for "uverbs0".

Since we are now in netlink this can also trigger a module autoload to
make the uverbs device come into existence.

Largely this will let us replace searching and reading inside sysfs to
setup devices, and provides an alternative (using driver_id) to device
name based provider binding for things like rxe.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/core_priv.h |  9 +++
 drivers/infiniband/core/device.c    | 98 +++++++++++++++++++++++++++++
 drivers/infiniband/core/nldev.c     | 91 +++++++++++++++++++++++++++
 include/rdma/ib_verbs.h             |  4 ++
 include/rdma/rdma_netlink.h         |  2 +
 include/uapi/rdma/rdma_netlink.h    | 14 +++++
 6 files changed, 218 insertions(+)

Comments

Doug Ledford June 14, 2019, 7 p.m. UTC | #1
On Thu, 2019-06-13 at 21:38 -0300, Jason Gunthorpe wrote:
> +       if (ibdev)
> +               ret = __ib_get_client_nl_info(ibdev, client_name,
> res);
> +       else
> +               ret = __ib_get_global_client_nl_info(client_name,
> res);
> +#ifdef CONFIG_MODULES
> +       if (ret == -ENOENT) {
> +               request_module("rdma-client-%s", client_name);
> +               if (ibdev)
> +                       ret = __ib_get_client_nl_info(ibdev,
> client_name, res);
> +               else
> +                       ret =
> __ib_get_global_client_nl_info(client_name, res);
> +       }
> +#endif

I was trying to put my finger on something yesterday while reading the
code, and this change makes it more clear for me.  Do we really want to
limit the info type based on ibdev?  It seems to me that all global
info retrieval should work whether you open a specific ibdev or not. 
It's only the things that need the ibdev to return the correct response
that should require it.  Right now we only have one global info
provider, but would it be better to do:

	if (!strcmp("rdma_cm", client_name))
		ret = __ib_get_global_client_nl_info(client_name, res);
	else
		ret = __ib_get_client_nl_info(ibdev, client_name, res);

The other thing I was wondering about was the module loading.  Every
attempt to load a module is a fork/exec cycle and a context switch over
to modprobe and back, and we make no attempt here to keep each
invocation of the netlink query from requesting a module.  I'm
concerned this is actually a potential DoS attack vector.  I was
thinking we should track each client name that's valid, and only try
each name once.  I saw four module names: rdma_cm, umad, issm, and
uverbs.  I'm wondering if we should have a static table in the netlink
file with an entry for each of the client names and a variable to
indicate we've attempted to load that module, and on -ENOENT, we check
the table for a match to our passed in client_name, and only if we have
a match, and it's load count is 0, do we call request_module() and
increment the load count.  Thoughts?
Leon Romanovsky June 15, 2019, 7:16 a.m. UTC | #2
On Fri, Jun 14, 2019 at 03:00:32PM -0400, Doug Ledford wrote:
> On Thu, 2019-06-13 at 21:38 -0300, Jason Gunthorpe wrote:
> > +       if (ibdev)
> > +               ret = __ib_get_client_nl_info(ibdev, client_name,
> > res);
> > +       else
> > +               ret = __ib_get_global_client_nl_info(client_name,
> > res);
> > +#ifdef CONFIG_MODULES
> > +       if (ret == -ENOENT) {
> > +               request_module("rdma-client-%s", client_name);
> > +               if (ibdev)
> > +                       ret = __ib_get_client_nl_info(ibdev,
> > client_name, res);
> > +               else
> > +                       ret =
> > __ib_get_global_client_nl_info(client_name, res);
> > +       }
> > +#endif
>
> I was trying to put my finger on something yesterday while reading the
> code, and this change makes it more clear for me.  Do we really want to
> limit the info type based on ibdev?  It seems to me that all global
> info retrieval should work whether you open a specific ibdev or not.
> It's only the things that need the ibdev to return the correct response
> that should require it.  Right now we only have one global info
> provider, but would it be better to do:
>
> 	if (!strcmp("rdma_cm", client_name))
> 		ret = __ib_get_global_client_nl_info(client_name, res);
> 	else
> 		ret = __ib_get_client_nl_info(ibdev, client_name, res);
>
> The other thing I was wondering about was the module loading.  Every
> attempt to load a module is a fork/exec cycle and a context switch over
> to modprobe and back, and we make no attempt here to keep each
> invocation of the netlink query from requesting a module.  I'm
> concerned this is actually a potential DoS attack vector.  I was
> thinking we should track each client name that's valid, and only try
> each name once.  I saw four module names: rdma_cm, umad, issm, and
> uverbs.  I'm wondering if we should have a static table in the netlink
> file with an entry for each of the client names and a variable to
> indicate we've attempted to load that module, and on -ENOENT, we check
> the table for a match to our passed in client_name, and only if we have
> a match, and it's load count is 0, do we call request_module() and
> increment the load count.  Thoughts?

Isn't request_module privileged operation, so only root or his friends
can do this DDoS?

Thanks

>
> --
> Doug Ledford <dledford@redhat.com>
>     GPG KeyID: B826A3330E572FDD
>     Key fingerprint = AE6B 1BDA 122B 23B4 265B  1274 B826 A333 0E57
> 2FDD
Doug Ledford June 15, 2019, 3:01 p.m. UTC | #3
On Sat, 2019-06-15 at 10:16 +0300, Leon Romanovsky wrote:
> On Fri, Jun 14, 2019 at 03:00:32PM -0400, Doug Ledford wrote:
> > On Thu, 2019-06-13 at 21:38 -0300, Jason Gunthorpe wrote:
> > > +       if (ibdev)
> > > +               ret = __ib_get_client_nl_info(ibdev, client_name,
> > > res);
> > > +       else
> > > +               ret = __ib_get_global_client_nl_info(client_name,
> > > res);
> > > +#ifdef CONFIG_MODULES
> > > +       if (ret == -ENOENT) {
> > > +               request_module("rdma-client-%s", client_name);
> > > +               if (ibdev)
> > > +                       ret = __ib_get_client_nl_info(ibdev,
> > > client_name, res);
> > > +               else
> > > +                       ret =
> > > __ib_get_global_client_nl_info(client_name, res);
> > > +       }
> > > +#endif
> > 
> > I was trying to put my finger on something yesterday while reading
> > the
> > code, and this change makes it more clear for me.  Do we really
> > want to
> > limit the info type based on ibdev?  It seems to me that all global
> > info retrieval should work whether you open a specific ibdev or
> > not.
> > It's only the things that need the ibdev to return the correct
> > response
> > that should require it.  Right now we only have one global info
> > provider, but would it be better to do:
> > 
> > 	if (!strcmp("rdma_cm", client_name))
> > 		ret = __ib_get_global_client_nl_info(client_name, res);
> > 	else
> > 		ret = __ib_get_client_nl_info(ibdev, client_name, res);
> > 
> > The other thing I was wondering about was the module
> > loading.  Every
> > attempt to load a module is a fork/exec cycle and a context switch
> > over
> > to modprobe and back, and we make no attempt here to keep each
> > invocation of the netlink query from requesting a module.  I'm
> > concerned this is actually a potential DoS attack vector.  I was
> > thinking we should track each client name that's valid, and only
> > try
> > each name once.  I saw four module names: rdma_cm, umad, issm, and
> > uverbs.  I'm wondering if we should have a static table in the
> > netlink
> > file with an entry for each of the client names and a variable to
> > indicate we've attempted to load that module, and on -ENOENT, we
> > check
> > the table for a match to our passed in client_name, and only if we
> > have
> > a match, and it's load count is 0, do we call request_module() and
> > increment the load count.  Thoughts?
> 
> Isn't request_module privileged operation, so only root or his
> friends
> can do this DDoS?

From what I can tell, SELinux *might* prevent a user from doing this,
but it is the only thing I could find that even tries to stop it. 
Which of course means there are no protections for SELinux disabled
case.  And I'm not sure SELinux stops it anyway.  In general, autoload
isn't very useful if the only person that can trigger it is root.  I
think in most cases, it is initiated by device file open or some such
that regular users can do.  It's possible things are fine because the
core code serializes module autoload and 1,000,000 simultaneous
requests will result in one real request, and 999,999 waiters that wait
for the result of the 1 and then take that back.  But as I read through
stuff, it's enough of a head scratcher that I'm tempted to try writing
an exploit and seeing what it does.
Jason Gunthorpe June 17, 2019, 6:54 p.m. UTC | #4
On Fri, Jun 14, 2019 at 03:00:32PM -0400, Doug Ledford wrote:
> On Thu, 2019-06-13 at 21:38 -0300, Jason Gunthorpe wrote:
> > +       if (ibdev)
> > +               ret = __ib_get_client_nl_info(ibdev, client_name,
> > res);
> > +       else
> > +               ret = __ib_get_global_client_nl_info(client_name,
> > res);
> > +#ifdef CONFIG_MODULES
> > +       if (ret == -ENOENT) {
> > +               request_module("rdma-client-%s", client_name);
> > +               if (ibdev)
> > +                       ret = __ib_get_client_nl_info(ibdev,
> > client_name, res);
> > +               else
> > +                       ret =
> > __ib_get_global_client_nl_info(client_name, res);
> > +       }
> > +#endif
> 
> I was trying to put my finger on something yesterday while reading the
> code, and this change makes it more clear for me.  Do we really want to
> limit the info type based on ibdev?  It seems to me that all global
> info retrieval should work whether you open a specific ibdev or
> not. 

Each chardev name has a specified query protocol - global chardevs
must not specify a ibdev, and local ones must. Each name can only be
global or ibdev - no mixing. Too confusing.

It is uapi so we should be strict, if the ibdev is not allowed then it
should be checked to be absent in case we do something different
later.

> The other thing I was wondering about was the module loading.  Every
> attempt to load a module is a fork/exec cycle and a context switch over
> to modprobe and back, and we make no attempt here to keep each

It is a common pattern in the kernel, ie we did exactly this code to
load the ib netlink module in the netlink core.

If there is a problem then it should be addressed globally..

> indicate we've attempted to load that module, and on -ENOENT, we check
> the table for a match to our passed in client_name, and only if we have
> a match, and it's load count is 0, do we call request_module() and
> increment the load count.  Thoughts?

I assume it becomes single threaded and batched at some point, so it
is not so bad...

Jason
Doug Ledford June 17, 2019, 7:27 p.m. UTC | #5
On Mon, 2019-06-17 at 18:54 +0000, Jason Gunthorpe wrote:
> On Fri, Jun 14, 2019 at 03:00:32PM -0400, Doug Ledford wrote:
> > On Thu, 2019-06-13 at 21:38 -0300, Jason Gunthorpe wrote:
> > > +       if (ibdev)
> > > +               ret = __ib_get_client_nl_info(ibdev, client_name,
> > > res);
> > > +       else
> > > +               ret = __ib_get_global_client_nl_info(client_name,
> > > res);
> > > +#ifdef CONFIG_MODULES
> > > +       if (ret == -ENOENT) {
> > > +               request_module("rdma-client-%s", client_name);
> > > +               if (ibdev)
> > > +                       ret = __ib_get_client_nl_info(ibdev,
> > > client_name, res);
> > > +               else
> > > +                       ret =
> > > __ib_get_global_client_nl_info(client_name, res);
> > > +       }
> > > +#endif
> > 
> > I was trying to put my finger on something yesterday while reading
> > the
> > code, and this change makes it more clear for me.  Do we really
> > want to
> > limit the info type based on ibdev?  It seems to me that all global
> > info retrieval should work whether you open a specific ibdev or
> > not. 
> 
> Each chardev name has a specified query protocol - global chardevs
> must not specify a ibdev, and local ones must. Each name can only be
> global or ibdev - no mixing. Too confusing.

I can see where that's the uapi as envisioned, my point though is would
it be better to allow opening of an ibdev, retrieval of device specific
data, and also retrieval of the available global data?  It just
prevents having to open two files to get information that isn't device
specific.  But, it's not a big deal either.

> It is uapi so we should be strict, if the ibdev is not allowed then
> it
> should be checked to be absent in case we do something different
> later.
> 
> > The other thing I was wondering about was the module
> > loading.  Every
> > attempt to load a module is a fork/exec cycle and a context switch
> > over
> > to modprobe and back, and we make no attempt here to keep each
> 
> It is a common pattern in the kernel, ie we did exactly this code to
> load the ib netlink module in the netlink core.
> 
> If there is a problem then it should be addressed globally..

Yeah, I agree.  I'm not sure there is a problem.

> > indicate we've attempted to load that module, and on -ENOENT, we
> > check
> > the table for a match to our passed in client_name, and only if we
> > have
> > a match, and it's load count is 0, do we call request_module() and
> > increment the load count.  Thoughts?
> 
> I assume it becomes single threaded and batched at some point, so it
> is not so bad...

Thanks, series applied.
Jason Gunthorpe June 18, 2019, 1:11 a.m. UTC | #6
On Mon, Jun 17, 2019 at 03:27:22PM -0400, Doug Ledford wrote:
> On Mon, 2019-06-17 at 18:54 +0000, Jason Gunthorpe wrote:
> > On Fri, Jun 14, 2019 at 03:00:32PM -0400, Doug Ledford wrote:
> > > On Thu, 2019-06-13 at 21:38 -0300, Jason Gunthorpe wrote:
> > > > +       if (ibdev)
> > > > +               ret = __ib_get_client_nl_info(ibdev, client_name,
> > > > res);
> > > > +       else
> > > > +               ret = __ib_get_global_client_nl_info(client_name,
> > > > res);
> > > > +#ifdef CONFIG_MODULES
> > > > +       if (ret == -ENOENT) {
> > > > +               request_module("rdma-client-%s", client_name);
> > > > +               if (ibdev)
> > > > +                       ret = __ib_get_client_nl_info(ibdev,
> > > > client_name, res);
> > > > +               else
> > > > +                       ret =
> > > > __ib_get_global_client_nl_info(client_name, res);
> > > > +       }
> > > > +#endif
> > > 
> > > I was trying to put my finger on something yesterday while reading
> > > the
> > > code, and this change makes it more clear for me.  Do we really
> > > want to
> > > limit the info type based on ibdev?  It seems to me that all global
> > > info retrieval should work whether you open a specific ibdev or
> > > not. 
> > 
> > Each chardev name has a specified query protocol - global chardevs
> > must not specify a ibdev, and local ones must. Each name can only be
> > global or ibdev - no mixing. Too confusing.
> 
> I can see where that's the uapi as envisioned, my point though is would
> it be better to allow opening of an ibdev, retrieval of device specific
> data, and also retrieval of the available global data?  It just
> prevents having to open two files to get information that isn't device
> specific.  But, it's not a big deal either.

This runs on netlink, there is no 'opening the ibdev' or a 2nd
file. It is just the ibdev interface index # at this point on the same
netlink socket.

Thanks,
Jason
Leon Romanovsky June 18, 2019, 12:15 p.m. UTC | #7
On Mon, Jun 17, 2019 at 03:27:22PM -0400, Doug Ledford wrote:
> On Mon, 2019-06-17 at 18:54 +0000, Jason Gunthorpe wrote:
> > On Fri, Jun 14, 2019 at 03:00:32PM -0400, Doug Ledford wrote:
> > > On Thu, 2019-06-13 at 21:38 -0300, Jason Gunthorpe wrote:
> > > > +       if (ibdev)
> > > > +               ret = __ib_get_client_nl_info(ibdev, client_name,
> > > > res);
> > > > +       else
> > > > +               ret = __ib_get_global_client_nl_info(client_name,
> > > > res);
> > > > +#ifdef CONFIG_MODULES
> > > > +       if (ret == -ENOENT) {
> > > > +               request_module("rdma-client-%s", client_name);
> > > > +               if (ibdev)
> > > > +                       ret = __ib_get_client_nl_info(ibdev,
> > > > client_name, res);
> > > > +               else
> > > > +                       ret =
> > > > __ib_get_global_client_nl_info(client_name, res);
> > > > +       }
> > > > +#endif
> > >
> > > I was trying to put my finger on something yesterday while reading
> > > the
> > > code, and this change makes it more clear for me.  Do we really
> > > want to
> > > limit the info type based on ibdev?  It seems to me that all global
> > > info retrieval should work whether you open a specific ibdev or
> > > not.
> >
> > Each chardev name has a specified query protocol - global chardevs
> > must not specify a ibdev, and local ones must. Each name can only be
> > global or ibdev - no mixing. Too confusing.
>
> I can see where that's the uapi as envisioned, my point though is would
> it be better to allow opening of an ibdev, retrieval of device specific
> data, and also retrieval of the available global data?  It just
> prevents having to open two files to get information that isn't device
> specific.  But, it's not a big deal either.
>
> > It is uapi so we should be strict, if the ibdev is not allowed then
> > it
> > should be checked to be absent in case we do something different
> > later.
> >
> > > The other thing I was wondering about was the module
> > > loading.  Every
> > > attempt to load a module is a fork/exec cycle and a context switch
> > > over
> > > to modprobe and back, and we make no attempt here to keep each
> >
> > It is a common pattern in the kernel, ie we did exactly this code to
> > load the ib netlink module in the netlink core.
> >
> > If there is a problem then it should be addressed globally..
>
> Yeah, I agree.  I'm not sure there is a problem.
>
> > > indicate we've attempted to load that module, and on -ENOENT, we
> > > check
> > > the table for a match to our passed in client_name, and only if we
> > > have
> > > a match, and it's load count is 0, do we call request_module() and
> > > increment the load count.  Thoughts?
> >
> > I assume it becomes single threaded and batched at some point, so it
> > is not so bad...
>
> Thanks, series applied.

Doug,

Please drop it.

Thanks

>
>
> --
> Doug Ledford <dledford@redhat.com>
>     GPG KeyID: B826A3330E572FDD
>     Key fingerprint = AE6B 1BDA 122B 23B4 265B  1274 B826 A333 0E57
> 2FDD
Leon Romanovsky June 18, 2019, 12:17 p.m. UTC | #8
On Thu, Jun 13, 2019 at 09:38:18PM -0300, Jason Gunthorpe wrote:
> From: Jason Gunthorpe <jgg@mellanox.com>
>
> Allow userspace to issue a netlink query against the ib_device for
> something like "uverbs" and get back the char dev name, inode major/minor,
> and interface ABI information for "uverbs0".
>
> Since we are now in netlink this can also trigger a module autoload to
> make the uverbs device come into existence.
>
> Largely this will let us replace searching and reading inside sysfs to
> setup devices, and provides an alternative (using driver_id) to device
> name based provider binding for things like rxe.
>
> Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
> ---
>  drivers/infiniband/core/core_priv.h |  9 +++
>  drivers/infiniband/core/device.c    | 98 +++++++++++++++++++++++++++++
>  drivers/infiniband/core/nldev.c     | 91 +++++++++++++++++++++++++++
>  include/rdma/ib_verbs.h             |  4 ++
>  include/rdma/rdma_netlink.h         |  2 +
>  include/uapi/rdma/rdma_netlink.h    | 14 +++++
>  6 files changed, 218 insertions(+)
>
> diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
> index ff40a450b5d28e..a953c2fa2e7811 100644
> --- a/drivers/infiniband/core/core_priv.h
> +++ b/drivers/infiniband/core/core_priv.h
> @@ -88,6 +88,15 @@ typedef int (*nldev_callback)(struct ib_device *device,
>  int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb,
>  		     struct netlink_callback *cb);
>
> +struct ib_client_nl_info {
> +	struct sk_buff *nl_msg;
> +	struct device *cdev;
> +	unsigned int port;
> +	u64 abi;
> +};
> +int ib_get_client_nl_info(struct ib_device *ibdev, const char *client_name,
> +			  struct ib_client_nl_info *res);
> +
>  enum ib_cache_gid_default_mode {
>  	IB_CACHE_GID_DEFAULT_MODE_SET,
>  	IB_CACHE_GID_DEFAULT_MODE_DELETE
> diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
> index abb169f31d0fe3..7db8566cdb8904 100644
> --- a/drivers/infiniband/core/device.c
> +++ b/drivers/infiniband/core/device.c
> @@ -1726,6 +1726,104 @@ void ib_unregister_client(struct ib_client *client)
>  }
>  EXPORT_SYMBOL(ib_unregister_client);
>
> +static int __ib_get_global_client_nl_info(const char *client_name,
> +					  struct ib_client_nl_info *res)
> +{
> +	struct ib_client *client;
> +	unsigned long index;
> +	int ret = -ENOENT;
> +
> +	down_read(&clients_rwsem);
> +	xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) {
> +		if (strcmp(client->name, client_name) != 0)
> +			continue;
> +		if (!client->get_global_nl_info) {
> +			ret = -EOPNOTSUPP;
> +			break;
> +		}
> +		ret = client->get_global_nl_info(res);
> +		if (WARN_ON(ret == -ENOENT))
> +			ret = -EINVAL;
> +		if (!ret && res->cdev)
> +			get_device(res->cdev);
> +		break;
> +	}
> +	up_read(&clients_rwsem);
> +	return ret;
> +}
> +
> +static int __ib_get_client_nl_info(struct ib_device *ibdev,
> +				   const char *client_name,
> +				   struct ib_client_nl_info *res)
> +{
> +	unsigned long index;
> +	void *client_data;
> +	int ret = -ENOENT;
> +
> +	down_read(&ibdev->client_data_rwsem);
> +	xan_for_each_marked (&ibdev->client_data, index, client_data,
> +			     CLIENT_DATA_REGISTERED) {
> +		struct ib_client *client = xa_load(&clients, index);
> +
> +		if (!client || strcmp(client->name, client_name) != 0)
> +			continue;
> +		if (!client->get_nl_info) {
> +			ret = -EOPNOTSUPP;
> +			break;
> +		}
> +		ret = client->get_nl_info(ibdev, client_data, res);
> +		if (WARN_ON(ret == -ENOENT))
> +			ret = -EINVAL;
> +
> +		/*
> +		 * The cdev is guaranteed valid as long as we are inside the
> +		 * client_data_rwsem as remove_one can't be called. Keep it
> +		 * valid for the caller.
> +		 */
> +		if (!ret && res->cdev)
> +			get_device(res->cdev);
> +		break;
> +	}
> +	up_read(&ibdev->client_data_rwsem);
> +
> +	return ret;
> +}
> +
> +/**
> + * ib_get_client_nl_info - Fetch the nl_info from a client
> + * @device - IB device
> + * @client_name - Name of the client
> + * @res - Result of the query
> + */
> +int ib_get_client_nl_info(struct ib_device *ibdev, const char *client_name,
> +			  struct ib_client_nl_info *res)
> +{
> +	int ret;
> +
> +	if (ibdev)
> +		ret = __ib_get_client_nl_info(ibdev, client_name, res);
> +	else
> +		ret = __ib_get_global_client_nl_info(client_name, res);
> +#ifdef CONFIG_MODULES
> +	if (ret == -ENOENT) {
> +		request_module("rdma-client-%s", client_name);
> +		if (ibdev)
> +			ret = __ib_get_client_nl_info(ibdev, client_name, res);
> +		else
> +			ret = __ib_get_global_client_nl_info(client_name, res);
> +	}
> +#endif
> +	if (ret) {
> +		if (ret == -ENOENT)
> +			return -EOPNOTSUPP;
> +		return ret;
> +	}
> +
> +	if (WARN_ON(!res->cdev))
> +		return -EINVAL;
> +	return 0;
> +}
> +
>  /**
>   * ib_set_client_data - Set IB client context
>   * @device:Device to set context for
> diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
> index 69188cbbd99bd5..55eccea628e99f 100644
> --- a/drivers/infiniband/core/nldev.c
> +++ b/drivers/infiniband/core/nldev.c
> @@ -120,6 +120,9 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
>  	[RDMA_NLDEV_ATTR_DEV_PROTOCOL]		= { .type = NLA_NUL_STRING,
>  				    .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN },
>  	[RDMA_NLDEV_NET_NS_FD]			= { .type = NLA_U32 },
> +	[RDMA_NLDEV_ATTR_CHARDEV_TYPE]		= { .type = NLA_NUL_STRING,
> +				    .len = 128 },
> +	[RDMA_NLDEV_ATTR_PORT_INDEX]		= { .type = NLA_U32 },

It is wrong, we already have RDMA_NLDEV_ATTR_PORT_INDEX declared in nla_policy.
But we don't have other RDMA_NLDEV_ATTR_CHARDEV_* declarations here.

Thanks
Jason Gunthorpe June 18, 2019, 1:10 p.m. UTC | #9
On Tue, Jun 18, 2019 at 03:17:09PM +0300, Leon Romanovsky wrote:
> >  /**
> >   * ib_set_client_data - Set IB client context
> >   * @device:Device to set context for
> > diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
> > index 69188cbbd99bd5..55eccea628e99f 100644
> > +++ b/drivers/infiniband/core/nldev.c
> > @@ -120,6 +120,9 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
> >  	[RDMA_NLDEV_ATTR_DEV_PROTOCOL]		= { .type = NLA_NUL_STRING,
> >  				    .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN },
> >  	[RDMA_NLDEV_NET_NS_FD]			= { .type = NLA_U32 },
> > +	[RDMA_NLDEV_ATTR_CHARDEV_TYPE]		= { .type = NLA_NUL_STRING,
> > +				    .len = 128 },
> > +	[RDMA_NLDEV_ATTR_PORT_INDEX]		= { .type = NLA_U32 },
> 
> It is wrong, we already have RDMA_NLDEV_ATTR_PORT_INDEX declared in nla_policy.
> But we don't have other RDMA_NLDEV_ATTR_CHARDEV_* declarations here.

Doug can you fix it?

Thanks,
Jason
Doug Ledford June 18, 2019, 3:55 p.m. UTC | #10
On Tue, 2019-06-18 at 10:10 -0300, Jason Gunthorpe wrote:
> On Tue, Jun 18, 2019 at 03:17:09PM +0300, Leon Romanovsky wrote:
> > >  /**
> > >   * ib_set_client_data - Set IB client context
> > >   * @device:Device to set context for
> > > diff --git a/drivers/infiniband/core/nldev.c
> > > b/drivers/infiniband/core/nldev.c
> > > index 69188cbbd99bd5..55eccea628e99f 100644
> > > +++ b/drivers/infiniband/core/nldev.c
> > > @@ -120,6 +120,9 @@ static const struct nla_policy
> > > nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
> > >  	[RDMA_NLDEV_ATTR_DEV_PROTOCOL]		= { .type =
> > > NLA_NUL_STRING,
> > >  				    .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN
> > > },
> > >  	[RDMA_NLDEV_NET_NS_FD]			= { .type =
> > > NLA_U32 },
> > > +	[RDMA_NLDEV_ATTR_CHARDEV_TYPE]		= { .type =
> > > NLA_NUL_STRING,
> > > +				    .len = 128 },
> > > +	[RDMA_NLDEV_ATTR_PORT_INDEX]		= { .type = NLA_U32 },
> > 
> > It is wrong, we already have RDMA_NLDEV_ATTR_PORT_INDEX declared in
> > nla_policy.
> > But we don't have other RDMA_NLDEV_ATTR_CHARDEV_* declarations
> > here.
> 
> Doug can you fix it?

I haven't pushed my wip to for-next yet, so yeah, I can fix it.  We
just need to decide on what the full fix is ;-)

Drop the duplicate ATTR_PORT_INDEX, but what about a final decision on
including the outputs for possible future type checking?  You and Leon
seem to be going back and forth, and I don't have strong feelings
either way on this one.  It's just a definition statement, not like
it's a dead subroutine.
Leon Romanovsky June 18, 2019, 4:53 p.m. UTC | #11
On Tue, Jun 18, 2019 at 11:55:08AM -0400, Doug Ledford wrote:
> On Tue, 2019-06-18 at 10:10 -0300, Jason Gunthorpe wrote:
> > On Tue, Jun 18, 2019 at 03:17:09PM +0300, Leon Romanovsky wrote:
> > > >  /**
> > > >   * ib_set_client_data - Set IB client context
> > > >   * @device:Device to set context for
> > > > diff --git a/drivers/infiniband/core/nldev.c
> > > > b/drivers/infiniband/core/nldev.c
> > > > index 69188cbbd99bd5..55eccea628e99f 100644
> > > > +++ b/drivers/infiniband/core/nldev.c
> > > > @@ -120,6 +120,9 @@ static const struct nla_policy
> > > > nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
> > > >  	[RDMA_NLDEV_ATTR_DEV_PROTOCOL]		= { .type =
> > > > NLA_NUL_STRING,
> > > >  				    .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN
> > > > },
> > > >  	[RDMA_NLDEV_NET_NS_FD]			= { .type =
> > > > NLA_U32 },
> > > > +	[RDMA_NLDEV_ATTR_CHARDEV_TYPE]		= { .type =
> > > > NLA_NUL_STRING,
> > > > +				    .len = 128 },
> > > > +	[RDMA_NLDEV_ATTR_PORT_INDEX]		= { .type = NLA_U32 },
> > >
> > > It is wrong, we already have RDMA_NLDEV_ATTR_PORT_INDEX declared in
> > > nla_policy.
> > > But we don't have other RDMA_NLDEV_ATTR_CHARDEV_* declarations
> > > here.
> >
> > Doug can you fix it?
>
> I haven't pushed my wip to for-next yet, so yeah, I can fix it.  We
> just need to decide on what the full fix is ;-)
>
> Drop the duplicate ATTR_PORT_INDEX, but what about a final decision on
> including the outputs for possible future type checking?  You and Leon
> seem to be going back and forth, and I don't have strong feelings
> either way on this one.  It's just a definition statement, not like
> it's a dead subroutine.

I have a very strong opinion about it.

Thanks

>
> --
> Doug Ledford <dledford@redhat.com>
>     GPG KeyID: B826A3330E572FDD
>     Key fingerprint = AE6B 1BDA 122B 23B4 265B  1274 B826 A333 0E57
> 2FDD
Jason Gunthorpe June 18, 2019, 6:46 p.m. UTC | #12
On Tue, Jun 18, 2019 at 07:53:38PM +0300, Leon Romanovsky wrote:
> On Tue, Jun 18, 2019 at 11:55:08AM -0400, Doug Ledford wrote:
> > On Tue, 2019-06-18 at 10:10 -0300, Jason Gunthorpe wrote:
> > > On Tue, Jun 18, 2019 at 03:17:09PM +0300, Leon Romanovsky wrote:
> > > > >  /**
> > > > >   * ib_set_client_data - Set IB client context
> > > > >   * @device:Device to set context for
> > > > > diff --git a/drivers/infiniband/core/nldev.c
> > > > > b/drivers/infiniband/core/nldev.c
> > > > > index 69188cbbd99bd5..55eccea628e99f 100644
> > > > > +++ b/drivers/infiniband/core/nldev.c
> > > > > @@ -120,6 +120,9 @@ static const struct nla_policy
> > > > > nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
> > > > >  	[RDMA_NLDEV_ATTR_DEV_PROTOCOL]		= { .type =
> > > > > NLA_NUL_STRING,
> > > > >  				    .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN
> > > > > },
> > > > >  	[RDMA_NLDEV_NET_NS_FD]			= { .type =
> > > > > NLA_U32 },
> > > > > +	[RDMA_NLDEV_ATTR_CHARDEV_TYPE]		= { .type =
> > > > > NLA_NUL_STRING,
> > > > > +				    .len = 128 },
> > > > > +	[RDMA_NLDEV_ATTR_PORT_INDEX]		= { .type = NLA_U32 },
> > > >
> > > > It is wrong, we already have RDMA_NLDEV_ATTR_PORT_INDEX declared in
> > > > nla_policy.
> > > > But we don't have other RDMA_NLDEV_ATTR_CHARDEV_* declarations
> > > > here.
> > >
> > > Doug can you fix it?
> >
> > I haven't pushed my wip to for-next yet, so yeah, I can fix it.  We
> > just need to decide on what the full fix is ;-)
> >
> > Drop the duplicate ATTR_PORT_INDEX, but what about a final decision on
> > including the outputs for possible future type checking?  You and Leon
> > seem to be going back and forth, and I don't have strong feelings
> > either way on this one.  It's just a definition statement, not like
> > it's a dead subroutine.
> 
> I have a very strong opinion about it.

Then Doug should add the policies, here are the output values from the
userspace:

        [RDMA_NLDEV_ATTR_CHARDEV] = { .type = NLA_U64 },
        [RDMA_NLDEV_ATTR_CHARDEV_ABI] = { .type = NLA_U64 },
        [RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 },
        [RDMA_NLDEV_ATTR_NODE_GUID] = { .type = NLA_U64 },
        [RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID] = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_CHARDEV_NAME] = { .type = NLA_NUL_STRING },
        [RDMA_NLDEV_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
        [RDMA_NLDEV_ATTR_DEV_PROTOCOL] = { .type = NLA_NUL_STRING },
        [RDMA_NLDEV_ATTR_FW_VERSION] = { .type = NLA_NUL_STRING },

Jason
Doug Ledford June 18, 2019, 8:58 p.m. UTC | #13
On Tue, 2019-06-18 at 15:46 -0300, Jason Gunthorpe wrote:
> On Tue, Jun 18, 2019 at 07:53:38PM +0300, Leon Romanovsky wrote:
> > 
> > I have a very strong opinion about it.
> 
> Then Doug should add the policies, here are the output values from the
> userspace:
> 
>         [RDMA_NLDEV_ATTR_CHARDEV] = { .type = NLA_U64 },
>         [RDMA_NLDEV_ATTR_CHARDEV_ABI] = { .type = NLA_U64 },
>         [RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 },
>         [RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 },
>         [RDMA_NLDEV_ATTR_NODE_GUID] = { .type = NLA_U64 },
>         [RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID] = { .type = NLA_U32 },
>         [RDMA_NLDEV_ATTR_CHARDEV_NAME] = { .type = NLA_NUL_STRING },
>         [RDMA_NLDEV_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
>         [RDMA_NLDEV_ATTR_DEV_PROTOCOL] = { .type = NLA_NUL_STRING },
>         [RDMA_NLDEV_ATTR_FW_VERSION] = { .type = NLA_NUL_STRING },

Most of those were already in the policies.  Only the four that you
added to enum rdma_nldev_attr needed added to the policies, and two of
them your patch already added.  The only question I had is what the
string length should be on ATTR_CHARDEV_NAME?  I throw in the default of
.len = RDMA_NLDEV_ATTR_ENTRY_STRLEN, but I wasn't sure if that was right
for this entry?
Doug Ledford June 19, 2019, 2:45 a.m. UTC | #14
On Tue, 2019-06-18 at 16:58 -0400, Doug Ledford wrote:
> On Tue, 2019-06-18 at 15:46 -0300, Jason Gunthorpe wrote:
> > On Tue, Jun 18, 2019 at 07:53:38PM +0300, Leon Romanovsky wrote:
> > > I have a very strong opinion about it.
> > 
> > Then Doug should add the policies, here are the output values from
> > the
> > userspace:
> > 
> >         [RDMA_NLDEV_ATTR_CHARDEV] = { .type = NLA_U64 },
> >         [RDMA_NLDEV_ATTR_CHARDEV_ABI] = { .type = NLA_U64 },
> >         [RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 },
> >         [RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 },
> >         [RDMA_NLDEV_ATTR_NODE_GUID] = { .type = NLA_U64 },
> >         [RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID] = { .type = NLA_U32 },
> >         [RDMA_NLDEV_ATTR_CHARDEV_NAME] = { .type = NLA_NUL_STRING },
> >         [RDMA_NLDEV_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
> >         [RDMA_NLDEV_ATTR_DEV_PROTOCOL] = { .type = NLA_NUL_STRING },
> >         [RDMA_NLDEV_ATTR_FW_VERSION] = { .type = NLA_NUL_STRING },
> 
> Most of those were already in the policies.  Only the four that you
> added to enum rdma_nldev_attr needed added to the policies, and two of
> them your patch already added.  The only question I had is what the
> string length should be on ATTR_CHARDEV_NAME?  I throw in the default
> of
> .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN, but I wasn't sure if that was
> right
> for this entry?

First of all, let me say that this is a PITA.  I'm thinking we need to
order all of the attributes in the policy array in alphabetical order so
it's easier to tell what's there and what's missing.

Also, I'm starting to not like adding items to the policy array before
they are actually acceptable as inputs.  Sure, there's the argument that
they won't get missed.  But there's the counter argument that until you
define them as an input in some netlink function that reads them, at
least for all of the string items, you can't actually set the real
string length limit.  By adding them to the policy now, you make it a
guarantee that you'll end up changing the API under userspace later when
they become a legitimate input.  Not sure if that's actually wise.  The
u32 and u64, sure, that's fine.  But the strings, that can cause
problems later.,
Doug Ledford June 19, 2019, 3:19 a.m. UTC | #15
On Tue, 2019-06-18 at 22:45 -0400, Doug Ledford wrote:
> On Tue, 2019-06-18 at 16:58 -0400, Doug Ledford wrote:
> > On Tue, 2019-06-18 at 15:46 -0300, Jason Gunthorpe wrote:
> > > On Tue, Jun 18, 2019 at 07:53:38PM +0300, Leon Romanovsky wrote:
> > > > I have a very strong opinion about it.
> > > 
> > > Then Doug should add the policies, here are the output values from
> > > the
> > > userspace:
> > > 
> > >         [RDMA_NLDEV_ATTR_CHARDEV] = { .type = NLA_U64 },
> > >         [RDMA_NLDEV_ATTR_CHARDEV_ABI] = { .type = NLA_U64 },
> > >         [RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 },
> > >         [RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 },
> > >         [RDMA_NLDEV_ATTR_NODE_GUID] = { .type = NLA_U64 },
> > >         [RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID] = { .type = NLA_U32 },
> > >         [RDMA_NLDEV_ATTR_CHARDEV_NAME] = { .type = NLA_NUL_STRING
> > > },
> > >         [RDMA_NLDEV_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
> > >         [RDMA_NLDEV_ATTR_DEV_PROTOCOL] = { .type = NLA_NUL_STRING
> > > },
> > >         [RDMA_NLDEV_ATTR_FW_VERSION] = { .type = NLA_NUL_STRING },
> > 
> > Most of those were already in the policies.  Only the four that you
> > added to enum rdma_nldev_attr needed added to the policies, and two
> > of
> > them your patch already added.  The only question I had is what the
> > string length should be on ATTR_CHARDEV_NAME?  I throw in the
> > default
> > of
> > .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN, but I wasn't sure if that was
> > right
> > for this entry?
> 
> First of all, let me say that this is a PITA.  I'm thinking we need to
> order all of the attributes in the policy array in alphabetical order
> so
> it's easier to tell what's there and what's missing.
> 
> Also, I'm starting to not like adding items to the policy array before
> they are actually acceptable as inputs.  Sure, there's the argument
> that
> they won't get missed.  But there's the counter argument that until
> you
> define them as an input in some netlink function that reads them, at
> least for all of the string items, you can't actually set the real
> string length limit.  By adding them to the policy now, you make it a
> guarantee that you'll end up changing the API under userspace later
> when
> they become a legitimate input.  Not sure if that's actually
> wise.  The
> u32 and u64, sure, that's fine.  But the strings, that can cause
> problems later.,
> 

Maybe the best way to do string items if you add them ahead of time is
to give them an input length of 1.  Theoretically that's just the null
byte.  Then when you add a function to actually read the data, set the
size to something real.

Anyway, pushed to wip/dl-for-next, check it to make sure you're happy
with the final results.
Leon Romanovsky June 19, 2019, 6:07 a.m. UTC | #16
On Tue, Jun 18, 2019 at 11:19:58PM -0400, Doug Ledford wrote:
> On Tue, 2019-06-18 at 22:45 -0400, Doug Ledford wrote:
> > On Tue, 2019-06-18 at 16:58 -0400, Doug Ledford wrote:
> > > On Tue, 2019-06-18 at 15:46 -0300, Jason Gunthorpe wrote:
> > > > On Tue, Jun 18, 2019 at 07:53:38PM +0300, Leon Romanovsky wrote:
> > > > > I have a very strong opinion about it.
> > > >
> > > > Then Doug should add the policies, here are the output values from
> > > > the
> > > > userspace:
> > > >
> > > >         [RDMA_NLDEV_ATTR_CHARDEV] = { .type = NLA_U64 },
> > > >         [RDMA_NLDEV_ATTR_CHARDEV_ABI] = { .type = NLA_U64 },
> > > >         [RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 },
> > > >         [RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 },
> > > >         [RDMA_NLDEV_ATTR_NODE_GUID] = { .type = NLA_U64 },
> > > >         [RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID] = { .type = NLA_U32 },
> > > >         [RDMA_NLDEV_ATTR_CHARDEV_NAME] = { .type = NLA_NUL_STRING
> > > > },
> > > >         [RDMA_NLDEV_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
> > > >         [RDMA_NLDEV_ATTR_DEV_PROTOCOL] = { .type = NLA_NUL_STRING
> > > > },
> > > >         [RDMA_NLDEV_ATTR_FW_VERSION] = { .type = NLA_NUL_STRING },
> > >
> > > Most of those were already in the policies.  Only the four that you
> > > added to enum rdma_nldev_attr needed added to the policies, and two
> > > of
> > > them your patch already added.  The only question I had is what the
> > > string length should be on ATTR_CHARDEV_NAME?  I throw in the
> > > default
> > > of
> > > .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN, but I wasn't sure if that was
> > > right
> > > for this entry?
> >
> > First of all, let me say that this is a PITA.  I'm thinking we need to
> > order all of the attributes in the policy array in alphabetical order
> > so
> > it's easier to tell what's there and what's missing.
> >
> > Also, I'm starting to not like adding items to the policy array before
> > they are actually acceptable as inputs.  Sure, there's the argument
> > that
> > they won't get missed.  But there's the counter argument that until
> > you
> > define them as an input in some netlink function that reads them, at
> > least for all of the string items, you can't actually set the real
> > string length limit.  By adding them to the policy now, you make it a
> > guarantee that you'll end up changing the API under userspace later
> > when
> > they become a legitimate input.  Not sure if that's actually
> > wise.  The
> > u32 and u64, sure, that's fine.  But the strings, that can cause
> > problems later.,
> >
>
> Maybe the best way to do string items if you add them ahead of time is
> to give them an input length of 1.  Theoretically that's just the null
> byte.  Then when you add a function to actually read the data, set the
> size to something real.

Like you said, it won't be needed till new input code is used, so from
API perspective, it doesn't matter what size will be. I liked your idea
to put it to be "1". It goes hand by hand to avoid possible addition
of input handlers without update of update nla_policy.

>
> Anyway, pushed to wip/dl-for-next, check it to make sure you're happy
> with the final results.

It looks good, thanks.

>
> --
> Doug Ledford <dledford@redhat.com>
>     GPG KeyID: B826A3330E572FDD
>     Fingerprint = AE6B 1BDA 122B 23B4 265B  1274 B826 A333 0E57 2FDD
Doug Ledford June 19, 2019, 2:15 p.m. UTC | #17
On Wed, 2019-06-19 at 09:07 +0300, Leon Romanovsky wrote:
> On Tue, Jun 18, 2019 at 11:19:58PM -0400, Doug Ledford wrote:
> > On Tue, 2019-06-18 at 22:45 -0400, Doug Ledford wrote:
> > > On Tue, 2019-06-18 at 16:58 -0400, Doug Ledford wrote:
> > > > On Tue, 2019-06-18 at 15:46 -0300, Jason Gunthorpe wrote:
> > > > > On Tue, Jun 18, 2019 at 07:53:38PM +0300, Leon Romanovsky
> > > > > wrote:
> > > > > > I have a very strong opinion about it.
> > > > > 
> > > > > Then Doug should add the policies, here are the output values
> > > > > from
> > > > > the
> > > > > userspace:
> > > > > 
> > > > >         [RDMA_NLDEV_ATTR_CHARDEV] = { .type = NLA_U64 },
> > > > >         [RDMA_NLDEV_ATTR_CHARDEV_ABI] = { .type = NLA_U64 },
> > > > >         [RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 },
> > > > >         [RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 },
> > > > >         [RDMA_NLDEV_ATTR_NODE_GUID] = { .type = NLA_U64 },
> > > > >         [RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID] = { .type = NLA_U32
> > > > > },
> > > > >         [RDMA_NLDEV_ATTR_CHARDEV_NAME] = { .type =
> > > > > NLA_NUL_STRING
> > > > > },
> > > > >         [RDMA_NLDEV_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING
> > > > > },
> > > > >         [RDMA_NLDEV_ATTR_DEV_PROTOCOL] = { .type =
> > > > > NLA_NUL_STRING
> > > > > },
> > > > >         [RDMA_NLDEV_ATTR_FW_VERSION] = { .type =
> > > > > NLA_NUL_STRING },
> > > > 
> > > > Most of those were already in the policies.  Only the four that
> > > > you
> > > > added to enum rdma_nldev_attr needed added to the policies, and
> > > > two
> > > > of
> > > > them your patch already added.  The only question I had is what
> > > > the
> > > > string length should be on ATTR_CHARDEV_NAME?  I throw in the
> > > > default
> > > > of
> > > > .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN, but I wasn't sure if that
> > > > was
> > > > right
> > > > for this entry?
> > > 
> > > First of all, let me say that this is a PITA.  I'm thinking we
> > > need to
> > > order all of the attributes in the policy array in alphabetical
> > > order
> > > so
> > > it's easier to tell what's there and what's missing.
> > > 
> > > Also, I'm starting to not like adding items to the policy array
> > > before
> > > they are actually acceptable as inputs.  Sure, there's the
> > > argument
> > > that
> > > they won't get missed.  But there's the counter argument that
> > > until
> > > you
> > > define them as an input in some netlink function that reads them,
> > > at
> > > least for all of the string items, you can't actually set the real
> > > string length limit.  By adding them to the policy now, you make
> > > it a
> > > guarantee that you'll end up changing the API under userspace
> > > later
> > > when
> > > they become a legitimate input.  Not sure if that's actually
> > > wise.  The
> > > u32 and u64, sure, that's fine.  But the strings, that can cause
> > > problems later.,
> > > 
> > 
> > Maybe the best way to do string items if you add them ahead of time
> > is
> > to give them an input length of 1.  Theoretically that's just the
> > null
> > byte.  Then when you add a function to actually read the data, set
> > the
> > size to something real.
> 
> Like you said, it won't be needed till new input code is used, so from
> API perspective, it doesn't matter what size will be. I liked your
> idea
> to put it to be "1". It goes hand by hand to avoid possible addition
> of input handlers without update of update nla_policy.

Ok, I cleaned up the policy array by sorting it so you can find things
when you need to add new stuff.  New policy additions should preserve
the sorting.  Then I audited all of the string inputs, fixed their
definitions in the policy, and removed the (now) unnecessary checks for
string input overflow.  Their already in my wip branch, but I'll send
them to the list too.

> > Anyway, pushed to wip/dl-for-next, check it to make sure you're
> > happy
> > with the final results.
> 
> It looks good, thanks.
> 
> > --
> > Doug Ledford <dledford@redhat.com>
> >     GPG KeyID: B826A3330E572FDD
> >     Fingerprint = AE6B 1BDA 122B 23B4 265B  1274 B826 A333 0E57 2FDD
> 
>
diff mbox series

Patch

diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index ff40a450b5d28e..a953c2fa2e7811 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -88,6 +88,15 @@  typedef int (*nldev_callback)(struct ib_device *device,
 int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb,
 		     struct netlink_callback *cb);
 
+struct ib_client_nl_info {
+	struct sk_buff *nl_msg;
+	struct device *cdev;
+	unsigned int port;
+	u64 abi;
+};
+int ib_get_client_nl_info(struct ib_device *ibdev, const char *client_name,
+			  struct ib_client_nl_info *res);
+
 enum ib_cache_gid_default_mode {
 	IB_CACHE_GID_DEFAULT_MODE_SET,
 	IB_CACHE_GID_DEFAULT_MODE_DELETE
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index abb169f31d0fe3..7db8566cdb8904 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -1726,6 +1726,104 @@  void ib_unregister_client(struct ib_client *client)
 }
 EXPORT_SYMBOL(ib_unregister_client);
 
+static int __ib_get_global_client_nl_info(const char *client_name,
+					  struct ib_client_nl_info *res)
+{
+	struct ib_client *client;
+	unsigned long index;
+	int ret = -ENOENT;
+
+	down_read(&clients_rwsem);
+	xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) {
+		if (strcmp(client->name, client_name) != 0)
+			continue;
+		if (!client->get_global_nl_info) {
+			ret = -EOPNOTSUPP;
+			break;
+		}
+		ret = client->get_global_nl_info(res);
+		if (WARN_ON(ret == -ENOENT))
+			ret = -EINVAL;
+		if (!ret && res->cdev)
+			get_device(res->cdev);
+		break;
+	}
+	up_read(&clients_rwsem);
+	return ret;
+}
+
+static int __ib_get_client_nl_info(struct ib_device *ibdev,
+				   const char *client_name,
+				   struct ib_client_nl_info *res)
+{
+	unsigned long index;
+	void *client_data;
+	int ret = -ENOENT;
+
+	down_read(&ibdev->client_data_rwsem);
+	xan_for_each_marked (&ibdev->client_data, index, client_data,
+			     CLIENT_DATA_REGISTERED) {
+		struct ib_client *client = xa_load(&clients, index);
+
+		if (!client || strcmp(client->name, client_name) != 0)
+			continue;
+		if (!client->get_nl_info) {
+			ret = -EOPNOTSUPP;
+			break;
+		}
+		ret = client->get_nl_info(ibdev, client_data, res);
+		if (WARN_ON(ret == -ENOENT))
+			ret = -EINVAL;
+
+		/*
+		 * The cdev is guaranteed valid as long as we are inside the
+		 * client_data_rwsem as remove_one can't be called. Keep it
+		 * valid for the caller.
+		 */
+		if (!ret && res->cdev)
+			get_device(res->cdev);
+		break;
+	}
+	up_read(&ibdev->client_data_rwsem);
+
+	return ret;
+}
+
+/**
+ * ib_get_client_nl_info - Fetch the nl_info from a client
+ * @device - IB device
+ * @client_name - Name of the client
+ * @res - Result of the query
+ */
+int ib_get_client_nl_info(struct ib_device *ibdev, const char *client_name,
+			  struct ib_client_nl_info *res)
+{
+	int ret;
+
+	if (ibdev)
+		ret = __ib_get_client_nl_info(ibdev, client_name, res);
+	else
+		ret = __ib_get_global_client_nl_info(client_name, res);
+#ifdef CONFIG_MODULES
+	if (ret == -ENOENT) {
+		request_module("rdma-client-%s", client_name);
+		if (ibdev)
+			ret = __ib_get_client_nl_info(ibdev, client_name, res);
+		else
+			ret = __ib_get_global_client_nl_info(client_name, res);
+	}
+#endif
+	if (ret) {
+		if (ret == -ENOENT)
+			return -EOPNOTSUPP;
+		return ret;
+	}
+
+	if (WARN_ON(!res->cdev))
+		return -EINVAL;
+	return 0;
+}
+
 /**
  * ib_set_client_data - Set IB client context
  * @device:Device to set context for
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 69188cbbd99bd5..55eccea628e99f 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -120,6 +120,9 @@  static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
 	[RDMA_NLDEV_ATTR_DEV_PROTOCOL]		= { .type = NLA_NUL_STRING,
 				    .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN },
 	[RDMA_NLDEV_NET_NS_FD]			= { .type = NLA_U32 },
+	[RDMA_NLDEV_ATTR_CHARDEV_TYPE]		= { .type = NLA_NUL_STRING,
+				    .len = 128 },
+	[RDMA_NLDEV_ATTR_PORT_INDEX]		= { .type = NLA_U32 },
 };
 
 static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
@@ -1347,6 +1350,91 @@  static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
 	return 0;
 }
 
+static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh,
+			     struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+	char client_name[IB_DEVICE_NAME_MAX];
+	struct ib_client_nl_info data = {};
+	struct ib_device *ibdev = NULL;
+	struct sk_buff *msg;
+	u32 index;
+	int err;
+
+	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
+			  extack);
+	if (err || !tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE])
+		return -EINVAL;
+
+	if (nla_strlcpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE],
+			sizeof(client_name)) >= sizeof(client_name))
+		return -EINVAL;
+
+	if (tb[RDMA_NLDEV_ATTR_DEV_INDEX]) {
+		index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+		ibdev = ib_device_get_by_index(sock_net(skb->sk), index);
+		if (!ibdev)
+			return -EINVAL;
+
+		if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
+			data.port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+			if (!rdma_is_port_valid(ibdev, data.port)) {
+				err = -EINVAL;
+				goto out_put;
+			}
+		} else {
+			data.port = -1;
+		}
+	} else if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
+		return -EINVAL;
+	}
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg) {
+		err = -ENOMEM;
+		goto out_put;
+	}
+	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+					 RDMA_NLDEV_CMD_GET_CHARDEV),
+			0, 0);
+
+	data.nl_msg = msg;
+	err = ib_get_client_nl_info(ibdev, client_name, &data);
+	if (err)
+		goto out_nlmsg;
+
+	err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV,
+				huge_encode_dev(data.cdev->devt),
+				RDMA_NLDEV_ATTR_PAD);
+	if (err)
+		goto out_data;
+	err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV_ABI, data.abi,
+				RDMA_NLDEV_ATTR_PAD);
+	if (err)
+		goto out_data;
+	if (nla_put_string(msg, RDMA_NLDEV_ATTR_CHARDEV_NAME,
+			   dev_name(data.cdev))) {
+		err = -EMSGSIZE;
+		goto out_data;
+	}
+
+	nlmsg_end(msg, nlh);
+	put_device(data.cdev);
+	if (ibdev)
+		ib_device_put(ibdev);
+	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+
+out_data:
+	put_device(data.cdev);
+out_nlmsg:
+	nlmsg_free(msg);
+out_put:
+	if (ibdev)
+		ib_device_put(ibdev);
+	return err;
+}
+
 static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 			      struct netlink_ext_ack *extack)
 {
@@ -1404,6 +1492,9 @@  static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
 		.doit = nldev_get_doit,
 		.dump = nldev_get_dumpit,
 	},
+	[RDMA_NLDEV_CMD_GET_CHARDEV] = {
+		.doit = nldev_get_chardev,
+	},
 	[RDMA_NLDEV_CMD_SET] = {
 		.doit = nldev_set_doit,
 		.flags = RDMA_NL_ADMIN_PERM,
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 973514ea17a79b..a1265e9ce2d11f 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2684,10 +2684,14 @@  struct ib_device {
 	u32 iw_driver_flags;
 };
 
+struct ib_client_nl_info;
 struct ib_client {
 	const char *name;
 	void (*add)   (struct ib_device *);
 	void (*remove)(struct ib_device *, void *client_data);
+	int (*get_nl_info)(struct ib_device *ibdev, void *client_data,
+			   struct ib_client_nl_info *res);
+	int (*get_global_nl_info)(struct ib_client_nl_info *res);
 
 	/* Returns the net_dev belonging to this ib_client and matching the
 	 * given parameters.
diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
index 10732ab31ba2f9..c7acbe08342828 100644
--- a/include/rdma/rdma_netlink.h
+++ b/include/rdma/rdma_netlink.h
@@ -110,4 +110,6 @@  void rdma_link_register(struct rdma_link_ops *ops);
 void rdma_link_unregister(struct rdma_link_ops *ops);
 
 #define MODULE_ALIAS_RDMA_LINK(type) MODULE_ALIAS("rdma-link-" type)
+#define MODULE_ALIAS_RDMA_CLIENT(type) MODULE_ALIAS("rdma-client-" type)
+
 #endif /* _RDMA_NETLINK_H */
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index f588e8551c6cea..9903db21a42c58 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -279,6 +279,8 @@  enum rdma_nldev_command {
 
 	RDMA_NLDEV_CMD_RES_PD_GET, /* can dump */
 
+	RDMA_NLDEV_CMD_GET_CHARDEV,
+
 	RDMA_NLDEV_NUM_OPS
 };
 
@@ -491,6 +493,18 @@  enum rdma_nldev_attr {
 	 */
 	RDMA_NLDEV_NET_NS_FD,			/* u32 */
 
+	/*
+	 * Information about a chardev.
+	 * CHARDEV_TYPE is the name of the chardev ABI (ie uverbs, umad, etc)
+	 * CHARDEV_ABI signals the ABI revision (historical)
+	 * CHARDEV_NAME is the kernel name for the /dev/ file (no directory)
+	 * CHARDEV is the 64 bit dev_t for the inode
+	 */
+	RDMA_NLDEV_ATTR_CHARDEV_TYPE,		/* string */
+	RDMA_NLDEV_ATTR_CHARDEV_NAME,		/* string */
+	RDMA_NLDEV_ATTR_CHARDEV_ABI,		/* u64 */
+	RDMA_NLDEV_ATTR_CHARDEV,		/* u64 */
+
 	/*
 	 * Always the end
 	 */