diff mbox series

[rdma-next,v5,1/2] RDMA: Add indication for in kernel API support to IB device

Message ID 1547130216-12663-2-git-send-email-galpress@amazon.com (mailing list archive)
State Superseded
Headers show
Series IB device in-kernel API support indication | expand

Commit Message

Gal Pressman Jan. 10, 2019, 2:23 p.m. UTC
Drivers that do not provide kernel verbs support should not be used by
ib kernel clients and fail.
In case a device does not implement all mandatory verbs for kverbs usage
mark it as a non kverbs provider and prevent its usage for all clients
except for uverbs.

The device is marked as a non kverbs provider using the
'kverbs_provider' flag which should only be set by the core code.
The clients can choose whether kverbs are requested for its usage using
the 'no_kverbs_req' flag which is currently set for uverbs only.

This patch allows drivers to remove mandatory verbs stubs and simply set
the callback to NULL. The IB device will be registered as a non-kverbs
provider.

Signed-off-by: Gal Pressman <galpress@amazon.com>
---
 drivers/infiniband/core/device.c      | 35 ++++++++++++++++++++++-------------
 drivers/infiniband/core/uverbs_main.c |  1 +
 include/rdma/ib_verbs.h               |  5 +++++
 3 files changed, 28 insertions(+), 13 deletions(-)

Comments

Jason Gunthorpe Jan. 14, 2019, 8:25 p.m. UTC | #1
On Thu, Jan 10, 2019 at 04:23:35PM +0200, Gal Pressman wrote:
> Drivers that do not provide kernel verbs support should not be used by
> ib kernel clients and fail.
> In case a device does not implement all mandatory verbs for kverbs usage
> mark it as a non kverbs provider and prevent its usage for all clients
> except for uverbs.
> 
> The device is marked as a non kverbs provider using the
> 'kverbs_provider' flag which should only be set by the core code.
> The clients can choose whether kverbs are requested for its usage using
> the 'no_kverbs_req' flag which is currently set for uverbs only.
> 
> This patch allows drivers to remove mandatory verbs stubs and simply set
> the callback to NULL. The IB device will be registered as a non-kverbs
> provider.
> 
> Signed-off-by: Gal Pressman <galpress@amazon.com>
>  drivers/infiniband/core/device.c      | 35 ++++++++++++++++++++++-------------
>  drivers/infiniband/core/uverbs_main.c |  1 +
>  include/rdma/ib_verbs.h               |  5 +++++
>  3 files changed, 28 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
> index 8872453e26c0..3f32d05df55d 100644
> +++ b/drivers/infiniband/core/device.c
> @@ -121,13 +121,12 @@ static int ib_device_check_mandatory(struct ib_device *device)
>  	};
>  	int i;
>  
> +	device->kverbs_provider = true;
>  	for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
>  		if (!*(void **) ((void *) &device->ops +
>  				 mandatory_table[i].offset)) {
> -			dev_warn(&device->dev,
> -				 "Device is missing mandatory function %s\n",
> -				 mandatory_table[i].name);
> -			return -EINVAL;
> +			device->kverbs_provider = false;
> +			break;
>  		}
>  	}
>  
> @@ -330,6 +329,9 @@ static int add_client_context(struct ib_device *device, struct ib_client *client
>  {
>  	struct ib_client_data *context;
>  
> +	if (!device->kverbs_provider && !client->no_kverbs_req)
> +		return -EOPNOTSUPP;

Return 0, it is not a failure, the client is just not supported on
this device. Not that it matters right now..

> @@ -374,10 +376,12 @@ static int read_port_immutable(struct ib_device *device)
>  		return -ENOMEM;
>  
>  	for (port = start_port; port <= end_port; ++port) {
> -		ret = device->ops.get_port_immutable(
> -			device, port, &device->port_immutable[port]);
> -		if (ret)
> -			return ret;
> +		if (device->ops.get_port_immutable) {
> +			ret = device->ops.get_port_immutable(
> +				device, port, &device->port_immutable[port]);
> +			if (ret)
> +				return ret;
> +		}
>  
>  		if (verify_immutable(device, port))
>  			return -EINVAL;

Can we still call verify_immutable?

> @@ -537,11 +541,13 @@ static int setup_device(struct ib_device *device)
>  	}
>  
>  	memset(&device->attrs, 0, sizeof(device->attrs));
> -	ret = device->ops.query_device(device, &device->attrs, &uhw);
> -	if (ret) {
> -		dev_warn(&device->dev,
> -			 "Couldn't query the device attributes\n");
> -		goto port_cleanup;
> +	if (device->ops.query_device) {
> +		ret = device->ops.query_device(device, &device->attrs, &uhw);
> +		if (ret) {
> +			dev_warn(&device->dev,
> +				 "Couldn't query the device attributes\n");
> +			goto port_cleanup;
> +		}

Even uverbs_cmd.c needs attrs to be valid, drop this hunk. If a driver
providers a NULL query_device then it will rightly crash on
registration.

> @@ -920,6 +926,9 @@ int ib_query_port(struct ib_device *device,
>  	union ib_gid gid;
>  	int err;
>  
> +	if (!device->ops.query_port)
> +		return -EOPNOTSUPP;

Again, if query_port is not supported then the related sysfs file
should not even be created.

> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
> index c073a4720d28..fb6074d2e394 100644
> +++ b/include/rdma/ib_verbs.h
> @@ -2566,6 +2566,8 @@ struct ib_device {
>  	__be64			     node_guid;
>  	u32			     local_dma_lkey;
>  	u16                          is_switch:1;
> +	/* Indicates kernel verbs support, should not be used in drivers */
> +	u8                           kverbs_provider:1;

u16 surely?

Jason
Gal Pressman Jan. 15, 2019, 11:16 a.m. UTC | #2
On 14-Jan-19 22:25, Jason Gunthorpe wrote:
> On Thu, Jan 10, 2019 at 04:23:35PM +0200, Gal Pressman wrote:
>> Drivers that do not provide kernel verbs support should not be used by
>> ib kernel clients and fail.
>> In case a device does not implement all mandatory verbs for kverbs usage
>> mark it as a non kverbs provider and prevent its usage for all clients
>> except for uverbs.
>>
>> The device is marked as a non kverbs provider using the
>> 'kverbs_provider' flag which should only be set by the core code.
>> The clients can choose whether kverbs are requested for its usage using
>> the 'no_kverbs_req' flag which is currently set for uverbs only.
>>
>> This patch allows drivers to remove mandatory verbs stubs and simply set
>> the callback to NULL. The IB device will be registered as a non-kverbs
>> provider.
>>
>> Signed-off-by: Gal Pressman <galpress@amazon.com>
>>  drivers/infiniband/core/device.c      | 35 ++++++++++++++++++++++-------------
>>  drivers/infiniband/core/uverbs_main.c |  1 +
>>  include/rdma/ib_verbs.h               |  5 +++++
>>  3 files changed, 28 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
>> index 8872453e26c0..3f32d05df55d 100644
>> +++ b/drivers/infiniband/core/device.c
>> @@ -121,13 +121,12 @@ static int ib_device_check_mandatory(struct ib_device *device)
>>  	};
>>  	int i;
>>  
>> +	device->kverbs_provider = true;
>>  	for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
>>  		if (!*(void **) ((void *) &device->ops +
>>  				 mandatory_table[i].offset)) {
>> -			dev_warn(&device->dev,
>> -				 "Device is missing mandatory function %s\n",
>> -				 mandatory_table[i].name);
>> -			return -EINVAL;
>> +			device->kverbs_provider = false;
>> +			break;
>>  		}
>>  	}
>>  
>> @@ -330,6 +329,9 @@ static int add_client_context(struct ib_device *device, struct ib_client *client
>>  {
>>  	struct ib_client_data *context;
>>  
>> +	if (!device->kverbs_provider && !client->no_kverbs_req)
>> +		return -EOPNOTSUPP;
> 
> Return 0, it is not a failure, the client is just not supported on
> this device. Not that it matters right now..

Return 0 means that client->add() will be called, which is what we're trying to
avoid.

> 
>> @@ -374,10 +376,12 @@ static int read_port_immutable(struct ib_device *device)
>>  		return -ENOMEM;
>>  
>>  	for (port = start_port; port <= end_port; ++port) {
>> -		ret = device->ops.get_port_immutable(
>> -			device, port, &device->port_immutable[port]);
>> -		if (ret)
>> -			return ret;
>> +		if (device->ops.get_port_immutable) {
>> +			ret = device->ops.get_port_immutable(
>> +				device, port, &device->port_immutable[port]);
>> +			if (ret)
>> +				return ret;
>> +		}
>>  
>>  		if (verify_immutable(device, port))
>>  			return -EINVAL;
> 
> Can we still call verify_immutable?

static int verify_immutable(const struct ib_device *dev, u8 port)
{
	return WARN_ON(!rdma_cap_ib_mad(dev, port) &&
			    rdma_max_mad_size(dev, port) != 0);
}

Since port immutable struct is cleared, the check will pass.
Do you prefer verify_immutable under the if statement?

> 
>> @@ -537,11 +541,13 @@ static int setup_device(struct ib_device *device)
>>  	}
>>  
>>  	memset(&device->attrs, 0, sizeof(device->attrs));
>> -	ret = device->ops.query_device(device, &device->attrs, &uhw);
>> -	if (ret) {
>> -		dev_warn(&device->dev,
>> -			 "Couldn't query the device attributes\n");
>> -		goto port_cleanup;
>> +	if (device->ops.query_device) {
>> +		ret = device->ops.query_device(device, &device->attrs, &uhw);
>> +		if (ret) {
>> +			dev_warn(&device->dev,
>> +				 "Couldn't query the device attributes\n");
>> +			goto port_cleanup;
>> +		}
> 
> Even uverbs_cmd.c needs attrs to be valid, drop this hunk. If a driver
> providers a NULL query_device then it will rightly crash on
> registration.

ACK.

> 
>> @@ -920,6 +926,9 @@ int ib_query_port(struct ib_device *device,
>>  	union ib_gid gid;
>>  	int err;
>>  
>> +	if (!device->ops.query_port)
>> +		return -EOPNOTSUPP;
> 
> Again, if query_port is not supported then the related sysfs file
> should not even be created.

Needed for the cache setup as part of ib_register_device.

> 
>> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
>> index c073a4720d28..fb6074d2e394 100644
>> +++ b/include/rdma/ib_verbs.h
>> @@ -2566,6 +2566,8 @@ struct ib_device {
>>  	__be64			     node_guid;
>>  	u32			     local_dma_lkey;
>>  	u16                          is_switch:1;
>> +	/* Indicates kernel verbs support, should not be used in drivers */
>> +	u8                           kverbs_provider:1;
> 
> u16 surely?

Right.
Jason Gunthorpe Jan. 18, 2019, 9:22 p.m. UTC | #3
On Tue, Jan 15, 2019 at 01:16:00PM +0200, Gal Pressman wrote:
> On 14-Jan-19 22:25, Jason Gunthorpe wrote:
> > On Thu, Jan 10, 2019 at 04:23:35PM +0200, Gal Pressman wrote:
> >> Drivers that do not provide kernel verbs support should not be used by
> >> ib kernel clients and fail.
> >> In case a device does not implement all mandatory verbs for kverbs usage
> >> mark it as a non kverbs provider and prevent its usage for all clients
> >> except for uverbs.
> >>
> >> The device is marked as a non kverbs provider using the
> >> 'kverbs_provider' flag which should only be set by the core code.
> >> The clients can choose whether kverbs are requested for its usage using
> >> the 'no_kverbs_req' flag which is currently set for uverbs only.
> >>
> >> This patch allows drivers to remove mandatory verbs stubs and simply set
> >> the callback to NULL. The IB device will be registered as a non-kverbs
> >> provider.
> >>
> >> Signed-off-by: Gal Pressman <galpress@amazon.com>
> >>  drivers/infiniband/core/device.c      | 35 ++++++++++++++++++++++-------------
> >>  drivers/infiniband/core/uverbs_main.c |  1 +
> >>  include/rdma/ib_verbs.h               |  5 +++++
> >>  3 files changed, 28 insertions(+), 13 deletions(-)
> >>
> >> diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
> >> index 8872453e26c0..3f32d05df55d 100644
> >> +++ b/drivers/infiniband/core/device.c
> >> @@ -121,13 +121,12 @@ static int ib_device_check_mandatory(struct ib_device *device)
> >>  	};
> >>  	int i;
> >>  
> >> +	device->kverbs_provider = true;
> >>  	for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
> >>  		if (!*(void **) ((void *) &device->ops +
> >>  				 mandatory_table[i].offset)) {
> >> -			dev_warn(&device->dev,
> >> -				 "Device is missing mandatory function %s\n",
> >> -				 mandatory_table[i].name);
> >> -			return -EINVAL;
> >> +			device->kverbs_provider = false;
> >> +			break;
> >>  		}
> >>  	}
> >>  
> >> @@ -330,6 +329,9 @@ static int add_client_context(struct ib_device *device, struct ib_client *client
> >>  {
> >>  	struct ib_client_data *context;
> >>  
> >> +	if (!device->kverbs_provider && !client->no_kverbs_req)
> >> +		return -EOPNOTSUPP;
> > 
> > Return 0, it is not a failure, the client is just not supported on
> > this device. Not that it matters right now..
> 
> Return 0 means that client->add() will be called, which is what we're trying to
> avoid.

Okay, I have a series that changes this..

> > 
> >> @@ -374,10 +376,12 @@ static int read_port_immutable(struct ib_device *device)
> >>  		return -ENOMEM;
> >>  
> >>  	for (port = start_port; port <= end_port; ++port) {
> >> -		ret = device->ops.get_port_immutable(
> >> -			device, port, &device->port_immutable[port]);
> >> -		if (ret)
> >> -			return ret;
> >> +		if (device->ops.get_port_immutable) {
> >> +			ret = device->ops.get_port_immutable(
> >> +				device, port, &device->port_immutable[port]);
> >> +			if (ret)
> >> +				return ret;
> >> +		}
> >>  
> >>  		if (verify_immutable(device, port))
> >>  			return -EINVAL;
> > 
> > Can we still call verify_immutable?
> 
> static int verify_immutable(const struct ib_device *dev, u8 port)
> {
> 	return WARN_ON(!rdma_cap_ib_mad(dev, port) &&
> 			    rdma_max_mad_size(dev, port) != 0);
> }
> 
> Since port immutable struct is cleared, the check will pass.
> Do you prefer verify_immutable under the if statement?

Yes, no clarity in verifying unfilled data

> >> @@ -920,6 +926,9 @@ int ib_query_port(struct ib_device *device,
> >>  	union ib_gid gid;
> >>  	int err;
> >>  
> >> +	if (!device->ops.query_port)
> >> +		return -EOPNOTSUPP;
> > 
> > Again, if query_port is not supported then the related sysfs file
> > should not even be created.
> 
> Needed for the cache setup as part of ib_register_device.

Hum, what does the gid cache even do for !kverbs? uverbs uses it I
suppose, but it won't work right at all if kverbs are not present..

That probably needs a similar fixing to sysfs, to safely disable the
functionality...

Jason
Gal Pressman Jan. 20, 2019, 9:05 a.m. UTC | #4
On 18-Jan-19 23:22, Jason Gunthorpe wrote:
>>>> @@ -920,6 +926,9 @@ int ib_query_port(struct ib_device *device,
>>>>  	union ib_gid gid;
>>>>  	int err;
>>>>  
>>>> +	if (!device->ops.query_port)
>>>> +		return -EOPNOTSUPP;
>>>
>>> Again, if query_port is not supported then the related sysfs file
>>> should not even be created.
>>
>> Needed for the cache setup as part of ib_register_device.
> 
> Hum, what does the gid cache even do for !kverbs? uverbs uses it I
> suppose, but it won't work right at all if kverbs are not present..
> 
> That probably needs a similar fixing to sysfs, to safely disable the
> functionality...

Why is the GID cache table dependent on kverbs? EFA for example uses it when
querying the GID sysfs. Sure, it can bypass the cache and query the driver, but
it's probably better to use the same flow for both kverbs and non-kverbs.

I can return 0 instead of -EOPNOTSUPP in ib_query_port in order to pass the
device registration successfully.
Alternatively, one could argue that if phys_port_cnt > 0 query_port should be
implemented as well (and remove the if statement entirely).
Jason Gunthorpe Jan. 21, 2019, 2:15 a.m. UTC | #5
On Sun, Jan 20, 2019 at 11:05:12AM +0200, Gal Pressman wrote:
> On 18-Jan-19 23:22, Jason Gunthorpe wrote:
> >>>> @@ -920,6 +926,9 @@ int ib_query_port(struct ib_device *device,
> >>>>  	union ib_gid gid;
> >>>>  	int err;
> >>>>  
> >>>> +	if (!device->ops.query_port)
> >>>> +		return -EOPNOTSUPP;
> >>>
> >>> Again, if query_port is not supported then the related sysfs file
> >>> should not even be created.
> >>
> >> Needed for the cache setup as part of ib_register_device.
> > 
> > Hum, what does the gid cache even do for !kverbs? uverbs uses it I
> > suppose, but it won't work right at all if kverbs are not present..
> > 
> > That probably needs a similar fixing to sysfs, to safely disable the
> > functionality...
> 
> Why is the GID cache table dependent on kverbs? EFA for example uses it when
> querying the GID sysfs. 

It used to be mostly used by kverbs only, but I guess we now have some
other stuff with the various new lifetime things and what not.

> Sure, it can bypass the cache and query the driver, but it's
> probably better to use the same flow for both kverbs and non-kverbs.

The gid cache code is some of the more terrifying code in the
tree.. Are you going to review all patches for it to make sure it
doesn't break your device?

IMHO, just return the only GID in query device and be done with it -
forget about sysfs.

> I can return 0 instead of -EOPNOTSUPP in ib_query_port in order to pass the
> device registration successfully.
> Alternatively, one could argue that if phys_port_cnt > 0 query_port should be
> implemented as well (and remove the if statement entirely).

Maybe some of this stuff should be split from the 'gid cache', the
pkey cache and subnet prefix cache are really unrelated and maybe only
used by kverbs..

Jason
Gal Pressman Jan. 21, 2019, 11:30 a.m. UTC | #6
On 21-Jan-19 04:15, Jason Gunthorpe wrote:
> On Sun, Jan 20, 2019 at 11:05:12AM +0200, Gal Pressman wrote:
>> On 18-Jan-19 23:22, Jason Gunthorpe wrote:
>>>>>> @@ -920,6 +926,9 @@ int ib_query_port(struct ib_device *device,
>>>>>>  	union ib_gid gid;
>>>>>>  	int err;
>>>>>>  
>>>>>> +	if (!device->ops.query_port)
>>>>>> +		return -EOPNOTSUPP;
>>>>>
>>>>> Again, if query_port is not supported then the related sysfs file
>>>>> should not even be created.
>>>>
>>>> Needed for the cache setup as part of ib_register_device.
>>>
>>> Hum, what does the gid cache even do for !kverbs? uverbs uses it I
>>> suppose, but it won't work right at all if kverbs are not present..
>>>
>>> That probably needs a similar fixing to sysfs, to safely disable the
>>> functionality...
>>
>> Why is the GID cache table dependent on kverbs? EFA for example uses it when
>> querying the GID sysfs. 
> 
> It used to be mostly used by kverbs only, but I guess we now have some
> other stuff with the various new lifetime things and what not.
> 
>> Sure, it can bypass the cache and query the driver, but it's
>> probably better to use the same flow for both kverbs and non-kverbs.
> 
> The gid cache code is some of the more terrifying code in the
> tree.. Are you going to review all patches for it to make sure it
> doesn't break your device?
> 
> IMHO, just return the only GID in query device and be done with it -
> forget about sysfs.
> 
>> I can return 0 instead of -EOPNOTSUPP in ib_query_port in order to pass the
>> device registration successfully.
>> Alternatively, one could argue that if phys_port_cnt > 0 query_port should be
>> implemented as well (and remove the if statement entirely).
> 
> Maybe some of this stuff should be split from the 'gid cache', the
> pkey cache and subnet prefix cache are really unrelated and maybe only
> used by kverbs..

The query_port callback is used from netlink (link query) as well.
I think that consolidating the checks to ib_query_port is the most reasonable
thing to do in order to protect all different flows. Both returning 0 and
-EOPNOTSUPP are fine with me.

We can also split the mandatory funcs to two: mandatory and mandatory for
kverbs. This way if a mandatory function (one that is needed for device
registration, such as query_device, query_port, port_immutable) is missing we
will fail the registration. If a kverb function is missing we'll mark the device
as non-kverbs provider. This way we can remove all the additional checks added
in this patch.
Jason Gunthorpe Jan. 21, 2019, 4:57 p.m. UTC | #7
On Mon, Jan 21, 2019 at 01:30:17PM +0200, Gal Pressman wrote:
> On 21-Jan-19 04:15, Jason Gunthorpe wrote:
> > On Sun, Jan 20, 2019 at 11:05:12AM +0200, Gal Pressman wrote:
> >> On 18-Jan-19 23:22, Jason Gunthorpe wrote:
> >>>>>> @@ -920,6 +926,9 @@ int ib_query_port(struct ib_device *device,
> >>>>>>  	union ib_gid gid;
> >>>>>>  	int err;
> >>>>>>  
> >>>>>> +	if (!device->ops.query_port)
> >>>>>> +		return -EOPNOTSUPP;
> >>>>>
> >>>>> Again, if query_port is not supported then the related sysfs file
> >>>>> should not even be created.
> >>>>
> >>>> Needed for the cache setup as part of ib_register_device.
> >>>
> >>> Hum, what does the gid cache even do for !kverbs? uverbs uses it I
> >>> suppose, but it won't work right at all if kverbs are not present..
> >>>
> >>> That probably needs a similar fixing to sysfs, to safely disable the
> >>> functionality...
> >>
> >> Why is the GID cache table dependent on kverbs? EFA for example uses it when
> >> querying the GID sysfs. 
> > 
> > It used to be mostly used by kverbs only, but I guess we now have some
> > other stuff with the various new lifetime things and what not.
> > 
> >> Sure, it can bypass the cache and query the driver, but it's
> >> probably better to use the same flow for both kverbs and non-kverbs.
> > 
> > The gid cache code is some of the more terrifying code in the
> > tree.. Are you going to review all patches for it to make sure it
> > doesn't break your device?
> > 
> > IMHO, just return the only GID in query device and be done with it -
> > forget about sysfs.
> > 
> >> I can return 0 instead of -EOPNOTSUPP in ib_query_port in order to pass the
> >> device registration successfully.
> >> Alternatively, one could argue that if phys_port_cnt > 0 query_port should be
> >> implemented as well (and remove the if statement entirely).
> > 
> > Maybe some of this stuff should be split from the 'gid cache', the
> > pkey cache and subnet prefix cache are really unrelated and maybe only
> > used by kverbs..
> 
> The query_port callback is used from netlink (link query) as well.
> I think that consolidating the checks to ib_query_port is the most reasonable
> thing to do in order to protect all different flows. Both returning 0 and
> -EOPNOTSUPP are fine with me.

I don't think query_port should fail.. ie in netlink if it fails then
you can't do get_netdev which seems useful for something like
usnic.. All the call sites would have to be audited otherwise.

> We can also split the mandatory funcs to two: mandatory and mandatory for
> kverbs. This way if a mandatory function (one that is needed for device
> registration, such as query_device, query_port, port_immutable) is missing we
> will fail the registration. If a kverb function is missing we'll mark the device
> as non-kverbs provider. This way we can remove all the additional checks added
> in this patch.

Well, this is sort of what we are doing.. Checking the functions seems
like too much babying of drivers though - we call the really mandatory
ones during registration so just NULL deref for terminally broken
drivers is not so bad.

Jason
Gal Pressman Jan. 21, 2019, 5:33 p.m. UTC | #8
On 21-Jan-19 18:57, Jason Gunthorpe wrote:
> On Mon, Jan 21, 2019 at 01:30:17PM +0200, Gal Pressman wrote:
>> On 21-Jan-19 04:15, Jason Gunthorpe wrote:
>>> On Sun, Jan 20, 2019 at 11:05:12AM +0200, Gal Pressman wrote:
>>>> On 18-Jan-19 23:22, Jason Gunthorpe wrote:
>>>>>>>> @@ -920,6 +926,9 @@ int ib_query_port(struct ib_device *device,
>>>>>>>>  	union ib_gid gid;
>>>>>>>>  	int err;
>>>>>>>>  
>>>>>>>> +	if (!device->ops.query_port)
>>>>>>>> +		return -EOPNOTSUPP;
>>>>>>>
>>>>>>> Again, if query_port is not supported then the related sysfs file
>>>>>>> should not even be created.
>>>>>>
>>>>>> Needed for the cache setup as part of ib_register_device.
>>>>>
>>>>> Hum, what does the gid cache even do for !kverbs? uverbs uses it I
>>>>> suppose, but it won't work right at all if kverbs are not present..
>>>>>
>>>>> That probably needs a similar fixing to sysfs, to safely disable the
>>>>> functionality...
>>>>
>>>> Why is the GID cache table dependent on kverbs? EFA for example uses it when
>>>> querying the GID sysfs. 
>>>
>>> It used to be mostly used by kverbs only, but I guess we now have some
>>> other stuff with the various new lifetime things and what not.
>>>
>>>> Sure, it can bypass the cache and query the driver, but it's
>>>> probably better to use the same flow for both kverbs and non-kverbs.
>>>
>>> The gid cache code is some of the more terrifying code in the
>>> tree.. Are you going to review all patches for it to make sure it
>>> doesn't break your device?
>>>
>>> IMHO, just return the only GID in query device and be done with it -
>>> forget about sysfs.
>>>
>>>> I can return 0 instead of -EOPNOTSUPP in ib_query_port in order to pass the
>>>> device registration successfully.
>>>> Alternatively, one could argue that if phys_port_cnt > 0 query_port should be
>>>> implemented as well (and remove the if statement entirely).
>>>
>>> Maybe some of this stuff should be split from the 'gid cache', the
>>> pkey cache and subnet prefix cache are really unrelated and maybe only
>>> used by kverbs..
>>
>> The query_port callback is used from netlink (link query) as well.
>> I think that consolidating the checks to ib_query_port is the most reasonable
>> thing to do in order to protect all different flows. Both returning 0 and
>> -EOPNOTSUPP are fine with me.
> 
> I don't think query_port should fail.. ie in netlink if it fails then
> you can't do get_netdev which seems useful for something like
> usnic.. All the call sites would have to be audited otherwise.
> 
>> We can also split the mandatory funcs to two: mandatory and mandatory for
>> kverbs. This way if a mandatory function (one that is needed for device
>> registration, such as query_device, query_port, port_immutable) is missing we
>> will fail the registration. If a kverb function is missing we'll mark the device
>> as non-kverbs provider. This way we can remove all the additional checks added
>> in this patch.
> 
> Well, this is sort of what we are doing.. Checking the functions seems
> like too much babying of drivers though - we call the really mandatory
> ones during registration so just NULL deref for terminally broken
> drivers is not so bad.

That sounds reasonable as well.
I'll send v6 without the NULL checks.
diff mbox series

Patch

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 8872453e26c0..3f32d05df55d 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -121,13 +121,12 @@  static int ib_device_check_mandatory(struct ib_device *device)
 	};
 	int i;
 
+	device->kverbs_provider = true;
 	for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
 		if (!*(void **) ((void *) &device->ops +
 				 mandatory_table[i].offset)) {
-			dev_warn(&device->dev,
-				 "Device is missing mandatory function %s\n",
-				 mandatory_table[i].name);
-			return -EINVAL;
+			device->kverbs_provider = false;
+			break;
 		}
 	}
 
@@ -330,6 +329,9 @@  static int add_client_context(struct ib_device *device, struct ib_client *client
 {
 	struct ib_client_data *context;
 
+	if (!device->kverbs_provider && !client->no_kverbs_req)
+		return -EOPNOTSUPP;
+
 	context = kmalloc(sizeof(*context), GFP_KERNEL);
 	if (!context)
 		return -ENOMEM;
@@ -374,10 +376,12 @@  static int read_port_immutable(struct ib_device *device)
 		return -ENOMEM;
 
 	for (port = start_port; port <= end_port; ++port) {
-		ret = device->ops.get_port_immutable(
-			device, port, &device->port_immutable[port]);
-		if (ret)
-			return ret;
+		if (device->ops.get_port_immutable) {
+			ret = device->ops.get_port_immutable(
+				device, port, &device->port_immutable[port]);
+			if (ret)
+				return ret;
+		}
 
 		if (verify_immutable(device, port))
 			return -EINVAL;
@@ -537,11 +541,13 @@  static int setup_device(struct ib_device *device)
 	}
 
 	memset(&device->attrs, 0, sizeof(device->attrs));
-	ret = device->ops.query_device(device, &device->attrs, &uhw);
-	if (ret) {
-		dev_warn(&device->dev,
-			 "Couldn't query the device attributes\n");
-		goto port_cleanup;
+	if (device->ops.query_device) {
+		ret = device->ops.query_device(device, &device->attrs, &uhw);
+		if (ret) {
+			dev_warn(&device->dev,
+				 "Couldn't query the device attributes\n");
+			goto port_cleanup;
+		}
 	}
 
 	ret = setup_port_pkey_list(device);
@@ -920,6 +926,9 @@  int ib_query_port(struct ib_device *device,
 	union ib_gid gid;
 	int err;
 
+	if (!device->ops.query_port)
+		return -EOPNOTSUPP;
+
 	if (!rdma_is_port_valid(device, port_num))
 		return -EINVAL;
 
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index fb0007aa0c27..0eafee9a2ffc 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -1127,6 +1127,7 @@  static const struct file_operations uverbs_mmap_fops = {
 
 static struct ib_client uverbs_client = {
 	.name   = "uverbs",
+	.no_kverbs_req = true,
 	.add    = ib_uverbs_add_one,
 	.remove = ib_uverbs_remove_one
 };
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index c073a4720d28..fb6074d2e394 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2566,6 +2566,8 @@  struct ib_device {
 	__be64			     node_guid;
 	u32			     local_dma_lkey;
 	u16                          is_switch:1;
+	/* Indicates kernel verbs support, should not be used in drivers */
+	u8                           kverbs_provider:1;
 	u8                           node_type;
 	u8                           phys_port_cnt;
 	struct ib_device_attr        attrs;
@@ -2620,6 +2622,9 @@  struct ib_client {
 			const struct sockaddr *addr,
 			void *client_data);
 	struct list_head list;
+
+	/* kverbs are not required by the client */
+	u8 no_kverbs_req:1;
 };
 
 struct ib_device *ib_alloc_device(size_t size);