diff mbox

[for-next,1/2] IB/uverbs: Add QP creation flags, allow blocking UD multicast loopback

Message ID 1410700802-27848-2-git-send-email-ogerlitz@mellanox.com (mailing list archive)
State Rejected
Headers show

Commit Message

Or Gerlitz Sept. 14, 2014, 1:20 p.m. UTC
Currently, there's no way for user-space applications to specify
the IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK QP creation flags defined
by commit 47ee1b9 "IB/core: Add support for multicast loopback blocking".

As a result, applications that send and receive over the same QP to
the same multicast group get all their packets bounced back to them,
which is terribly bad performance-wise.

To fix this long standing issue, add the ability to provide QP
creation flags through uverbs.

Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
---
 drivers/infiniband/core/uverbs_cmd.c |   35 ++++++++++++++++++++++++++++++++-
 include/uapi/rdma/ib_user_verbs.h    |    2 +-
 2 files changed, 34 insertions(+), 3 deletions(-)

Comments

Yann Droneaud Sept. 15, 2014, 4:52 p.m. UTC | #1
Hi,

Le dimanche 14 septembre 2014 à 16:20 +0300, Or Gerlitz a écrit :
> Currently, there's no way for user-space applications to specify
> the IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK QP creation flags defined
> by commit 47ee1b9 "IB/core: Add support for multicast loopback blocking".
> 

Commit identifier is too short, 12 digits is the norm.

> As a result, applications who send and recieve over the same QP to
> the same multicast group get all their packets bouncded back to them,
> which is terribly bad performance wise.
> 
> To fix this long standing issue, add the ability to provide QP
> creation flags through uverbs.
> 
> Signed-off-by: Matan Barak <matanb@mellanox.com>
> Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
> ---
>  drivers/infiniband/core/uverbs_cmd.c |   35 ++++++++++++++++++++++++++++++++-
>  include/uapi/rdma/ib_user_verbs.h    |    2 +-
>  2 files changed, 34 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
> index 0600c50..1ad489c 100644
> --- a/drivers/infiniband/core/uverbs_cmd.c
> +++ b/drivers/infiniband/core/uverbs_cmd.c
> @@ -1579,6 +1579,31 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
>  	return in_len;
>  }
>  
> +enum ib_uverbs_qp_create_flags {
> +	IB_UVERBS_QP_CREATE_LSO	    = 0,
> +	IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1,
> +	IB_UVERBS_SUPPORTED_FLAGS
> +};

IB_UVERBS_QP_CREATE_LSO and IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK
should be moved to include/uapi/rdma/ib_user_verbs.h.

If this is going to be flags, these values might be OR-ed together ?
Then the userspace values should be defined like the kernel ones
(see commit 47ee1b9f2e7b):

+enum ib_uverbs_qp_create_flags {
+	IB_UVERBS_QP_CREATE_LSO	    =                  1 << 0,
+	IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
+}

IB_UVERBS_SUPPORTED_FLAGS must be kept in this module, but defined
as:

#define IB_UVERBS_QP_CREATE_FLAGS_ALL (IB_UVERBS_QP_CREATE_LSO | \
				       IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)

> +
> +static int ib_uverbs_create_qp_trans(u8 u_flags)
> +{
> +	int i;
> +	int res;
> +	static const enum ib_qp_create_flags ib_uverbs_qp_create_flags[IB_UVERBS_SUPPORTED_FLAGS] = {
> +		[IB_UVERBS_QP_CREATE_LSO] = IB_QP_CREATE_IPOIB_UD_LSO,
> +		[IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK] = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
> +	};
> +
> +	if (u_flags & ~((1 << IB_UVERBS_SUPPORTED_FLAGS) - 1))
> +		return -1;
> +

	if (u_flags & ~(u8)(IB_UVERBS_QP_CREATE_FLAGS_ALL))
	   return -1;

> +	for (i = 0; i < IB_UVERBS_SUPPORTED_FLAGS; i++)
> +		if (u_flags & (1 << i))
> +			res |= ib_uverbs_qp_create_flags[i];
> +
> +	return res;
> +}
> +

This function can be replaced by compile time assert:

BUILD_BUG_ON(IB_UVERBS_QP_CREATE_LSO != IB_QP_CREATE_IPOIB_UD_LSO);
BUILD_BUG_ON(IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK !=
IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK);

This way the values can be used without conversion.

>  ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
>  			    const char __user *buf, int in_len,
>  			    int out_len)
> @@ -1595,7 +1620,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
>  	struct ib_srq                  *srq = NULL;
>  	struct ib_qp                   *qp;
>  	struct ib_qp_init_attr          attr;
> -	int ret;
> +	int flags, ret;
>  
>  	if (out_len < sizeof resp)
>  		return -ENOSPC;
> @@ -1664,7 +1689,13 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
>  	attr.xrcd	   = xrcd;
>  	attr.sq_sig_type   = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
>  	attr.qp_type       = cmd.qp_type;
> -	attr.create_flags  = 0;
> +
> +	flags  = ib_uverbs_create_qp_trans(cmd.create_flags);
> +	if (flags < 0) {
> +		ret = -EINVAL;
> +		goto err_put;
> +	}
> +	attr.create_flags  = flags;
>  
>  	attr.cap.max_send_wr     = cmd.max_send_wr;
>  	attr.cap.max_recv_wr     = cmd.max_recv_wr;
> diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
> index 26daf55..fac6975 100644
> --- a/include/uapi/rdma/ib_user_verbs.h
> +++ b/include/uapi/rdma/ib_user_verbs.h
> @@ -470,7 +470,7 @@ struct ib_uverbs_create_qp {
>  	__u8  sq_sig_all;
>  	__u8  qp_type;
>  	__u8  is_srq;
> -	__u8  reserved;
> +	__u8  create_flags;
>  	__u64 driver_data[0];
>  };
>  

I'm not really happy with the way the "reserved" field is used by this
patch: as the field was never checked for being set to 0, any value could
be given by userspace (imagine the structure lying on the stack). Using
it now could be dangerous. It must be double-checked.

http://blog.ffwll.ch/2013/11/botching-up-ioctls.html

"Check all unused fields and flags and all the padding for whether it's
0, and reject the ioctl if that's not the case. Otherwise your nice plan
for future extensions is going right down the gutters since someone will
submit an ioctl struct with random stack garbage in the yet unused
parts. Which then bakes in the ABI that those fields can never be used
for anything else but garbage."

Regards.
Or Gerlitz Sept. 15, 2014, 5:36 p.m. UTC | #2
On Mon, Sep 15, 2014 at 7:52 PM, Yann Droneaud <ydroneaud@opteya.com> wrote:
>> --- a/include/uapi/rdma/ib_user_verbs.h
>> +++ b/include/uapi/rdma/ib_user_verbs.h
>> @@ -470,7 +470,7 @@ struct ib_uverbs_create_qp {
>>       __u8  sq_sig_all;
>>       __u8  qp_type;
>>       __u8  is_srq;
>> -     __u8  reserved;
>> +     __u8  create_flags;
>>       __u64 driver_data[0];
>>  };
>>
>
> I'm not really happy with the way "reserved" field is used by this
> patch: as the field wasn't check for being set to 0, any value could be
> given by userspace (imagine the structure lay on stack). Using it now
> could be dangerous. It must be double checked.

We  are only allowing user space applications to program certain
aspects in the behavior
of their own QPs, no risk to the system/kernel state.

We've done it very successfully in the past with adding the link_layer field
to struct ib_uverbs_query_port_resp as part of the RoCE story instead of a
reserved field.

Or
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yann Droneaud Sept. 18, 2014, 7:25 a.m. UTC | #3
Hi,

Le lundi 15 septembre 2014 à 20:36 +0300, Or Gerlitz a écrit :
> On Mon, Sep 15, 2014 at 7:52 PM, Yann Droneaud <ydroneaud@opteya.com> wrote:
> >> --- a/include/uapi/rdma/ib_user_verbs.h
> >> +++ b/include/uapi/rdma/ib_user_verbs.h
> >> @@ -470,7 +470,7 @@ struct ib_uverbs_create_qp {
> >>       __u8  sq_sig_all;
> >>       __u8  qp_type;
> >>       __u8  is_srq;
> >> -     __u8  reserved;
> >> +     __u8  create_flags;
> >>       __u64 driver_data[0];
> >>  };
> >>
> >
> > I'm not really happy with the way "reserved" field is used by this
> > patch: as the field wasn't check for being set to 0, any value could be
> > given by userspace (imagine the structure lay on stack). Using it now
> > could be dangerous. It must be double checked.
> 
> We  are only allowing user space applications to program certain
> aspects in the behavior
> of their own QPs, no risk to the system/kernel state.
> 

I've checked the implementation of the most common userspace code
accessing the uverbs API, libibverbs, and found that the "reserved" field
is explicitly cleared in:

ibv_cmd_create_qp_ex():

c7e3e61052dd7 (Sean Hefty       2013-08-01 18:04:16 +0300  651)         cmd->reserved        = 0;

ibv_cmd_create_qp():

0e0604213ed79 (Roland Dreier    2006-10-04 23:57:10 +0000  726)         cmd->reserved        = 0;

The latter commit is part of libibverbs-1.1-rc1, so before libibverbs-1.1,
the reserved field was not cleared.

For example, mlx4_create_qp() allocates the ibv_create_qp structure on
the stack and doesn't clear the reserved field:

^d049a1279b82 (Roland Dreier    2007-04-09 00:49:42 -0700 358)  struct mlx4_create_qp     cmd;

So existing userspace programs using libibverbs < 1.1 are likely to break
with a newer kernel if the reserved field is going to be used.

> We've done it very successfully in the past with adding the link_layer field
> to struct ib_uverbs_query_port_resp as part of the RoCE story instead of a
> reserved field.
> 

ib_uverbs_query_port_resp is the opposite side: the kernel is adding stuff
where older userspace code doesn't expect to find anything, so this extra
information is skipped by such pre-existing programs.

Using an unused field in the request has more chance to break than using
an extra field in the response.

Again:

"Check all unused fields and flags and all the padding for whether it's 0,
 and reject the ioctl if that's not the case. Otherwise your nice plan for
 future extensions is going right down the gutters since someone will
 submit an ioctl struct with random stack garbage in the yet unused parts.
 Which then bakes in the ABI that those fields can never be used for
 anything else but garbage."

As the "reserved" field was never checked for being 0 on the kernel side
(which would have ensured it could be used in the future for another
purpose), we're in a situation where the "reserved" field is garbage when
using older libibverbs or other userspace software that can address
uverbs by-passing libibverbs.

That's likely to break existing userspace applications.

Regards.
Or Gerlitz Sept. 18, 2014, 12:51 p.m. UTC | #4
On 9/15/2014 7:52 PM, Yann Droneaud wrote:
> Hi,
>
> Le dimanche 14 septembre 2014 à 16:20 +0300, Or Gerlitz a écrit :
> Commit identifier is too short, 12 digits is the norm. 

OK, will fix

>> As a result, applications who send and recieve over the same QP to
>> the same multicast group get all their packets bouncded back to them,
>> which is terribly bad performance wise.
>>
>> To fix this long standing issue, add the ability to provide QP
>> creation flags through uverbs.
>>
>> Signed-off-by: Matan Barak <matanb@mellanox.com>
>> Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
>> ---
>>   drivers/infiniband/core/uverbs_cmd.c |   35 ++++++++++++++++++++++++++++++++-
>>   include/uapi/rdma/ib_user_verbs.h    |    2 +-
>>   2 files changed, 34 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
>> index 0600c50..1ad489c 100644
>> --- a/drivers/infiniband/core/uverbs_cmd.c
>> +++ b/drivers/infiniband/core/uverbs_cmd.c
>> @@ -1579,6 +1579,31 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
>>   	return in_len;
>>   }
>>   
>> +enum ib_uverbs_qp_create_flags {
>> +	IB_UVERBS_QP_CREATE_LSO	    = 0,
>> +	IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1,
>> +	IB_UVERBS_SUPPORTED_FLAGS
>> +};
> IB_UVERBS_QP_CREATE_LSO and IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK
> should moved in include/uapi/rdma/ib_user_verbs.h.
>
> If this is going to be flags, these values might be OR-ed together ?
> Then the userspace values should be defined like the kernel ones
> (see commit 47ee1b9f2e7b):
>
> +enum ib_uverbs_qp_create_flags {
> +	IB_UVERBS_QP_CREATE_LSO	    =                  1 << 0,
> +	IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
> +}
>

Some misunderstanding here.. enum ib_uverbs_qp_create_flags is internal 
kernel construct used
to check the flags provided by user space (the _flags suffix is 
confusing, will remove it). The actual flags
values to be used by user space are shifted values (e.g 1 << 
IB_UVERBS_QP_CREATE_LSO).

As the whole set of flags/enum/attribute values moved between libibverbs 
to the kernel, this will be defined
in libibverbs header (take a look on include/uapi/rdma/ib_user_verbs.h 
it doesn't have the enumeration for event types defined).


> IB_UVERBS_SUPPORTED_FLAGS must be kept in this module, but defined
> as:
>
> #define IB_UVERBS_QP_CREATE_FLAGS_ALL (IB_UVERBS_QP_CREATE_LSO | \
> 				       IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
>
>> +
>> +static int ib_uverbs_create_qp_trans(u8 u_flags)
>> +{
>> +	int i;
>> +	int res;
>> +	static const enum ib_qp_create_flags ib_uverbs_qp_create_flags[IB_UVERBS_SUPPORTED_FLAGS] = {
>> +		[IB_UVERBS_QP_CREATE_LSO] = IB_QP_CREATE_IPOIB_UD_LSO,
>> +		[IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK] = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
>> +	};
>> +
>> +	if (u_flags & ~((1 << IB_UVERBS_SUPPORTED_FLAGS) - 1))
>> +		return -1;
>> +
> 	if (u_flags & ~(u8)(IB_UVERBS_QP_CREATE_FLAGS_ALL))
> 	   return -1;
>
>> +	for (i = 0; i < IB_UVERBS_SUPPORTED_FLAGS; i++)
>> +		if (u_flags & (1 << i))
>> +			res |= ib_uverbs_qp_create_flags[i];
>> +
>> +	return res;
>> +}
>> +
> This function can be replaced by compile time assert:
>
> BUILD_BUG_ON(IB_UVERBS_QP_CREATE_LSO != IB_QP_CREATE_IPOIB_UD_LSO);
> BUILD_BUG_ON(IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK !=
> IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK);
>
> This way the values can be used without conversion.

We prefer to have larger flexibility here which we can do with run time 
checks (this is slow path) and remain with the practice suggested by the 
original patch.


Or.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Or Gerlitz Sept. 18, 2014, 1:03 p.m. UTC | #5
On 9/18/2014 10:25 AM, Yann Droneaud wrote:
> Le lundi 15 septembre 2014 à 20:36 +0300, Or Gerlitz a écrit :
>> On Mon, Sep 15, 2014 at 7:52 PM, Yann Droneaud <ydroneaud@opteya.com> wrote:
>>>> --- a/include/uapi/rdma/ib_user_verbs.h
>>>> +++ b/include/uapi/rdma/ib_user_verbs.h
>>>> @@ -470,7 +470,7 @@ struct ib_uverbs_create_qp {
>>>>        __u8  sq_sig_all;
>>>>        __u8  qp_type;
>>>>        __u8  is_srq;
>>>> -     __u8  reserved;
>>>> +     __u8  create_flags;
>>>>        __u64 driver_data[0];
>>>>   };
>>>>
>>> I'm not really happy with the way "reserved" field is used by this
>>> patch: as the field wasn't check for being set to 0, any value could be
>>> given by userspace (imagine the structure lay on stack). Using it now
>>> could be dangerous. It must be double checked.
>> We are only allowing user space applications to program certain
>> aspects in the behavior of their own QPs, no risk to the system/kernel state.
>>
> I've checked the implementation on the most common userspace code
> accessing the uverbs API, libibverbs, and found the "reserved" field is
> explicitely cleared in:
>
> ibv_cmd_create_qp_ex():
>
> c7e3e61052dd7 (Sean Hefty       2013-08-01 18:04:16 +0300  651)         cmd->reserved        = 0;
>
> ibv_cmd_create_qp():
>
> 0e0604213ed79 (Roland Dreier    2006-10-04 23:57:10 +0000  726)         cmd->reserved        = 0;
>
> Latter commit is part of libibverbs-1.1-rc1, so before libibverbs-1.1,
> reserved field was not cleared.
>
> For example, mlx4_create_qp() allocate the ibv_create_qp structure on
> stack and doesn't clear reserved field:
>
> ^d049a1279b82 (Roland Dreier    2007-04-09 00:49:42 -0700 358)  struct mlx4_create_qp     cmd;
>

As  you noted, in libibverbs this field is cleared since 2006 for all 
use cases of this uverbs command . The libmlx4 code pointer you provided 
is calling into libibverbs code (ibv_cmd_create_qp) which does the zeroing.


> So existing userspace program using libibverbs < 1.1 is likely to break with newer kernel if reserved field is going to be used.

as I wrote earlier, not break.

>> We've done it very successfully in the past with adding the link_layer field
>> to struct ib_uverbs_query_port_resp as part of the RoCE story instead of a
>> reserved field.
>>
> ib_uverbs_query_port_resp is the opposite side: the kernel is adding stuff
> where older userspace code don't expect to found anything, so this extra
> information is skipped by such pre-existing program.

yep, the example wasn't 1:1 to this case, understood.

> Using an unused field in the request has more chance to break than using
> an extra field in the response.
>
> Again:
>
> "Check all unused fields and flags and all the padding for whether it's 0,
>   and reject the ioctl if that's not the case. Otherwise your nice plan for
>   future extensions is going right down the gutters since someone will
>   submit an ioctl struct with random stack garbage in the yet unused parts.
>   Which then bakes in the ABI that those fields can never be used for
>   anything else but garbage."
>
> As the "reserved" field was never check for being 0 in kernel side,
> ensuring it could be used in the future for other purpose, we're in
> a situation where "reserved" field is garbage when using older
> libibverbs or other userspace software that can address uverbs
> by-passing libibverbs.
>
> That's likely to break existing userspace application.

Again, applications that for some reason don't zero out this field, will 
get their QP to potentially support some features which they will not use.

Or.



--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yann Droneaud Sept. 19, 2014, 8:23 a.m. UTC | #6
Hi,

Le jeudi 18 septembre 2014 à 15:51 +0300, Or Gerlitz a écrit :
> On 9/15/2014 7:52 PM, Yann Droneaud wrote:
> > Le dimanche 14 septembre 2014 à 16:20 +0300, Or Gerlitz a écrit :
>

[...]

> >> As a result, applications who send and recieve over the same QP to
> >> the same multicast group get all their packets bouncded back to them,
> >> which is terribly bad performance wise.
> >>
> >> To fix this long standing issue, add the ability to provide QP
> >> creation flags through uverbs.
> >>
> >> Signed-off-by: Matan Barak <matanb@mellanox.com>
> >> Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
> >> ---
> >>   drivers/infiniband/core/uverbs_cmd.c |   35 ++++++++++++++++++++++++++++++++-
> >>   include/uapi/rdma/ib_user_verbs.h    |    2 +-
> >>   2 files changed, 34 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
> >> index 0600c50..1ad489c 100644
> >> --- a/drivers/infiniband/core/uverbs_cmd.c
> >> +++ b/drivers/infiniband/core/uverbs_cmd.c
> >> @@ -1579,6 +1579,31 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
> >>   	return in_len;
> >>   }
> >>   
> >> +enum ib_uverbs_qp_create_flags {
> >> +	IB_UVERBS_QP_CREATE_LSO	    = 0,
> >> +	IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1,
> >> +	IB_UVERBS_SUPPORTED_FLAGS
> >> +};
> > IB_UVERBS_QP_CREATE_LSO and IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK
> > should moved in include/uapi/rdma/ib_user_verbs.h.
> >
> > If this is going to be flags, these values might be OR-ed together ?
> > Then the userspace values should be defined like the kernel ones
> > (see commit 47ee1b9f2e7b):
> >
> > +enum ib_uverbs_qp_create_flags {
> > +	IB_UVERBS_QP_CREATE_LSO	    =                  1 << 0,
> > +	IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
> > +}
> >
> 
> Some misunderstanding here.. enum ib_uverbs_qp_create_flags is internal 
> kernel construct used
> to check the flags provided by user space (the _flags suffix is 
> confusing, will remove it). The actual flags
> values to be used by user space are shifted values (e.g 1 << 
> IB_UVERBS_QP_CREATE_LSO).
> 

OK, I hadn't paid attention to the shift in the conversion function.

I would still prefer having the flag value be declared, instead of the 
shift offset.

> As the whole set of flags/enum/attribute values moved between libibverbs 
> to the kernel, this will be defined
> in libibverbs header (take a look on include/uapi/rdma/ib_user_verbs.h 
> it doesn't have the enumeration for event types defined).
> 
> 

OK.

(BTW, not having the flags/enum/attributes in 
 include/uapi/rdma/ib_user_verbs.h is a pity, as the header 
 is supposed to expose kernel API/ABI to userspace).

> > IB_UVERBS_SUPPORTED_FLAGS must be kept in this module, but defined
> > as:
> >
> > #define IB_UVERBS_QP_CREATE_FLAGS_ALL (IB_UVERBS_QP_CREATE_LSO | \
> > 				       IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
> >
> >> +
> >> +static int ib_uverbs_create_qp_trans(u8 u_flags)
> >> +{
> >> +	int i;
> >> +	int res;
> >> +	static const enum ib_qp_create_flags ib_uverbs_qp_create_flags[IB_UVERBS_SUPPORTED_FLAGS] = {
> >> +		[IB_UVERBS_QP_CREATE_LSO] = IB_QP_CREATE_IPOIB_UD_LSO,
> >> +		[IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK] = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
> >> +	};
> >> +
> >> +	if (u_flags & ~((1 << IB_UVERBS_SUPPORTED_FLAGS) - 1))
> >> +		return -1;
> >> +
> > 	if (u_flags & ~(u8)(IB_UVERBS_QP_CREATE_FLAGS_ALL))
> > 	   return -1;
> >
> >> +	for (i = 0; i < IB_UVERBS_SUPPORTED_FLAGS; i++)
> >> +		if (u_flags & (1 << i))
> >> +			res |= ib_uverbs_qp_create_flags[i];
> >> +
> >> +	return res;
> >> +}
> >> +
> > This function can be replaced by compile time assert:
> >
> > BUILD_BUG_ON(IB_UVERBS_QP_CREATE_LSO != IB_QP_CREATE_IPOIB_UD_LSO);
> > BUILD_BUG_ON(IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK !=
> > IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK);
> >
> > This way the values can be used without conversion.
> 
> We prefer to have larger flexibility here which we can do with run time 
> checks (this is slow path) and remain with the practice suggested by the 
> original patch.
> 

I don't buy this argument: it's not clear to me that there's an 
advantage in such complexity. If the internal kernel flags are going to
change, the function might be needed, but until that happens, it's not.

What I've suggested is a common construction, see

http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/fs/signalfd.c?id=v3.17-rc5#n262
fs/signalfd.c:signalfd()

        /* Check the SFD_* constants for consistency.  */
        BUILD_BUG_ON(SFD_CLOEXEC != O_CLOEXEC);
        BUILD_BUG_ON(SFD_NONBLOCK != O_NONBLOCK);

        if (flags & ~(SFD_CLOEXEC | SFD_NONBLOCK))
                return -EINVAL;


http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/fs/timerfd.c?id=v3.17-rc5#n381
fs/timerfd.c:timerfd_create()

        /* Check the TFD_* constants for consistency.  */
        BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC);
        BUILD_BUG_ON(TFD_NONBLOCK != O_NONBLOCK);

        if ((flags & ~TFD_CREATE_FLAGS) ||


See also perf_event_open() for the check against recognized flags.


Regards.
diff mbox

Patch

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 0600c50..1ad489c 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1579,6 +1579,31 @@  ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
 	return in_len;
 }
 
+enum ib_uverbs_qp_create_flags {
+	IB_UVERBS_QP_CREATE_LSO	    = 0,	/* bit position, not a mask */
+	IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1,	/* bit position, not a mask */
+	IB_UVERBS_SUPPORTED_FLAGS	/* count of supported bit positions */
+};
+
+static int ib_uverbs_create_qp_trans(u8 u_flags)
+{
+	int i;
+	int res = 0;	/* must start at 0: translated bits are OR-ed in below */
+	static const enum ib_qp_create_flags ib_uverbs_qp_create_flags[IB_UVERBS_SUPPORTED_FLAGS] = {
+		[IB_UVERBS_QP_CREATE_LSO] = IB_QP_CREATE_IPOIB_UD_LSO,
+		[IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK] = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
+	};
+
+	if (u_flags & ~((1 << IB_UVERBS_SUPPORTED_FLAGS) - 1))
+		return -1;	/* a bit outside the supported positions is set */
+
+	for (i = 0; i < IB_UVERBS_SUPPORTED_FLAGS; i++)
+		if (u_flags & (1 << i))
+			res |= ib_uverbs_qp_create_flags[i];
+
+	return res;	/* OR of the kernel flags selected by u_flags */
+}
+
 ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
 			    const char __user *buf, int in_len,
 			    int out_len)
@@ -1595,7 +1620,7 @@  ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
 	struct ib_srq                  *srq = NULL;
 	struct ib_qp                   *qp;
 	struct ib_qp_init_attr          attr;
-	int ret;
+	int flags, ret;
 
 	if (out_len < sizeof resp)
 		return -ENOSPC;
@@ -1664,7 +1689,13 @@  ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
 	attr.xrcd	   = xrcd;
 	attr.sq_sig_type   = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
 	attr.qp_type       = cmd.qp_type;
-	attr.create_flags  = 0;
+
+	flags  = ib_uverbs_create_qp_trans(cmd.create_flags);
+	if (flags < 0) {
+		ret = -EINVAL;
+		goto err_put;
+	}
+	attr.create_flags  = flags;
 
 	attr.cap.max_send_wr     = cmd.max_send_wr;
 	attr.cap.max_recv_wr     = cmd.max_recv_wr;
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 26daf55..fac6975 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -470,7 +470,7 @@  struct ib_uverbs_create_qp {
 	__u8  sq_sig_all;
 	__u8  qp_type;
 	__u8  is_srq;
-	__u8  reserved;
+	__u8  create_flags;
 	__u64 driver_data[0];
 };