diff mbox

[v2,for-next,1/7] IB/core: Extend ib_uverbs_create_qp

Message ID 1444909482-17113-2-git-send-email-eranbe@mellanox.com (mailing list archive)
State Superseded
Headers show

Commit Message

Eran Ben Elisha Oct. 15, 2015, 11:44 a.m. UTC
ib_uverbs_ex_create_qp follows the extension verbs
mechanism. New features (for example, QP creation flags
field which is added in a downstream patch) could used
via user-space libraries without breaking the ABI.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
---
 drivers/infiniband/core/uverbs.h      |   1 +
 drivers/infiniband/core/uverbs_cmd.c  | 259 +++++++++++++++++++++++++---------
 drivers/infiniband/core/uverbs_main.c |   1 +
 include/uapi/rdma/ib_user_verbs.h     |  26 ++++
 4 files changed, 222 insertions(+), 65 deletions(-)

Comments

Haggai Eran Oct. 21, 2015, 8:34 a.m. UTC | #1
On 15/10/2015 14:44, Eran Ben Elisha wrote:
> ib_uverbs_ex_create_qp follows the extension verbs
> mechanism. New features (for example, QP creation flags
> field which is added in a downstream patch) could used
> via user-space libraries without breaking the ABI.
> 
> Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
> ---

> diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
> index be4cb9f..e795d59 100644
> --- a/drivers/infiniband/core/uverbs_cmd.c
> +++ b/drivers/infiniband/core/uverbs_cmd.c
> @@ -1741,66 +1741,65 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
>  	return in_len;
>  }
>  
> -ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
> -			    struct ib_device *ib_dev,
> -			    const char __user *buf, int in_len,
> -			    int out_len)
> -{
> -	struct ib_uverbs_create_qp      cmd;
> -	struct ib_uverbs_create_qp_resp resp;
> -	struct ib_udata                 udata;
> -	struct ib_uqp_object           *obj;
> -	struct ib_device	       *device;
> -	struct ib_pd                   *pd = NULL;
> -	struct ib_xrcd		       *xrcd = NULL;
> -	struct ib_uobject	       *uninitialized_var(xrcd_uobj);
> -	struct ib_cq                   *scq = NULL, *rcq = NULL;
> -	struct ib_srq                  *srq = NULL;
> -	struct ib_qp                   *qp;
> -	struct ib_qp_init_attr          attr;
> -	int ret;
> -
> -	if (out_len < sizeof resp)
> -		return -ENOSPC;
> -
> -	if (copy_from_user(&cmd, buf, sizeof cmd))
> -		return -EFAULT;
> +static int create_qp(struct ib_uverbs_file *file,
> +		     struct ib_udata *ucore,
> +		     struct ib_udata *uhw,
> +		     struct ib_uverbs_ex_create_qp *cmd,
> +		     size_t cmd_sz,
> +		     int (*cb)(struct ib_uverbs_file *file,
> +			       struct ib_uverbs_ex_create_qp_resp *resp,
> +			       struct ib_udata *udata),
> +		     void *context)
> +{
> +	struct ib_uqp_object		*obj;
> +	struct ib_device		*device;
> +	struct ib_pd			*pd = NULL;
> +	struct ib_xrcd			*xrcd = NULL;
> +	struct ib_uobject		*uninitialized_var(xrcd_uobj);
> +	struct ib_cq			*scq = NULL, *rcq = NULL;
> +	struct ib_srq			*srq = NULL;
> +	struct ib_qp			*qp;
> +	char				*buf;
> +	struct ib_qp_init_attr		attr;
> +	struct ib_uverbs_ex_create_qp_resp resp;
> +	int				ret;
>  
> -	if (cmd.qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
> +	if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
>  		return -EPERM;
>  
> -	INIT_UDATA(&udata, buf + sizeof cmd,
> -		   (unsigned long) cmd.response + sizeof resp,
> -		   in_len - sizeof cmd, out_len - sizeof resp);
> -
>  	obj = kzalloc(sizeof *obj, GFP_KERNEL);
>  	if (!obj)
>  		return -ENOMEM;
>  
> -	init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class);
> +	init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext,
> +		  &qp_lock_class);
>  	down_write(&obj->uevent.uobject.mutex);
>  
> -	if (cmd.qp_type == IB_QPT_XRC_TGT) {
> -		xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj);
> +	if (cmd->qp_type == IB_QPT_XRC_TGT) {
> +		xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext,
> +				     &xrcd_uobj);
>  		if (!xrcd) {
>  			ret = -EINVAL;
>  			goto err_put;
>  		}
>  		device = xrcd->device;
>  	} else {
> -		if (cmd.qp_type == IB_QPT_XRC_INI) {
> -			cmd.max_recv_wr = cmd.max_recv_sge = 0;
> +		if (cmd->qp_type == IB_QPT_XRC_INI) {
> +			cmd->max_recv_wr = 0;
> +			cmd->max_recv_sge = 0;
>  		} else {
> -			if (cmd.is_srq) {
> -				srq = idr_read_srq(cmd.srq_handle, file->ucontext);
> +			if (cmd->is_srq) {
> +				srq = idr_read_srq(cmd->srq_handle,
> +						   file->ucontext);
>  				if (!srq || srq->srq_type != IB_SRQT_BASIC) {
>  					ret = -EINVAL;
>  					goto err_put;
>  				}
>  			}
>  
> -			if (cmd.recv_cq_handle != cmd.send_cq_handle) {
> -				rcq = idr_read_cq(cmd.recv_cq_handle, file->ucontext, 0);
> +			if (cmd->recv_cq_handle != cmd->send_cq_handle) {
> +				rcq = idr_read_cq(cmd->recv_cq_handle,
> +						  file->ucontext, 0);
>  				if (!rcq) {
>  					ret = -EINVAL;
>  					goto err_put;
> @@ -1808,9 +1807,9 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
>  			}
>  		}
>  
> -		scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, !!rcq);
> +		scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq);
>  		rcq = rcq ?: scq;
> -		pd  = idr_read_pd(cmd.pd_handle, file->ucontext);
> +		pd  = idr_read_pd(cmd->pd_handle, file->ucontext);
>  		if (!pd || !scq) {
>  			ret = -EINVAL;
>  			goto err_put;
> @@ -1825,31 +1824,54 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
>  	attr.recv_cq       = rcq;
>  	attr.srq           = srq;
>  	attr.xrcd	   = xrcd;
> -	attr.sq_sig_type   = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
> -	attr.qp_type       = cmd.qp_type;
> +	attr.sq_sig_type   = cmd->sq_sig_all ? IB_SIGNAL_ALL_WR :
> +					      IB_SIGNAL_REQ_WR;
> +	attr.qp_type       = cmd->qp_type;
>  	attr.create_flags  = 0;
>  
> -	attr.cap.max_send_wr     = cmd.max_send_wr;
> -	attr.cap.max_recv_wr     = cmd.max_recv_wr;
> -	attr.cap.max_send_sge    = cmd.max_send_sge;
> -	attr.cap.max_recv_sge    = cmd.max_recv_sge;
> -	attr.cap.max_inline_data = cmd.max_inline_data;
> +	attr.cap.max_send_wr     = cmd->max_send_wr;
> +	attr.cap.max_recv_wr     = cmd->max_recv_wr;
> +	attr.cap.max_send_sge    = cmd->max_send_sge;
> +	attr.cap.max_recv_sge    = cmd->max_recv_sge;
> +	attr.cap.max_inline_data = cmd->max_inline_data;
>  
>  	obj->uevent.events_reported     = 0;
>  	INIT_LIST_HEAD(&obj->uevent.event_list);
>  	INIT_LIST_HEAD(&obj->mcast_list);
>  
> -	if (cmd.qp_type == IB_QPT_XRC_TGT)
> +	if (cmd_sz >= offsetof(typeof(*cmd), create_flags) +
> +		      sizeof(cmd->create_flags))
> +		attr.create_flags = cmd->create_flags;
> +
> +	if (attr.create_flags) {
> +		ret = -EINVAL;
> +		goto err_put;
> +	}
> +
> +	if (cmd->comp_mask) {
Do you need this check in addition to the check in ib_uverbs_ex_create_qp()?

> +		ret = -EINVAL;
> +		goto err_put;
> +	}
> +
> +	buf = (void *)cmd + sizeof(*cmd);
> +	if (cmd_sz > sizeof(*cmd))
> +		if (!(buf[0] == 0 && !memcmp(buf, buf + 1,
> +					     cmd_sz - sizeof(*cmd) - 1))) {
> +			ret = -EINVAL;
> +			goto err_put;
> +		}
> +
> +	if (cmd->qp_type == IB_QPT_XRC_TGT)
>  		qp = ib_create_qp(pd, &attr);
>  	else
> -		qp = device->create_qp(pd, &attr, &udata);
> +		qp = device->create_qp(pd, &attr, uhw);
>  
>  	if (IS_ERR(qp)) {
>  		ret = PTR_ERR(qp);
>  		goto err_put;
>  	}
>  
> -	if (cmd.qp_type != IB_QPT_XRC_TGT) {
> +	if (cmd->qp_type != IB_QPT_XRC_TGT) {
>  		qp->real_qp	  = qp;
>  		qp->device	  = device;
>  		qp->pd		  = pd;
> @@ -1875,19 +1897,20 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
>  		goto err_destroy;
>  
>  	memset(&resp, 0, sizeof resp);
> -	resp.qpn             = qp->qp_num;
> -	resp.qp_handle       = obj->uevent.uobject.id;
> -	resp.max_recv_sge    = attr.cap.max_recv_sge;
> -	resp.max_send_sge    = attr.cap.max_send_sge;
> -	resp.max_recv_wr     = attr.cap.max_recv_wr;
> -	resp.max_send_wr     = attr.cap.max_send_wr;
> -	resp.max_inline_data = attr.cap.max_inline_data;
> +	resp.base.qpn             = qp->qp_num;
> +	resp.base.qp_handle       = obj->uevent.uobject.id;
> +	resp.base.max_recv_sge    = attr.cap.max_recv_sge;
> +	resp.base.max_send_sge    = attr.cap.max_send_sge;
> +	resp.base.max_recv_wr     = attr.cap.max_recv_wr;
> +	resp.base.max_send_wr     = attr.cap.max_send_wr;
> +	resp.base.max_inline_data = attr.cap.max_inline_data;
>  
> -	if (copy_to_user((void __user *) (unsigned long) cmd.response,
> -			 &resp, sizeof resp)) {
> -		ret = -EFAULT;
> -		goto err_copy;
> -	}
> +	resp.response_length = offsetof(typeof(resp), response_length) +
> +			       sizeof(resp.response_length);
> +
> +	ret = cb(file, &resp, ucore);
> +	if (ret)
> +		goto err_cb;
>  
>  	if (xrcd) {
>  		obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
> @@ -1913,9 +1936,8 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
>  
>  	up_write(&obj->uevent.uobject.mutex);
>  
> -	return in_len;
> -
> -err_copy:
> +	return 0;
> +err_cb:
>  	idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
>  
>  err_destroy:
> @@ -1937,6 +1959,113 @@ err_put:
>  	return ret;
>  }
>  
> +static int ib_uverbs_create_qp_cb(struct ib_uverbs_file *file,
> +				  struct ib_uverbs_ex_create_qp_resp *resp,
> +				  struct ib_udata *ucore)
> +{
> +	if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base)))
> +		return -EFAULT;
> +
> +	return 0;
> +}
> +
> +ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
> +			    struct ib_device *ib_dev,
> +			    const char __user *buf, int in_len,
> +			    int out_len)
> +{
> +	struct ib_uverbs_create_qp      cmd;
> +	struct ib_uverbs_ex_create_qp	cmd_ex;
> +	struct ib_udata			ucore;
> +	struct ib_udata			uhw;
> +	ssize_t resp_size = sizeof(struct ib_uverbs_create_qp_resp);
> +	int				err;

I would expect a check here that in_len >= sizeof(cmd). But I see the
previous code didn't have it either. Any reason adding the check would
break user-space?

> +
> +	if (out_len < resp_size)
> +		return -ENOSPC;
> +
> +	if (copy_from_user(&cmd, buf, sizeof(cmd)))
> +		return -EFAULT;
> +
> +	INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd),
> +		   resp_size);
> +	INIT_UDATA(&uhw, buf + sizeof(cmd),
> +		   (unsigned long)cmd.response + resp_size,
> +		   in_len - sizeof(cmd), out_len - resp_size);
> +
> +	memset(&cmd_ex, 0, sizeof(cmd_ex));
> +	cmd_ex.user_handle = cmd.user_handle;
> +	cmd_ex.pd_handle = cmd.pd_handle;
> +	cmd_ex.send_cq_handle = cmd.send_cq_handle;
> +	cmd_ex.recv_cq_handle = cmd.recv_cq_handle;
> +	cmd_ex.srq_handle = cmd.srq_handle;
> +	cmd_ex.max_send_wr = cmd.max_send_wr;
> +	cmd_ex.max_recv_wr = cmd.max_recv_wr;
> +	cmd_ex.max_send_sge = cmd.max_send_sge;
> +	cmd_ex.max_recv_sge = cmd.max_recv_sge;
> +	cmd_ex.max_inline_data = cmd.max_inline_data;
> +	cmd_ex.sq_sig_all = cmd.sq_sig_all;
> +	cmd_ex.qp_type = cmd.qp_type;
> +	cmd_ex.is_srq = cmd.is_srq;
> +
> +	err = create_qp(file, &ucore, &uhw, &cmd_ex,
> +			offsetof(typeof(cmd_ex), is_srq) +
> +			sizeof(cmd.is_srq), ib_uverbs_create_qp_cb,
> +			NULL);
> +
> +	if (err)
> +		return err;
> +
> +	return in_len;
> +}

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sagi Grimberg Oct. 21, 2015, 9:53 a.m. UTC | #2
On 10/15/2015 2:44 PM, Eran Ben Elisha wrote:
> ib_uverbs_ex_create_qp follows the extension verbs
> mechanism. New features (for example, QP creation flags
> field which is added in a downstream patch) could used
> via user-space libraries without breaking the ABI.

This is an important addition, I'll need it soon for stuff
I'm working on...

> +struct ib_uverbs_ex_create_qp {
> +	__u64 user_handle;
> +	__u32 pd_handle;
> +	__u32 send_cq_handle;
> +	__u32 recv_cq_handle;
> +	__u32 srq_handle;
> +	__u32 max_send_wr;
> +	__u32 max_recv_wr;
> +	__u32 max_send_sge;
> +	__u32 max_recv_sge;
> +	__u32 max_inline_data;
> +	__u8  sq_sig_all;
> +	__u8  qp_type;
> +	__u8  is_srq;
> +	__u8 reserved;
> +	__u32 comp_mask;
> +	__u32 create_flags;

Can we please make create_flags u64 to begin with?
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Or Gerlitz Oct. 21, 2015, 10:04 a.m. UTC | #3
On 10/21/2015 12:53 PM, Sagi Grimberg wrote:
> On 10/15/2015 2:44 PM, Eran Ben Elisha wrote:
>
>
>> +struct ib_uverbs_ex_create_qp {
>> +    __u64 user_handle;
>> +    __u32 pd_handle;
>> +    __u32 send_cq_handle;
>> +    __u32 recv_cq_handle;
>> +    __u32 srq_handle;
>> +    __u32 max_send_wr;
>> +    __u32 max_recv_wr;
>> +    __u32 max_send_sge;
>> +    __u32 max_recv_sge;
>> +    __u32 max_inline_data;
>> +    __u8  sq_sig_all;
>> +    __u8  qp_type;
>> +    __u8  is_srq;
>> +    __u8 reserved;
>> +    __u32 comp_mask;
>> +    __u32 create_flags;
>
> Can we please make create_flags u64 to begin with?


In the kernel we used 4-5 flags over 10 years, why worry?

Or.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Haggai Eran Oct. 21, 2015, 10:26 a.m. UTC | #4
On 21/10/2015 12:53, Sagi Grimberg wrote:
> On 10/15/2015 2:44 PM, Eran Ben Elisha wrote:
>> ib_uverbs_ex_create_qp follows the extension verbs
>> mechanism. New features (for example, QP creation flags
>> field which is added in a downstream patch) could used
>> via user-space libraries without breaking the ABI.
> 
> This is an important addition, I'll need it soon for stuff
> I'm working on...
> 
>> +struct ib_uverbs_ex_create_qp {
>> +    __u64 user_handle;
>> +    __u32 pd_handle;
>> +    __u32 send_cq_handle;
>> +    __u32 recv_cq_handle;
>> +    __u32 srq_handle;
>> +    __u32 max_send_wr;
>> +    __u32 max_recv_wr;
>> +    __u32 max_send_sge;
>> +    __u32 max_recv_sge;
>> +    __u32 max_inline_data;
>> +    __u8  sq_sig_all;
>> +    __u8  qp_type;
>> +    __u8  is_srq;
>> +    __u8 reserved;
>> +    __u32 comp_mask;
>> +    __u32 create_flags;
> 
> Can we please make create_flags u64 to begin with?
Note that the size of the struct must be a round number of 64-bit words
to avoid different sizeof values on 32-bit systems. If you change the
size of create_flags you'll need another 32-bit padding field in the struct.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
eran ben elisha Oct. 21, 2015, 1:46 p.m. UTC | #5
On Wed, Oct 21, 2015 at 11:34 AM, Haggai Eran <haggaie@mellanox.com> wrote:
> On 15/10/2015 14:44, Eran Ben Elisha wrote:
>> ib_uverbs_ex_create_qp follows the extension verbs
>> mechanism. New features (for example, QP creation flags
>> field which is added in a downstream patch) could used
>> via user-space libraries without breaking the ABI.
>>
>> Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
>> ---
>
>> diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
>> index be4cb9f..e795d59 100644
>> --- a/drivers/infiniband/core/uverbs_cmd.c
>> +++ b/drivers/infiniband/core/uverbs_cmd.c
>> @@ -1741,66 +1741,65 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
>>       return in_len;
>>  }
>>
>> -ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
>> -                         struct ib_device *ib_dev,
>> -                         const char __user *buf, int in_len,
>> -                         int out_len)
>> -{
>> -     struct ib_uverbs_create_qp      cmd;
>> -     struct ib_uverbs_create_qp_resp resp;
>> -     struct ib_udata                 udata;
>> -     struct ib_uqp_object           *obj;
>> -     struct ib_device               *device;
>> -     struct ib_pd                   *pd = NULL;
>> -     struct ib_xrcd                 *xrcd = NULL;
>> -     struct ib_uobject              *uninitialized_var(xrcd_uobj);
>> -     struct ib_cq                   *scq = NULL, *rcq = NULL;
>> -     struct ib_srq                  *srq = NULL;
>> -     struct ib_qp                   *qp;
>> -     struct ib_qp_init_attr          attr;
>> -     int ret;
>> -
>> -     if (out_len < sizeof resp)
>> -             return -ENOSPC;
>> -
>> -     if (copy_from_user(&cmd, buf, sizeof cmd))
>> -             return -EFAULT;
>> +static int create_qp(struct ib_uverbs_file *file,
>> +                  struct ib_udata *ucore,
>> +                  struct ib_udata *uhw,
>> +                  struct ib_uverbs_ex_create_qp *cmd,
>> +                  size_t cmd_sz,
>> +                  int (*cb)(struct ib_uverbs_file *file,
>> +                            struct ib_uverbs_ex_create_qp_resp *resp,
>> +                            struct ib_udata *udata),
>> +                  void *context)
>> +{
>> +     struct ib_uqp_object            *obj;
>> +     struct ib_device                *device;
>> +     struct ib_pd                    *pd = NULL;
>> +     struct ib_xrcd                  *xrcd = NULL;
>> +     struct ib_uobject               *uninitialized_var(xrcd_uobj);
>> +     struct ib_cq                    *scq = NULL, *rcq = NULL;
>> +     struct ib_srq                   *srq = NULL;
>> +     struct ib_qp                    *qp;
>> +     char                            *buf;
>> +     struct ib_qp_init_attr          attr;
>> +     struct ib_uverbs_ex_create_qp_resp resp;
>> +     int                             ret;
>>
>> -     if (cmd.qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
>> +     if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
>>               return -EPERM;
>>
>> -     INIT_UDATA(&udata, buf + sizeof cmd,
>> -                (unsigned long) cmd.response + sizeof resp,
>> -                in_len - sizeof cmd, out_len - sizeof resp);
>> -
>>       obj = kzalloc(sizeof *obj, GFP_KERNEL);
>>       if (!obj)
>>               return -ENOMEM;
>>
>> -     init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class);
>> +     init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext,
>> +               &qp_lock_class);
>>       down_write(&obj->uevent.uobject.mutex);
>>
>> -     if (cmd.qp_type == IB_QPT_XRC_TGT) {
>> -             xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj);
>> +     if (cmd->qp_type == IB_QPT_XRC_TGT) {
>> +             xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext,
>> +                                  &xrcd_uobj);
>>               if (!xrcd) {
>>                       ret = -EINVAL;
>>                       goto err_put;
>>               }
>>               device = xrcd->device;
>>       } else {
>> -             if (cmd.qp_type == IB_QPT_XRC_INI) {
>> -                     cmd.max_recv_wr = cmd.max_recv_sge = 0;
>> +             if (cmd->qp_type == IB_QPT_XRC_INI) {
>> +                     cmd->max_recv_wr = 0;
>> +                     cmd->max_recv_sge = 0;
>>               } else {
>> -                     if (cmd.is_srq) {
>> -                             srq = idr_read_srq(cmd.srq_handle, file->ucontext);
>> +                     if (cmd->is_srq) {
>> +                             srq = idr_read_srq(cmd->srq_handle,
>> +                                                file->ucontext);
>>                               if (!srq || srq->srq_type != IB_SRQT_BASIC) {
>>                                       ret = -EINVAL;
>>                                       goto err_put;
>>                               }
>>                       }
>>
>> -                     if (cmd.recv_cq_handle != cmd.send_cq_handle) {
>> -                             rcq = idr_read_cq(cmd.recv_cq_handle, file->ucontext, 0);
>> +                     if (cmd->recv_cq_handle != cmd->send_cq_handle) {
>> +                             rcq = idr_read_cq(cmd->recv_cq_handle,
>> +                                               file->ucontext, 0);
>>                               if (!rcq) {
>>                                       ret = -EINVAL;
>>                                       goto err_put;
>> @@ -1808,9 +1807,9 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
>>                       }
>>               }
>>
>> -             scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, !!rcq);
>> +             scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq);
>>               rcq = rcq ?: scq;
>> -             pd  = idr_read_pd(cmd.pd_handle, file->ucontext);
>> +             pd  = idr_read_pd(cmd->pd_handle, file->ucontext);
>>               if (!pd || !scq) {
>>                       ret = -EINVAL;
>>                       goto err_put;
>> @@ -1825,31 +1824,54 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
>>       attr.recv_cq       = rcq;
>>       attr.srq           = srq;
>>       attr.xrcd          = xrcd;
>> -     attr.sq_sig_type   = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
>> -     attr.qp_type       = cmd.qp_type;
>> +     attr.sq_sig_type   = cmd->sq_sig_all ? IB_SIGNAL_ALL_WR :
>> +                                           IB_SIGNAL_REQ_WR;
>> +     attr.qp_type       = cmd->qp_type;
>>       attr.create_flags  = 0;
>>
>> -     attr.cap.max_send_wr     = cmd.max_send_wr;
>> -     attr.cap.max_recv_wr     = cmd.max_recv_wr;
>> -     attr.cap.max_send_sge    = cmd.max_send_sge;
>> -     attr.cap.max_recv_sge    = cmd.max_recv_sge;
>> -     attr.cap.max_inline_data = cmd.max_inline_data;
>> +     attr.cap.max_send_wr     = cmd->max_send_wr;
>> +     attr.cap.max_recv_wr     = cmd->max_recv_wr;
>> +     attr.cap.max_send_sge    = cmd->max_send_sge;
>> +     attr.cap.max_recv_sge    = cmd->max_recv_sge;
>> +     attr.cap.max_inline_data = cmd->max_inline_data;
>>
>>       obj->uevent.events_reported     = 0;
>>       INIT_LIST_HEAD(&obj->uevent.event_list);
>>       INIT_LIST_HEAD(&obj->mcast_list);
>>
>> -     if (cmd.qp_type == IB_QPT_XRC_TGT)
>> +     if (cmd_sz >= offsetof(typeof(*cmd), create_flags) +
>> +                   sizeof(cmd->create_flags))
>> +             attr.create_flags = cmd->create_flags;
>> +
>> +     if (attr.create_flags) {
>> +             ret = -EINVAL;
>> +             goto err_put;
>> +     }
>> +
>> +     if (cmd->comp_mask) {
> Do you need this check in addition to the check in ib_uverbs_ex_create_qp()?

You are right, I will remove this extra check and resend.

>
>> +             ret = -EINVAL;
>> +             goto err_put;
>> +     }
>> +
>> +     buf = (void *)cmd + sizeof(*cmd);
>> +     if (cmd_sz > sizeof(*cmd))
>> +             if (!(buf[0] == 0 && !memcmp(buf, buf + 1,
>> +                                          cmd_sz - sizeof(*cmd) - 1))) {
>> +                     ret = -EINVAL;
>> +                     goto err_put;
>> +             }
>> +
>> +     if (cmd->qp_type == IB_QPT_XRC_TGT)
>>               qp = ib_create_qp(pd, &attr);
>>       else
>> -             qp = device->create_qp(pd, &attr, &udata);
>> +             qp = device->create_qp(pd, &attr, uhw);
>>
>>       if (IS_ERR(qp)) {
>>               ret = PTR_ERR(qp);
>>               goto err_put;
>>       }
>>
>> -     if (cmd.qp_type != IB_QPT_XRC_TGT) {
>> +     if (cmd->qp_type != IB_QPT_XRC_TGT) {
>>               qp->real_qp       = qp;
>>               qp->device        = device;
>>               qp->pd            = pd;
>> @@ -1875,19 +1897,20 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
>>               goto err_destroy;
>>
>>       memset(&resp, 0, sizeof resp);
>> -     resp.qpn             = qp->qp_num;
>> -     resp.qp_handle       = obj->uevent.uobject.id;
>> -     resp.max_recv_sge    = attr.cap.max_recv_sge;
>> -     resp.max_send_sge    = attr.cap.max_send_sge;
>> -     resp.max_recv_wr     = attr.cap.max_recv_wr;
>> -     resp.max_send_wr     = attr.cap.max_send_wr;
>> -     resp.max_inline_data = attr.cap.max_inline_data;
>> +     resp.base.qpn             = qp->qp_num;
>> +     resp.base.qp_handle       = obj->uevent.uobject.id;
>> +     resp.base.max_recv_sge    = attr.cap.max_recv_sge;
>> +     resp.base.max_send_sge    = attr.cap.max_send_sge;
>> +     resp.base.max_recv_wr     = attr.cap.max_recv_wr;
>> +     resp.base.max_send_wr     = attr.cap.max_send_wr;
>> +     resp.base.max_inline_data = attr.cap.max_inline_data;
>>
>> -     if (copy_to_user((void __user *) (unsigned long) cmd.response,
>> -                      &resp, sizeof resp)) {
>> -             ret = -EFAULT;
>> -             goto err_copy;
>> -     }
>> +     resp.response_length = offsetof(typeof(resp), response_length) +
>> +                            sizeof(resp.response_length);
>> +
>> +     ret = cb(file, &resp, ucore);
>> +     if (ret)
>> +             goto err_cb;
>>
>>       if (xrcd) {
>>               obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
>> @@ -1913,9 +1936,8 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
>>
>>       up_write(&obj->uevent.uobject.mutex);
>>
>> -     return in_len;
>> -
>> -err_copy:
>> +     return 0;
>> +err_cb:
>>       idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
>>
>>  err_destroy:
>> @@ -1937,6 +1959,113 @@ err_put:
>>       return ret;
>>  }
>>
>> +static int ib_uverbs_create_qp_cb(struct ib_uverbs_file *file,
>> +                               struct ib_uverbs_ex_create_qp_resp *resp,
>> +                               struct ib_udata *ucore)
>> +{
>> +     if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base)))
>> +             return -EFAULT;
>> +
>> +     return 0;
>> +}
>> +
>> +ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
>> +                         struct ib_device *ib_dev,
>> +                         const char __user *buf, int in_len,
>> +                         int out_len)
>> +{
>> +     struct ib_uverbs_create_qp      cmd;
>> +     struct ib_uverbs_ex_create_qp   cmd_ex;
>> +     struct ib_udata                 ucore;
>> +     struct ib_udata                 uhw;
>> +     ssize_t resp_size = sizeof(struct ib_uverbs_create_qp_resp);
>> +     int                             err;
>
> I would expect a check here that in_len >= sizeof(cmd). But I see the
> previous code didn't have it either. Any reason adding the check would
> break user-space?

This patch just refactor in ib_uverbs_create_qp and doesn't change any
of it's logic or fix any bug. we can consider such a fix for the
future.

>
>> +
>> +     if (out_len < resp_size)
>> +             return -ENOSPC;
>> +
>> +     if (copy_from_user(&cmd, buf, sizeof(cmd)))
>> +             return -EFAULT;
>> +
>> +     INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd),
>> +                resp_size);
>> +     INIT_UDATA(&uhw, buf + sizeof(cmd),
>> +                (unsigned long)cmd.response + resp_size,
>> +                in_len - sizeof(cmd), out_len - resp_size);
>> +
>> +     memset(&cmd_ex, 0, sizeof(cmd_ex));
>> +     cmd_ex.user_handle = cmd.user_handle;
>> +     cmd_ex.pd_handle = cmd.pd_handle;
>> +     cmd_ex.send_cq_handle = cmd.send_cq_handle;
>> +     cmd_ex.recv_cq_handle = cmd.recv_cq_handle;
>> +     cmd_ex.srq_handle = cmd.srq_handle;
>> +     cmd_ex.max_send_wr = cmd.max_send_wr;
>> +     cmd_ex.max_recv_wr = cmd.max_recv_wr;
>> +     cmd_ex.max_send_sge = cmd.max_send_sge;
>> +     cmd_ex.max_recv_sge = cmd.max_recv_sge;
>> +     cmd_ex.max_inline_data = cmd.max_inline_data;
>> +     cmd_ex.sq_sig_all = cmd.sq_sig_all;
>> +     cmd_ex.qp_type = cmd.qp_type;
>> +     cmd_ex.is_srq = cmd.is_srq;
>> +
>> +     err = create_qp(file, &ucore, &uhw, &cmd_ex,
>> +                     offsetof(typeof(cmd_ex), is_srq) +
>> +                     sizeof(cmd.is_srq), ib_uverbs_create_qp_cb,
>> +                     NULL);
>> +
>> +     if (err)
>> +             return err;
>> +
>> +     return in_len;
>> +}
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sagi Grimberg Oct. 21, 2015, 2:09 p.m. UTC | #6
On 10/21/2015 1:04 PM, Or Gerlitz wrote:
> On 10/21/2015 12:53 PM, Sagi Grimberg wrote:
>> On 10/15/2015 2:44 PM, Eran Ben Elisha wrote:
>>
>>
>>> +struct ib_uverbs_ex_create_qp {
>>> +    __u64 user_handle;
>>> +    __u32 pd_handle;
>>> +    __u32 send_cq_handle;
>>> +    __u32 recv_cq_handle;
>>> +    __u32 srq_handle;
>>> +    __u32 max_send_wr;
>>> +    __u32 max_recv_wr;
>>> +    __u32 max_send_sge;
>>> +    __u32 max_recv_sge;
>>> +    __u32 max_inline_data;
>>> +    __u8  sq_sig_all;
>>> +    __u8  qp_type;
>>> +    __u8  is_srq;
>>> +    __u8 reserved;
>>> +    __u32 comp_mask;
>>> +    __u32 create_flags;
>>
>> Can we please make create_flags u64 to begin with?
>
>
> In the kernel we used 4-5 flags over 10 years, why worry?

I don't see why we should assume that we won't occupy enough bits in a
long time just for saving extra 4 bytes. But if you are strongly against
it then I won't persist here.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Haggai Eran Oct. 21, 2015, 2:42 p.m. UTC | #7
On 21/10/2015 16:46, eran ben elisha wrote:
>>> +ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
>>> >> +                         struct ib_device *ib_dev,
>>> >> +                         const char __user *buf, int in_len,
>>> >> +                         int out_len)
>>> >> +{
>>> >> +     struct ib_uverbs_create_qp      cmd;
>>> >> +     struct ib_uverbs_ex_create_qp   cmd_ex;
>>> >> +     struct ib_udata                 ucore;
>>> >> +     struct ib_udata                 uhw;
>>> >> +     ssize_t resp_size = sizeof(struct ib_uverbs_create_qp_resp);
>>> >> +     int                             err;
>> >
>> > I would expect a check here that in_len >= sizeof(cmd). But I see the
>> > previous code didn't have it either. Any reason adding the check would
>> > break user-space?
> This patch just refactor in ib_uverbs_create_qp and doesn't change any
> of it's logic or fix any bug. we can consider such a fix for the
> future.

Fair enough.

Haggai
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Or Gerlitz Oct. 21, 2015, 6:15 p.m. UTC | #8
On Wed, Oct 21, 2015 at 5:09 PM, Sagi Grimberg  wrote:
> On 10/21/2015 1:04 PM, Or Gerlitz wrote:
>> On 10/21/2015 12:53 PM, Sagi Grimberg wrote:
>>> On 10/15/2015 2:44 PM, Eran Ben Elisha wrote:

>>>> +struct ib_uverbs_ex_create_qp {
>>>> +    __u64 user_handle;
>>>> +    __u32 pd_handle;
>>>> +    __u32 send_cq_handle;
>>>> +    __u32 recv_cq_handle;
>>>> +    __u32 srq_handle;
>>>> +    __u32 max_send_wr;
>>>> +    __u32 max_recv_wr;
>>>> +    __u32 max_send_sge;
>>>> +    __u32 max_recv_sge;
>>>> +    __u32 max_inline_data;
>>>> +    __u8  sq_sig_all;
>>>> +    __u8  qp_type;
>>>> +    __u8  is_srq;
>>>> +    __u8 reserved;
>>>> +    __u32 comp_mask;
>>>> +    __u32 create_flags;
>>>
>>>
>>> Can we please make create_flags u64 to begin with?

>> In the kernel we used 4-5 flags over 10 years, why worry?

> I don't see why we should assume that we won't occupy enough bits in a
> long time just for saving extra 4 bytes. But if you are strongly against
> it then I won't persist here.

Again, the kernel stack consuming rate here was < 1 bit a year when
averaging over time since this was introduced. So we should be doing
well for the coming ~10-20 years with this 32 bit field, and we can
easily extend it later, I verified that with Haggai, so yes, don't
want 64 bits now.

Or.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Lameter (Ampere) Oct. 21, 2015, 6:21 p.m. UTC | #9
On Wed, 21 Oct 2015, Or Gerlitz wrote:

> Again, the kernel stack consuming rate here was < 1 bit a year when
> averaging over time since this was introduced. So we should be doing
> well for the coming ~10-20 years with this 32 bit field, and we can
> easily extend it later, I verified that with Haggai, so yes, don't
> want 64 bits now.

Hmmm... We are running out of dev_cap flags on mlx4 already. They are 32
bits.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Or Gerlitz Oct. 21, 2015, 6:52 p.m. UTC | #10
On Wed, Oct 21, 2015 at 9:21 PM, Christoph Lameter <cl@linux.com> wrote:

> Hmmm... We are running out of dev_cap flags on mlx4 already. They are 32
> bits.

Christoph,

These  are QP creation flags (IB_QP_CREATE_XXX) not device capability
flags (IB_DEVICE_YYY)

Or.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 3863d33..94bbd8c 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -272,5 +272,6 @@  IB_UVERBS_DECLARE_EX_CMD(create_flow);
 IB_UVERBS_DECLARE_EX_CMD(destroy_flow);
 IB_UVERBS_DECLARE_EX_CMD(query_device);
 IB_UVERBS_DECLARE_EX_CMD(create_cq);
+IB_UVERBS_DECLARE_EX_CMD(create_qp);
 
 #endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index be4cb9f..e795d59 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1741,66 +1741,65 @@  ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
 	return in_len;
 }
 
-ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
-			    struct ib_device *ib_dev,
-			    const char __user *buf, int in_len,
-			    int out_len)
-{
-	struct ib_uverbs_create_qp      cmd;
-	struct ib_uverbs_create_qp_resp resp;
-	struct ib_udata                 udata;
-	struct ib_uqp_object           *obj;
-	struct ib_device	       *device;
-	struct ib_pd                   *pd = NULL;
-	struct ib_xrcd		       *xrcd = NULL;
-	struct ib_uobject	       *uninitialized_var(xrcd_uobj);
-	struct ib_cq                   *scq = NULL, *rcq = NULL;
-	struct ib_srq                  *srq = NULL;
-	struct ib_qp                   *qp;
-	struct ib_qp_init_attr          attr;
-	int ret;
-
-	if (out_len < sizeof resp)
-		return -ENOSPC;
-
-	if (copy_from_user(&cmd, buf, sizeof cmd))
-		return -EFAULT;
+static int create_qp(struct ib_uverbs_file *file,
+		     struct ib_udata *ucore,
+		     struct ib_udata *uhw,
+		     struct ib_uverbs_ex_create_qp *cmd,
+		     size_t cmd_sz,
+		     int (*cb)(struct ib_uverbs_file *file,
+			       struct ib_uverbs_ex_create_qp_resp *resp,
+			       struct ib_udata *udata),
+		     void *context)
+{
+	struct ib_uqp_object		*obj;
+	struct ib_device		*device;
+	struct ib_pd			*pd = NULL;
+	struct ib_xrcd			*xrcd = NULL;
+	struct ib_uobject		*uninitialized_var(xrcd_uobj);
+	struct ib_cq			*scq = NULL, *rcq = NULL;
+	struct ib_srq			*srq = NULL;
+	struct ib_qp			*qp;
+	char				*buf;
+	struct ib_qp_init_attr		attr;
+	struct ib_uverbs_ex_create_qp_resp resp;
+	int				ret;
 
-	if (cmd.qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
+	if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
 		return -EPERM;
 
-	INIT_UDATA(&udata, buf + sizeof cmd,
-		   (unsigned long) cmd.response + sizeof resp,
-		   in_len - sizeof cmd, out_len - sizeof resp);
-
 	obj = kzalloc(sizeof *obj, GFP_KERNEL);
 	if (!obj)
 		return -ENOMEM;
 
-	init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class);
+	init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext,
+		  &qp_lock_class);
 	down_write(&obj->uevent.uobject.mutex);
 
-	if (cmd.qp_type == IB_QPT_XRC_TGT) {
-		xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj);
+	if (cmd->qp_type == IB_QPT_XRC_TGT) {
+		xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext,
+				     &xrcd_uobj);
 		if (!xrcd) {
 			ret = -EINVAL;
 			goto err_put;
 		}
 		device = xrcd->device;
 	} else {
-		if (cmd.qp_type == IB_QPT_XRC_INI) {
-			cmd.max_recv_wr = cmd.max_recv_sge = 0;
+		if (cmd->qp_type == IB_QPT_XRC_INI) {
+			cmd->max_recv_wr = 0;
+			cmd->max_recv_sge = 0;
 		} else {
-			if (cmd.is_srq) {
-				srq = idr_read_srq(cmd.srq_handle, file->ucontext);
+			if (cmd->is_srq) {
+				srq = idr_read_srq(cmd->srq_handle,
+						   file->ucontext);
 				if (!srq || srq->srq_type != IB_SRQT_BASIC) {
 					ret = -EINVAL;
 					goto err_put;
 				}
 			}
 
-			if (cmd.recv_cq_handle != cmd.send_cq_handle) {
-				rcq = idr_read_cq(cmd.recv_cq_handle, file->ucontext, 0);
+			if (cmd->recv_cq_handle != cmd->send_cq_handle) {
+				rcq = idr_read_cq(cmd->recv_cq_handle,
+						  file->ucontext, 0);
 				if (!rcq) {
 					ret = -EINVAL;
 					goto err_put;
@@ -1808,9 +1807,9 @@  ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
 			}
 		}
 
-		scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, !!rcq);
+		scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq);
 		rcq = rcq ?: scq;
-		pd  = idr_read_pd(cmd.pd_handle, file->ucontext);
+		pd  = idr_read_pd(cmd->pd_handle, file->ucontext);
 		if (!pd || !scq) {
 			ret = -EINVAL;
 			goto err_put;
@@ -1825,31 +1824,54 @@  ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
 	attr.recv_cq       = rcq;
 	attr.srq           = srq;
 	attr.xrcd	   = xrcd;
-	attr.sq_sig_type   = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
-	attr.qp_type       = cmd.qp_type;
+	attr.sq_sig_type   = cmd->sq_sig_all ? IB_SIGNAL_ALL_WR :
+					      IB_SIGNAL_REQ_WR;
+	attr.qp_type       = cmd->qp_type;
 	attr.create_flags  = 0;
 
-	attr.cap.max_send_wr     = cmd.max_send_wr;
-	attr.cap.max_recv_wr     = cmd.max_recv_wr;
-	attr.cap.max_send_sge    = cmd.max_send_sge;
-	attr.cap.max_recv_sge    = cmd.max_recv_sge;
-	attr.cap.max_inline_data = cmd.max_inline_data;
+	attr.cap.max_send_wr     = cmd->max_send_wr;
+	attr.cap.max_recv_wr     = cmd->max_recv_wr;
+	attr.cap.max_send_sge    = cmd->max_send_sge;
+	attr.cap.max_recv_sge    = cmd->max_recv_sge;
+	attr.cap.max_inline_data = cmd->max_inline_data;
 
 	obj->uevent.events_reported     = 0;
 	INIT_LIST_HEAD(&obj->uevent.event_list);
 	INIT_LIST_HEAD(&obj->mcast_list);
 
-	if (cmd.qp_type == IB_QPT_XRC_TGT)
+	if (cmd_sz >= offsetof(typeof(*cmd), create_flags) +
+		      sizeof(cmd->create_flags))
+		attr.create_flags = cmd->create_flags;
+
+	if (attr.create_flags) {
+		ret = -EINVAL;
+		goto err_put;
+	}
+
+	if (cmd->comp_mask) {
+		ret = -EINVAL;
+		goto err_put;
+	}
+
+	buf = (void *)cmd + sizeof(*cmd);
+	if (cmd_sz > sizeof(*cmd))
+		if (!(buf[0] == 0 && !memcmp(buf, buf + 1,
+					     cmd_sz - sizeof(*cmd) - 1))) {
+			ret = -EINVAL;
+			goto err_put;
+		}
+
+	if (cmd->qp_type == IB_QPT_XRC_TGT)
 		qp = ib_create_qp(pd, &attr);
 	else
-		qp = device->create_qp(pd, &attr, &udata);
+		qp = device->create_qp(pd, &attr, uhw);
 
 	if (IS_ERR(qp)) {
 		ret = PTR_ERR(qp);
 		goto err_put;
 	}
 
-	if (cmd.qp_type != IB_QPT_XRC_TGT) {
+	if (cmd->qp_type != IB_QPT_XRC_TGT) {
 		qp->real_qp	  = qp;
 		qp->device	  = device;
 		qp->pd		  = pd;
@@ -1875,19 +1897,20 @@  ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
 		goto err_destroy;
 
 	memset(&resp, 0, sizeof resp);
-	resp.qpn             = qp->qp_num;
-	resp.qp_handle       = obj->uevent.uobject.id;
-	resp.max_recv_sge    = attr.cap.max_recv_sge;
-	resp.max_send_sge    = attr.cap.max_send_sge;
-	resp.max_recv_wr     = attr.cap.max_recv_wr;
-	resp.max_send_wr     = attr.cap.max_send_wr;
-	resp.max_inline_data = attr.cap.max_inline_data;
+	resp.base.qpn             = qp->qp_num;
+	resp.base.qp_handle       = obj->uevent.uobject.id;
+	resp.base.max_recv_sge    = attr.cap.max_recv_sge;
+	resp.base.max_send_sge    = attr.cap.max_send_sge;
+	resp.base.max_recv_wr     = attr.cap.max_recv_wr;
+	resp.base.max_send_wr     = attr.cap.max_send_wr;
+	resp.base.max_inline_data = attr.cap.max_inline_data;
 
-	if (copy_to_user((void __user *) (unsigned long) cmd.response,
-			 &resp, sizeof resp)) {
-		ret = -EFAULT;
-		goto err_copy;
-	}
+	resp.response_length = offsetof(typeof(resp), response_length) +
+			       sizeof(resp.response_length);
+
+	ret = cb(file, &resp, ucore);
+	if (ret)
+		goto err_cb;
 
 	if (xrcd) {
 		obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
@@ -1913,9 +1936,8 @@  ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
 
 	up_write(&obj->uevent.uobject.mutex);
 
-	return in_len;
-
-err_copy:
+	return 0;
+err_cb:
 	idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
 
 err_destroy:
@@ -1937,6 +1959,113 @@  err_put:
 	return ret;
 }
 
+static int ib_uverbs_create_qp_cb(struct ib_uverbs_file *file,
+				  struct ib_uverbs_ex_create_qp_resp *resp,
+				  struct ib_udata *ucore)
+{
+	if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base)))
+		return -EFAULT;
+
+	return 0;
+}
+
+ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
+			    struct ib_device *ib_dev,
+			    const char __user *buf, int in_len,
+			    int out_len)
+{
+	struct ib_uverbs_create_qp      cmd;
+	struct ib_uverbs_ex_create_qp	cmd_ex;
+	struct ib_udata			ucore;
+	struct ib_udata			uhw;
+	ssize_t resp_size = sizeof(struct ib_uverbs_create_qp_resp);
+	int				err;
+
+	if (out_len < resp_size)
+		return -ENOSPC;
+
+	if (copy_from_user(&cmd, buf, sizeof(cmd)))
+		return -EFAULT;
+
+	INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd),
+		   resp_size);
+	INIT_UDATA(&uhw, buf + sizeof(cmd),
+		   (unsigned long)cmd.response + resp_size,
+		   in_len - sizeof(cmd), out_len - resp_size);
+
+	memset(&cmd_ex, 0, sizeof(cmd_ex));
+	cmd_ex.user_handle = cmd.user_handle;
+	cmd_ex.pd_handle = cmd.pd_handle;
+	cmd_ex.send_cq_handle = cmd.send_cq_handle;
+	cmd_ex.recv_cq_handle = cmd.recv_cq_handle;
+	cmd_ex.srq_handle = cmd.srq_handle;
+	cmd_ex.max_send_wr = cmd.max_send_wr;
+	cmd_ex.max_recv_wr = cmd.max_recv_wr;
+	cmd_ex.max_send_sge = cmd.max_send_sge;
+	cmd_ex.max_recv_sge = cmd.max_recv_sge;
+	cmd_ex.max_inline_data = cmd.max_inline_data;
+	cmd_ex.sq_sig_all = cmd.sq_sig_all;
+	cmd_ex.qp_type = cmd.qp_type;
+	cmd_ex.is_srq = cmd.is_srq;
+
+	err = create_qp(file, &ucore, &uhw, &cmd_ex,
+			offsetof(typeof(cmd_ex), is_srq) +
+			sizeof(cmd.is_srq), ib_uverbs_create_qp_cb,
+			NULL);
+
+	if (err)
+		return err;
+
+	return in_len;
+}
+
+static int ib_uverbs_ex_create_qp_cb(struct ib_uverbs_file *file,
+				     struct ib_uverbs_ex_create_qp_resp *resp,
+				     struct ib_udata *ucore)
+{
+	if (ib_copy_to_udata(ucore, resp, resp->response_length))
+		return -EFAULT;
+
+	return 0;
+}
+
+int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file,
+			   struct ib_device *ib_dev,
+			   struct ib_udata *ucore,
+			   struct ib_udata *uhw)
+{
+	struct ib_uverbs_ex_create_qp_resp resp;
+	struct ib_uverbs_ex_create_qp cmd = {0};
+	int err;
+
+	if (ucore->inlen < (offsetof(typeof(cmd), comp_mask) +
+			    sizeof(cmd.comp_mask)))
+		return -EINVAL;
+
+	err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+	if (err)
+		return err;
+
+	if (cmd.comp_mask)
+		return -EINVAL;
+
+	if (cmd.reserved)
+		return -EINVAL;
+
+	if (ucore->outlen < (offsetof(typeof(resp), response_length) +
+			     sizeof(resp.response_length)))
+		return -ENOSPC;
+
+	err = create_qp(file, ucore, uhw, &cmd,
+			min(ucore->inlen, sizeof(cmd)),
+			ib_uverbs_ex_create_qp_cb, NULL);
+
+	if (err)
+		return err;
+
+	return 0;
+}
+
 ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
 			  struct ib_device *ib_dev,
 			  const char __user *buf, int in_len, int out_len)
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index c29a660..e3ef288 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -127,6 +127,7 @@  static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
 	[IB_USER_VERBS_EX_CMD_DESTROY_FLOW]	= ib_uverbs_ex_destroy_flow,
 	[IB_USER_VERBS_EX_CMD_QUERY_DEVICE]	= ib_uverbs_ex_query_device,
 	[IB_USER_VERBS_EX_CMD_CREATE_CQ]	= ib_uverbs_ex_create_cq,
+	[IB_USER_VERBS_EX_CMD_CREATE_QP]        = ib_uverbs_ex_create_qp,
 };
 
 static void ib_uverbs_add_one(struct ib_device *device);
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 978841e..8126c14 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -92,6 +92,7 @@  enum {
 enum {
 	IB_USER_VERBS_EX_CMD_QUERY_DEVICE = IB_USER_VERBS_CMD_QUERY_DEVICE,
 	IB_USER_VERBS_EX_CMD_CREATE_CQ = IB_USER_VERBS_CMD_CREATE_CQ,
+	IB_USER_VERBS_EX_CMD_CREATE_QP = IB_USER_VERBS_CMD_CREATE_QP,
 	IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD,
 	IB_USER_VERBS_EX_CMD_DESTROY_FLOW,
 };
@@ -516,6 +517,25 @@  struct ib_uverbs_create_qp {
 	__u64 driver_data[0];
 };
 
+struct ib_uverbs_ex_create_qp {
+	__u64 user_handle;
+	__u32 pd_handle;
+	__u32 send_cq_handle;
+	__u32 recv_cq_handle;
+	__u32 srq_handle;
+	__u32 max_send_wr;
+	__u32 max_recv_wr;
+	__u32 max_send_sge;
+	__u32 max_recv_sge;
+	__u32 max_inline_data;
+	__u8  sq_sig_all;
+	__u8  qp_type;
+	__u8  is_srq;
+	__u8 reserved;
+	__u32 comp_mask;
+	__u32 create_flags;
+};
+
 struct ib_uverbs_open_qp {
 	__u64 response;
 	__u64 user_handle;
@@ -538,6 +558,12 @@  struct ib_uverbs_create_qp_resp {
 	__u32 reserved;
 };
 
+struct ib_uverbs_ex_create_qp_resp {
+	struct ib_uverbs_create_qp_resp base;
+	__u32 comp_mask;
+	__u32 response_length;
+};
+
 /*
  * This struct needs to remain a multiple of 8 bytes to keep the
  * alignment of the modify QP parameters.