diff mbox

[rdma-core,5/5] mlx5: Add support for ibv_alloc_null_mr

Message ID 1529512102-24740-6-git-send-email-yishaih@mellanox.com (mailing list archive)
State Not Applicable
Headers show

Commit Message

Yishai Hadas June 20, 2018, 4:28 p.m. UTC
From: Yonatan Cohen <yonatanc@mellanox.com>

mlx5_alloc_null_mr allocates an MR and sets its lkey member
with the dump_fill_mkey from mlx5_context if it's supported.
The dump_fill_mkey from this MR is later used
in ibv_post_recv() and ibv_post_send() to skip the
write and read, respectively, to/from any ibv_sge that was set
with the dump and fill memory key.

Signed-off-by: Yonatan Cohen <yonatanc@mellanox.com>
Reviewed-by: Guy Levi <guyle@mellanox.com>
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
---
 providers/mlx5/mlx5.c  | 13 +++++++++++++
 providers/mlx5/mlx5.h  |  3 +++
 providers/mlx5/qp.c    | 27 ++++++++++++++++++++++-----
 providers/mlx5/verbs.c | 31 +++++++++++++++++++++++++++++++
 4 files changed, 69 insertions(+), 5 deletions(-)

Comments

Jason Gunthorpe June 20, 2018, 7:35 p.m. UTC | #1
On Wed, Jun 20, 2018 at 07:28:22PM +0300, Yishai Hadas wrote:
> @@ -80,7 +80,16 @@ static int copy_to_scat(struct mlx5_wqe_data_seg *scat, void *buf, int *size,
>  
>  	for (i = 0; i < max; ++i) {
>  		copy = min_t(long, *size, be32toh(scat->byte_count));
> -		memcpy((void *)(unsigned long)be64toh(scat->addr), buf, copy);
> +
> +		/* When NULL MR is used can't copy to target,
> +		 * expected to be NULL.
> +		 */
> +		if (unlikely(scat->lkey == ctx->dump_fill_mkey_be))
> +			;
> +		else

What is that? Better as:

if (likely(scat->lkey != ctx->dump_fill_mkey_be))
			memcpy((void *)(unsigned long)be64toh(scat->addr),
			       buf, copy);

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yishai Hadas June 21, 2018, 1:53 p.m. UTC | #2
On 6/20/2018 10:35 PM, Jason Gunthorpe wrote:
> On Wed, Jun 20, 2018 at 07:28:22PM +0300, Yishai Hadas wrote:
>> @@ -80,7 +80,16 @@ static int copy_to_scat(struct mlx5_wqe_data_seg *scat, void *buf, int *size,
>>   
>>   	for (i = 0; i < max; ++i) {
>>   		copy = min_t(long, *size, be32toh(scat->byte_count));
>> -		memcpy((void *)(unsigned long)be64toh(scat->addr), buf, copy);
>> +
>> +		/* When NULL MR is used can't copy to target,
>> +		 * expected to be NULL.
>> +		 */
>> +		if (unlikely(scat->lkey == ctx->dump_fill_mkey_be))
>> +			;
>> +		else
> 
> What is that? Better as:
> 
> if (likely(scat->lkey != ctx->dump_fill_mkey_be))
> 			memcpy((void *)(unsigned long)be64toh(scat->addr),
> 			       buf, copy);
> 

This is data path flow, upon performance testing we found the unlikely 
variant to be better than this suggestion.

I have updated the PR to handle other notes that you posted, please have 
a look.

Yishai

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe June 21, 2018, 2:13 p.m. UTC | #3
On Thu, Jun 21, 2018 at 04:53:54PM +0300, Yishai Hadas wrote:
> On 6/20/2018 10:35 PM, Jason Gunthorpe wrote:
> >On Wed, Jun 20, 2018 at 07:28:22PM +0300, Yishai Hadas wrote:
> >>@@ -80,7 +80,16 @@ static int copy_to_scat(struct mlx5_wqe_data_seg *scat, void *buf, int *size,
> >>  	for (i = 0; i < max; ++i) {
> >>  		copy = min_t(long, *size, be32toh(scat->byte_count));
> >>-		memcpy((void *)(unsigned long)be64toh(scat->addr), buf, copy);
> >>+
> >>+		/* When NULL MR is used can't copy to target,
> >>+		 * expected to be NULL.
> >>+		 */
> >>+		if (unlikely(scat->lkey == ctx->dump_fill_mkey_be))
> >>+			;
> >>+		else
> >
> >What is that? Better as:
> >
> >if (likely(scat->lkey != ctx->dump_fill_mkey_be))
> >			memcpy((void *)(unsigned long)be64toh(scat->addr),
> >			       buf, copy);
> >
> 
> This is data path flow, upon performance testing we found the unlikely
> variant to be better than this suggestion.

That is a highly compiler specific statement. I assume you tested this
with some ancient compiler?

IMHO you should stick to the sane code.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
yonatan cohen June 24, 2018, 6:47 a.m. UTC | #4
On 6/21/2018 5:13 PM, Jason Gunthorpe wrote:
> On Thu, Jun 21, 2018 at 04:53:54PM +0300, Yishai Hadas wrote:
>> On 6/20/2018 10:35 PM, Jason Gunthorpe wrote:
>>> On Wed, Jun 20, 2018 at 07:28:22PM +0300, Yishai Hadas wrote:
>>>> @@ -80,7 +80,16 @@ static int copy_to_scat(struct mlx5_wqe_data_seg *scat, void *buf, int *size,
>>>>   	for (i = 0; i < max; ++i) {
>>>>   		copy = min_t(long, *size, be32toh(scat->byte_count));
>>>> -		memcpy((void *)(unsigned long)be64toh(scat->addr), buf, copy);
>>>> +
>>>> +		/* When NULL MR is used can't copy to target,
>>>> +		 * expected to be NULL.
>>>> +		 */
>>>> +		if (unlikely(scat->lkey == ctx->dump_fill_mkey_be))
>>>> +			;
>>>> +		else
>>>
>>> What is that? Better as:
>>>
>>> if (likely(scat->lkey != ctx->dump_fill_mkey_be))
>>> 			memcpy((void *)(unsigned long)be64toh(scat->addr),
>>> 			       buf, copy);
>>>
>>
>> This is data path flow, upon performance testing we found the unlikely
>> variant to be better than this suggestion.
> 
> That is a highly compiler specific statement. I assume you tested this
> with some ancient compiler?
gcc --version
gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-28)
> 
> IMHO you should stick to the sane code.
> 
> Jason
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Leon Romanovsky June 24, 2018, 7:57 a.m. UTC | #5
On Sun, Jun 24, 2018 at 09:47:21AM +0300, Yonatan Cohen wrote:
> On 6/21/2018 5:13 PM, Jason Gunthorpe wrote:
> > On Thu, Jun 21, 2018 at 04:53:54PM +0300, Yishai Hadas wrote:
> > > On 6/20/2018 10:35 PM, Jason Gunthorpe wrote:
> > > > On Wed, Jun 20, 2018 at 07:28:22PM +0300, Yishai Hadas wrote:
> > > > > @@ -80,7 +80,16 @@ static int copy_to_scat(struct mlx5_wqe_data_seg *scat, void *buf, int *size,
> > > > >   	for (i = 0; i < max; ++i) {
> > > > >   		copy = min_t(long, *size, be32toh(scat->byte_count));
> > > > > -		memcpy((void *)(unsigned long)be64toh(scat->addr), buf, copy);
> > > > > +
> > > > > +		/* When NULL MR is used can't copy to target,
> > > > > +		 * expected to be NULL.
> > > > > +		 */
> > > > > +		if (unlikely(scat->lkey == ctx->dump_fill_mkey_be))
> > > > > +			;
> > > > > +		else
> > > >
> > > > What is that? Better as:
> > > >
> > > > if (likely(scat->lkey != ctx->dump_fill_mkey_be))
> > > > 			memcpy((void *)(unsigned long)be64toh(scat->addr),
> > > > 			       buf, copy);
> > > >
> > >
> > > This is data path flow, upon performance testing we found the unlikely
> > > variant to be better than this suggestion.
> >
> > That is a highly compiler specific statement. I assume you tested this
> > with some ancient compiler?
> gcc --version
> gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-28)

This gcc version is so ancient that you shouldn't base any optimization decisions on it.

> >
> > IMHO you should stick to the sane code.
> >
> > Jason
> >
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yishai Hadas July 4, 2018, 12:58 p.m. UTC | #6
On 6/21/2018 5:13 PM, Jason Gunthorpe wrote:
> On Thu, Jun 21, 2018 at 04:53:54PM +0300, Yishai Hadas wrote:
>> On 6/20/2018 10:35 PM, Jason Gunthorpe wrote:
>>> On Wed, Jun 20, 2018 at 07:28:22PM +0300, Yishai Hadas wrote:
>>>> @@ -80,7 +80,16 @@ static int copy_to_scat(struct mlx5_wqe_data_seg *scat, void *buf, int *size,
>>>>   	for (i = 0; i < max; ++i) {
>>>>   		copy = min_t(long, *size, be32toh(scat->byte_count));
>>>> -		memcpy((void *)(unsigned long)be64toh(scat->addr), buf, copy);
>>>> +
>>>> +		/* When NULL MR is used can't copy to target,
>>>> +		 * expected to be NULL.
>>>> +		 */
>>>> +		if (unlikely(scat->lkey == ctx->dump_fill_mkey_be))
>>>> +			;
>>>> +		else
>>>
>>> What is that? Better as:
>>>
>>> if (likely(scat->lkey != ctx->dump_fill_mkey_be))
>>> 			memcpy((void *)(unsigned long)be64toh(scat->addr),
>>> 			       buf, copy);
>>>
>>
>> This is data path flow, upon performance testing we found the unlikely
>> variant to be better than this suggestion.
> 
> That is a highly compiler specific statement. I assume you tested this
> with some ancient compiler?
> 
> IMHO you should stick to the sane code.

As you asked, the PR [1] was updated to use 'likely' to enable moving 
forward with this series in both kernel and user.

However, a follow-up performance test should be done to check on some 
newer compiler. In case needed, an incremental patch will be sent on top 
of this series.

[1] https://github.com/linux-rdma/rdma-core/pull/351
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/providers/mlx5/mlx5.c b/providers/mlx5/mlx5.c
index ea9f6ea..97a82a2 100644
--- a/providers/mlx5/mlx5.c
+++ b/providers/mlx5/mlx5.c
@@ -146,6 +146,7 @@  static const struct verbs_context_ops mlx5_ctx_common_ops = {
 	.query_rt_values = mlx5_query_rt_values,
 	.read_counters = mlx5_read_counters,
 	.reg_dm_mr = mlx5_reg_dm_mr,
+	.alloc_null_mr = mlx5_alloc_null_mr,
 };
 
 static const struct verbs_context_ops mlx5_ctx_cqev1_ops = {
@@ -1063,6 +1064,18 @@  static struct verbs_context *mlx5_alloc_context(struct ibv_device *ibdev,
 	context->max_srq_recv_wr = resp.max_srq_recv_wr;
 	context->num_dyn_bfregs = resp.num_dyn_bfregs;
 
+	if (resp.comp_mask & MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY) {
+		context->dump_fill_mkey = resp.dump_fill_mkey;
+		/* Have the BE value ready to be used in data path */
+		context->dump_fill_mkey_be = htobe32(resp.dump_fill_mkey);
+	} else {
+		/* kernel driver will never return MLX5_INVALID_LKEY for
+		 * dump_fill_mkey
+		 */
+		context->dump_fill_mkey = MLX5_INVALID_LKEY;
+		context->dump_fill_mkey_be = htobe32(MLX5_INVALID_LKEY);
+	}
+
 	if (context->num_dyn_bfregs) {
 		context->count_dyn_bfregs = calloc(context->num_dyn_bfregs,
 						   sizeof(*context->count_dyn_bfregs));
diff --git a/providers/mlx5/mlx5.h b/providers/mlx5/mlx5.h
index 737a09e..16ce2cc 100644
--- a/providers/mlx5/mlx5.h
+++ b/providers/mlx5/mlx5.h
@@ -295,6 +295,8 @@  struct mlx5_context {
 	uint16_t			flow_action_flags;
 	uint64_t			max_dm_size;
 	uint32_t                        eth_min_inline_size;
+	uint32_t                        dump_fill_mkey;
+	__be32                          dump_fill_mkey_be;
 };
 
 struct mlx5_bitmap {
@@ -735,6 +737,7 @@  int mlx5_query_port(struct ibv_context *context, uint8_t port,
 struct ibv_pd *mlx5_alloc_pd(struct ibv_context *context);
 int mlx5_free_pd(struct ibv_pd *pd);
 
+struct ibv_mr *mlx5_alloc_null_mr(struct ibv_pd *pd);
 struct ibv_mr *mlx5_reg_mr(struct ibv_pd *pd, void *addr,
 			   size_t length, int access);
 int mlx5_rereg_mr(struct verbs_mr *mr, int flags, struct ibv_pd *pd, void *addr,
diff --git a/providers/mlx5/qp.c b/providers/mlx5/qp.c
index e630115..9a0d2d2 100644
--- a/providers/mlx5/qp.c
+++ b/providers/mlx5/qp.c
@@ -70,7 +70,7 @@  static void *get_wq_recv_wqe(struct mlx5_rwq *rwq, int n)
 }
 
 static int copy_to_scat(struct mlx5_wqe_data_seg *scat, void *buf, int *size,
-			 int max)
+			 int max, struct mlx5_context *ctx)
 {
 	int copy;
 	int i;
@@ -80,7 +80,16 @@  static int copy_to_scat(struct mlx5_wqe_data_seg *scat, void *buf, int *size,
 
 	for (i = 0; i < max; ++i) {
 		copy = min_t(long, *size, be32toh(scat->byte_count));
-		memcpy((void *)(unsigned long)be64toh(scat->addr), buf, copy);
+
+		/* When NULL MR is used can't copy to target,
+		 * expected to be NULL.
+		 */
+		if (unlikely(scat->lkey == ctx->dump_fill_mkey_be))
+			;
+		else
+			memcpy((void *)(unsigned long)be64toh(scat->addr),
+			       buf, copy);
+
 		*size -= copy;
 		if (*size == 0)
 			return IBV_WC_SUCCESS;
@@ -93,6 +102,8 @@  static int copy_to_scat(struct mlx5_wqe_data_seg *scat, void *buf, int *size,
 
 int mlx5_copy_to_recv_wqe(struct mlx5_qp *qp, int idx, void *buf, int size)
 {
+	struct mlx5_context *ctx = to_mctx(qp->ibv_qp->pd->context);
+
 	struct mlx5_wqe_data_seg *scat;
 	int max = 1 << (qp->rq.wqe_shift - 4);
 
@@ -100,11 +111,12 @@  int mlx5_copy_to_recv_wqe(struct mlx5_qp *qp, int idx, void *buf, int size)
 	if (unlikely(qp->wq_sig))
 		++scat;
 
-	return copy_to_scat(scat, buf, &size, max);
+	return copy_to_scat(scat, buf, &size, max, ctx);
 }
 
 int mlx5_copy_to_send_wqe(struct mlx5_qp *qp, int idx, void *buf, int size)
 {
+	struct mlx5_context *ctx = to_mctx(qp->ibv_qp->pd->context);
 	struct mlx5_wqe_ctrl_seg *ctrl;
 	struct mlx5_wqe_data_seg *scat;
 	void *p;
@@ -141,14 +153,14 @@  int mlx5_copy_to_send_wqe(struct mlx5_qp *qp, int idx, void *buf, int size)
 		int tmp = ((void *)qp->sq.qend - (void *)scat) >> 4;
 		int orig_size = size;
 
-		if (copy_to_scat(scat, buf, &size, tmp) == IBV_WC_SUCCESS)
+		if (copy_to_scat(scat, buf, &size, tmp, ctx) == IBV_WC_SUCCESS)
 			return IBV_WC_SUCCESS;
 		max = max - tmp;
 		buf += orig_size - size;
 		scat = mlx5_get_send_wqe(qp, 0);
 	}
 
-	return copy_to_scat(scat, buf, &size, max);
+	return copy_to_scat(scat, buf, &size, max, ctx);
 }
 
 void *mlx5_get_send_wqe(struct mlx5_qp *qp, int n)
@@ -1124,6 +1136,11 @@  int mlx5_bind_mw(struct ibv_qp *qp, struct ibv_mw *mw,
 	}
 
 	if (bind_info->mr) {
+		if (verbs_get_mr(bind_info->mr)->mr_type != IBV_MR_TYPE_MR) {
+			errno = ENOTSUP;
+			return errno;
+		}
+
 		if (to_mmr(bind_info->mr)->alloc_flags & IBV_ACCESS_ZERO_BASED) {
 			errno = EINVAL;
 			return errno;
diff --git a/providers/mlx5/verbs.c b/providers/mlx5/verbs.c
index 2949c2e..e95cdb8 100644
--- a/providers/mlx5/verbs.c
+++ b/providers/mlx5/verbs.c
@@ -396,6 +396,33 @@  struct ibv_mr *mlx5_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
 	return &mr->vmr.ibv_mr;
 }
 
+struct ibv_mr *mlx5_alloc_null_mr(struct ibv_pd *pd)
+{
+	struct mlx5_mr *mr;
+	struct mlx5_context *ctx = to_mctx(pd->context);
+
+	if (ctx->dump_fill_mkey == MLX5_INVALID_LKEY) {
+		errno = ENOTSUP;
+		return NULL;
+	}
+
+	mr = calloc(1, sizeof(*mr));
+	if (!mr) {
+		errno = ENOMEM;
+		return NULL;
+	}
+
+	mr->vmr.ibv_mr.lkey = ctx->dump_fill_mkey;
+
+	mr->vmr.ibv_mr.context = pd->context;
+	mr->vmr.ibv_mr.pd      = pd;
+	mr->vmr.ibv_mr.addr    = NULL;
+	mr->vmr.ibv_mr.length  = SIZE_MAX;
+	mr->vmr.mr_type = IBV_MR_TYPE_NULL_MR;
+
+	return &mr->vmr.ibv_mr;
+}
+
 enum {
 	MLX5_DM_ALLOWED_ACCESS = IBV_ACCESS_LOCAL_WRITE		|
 				 IBV_ACCESS_REMOTE_WRITE	|
@@ -453,10 +480,14 @@  int mlx5_dereg_mr(struct verbs_mr *vmr)
 {
 	int ret;
 
+	if (vmr->mr_type == IBV_MR_TYPE_NULL_MR)
+		goto free;
+
 	ret = ibv_cmd_dereg_mr(vmr);
 	if (ret)
 		return ret;
 
+free:
 	free(vmr);
 	return 0;
 }