[rdma-core,3/3] mlx5: Add support for sending IPoIB packets

Message ID 1504708729-15249-4-git-send-email-yishaih@mellanox.com (mailing list archive)
State Superseded

Commit Message

Yishai Hadas Sept. 6, 2017, 2:38 p.m. UTC
It includes:
- Update the WQE size to account for the additional required segments.
- Build a proper IPoIB packet as part of the post-send flow.
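
For reference, a minimal illustrative sketch (not part of this patch) of the
underlay QP creation this targets; ctx, pd, cq and the underlay QPN (here
called ipoib_qpn) are assumed to already exist, and the flow relies on the
verbs-level IBV_QP_CREATE_SOURCE_QPN support:

#include <infiniband/verbs.h>

/* Sketch only: create a UD QP that transmits on behalf of an existing
 * QPN (e.g. the IPoIB netdev's QP).  Creating such a QP typically
 * requires CAP_NET_RAW.
 */
struct ibv_qp_init_attr_ex attr = {
	.qp_type      = IBV_QPT_UD,
	.send_cq      = cq,
	.recv_cq      = cq,
	.cap          = { .max_send_wr = 64, .max_recv_wr = 64,
			  .max_send_sge = 2, .max_recv_sge = 1 },
	.comp_mask    = IBV_QP_INIT_ATTR_PD | IBV_QP_INIT_ATTR_CREATE_FLAGS,
	.pd           = pd,
	.create_flags = IBV_QP_CREATE_SOURCE_QPN,
	.source_qpn   = ipoib_qpn,
};
struct ibv_qp *qp = ibv_create_qp_ex(ctx, &attr);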

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
---
 providers/mlx5/qp.c    | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++
 providers/mlx5/verbs.c | 10 +++++---
 providers/mlx5/wqe.h   |  6 +++++
 3 files changed, 78 insertions(+), 3 deletions(-)

Comments

Jason Gunthorpe Sept. 6, 2017, 4:04 p.m. UTC | #1
On Wed, Sep 06, 2017 at 05:38:49PM +0300, Yishai Hadas wrote:

> +			if (unlikely(qp->flags & MLX5_QP_FLAGS_USE_UNDERLAY)) {
> +				err = mlx5_post_send_underlay(qp, wr, &seg, &size, &sg_copy_ptr);
> +				if (unlikely(err)) {
> +					*bad_wr = wr;
> +					goto out;
> +				}
> +			}

I don't know anything about your device, but this patch looks to me
like it enables this for any IBV_QP_CREATE_SOURCE_QPN user - and it seems
to contain a lot of IPoIB-specific commentary.

It is wrong to assume that all IBV_QP_CREATE_SOURCE_QPN QPs are IPoIB.

At a minimum, some of the commentary needs revising.

Jason
Yishai Hadas Sept. 6, 2017, 4:49 p.m. UTC | #2
On 9/6/2017 7:04 PM, Jason Gunthorpe wrote:
> On Wed, Sep 06, 2017 at 05:38:49PM +0300, Yishai Hadas wrote:
>
>> +			if (unlikely(qp->flags & MLX5_QP_FLAGS_USE_UNDERLAY)) {
>> +				err = mlx5_post_send_underlay(qp, wr, &seg, &size, &sg_copy_ptr);
>> +				if (unlikely(err)) {
>> +					*bad_wr = wr;
>> +					goto out;
>> +				}
>> +			}
>
> I don't know anything about your device, but this patch looks to me
> like it enables this for any IBV_QP_CREATE_SOURCE_QPN user

Correct, the code is generic and can work with more than just IPoIB packets.

> - and it seems
> to contain a lot of IPoIB-specific commentary.
>
> It is wrong to assume that all IBV_QP_CREATE_SOURCE_QPN QPs are IPoIB.
>
> At a minimum, some of the commentary needs revising.

The commentary was focused on IPoIB as this is the main use case that we
came to solve.

I'll revise the commit log to better describe the general case and
mention IPoIB as a specific use case; I'll send V1 for that.

Thanks for commenting.
Yishai

Jason Gunthorpe Sept. 6, 2017, 4:53 p.m. UTC | #3
On Wed, Sep 06, 2017 at 07:49:03PM +0300, Yishai Hadas wrote:

> The commentary was focused on IPoIB as this is the main use case that we
> came to solve.

It is more than that: why does mlx5_post_send_underlay refer to
things like MLX5_IPOIB_INLINE_MAX_HEADER_SIZE? Those constants seem
mis-named.

And then there are comments like this:

> /* We expect at least 4 bytes as part of first entry to hold the IPoIB header */

Which really doesn't sound general at all.

Jason
Yishai Hadas Sept. 7, 2017, 8:57 a.m. UTC | #4
On 9/6/2017 7:53 PM, Jason Gunthorpe wrote:
> On Wed, Sep 06, 2017 at 07:49:03PM +0300, Yishai Hadas wrote:
>
>> The commentary was focused on IPoIB as this is the main use case that we
>> came to solve.
>
> It is more than that, why does mlx5_post_send_underlay refer to
> things like MLX5_IPOIB_INLINE_MAX_HEADER_SIZE ? Those constants seem
> mis-named.
>
> And then there are comments like this:
>
>> /* We expect at least 4 bytes as part of first entry to hold the IPoIB header */
>
> Which really doesn't sound general at all.

The specific IPoIB handling, i.e. copying part of the application's gather
data into the inline header, comes from a device limitation related to
sending IPoIB packets. It is not required in the general case but works
there as well. Future devices may work properly without that need; once
they are ready we may have a device capability exposed from the kernel and
clean up this specific handling. I will update the commit log to include
this information as well.
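
To illustrate the expectation on the application side, a sketch only (ah, mr,
qp, remote_qpn and qkey are assumed to already exist): the first gather entry
begins with the IPoIB header, so that at least those 4 bytes can be copied
into the WQE inline header, per the minimum enforced by this patch.

#include <stdint.h>
#include <infiniband/verbs.h>

/* Sketch: the first SGE starts with the 4-byte IPoIB header (2-byte
 * protocol type + 2-byte reserved); the provider copies the leading
 * bytes of this entry into the eth segment's inline header.  Note that
 * IBV_WR_SEND_WITH_IMM is rejected on this path.
 */
struct ibv_sge sge[2] = {
	{ .addr   = (uintptr_t)hdr_buf,     /* begins with the IPoIB header */
	  .length = hdr_buf_len,            /* must be at least 4 bytes     */
	  .lkey   = mr->lkey },
	{ .addr   = (uintptr_t)payload_buf,
	  .length = payload_len,
	  .lkey   = mr->lkey },
};
struct ibv_send_wr wr = {
	.sg_list = sge,
	.num_sge = 2,
	.opcode  = IBV_WR_SEND,
	.wr.ud.ah          = ah,
	.wr.ud.remote_qpn  = remote_qpn,
	.wr.ud.remote_qkey = qkey,
};
struct ibv_send_wr *bad_wr;
int ret = ibv_post_send(qp, &wr, &bad_wr);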

Patch

diff --git a/providers/mlx5/qp.c b/providers/mlx5/qp.c
index 2960ba0..7b4a754 100644
--- a/providers/mlx5/qp.c
+++ b/providers/mlx5/qp.c
@@ -604,6 +604,63 @@  static inline int set_tso_eth_seg(void **seg, struct ibv_send_wr *wr,
 	return 0;
 }
 
+static inline int mlx5_post_send_underlay(struct mlx5_qp *qp, struct ibv_send_wr *wr,
+					  void **pseg, int *total_size,
+					  struct mlx5_sg_copy_ptr *sg_copy_ptr)
+{
+	struct mlx5_wqe_eth_seg *eseg;
+	int inl_hdr_copy_size;
+	void *seg = *pseg;
+	int size = 0;
+
+	if (unlikely(wr->opcode == IBV_WR_SEND_WITH_IMM))
+		return EINVAL;
+
+	memset(seg, 0, sizeof(struct mlx5_wqe_eth_pad));
+	size += sizeof(struct mlx5_wqe_eth_pad);
+	seg += sizeof(struct mlx5_wqe_eth_pad);
+	eseg = seg;
+	*((uint64_t *)eseg) = 0;
+	eseg->rsvd2 = 0;
+
+	if (wr->send_flags & IBV_SEND_IP_CSUM) {
+		if (!(qp->qp_cap_cache & MLX5_CSUM_SUPPORT_UNDERLAY_UD))
+			return EINVAL;
+
+		eseg->cs_flags |= MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
+	}
+
+	if (likely(wr->sg_list[0].length >= MLX5_IPOIB_INLINE_MAX_HEADER_SIZE))
+		/* Copying the minimum required data unless inline mode is set */
+		inl_hdr_copy_size = (wr->send_flags & IBV_SEND_INLINE) ?
+				MLX5_IPOIB_INLINE_MAX_HEADER_SIZE :
+				MLX5_IPOIB_INLINE_MIN_HEADER_SIZE;
+	else {
+		inl_hdr_copy_size = MLX5_IPOIB_INLINE_MIN_HEADER_SIZE;
+		/* We expect at least 4 bytes as part of first entry to hold the IPoIB header */
+		if (unlikely(wr->sg_list[0].length < inl_hdr_copy_size))
+			return EINVAL;
+	}
+
+	memcpy(eseg->inline_hdr_start, (void *)(uintptr_t)wr->sg_list[0].addr,
+	       inl_hdr_copy_size);
+	eseg->inline_hdr_sz = htobe16(inl_hdr_copy_size);
+	size += sizeof(struct mlx5_wqe_eth_seg);
+	seg += sizeof(struct mlx5_wqe_eth_seg);
+
+	/* If we copied all the sge into the inline-headers, then we need to
+	 * start copying from the next sge into the data-segment.
+	 */
+	if (unlikely(wr->sg_list[0].length == inl_hdr_copy_size))
+		sg_copy_ptr->index++;
+	else
+		sg_copy_ptr->offset = inl_hdr_copy_size;
+
+	*pseg = seg;
+	*total_size += (size / 16);
+	return 0;
+}
+
 static inline int _mlx5_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
 				  struct ibv_send_wr **bad_wr)
 {
@@ -806,6 +863,14 @@  static inline int _mlx5_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
 			size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
 			if (unlikely((seg == qend)))
 				seg = mlx5_get_send_wqe(qp, 0);
+
+			if (unlikely(qp->flags & MLX5_QP_FLAGS_USE_UNDERLAY)) {
+				err = mlx5_post_send_underlay(qp, wr, &seg, &size, &sg_copy_ptr);
+				if (unlikely(err)) {
+					*bad_wr = wr;
+					goto out;
+				}
+			}
 			break;
 
 		case IBV_QPT_RAW_PACKET:
diff --git a/providers/mlx5/verbs.c b/providers/mlx5/verbs.c
index a469c93..5a360a9 100644
--- a/providers/mlx5/verbs.c
+++ b/providers/mlx5/verbs.c
@@ -756,7 +756,7 @@  int mlx5_destroy_srq(struct ibv_srq *srq)
 	return 0;
 }
 
-static int sq_overhead(enum ibv_qp_type	qp_type)
+static int sq_overhead(struct mlx5_qp *qp, enum ibv_qp_type qp_type)
 {
 	size_t size = 0;
 	size_t mw_bind_size =
@@ -781,6 +781,10 @@  static int sq_overhead(enum ibv_qp_type	qp_type)
 	case IBV_QPT_UD:
 		size = sizeof(struct mlx5_wqe_ctrl_seg) +
 			sizeof(struct mlx5_wqe_datagram_seg);
+
+		if (qp->flags & MLX5_QP_FLAGS_USE_UNDERLAY)
+			size += (sizeof(struct mlx5_wqe_eth_seg) + sizeof(struct mlx5_wqe_eth_pad));
+
 		break;
 
 	case IBV_QPT_XRC_SEND:
@@ -814,7 +818,7 @@  static int mlx5_calc_send_wqe(struct mlx5_context *ctx,
 	int max_gather;
 	int tot_size;
 
-	size = sq_overhead(attr->qp_type);
+	size = sq_overhead(qp, attr->qp_type);
 	if (size < 0)
 		return size;
 
@@ -887,7 +891,7 @@  static int mlx5_calc_sq_size(struct mlx5_context *ctx,
 		return -EINVAL;
 	}
 
-	qp->max_inline_data = wqe_size - sq_overhead(attr->qp_type) -
+	qp->max_inline_data = wqe_size - sq_overhead(qp, attr->qp_type) -
 		sizeof(struct mlx5_wqe_inl_data_seg);
 	attr->cap.max_inline_data = qp->max_inline_data;
 
diff --git a/providers/mlx5/wqe.h b/providers/mlx5/wqe.h
index 063dc9a..f3f7964 100644
--- a/providers/mlx5/wqe.h
+++ b/providers/mlx5/wqe.h
@@ -50,6 +50,10 @@  struct mlx5_eqe_qp_srq {
 	uint32_t	qp_srq_n;
 };
 
+struct mlx5_wqe_eth_pad {
+	uint8_t rsvd0[16];
+};
+
 struct mlx5_wqe_xrc_seg {
 	__be32		xrc_srqn;
 	uint8_t		rsvd[12];
@@ -63,6 +67,8 @@  struct mlx5_wqe_masked_atomic_seg {
 };
 
 enum {
+	MLX5_IPOIB_INLINE_MIN_HEADER_SIZE	= 4,
+	MLX5_IPOIB_INLINE_MAX_HEADER_SIZE	= 18,
 	MLX5_ETH_L2_INLINE_HEADER_SIZE	= 18,
 	MLX5_ETH_L2_MIN_HEADER_SIZE	= 14,
 };